model-boss/scripts/eval-chain.sh
autocommit 64efd5a661 scripts(scripts): 🔨 Improve debugging and automation logic in evaluation chain script
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
2026-05-11 09:37:57 -07:00

29 lines
1.1 KiB
Bash
Executable file

#!/bin/bash
# Wait for the eval-suite-a user unit to stop being active, then run benchmark
# suites B and C one after the other.
#
# Output:
#   - "[chain]" progress lines go to this script's stdout (errors to stderr).
#   - Each suite's stdout+stderr is written to
#     docs/model-encyclopedia/benchmarks/_eval-suite-{b,c}-stdout.log
#
# Exit status: 0 if both suites succeeded, 1 if setup failed or either suite
# returned non-zero. Suite C is still run when suite B fails.
set -u

MB=/var/home/lilith/Code/@applications/@model-boss
LOG_DIR=$MB/docs/model-encyclopedia/benchmarks

# Print a timestamped progress line in the "[chain] <iso-time> <message>" form.
log() {
  printf '[chain] %s %s\n' "$(date -Iseconds)" "$*"
}

log "waiting for eval-suite-a..."
# Poll every 30s; `is-active` exits 0 only while the unit reports "active".
# NOTE(review): a unit in the "activating" state also ends this loop — confirm
# eval-suite-a is not a Type=oneshot service.
while systemctl --user is-active eval-suite-a >/dev/null 2>&1; do
  sleep 30
done
log "eval-suite-a finished. Status: $(systemctl --user show -p Result --value eval-suite-a)"

# The benchmark CLI is invoked as a module (-m tools.benchmark.cli), so it must
# be started from the repository root; bail out rather than run it elsewhere.
cd "$MB" || { log "cannot cd to $MB" >&2; exit 1; }
# Without the log directory the redirections below would fail before python3
# even starts.
mkdir -p "$LOG_DIR" || { log "cannot create $LOG_DIR" >&2; exit 1; }

failed=0

# Suite B: code benchmarks
log "launching suite B (code)..."
python3 -m tools.benchmark.cli code \
  --models ministral-14b-reasoning,qwen3.6-27b,qwen3.6-35b-a3b,mistral-small-3.2-24b \
  --samples 50 > "$LOG_DIR/_eval-suite-b-stdout.log" 2>&1
# Capture the status right away: any command substitution (such as the
# timestamp in the log line) would overwrite $? before it is expanded.
rc=$?
log "suite B exit=$rc"
if (( rc != 0 )); then
  failed=1
fi

# Suite C: vision describe
log "launching suite C (describe)..."
python3 -m tools.benchmark.cli describe \
  --models qwen3-vl-8b-instruct,gemma-4-31b \
  --samples 15 > "$LOG_DIR/_eval-suite-c-stdout.log" 2>&1
rc=$?
log "suite C exit=$rc"
if (( rc != 0 )); then
  failed=1
fi

log "all done."
exit "$failed"