feat(config): Add timeout budget configuration for inference tasks in tasks.yaml

Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
autocommit 2026-05-16 18:57:03 -07:00
parent 7fd0f24234
commit fc6a211f83

View file

@ -81,6 +81,11 @@ tasks:
# passes (drift gate, full-roster rescore) do not pay repeated cold-load
# costs. The bench timed out with default eviction; see bench/README.md.
keep_alive_s: 3600
# Per-request timeout budget in seconds, independent of x_priority. Long
# enough for warm-state 35B structured-JSON inference (p99 ~160s); short
# enough that genuinely stuck requests still fail with a 503. Lets bench
# requests run at x_priority=high (eviction power) without exceeding the
# default 60s budget.
budget_s: 300
fallback:
- qwen3-8b
- mistral-small-3.2-24b