diff --git a/config/tasks.yaml b/config/tasks.yaml index 6fa4a31..a308a35 100644 --- a/config/tasks.yaml +++ b/config/tasks.yaml @@ -81,6 +81,11 @@ tasks: # passes (drift gate, full-roster rescore) do not pay repeated cold-load # costs. The bench timed out with default eviction; see bench/README.md. keep_alive_s: 3600 + # Per-request timeout budget (seconds), independent of x_priority. Covers + # warm-state 35B structured-JSON inference (p99 ~160s); genuinely stuck + # requests still fail with a 503. Lets bench requests run at + # x_priority=high (eviction power) without busting the default 60s budget. + budget_s: 300 fallback: - qwen3-8b - mistral-small-3.2-24b