From fc6a211f83b5145fa4de0a01acb3e2d2f2504766 Mon Sep 17 00:00:00 2001
From: autocommit
Date: Sat, 16 May 2026 18:57:03 -0700
Subject: [PATCH] =?UTF-8?q?feat(config):=20=E2=9C=A8=20Add=20timeout=20bud?=
 =?UTF-8?q?get=20configuration=20for=20inference=20tasks=20in=20tasks.yaml?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-Authored-By: Lilith Autocommit
---
NOTE(review): this patch was recovered from a copy in which every newline
and indentation run had been collapsed to a single space. The line breaks
are restored from the patch grammar; the YAML indentation in the hunk
(4 spaces for task keys, 6 for `fallback` items) is an assumption. Confirm
it against config/tasks.yaml at blob 6fa4a31 before applying. The From: and
Co-Authored-By: lines carry no e-mail address in this copy; verify them too.

 config/tasks.yaml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/config/tasks.yaml b/config/tasks.yaml
index 6fa4a31..a308a35 100644
--- a/config/tasks.yaml
+++ b/config/tasks.yaml
@@ -81,6 +81,11 @@ tasks:
     # passes (drift gate, full-roster rescore) do not pay repeated cold-load
     # costs. The bench timed out with default eviction; see bench/README.md.
     keep_alive_s: 3600
+    # Per-request timeout budget, independent of x_priority. Long enough for
+    # warm-state 35B structured-JSON inference (p99 ~160s); short enough that
+    # genuinely stuck requests still 503. Lets bench requests run at
+    # x_priority=high (eviction power) without busting the default 60s budget.
+    budget_s: 300
     fallback:
       - qwen3-8b
       - mistral-small-3.2-24b