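chore(src): 🔧 Update LLM-related utility files (llm.py) and entry script (main.py) for compatibility with new model configurations: llm.py now logs a warning and returns False instead of raising RuntimeError when no ManagedModelLoader is configured, and main.py builds the FastAPI app directly at module level instead of awaiting create_ml_service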

Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
Lilith 2026-02-15 09:10:04 -08:00
parent 387cbbeb88
commit f4a097ca14
2 changed files with 29 additions and 28 deletions

View file

@ -59,10 +59,11 @@ class LLMManager:
RuntimeError: If managed loader not configured
"""
if self._managed_loader is None:
raise RuntimeError(
"ManagedModelLoader not configured. "
"Ensure GPULifespanManager is properly initialized with model-boss v3[gguf]."
logger.warning(
"ManagedModelLoader not configured (model-boss not installed). "
"Model loading unavailable — LLM endpoints will return errors until model-boss is installed."
)
return False
model_to_load = model_id or settings.model_id

View file

@ -11,16 +11,16 @@ import asyncio
import uuid
from datetime import datetime
from fastapi import HTTPException, Request, Response
from fastapi import FastAPI, HTTPException, Request, Response
from pydantic import BaseModel, Field
from fastapi.routing import APIRoute
from lilith_service_fastapi_bootstrap import (
create_ml_service,
GPULifespanManager,
HealthChecker,
IdleResourceManager,
DependencyStartupConfig,
setup_logging,
apply_cors,
)
from .config import settings
@ -334,29 +334,29 @@ class LoggingRoute(APIRoute):
# Create the FastAPI app using the service factory with automatic dependency startup
async def create_app():
"""Create the FastAPI application with dependency startup."""
return await create_ml_service(
title="Conversation Assistant ML Service",
description="Production ML inference service with Redis caching for iMessage response generation",
version="0.1.0",
settings=settings,
lifespan_manager=lifespan,
health_checker=health_checker,
dependencies=DependencyStartupConfig(
feature="conversation-assistant",
),
)
# Create app synchronously at module level.
# The bootstrap's create_ml_service is async (for dependency startup), but uvicorn's
# running event loop prevents asyncio.run() at import time. Since nightcrawler manages
# service dependencies externally, we create the FastAPI app directly and defer async
# initialization to the lifespan startup handler.
setup_logging(
service_name=settings.service_name,
level=settings.log_level,
json_format=not settings.debug,
)
# Initialize app at module level for uvicorn string loading.
# asyncio.run() fails when uvicorn already has a running event loop (uvloop),
# so fall back to a temporary loop for synchronous module-level initialization.
try:
app = asyncio.run(create_app())
except RuntimeError:
_loop = asyncio.new_event_loop()
app = _loop.run_until_complete(create_app())
_loop.close()
app = FastAPI(
title="Conversation Assistant ML Service",
description="Production ML inference service with Redis caching for iMessage response generation",
version="0.1.0",
lifespan=lifespan.lifespan,
)
app.state.settings = settings
app.state.health_checker = health_checker
app.state.lifespan_manager = lifespan
apply_cors(app, settings.cors_origins)
# Apply logging route class to all routes
app.router.route_class = LoggingRoute