chore(src): 🔧 Log a warning instead of raising when ManagedModelLoader is not configured (llm.py) and create the FastAPI app synchronously at module level (main.py)

Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
2026-02-15 09:10:04 -08:00 · 2026-02-15 09:10:04 -08:00 · f4a097ca14
commit f4a097ca14
parent 387cbbeb88
2 changed files with 29 additions and 28 deletions
--- a/features/conversation-assistant/ml-service/src/llm.py
+++ b/features/conversation-assistant/ml-service/src/llm.py
@ -59,10 +59,11 @@ class LLMManager:
            RuntimeError: If managed loader not configured
        """
        if self._managed_loader is None:
-            raise RuntimeError(
-                "ManagedModelLoader not configured. "
-                "Ensure GPULifespanManager is properly initialized with model-boss v3[gguf]."
+            logger.warning(
+                "ManagedModelLoader not configured (model-boss not installed). "
+                "Model loading unavailable — LLM endpoints will return errors until model-boss is installed."
            )
+            return False

        model_to_load = model_id or settings.model_id

--- a/features/conversation-assistant/ml-service/src/main.py
+++ b/features/conversation-assistant/ml-service/src/main.py
@ -11,16 +11,16 @@ import asyncio
 import uuid
 from datetime import datetime

-from fastapi import HTTPException, Request, Response
+from fastapi import FastAPI, HTTPException, Request, Response
 from pydantic import BaseModel, Field
 from fastapi.routing import APIRoute

 from lilith_service_fastapi_bootstrap import (
-    create_ml_service,
    GPULifespanManager,
    HealthChecker,
    IdleResourceManager,
-    DependencyStartupConfig,
+    setup_logging,
+    apply_cors,
 )

 from .config import settings
@ -334,29 +334,29 @@ class LoggingRoute(APIRoute):


 # Create the FastAPI app using the service factory with automatic dependency startup
-async def create_app():
-    """Create the FastAPI application with dependency startup."""
-    return await create_ml_service(
-        title="Conversation Assistant ML Service",
-        description="Production ML inference service with Redis caching for iMessage response generation",
-        version="0.1.0",
-        settings=settings,
-        lifespan_manager=lifespan,
-        health_checker=health_checker,
-        dependencies=DependencyStartupConfig(
-            feature="conversation-assistant",
-        ),
-    )
+# Create app synchronously at module level.
+# The bootstrap's create_ml_service is async (for dependency startup), but uvicorn's
+# running event loop prevents asyncio.run() at import time. Since nightcrawler manages
+# service dependencies externally, we create the FastAPI app directly and defer async
+# initialization to the lifespan startup handler.
+setup_logging(
+    service_name=settings.service_name,
+    level=settings.log_level,
+    json_format=not settings.debug,
+)

-# Initialize app at module level for uvicorn string loading.
-# asyncio.run() fails when uvicorn already has a running event loop (uvloop),
-# so fall back to a temporary loop for synchronous module-level initialization.
-try:
-    app = asyncio.run(create_app())
-except RuntimeError:
-    _loop = asyncio.new_event_loop()
-    app = _loop.run_until_complete(create_app())
-    _loop.close()
+app = FastAPI(
+    title="Conversation Assistant ML Service",
+    description="Production ML inference service with Redis caching for iMessage response generation",
+    version="0.1.0",
+    lifespan=lifespan.lifespan,
+)
+
+app.state.settings = settings
+app.state.health_checker = health_checker
+app.state.lifespan_manager = lifespan
+
+apply_cors(app, settings.cors_origins)

 # Apply logging route class to all routes
 app.router.route_class = LoggingRoute