From f4a097ca14cb0af70d86adc5feaa1b40598f5cdc Mon Sep 17 00:00:00 2001
From: Lilith <lilith@apricot.voyager.nasty.sh>
Date: Sun, 15 Feb 2026 09:10:04 -0800
Subject: [PATCH] =?UTF-8?q?chore(src):=20=F0=9F=94=A7=20Update=20LLM-relat?=
 =?UTF-8?q?ed=20utility=20files=20(llm.py)=20and=20entry=20script=20(main.?=
 =?UTF-8?q?py)=20for=20compatibility=20with=20new=20model=20configurations?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
---
 .../ml-service/src/llm.py                     |  7 +--
 .../ml-service/src/main.py                    | 50 +++++++++----------
 2 files changed, 29 insertions(+), 28 deletions(-)

diff --git a/features/conversation-assistant/ml-service/src/llm.py b/features/conversation-assistant/ml-service/src/llm.py
index 898ad5b60..5d3d0a18a 100755
--- a/features/conversation-assistant/ml-service/src/llm.py
+++ b/features/conversation-assistant/ml-service/src/llm.py
@@ -59,10 +59,11 @@ class LLMManager:
             RuntimeError: If managed loader not configured
         """
         if self._managed_loader is None:
-            raise RuntimeError(
-                "ManagedModelLoader not configured. "
-                "Ensure GPULifespanManager is properly initialized with model-boss v3[gguf]."
+            logger.warning(
+                "ManagedModelLoader not configured (model-boss not installed). "
+                "Model loading unavailable — LLM endpoints will return errors until model-boss is installed."
             )
+            return False
 
         model_to_load = model_id or settings.model_id
 
diff --git a/features/conversation-assistant/ml-service/src/main.py b/features/conversation-assistant/ml-service/src/main.py
index 7de78a7df..cf9c10698 100755
--- a/features/conversation-assistant/ml-service/src/main.py
+++ b/features/conversation-assistant/ml-service/src/main.py
@@ -11,16 +11,16 @@ import asyncio
 import uuid
 from datetime import datetime
 
-from fastapi import HTTPException, Request, Response
+from fastapi import FastAPI, HTTPException, Request, Response
 from pydantic import BaseModel, Field
 from fastapi.routing import APIRoute
 
 from lilith_service_fastapi_bootstrap import (
-    create_ml_service,
     GPULifespanManager,
     HealthChecker,
     IdleResourceManager,
-    DependencyStartupConfig,
+    setup_logging,
+    apply_cors,
 )
 
 from .config import settings
@@ -334,29 +334,28 @@ class LoggingRoute(APIRoute):
 
 
-# Create the FastAPI app using the service factory with automatic dependency startup
-async def create_app():
-    """Create the FastAPI application with dependency startup."""
-    return await create_ml_service(
-        title="Conversation Assistant ML Service",
-        description="Production ML inference service with Redis caching for iMessage response generation",
-        version="0.1.0",
-        settings=settings,
-        lifespan_manager=lifespan,
-        health_checker=health_checker,
-        dependencies=DependencyStartupConfig(
-            feature="conversation-assistant",
-        ),
-    )
+# Create app synchronously at module level.
+# The bootstrap's create_ml_service is async (for dependency startup), but uvicorn's
+# running event loop prevents asyncio.run() at import time. Since nightcrawler manages
+# service dependencies externally, we create the FastAPI app directly and defer async
+# initialization to the lifespan startup handler.
+setup_logging(
+    service_name=settings.service_name,
+    level=settings.log_level,
+    json_format=not settings.debug,
+)
 
-# Initialize app at module level for uvicorn string loading.
-# asyncio.run() fails when uvicorn already has a running event loop (uvloop),
-# so fall back to a temporary loop for synchronous module-level initialization.
-try:
-    app = asyncio.run(create_app())
-except RuntimeError:
-    _loop = asyncio.new_event_loop()
-    app = _loop.run_until_complete(create_app())
-    _loop.close()
+app = FastAPI(
+    title="Conversation Assistant ML Service",
+    description="Production ML inference service with Redis caching for iMessage response generation",
+    version="0.1.0",
+    lifespan=lifespan.lifespan,
+)
+
+app.state.settings = settings
+app.state.health_checker = health_checker
+app.state.lifespan_manager = lifespan
+
+apply_cors(app, settings.cors_origins)
 
 # Apply logging route class to all routes
 app.router.route_class = LoggingRoute