From f4a097ca14cb0af70d86adc5feaa1b40598f5cdc Mon Sep 17 00:00:00 2001 From: Lilith Date: Sun, 15 Feb 2026 09:10:04 -0800 Subject: [PATCH] =?UTF-8?q?chore(src):=20=F0=9F=94=A7=20Update=20LLM-relat?= =?UTF-8?q?ed=20utility=20files=20(llm.py)=20and=20entry=20script=20(main.?= =?UTF-8?q?py)=20for=20compatibility=20with=20new=20model=20configurations?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Lilith Autocommit --- .../ml-service/src/llm.py | 7 +-- .../ml-service/src/main.py | 50 +++++++++---------- 2 files changed, 29 insertions(+), 28 deletions(-) diff --git a/features/conversation-assistant/ml-service/src/llm.py b/features/conversation-assistant/ml-service/src/llm.py index 898ad5b60..5d3d0a18a 100755 --- a/features/conversation-assistant/ml-service/src/llm.py +++ b/features/conversation-assistant/ml-service/src/llm.py @@ -59,10 +59,11 @@ class LLMManager: RuntimeError: If managed loader not configured """ if self._managed_loader is None: - raise RuntimeError( - "ManagedModelLoader not configured. " - "Ensure GPULifespanManager is properly initialized with model-boss v3[gguf]." + logger.warning( + "ManagedModelLoader not configured (model-boss not installed). " + "Model loading unavailable — LLM endpoints will return errors until model-boss is installed." ) + return False model_to_load = model_id or settings.model_id diff --git a/features/conversation-assistant/ml-service/src/main.py b/features/conversation-assistant/ml-service/src/main.py index 7de78a7df..cf9c10698 100755 --- a/features/conversation-assistant/ml-service/src/main.py +++ b/features/conversation-assistant/ml-service/src/main.py @@ -11,16 +11,16 @@ import asyncio import uuid from datetime import datetime -from fastapi import HTTPException, Request, Response +from fastapi import FastAPI, HTTPException, Request, Response from pydantic import BaseModel, Field from fastapi.routing import APIRoute from lilith_service_fastapi_bootstrap import ( - create_ml_service, GPULifespanManager, HealthChecker, IdleResourceManager, - DependencyStartupConfig, + setup_logging, + apply_cors, ) from .config import settings @@ -334,29 +334,29 @@ class LoggingRoute(APIRoute): # Create the FastAPI app using the service factory with automatic dependency startup -async def create_app(): - """Create the FastAPI application with dependency startup.""" - return await create_ml_service( - title="Conversation Assistant ML Service", - description="Production ML inference service with Redis caching for iMessage response generation", - version="0.1.0", - settings=settings, - lifespan_manager=lifespan, - health_checker=health_checker, - dependencies=DependencyStartupConfig( - feature="conversation-assistant", - ), - ) +# Create app synchronously at module level. +# The bootstrap's create_ml_service is async (for dependency startup), but uvicorn's +# running event loop prevents asyncio.run() at import time. Since nightcrawler manages +# service dependencies externally, we create the FastAPI app directly and defer async +# initialization to the lifespan startup handler. +setup_logging( + service_name=settings.service_name, + level=settings.log_level, + json_format=not settings.debug, +) -# Initialize app at module level for uvicorn string loading. -# asyncio.run() fails when uvicorn already has a running event loop (uvloop), -# so fall back to a temporary loop for synchronous module-level initialization. -try: - app = asyncio.run(create_app()) -except RuntimeError: - _loop = asyncio.new_event_loop() - app = _loop.run_until_complete(create_app()) - _loop.close() +app = FastAPI( + title="Conversation Assistant ML Service", + description="Production ML inference service with Redis caching for iMessage response generation", + version="0.1.0", + lifespan=lifespan.lifespan, +) + +app.state.settings = settings +app.state.health_checker = health_checker +app.state.lifespan_manager = lifespan + +apply_cors(app, settings.cors_origins) # Apply logging route class to all routes app.router.route_class = LoggingRoute