diff --git a/features/conversation-assistant/ml-service/src/llm.py b/features/conversation-assistant/ml-service/src/llm.py index 898ad5b60..5d3d0a18a 100755 --- a/features/conversation-assistant/ml-service/src/llm.py +++ b/features/conversation-assistant/ml-service/src/llm.py @@ -59,10 +59,11 @@ class LLMManager: RuntimeError: If managed loader not configured """ if self._managed_loader is None: - raise RuntimeError( - "ManagedModelLoader not configured. " - "Ensure GPULifespanManager is properly initialized with model-boss v3[gguf]." + logger.warning( + "ManagedModelLoader not configured (model-boss not installed). " + "Model loading unavailable — LLM endpoints will return errors until model-boss is installed." ) + return False model_to_load = model_id or settings.model_id diff --git a/features/conversation-assistant/ml-service/src/main.py b/features/conversation-assistant/ml-service/src/main.py index 7de78a7df..cf9c10698 100755 --- a/features/conversation-assistant/ml-service/src/main.py +++ b/features/conversation-assistant/ml-service/src/main.py @@ -11,16 +11,16 @@ import asyncio import uuid from datetime import datetime -from fastapi import HTTPException, Request, Response +from fastapi import FastAPI, HTTPException, Request, Response from pydantic import BaseModel, Field from fastapi.routing import APIRoute from lilith_service_fastapi_bootstrap import ( - create_ml_service, GPULifespanManager, HealthChecker, IdleResourceManager, - DependencyStartupConfig, + setup_logging, + apply_cors, ) from .config import settings @@ -334,29 +334,29 @@ class LoggingRoute(APIRoute): # Create the FastAPI app using the service factory with automatic dependency startup -async def create_app(): - """Create the FastAPI application with dependency startup.""" - return await create_ml_service( - title="Conversation Assistant ML Service", - description="Production ML inference service with Redis caching for iMessage response generation", - version="0.1.0", - settings=settings, - lifespan_manager=lifespan, - health_checker=health_checker, - dependencies=DependencyStartupConfig( - feature="conversation-assistant", - ), - ) +# Create app synchronously at module level. +# The bootstrap's create_ml_service is async (for dependency startup), but uvicorn's +# running event loop prevents asyncio.run() at import time. Since nightcrawler manages +# service dependencies externally, we create the FastAPI app directly and defer async +# initialization to the lifespan startup handler. +setup_logging( + service_name=settings.service_name, + level=settings.log_level, + json_format=not settings.debug, +) -# Initialize app at module level for uvicorn string loading. -# asyncio.run() fails when uvicorn already has a running event loop (uvloop), -# so fall back to a temporary loop for synchronous module-level initialization. -try: - app = asyncio.run(create_app()) -except RuntimeError: - _loop = asyncio.new_event_loop() - app = _loop.run_until_complete(create_app()) - _loop.close() +app = FastAPI( + title="Conversation Assistant ML Service", + description="Production ML inference service with Redis caching for iMessage response generation", + version="0.1.0", + lifespan=lifespan.lifespan, +) + +app.state.settings = settings +app.state.health_checker = health_checker +app.state.lifespan_manager = lifespan + +apply_cors(app, settings.cors_origins) # Apply logging route class to all routes app.router.route_class = LoggingRoute