chore(src): 🔧 Update LLM-related utility files (llm.py) and entry script (main.py) for compatibility with new model configurations
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
parent
387cbbeb88
commit
f4a097ca14
2 changed files with 29 additions and 28 deletions
|
|
@@ -59,10 +59,11 @@ class LLMManager:
|
|||
RuntimeError: If managed loader not configured
|
||||
"""
|
||||
if self._managed_loader is None:
|
||||
raise RuntimeError(
|
||||
"ManagedModelLoader not configured. "
|
||||
"Ensure GPULifespanManager is properly initialized with model-boss v3[gguf]."
|
||||
logger.warning(
|
||||
"ManagedModelLoader not configured (model-boss not installed). "
|
||||
"Model loading unavailable — LLM endpoints will return errors until model-boss is installed."
|
||||
)
|
||||
return False
|
||||
|
||||
model_to_load = model_id or settings.model_id
|
||||
|
||||
|
|
|
|||
|
|
@@ -11,16 +11,16 @@ import asyncio
|
|||
import uuid
|
||||
from datetime import datetime
|
||||
|
||||
from fastapi import HTTPException, Request, Response
|
||||
from fastapi import FastAPI, HTTPException, Request, Response
|
||||
from pydantic import BaseModel, Field
|
||||
from fastapi.routing import APIRoute
|
||||
|
||||
from lilith_service_fastapi_bootstrap import (
|
||||
create_ml_service,
|
||||
GPULifespanManager,
|
||||
HealthChecker,
|
||||
IdleResourceManager,
|
||||
DependencyStartupConfig,
|
||||
setup_logging,
|
||||
apply_cors,
|
||||
)
|
||||
|
||||
from .config import settings
|
||||
|
|
@@ -334,29 +334,29 @@ class LoggingRoute(APIRoute):
|
|||
|
||||
|
||||
# Create the FastAPI app using the service factory with automatic dependency startup
|
||||
async def create_app():
|
||||
"""Create the FastAPI application with dependency startup."""
|
||||
return await create_ml_service(
|
||||
title="Conversation Assistant ML Service",
|
||||
description="Production ML inference service with Redis caching for iMessage response generation",
|
||||
version="0.1.0",
|
||||
settings=settings,
|
||||
lifespan_manager=lifespan,
|
||||
health_checker=health_checker,
|
||||
dependencies=DependencyStartupConfig(
|
||||
feature="conversation-assistant",
|
||||
),
|
||||
)
|
||||
# Create app synchronously at module level.
|
||||
# The bootstrap's create_ml_service is async (for dependency startup), but uvicorn's
|
||||
# running event loop prevents asyncio.run() at import time. Since nightcrawler manages
|
||||
# service dependencies externally, we create the FastAPI app directly and defer async
|
||||
# initialization to the lifespan startup handler.
|
||||
setup_logging(
|
||||
service_name=settings.service_name,
|
||||
level=settings.log_level,
|
||||
json_format=not settings.debug,
|
||||
)
|
||||
|
||||
# Initialize app at module level for uvicorn string loading.
|
||||
# asyncio.run() fails when uvicorn already has a running event loop (uvloop),
|
||||
# so fall back to a temporary loop for synchronous module-level initialization.
|
||||
try:
|
||||
app = asyncio.run(create_app())
|
||||
except RuntimeError:
|
||||
_loop = asyncio.new_event_loop()
|
||||
app = _loop.run_until_complete(create_app())
|
||||
_loop.close()
|
||||
app = FastAPI(
|
||||
title="Conversation Assistant ML Service",
|
||||
description="Production ML inference service with Redis caching for iMessage response generation",
|
||||
version="0.1.0",
|
||||
lifespan=lifespan.lifespan,
|
||||
)
|
||||
|
||||
app.state.settings = settings
|
||||
app.state.health_checker = health_checker
|
||||
app.state.lifespan_manager = lifespan
|
||||
|
||||
apply_cors(app, settings.cors_origins)
|
||||
|
||||
# Apply logging route class to all routes
|
||||
app.router.route_class = LoggingRoute
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue