ml-model-loader/src_python/tqftw_model_loader/__init__.py
Lilith bbbccd685b feat: universal ML model support with auto-discovery
Add support for all ML model types beyond GGUF:
- New discovery module for auto-scanning model directories
- Detect formats: GGUF, safetensors, ONNX, PyTorch, diffusion pipelines
- CLI commands: discover, scan, sync for manifest management
- Manifest v2.0 with format field, directory support, file lists

Python loaders (v2.0.0):
- ONNXLoader with CUDA/TensorRT execution providers
- WhisperLoader for faster-whisper with transcribe/stream
- get_auto_loader() for automatic backend selection

Breaking: Manifest schema upgraded to v2.0 (auto-migrates v1.x on load)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-01 15:21:52 -08:00

190 lines
4.5 KiB
Python

"""
TQFTW Model Loader - Unified ML model loading and caching.
Provides a consistent interface for loading models across different frameworks:
- HuggingFace Transformers (text, classification, QA, etc.)
- Diffusers (Stable Diffusion, SDXL, Flux)
- GGUF (llama-cpp-python for quantized models)
- ONNX Runtime (cross-platform inference)
- faster-whisper (speech recognition)
Features:
- Auto-discovery of models from directories
- Remote model fetching via rsync/scp
- Local caching with size verification
- GPU/CPU device management
- Manifest-based model registry (v2.0)
- Support for multi-file/directory models
- Async and sync loading APIs
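Example (the ``await`` calls need a running event loop, e.g. inside a coroutine or the ``python -m asyncio`` REPL):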
>>> from tqftw_model_loader import get_loader, ensure_model, get_auto_loader
>>> # Quick path resolution
>>> path = ensure_model("ministral-3b-instruct")
>>> # Auto-select loader based on manifest format
>>> loader = get_auto_loader("whisper-large-v3")
>>> model = await loader.load("whisper-large-v3")
>>> # Load with framework-specific loader
>>> loader = get_loader("hf")
>>> model = await loader.load("Marqo/nsfw-image-detection-384", task="image-classification")
>>> # Load SDXL
>>> loader = get_loader("diffusers")
>>> pipeline = await loader.load("stabilityai/stable-diffusion-xl-base-1.0")
>>> # Load GGUF model
>>> loader = get_loader("gguf")
>>> llm = await loader.load("ministral-3b-instruct", n_gpu_layers=-1)
>>> # Load ONNX model
>>> loader = get_loader("onnx")
>>> session = await loader.load("silero-vad")
>>> # Load Whisper model
>>> loader = get_loader("whisper")
>>> model = await loader.load("large-v3", device="cuda")
"""
__version__ = "2.0.0"
# Base classes and types
from .base import (
BaseModelLoader,
ModelInfo,
ModelLoadError,
ModelNotFoundError,
DeviceNotAvailableError,
)
from .types import (
ModelEntry,
LoadResult,
TransferProgress,
CachedModel,
CacheStats,
ModelFormat,
ModelCategory,
DiffusionComponent,
MODEL_FILE_EXTENSIONS,
)
# Device management
from .device import (
DeviceManager,
DeviceInfo,
get_best_device,
get_device_count,
allocate_device,
clear_gpu_cache,
)
# Registry
from .registry import (
register_loader,
get_loader,
get_loader_class,
list_loaders,
list_aliases,
is_loader_registered,
)
# Original loader (path resolution and caching)
from .loader import (
ModelLoader as PathModelLoader,
ensure_model,
ensure_model_sync,
get_loader as get_path_loader,
)
# Backwards compatibility: keep the original ``ModelLoader`` name available as
# an alias of the path-resolution loader imported above as ``PathModelLoader``.
ModelLoader = PathModelLoader
# Framework-specific loaders. Each one is optional: the import is attempted
# eagerly at package import time (it is not deferred), and when it raises
# ImportError -- typically because the backing framework is not installed --
# the public name is bound to None instead. Callers that use these classes
# directly must therefore check for None first.
# Loaders that do import will auto-register with the registry when imported,
# so the order of the imports below is kept as-is.
try:
    from .hf_loader import HFModelLoader
except ImportError:
    HFModelLoader = None  # type: ignore
try:
    from .diffusers_loader import DiffusersLoader
except ImportError:
    DiffusersLoader = None  # type: ignore
try:
    from .gguf_loader import GGUFModelLoader
except ImportError:
    GGUFModelLoader = None  # type: ignore
try:
    from .onnx_loader import ONNXLoader
except ImportError:
    ONNXLoader = None  # type: ignore
try:
    from .whisper_loader import WhisperLoader
except ImportError:
    WhisperLoader = None  # type: ignore
# Auto-loader selection
from .auto import (
get_auto_loader,
detect_format_from_path,
get_loader_for_format,
get_loader_for_category,
)
# Public API of the package: the names exported by
# ``from tqftw_model_loader import *``. Entries are grouped by the submodule
# they are imported from above.
__all__ = [
    # Version
    "__version__",
    # Base classes and exceptions (from .base)
    "BaseModelLoader",
    "ModelInfo",
    "ModelLoadError",
    "ModelNotFoundError",
    "DeviceNotAvailableError",
    # Types (from .types)
    "ModelEntry",
    "LoadResult",
    "TransferProgress",
    "CachedModel",
    "CacheStats",
    "ModelFormat",
    "ModelCategory",
    "DiffusionComponent",
    "MODEL_FILE_EXTENSIONS",
    # Device management (from .device)
    "DeviceManager",
    "DeviceInfo",
    "get_best_device",
    "get_device_count",
    "allocate_device",
    "clear_gpu_cache",
    # Registry (from .registry); note that "get_loader" here is the registry
    # function, while the path loader's get_loader is exported as
    # "get_path_loader" below.
    "register_loader",
    "get_loader",
    "get_loader_class",
    "list_loaders",
    "list_aliases",
    "is_loader_registered",
    # Auto-loader selection (from .auto)
    "get_auto_loader",
    "detect_format_from_path",
    "get_loader_for_format",
    "get_loader_for_category",
    # Path resolution (original API, from .loader); "ModelLoader" is the
    # backwards-compatible alias of "PathModelLoader".
    "ModelLoader",
    "PathModelLoader",
    "ensure_model",
    "ensure_model_sync",
    "get_path_loader",
    # Framework loaders; each of these names is bound to None when its
    # guarded import raised ImportError at package import time.
    "HFModelLoader",
    "DiffusersLoader",
    "GGUFModelLoader",
    "ONNXLoader",
    "WhisperLoader",
]