ml-model-loader/src_python/tqftw_model_loader/cli.py
Lilith aa01d0f388 chore: rename package @lilith/model-loader -> @lilith/ml-model-loader
Package renamed to follow naming convention:
@lilith/{namespace}-{parent}-{child}

Generated by rename-packages.sh
2025-12-31 01:32:00 -08:00

94 lines
3 KiB
Python

"""
CLI entry point for Python-side operations.
Note: Primary CLI is the TypeScript implementation (bin/model-loader.ts).
This provides Python-native commands for testing and direct usage.
"""
import argparse
import asyncio
import sys
import json
from pathlib import Path
def _build_parser() -> argparse.ArgumentParser:
    """Build the argument parser with the ``device`` and ``test-load`` subcommands."""
    parser = argparse.ArgumentParser(
        prog="model-loader",
        description="TQFTW Model Loader - ML model loading and caching",
    )
    subparsers = parser.add_subparsers(dest="command", help="Commands")

    # device command
    device_parser = subparsers.add_parser("device", help="Show device information")
    device_parser.add_argument("--json", action="store_true", help="Output JSON")

    # test-load command
    test_parser = subparsers.add_parser("test-load", help="Test loading a model")
    test_parser.add_argument("model_id", help="Model ID to load")
    test_parser.add_argument("--loader", default="gguf", help="Loader type (hf, diffusers, gguf)")
    test_parser.add_argument("--device", help="Device to use")

    return parser


def _run_device(args: argparse.Namespace) -> None:
    """Print the best device, the device count and the CUDA device list.

    Output is a JSON document when ``args.json`` is set, otherwise plain text.
    """
    # Imported inside the command so the import only runs when it is selected.
    from .device import DeviceManager, get_best_device, get_device_count

    devices = DeviceManager().get_cuda_devices()

    if args.json:
        output = {
            "best_device": get_best_device(),
            "device_count": get_device_count(),
            "cuda_devices": [
                {
                    "name": d.name,
                    "index": d.index,
                    "total_memory_mb": d.total_memory_mb,
                    "free_memory_mb": d.free_memory_mb,
                }
                for d in devices
            ],
        }
        print(json.dumps(output, indent=2))
        return

    print(f"Best device: {get_best_device()}")
    print(f"Device count: {get_device_count()}")
    if devices:
        print("\nCUDA devices:")
        for d in devices:
            print(f" {d.index}: {d.name}")
            print(f" Memory: {d.free_memory_mb:.0f} / {d.total_memory_mb:.0f} MB")


async def _run_test_load(args: argparse.Namespace) -> None:
    """Load ``args.model_id`` with the selected loader, report on it, then unload.

    The loader is always unloaded once ``load()`` has succeeded, even if
    reporting the device or model info raises.
    """
    # Imported inside the command so the import only runs when it is selected.
    from .registry import get_loader

    loader = get_loader(args.loader)
    print(f"Loading {args.model_id} with {args.loader} loader...")

    # Only forward ``device`` when the user supplied one, so the loader's own
    # default applies otherwise.
    kwargs = {}
    if args.device:
        kwargs["device"] = args.device

    await loader.load(args.model_id, **kwargs)
    try:
        print("Loaded successfully!")
        print(f" Device: {loader.get_device()}")
        if loader.model_info:
            print(f" Load time: {loader.model_info.load_time_seconds:.2f}s")
            print(f" Memory: {loader.model_info.memory_used_mb:.0f} MB")
    finally:
        # Release the model even when the reporting above fails.
        await loader.unload()
    print("Unloaded.")


def main() -> None:
    """Main CLI entry point.

    Parses ``sys.argv`` and dispatches to the ``device`` or ``test-load``
    command. When no command is given, prints the help text and exits with
    status 1.
    """
    parser = _build_parser()
    args = parser.parse_args()

    if args.command == "device":
        _run_device(args)
    elif args.command == "test-load":
        asyncio.run(_run_test_load(args))
    else:
        parser.print_help()
        sys.exit(1)
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()