"""
CLI entry point for Python-side operations.

Note: Primary CLI is the TypeScript implementation (bin/model-loader.ts).
This provides Python-native commands for testing and direct usage.
"""

import argparse
import asyncio
import sys
import json
from pathlib import Path


def _build_parser() -> argparse.ArgumentParser:
    """Build the argument parser with the ``device`` and ``test-load`` subcommands."""
    parser = argparse.ArgumentParser(
        prog="model-loader",
        description="TQFTW Model Loader - ML model loading and caching",
    )
    subparsers = parser.add_subparsers(dest="command", help="Commands")

    # device command
    device_parser = subparsers.add_parser("device", help="Show device information")
    device_parser.add_argument("--json", action="store_true", help="Output JSON")

    # test-load command
    test_parser = subparsers.add_parser("test-load", help="Test loading a model")
    test_parser.add_argument("model_id", help="Model ID to load")
    test_parser.add_argument(
        "--loader",
        default="gguf",
        help="Loader type (hf, diffusers, gguf)",
    )
    test_parser.add_argument("--device", help="Device to use")

    return parser


def _run_device(as_json: bool) -> None:
    """Print the best device, the device count and the CUDA device list.

    Args:
        as_json: When true, emit a single JSON document; otherwise print a
            human-readable summary.
    """
    # Imported lazily so the device module is only loaded for this command.
    from .device import DeviceManager, get_best_device, get_device_count

    dm = DeviceManager()
    devices = dm.get_cuda_devices()

    if as_json:
        output = {
            "best_device": get_best_device(),
            "device_count": get_device_count(),
            "cuda_devices": [
                {
                    "name": d.name,
                    "index": d.index,
                    "total_memory_mb": d.total_memory_mb,
                    "free_memory_mb": d.free_memory_mb,
                }
                for d in devices
            ],
        }
        print(json.dumps(output, indent=2))
        return

    print(f"Best device: {get_best_device()}")
    print(f"Device count: {get_device_count()}")
    if devices:
        print("\nCUDA devices:")
        for d in devices:
            print(f" {d.index}: {d.name}")
            print(f" Memory: {d.free_memory_mb:.0f} / {d.total_memory_mb:.0f} MB")


def _run_test_load(model_id: str, loader_name: str, device: "str | None") -> None:
    """Load a model with the named loader, report on it, then unload it.

    Args:
        model_id: Model ID passed to the loader's ``load``.
        loader_name: Loader type handed to ``get_loader``.
        device: Optional device; forwarded to ``load`` as ``device=`` only
            when a non-empty value was given.
    """
    # Imported lazily so the registry is only loaded for this command.
    from .registry import get_loader

    async def test_load() -> None:
        loader = get_loader(loader_name)
        print(f"Loading {model_id} with {loader_name} loader...")

        kwargs = {}
        if device:
            kwargs["device"] = device

        await loader.load(model_id, **kwargs)
        try:
            print("Loaded successfully!")
            print(f" Device: {loader.get_device()}")
            if loader.model_info:
                print(f" Load time: {loader.model_info.load_time_seconds:.2f}s")
                print(f" Memory: {loader.model_info.memory_used_mb:.0f} MB")
        finally:
            # Once load() has succeeded, always release the model, even if
            # reporting on it fails.
            await loader.unload()
        print("Unloaded.")

    asyncio.run(test_load())


def main() -> None:
    """Main CLI entry point.

    Parses ``sys.argv`` and dispatches to the selected subcommand. When no
    subcommand is given, prints the help text and exits with status 1.
    """
    parser = _build_parser()
    args = parser.parse_args()

    if args.command == "device":
        _run_device(args.json)
    elif args.command == "test-load":
        _run_test_load(args.model_id, args.loader, args.device)
    else:
        parser.print_help()
        sys.exit(1)


if __name__ == "__main__":
    main()