2025-12-28 04:32:35 -08:00
|
|
|
"""
|
|
|
|
|
CLI entry point for Python-side operations.
|
|
|
|
|
|
|
|
|
|
Note: Primary CLI is the TypeScript implementation (bin/model-loader.ts).
|
|
|
|
|
This provides Python-native commands for testing and direct usage.
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
import argparse
|
|
|
|
|
import asyncio
|
|
|
|
|
import sys
|
|
|
|
|
import json
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def main() -> None:
    """Main CLI entry point.

    Parses ``sys.argv`` and dispatches to the selected subcommand:

    * ``device`` -- print device information (``--json`` for JSON output).
    * ``test-load`` -- load a model with the chosen loader, report on it,
      then unload it.

    With no subcommand, prints the help text and exits with status 1.
    """
    parser = _build_parser()
    args = parser.parse_args()

    if args.command == "device":
        _show_device_info(as_json=args.json)
    elif args.command == "test-load":
        asyncio.run(_test_load(args.model_id, args.loader, args.device))
    else:
        # No subcommand given: show usage and signal failure to the caller.
        parser.print_help()
        sys.exit(1)


def _build_parser() -> argparse.ArgumentParser:
    """Build the ``model-loader`` argument parser with its subcommands."""
    parser = argparse.ArgumentParser(
        prog="model-loader",
        description="TQFTW Model Loader - ML model loading and caching",
    )
    subparsers = parser.add_subparsers(dest="command", help="Commands")

    # device command
    device_parser = subparsers.add_parser("device", help="Show device information")
    device_parser.add_argument("--json", action="store_true", help="Output JSON")

    # test-load command
    test_parser = subparsers.add_parser("test-load", help="Test loading a model")
    test_parser.add_argument("model_id", help="Model ID to load")
    test_parser.add_argument("--loader", default="gguf", help="Loader type (hf, diffusers, gguf)")
    test_parser.add_argument("--device", help="Device to use")

    return parser


def _show_device_info(as_json: bool) -> None:
    """Print the best device, the device count and the CUDA device list.

    Args:
        as_json: When true, emit a single JSON document instead of text.
    """
    # Imported lazily so that merely parsing arguments (or running another
    # subcommand) does not pull in the device module.
    from .device import DeviceManager, get_best_device, get_device_count

    devices = DeviceManager().get_cuda_devices()

    if as_json:
        output = {
            "best_device": get_best_device(),
            "device_count": get_device_count(),
            "cuda_devices": [
                {
                    "name": d.name,
                    "index": d.index,
                    "total_memory_mb": d.total_memory_mb,
                    "free_memory_mb": d.free_memory_mb,
                }
                for d in devices
            ],
        }
        print(json.dumps(output, indent=2))
        return

    print(f"Best device: {get_best_device()}")
    print(f"Device count: {get_device_count()}")
    if devices:
        print("\nCUDA devices:")
        for d in devices:
            print(f" {d.index}: {d.name}")
            print(f" Memory: {d.free_memory_mb:.0f} / {d.total_memory_mb:.0f} MB")


async def _test_load(model_id: str, loader_name: str, device: "str | None") -> None:
    """Load ``model_id`` with the named loader, report on it, then unload.

    Args:
        model_id: Model ID passed to the loader's ``load``.
        loader_name: Loader type handed to ``get_loader``.
        device: Optional device; forwarded to ``load`` as ``device=`` only
            when truthy, so the loader's own default applies otherwise.
    """
    # Imported lazily so the registry is only loaded for this subcommand.
    from .registry import get_loader

    loader = get_loader(loader_name)
    print(f"Loading {model_id} with {loader_name} loader...")

    kwargs = {}
    if device:
        kwargs["device"] = device

    await loader.load(model_id, **kwargs)
    try:
        print("Loaded successfully!")
        print(f" Device: {loader.get_device()}")
        if loader.model_info:
            print(f" Load time: {loader.model_info.load_time_seconds:.2f}s")
            print(f" Memory: {loader.model_info.memory_used_mb:.0f} MB")
    finally:
        # Always release the model, even if reporting on it failed.
        await loader.unload()
        print("Unloaded.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the CLI only when executed as a script, not when imported.
    main()
|