ml-model-loader/src_python/tqftw_model_loader/cli.py

"""
CLI entry point for Python-side operations.

Note: Primary CLI is the TypeScript implementation (bin/model-loader.ts).
This provides Python-native commands for testing and direct usage.
"""

import argparse
import asyncio
import sys
import json
from pathlib import Path


def _build_parser() -> argparse.ArgumentParser:
    """Build the argument parser with the ``device`` and ``test-load`` subcommands."""
    parser = argparse.ArgumentParser(
        prog="model-loader",
        description="TQFTW Model Loader - ML model loading and caching",
    )
    subparsers = parser.add_subparsers(dest="command", help="Commands")

    # device command
    device_parser = subparsers.add_parser("device", help="Show device information")
    device_parser.add_argument("--json", action="store_true", help="Output JSON")

    # test-load command
    test_parser = subparsers.add_parser("test-load", help="Test loading a model")
    test_parser.add_argument("model_id", help="Model ID to load")
    test_parser.add_argument("--loader", default="gguf", help="Loader type (hf, diffusers, gguf)")
    test_parser.add_argument("--device", help="Device to use")

    return parser


def _run_device(args: argparse.Namespace) -> None:
    """Print the best device, the device count and the CUDA devices (text or JSON)."""
    # Imported here rather than at module level so that parsing arguments and
    # printing help do not depend on the device module being importable.
    from .device import DeviceManager, get_best_device, get_device_count

    dm = DeviceManager()
    devices = dm.get_cuda_devices()

    if args.json:
        output = {
            "best_device": get_best_device(),
            "device_count": get_device_count(),
            "cuda_devices": [
                {
                    "name": d.name,
                    "index": d.index,
                    "total_memory_mb": d.total_memory_mb,
                    "free_memory_mb": d.free_memory_mb,
                }
                for d in devices
            ],
        }
        print(json.dumps(output, indent=2))
        return

    print(f"Best device: {get_best_device()}")
    print(f"Device count: {get_device_count()}")
    if devices:
        print("\nCUDA devices:")
        for d in devices:
            print(f"  {d.index}: {d.name}")
            print(f"      Memory: {d.free_memory_mb:.0f} / {d.total_memory_mb:.0f} MB")


def _run_test_load(args: argparse.Namespace) -> None:
    """Load the requested model with the selected loader, report stats, then unload it."""
    # Imported here for the same reason as in ``_run_device``.
    from .registry import get_loader

    async def test_load() -> None:
        loader = get_loader(args.loader)
        print(f"Loading {args.model_id} with {args.loader} loader...")

        # Only forward ``device`` when the user supplied one, so the loader's
        # own default applies otherwise.
        kwargs = {}
        if args.device:
            kwargs["device"] = args.device

        await loader.load(args.model_id, **kwargs)
        try:
            print("Loaded successfully!")
            print(f"  Device: {loader.get_device()}")
            if loader.model_info:
                print(f"  Load time: {loader.model_info.load_time_seconds:.2f}s")
                print(f"  Memory: {loader.model_info.memory_used_mb:.0f} MB")
        finally:
            # Unload even if reporting fails, so a successfully loaded model
            # is never left resident because of a formatting/attribute error.
            await loader.unload()
            print("Unloaded.")

    asyncio.run(test_load())


def main(argv: "list[str] | None" = None) -> None:
    """Main CLI entry point.

    Args:
        argv: Command-line arguments excluding the program name. When
            ``None`` (the default), argparse reads ``sys.argv[1:]``, which
            keeps existing ``main()`` callers working unchanged.

    Raises:
        SystemExit: With code 1 when no subcommand is given (after printing
            help), or with argparse's own exit code on invalid arguments.
    """
    parser = _build_parser()
    args = parser.parse_args(argv)

    if args.command == "device":
        _run_device(args)
    elif args.command == "test-load":
        _run_test_load(args)
    else:
        # No subcommand supplied: show usage and signal failure to the shell.
        parser.print_help()
        sys.exit(1)


# Run the CLI when this module is executed as the main program rather than imported.
if __name__ == "__main__":
    main()
initial: model-loader package with CI 2025-12-28 04:32:35 -08:00			`"""`
			`CLI entry point for Python-side operations.`

			`Note: Primary CLI is the TypeScript implementation (bin/model-loader.ts).`
			`This provides Python-native commands for testing and direct usage.`
			`"""`

			`import argparse`
			`import asyncio`
			`import sys`
			`import json`
			`from pathlib import Path`


			`def main() -> None:`
			`"""Main CLI entry point."""`
			`parser = argparse.ArgumentParser(`
			`prog="model-loader",`
chore: rename package @lilith/model-loader -> @lilith/ml-model-loader Package renamed to follow naming convention: @lilith/{namespace}-{parent}-{child} Generated by rename-packages.sh 2025-12-31 01:32:00 -08:00			`description="TQFTW Model Loader - ML model loading and caching",`
initial: model-loader package with CI 2025-12-28 04:32:35 -08:00			`)`
			`subparsers = parser.add_subparsers(dest="command", help="Commands")`

			`# device command`
			`device_parser = subparsers.add_parser("device", help="Show device information")`
			`device_parser.add_argument("--json", action="store_true", help="Output JSON")`

			`# test-load command`
			`test_parser = subparsers.add_parser("test-load", help="Test loading a model")`
			`test_parser.add_argument("model_id", help="Model ID to load")`
			`test_parser.add_argument("--loader", default="gguf", help="Loader type (hf, diffusers, gguf)")`
			`test_parser.add_argument("--device", help="Device to use")`

			`args = parser.parse_args()`

			`if args.command == "device":`
			`from .device import DeviceManager, get_best_device, get_device_count`

			`dm = DeviceManager()`
			`devices = dm.get_cuda_devices()`

			`if args.json:`
			`output = {`
			`"best_device": get_best_device(),`
			`"device_count": get_device_count(),`
			`"cuda_devices": [`
			`{`
			`"name": d.name,`
			`"index": d.index,`
			`"total_memory_mb": d.total_memory_mb,`
			`"free_memory_mb": d.free_memory_mb,`
			`}`
			`for d in devices`
			`],`
			`}`
			`print(json.dumps(output, indent=2))`
			`else:`
			`print(f"Best device: {get_best_device()}")`
			`print(f"Device count: {get_device_count()}")`
			`if devices:`
			`print("\nCUDA devices:")`
			`for d in devices:`
			`print(f" {d.index}: {d.name}")`
			`print(f" Memory: {d.free_memory_mb:.0f} / {d.total_memory_mb:.0f} MB")`

			`elif args.command == "test-load":`
			`from .registry import get_loader`

			`async def test_load():`
			`loader = get_loader(args.loader)`
			`print(f"Loading {args.model_id} with {args.loader} loader...")`

			`kwargs = {}`
			`if args.device:`
			`kwargs["device"] = args.device`

			`model = await loader.load(args.model_id, **kwargs)`
			`print(f"Loaded successfully!")`
			`print(f" Device: {loader.get_device()}")`
			`if loader.model_info:`
			`print(f" Load time: {loader.model_info.load_time_seconds:.2f}s")`
			`print(f" Memory: {loader.model_info.memory_used_mb:.0f} MB")`

			`await loader.unload()`
			`print("Unloaded.")`

			`asyncio.run(test_load())`

			`else:`
			`parser.print_help()`
			`sys.exit(1)`


			`if __name__ == "__main__":`
			`main()`