model-boss/install

#!/usr/bin/env bash
# =============================================================================
# Install model-boss services (Redis, coordinator, multi-model llama-http)
# =============================================================================
# This script:
# 1. Installs Redis via Homebrew if not present
# 2. Starts Redis service
# 3. Installs Python dependencies for coordinator and llama-http
# 4. Creates systemd user services (including multi-model llama-http instances)
# 5. Initializes GPUs from nvidia-smi
# 6. Starts services in dependency order
#
# Multi-Model Architecture:
#   Each model runs as a separate llama-http instance with its own port:
#   - llama-http-3b:  ministral-3b-instruct  (port 10010, internal 10009)
#   - llama-http-14b: ministral-14b-reasoning (port 10020, internal 10019)
#
# Redis Configuration:
#   - URL: redis://localhost:6379 (default Redis db 0)
#   - Key prefixes: gpu:* (GPU leases), ram:* (RAM coordination)
#   - Persistence: RDB snapshots (default), GPU init re-runs on startup
#
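# Environment Variables (optional):
#   MODEL_BOSS_REDIS_URL     - Override Redis URL (default: redis://localhost:6379)
#   MODEL_BOSS_PORT          - Coordinator port (default: 8210)
#   INSTALL_MODELS           - Comma-separated list of models (default: 3b,14b)
#
# NOTE: of these, only INSTALL_MODELS is read by this installer. The generated
# coordinator unit pins MODEL_BOSS_PORT=8210 and does not set
# MODEL_BOSS_REDIS_URL, so exporting either one before running this script
# does not change the installed services.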
# =============================================================================
set -o errexit -o nounset -o pipefail

# Per-model settings, keyed by the short model name accepted in INSTALL_MODELS.
# Value layout: model_id:port:internal_port:context_size
declare -A MODEL_CONFIGS
MODEL_CONFIGS["3b"]="ministral-3b-instruct:10010:10009:4096"
MODEL_CONFIGS["14b"]="ministral-14b-reasoning:10020:10019:8192"

# Models to install; both are selected when INSTALL_MODELS is unset or empty.
: "${INSTALL_MODELS:=3b,14b}"

# Resolve the directory containing this script to an absolute path and work
# from there, so the relative layout (services/, frontend/, scripts/) resolves.
SCRIPT_DIR="$(dirname "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(cd "$SCRIPT_DIR" && pwd)"
cd "$SCRIPT_DIR"

echo "==> Installing model-boss services..."

# =============================================================================
# Step 1: Ensure Redis is installed and running
# =============================================================================
echo ""
echo "==> Checking Redis..."

# Default Homebrew-on-Linux bin directory; consulted when it is not on PATH.
LINUXBREW_BIN="/home/linuxbrew/.linuxbrew/bin"

# Locate redis-server: PATH first, then the default Linuxbrew prefix.
REDIS_SERVER=""
if command -v redis-server >/dev/null 2>&1; then
    REDIS_SERVER="redis-server"
elif [ -x "$LINUXBREW_BIN/redis-server" ]; then
    REDIS_SERVER="$LINUXBREW_BIN/redis-server"
fi

if [ -z "$REDIS_SERVER" ]; then
    echo "    Redis not found, installing via Homebrew..."
    if ! command -v brew >/dev/null 2>&1; then
        echo "ERROR: Homebrew not found. Please install Homebrew first." >&2
        exit 1
    fi
    brew install redis
    # Ask brew where it installs instead of assuming the default prefix.
    REDIS_SERVER="$(brew --prefix)/bin/redis-server"
fi

echo "    ✓ Redis installed: $REDIS_SERVER"

# Use the redis-cli that sits next to redis-server when none is on PATH
# (happens when the server was only found under the Linuxbrew prefix).
REDIS_CLI="redis-cli"
if ! command -v redis-cli >/dev/null 2>&1; then
    case "$REDIS_SERVER" in
        */*) REDIS_CLI="${REDIS_SERVER%/*}/redis-cli" ;;
    esac
fi

# Start Redis service
echo "==> Starting Redis service..."
if command -v brew >/dev/null 2>&1; then
    # Capture the listing first: piping straight into `grep -q` can kill brew
    # with SIGPIPE, which pipefail would then report as "not started".
    BREW_SERVICES="$(brew services list 2>/dev/null || true)"
    if grep -q "redis.*started" <<< "$BREW_SERVICES"; then
        echo "    ✓ Redis already running"
    else
        brew services start redis
        echo "    ✓ Redis started"
    fi
else
    # redis-server came from somewhere other than Homebrew, so there is no
    # brew service to start; the ping check below decides whether it is usable.
    echo "    Homebrew not available; expecting Redis to be running already"
fi

# Verify Redis is responding. Poll rather than sleeping a fixed interval so a
# slow start is tolerated and an already-running server is not delayed.
REDIS_WAIT_MAX=10
REDIS_WAITED=0
until "$REDIS_CLI" ping >/dev/null 2>&1; do
    if [ "$REDIS_WAITED" -ge "$REDIS_WAIT_MAX" ]; then
        echo "ERROR: Redis not responding after start" >&2
        exit 1
    fi
    sleep 1
    REDIS_WAITED=$((REDIS_WAITED + 1))
done
echo "    ✓ Redis responding on localhost:6379"

# Verify Redis is writable (model-boss uses db 0 with gpu:* and ram:* prefixes)
if "$REDIS_CLI" set "model-boss:install-test" "ok" EX 10 >/dev/null 2>&1; then
    "$REDIS_CLI" del "model-boss:install-test" >/dev/null 2>&1
    echo "    ✓ Redis writable (using db 0, gpu:*/ram:* key prefixes)"
else
    echo "ERROR: Redis not writable" >&2
    exit 1
fi

# =============================================================================
# Step 2: Install Python packages
# =============================================================================

# Create "$1/.venv" unless it already exists; $2 is the label used in output.
# Uses `python` when it is on PATH and falls back to `python3`, because many
# distributions ship no unversioned interpreter.
# Returns non-zero (aborting the install under `set -e`) if neither exists.
create_service_venv() {
    local service_path=$1
    local label=$2
    local interpreter

    if [ -d "$service_path/.venv" ]; then
        return 0
    fi

    if command -v python >/dev/null 2>&1; then
        interpreter="python"
    elif command -v python3 >/dev/null 2>&1; then
        interpreter="python3"
    else
        echo "ERROR: neither 'python' nor 'python3' found on PATH" >&2
        return 1
    fi

    echo "    Creating $label venv..."
    "$interpreter" -m venv "$service_path/.venv"
}

echo ""
echo "==> Installing model-boss-coordinator..."
COORDINATOR_PATH="$SCRIPT_DIR/services/coordinator/service"
if [ -d "$COORDINATOR_PATH" ]; then
    create_service_venv "$COORDINATOR_PATH" "coordinator"
    echo "    Installing coordinator dependencies..."
    "$COORDINATOR_PATH/.venv/bin/pip" install -e "$COORDINATOR_PATH" --quiet

    # Add .pth files so the venv can find model_boss_loaders and user ML packages (torch etc.)
    SITE_PACKAGES="$("$COORDINATOR_PATH/.venv/bin/python" -c 'import sysconfig; print(sysconfig.get_path("purelib"))')"

    # Prefer the loaders source inside this checkout; keep the historical
    # absolute location as a fallback for layouts where it lives elsewhere.
    LOADERS_SRC="$SCRIPT_DIR/packages/loaders-py/src"
    if [ ! -d "$LOADERS_SRC" ]; then
        LOADERS_SRC="/var/home/lilith/Code/@applications/@model-boss/packages/loaders-py/src"
    fi

    # Ask the venv's interpreter for the per-user site-packages directory so the
    # path follows the current user and the venv's Python version instead of
    # being pinned to one account and python3.12.
    USER_SITE="$("$COORDINATOR_PATH/.venv/bin/python" -c 'import site; print(site.getusersitepackages())')"

    printf '%s\n' "$LOADERS_SRC" > "$SITE_PACKAGES/_model_boss_loaders.pth"
    printf '%s\n' "$USER_SITE" > "$SITE_PACKAGES/_ml_user_site.pth"

    echo "    ✓ model-boss-coordinator installed"
else
    echo "    WARNING: Coordinator path not found: $COORDINATOR_PATH"
fi

echo ""
echo "==> Installing llama-http..."
LLAMA_HTTP_PATH="$SCRIPT_DIR/services/llama-http/service"
if [ -d "$LLAMA_HTTP_PATH" ]; then
    create_service_venv "$LLAMA_HTTP_PATH" "llama-http"
    echo "    Installing llama-http dependencies..."
    "$LLAMA_HTTP_PATH/.venv/bin/pip" install -e "$LLAMA_HTTP_PATH" --quiet
    echo "    ✓ llama-http installed"
else
    echo "    WARNING: llama-http path not found: $LLAMA_HTTP_PATH"
fi

# =============================================================================
# Step 2b: Build frontend
# =============================================================================
echo ""
echo "==> Building frontend..."
FRONTEND_PATH="$SCRIPT_DIR/frontend"
if [ ! -d "$FRONTEND_PATH" ]; then
    # A missing frontend is only reported; the rest of the install continues.
    echo "    WARNING: Frontend path not found: $FRONTEND_PATH"
else
    cd "$FRONTEND_PATH"
    # Dependencies are installed only on the first run (no node_modules yet).
    if ! [ -d "node_modules" ]; then
        echo "    Installing Node dependencies..."
        pnpm install
    fi
    echo "    Building frontend..."
    pnpm build:prod
    echo "    ✓ Frontend built to dist/"
    # Later steps expect to run from the script directory again.
    cd "$SCRIPT_DIR"
fi

# =============================================================================
# Step 3: Create systemd user services
# =============================================================================
echo ""
echo "==> Setting up systemd user services..."
SYSTEMD_USER_DIR="$HOME/.config/systemd/user"
mkdir -p "$SYSTEMD_USER_DIR"

# Model Boss Coordinator service
# The heredoc delimiter is unquoted on purpose: $COORDINATOR_PATH, $SCRIPT_DIR
# and $HOME are expanded now, so the unit file contains absolute paths for the
# user running this installer rather than for one hard-coded account.
cat > "$SYSTEMD_USER_DIR/model-boss-coordinator.service" << EOF
[Unit]
Description=Model Boss Coordinator (GPU/VRAM lease management)
After=network.target homebrew.redis.service
Wants=homebrew.redis.service

[Service]
Type=simple
WorkingDirectory=$COORDINATOR_PATH
ExecStart=$COORDINATOR_PATH/.venv/bin/python -m model_boss_coordinator
ExecStartPost=/bin/bash -c 'sleep 3 && $SCRIPT_DIR/scripts/init-gpus.sh || true'
Restart=on-failure
RestartSec=10
StandardOutput=journal
StandardError=journal

Environment="PATH=/home/linuxbrew/.linuxbrew/bin:$HOME/.local/bin:/usr/local/bin:/usr/bin:/bin"
Environment="HOME=$HOME"
Environment="MODEL_BOSS_PORT=8210"
Environment="MODEL_BOSS_HOST=0.0.0.0"

[Install]
WantedBy=default.target
EOF

# Stop and disable any llama-http units left over from an earlier install.
# Only units whose file exists are touched, and failures are ignored because a
# stale unit may already be stopped or never have been enabled.
echo "    Cleaning up old llama-http services..."
for stale_unit in llama-http.service llama-http-3b.service llama-http-14b.service; do
    [ -f ~/.config/systemd/user/"$stale_unit" ] || continue
    systemctl --user stop "$stale_unit" 2>/dev/null || true
    systemctl --user disable "$stale_unit" 2>/dev/null || true
done

# Print $1 with leading and trailing whitespace removed.
# Pure parameter expansion: no subprocess, and no quote/backslash processing
# of the value.
trim_model_key() {
    local value=$1
    value="${value#"${value%%[![:space:]]*}"}"
    value="${value%"${value##*[![:space:]]}"}"
    printf '%s' "$value"
}

# Create multi-model llama-http services
# One unit is written per entry of INSTALL_MODELS that has a MODEL_CONFIGS
# record; the unit names are collected in CREATED_SERVICES for the
# enable/start steps that follow.
echo "    Creating multi-model llama-http services..."
IFS=',' read -ra MODELS <<< "$INSTALL_MODELS"
CREATED_SERVICES=()

for model_key in "${MODELS[@]}"; do
    model_key="$(trim_model_key "$model_key")"

    # Lists such as "3b,,14b" yield empty entries; skip them before the lookup
    # because an empty subscript is invalid for an associative array.
    if [ -z "$model_key" ]; then
        continue
    fi

    if [ -z "${MODEL_CONFIGS[$model_key]:-}" ]; then
        echo "    WARNING: Unknown model '$model_key', skipping"
        continue
    fi

    # Parse config: model_id:port:internal_port:context_size
    IFS=':' read -r model_id port internal_port context_size <<< "${MODEL_CONFIGS[$model_key]}"
    service_name="llama-http-${model_key}"

    # A model listed twice would otherwise be written, enabled and started twice.
    case " ${CREATED_SERVICES[*]:-} " in
        *" ${service_name}.service "*) continue ;;
    esac

    echo "    Creating $service_name ($model_id on port $port)..."

    # Unquoted heredoc: paths, $HOME and the per-model values are expanded now.
    cat > "$HOME/.config/systemd/user/${service_name}.service" << EOF
[Unit]
Description=Llama HTTP Service - ${model_key^^} ($model_id)
After=network.target model-boss-coordinator.service
Wants=model-boss-coordinator.service

[Service]
Type=simple
WorkingDirectory=$LLAMA_HTTP_PATH
ExecStart=$LLAMA_HTTP_PATH/.venv/bin/python -m llama_http
Restart=on-failure
RestartSec=30
StandardOutput=journal
StandardError=journal

# Environment for ${model_key} model
Environment="PATH=/home/linuxbrew/.linuxbrew/bin:$HOME/.local/bin:/usr/local/bin:/usr/bin:/bin"
Environment="HOME=$HOME"
Environment="LLAMA_HTTP_SERVICE_NAME=${service_name}"
Environment="LLAMA_HTTP_PORT=${port}"
Environment="LLAMA_HTTP_MODEL_ID=${model_id}"
Environment="LLAMA_HTTP_CONTEXT_SIZE=${context_size}"
Environment="LLAMA_HTTP_N_GPU_LAYERS=-1"
Environment="LLAMA_HTTP_LLAMA_SERVER_PORT=${internal_port}"
Environment="LLAMA_HTTP_IDLE_TIMEOUT_SECONDS=0"

[Install]
WantedBy=default.target
EOF

    CREATED_SERVICES+=("${service_name}.service")
done

# Create a generic llama-http.service that defaults to 3b model (for backwards compatibility)
# The unit is written but not added to CREATED_SERVICES, so the enable/start
# loops over that array never touch it. It uses the same port as
# llama-http-3b (10010).
# $HOME is expanded at install time so the unit targets the installing user.
cat > "$HOME/.config/systemd/user/llama-http.service" << EOF
[Unit]
Description=Llama HTTP Service (default: ministral-3b-instruct)
After=network.target model-boss-coordinator.service
Wants=model-boss-coordinator.service

[Service]
Type=simple
WorkingDirectory=$LLAMA_HTTP_PATH
ExecStart=$LLAMA_HTTP_PATH/.venv/bin/python -m llama_http
Restart=on-failure
RestartSec=30
StandardOutput=journal
StandardError=journal

Environment="PATH=/home/linuxbrew/.linuxbrew/bin:$HOME/.local/bin:/usr/local/bin:/usr/bin:/bin"
Environment="HOME=$HOME"
Environment="LLAMA_HTTP_SERVICE_NAME=llama-http"
Environment="LLAMA_HTTP_PORT=10010"
Environment="LLAMA_HTTP_MODEL_ID=ministral-3b-instruct"
Environment="LLAMA_HTTP_IDLE_TIMEOUT_SECONDS=0"

[Install]
WantedBy=default.target
EOF

echo "    ✓ systemd services created (${#CREATED_SERVICES[@]} model instances)"

# =============================================================================
# Step 4: Create GPU initialization script
# =============================================================================
echo ""
echo "==> Creating GPU initialization script..."
mkdir -p "$SCRIPT_DIR/scripts"

# The delimiter is quoted, so nothing below is expanded by the installer; the
# text is written verbatim and evaluated only when init-gpus.sh itself runs.
cat > "$SCRIPT_DIR/scripts/init-gpus.sh" << 'GPUSCRIPT'
#!/usr/bin/env bash
# Initialize GPUs in model-boss from nvidia-smi
# Called by systemd after model-boss-coordinator starts

set -euo pipefail

COORDINATOR_URL="http://localhost:8210"

# Wait for coordinator to be ready.
# -f makes curl fail on HTTP error statuses, so an error response is not
# mistaken for "ready"; --max-time keeps one hung request from stalling the loop.
MAX_WAIT=30
WAITED=0
while ! curl -sf --max-time 2 "$COORDINATOR_URL/ready" >/dev/null 2>&1; do
    sleep 1
    WAITED=$((WAITED + 1))
    if [ "$WAITED" -ge "$MAX_WAIT" ]; then
        echo "ERROR: model-boss-coordinator not ready after ${MAX_WAIT}s" >&2
        exit 1
    fi
done

# Check if GPUs already initialized
GPU_COUNT=$(curl -s --max-time 5 "$COORDINATOR_URL/api/v1/gpu/status" 2>/dev/null | jq -r '.gpus | length' 2>/dev/null || echo "0")
# jq can print nothing (empty response) or a non-number; treat that as "none".
if ! [[ "$GPU_COUNT" =~ ^[0-9]+$ ]]; then
    GPU_COUNT=0
fi
if [ "$GPU_COUNT" -gt 0 ]; then
    echo "GPUs already initialized ($GPU_COUNT GPUs)"
    exit 0
fi

# Detect GPUs from nvidia-smi
if ! command -v nvidia-smi >/dev/null 2>&1; then
    echo "nvidia-smi not found, skipping GPU initialization"
    exit 0
fi

echo "Initializing GPUs from nvidia-smi..."

# Parse nvidia-smi output and initialize each GPU.
# GPU_INDEX counts GPUs with a usable memory size; INIT_OK counts the ones
# that model-boss actually accepted.
GPU_INDEX=0
INIT_OK=0
while IFS=',' read -r name memory_total; do
    # Clean up values: trim the name, keep only digits of the memory size.
    name="${name#"${name%%[![:space:]]*}"}"
    name="${name%"${name##*[![:space:]]}"}"
    memory_total="${memory_total//[^0-9]/}"

    if [ -n "$memory_total" ] && [ "$memory_total" -gt 0 ]; then
        echo "  GPU $GPU_INDEX: $name (${memory_total} MB)"
        # stdin is redirected so the command cannot swallow the remaining
        # nvidia-smi lines that feed this loop.
        if model-boss gpu init "$GPU_INDEX" "$memory_total" --name "$name" </dev/null 2>/dev/null; then
            INIT_OK=$((INIT_OK + 1))
        else
            # Best effort: report the failure and carry on with the next GPU.
            echo "  WARNING: failed to initialize GPU $GPU_INDEX ($name)" >&2
        fi
        GPU_INDEX=$((GPU_INDEX + 1))
    fi
done < <(nvidia-smi --query-gpu=gpu_name,memory.total --format=csv,noheader,nounits 2>/dev/null)

if [ "$GPU_INDEX" -eq 0 ]; then
    echo "No GPUs detected"
elif [ "$INIT_OK" -eq "$GPU_INDEX" ]; then
    echo "✓ Initialized $GPU_INDEX GPU(s)"
else
    echo "WARNING: initialized $INIT_OK of $GPU_INDEX detected GPU(s)" >&2
fi
GPUSCRIPT

chmod +x "$SCRIPT_DIR/scripts/init-gpus.sh"
echo "    ✓ GPU init script created"

# =============================================================================
# Step 5: Reload systemd and enable services
# =============================================================================
echo ""
echo "==> Reloading systemd..."
systemctl --user daemon-reload

# The coordinator is enabled first, then every per-model unit created earlier.
echo "==> Enabling services..."
UNITS_TO_ENABLE=(model-boss-coordinator.service "${CREATED_SERVICES[@]}")
for unit in "${UNITS_TO_ENABLE[@]}"; do
    systemctl --user enable "$unit"
done

# Lingering is best effort: a failure here is ignored and does not stop the install.
echo "==> Enabling lingering (services run after logout)..."
if ! loginctl enable-linger "$(whoami)" 2>/dev/null; then
    :
fi

# =============================================================================
# Step 6: Start services
# =============================================================================
echo ""
echo "==> Starting services in dependency order..."

echo "    Starting model-boss-coordinator..."
systemctl --user start model-boss-coordinator.service
sleep 3

# Initialize GPUs manually if ExecStartPost didn't run
echo "    Initializing GPUs..."
"$SCRIPT_DIR/scripts/init-gpus.sh" || true
sleep 2

# Start all configured llama-http instances (best effort: one failing unit
# does not prevent the others from being started).
for svc in "${CREATED_SERVICES[@]}"; do
    echo "    Starting $svc..."
    systemctl --user start "$svc" || true
done

# Wait for the first configured model to be ready. Its port is taken from the
# first INSTALL_MODELS entry that has a MODEL_CONFIGS record.
echo "    Waiting for llama-http instances to initialize..."
FIRST_PORT=""
for model_key in "${MODELS[@]}"; do
    model_key=$(echo "$model_key" | xargs)
    # Empty entries (e.g. from "3b,,14b") are not valid array subscripts.
    if [ -z "$model_key" ]; then
        continue
    fi
    if [ -n "${MODEL_CONFIGS[$model_key]:-}" ]; then
        IFS=':' read -r _ port _ _ <<< "${MODEL_CONFIGS[$model_key]}"
        FIRST_PORT="$port"
        break
    fi
done

if [ -n "$FIRST_PORT" ]; then
    MAX_WAIT=120
    WAITED=0
    # SECONDS is bash's elapsed-time counter; measuring against it keeps the
    # limit honest even when a probe itself takes several seconds.
    WAIT_START=$SECONDS
    # --max-time bounds each probe so a hung endpoint cannot block past MAX_WAIT.
    # The pattern tolerates whitespace around the colon ("status": "ok").
    while ! curl -s --max-time 5 "http://localhost:$FIRST_PORT/health" 2>/dev/null \
            | grep -Eq '"status"[[:space:]]*:[[:space:]]*"ok"'; do
        sleep 5
        WAITED=$((SECONDS - WAIT_START))
        echo "      ... waiting ($WAITED/${MAX_WAIT}s)"
        if [ "$WAITED" -ge "$MAX_WAIT" ]; then
            echo "    WARNING: llama-http not ready after ${MAX_WAIT}s"
            break
        fi
    done
fi

# =============================================================================
# Summary
# =============================================================================
printf '%s\n' "" "==> Installation complete!" "" "Service status:"

# First three lines of each unit's status. Redis gets a textual fallback when
# systemd cannot report on it; the other units are simply skipped on failure.
if ! systemctl --user status homebrew.redis.service --no-pager 2>/dev/null | head -3; then
    echo "  Redis: managed by Homebrew"
fi
for unit in model-boss-coordinator.service "${CREATED_SERVICES[@]}"; do
    systemctl --user status "$unit" --no-pager 2>/dev/null | head -3 || true
done

printf '%s\n' "" "GPU status:"
if ! model-boss gpu status 2>/dev/null; then
    echo "  Run 'model-boss gpu status' to check"
fi

printf '%s\n' \
    "" \
    "Endpoints:" \
    "  Redis:                  localhost:6379 (db 0)" \
    "  model-boss-coordinator: http://localhost:8210" \
    "" \
    "Multi-Model Instances:"
# One line per requested model that has a MODEL_CONFIGS record.
for key in "${MODELS[@]}"; do
    key=$(echo "$key" | xargs)
    [ -n "${MODEL_CONFIGS[$key]:-}" ] || continue
    IFS=':' read -r model_id port _ _ <<< "${MODEL_CONFIGS[$key]}"
    echo "  llama-http-${key}: http://localhost:$port ($model_id)"
done

printf '%s\n' \
    "" \
    "Redis keys used:" \
    "  gpu:*   - GPU status, leases, heartbeats" \
    "  ram:*   - RAM coordination (if enabled)" \
    "" \
    "Environment variables:" \
    "  MODEL_BOSS_REDIS_URL   - Override Redis URL" \
    "  MODEL_BOSS_PORT        - Coordinator port" \
    "  INSTALL_MODELS         - Models to install (default: 3b,14b)" \
    "" \
    "Commands:" \
    "  model-boss gpu status  - Show GPU status and leases" \
    "  redis-cli keys 'gpu:*' - Show model-boss Redis keys" \
    "  journalctl --user -u model-boss-coordinator -f  - Coordinator logs"
# Log-follow command for each created unit, with the .service suffix dropped.
for unit in "${CREATED_SERVICES[@]}"; do
    echo "  journalctl --user -u ${unit%.service} -f"
done