model-boss/install
Claude Code 1094aea576 chore(install-named): 🔧 Update named installation script to enforce strict dependency version pinning
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
2026-03-20 07:26:32 -07:00

454 lines
16 KiB
Bash
Executable file

#!/usr/bin/env bash
# =============================================================================
# Install model-boss services (Redis, coordinator, multi-model llama-http)
# =============================================================================
# This script:
# 1. Installs Redis via Homebrew if not present
# 2. Starts Redis service
# 3. Installs Python dependencies for coordinator and llama-http
# 4. Creates systemd user services (including multi-model llama-http instances)
# 5. Initializes GPUs from nvidia-smi
# 6. Starts services in dependency order
#
# Multi-Model Architecture:
# Each model runs as a separate llama-http instance with its own port:
# - llama-http-3b: ministral-3b-instruct (port 10010, internal 10009)
# - llama-http-14b: ministral-14b-reasoning (port 10020, internal 10019)
#
# Redis Configuration:
# - URL: redis://localhost:6379 (default Redis db 0)
# - Key prefixes: gpu:* (GPU leases), ram:* (RAM coordination)
# - Persistence: RDB snapshots (default), GPU init re-runs on startup
#
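# Environment Variables (optional):
# MODEL_BOSS_REDIS_URL - Override Redis URL (default: redis://localhost:6379)
#   NOTE: this installer does not read it and does not write it into the generated units.
# MODEL_BOSS_PORT - Coordinator port (default: 8210)
#   NOTE: the generated coordinator unit and scripts/init-gpus.sh always use 8210.
# INSTALL_MODELS - Comma-separated list of models to install (default: 3b,14b)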
# =============================================================================
# Strict mode: abort on command failure, on unset variables, and on a failure
# in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Per-model settings, keyed by the short name accepted in INSTALL_MODELS.
# Value layout: model_id:port:internal_port:context_size
declare -A MODEL_CONFIGS=(
  [3b]="ministral-3b-instruct:10010:10009:4096"
  [14b]="ministral-14b-reasoning:10020:10019:8192"
)

# Models to install; falls back to both when the variable is unset or empty.
: "${INSTALL_MODELS:=3b,14b}"

# Resolve the directory holding this script and work from there, so every
# relative path below is independent of the caller's working directory.
SCRIPT_DIR="$(dirname "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(cd "${SCRIPT_DIR}" && pwd)"
cd "${SCRIPT_DIR}"
printf '%s\n' "==> Installing model-boss services..."
# =============================================================================
# Step 1: Ensure Redis is installed and running
# =============================================================================
# Locates (or installs) redis-server, makes sure a Redis instance is up, then
# verifies that it answers PING and accepts writes. Homebrew is only required
# when this script actually has to install or start Redis.
echo ""
echo "==> Checking Redis..."
REDIS_SERVER=""
if command -v redis-server >/dev/null 2>&1; then
  REDIS_SERVER="redis-server"
elif [ -x "/home/linuxbrew/.linuxbrew/bin/redis-server" ]; then
  REDIS_SERVER="/home/linuxbrew/.linuxbrew/bin/redis-server"
fi
if [ -z "$REDIS_SERVER" ]; then
  echo " Redis not found, installing via Homebrew..."
  if ! command -v brew >/dev/null 2>&1; then
    echo "ERROR: Homebrew not found. Please install Homebrew first."
    exit 1
  fi
  brew install redis
  # Ask Homebrew for its prefix instead of assuming the default Linuxbrew path.
  REDIS_SERVER="$(brew --prefix)/bin/redis-server"
fi
echo " ✓ Redis installed: $REDIS_SERVER"

# Pick the redis-cli to use: the one on PATH, otherwise the one sitting next to
# the redis-server binary found above (covers a Linuxbrew bin dir not on PATH).
REDIS_CLI="redis-cli"
if ! command -v redis-cli >/dev/null 2>&1 && [ -x "${REDIS_SERVER%/*}/redis-cli" ]; then
  REDIS_CLI="${REDIS_SERVER%/*}/redis-cli"
fi

# Start Redis service
echo "==> Starting Redis service..."
if command -v brew >/dev/null 2>&1; then
  # Capture the listing before grepping: piping straight into `grep -q` lets
  # grep exit early, and under pipefail a SIGPIPE'd brew would read as
  # "not started".
  BREW_SERVICES="$(brew services list 2>/dev/null || true)"
  if grep -q "redis.*started" <<< "$BREW_SERVICES"; then
    echo " ✓ Redis already running"
  else
    brew services start redis
    # Poll for up to ~10s instead of relying on a fixed sleep.
    for _ in 1 2 3 4 5 6 7 8 9 10; do
      if "$REDIS_CLI" ping >/dev/null 2>&1; then
        break
      fi
      sleep 1
    done
    echo " ✓ Redis started"
  fi
elif "$REDIS_CLI" ping >/dev/null 2>&1; then
  # Redis came from somewhere other than Homebrew and is already up.
  echo " ✓ Redis already running (not managed by Homebrew)"
else
  echo "ERROR: Redis is not running and Homebrew is not available to start it"
  exit 1
fi

# Verify Redis is responding
if "$REDIS_CLI" ping >/dev/null 2>&1; then
  echo " ✓ Redis responding on localhost:6379"
else
  echo "ERROR: Redis not responding after start"
  exit 1
fi

# Verify Redis is writable (model-boss uses db 0 with gpu:* and ram:* prefixes).
# The probe key carries a 10s expiry so it disappears even if the DEL is skipped.
if "$REDIS_CLI" set "model-boss:install-test" "ok" EX 10 >/dev/null 2>&1; then
  "$REDIS_CLI" del "model-boss:install-test" >/dev/null 2>&1
  echo " ✓ Redis writable (using db 0, gpu:*/ram:* key prefixes)"
else
  echo "ERROR: Redis not writable"
  exit 1
fi
# =============================================================================
# Step 2: Install Python packages
# =============================================================================
# Interpreter used to create virtualenvs: `python` when available, otherwise
# `python3` (many systems only ship the latter). Empty if neither exists.
PYTHON_BIN="$(command -v python || command -v python3 || true)"

#######################################
# Create <dir>/.venv if it is missing and pip-install <dir> into it in
# editable mode.
# Globals:   PYTHON_BIN (read)
# Arguments: $1 - service directory containing the Python project
#            $2 - short label used in progress messages
# Outputs:   progress messages on stdout
# Returns:   exits the script if no interpreter is available or a step fails
#######################################
_install_python_service() {
  local service_dir=$1
  local label=$2
  if [ ! -d "$service_dir/.venv" ]; then
    if [ -z "$PYTHON_BIN" ]; then
      echo "ERROR: neither 'python' nor 'python3' found on PATH"
      exit 1
    fi
    echo " Creating $label venv..."
    "$PYTHON_BIN" -m venv "$service_dir/.venv"
  fi
  echo " Installing $label dependencies..."
  "$service_dir/.venv/bin/pip" install -e "$service_dir" --quiet
}

echo ""
echo "==> Installing model-boss-coordinator..."
COORDINATOR_PATH="$SCRIPT_DIR/services/coordinator/service"
if [ -d "$COORDINATOR_PATH" ]; then
  _install_python_service "$COORDINATOR_PATH" "coordinator"
  # Add .pth files so the venv can find model_boss_loaders and user ML packages (torch etc.)
  SITE_PACKAGES="$("$COORDINATOR_PATH/.venv/bin/python" -c 'import sysconfig; print(sysconfig.get_path("purelib"))')"
  # NOTE(review): both .pth targets are absolute paths for one specific
  # account and Python 3.12; confirm the intended locations before running
  # this as a different user or with a different interpreter version.
  echo "/var/home/lilith/Code/@applications/@model-boss/packages/loaders-py/src" \
    > "$SITE_PACKAGES/_model_boss_loaders.pth"
  echo "/var/home/lilith/.local/lib/python3.12/site-packages" \
    > "$SITE_PACKAGES/_ml_user_site.pth"
  echo " ✓ model-boss-coordinator installed"
else
  echo " WARNING: Coordinator path not found: $COORDINATOR_PATH"
fi

echo ""
echo "==> Installing llama-http..."
LLAMA_HTTP_PATH="$SCRIPT_DIR/services/llama-http/service"
if [ -d "$LLAMA_HTTP_PATH" ]; then
  _install_python_service "$LLAMA_HTTP_PATH" "llama-http"
  echo " ✓ llama-http installed"
else
  echo " WARNING: llama-http path not found: $LLAMA_HTTP_PATH"
fi
# =============================================================================
# Step 2b: Build frontend
# =============================================================================
# Builds the web frontend with pnpm when a frontend/ directory sits next to
# this script; otherwise only a warning is printed and the install continues.
printf '\n'
printf '%s\n' "==> Building frontend..."
FRONTEND_PATH="${SCRIPT_DIR}/frontend"
if [[ ! -d "${FRONTEND_PATH}" ]]; then
  printf '%s\n' " WARNING: Frontend path not found: ${FRONTEND_PATH}"
else
  cd "${FRONTEND_PATH}"
  # Dependencies are only fetched when node_modules does not exist yet.
  if [[ ! -d "node_modules" ]]; then
    printf '%s\n' " Installing Node dependencies..."
    pnpm install
  fi
  printf '%s\n' " Building frontend..."
  pnpm build:prod
  printf '%s\n' " ✓ Frontend built to dist/"
  # Return to the script directory for the remaining steps.
  cd "${SCRIPT_DIR}"
fi
# =============================================================================
# Step 3: Create systemd user services
# =============================================================================
echo ""
echo "==> Setting up systemd user services..."
mkdir -p ~/.config/systemd/user
# Model Boss Coordinator service.
# The heredoc delimiter is unquoted on purpose: $COORDINATOR_PATH, $SCRIPT_DIR
# and $HOME are expanded now, so the unit file contains absolute paths.
# HOME and the ~/.local/bin PATH entry come from the installing user's
# environment rather than a fixed account name.
# ExecStartPost runs the GPU init script a few seconds after start; `|| true`
# keeps a failed init from marking the coordinator unit as failed.
cat > ~/.config/systemd/user/model-boss-coordinator.service << EOF
[Unit]
Description=Model Boss Coordinator (GPU/VRAM lease management)
After=network.target homebrew.redis.service
Wants=homebrew.redis.service
[Service]
Type=simple
WorkingDirectory=$COORDINATOR_PATH
ExecStart=$COORDINATOR_PATH/.venv/bin/python -m model_boss_coordinator
ExecStartPost=/bin/bash -c 'sleep 3 && $SCRIPT_DIR/scripts/init-gpus.sh || true'
Restart=on-failure
RestartSec=10
StandardOutput=journal
StandardError=journal
Environment="PATH=/home/linuxbrew/.linuxbrew/bin:${HOME}/.local/bin:/usr/local/bin:/usr/bin:/bin"
Environment="HOME=${HOME}"
Environment="MODEL_BOSS_PORT=8210"
Environment="MODEL_BOSS_HOST=0.0.0.0"
[Install]
WantedBy=default.target
EOF
# Clean up old broken llama-http services that point to non-existent paths.
# Stop/disable failures are ignored: the unit may already be stopped or disabled.
echo " Cleaning up old llama-http services..."
for old_svc in llama-http.service llama-http-3b.service llama-http-14b.service; do
  if [ -f ~/.config/systemd/user/"$old_svc" ]; then
    systemctl --user stop "$old_svc" 2>/dev/null || true
    systemctl --user disable "$old_svc" 2>/dev/null || true
  fi
done

# Create multi-model llama-http services: one unit per entry of INSTALL_MODELS
# that has a MODEL_CONFIGS record. CREATED_SERVICES collects the unit names for
# the enable/start/summary steps further down.
echo " Creating multi-model llama-http services..."
IFS=',' read -ra MODELS <<< "$INSTALL_MODELS"
CREATED_SERVICES=()
for model_key in "${MODELS[@]}"; do
  model_key=$(echo "$model_key" | xargs) # Trim whitespace
  # Skip empty entries (e.g. "3b,,14b"); an empty string is not a usable
  # associative-array subscript.
  if [ -z "$model_key" ]; then
    continue
  fi
  if [ -z "${MODEL_CONFIGS[$model_key]:-}" ]; then
    echo " WARNING: Unknown model '$model_key', skipping"
    continue
  fi
  # Parse config: model_id:port:internal_port:context_size
  IFS=':' read -r model_id port internal_port context_size <<< "${MODEL_CONFIGS[$model_key]}"
  service_name="llama-http-${model_key}"
  echo " Creating $service_name ($model_id on port $port)..."
  # Unquoted heredoc: all variables are expanded at install time. HOME and the
  # ~/.local/bin PATH entry follow the installing user's environment.
  cat > ~/.config/systemd/user/"${service_name}.service" << EOF
[Unit]
Description=Llama HTTP Service - ${model_key^^} ($model_id)
After=network.target model-boss-coordinator.service
Wants=model-boss-coordinator.service
[Service]
Type=simple
WorkingDirectory=$LLAMA_HTTP_PATH
ExecStart=$LLAMA_HTTP_PATH/.venv/bin/python -m llama_http
Restart=on-failure
RestartSec=30
StandardOutput=journal
StandardError=journal
# Environment for ${model_key} model
Environment="PATH=/home/linuxbrew/.linuxbrew/bin:${HOME}/.local/bin:/usr/local/bin:/usr/bin:/bin"
Environment="HOME=${HOME}"
Environment="LLAMA_HTTP_SERVICE_NAME=${service_name}"
Environment="LLAMA_HTTP_PORT=${port}"
Environment="LLAMA_HTTP_MODEL_ID=${model_id}"
Environment="LLAMA_HTTP_CONTEXT_SIZE=${context_size}"
Environment="LLAMA_HTTP_N_GPU_LAYERS=-1"
Environment="LLAMA_HTTP_LLAMA_SERVER_PORT=${internal_port}"
Environment="LLAMA_HTTP_IDLE_TIMEOUT_SECONDS=0"
[Install]
WantedBy=default.target
EOF
  CREATED_SERVICES+=("${service_name}.service")
done

# Create a generic llama-http.service that defaults to 3b model (for backwards compatibility).
# It is written but not added to CREATED_SERVICES, so this script neither
# enables nor starts it; it uses the same port (10010) as llama-http-3b.
cat > ~/.config/systemd/user/llama-http.service << EOF
[Unit]
Description=Llama HTTP Service (default: ministral-3b-instruct)
After=network.target model-boss-coordinator.service
Wants=model-boss-coordinator.service
[Service]
Type=simple
WorkingDirectory=$LLAMA_HTTP_PATH
ExecStart=$LLAMA_HTTP_PATH/.venv/bin/python -m llama_http
Restart=on-failure
RestartSec=30
StandardOutput=journal
StandardError=journal
Environment="PATH=/home/linuxbrew/.linuxbrew/bin:${HOME}/.local/bin:/usr/local/bin:/usr/bin:/bin"
Environment="HOME=${HOME}"
Environment="LLAMA_HTTP_SERVICE_NAME=llama-http"
Environment="LLAMA_HTTP_PORT=10010"
Environment="LLAMA_HTTP_MODEL_ID=ministral-3b-instruct"
Environment="LLAMA_HTTP_IDLE_TIMEOUT_SECONDS=0"
[Install]
WantedBy=default.target
EOF
echo " ✓ systemd services created (${#CREATED_SERVICES[@]} model instances)"
# =============================================================================
# Step 4: Create GPU initialization script
# =============================================================================
echo ""
echo "==> Creating GPU initialization script..."
mkdir -p "$SCRIPT_DIR/scripts"
# The delimiter is quoted, so the body below is written verbatim: nothing in it
# is expanded at install time.
cat > "$SCRIPT_DIR/scripts/init-gpus.sh" << 'GPUSCRIPT'
#!/usr/bin/env bash
# Initialize GPUs in model-boss from nvidia-smi
# Called by systemd after model-boss-coordinator starts
set -euo pipefail

# Wait for coordinator to be ready (up to MAX_WAIT seconds)
MAX_WAIT=30
WAITED=0
while ! curl -s http://localhost:8210/ready >/dev/null 2>&1; do
  sleep 1
  WAITED=$((WAITED + 1))
  if [ "$WAITED" -ge "$MAX_WAIT" ]; then
    echo "ERROR: model-boss-coordinator not ready after ${MAX_WAIT}s"
    exit 1
  fi
done

# Check if GPUs already initialized
GPU_COUNT=$(curl -s http://localhost:8210/api/v1/gpu/status 2>/dev/null | jq -r '.gpus | length' 2>/dev/null || echo "0")
# Anything that is not a plain non-negative integer (empty body, error text,
# several lines) is treated as "no GPUs registered yet".
case "$GPU_COUNT" in
  '' | *[!0-9]*) GPU_COUNT=0 ;;
esac
if [ "$GPU_COUNT" -gt 0 ]; then
  echo "GPUs already initialized ($GPU_COUNT GPUs)"
  exit 0
fi

# Detect GPUs from nvidia-smi
if ! command -v nvidia-smi >/dev/null 2>&1; then
  echo "nvidia-smi not found, skipping GPU initialization"
  exit 0
fi
echo "Initializing GPUs from nvidia-smi..."

# Parse nvidia-smi output and initialize each GPU.
# The index reported by nvidia-smi is used as-is, so a skipped GPU cannot shift
# the numbering of the ones after it. The name is read last so that a comma
# inside the product name stays in that field.
DETECTED=0
INITIALIZED=0
while IFS=',' read -r gpu_index memory_total name; do
  # Clean up values: keep digits only, trim surrounding whitespace from the name
  gpu_index=${gpu_index//[!0-9]/}
  memory_total=${memory_total//[!0-9]/}
  name="${name#"${name%%[![:space:]]*}"}"
  name="${name%"${name##*[![:space:]]}"}"
  if [ -z "$gpu_index" ] || [ -z "$memory_total" ] || [ "$memory_total" -le 0 ]; then
    continue
  fi
  DETECTED=$((DETECTED + 1))
  echo " GPU $gpu_index: $name (${memory_total} MB)"
  # Best effort per GPU: a failed init is reported but does not abort the rest.
  if model-boss gpu init "$gpu_index" "$memory_total" --name "$name" 2>/dev/null; then
    INITIALIZED=$((INITIALIZED + 1))
  else
    echo " WARNING: failed to initialize GPU $gpu_index" >&2
  fi
done < <(nvidia-smi --query-gpu=index,memory.total,gpu_name --format=csv,noheader,nounits 2>/dev/null)

if [ "$DETECTED" -eq 0 ]; then
  echo "No GPUs detected"
elif [ "$INITIALIZED" -eq 0 ]; then
  echo "WARNING: detected $DETECTED GPU(s) but none could be initialized" >&2
else
  echo "✓ Initialized $INITIALIZED GPU(s)"
fi
GPUSCRIPT
chmod +x "$SCRIPT_DIR/scripts/init-gpus.sh"
echo " ✓ GPU init script created"
# =============================================================================
# Step 5: Reload systemd and enable services
# =============================================================================
# Make systemd pick up the unit files written above, then enable the
# coordinator followed by every llama-http unit recorded in CREATED_SERVICES.
printf '\n'
printf '%s\n' "==> Reloading systemd..."
systemctl --user daemon-reload

printf '%s\n' "==> Enabling services..."
for svc in model-boss-coordinator.service "${CREATED_SERVICES[@]}"; do
  systemctl --user enable "${svc}"
done

printf '%s\n' "==> Enabling lingering (services run after logout)..."
# Best effort: a failure here is silenced and does not abort the install.
if ! loginctl enable-linger "$(whoami)" 2>/dev/null; then
  :
fi
# =============================================================================
# Step 6: Start services
# =============================================================================
# Order matters: coordinator first, then GPU registration, then the
# llama-http instances.
printf '\n'
printf '%s\n' "==> Starting services in dependency order..."
printf '%s\n' " Starting model-boss-coordinator..."
systemctl --user start model-boss-coordinator.service
sleep 3

# The unit's ExecStartPost also runs this script; calling it here as well
# covers the case where that hook did not run. Failure is tolerated.
printf '%s\n' " Initializing GPUs..."
if ! "${SCRIPT_DIR}/scripts/init-gpus.sh"; then
  :
fi
sleep 2

# Start all configured llama-http instances; a unit that fails to start does
# not stop the remaining ones from being started.
for svc in "${CREATED_SERVICES[@]}"; do
  printf '%s\n' " Starting ${svc}..."
  if ! systemctl --user start "${svc}"; then
    :
  fi
done
# Wait for the first configured model (first valid entry of INSTALL_MODELS)
# to report healthy, for at most MAX_WAIT seconds. A timeout only produces a
# warning; the install still completes.
echo " Waiting for llama-http instances to initialize..."

#######################################
# Succeed when the JSON document on stdin reports status "ok".
# Whitespace around the colon is tolerated, so both {"status":"ok"} and
# {"status": "ok"} match.
# Arguments: none (reads stdin)
# Returns:   0 if the status is ok, non-zero otherwise
#######################################
_llama_health_ok() {
  grep -Eq '"status"[[:space:]]*:[[:space:]]*"ok"'
}

FIRST_PORT=""
for model_key in "${MODELS[@]}"; do
  model_key=$(echo "$model_key" | xargs)
  # Skip empty entries; an empty string is not a usable array subscript.
  if [ -z "$model_key" ]; then
    continue
  fi
  if [ -n "${MODEL_CONFIGS[$model_key]:-}" ]; then
    # Config layout is model_id:port:internal_port:context_size; only the
    # public port is needed here.
    IFS=':' read -r _ port _ _ <<< "${MODEL_CONFIGS[$model_key]}"
    FIRST_PORT="$port"
    break
  fi
done

if [ -n "$FIRST_PORT" ]; then
  MAX_WAIT=120
  WAITED=0
  while true; do
    # Fetch first, match second: keeps pipefail/SIGPIPE out of the readiness
    # decision, and --max-time stops a hung endpoint from blocking forever.
    HEALTH_BODY="$(curl -s --max-time 5 "http://localhost:$FIRST_PORT/health" 2>/dev/null || true)"
    if _llama_health_ok <<< "$HEALTH_BODY"; then
      break
    fi
    sleep 5
    WAITED=$((WAITED + 5))
    echo " ... waiting ($WAITED/${MAX_WAIT}s)"
    if [ "$WAITED" -ge "$MAX_WAIT" ]; then
      echo " WARNING: llama-http not ready after ${MAX_WAIT}s"
      break
    fi
  done
fi
# =============================================================================
# Summary
# =============================================================================
# Prints a final report: unit status excerpts, GPU status, endpoints, and a
# short cheat sheet. Nothing here changes system state.
cat <<'EOF'

==> Installation complete!

Service status:
EOF
# Show the first three lines of each unit's status; a missing or inactive
# unit must not abort the summary.
if ! systemctl --user status homebrew.redis.service --no-pager 2>/dev/null | head -3; then
  echo " Redis: managed by Homebrew"
fi
if ! systemctl --user status model-boss-coordinator.service --no-pager 2>/dev/null | head -3; then
  :
fi
for svc in "${CREATED_SERVICES[@]}"; do
  if ! systemctl --user status "${svc}" --no-pager 2>/dev/null | head -3; then
    :
  fi
done

printf '\n%s\n' "GPU status:"
if ! model-boss gpu status 2>/dev/null; then
  echo " Run 'model-boss gpu status' to check"
fi

cat <<'EOF'

Endpoints:
 Redis: localhost:6379 (db 0)
 model-boss-coordinator: http://localhost:8210

Multi-Model Instances:
EOF
# One line per requested model that has a MODEL_CONFIGS entry
# (layout: model_id:port:internal_port:context_size).
for model_key in "${MODELS[@]}"; do
  model_key=$(echo "$model_key" | xargs)
  if [ -z "${MODEL_CONFIGS[$model_key]:-}" ]; then
    continue
  fi
  IFS=':' read -r model_id port _ _ <<< "${MODEL_CONFIGS[$model_key]}"
  echo " llama-http-${model_key}: http://localhost:$port ($model_id)"
done

cat <<'EOF'

Redis keys used:
 gpu:* - GPU status, leases, heartbeats
 ram:* - RAM coordination (if enabled)

Environment variables:
 MODEL_BOSS_REDIS_URL - Override Redis URL
 MODEL_BOSS_PORT - Coordinator port
 INSTALL_MODELS - Models to install (default: 3b,14b)

Commands:
 model-boss gpu status - Show GPU status and leases
 redis-cli keys 'gpu:*' - Show model-boss Redis keys
 journalctl --user -u model-boss-coordinator -f - Coordinator logs
EOF
# Log-follow command for each llama-http unit, without the .service suffix.
for svc in "${CREATED_SERVICES[@]}"; do
  echo " journalctl --user -u ${svc%.service} -f"
done