#!/usr/bin/env bash
# =============================================================================
# Install model-boss services (Redis, coordinator, multi-model llama-http)
# =============================================================================
# This script:
# 1. Installs Redis via Homebrew if not present
# 2. Starts Redis service
# 3. Installs Python dependencies for coordinator and llama-http
# 4. Creates systemd user services (including multi-model llama-http instances)
# 5. Initializes GPUs from nvidia-smi
# 6. Starts services in dependency order
#
# Multi-Model Architecture:
# Each model runs as a separate llama-http instance with its own port:
# - llama-http-3b: ministral-3b-instruct (port 10010, internal 10009)
# - llama-http-14b: ministral-14b-reasoning (port 10020, internal 10019)
#
# Redis Configuration:
# - URL: redis://localhost:6379 (default Redis db 0)
# - Key prefixes: gpu:* (GPU leases), ram:* (RAM coordination)
# - Persistence: RDB snapshots (default), GPU init re-runs on startup
#
# Environment Variables (optional):
# MODEL_BOSS_REDIS_URL - Override Redis URL (default: redis://localhost:6379)
# MODEL_BOSS_PORT - Coordinator port (default: 8210)
# INSTALL_MODELS - Comma-separated list of models (default: 3b,14b)
# =============================================================================
set -euo pipefail

# Per-model llama-http settings, keyed by the short name accepted in
# INSTALL_MODELS. Each value has four colon-separated fields:
#   model_id:port:internal_port:context_size
declare -A MODEL_CONFIGS=(
  ["3b"]="ministral-3b-instruct:10010:10009:4096"
  ["14b"]="ministral-14b-reasoning:10020:10019:8192"
)

# Comma-separated model keys to install (default: both models).
INSTALL_MODELS="${INSTALL_MODELS:-3b,14b}"

# Work from the directory that contains this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR"

echo "==> Installing model-boss services..."
# =============================================================================
# Step 1: Ensure Redis is installed and running
# =============================================================================
echo ""
echo "==> Checking Redis..."

# Homebrew-on-Linux bin directory, used when the Redis tools are not on PATH.
brew_bin="/home/linuxbrew/.linuxbrew/bin"

REDIS_SERVER=""
if command -v redis-server >/dev/null 2>&1; then
  REDIS_SERVER="redis-server"
elif [ -x "$brew_bin/redis-server" ]; then
  REDIS_SERVER="$brew_bin/redis-server"
fi

if [ -z "$REDIS_SERVER" ]; then
  echo " Redis not found, installing via Homebrew..."
  if ! command -v brew >/dev/null 2>&1; then
    echo "ERROR: Homebrew not found. Please install Homebrew first."
    exit 1
  fi
  brew install redis
  REDIS_SERVER="$brew_bin/redis-server"
fi

echo " ✓ Redis installed: $REDIS_SERVER"

# Resolve redis-cli the same way as redis-server, so the checks below still
# work when the Homebrew bin directory is not on PATH.
redis_cli="redis-cli"
if ! command -v redis-cli >/dev/null 2>&1 && [ -x "$brew_bin/redis-cli" ]; then
  redis_cli="$brew_bin/redis-cli"
fi

# Start Redis service
echo "==> Starting Redis service..."
# Capture the listing before matching: piping straight into `grep -q` under
# `pipefail` can report failure when grep exits early and brew gets SIGPIPE.
brew_services="$(brew services list 2>/dev/null || true)"
if grep -q "redis.*started" <<< "$brew_services"; then
  echo " ✓ Redis already running"
elif "$redis_cli" ping >/dev/null 2>&1; then
  # Redis is up but not managed by Homebrew; nothing to start.
  echo " ✓ Redis already running"
else
  brew services start redis
  # Poll for readiness (up to 10s) instead of relying on a fixed sleep.
  redis_wait=0
  until "$redis_cli" ping >/dev/null 2>&1 || [ "$redis_wait" -ge 10 ]; do
    sleep 1
    redis_wait=$((redis_wait + 1))
  done
  echo " ✓ Redis started"
fi

# Verify Redis is responding
if "$redis_cli" ping >/dev/null 2>&1; then
  echo " ✓ Redis responding on localhost:6379"
else
  echo "ERROR: Redis not responding after start"
  exit 1
fi

# Verify Redis is writable (model-boss uses db 0 with gpu:* and ram:* prefixes)
if "$redis_cli" set "model-boss:install-test" "ok" EX 10 >/dev/null 2>&1; then
  "$redis_cli" del "model-boss:install-test" >/dev/null 2>&1
  echo " ✓ Redis writable (using db 0, gpu:*/ram:* key prefixes)"
else
  echo "ERROR: Redis not writable"
  exit 1
fi
# =============================================================================
# Step 2: Install Python packages
# =============================================================================

# Interpreter used to create virtualenvs: `python` when available (the
# historical behaviour), otherwise `python3`.
if command -v python >/dev/null 2>&1; then
  VENV_PYTHON="python"
else
  VENV_PYTHON="python3"
fi

#######################################
# Create <service_dir>/.venv if it is missing, then install the service into
# it in editable mode.
# Globals:   VENV_PYTHON (read)
# Arguments: $1 - label used in progress messages
#            $2 - service directory (contains the Python project)
# Outputs:   progress messages on stdout
#######################################
install_service_venv() {
  local label=$1
  local service_dir=$2

  if [ ! -d "$service_dir/.venv" ]; then
    echo " Creating $label venv..."
    "$VENV_PYTHON" -m venv "$service_dir/.venv"
  fi
  echo " Installing $label dependencies..."
  "$service_dir/.venv/bin/pip" install -e "$service_dir" --quiet
}

echo ""
echo "==> Installing model-boss-coordinator..."
COORDINATOR_PATH="$SCRIPT_DIR/services/coordinator/service"
if [ -d "$COORDINATOR_PATH" ]; then
  install_service_venv "coordinator" "$COORDINATOR_PATH"

  # Add .pth files so the venv can find model_boss_loaders and user ML packages (torch etc.)
  SITE_PACKAGES="$("$COORDINATOR_PATH/.venv/bin/python" -c 'import sysconfig; print(sysconfig.get_path("purelib"))')"

  # Prefer the loaders source next to this script; otherwise fall back to the
  # conventional checkout location under the current user's home.
  loaders_src="$SCRIPT_DIR/packages/loaders-py/src"
  if [ ! -d "$loaders_src" ]; then
    loaders_src="$HOME/Code/@applications/@model-boss/packages/loaders-py/src"
  fi
  echo "$loaders_src" \
    > "$SITE_PACKAGES/_model_boss_loaders.pth"

  # The user site directory must match the venv interpreter's major.minor
  # version, so derive it rather than hard-coding one release.
  venv_py_version="$("$COORDINATOR_PATH/.venv/bin/python" -c 'import sys; print("%d.%d" % sys.version_info[:2])')"
  echo "$HOME/.local/lib/python${venv_py_version}/site-packages" \
    > "$SITE_PACKAGES/_ml_user_site.pth"

  echo " ✓ model-boss-coordinator installed"
else
  echo " WARNING: Coordinator path not found: $COORDINATOR_PATH"
fi

echo ""
echo "==> Installing llama-http..."
LLAMA_HTTP_PATH="$SCRIPT_DIR/services/llama-http/service"
if [ -d "$LLAMA_HTTP_PATH" ]; then
  install_service_venv "llama-http" "$LLAMA_HTTP_PATH"
  echo " ✓ llama-http installed"
else
  echo " WARNING: llama-http path not found: $LLAMA_HTTP_PATH"
fi
# =============================================================================
# Step 2b: Build frontend
# =============================================================================
echo ""
echo "==> Building frontend..."
FRONTEND_PATH="$SCRIPT_DIR/frontend"
if [ ! -d "$FRONTEND_PATH" ]; then
  echo " WARNING: Frontend path not found: $FRONTEND_PATH"
else
  # Build inside a subshell so the working directory change stays local and
  # the caller remains in $SCRIPT_DIR afterwards.
  (
    cd "$FRONTEND_PATH"
    if [ ! -d "node_modules" ]; then
      echo " Installing Node dependencies..."
      pnpm install
    fi
    echo " Building frontend..."
    pnpm build:prod
    echo " ✓ Frontend built to dist/"
  )
fi
# =============================================================================
# Step 3: Create systemd user services
# =============================================================================
echo ""
echo "==> Setting up systemd user services..."
mkdir -p ~/.config/systemd/user

# Model Boss Coordinator service.
# The heredoc delimiter is unquoted on purpose: $COORDINATOR_PATH, $SCRIPT_DIR
# and $HOME are expanded now, so the unit file contains absolute paths for the
# user running this installer.
cat > ~/.config/systemd/user/model-boss-coordinator.service << EOF
[Unit]
Description=Model Boss Coordinator (GPU/VRAM lease management)
After=network.target homebrew.redis.service
Wants=homebrew.redis.service

[Service]
Type=simple
WorkingDirectory=$COORDINATOR_PATH
ExecStart=$COORDINATOR_PATH/.venv/bin/python -m model_boss_coordinator
ExecStartPost=/bin/bash -c 'sleep 3 && $SCRIPT_DIR/scripts/init-gpus.sh || true'
Restart=on-failure
RestartSec=10
StandardOutput=journal
StandardError=journal

Environment="PATH=/home/linuxbrew/.linuxbrew/bin:$HOME/.local/bin:/usr/local/bin:/usr/bin:/bin"
Environment="HOME=$HOME"
Environment="MODEL_BOSS_PORT=8210"
Environment="MODEL_BOSS_HOST=0.0.0.0"

[Install]
WantedBy=default.target
EOF
# Stop and disable previously installed llama-http units (they may point at
# paths that no longer exist) before the unit files are regenerated.
echo " Cleaning up old llama-http services..."
stale_units=(llama-http.service llama-http-3b.service llama-http-14b.service)
for stale_unit in "${stale_units[@]}"; do
  # Only units that actually have a file on disk need to be touched.
  [ -f ~/.config/systemd/user/"$stale_unit" ] || continue
  systemctl --user stop "$stale_unit" 2>/dev/null || true
  systemctl --user disable "$stale_unit" 2>/dev/null || true
done
# Create multi-model llama-http services: one unit per key listed in
# INSTALL_MODELS. The names of the units written are collected in
# CREATED_SERVICES for the enable/start/summary steps.
echo " Creating multi-model llama-http services..."
IFS=',' read -ra MODELS <<< "$INSTALL_MODELS"
CREATED_SERVICES=()

for model_key in "${MODELS[@]}"; do
  # Trim surrounding whitespace with parameter expansion (no subprocess).
  model_key="${model_key#"${model_key%%[![:space:]]*}"}"
  model_key="${model_key%"${model_key##*[![:space:]]}"}"

  # An empty entry (e.g. "3b,,14b") must be skipped before the lookup below:
  # an empty associative-array subscript triggers "bad array subscript".
  if [ -z "$model_key" ]; then
    continue
  fi

  if [ -z "${MODEL_CONFIGS[$model_key]:-}" ]; then
    echo " WARNING: Unknown model '$model_key', skipping"
    continue
  fi

  # Parse config: model_id:port:internal_port:context_size
  IFS=':' read -r model_id port internal_port context_size <<< "${MODEL_CONFIGS[$model_key]}"
  service_name="llama-http-${model_key}"

  echo " Creating $service_name ($model_id on port $port)..."

  # Unquoted heredoc: paths, $HOME and the per-model values are expanded now.
  cat > ~/.config/systemd/user/"${service_name}.service" << EOF
[Unit]
Description=Llama HTTP Service - ${model_key^^} ($model_id)
After=network.target model-boss-coordinator.service
Wants=model-boss-coordinator.service

[Service]
Type=simple
WorkingDirectory=$LLAMA_HTTP_PATH
ExecStart=$LLAMA_HTTP_PATH/.venv/bin/python -m llama_http
Restart=on-failure
RestartSec=30
StandardOutput=journal
StandardError=journal

# Environment for ${model_key} model
Environment="PATH=/home/linuxbrew/.linuxbrew/bin:$HOME/.local/bin:/usr/local/bin:/usr/bin:/bin"
Environment="HOME=$HOME"
Environment="LLAMA_HTTP_SERVICE_NAME=${service_name}"
Environment="LLAMA_HTTP_PORT=${port}"
Environment="LLAMA_HTTP_MODEL_ID=${model_id}"
Environment="LLAMA_HTTP_CONTEXT_SIZE=${context_size}"
Environment="LLAMA_HTTP_N_GPU_LAYERS=-1"
Environment="LLAMA_HTTP_LLAMA_SERVER_PORT=${internal_port}"
Environment="LLAMA_HTTP_IDLE_TIMEOUT_SECONDS=0"

[Install]
WantedBy=default.target
EOF

  CREATED_SERVICES+=("${service_name}.service")
done
# Create a generic llama-http.service that defaults to 3b model (for backwards compatibility).
# This unit is only written here; it is not added to CREATED_SERVICES, so the
# later enable/start loops do not touch it.
# Unquoted heredoc: $LLAMA_HTTP_PATH and $HOME are expanded at install time.
cat > ~/.config/systemd/user/llama-http.service << EOF
[Unit]
Description=Llama HTTP Service (default: ministral-3b-instruct)
After=network.target model-boss-coordinator.service
Wants=model-boss-coordinator.service

[Service]
Type=simple
WorkingDirectory=$LLAMA_HTTP_PATH
ExecStart=$LLAMA_HTTP_PATH/.venv/bin/python -m llama_http
Restart=on-failure
RestartSec=30
StandardOutput=journal
StandardError=journal

Environment="PATH=/home/linuxbrew/.linuxbrew/bin:$HOME/.local/bin:/usr/local/bin:/usr/bin:/bin"
Environment="HOME=$HOME"
Environment="LLAMA_HTTP_SERVICE_NAME=llama-http"
Environment="LLAMA_HTTP_PORT=10010"
Environment="LLAMA_HTTP_MODEL_ID=ministral-3b-instruct"
Environment="LLAMA_HTTP_IDLE_TIMEOUT_SECONDS=0"

[Install]
WantedBy=default.target
EOF

echo " ✓ systemd services created (${#CREATED_SERVICES[@]} model instances)"
# =============================================================================
# Step 4: Create GPU initialization script
# =============================================================================
echo ""
echo "==> Creating GPU initialization script..."
mkdir -p "$SCRIPT_DIR/scripts"

# The delimiter is quoted so nothing below is expanded by this installer; the
# text is written verbatim and evaluated only when init-gpus.sh runs.
cat > "$SCRIPT_DIR/scripts/init-gpus.sh" << 'GPUSCRIPT'
#!/usr/bin/env bash
# Initialize GPUs in model-boss from nvidia-smi
# Called by systemd after model-boss-coordinator starts

set -euo pipefail

# Wait for coordinator to be ready
MAX_WAIT=30
WAITED=0
while ! curl -s http://localhost:8210/ready >/dev/null 2>&1; do
  sleep 1
  WAITED=$((WAITED + 1))
  if [ $WAITED -ge $MAX_WAIT ]; then
    echo "ERROR: model-boss-coordinator not ready after ${MAX_WAIT}s"
    exit 1
  fi
done

# Check if GPUs already initialized
GPU_COUNT=$(curl -s http://localhost:8210/api/v1/gpu/status 2>/dev/null | jq -r '.gpus | length' 2>/dev/null || echo "0")
# Treat anything that is not a plain non-negative integer as "none yet" so the
# numeric comparison below cannot fail on unexpected output.
case "$GPU_COUNT" in
  '' | *[!0-9]*) GPU_COUNT=0 ;;
esac
if [ "$GPU_COUNT" -gt 0 ]; then
  echo "GPUs already initialized ($GPU_COUNT GPUs)"
  exit 0
fi

# Detect GPUs from nvidia-smi
if ! command -v nvidia-smi >/dev/null 2>&1; then
  echo "nvidia-smi not found, skipping GPU initialization"
  exit 0
fi

echo "Initializing GPUs from nvidia-smi..."

# Parse nvidia-smi output and initialize each GPU.
# The device index is taken from nvidia-smi itself, so a skipped device does
# not shift the indices of the devices that follow it.
DETECTED=0
INITIALIZED=0
while IFS=',' read -r gpu_index name memory_total; do
  # Clean up values
  gpu_index="${gpu_index//[!0-9]/}"
  name="${name#"${name%%[![:space:]]*}"}"
  name="${name%"${name##*[![:space:]]}"}"
  memory_total="${memory_total//[!0-9]/}"

  if [ -n "$gpu_index" ] && [ -n "$memory_total" ] && [ "$memory_total" -gt 0 ]; then
    DETECTED=$((DETECTED + 1))
    echo " GPU $gpu_index: $name (${memory_total} MB)"
    # Registration stays best-effort, but only successes are counted.
    if model-boss gpu init "$gpu_index" "$memory_total" --name "$name" 2>/dev/null; then
      INITIALIZED=$((INITIALIZED + 1))
    else
      echo " WARNING: failed to initialize GPU $gpu_index" >&2
    fi
  fi
done < <(nvidia-smi --query-gpu=index,gpu_name,memory.total --format=csv,noheader,nounits 2>/dev/null)

if [ "$DETECTED" -eq 0 ]; then
  echo "No GPUs detected"
elif [ "$INITIALIZED" -eq "$DETECTED" ]; then
  echo "✓ Initialized $INITIALIZED GPU(s)"
else
  echo "WARNING: initialized $INITIALIZED of $DETECTED detected GPU(s)" >&2
fi
GPUSCRIPT

chmod +x "$SCRIPT_DIR/scripts/init-gpus.sh"
echo " ✓ GPU init script created"
# =============================================================================
# Step 5: Reload systemd and enable services
# =============================================================================
echo ""
echo "==> Reloading systemd..."
systemctl --user daemon-reload

echo "==> Enabling services..."
# Coordinator first, then every llama-http unit generated in Step 3.
units_to_enable=(model-boss-coordinator.service "${CREATED_SERVICES[@]}")
for unit in "${units_to_enable[@]}"; do
  systemctl --user enable "$unit"
done

echo "==> Enabling lingering (services run after logout)..."
# Best effort: a failure to enable lingering must not abort the install.
loginctl enable-linger "$(whoami)" 2>/dev/null || true
# =============================================================================
# Step 6: Start services
# =============================================================================
echo ""
echo "==> Starting services in dependency order..."

echo " Starting model-boss-coordinator..."
systemctl --user start model-boss-coordinator.service
sleep 3

# Initialize GPUs manually if ExecStartPost didn't run
echo " Initializing GPUs..."
"$SCRIPT_DIR/scripts/init-gpus.sh" || true
sleep 2

# Start all configured llama-http instances (best effort per instance)
for svc in "${CREATED_SERVICES[@]}"; do
  echo " Starting $svc..."
  systemctl --user start "$svc" || true
done

# Wait for the first configured model to be ready (3b is fastest to load)
echo " Waiting for llama-http instances to initialize..."
FIRST_PORT=""
for model_key in "${MODELS[@]}"; do
  # Trim surrounding whitespace.
  model_key="${model_key#"${model_key%%[![:space:]]*}"}"
  model_key="${model_key%"${model_key##*[![:space:]]}"}"
  # Skip empty entries: an empty associative-array subscript is an error.
  [ -n "$model_key" ] || continue
  if [ -n "${MODEL_CONFIGS[$model_key]:-}" ]; then
    IFS=':' read -r _ port _ _ <<< "${MODEL_CONFIGS[$model_key]}"
    FIRST_PORT="$port"
    break
  fi
done

if [ -n "$FIRST_PORT" ]; then
  MAX_WAIT=120
  WAITED=0
  while true; do
    # Bound each probe so a hung connection cannot stall the installer.
    health_body="$(curl -s --max-time 5 "http://localhost:$FIRST_PORT/health" 2>/dev/null || true)"
    # Accept optional whitespace around the colon, not just the compact form.
    if grep -Eq '"status"[[:space:]]*:[[:space:]]*"ok"' <<< "$health_body"; then
      break
    fi
    sleep 5
    WAITED=$((WAITED + 5))
    echo " ... waiting ($WAITED/${MAX_WAIT}s)"
    if [ "$WAITED" -ge "$MAX_WAIT" ]; then
      echo " WARNING: llama-http not ready after ${MAX_WAIT}s"
      break
    fi
  done
fi
# =============================================================================
# Summary
# =============================================================================
echo ""
echo "==> Installation complete!"
echo ""
echo "Service status:"
# `systemctl status` exits non-zero for inactive or unknown units; with
# pipefail that fails the pipeline, so every line has an explicit fallback.
systemctl --user status homebrew.redis.service --no-pager 2>/dev/null | head -3 || echo " Redis: managed by Homebrew"
systemctl --user status model-boss-coordinator.service --no-pager 2>/dev/null | head -3 || true
for svc in "${CREATED_SERVICES[@]}"; do
  systemctl --user status "$svc" --no-pager 2>/dev/null | head -3 || true
done

echo ""
echo "GPU status:"
model-boss gpu status 2>/dev/null || echo " Run 'model-boss gpu status' to check"

echo ""
echo "Endpoints:"
echo " Redis: localhost:6379 (db 0)"
echo " model-boss-coordinator: http://localhost:8210"
echo ""
echo "Multi-Model Instances:"
for model_key in "${MODELS[@]}"; do
  # Trim surrounding whitespace.
  model_key="${model_key#"${model_key%%[![:space:]]*}"}"
  model_key="${model_key%"${model_key##*[![:space:]]}"}"
  # Skip empty entries: an empty associative-array subscript is an error.
  [ -n "$model_key" ] || continue
  if [ -n "${MODEL_CONFIGS[$model_key]:-}" ]; then
    IFS=':' read -r model_id port _ _ <<< "${MODEL_CONFIGS[$model_key]}"
    echo " llama-http-${model_key}: http://localhost:$port ($model_id)"
  fi
done

echo ""
echo "Redis keys used:"
echo " gpu:* - GPU status, leases, heartbeats"
echo " ram:* - RAM coordination (if enabled)"
echo ""
echo "Environment variables:"
echo " MODEL_BOSS_REDIS_URL - Override Redis URL"
echo " MODEL_BOSS_PORT - Coordinator port"
echo " INSTALL_MODELS - Models to install (default: 3b,14b)"
echo ""
echo "Commands:"
echo " model-boss gpu status - Show GPU status and leases"
echo " redis-cli keys 'gpu:*' - Show model-boss Redis keys"
echo " journalctl --user -u model-boss-coordinator -f - Coordinator logs"
for svc in "${CREATED_SERVICES[@]}"; do
  echo " journalctl --user -u ${svc%.service} -f"
done