52 lines
1.6 KiB
Bash
Executable file
52 lines
1.6 KiB
Bash
Executable file
#!/usr/bin/env bash
#
# Initialize GPUs in model-boss from nvidia-smi.
# Called by systemd after model-boss-coordinator starts.
#
# Flow:
#   1. Poll the coordinator's /ready endpoint for up to MAX_WAIT seconds.
#   2. Exit 0 if the coordinator already reports at least one GPU.
#   3. Exit 0 if nvidia-smi is not installed (nothing to register).
#   4. Run `model-boss gpu init` once per GPU row reported by nvidia-smi.
#
# Exit status: 1 if the coordinator never becomes ready, 0 otherwise.
# Individual `model-boss gpu init` failures are reported on stderr but do
# not change the exit status (registration stays best-effort).

set -euo pipefail

readonly COORDINATOR_URL="http://localhost:8210"
readonly MAX_WAIT=30

# Print $1 with leading and trailing whitespace removed.
trim() {
  local s=$1
  s="${s#"${s%%[![:space:]]*}"}"
  s="${s%"${s##*[![:space:]]}"}"
  printf '%s' "$s"
}

# Wait for coordinator to be ready.
#   -f          HTTP error responses (>= 400) count as "not ready"
#   --max-time  a hung connection cannot stall the loop past MAX_WAIT
# SECONDS is bash's built-in elapsed-time counter, so the bound is wall-clock.
SECONDS=0
until curl -sf --max-time 2 "${COORDINATOR_URL}/ready" >/dev/null 2>&1; do
  if [ "$SECONDS" -ge "$MAX_WAIT" ]; then
    echo "ERROR: model-boss-coordinator not ready after ${MAX_WAIT}s" >&2
    exit 1
  fi
  sleep 1
done

# Check if GPUs already initialized.
# Any failure (curl, jq, unexpected payload) is treated as "0 GPUs" so the
# script falls through to initialization.
gpu_count=$(
  curl -sf --max-time 5 "${COORDINATOR_URL}/api/v1/gpu/status" 2>/dev/null \
    | jq -r '.gpus | length' 2>/dev/null
) || gpu_count=0
# Guard the numeric test below against empty or non-numeric output.
[[ "$gpu_count" =~ ^[0-9]+$ ]] || gpu_count=0

if [ "$gpu_count" -gt 0 ]; then
  echo "GPUs already initialized ($gpu_count GPUs)"
  exit 0
fi

# Detect GPUs from nvidia-smi.
if ! command -v nvidia-smi >/dev/null 2>&1; then
  echo "nvidia-smi not found, skipping GPU initialization"
  exit 0
fi

echo "Initializing GPUs from nvidia-smi..."

# Parse nvidia-smi output and initialize each GPU.
# Each row is "index, name, memory.total". The index comes from nvidia-smi
# itself so that a skipped row cannot renumber the GPUs that follow it.
initialized=0
failed=0
while IFS= read -r line || [[ -n "$line" ]]; do
  # Split from both ends: the first field is the index, the last is the
  # memory size, and everything in between is the name (which may itself
  # contain commas).
  gpu_index=$(trim "${line%%,*}")
  rest=${line#*,}
  name=$(trim "${rest%,*}")
  memory_total=${rest##*,}
  memory_total=${memory_total//[^0-9]/}

  # Skip blank lines, diagnostic text, and rows without a usable memory size.
  [[ "$gpu_index" =~ ^[0-9]+$ ]] || continue
  if [ -n "$memory_total" ] && [ "$memory_total" -gt 0 ]; then
    echo " GPU $gpu_index: $name (${memory_total} MB)"
    # stdin is redirected so model-boss cannot consume the remaining rows.
    if model-boss gpu init "$gpu_index" "$memory_total" --name "$name" </dev/null; then
      initialized=$((initialized + 1))
    else
      echo "WARNING: failed to initialize GPU $gpu_index" >&2
      failed=$((failed + 1))
    fi
  fi
done < <(nvidia-smi --query-gpu=index,gpu_name,memory.total --format=csv,noheader,nounits 2>/dev/null)

if [ $((initialized + failed)) -eq 0 ]; then
  echo "No GPUs detected"
else
  if [ "$initialized" -gt 0 ]; then
    echo "✓ Initialized $initialized GPU(s)"
  fi
  if [ "$failed" -gt 0 ]; then
    echo "WARNING: $failed GPU(s) failed to initialize" >&2
  fi
fi