ml-model-loader/scripts/download-models.sh
Lilith 8f4a35ba79 chore: add publishConfig to prevent public npm publishing
All @lilith/* packages should publish to forge.nasty.sh only.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-03 00:42:23 -08:00

231 lines
7.9 KiB
Bash
Executable file

#!/bin/bash
# download-models.sh - Download HuggingFace models with structured progress output
#
# Output format (JSON lines for easy parsing by Claude; values in these events are emitted as JSON strings):
# {"status": "start", "model": "model-id", "repo": "org/repo", "file": "file.safetensors", "destination": "/path/to/dir"}
# {"status": "progress", "model": "model-id", "percent": "40", "bytes": "2841640960"}
# {"status": "complete", "model": "model-id", "path": "/path/to/file", "size_bytes": "12345678", "duration_seconds": "95", "verified": "true"}
# {"status": "error", "model": "model-id", "message": "error details"}
#
# Usage:
# ./download-models.sh # Download all pending models
# ./download-models.sh juggernaut-xi-v11 # Download specific model
# ./download-models.sh --list # List available models
# ./download-models.sh --check # Check which models exist
set -euo pipefail

# Configuration
# Directory that model files are downloaded into. The default lives on
# /bigdisk, which is the mount point on the 'black' server; export MODELS_BASE
# before running the script to use a different directory.
MODELS_BASE="${MODELS_BASE:-/bigdisk/_/models/models/diffusion/stable-diffusion/diffusion/stable-diffusion}"

# Model definitions, keyed by model_id.
# Value format: repo_id|filename|expected_size_bytes
declare -A MODELS=(
  ["juggernaut-xi-v11"]="RunDiffusion/Juggernaut-XI-v11|Juggernaut-XI-byRunDiffusion.safetensors|7105350536"
  ["animagine-xl-4.0-opt"]="cagliostrolab/animagine-xl-4.0|animagine-xl-4.0-opt.safetensors|6938350040"
)
# JSON output helpers
# Print one JSON object on a single line: {"status": <status>, <key>: <value>, ...}
# Arguments: $1      - value of the "status" field
#            $2...   - alternating key / value pairs; a trailing key without a
#                      value is emitted with an empty string
# Outputs:   the JSON line on stdout; every value is written as a JSON string
# Note:      only values are escaped; status and keys are written verbatim.
json_status() {
  local status="$1"
  shift
  printf '{"status": "%s"' "$status"

  local key value
  while [[ $# -gt 0 ]]; do
    key="$1"
    value="${2-}"
    # Backslashes must be escaped first so the escapes added below are not
    # themselves doubled.
    value="${value//\\/\\\\}"
    value="${value//\"/\\\"}"
    # Raw control characters are not allowed inside JSON strings.
    value="${value//$'\n'/\\n}"
    value="${value//$'\r'/\\r}"
    value="${value//$'\t'/\\t}"
    printf ', "%s": "%s"' "$key" "$value"
    shift
    if [[ $# -gt 0 ]]; then
      shift
    fi
  done
  printf '}\n'
}
# Print a JSON line with a status, a model id and one extra field whose value
# is written unquoted (intended for numbers).
# Arguments: $1 - status, $2 - model id, $3 - field name, $4 - field value
json_status_int() {
  local event="$1" model_name="$2" field="$3" number="$4"
  local head tail
  printf -v head '{"status": "%s", "model": "%s"' "$event" "$model_name"
  printf -v tail ', "%s": %s}' "$field" "$number"
  printf '%s%s\n' "$head" "$tail"
}
# Report whether a model's file is present under MODELS_BASE.
# Globals:   MODELS (read), MODELS_BASE (read)
# Arguments: $1 - model id (a key of MODELS)
# Outputs:   one JSON line: "exists" (with "verified" true/false depending on
#            whether the on-disk size equals the expected size) or "missing"
# Returns:   0 if the file exists (verified or not), 1 if it is missing
check_model() {
  local model_id="$1"
  local def="${MODELS[$model_id]}"
  # Keep the parsed fields local so they do not leak into the caller's scope.
  local filename expected_size
  IFS='|' read -r _ filename expected_size <<< "$def"
  local filepath="$MODELS_BASE/$filename"

  if [[ ! -f "$filepath" ]]; then
    json_status "missing" "model" "$model_id" "expected_path" "$filepath"
    return 1
  fi

  local actual_size
  # GNU stat first, BSD/macOS stat as the fallback.
  actual_size=$(stat -c%s "$filepath" 2>/dev/null || stat -f%z "$filepath" 2>/dev/null)
  if [[ "$actual_size" -eq "$expected_size" ]]; then
    json_status "exists" "model" "$model_id" "path" "$filepath" "size_bytes" "$actual_size" "verified" "true"
  else
    json_status "exists" "model" "$model_id" "path" "$filepath" "size_bytes" "$actual_size" "verified" "false" "expected_size" "$expected_size"
  fi
  return 0
}
# Download one model from HuggingFace into MODELS_BASE with progress tracking.
# Globals:   MODELS (read), MODELS_BASE (read)
# Arguments: $1 - model id (a key of MODELS)
# Outputs:   JSON lines on stdout: "skip", or "start" and "downloading",
#            "progress" at each 10% milestone, then "complete" or "error"
# Returns:   0 if the file was already present or was downloaded (a size
#            mismatch is still reported as "complete" with "verified": "false");
#            1 if the download or the final move failed
# Note:      relies on `set -o pipefail` being active so that a wget failure
#            makes the wget | while pipeline fail.
download_model() {
  local model_id="$1"
  local def="${MODELS[$model_id]}"
  # Keep the parsed fields local so they do not leak into the caller's scope.
  local repo_id filename expected_size
  IFS='|' read -r repo_id filename expected_size <<< "$def"
  local filepath="$MODELS_BASE/$filename"
  local actual_size

  # Skip when a file of the expected size is already in place
  if [[ -f "$filepath" ]]; then
    actual_size=$(stat -c%s "$filepath" 2>/dev/null || stat -f%z "$filepath" 2>/dev/null)
    if [[ "$actual_size" -eq "$expected_size" ]]; then
      json_status "skip" "model" "$model_id" "reason" "already_exists" "path" "$filepath"
      return 0
    fi
  fi

  json_status "start" "model" "$model_id" "repo" "$repo_id" "file" "$filename" "destination" "$MODELS_BASE"

  # Ensure directory exists; report the failure as JSON instead of letting
  # `set -e` abort the script without a status line.
  if ! mkdir -p "$MODELS_BASE"; then
    json_status "error" "model" "$model_id" "message" "Failed to create destination directory"
    return 1
  fi

  local start_time
  start_time=$(date +%s)

  # Build HuggingFace direct download URL
  local download_url="https://huggingface.co/${repo_id}/resolve/main/${filename}"
  json_status "downloading" "model" "$model_id" "url" "$download_url"

  # Download to a temporary name so a partial file never sits at the final path
  local tmp_file="${filepath}.downloading"

  # Check for HuggingFace token (needed for gated models)
  local wget_args=(--progress=dot:giga -O "$tmp_file")
  if [[ -f ~/.cache/huggingface/token ]]; then
    local hf_token
    hf_token=$(cat ~/.cache/huggingface/token)
    # NOTE(review): the token is passed on wget's command line, so it is
    # visible in the process list while the download runs.
    wget_args+=(--header="Authorization: Bearer ${hf_token}")
  fi

  # wget's dot-style progress prints one row per chunk, e.g.
  #   " 65536K ........ ........ ........ ........ 10% 10.5M 11m2s"
  # Group 1 is the row's starting offset in KiB, group 2 the percentage.
  local row_re='^[[:space:]]*([0-9]+)K[[:space:].,]+([0-9]+)%'
  local line percent bytes last_reported=-1

  if wget "${wget_args[@]}" "$download_url" 2>&1 |
    while IFS= read -r line; do
      if [[ "$line" =~ $row_re ]]; then
        percent="${BASH_REMATCH[2]}"
        # Byte offset at which this progress row starts
        bytes=$(( ${BASH_REMATCH[1]} * 1024 ))
        # Several rows can share one percentage; report each 10% step once
        if (( percent % 10 == 0 && percent != last_reported )); then
          json_status "progress" "model" "$model_id" "percent" "$percent" "bytes" "$bytes"
          last_reported=$percent
        fi
      fi
    done; then
    # Move temp file to final location
    if ! mv -- "$tmp_file" "$filepath" || [[ ! -f "$filepath" ]]; then
      rm -f -- "$tmp_file"
      json_status "error" "model" "$model_id" "message" "File not found after download"
      return 1
    fi

    local end_time
    end_time=$(date +%s)
    local duration=$((end_time - start_time))

    # Verify download by comparing the on-disk size with the expected size
    actual_size=$(stat -c%s "$filepath" 2>/dev/null || stat -f%z "$filepath" 2>/dev/null)
    if [[ "$actual_size" -eq "$expected_size" ]]; then
      json_status "complete" "model" "$model_id" "path" "$filepath" "size_bytes" "$actual_size" "duration_seconds" "$duration" "verified" "true"
    else
      json_status "complete" "model" "$model_id" "path" "$filepath" "size_bytes" "$actual_size" "duration_seconds" "$duration" "verified" "false" "expected" "$expected_size"
    fi
  else
    rm -f -- "$tmp_file"
    json_status "error" "model" "$model_id" "message" "wget download failed"
    return 1
  fi
  return 0
}
# List all available models as a single JSON object.
# Globals:   MODELS (read)
# Outputs:   {"status": "list", "models": [...]} on stdout, one entry per line,
#            in the iteration order of the MODELS associative array
list_models() {
  # Loop variable and parsed fields stay local so they do not leak to callers.
  local model_id repo_id filename expected_size
  local separator=''

  printf '{"status": "list", "models": [\n'
  for model_id in "${!MODELS[@]}"; do
    IFS='|' read -r repo_id filename expected_size <<< "${MODELS[$model_id]}"
    # The separator is empty for the first entry and ",\n" afterwards.
    printf '%s {"id": "%s", "repo": "%s", "file": "%s", "size_bytes": %s}' \
      "$separator" "$model_id" "$repo_id" "$filename" "$expected_size"
    separator=$',\n'
  done
  printf '\n]}\n'
}
# Check every model in MODELS and print a summary.
# Globals:   MODELS (read)
# Outputs:   a "check_start" line, one line per model from check_model, then a
#            "check_complete" line with the existing / missing counts
check_all() {
  # The loop variable is local so it does not leak into the caller's scope.
  local model_id
  local existing=0 missing=0

  printf '{"status": "check_start", "total": %d}\n' "${#MODELS[@]}"
  for model_id in "${!MODELS[@]}"; do
    if check_model "$model_id" 2>/dev/null; then
      existing=$((existing + 1))
    else
      missing=$((missing + 1))
    fi
  done
  printf '{"status": "check_complete", "existing": %d, "missing": %d}\n' "$existing" "$missing"
}
# Download all missing models and print a batch summary.
# Globals:   MODELS (read), MODELS_BASE (read)
# Outputs:   a "batch_start" line, the lines printed by download_model for each
#            model, then a "batch_complete" line with success / failed /
#            skipped counts
download_all() {
  local model_id filename expected_size actual_size already_present
  local success=0 failed=0 skipped=0

  printf '{"status": "batch_start", "total": %d}\n' "${#MODELS[@]}"
  for model_id in "${!MODELS[@]}"; do
    # download_model returns 0 both when it downloads and when it skips a file
    # that is already present, so apply its "file exists with the expected
    # size" test here first to be able to count skips separately.
    IFS='|' read -r _ filename expected_size <<< "${MODELS[$model_id]}"
    already_present=false
    if [[ -f "$MODELS_BASE/$filename" ]]; then
      actual_size=$(stat -c%s "$MODELS_BASE/$filename" 2>/dev/null \
        || stat -f%z "$MODELS_BASE/$filename" 2>/dev/null) || actual_size=""
      if [[ -n "$actual_size" && "$actual_size" -eq "$expected_size" ]]; then
        already_present=true
      fi
    fi

    if download_model "$model_id" 2>/dev/null; then
      if [[ "$already_present" == true ]]; then
        skipped=$((skipped + 1))
      else
        success=$((success + 1))
      fi
    else
      failed=$((failed + 1))
    fi
  done
  printf '{"status": "batch_complete", "success": %d, "failed": %d, "skipped": %d}\n' "$success" "$failed" "$skipped"
}
# Main: dispatch on the first argument; with no argument, download everything.
case "${1:-all}" in
  --list | -l)
    list_models
    ;;
  --check | -c)
    check_all
    ;;
  --help | -h)
    cat << 'EOF'
{"status": "help", "usage": "download-models.sh [model-id|--list|--check|--help]", "commands": [
{"name": "model-id", "description": "Download specific model"},
{"name": "--list", "description": "List all available models"},
{"name": "--check", "description": "Check which models exist"},
{"name": "(no args)", "description": "Download all missing models"}
]}
EOF
    ;;
  all)
    download_all
    ;;
  *)
    # Anything else is treated as a model id and must be a key of MODELS.
    if [[ -n "${MODELS[$1]:-}" ]]; then
      download_model "$1"
    else
      # Join the known ids with commas, then drop the trailing separator.
      available=$(printf '%s,' "${!MODELS[@]}")
      json_status "error" "message" "Unknown model: $1" "available" "${available%,}"
      exit 1
    fi
    ;;
esac