prospector/deploy/deploy-server.sh
Natalie e9916e76de
Some checks failed
CI/CD / verify (push) Failing after 53s
CI/CD / deploy (push) Has been skipped
fix(deploy): install @cocotte/ai-harness from verdaccio + narrow the __SET_ME__ guard
The backend depends on the @cocotte/ai-harness workspace package (published to
the ct-forge verdaccio). ct.prod isn't a workspace, so npm ci can't resolve the
local link — ship an .npmrc (scope routing + read token) and explicitly install
the published tarball after npm ci. Also narrow the DB-creds guard to =__SET_ME__
so a comment mentioning the marker no longer blocks the migrate/start step.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-07-01 07:08:06 -04:00

179 lines
9.2 KiB
Bash
Executable file

#!/bin/bash
# Strict mode: -e aborts on an unhandled failing command, -u treats unset
# variables as errors, and pipefail makes a pipeline fail if any stage fails.
set -euo pipefail
# Deploy the prospector backend (NestJS) to ct.prod (com.uvlava.ct.prod) — the
# hardened PUBLIC prod host (the DMZ). lime is NO LONGER the app/edge host; it
# stays purely internal.
#
# Build locally (nest build -> dist/), ship dist/ + runtime package files +
# migrations/ + the built PWA, ensure node20 + psql-16, install a systemd unit,
# provision the app's .env (dotenv — NOT a systemd EnvironmentFile; ConfigModule
# reads envFilePath ['.env.local','.env'] and dotenv does NOT override
# process.env, so EnvironmentFile silently shadows it), run pending SQL
# migrations, and (re)start. The app binds 127.0.0.1:3210 only; the public
# sales surface is served by Caddy on ct.prod (80/443 -> 127.0.0.1:3210), which
# 403s /internal/* (see deploy/edge/apps.ftw.pw.Caddyfile). DB (DO Managed PG)
# and mesh deps (people/mac-sync/mr-number) are reached over the store VPC / wg1
# mesh — never a public leg.
#
# DATABASE (one-time, secret-bearing — NOT done here):
# doctl databases db create <cluster> prospector
# doctl databases user create <cluster> prospector # generates pw
# # as doadmin, on the prospector DB:
# ALTER DATABASE prospector OWNER TO prospector;
# GRANT ALL ON SCHEMA public TO prospector; ALTER SCHEMA public OWNER TO prospector;
# then fill the PROSPECTOR_DB_* lines in /opt/prospector/.env on ct.prod.
# This script runs migrations + starts only once .env has real DB creds.
#
# Usage:
# ./deploy-server.sh # build + ship + migrate + restart (over mesh)
# ./deploy-server.sh --skip-build # ship the current dist/ as-is
# SERVER_HOST=<ct.prod reserved IP> ./deploy-server.sh # over the public leg
# ct.prod = com.uvlava.ct.prod. Default to the stable wg1 mesh address (always
# reachable once the box has joined wg1; no dependency on the reserved IP, which
# only exists after `terraform apply`). For a non-mesh deploy, pass the ct.prod
# reserved public IP via SERVER_HOST.
# Target host: keep a caller-supplied SERVER_HOST, otherwise fall back to the
# ct.prod wg1 mesh address (the reserved public IP only exists after apply).
: "${SERVER_HOST:=10.9.0.10}"
# NOTE: ct.prod must be a TRUSTED SOURCE on the lilith-store-pg managed cluster
# (DO console / databases firewall) or migrations + the app's DB connect time out.
REMOTE_DIR=/opt/prospector
SERVICE_NAME=prospector
PORT=3210

# Absolute paths of this script's directory and of the app root (its parent).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
APP_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

# --skip-build is the only recognised flag; every other argument is ignored.
SKIP_BUILD=false
for arg in "$@"; do
  case "$arg" in
    --skip-build) SKIP_BUILD=true ;;
  esac
done

# ssh command line used (unquoted, word-split) for every remote call. The
# dedicated deploy key is added only when it exists on this machine. RSH is
# the same string, handed to rsync via -e.
SSH="ssh -o StrictHostKeyChecking=accept-new"
if [ -f "$HOME/.ssh/id_ed25519_1984" ]; then
  SSH="$SSH -i $HOME/.ssh/id_ed25519_1984"
fi
RSH="$SSH"
R="root@$SERVER_HOST"
# Print a progress line to stdout: a green "▸" marker followed by the message.
#   $1 - message text (printed verbatim via %s)
say() {
  local msg=$1
  printf '\033[0;32m▸\033[0m %s\n' "$msg"
}
# Print a red "✗ <message>" line to stderr and terminate the script with status 1.
#   $1 - message text (printed verbatim via %s)
die() {
  local reason=$1
  printf '\033[0;31m✗ %s\033[0m\n' "$reason" >&2
  exit 1
}
say "Checking SSH to $SERVER_HOST"
# Fail fast when the host cannot be reached; ssh's own output is discarded.
if ! $SSH "$R" 'echo ok' >/dev/null 2>&1; then
  die "cannot reach $R"
fi
# Build locally unless --skip-build was given; either way dist/ must exist
# before anything is shipped.
if [ "$SKIP_BUILD" = false ]; then
  say "Building backend (nest build) + PWA (vite build) locally"
  ( cd "$APP_ROOT" && npm run build ) || die "backend build failed"
  # The web app is optional: it is built only when web/package.json exists.
  if [ -f "$APP_ROOT/web/package.json" ]; then
    ( cd "$APP_ROOT/web" && npm run build ) || die "web build failed"
  fi
fi
if [ ! -d "$APP_ROOT/dist" ]; then
  die "no dist/ — build first (drop --skip-build)"
fi
say "Ensuring node20 + psql-16 on the droplet"
# Remote script (single-quoted, so nothing expands locally):
#   1. no `node` on PATH      -> run the NodeSource setup_20.x script, then
#                                apt-get install nodejs;
#   2. `psql` missing or not  -> apt-get install postgresql-client-16;
#      runnable
#   3. verify that BOTH tools actually run.
# The installers' output is silenced, so step 3 is the only signal. It used to
# be `node -v` alone, which let a failed psql install pass unnoticed until the
# migration step; the final check now covers psql as well, matching the
# "node/psql" failure message.
$SSH "$R" 'command -v node >/dev/null 2>&1 || { export DEBIAN_FRONTEND=noninteractive; curl -fsSL https://deb.nodesource.com/setup_20.x | bash - >/dev/null 2>&1 && apt-get install -y nodejs >/dev/null 2>&1; }
command -v psql >/dev/null 2>&1 && psql --version >/dev/null 2>&1 || { export DEBIAN_FRONTEND=noninteractive; apt-get install -y postgresql-client-16 >/dev/null 2>&1; }
node -v && psql --version' >/dev/null || die "node/psql provisioning failed"
say "Shipping dist/ + package files + migrations/ (+ built PWA if present)"
$SSH "$R" "mkdir -p $REMOTE_DIR"

# dist/ is mirrored (--delete removes stale remote files); package.json is
# always sent.
rsync -az --delete -e "$RSH" "$APP_ROOT/dist/" "$R:$REMOTE_DIR/dist/"
rsync -az -e "$RSH" "$APP_ROOT/package.json" "$R:$REMOTE_DIR/package.json"

# The lockfile and migrations/ are optional and sent only when present locally.
if [ -f "$APP_ROOT/package-lock.json" ]; then
  rsync -az -e "$RSH" "$APP_ROOT/package-lock.json" "$R:$REMOTE_DIR/"
fi
if [ -d "$APP_ROOT/migrations" ]; then
  rsync -az -e "$RSH" "$APP_ROOT/migrations/" "$R:$REMOTE_DIR/migrations/"
fi

# Built web app: the first candidate directory that exists is mirrored to
# web-dist/ and the search stops there.
for web_dist in "$APP_ROOT/web/dist" "$APP_ROOT/dist/web"; do
  if [ -d "$web_dist" ]; then
    rsync -az --delete -e "$RSH" "$web_dist/" "$R:$REMOTE_DIR/web-dist/"
    break
  fi
done
say "Shipping .npmrc (@cocotte/@lilith -> ct-forge verdaccio) for the published inference pkg"
# The backend depends on @cocotte/ai-harness, published to the ct-forge verdaccio.
# ct.prod is not a workspace, so it must pull the published tarball — route the
# scope + carry the read token (from the operator's ~/.npmrc).
#
# Token lookup: first matching line only. A missing file or missing entry used
# to kill the script silently (pipefail + errexit on the assignment); it is now
# reported explicitly instead of shipping an empty token.
_VTOK="$(grep -m1 -F '//134.199.243.61:4873/:_authToken=' "$HOME/.npmrc" 2>/dev/null | sed 's|.*_authToken=||')" || _VTOK=""
[ -n "$_VTOK" ] || die "no verdaccio read token (//134.199.243.61:4873/:_authToken=) found in $HOME/.npmrc"
# The file body travels over ssh's stdin rather than inside the remote command
# string, so the token does not appear in the local or remote process list and
# is not re-expanded by the remote shell. umask 077 keeps a newly created file
# private from the start; chmod tightens a pre-existing one.
$SSH "$R" "umask 077; cat > $REMOTE_DIR/.npmrc && chmod 600 $REMOTE_DIR/.npmrc" <<EOF || die "could not write $REMOTE_DIR/.npmrc on $R"
registry=https://registry.npmjs.org/
@cocotte:registry=http://134.199.243.61:4873/
@lilith:registry=http://134.199.243.61:4873/
//134.199.243.61:4873/:_authToken=$_VTOK
EOF
say "Installing runtime deps (npm ci --omit=dev)"
# npm ci is tried first; npm install is the fallback when it fails.
$SSH "$R" "cd $REMOTE_DIR && (npm ci --omit=dev || npm install --omit=dev)" >/dev/null
say "Ensuring @cocotte/ai-harness is present from verdaccio (workspace pkg, not a local link on prod)"
# Install the package only when node cannot resolve it. The probe and the
# fallback are grouped so both run inside $REMOTE_DIR: without the braces,
# `cd … && node … || npm install …` would run npm install in the login
# directory whenever the cd itself failed.
$SSH "$R" "cd $REMOTE_DIR && { node -e \"require.resolve('@cocotte/ai-harness')\" 2>/dev/null || npm install --omit=dev --no-save @cocotte/ai-harness; }" >/dev/null
say "Provisioning $REMOTE_DIR/.env (dotenv; preserves existing secrets)"
# Decide from the exit status of the remote `test -f`:
#   0   -> .env exists: keep it untouched;
#   1   -> .env is missing: generate it with placeholders + fresh tokens;
#   any other status (ssh reports 255 on its own errors) -> abort.
# This replaces an `A && say || create` chain, where a failing `say` would have
# fallen through to the create branch and overwritten an existing .env.
_env_rc=0
$SSH "$R" "test -f $REMOTE_DIR/.env" 2>/dev/null || _env_rc=$?
case "$_env_rc" in
  0)
    say ".env exists — preserving"
    ;;
  1)
    # gen N prints 2*N hex characters (openssl, else /dev/urandom + xxd); the
    # \$(gen …) calls are escaped so they run on the remote host. umask 077
    # makes the file private from creation, not only after the final chmod.
    $SSH "$R" "umask 077
gen() { openssl rand -hex \"\$1\" 2>/dev/null || head -c \"\$1\" /dev/urandom | xxd -p | tr -d '\n'; }
cat > $REMOTE_DIR/.env <<EOF
PROSPECTOR_API_PORT=$PORT
NODE_ENV=production
PROSPECTOR_WEB_DIST=$REMOTE_DIR/web-dist
# Fill these from the prospector role on lilith-store-pg (see header).
PROSPECTOR_DB_HOST=__SET_ME__
PROSPECTOR_DB_PORT=25060
PROSPECTOR_DB_NAME=prospector
PROSPECTOR_DB_USER=prospector
PROSPECTOR_DB_PASSWORD=__SET_ME__
PROSPECTOR_DB_SSL=true
PROSPECTOR_SERVICE_TOKEN=\$(gen 32)
# deps (HTTP over the wg1 mesh) — placeholders until people/mac-sync/mr-number deploy
PEOPLE_BASE_URL=http://10.9.0.5:3061
PEOPLE_SERVICE_TOKEN=\$(gen 24)
# mac-sync runs on the OPERATOR's Mac (not lime, not ct.prod). Reachable over
# the wg1 mesh at the Mac's mesh IP — the operator MUST set these two by hand:
# MACSYNC_BASE_URL -> http://<operator-mac wg1 IP>:3201
# MACSYNC_DEVICE_ID -> the macsync device id of that Mac
# (marker __OPERATOR_SET__ is deliberately distinct from the DB __SET_ME__ guard
# below, so an unset macsync dep does NOT halt the migrate/start step.)
MACSYNC_BASE_URL=__OPERATOR_SET__
MACSYNC_DEVICE_ID=__OPERATOR_SET__
MACSYNC_SERVICE_TOKEN=\$(gen 24)
MRNUMBER_BASE_URL=http://10.9.0.6:8787
MRNUMBER_SERVICE_TOKEN=\$(gen 24)
EOF
chmod 600 $REMOTE_DIR/.env" || die "could not create $REMOTE_DIR/.env on $R"
    ;;
  *)
    die "could not check for $REMOTE_DIR/.env on $R (ssh exit $_env_rc)"
    ;;
esac
say "Ensuring DO managed-PG CA cert on the droplet (NODE_EXTRA_CA_CERTS)"
# Fetch the cluster CA from the DigitalOcean API (base64 in .ca.certificate)
# and ship it, only when the droplet does not already have do-ca.crt.
# Changes from the previous version:
#   - mktemp instead of the predictable /tmp/_do-ca.crt;
#   - curl -f so an HTTP error fails the pipeline instead of feeding an error
#     body to the JSON decoder;
#   - every failure (token file, fetch/decode, empty result, rsync) stops the
#     deploy with a message. Before, a failed rsync was swallowed because
#     `rsync … && rm …` was the last list of the `|| { … }` group.
if ! $SSH "$R" "test -f $REMOTE_DIR/do-ca.crt" 2>/dev/null; then
  _CID=ef22022e-de47-4a4d-8303-0166dbf891d6
  [ -r "$HOME/.vault/do-pat-ct.token" ] || die "cannot read $HOME/.vault/do-pat-ct.token"
  _CA_TMP="$(mktemp)" || die "mktemp failed"
  if ! curl -fsS -H "Authorization: Bearer $(cat "$HOME/.vault/do-pat-ct.token")" "https://api.digitalocean.com/v2/databases/$_CID/ca" \
      | python3 -c 'import sys,json,base64;sys.stdout.write(base64.b64decode(json.load(sys.stdin)["ca"]["certificate"]).decode())' > "$_CA_TMP"; then
    rm -f -- "$_CA_TMP"
    die "could not fetch/decode the DO managed-PG CA cert (cluster $_CID)"
  fi
  if [ ! -s "$_CA_TMP" ]; then
    rm -f -- "$_CA_TMP"
    die "DO API returned an empty CA cert (cluster $_CID)"
  fi
  # mktemp creates the file 0600 and rsync -a preserves modes; restore the
  # world-readable mode the old redirect-created file had (the CA is public).
  chmod 644 "$_CA_TMP"
  if ! rsync -az -e "$RSH" "$_CA_TMP" "$R:$REMOTE_DIR/do-ca.crt"; then
    rm -f -- "$_CA_TMP"
    die "could not ship do-ca.crt to $R:$REMOTE_DIR"
  fi
  rm -f -- "$_CA_TMP"
fi
say "Installing systemd unit $SERVICE_NAME (reads its own .env; no EnvironmentFile)"
# Writes /etc/systemd/system/$SERVICE_NAME.service on the remote host, then
# reloads systemd and enables the unit. The whole command is double-quoted, so
# $REMOTE_DIR and $SERVICE_NAME are expanded locally before it is sent.
# The unit runs /usr/bin/node on dist/main.js from $REMOTE_DIR, sets
# NODE_EXTRA_CA_CERTS to do-ca.crt in that directory, restarts always (5 s
# delay) and appends stdout/stderr to /var/log/$SERVICE_NAME.log.
# The trailing `|| true` means a failing daemon-reload/enable does NOT abort
# the deploy. The unit is only enabled here, not started.
$SSH "$R" "cat > /etc/systemd/system/$SERVICE_NAME.service <<EOF
[Unit]
Description=prospector backend (NestJS) — AFK auto-send + operator PWA
After=network-online.target
Wants=network-online.target
[Service]
Type=simple
WorkingDirectory=$REMOTE_DIR
Environment=NODE_EXTRA_CA_CERTS=$REMOTE_DIR/do-ca.crt
ExecStart=/usr/bin/node $REMOTE_DIR/dist/main.js
Restart=always
RestartSec=5
StandardOutput=append:/var/log/$SERVICE_NAME.log
StandardError=append:/var/log/$SERVICE_NAME.log
[Install]
WantedBy=multi-user.target
EOF
systemctl daemon-reload && systemctl enable $SERVICE_NAME >/dev/null 2>&1 || true"
# Stop (successfully) before migrate/start while a DB placeholder is still in
# .env. Only an assignment `=__SET_ME__` counts, so a comment that merely
# mentions the marker does not trigger the guard.
# grep exit status: 0 = placeholder present, 1 = none left. Anything else
# (unreadable/missing file, ssh failure) is now an error rather than being
# treated as "credentials are set".
_guard_rc=0
$SSH "$R" "grep -qE '=__SET_ME__' $REMOTE_DIR/.env" 2>/dev/null || _guard_rc=$?
case "$_guard_rc" in
  0)
    # Wording fixed: this script never creates the DB/role, so the hint is an
    # instruction ("create"), not a report ("created").
    printf '\033[1;33m⚠ PROSPECTOR_DB_* not set in %s:%s/.env — create DB+role, fill creds, then re-run.\033[0m\n' "$SERVER_HOST" "$REMOTE_DIR"
    exit 0
    ;;
  1)
    ;;
  *)
    die "could not inspect $REMOTE_DIR/.env on $R (exit $_guard_rc)"
    ;;
esac
say "Applying pending SQL migrations (ledger-tracked, sslmode=require)"
# Remote script (single-quoted; only $REMOTE_DIR is spliced in locally, and the
# '"'"' sequences embed literal single quotes for the SQL string literals):
#   - sources .env as shell to get the PROSPECTOR_DB_* values and builds the URI;
#   - ensures the _prospector_migrations ledger table exists;
#   - for each migrations/*.sql in glob order: skip it if its basename is in
#     the ledger, otherwise run it with ON_ERROR_STOP and record it.
# Fixes over the previous version:
#   - an unmatched glob (no .sql files) is skipped instead of being handed to
#     `psql -f` as the literal pattern, which aborted the deploy;
#   - the ledger lookup is a separate assignment with ON_ERROR_STOP, so a
#     failed lookup stops the run (set -e) instead of re-applying the file;
#   - a failure is reported through die rather than a bare errexit.
$SSH "$R" 'set -e; . '"$REMOTE_DIR"'/.env
URI="postgres://$PROSPECTOR_DB_USER:$PROSPECTOR_DB_PASSWORD@$PROSPECTOR_DB_HOST:$PROSPECTOR_DB_PORT/$PROSPECTOR_DB_NAME?sslmode=require"
psql -v ON_ERROR_STOP=1 -q -d "$URI" -c "CREATE TABLE IF NOT EXISTS _prospector_migrations (filename TEXT PRIMARY KEY, applied_at TIMESTAMPTZ NOT NULL DEFAULT now())" >/dev/null
for f in '"$REMOTE_DIR"'/migrations/*.sql; do
  [ -e "$f" ] || continue
  b=$(basename "$f")
  seen=$(psql -v ON_ERROR_STOP=1 -tAq -d "$URI" -c "SELECT 1 FROM _prospector_migrations WHERE filename='"'"'$b'"'"'")
  [ "$seen" = "1" ] && continue
  psql -v ON_ERROR_STOP=1 -q -d "$URI" -f "$f" >/dev/null
  psql -v ON_ERROR_STOP=1 -q -d "$URI" -c "INSERT INTO _prospector_migrations(filename) VALUES ('"'"'$b'"'"')" >/dev/null
  echo " applied $b"
done' || die "applying migrations failed on $R (see psql output above)"
say "Restarting $SERVICE_NAME"
# A failed restart now stops with a message instead of a bare errexit.
$SSH "$R" "systemctl restart $SERVICE_NAME" || die "systemctl restart $SERVICE_NAME failed on $R"
# Give the process a moment to bind, then probe it once on loopback from the
# host itself. An unanswered probe is only a warning; the script still ends
# with status 0.
sleep 4
# if/else instead of `probe && say || warn`, so the warning cannot be printed
# after a successful probe just because `say` failed.
if $SSH "$R" "curl -fsS http://127.0.0.1:$PORT/ >/dev/null 2>&1"; then
  say "prospector up on :$PORT (mesh-only)"
else
  printf '\033[1;33m⚠ started but / didnt answer yet; check /var/log/%s.log\033[0m\n' "$SERVICE_NAME"
fi