platform-deployments/scripts/lilith-backup-notify.sh
Quinn Ftw 3dbd4375d3 chore(conf.d): 🔧 Update configuration service files in conf.d directory
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
2026-02-18 13:42:49 -08:00

102 lines
3.7 KiB
Bash
Executable file

#!/usr/bin/env bash
# =============================================================================
# lilith-backup-notify.sh
# =============================================================================
# Sends a failure notification when a backup systemd unit fails.
# Invoked automatically via the OnFailure= directive in backup service units,
# through the lilith-backup-notify-failure@.service template.
#
# USAGE
# lilith-backup-notify.sh <failed-unit-name>
#
# ENVIRONMENT
# BACKUP_NOTIFY_WEBHOOK — HTTP(S) URL to POST the notification payload to.
# If unset or the POST fails, falls back to syslog.
#
# WEBHOOK PAYLOAD (JSON)
# {
# "text": "Backup failure: lilith-backup-postgres.service on hostname at 2026-02-18T06:00:00Z",
# "unit": "lilith-backup-postgres.service",
# "host": "hostname",
# "timestamp": "2026-02-18T06:00:00Z"
# }
#
# EXIT CODE
# Always 0 — notification failures must not cascade into further unit failures.
# =============================================================================
# -u: treat unset variables as errors; pipefail: a pipeline fails if any stage
# fails. -e is not enabled: this script is documented to always exit 0.
set -uo pipefail

# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
# Prefix attached to every line written by the logging helpers.
readonly LOG_PREFIX="[lilith-backup-notify]"

# Name of the failed unit, taken from the first argument; a placeholder is
# used when the script is invoked without one.
readonly FAILED_UNIT="${1:-unknown-unit}"

# Prefer the FQDN, then the short host name. If hostname(1) is missing or
# fails, keep the value bash already placed in HOSTNAME instead of replacing
# it with an empty string, so the notification always names a host.
resolved_host="$(hostname -f 2>/dev/null || hostname 2>/dev/null)" || resolved_host=""
HOSTNAME="${resolved_host:-${HOSTNAME:-unknown-host}}"
readonly HOSTNAME
unset resolved_host

# UTC timestamp in ISO-8601 form, captured once so every log line and the
# notification payload agree. `-u` is the portable spelling of `--utc`.
TIMESTAMP="$(date -u '+%Y-%m-%dT%H:%M:%SZ')"
readonly TIMESTAMP
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
# Write an INFO line to stdout: "<prefix> <timestamp> INFO <message>".
# Arguments: all arguments are joined into the message.
log() {
  printf '%s %s INFO %s\n' "${LOG_PREFIX}" "${TIMESTAMP}" "$*"
}
# Write an ERROR line to stderr: "<prefix> <timestamp> ERROR <message>".
# Arguments: all arguments are joined into the message.
err() {
  printf '%s %s ERROR %s\n' "${LOG_PREFIX}" "${TIMESTAMP}" "$*" 1>&2
}
# Send a message to syslog through logger(1) at priority user.err.
# Arguments: $1 - message text
# Returns:   always 0; a failing or missing logger is deliberately ignored so
#            that the notifier itself never fails.
syslog_notify() {
  local text="$1"
  local -a logger_opts=(--tag "lilith-backup-notify" --priority "user.err")

  logger "${logger_opts[@]}" "${text}" 2>/dev/null || true
}
# ---------------------------------------------------------------------------
# Build notification message
# ---------------------------------------------------------------------------
# Human-readable summary, reused for the webhook "text" field and for syslog.
printf -v NOTIFY_TEXT 'Backup failure: %s on %s at %s' \
  "${FAILED_UNIT}" "${HOSTNAME}" "${TIMESTAMP}"
log "Notifying: ${NOTIFY_TEXT}"
# ---------------------------------------------------------------------------
# Attempt webhook notification
# ---------------------------------------------------------------------------
# Escape a string so it can be embedded inside a JSON string literal.
# Arguments: $1 - raw text
# Outputs:   escaped text on stdout, without surrounding quotes
json_escape() {
  local raw="${1-}"
  local bs='\' dq='"'

  # Backslash must be handled first so the escapes added below are not doubled.
  # Quoted replacement strings are taken literally by every bash version.
  raw=${raw//"${bs}"/"${bs}${bs}"}
  raw=${raw//"${dq}"/"${bs}${dq}"}
  raw=${raw//$'\n'/"${bs}n"}
  raw=${raw//$'\r'/"${bs}r"}
  raw=${raw//$'\t'/"${bs}t"}
  # Any other control character is illegal inside a JSON string; drop it.
  raw=${raw//[[:cntrl:]]/}

  printf '%s' "${raw}"
}

webhook_url="${BACKUP_NOTIFY_WEBHOOK:-}"
if [[ -n "${webhook_url}" ]]; then
  # Build the JSON payload without a jq dependency. Every value is escaped:
  # systemd-escaped unit names legitimately contain backslashes (for example
  # "\x2d"), which would otherwise produce an invalid JSON document.
  json_payload="$(printf \
    '{"text":"%s","unit":"%s","host":"%s","timestamp":"%s"}' \
    "$(json_escape "${NOTIFY_TEXT}")" \
    "$(json_escape "${FAILED_UNIT}")" \
    "$(json_escape "${HOSTNAME}")" \
    "$(json_escape "${TIMESTAMP}")")"

  # Only stdout (the --write-out status code) is captured. curl's stderr is
  # left attached to this script's stderr so the --show-error diagnostic is
  # preserved instead of being swallowed into the status string. A non-zero
  # curl exit (transport failure) is reported as "curl-error".
  http_status="$(curl \
    --silent \
    --show-error \
    --max-time 10 \
    --retry 2 \
    --retry-delay 3 \
    --output /dev/null \
    --write-out '%{http_code}' \
    --header 'Content-Type: application/json' \
    --data "${json_payload}" \
    --url "${webhook_url}")" || http_status="curl-error"

  # Success is exactly a three-digit 2xx status.
  if [[ "${http_status}" =~ ^2[0-9][0-9]$ ]]; then
    log "Webhook notification sent (HTTP ${http_status})"
    exit 0
  else
    err "Webhook POST failed (status: ${http_status}) — falling back to syslog"
  fi
else
  err "BACKUP_NOTIFY_WEBHOOK not set — falling back to syslog"
fi
# ---------------------------------------------------------------------------
# Fallback: syslog
# ---------------------------------------------------------------------------
# Last-resort path: record the failure in syslog, note it on stdout, and end
# with status 0 so a notification problem never cascades into another unit
# failure.
notify_via_syslog_and_exit() {
  syslog_notify "${NOTIFY_TEXT}"
  log "Syslog notification sent"
  exit 0
}

notify_via_syslog_and_exit