#!/bin/bash
set -euo pipefail

#
# Host Inventory Check
# Scans all hosts from per-host YAML files, validates capabilities, offers to fix missing services
#

# Absolute directory containing this script; host YAML files are searched
# beneath it. CDPATH is cleared for the cd because, when it is set, cd may
# print the resolved directory to stdout, which would be captured together
# with pwd's output. `--` keeps a path starting with '-' from being parsed
# as an option.
SCRIPT_DIR="$(CDPATH='' cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"

# Colors
# ANSI SGR sequences, stored as literal backslash escapes. They become real
# control bytes only when printed through `echo -e`.

# Attributes
NC='\033[0m'
BOLD='\033[1m'

# Foreground colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'

# Service check commands (capability definitions).
# Keys are service names as listed under capabilities.services in a host
# file. Each value is a shell snippet run on the target host; exit status 0
# means the service counts as present, anything else as missing.
declare -A SERVICE_CHECKS

# Core daemons
SERVICE_CHECKS["sshd"]="systemctl is-active sshd 2>/dev/null || systemctl is-active ssh 2>/dev/null || pgrep -x sshd >/dev/null || netstat -an 2>/dev/null | grep -q '\\.22.*LISTEN'"
SERVICE_CHECKS["nginx"]="systemctl is-active nginx 2>/dev/null || pgrep -x nginx >/dev/null"
SERVICE_CHECKS["docker"]="docker --version 2>/dev/null"
SERVICE_CHECKS["wireguard"]="wg show 2>/dev/null || systemctl is-active wg-quick@wg0 2>/dev/null"

# Data stores: accepted as a system unit or as a docker/podman container
SERVICE_CHECKS["postgresql"]="systemctl is-active postgresql 2>/dev/null || pg_isready 2>/dev/null || docker ps --format '{{.Names}}' 2>/dev/null | grep -qi postgres || podman ps --format '{{.Names}}' 2>/dev/null | grep -qi postgres"
SERVICE_CHECKS["redis"]="systemctl is-active redis 2>/dev/null || redis-cli ping 2>/dev/null || docker ps --format '{{.Names}}' 2>/dev/null | grep -qi redis || podman ps --format '{{.Names}}' 2>/dev/null | grep -qi redis"

# File sharing and DNS; "powerdns" is a second name for the pdns check
SERVICE_CHECKS["nfs-server"]="systemctl is-active nfs-server 2>/dev/null"
SERVICE_CHECKS["pdns"]="systemctl is-active pdns 2>/dev/null"
SERVICE_CHECKS["powerdns"]=${SERVICE_CHECKS["pdns"]}

# Install commands for Debian-family hosts, keyed by service name.
# Each value is run on the target as root (through `sudo bash -c`).
declare -A SERVICE_INSTALL_DEBIAN=(
    # On Debian/Ubuntu the OpenSSH unit is ssh.service; sshd.service is only
    # an alias, and `systemctl enable` refuses to operate on alias names.
    ["sshd"]="apt-get install -y openssh-server && systemctl enable --now ssh"
    ["nginx"]="apt-get install -y nginx && systemctl enable --now nginx"
    ["docker"]="curl -fsSL https://get.docker.com | sh"
    ["wireguard"]="apt-get install -y wireguard-tools"
    ["postgresql"]="apt-get install -y postgresql && systemctl enable --now postgresql"
    ["redis"]="apt-get install -y redis-server && systemctl enable --now redis-server"
    ["nfs-server"]="apt-get install -y nfs-kernel-server && systemctl enable --now nfs-server"
    ["pdns"]="apt-get install -y pdns-server && systemctl enable --now pdns"
    # SERVICE_CHECKS accepts "powerdns" as a second name for pdns, so the
    # install table has to know it as well.
    ["powerdns"]="apt-get install -y pdns-server && systemctl enable --now pdns"
)

# Install commands for Fedora-family hosts, keyed by service name.
# Services without an entry here have no automated install on this family.
declare -A SERVICE_INSTALL_FEDORA
SERVICE_INSTALL_FEDORA["sshd"]="dnf install -y openssh-server && systemctl enable --now sshd"
SERVICE_INSTALL_FEDORA["nginx"]="dnf install -y nginx && systemctl enable --now nginx"
SERVICE_INSTALL_FEDORA["docker"]="dnf install -y docker && systemctl enable --now docker"
SERVICE_INSTALL_FEDORA["wireguard"]="dnf install -y wireguard-tools"
SERVICE_INSTALL_FEDORA["postgresql"]="dnf install -y postgresql-server && postgresql-setup --initdb && systemctl enable --now postgresql"
SERVICE_INSTALL_FEDORA["redis"]="dnf install -y redis && systemctl enable --now redis"

# Services whose absence is reported as CRITICAL in the host report.
declare -a CRITICAL_SERVICES=(sshd)

# Check dependencies
# Verifies that the yq and jq commands are available. On the first one that
# is missing, prints an install hint and terminates the script with status 1.
check_deps() {
    local tool
    for tool in yq jq; do
        command -v "$tool" &>/dev/null && continue
        echo -e "${RED}${tool} not found. Install with: brew install ${tool}${NC}"
        exit 1
    done
}

# Find all host YAML files
# Prints, sorted and one per line, every regular *.yaml file below
# SCRIPT_DIR, skipping files named index.yaml and anything inside a
# schema/ directory. Errors from find are discarded.
find_host_files() {
    local -a filters=(
        -name "*.yaml" -type f
        ! -name "index.yaml"
        ! -path "*/schema/*"
    )
    find "$SCRIPT_DIR" "${filters[@]}" 2>/dev/null | sort
}

# Get host ID from YAML file
# Prints the file's top-level `id` value. When that yields nothing usable
# (yq fails, or the value is null), prints the file name without its .yaml
# extension instead.
get_host_id() {
    local yaml_path="$1"
    if ! yq e '.id' "$yaml_path" 2>/dev/null | grep -v '^null$'; then
        basename "$yaml_path" .yaml
    fi
}

# Get property from host YAML file
# $1 is the YAML file, $2 a dotted property path without the leading dot
# (e.g. ssh.host). Prints the value, or nothing when it is null or yq fails.
# Always returns 0.
get_prop() {
    local yaml_path="$1"
    local key_path="$2"
    if ! yq e ".$key_path" "$yaml_path" 2>/dev/null | grep -v '^null$'; then
        # No usable value: stay silent but still report success.
        return 0
    fi
}

# Get required services from capabilities.services array
# Prints one service name per line. A failing yq call is ignored: its
# errors are discarded and the function still returns 0.
get_required_services() {
    local yaml_path="$1"
    if ! yq e '.capabilities.services[]' "$yaml_path" 2>/dev/null; then
        return 0
    fi
}

# Check if service is critical
# Globals:   CRITICAL_SERVICES (read)
# Arguments: $1 - service name
# Returns:   0 if the name is listed in CRITICAL_SERVICES, 1 otherwise
is_critical() {
    local svc="$1"
    # Keep the loop variable local so a caller's `critical` is not clobbered.
    local critical
    # The ${arr[@]+...} form keeps an empty list from tripping `set -u` on
    # bash versions older than 4.4.
    for critical in ${CRITICAL_SERVICES[@]+"${CRITICAL_SERVICES[@]}"}; do
        [[ "$svc" == "$critical" ]] && return 0
    done
    return 1
}

# Check if we're on this host (avoid SSH to self)
# Arguments: $1 - host name or IP address taken from the host file
# Returns:   0 if the value is "localhost", equals the output of `hostname`,
#            or equals one of this machine's addresses; 1 otherwise
#            (including for an empty value)
is_local_host() {
    local ssh_host="$1"

    # An empty target must never be mistaken for the local machine.
    [[ -n "$ssh_host" ]] || return 1
    [[ "$ssh_host" == "localhost" ]] && return 0

    local current_hostname
    current_hostname=$(hostname) || current_hostname=""
    [[ -n "$current_hostname" && "$ssh_host" == "$current_hostname" ]] && return 0

    # `hostname -I` is not available everywhere; fall back to parsing `ip`.
    # Failure of both is fine and simply leaves the list empty.
    local current_ips
    current_ips=$(hostname -I 2>/dev/null \
        || ip -4 addr show 2>/dev/null | grep inet | awk '{print $2}' | cut -d/ -f1) || true

    # Compare address by address as plain strings. Feeding the host to grep
    # as a pattern treated "." as a wildcard and let a partial address such
    # as 192.168.1 match 192.168.1.20.
    local -a ip_list=()
    read -r -d '' -a ip_list <<<"$current_ips" || true
    local ip
    for ip in ${ip_list[@]+"${ip_list[@]}"}; do
        [[ "$ssh_host" == "$ip" ]] && return 0
    done
    return 1
}

# SSH wrapper - runs locally if on same host
# Arguments: $1 - host name or IP
#            $2 - remote user
#            $3 - private key path (may start with ~; may be empty)
#            $4... - command to run
# Outputs:   the command's stdout; stderr is discarded
# Returns:   exit status of the command (or of ssh itself)
ssh_to_host() {
    local ssh_host="$1"
    local ssh_user="$2"
    local ssh_key="$3"
    shift 3

    if is_local_host "$ssh_host"; then
        # Join the arguments into one command line, as ssh would do remotely.
        bash -c "$*" 2>/dev/null
        return $?
    fi

    # Expand a leading ~ by hand; it is not expanded inside a variable.
    if [[ "$ssh_key" == "~"* ]]; then
        ssh_key="${HOME}${ssh_key:1}"
    fi

    # Options are kept in an array so a key path containing spaces stays a
    # single argument.
    # NOTE(review): StrictHostKeyChecking=no disables host key verification.
    local -a ssh_opts=(-o ConnectTimeout=10 -o BatchMode=yes -o StrictHostKeyChecking=no)
    if [[ -n "$ssh_key" && -f "$ssh_key" ]]; then
        ssh_opts+=(-i "$ssh_key")
    fi

    ssh "${ssh_opts[@]}" "${ssh_user}@${ssh_host}" "$@" 2>/dev/null
}

# Resolve vault:// key references
# A reference of the form vault://ssh-keys/NAME is printed as ~/.ssh/NAME
# (with the tilde left unexpanded). Any other value is printed unchanged.
resolve_key_ref() {
    local ref="$1"
    case "$ref" in
        vault://ssh-keys/*)
            echo "~/.ssh/${ref#vault://ssh-keys/}"
            ;;
        *)
            echo "$ref"
            ;;
    esac
}

# Gather system info (Linux + macOS compatible)
# Runs a probe script on the target through ssh_to_host and prints one JSON
# object with the keys hostname, os, os_version, os_family, kernel, arch,
# cpus, ram_gb, disk_root_gb, disk_root_used_pct and uptime.
#
# Arguments: $1 - host name or IP, $2 - user, $3 - key path
#            (all three are passed straight to ssh_to_host)
# Returns:   exit status of ssh_to_host
#
# NOTE(review): cpus, ram_gb and disk_root_gb are written into the JSON
# without quotes, so an empty value from any of those probes produces
# invalid JSON for the jq calls downstream.
gather_system_info() {
    local ssh_host="$1"
    local ssh_user="$2"
    local ssh_key="$3"

    # The probe is one single-quoted argument of the form: bash -c '<script>'.
    # The '\'' sequences open and close the inner quotes, which is why the
    # script itself contains no single quotes and writes its awk programs in
    # double quotes with \$ escapes.
    ssh_to_host "$ssh_host" "$ssh_user" "$ssh_key" 'bash -c '\''
hostname=$(hostname -s 2>/dev/null || hostname)

# Detect OS
if [ -f /etc/os-release ]; then
    os=$(. /etc/os-release && echo $ID)
    os_version=$(. /etc/os-release && echo $VERSION_ID)
    os_family=$(. /etc/os-release && echo ${ID_LIKE:-$ID} | cut -d" " -f1)
elif command -v sw_vers >/dev/null 2>&1; then
    os="darwin"
    os_version=$(sw_vers -productVersion)
    os_family="darwin"
else
    os=$(uname -s | tr "[:upper:]" "[:lower:]")
    os_version=$(uname -r)
    os_family="unknown"
fi

kernel=$(uname -r)
arch=$(uname -m)

# CPU count
cpus=$(nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 1)

# RAM in GB
if command -v free >/dev/null 2>&1; then
    ram_gb=$(free -g 2>/dev/null | awk "/Mem:/ {print \$2}")
elif command -v sysctl >/dev/null 2>&1; then
    ram_bytes=$(sysctl -n hw.memsize 2>/dev/null || echo 0)
    ram_gb=$((ram_bytes / 1073741824))
else
    ram_gb=0
fi

# Disk free in GB
if df -BG / >/dev/null 2>&1; then
    disk_root_gb=$(df -BG / | awk "NR==2 {gsub(/G/,\"\",\$4); print \$4}")
    disk_pct=$(df / | awk "NR==2 {print \$5}")
elif df -g / >/dev/null 2>&1; then
    disk_root_gb=$(df -g / | awk "NR==2 {print \$4}")
    disk_pct=$(df / | awk "NR==2 {print \$5}")
else
    disk_root_gb=0
    disk_pct="0%"
fi

# Uptime
up=$(uptime -p 2>/dev/null || uptime | sed "s/.*up //" | sed "s/,.*//" | xargs)

cat << EOF
{
  "hostname": "$hostname",
  "os": "$os",
  "os_version": "$os_version",
  "os_family": "$os_family",
  "kernel": "$kernel",
  "arch": "$arch",
  "cpus": $cpus,
  "ram_gb": $ram_gb,
  "disk_root_gb": $disk_root_gb,
  "disk_root_used_pct": "$disk_pct",
  "uptime": "$up"
}
EOF
'\'''
}

# Check a single service
# Looks up the probe command for service $4 in SERVICE_CHECKS and runs it on
# host $1 (user $2, key $3) via ssh_to_host. Prints exactly one word:
#   unknown - no probe is defined for the service
#   ok      - the probe exited 0
#   missing - the probe exited non-zero
check_service() {
    local ssh_host="$1" ssh_user="$2" ssh_key="$3" svc="$4"
    local probe="${SERVICE_CHECKS[$svc]:-}"
    local verdict

    if [[ -z "$probe" ]]; then
        verdict="unknown"
    elif ssh_to_host "$ssh_host" "$ssh_user" "$ssh_key" "$probe" &>/dev/null; then
        verdict="ok"
    else
        verdict="missing"
    fi

    echo "$verdict"
}

# Print host report
# Arguments: $1 - display name, $2 - FQDN, $3 - network group,
#            $4 - JSON document with the gathered system facts
# Prints a framed header followed by OS, CPU, RAM, disk and uptime rows.
# Fields that cannot be read from the JSON are printed empty.
print_host_report() {
    local title="$1" domain="$2" group="$3" facts="$4"
    local os os_version cpus ram disk disk_pct uptime

    # A failing jq must not abort the report, hence the `|| true`.
    os=$(echo "$facts" | jq -r '.os // "unknown"') || true
    os_version=$(echo "$facts" | jq -r '.os_version // ""') || true
    cpus=$(echo "$facts" | jq -r '.cpus // 0') || true
    ram=$(echo "$facts" | jq -r '.ram_gb // 0') || true
    disk=$(echo "$facts" | jq -r '.disk_root_gb // 0') || true
    disk_pct=$(echo "$facts" | jq -r '.disk_root_used_pct // "0%"') || true
    uptime=$(echo "$facts" | jq -r '.uptime // "unknown"') || true

    # Header
    echo -e "${BOLD}${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
    echo -e "${BOLD}  ${title}${NC}"
    echo -e "  ${CYAN}$domain${NC}  ${YELLOW}[$group]${NC}"
    echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
    echo

    # Fact rows: label padded to 12 columns, then the value(s)
    printf "  %-12s %s %s\n" "OS:" "$os" "$os_version"
    printf "  %-12s %s cores\n" "CPU:" "$cpus"
    printf "  %-12s %s GB\n" "RAM:" "$ram"
    printf "  %-12s %s GB free (%s used)\n" "Disk:" "$disk" "$disk_pct"
    printf "  %-12s %s\n" "Uptime:" "$uptime"
    echo
}

# Main check function for a single host file
#
# Reads connection details and requirements from the host YAML, verifies the
# host is reachable, prints a system report, checks every required service
# and the optional disk threshold, and (in fix mode) offers to install
# whatever is missing.
#
# Globals:   SERVICE_INSTALL_DEBIAN, SERVICE_INSTALL_FEDORA (read),
#            color variables (read)
# Arguments: $1 - path to the host YAML file
#            $2 - "--fix" to offer installs; any other value (default
#                 "check") only reports
# Outputs:   human-readable report on stdout; prompts read from stdin
# Returns:   1 if no SSH target is configured, the host is unreachable, or
#            system info cannot be gathered; 0 otherwise
check_host_file() {
    local file="$1"
    local fix_mode="${2:-check}"

    local host_id displayName fqdn networkGroup
    host_id=$(get_host_id "$file")
    displayName=$(get_prop "$file" "displayName")
    fqdn=$(get_prop "$file" "fqdn")
    networkGroup=$(get_prop "$file" "networkGroup")

    # Get SSH connection details
    local ssh_host ssh_ip ssh_user ssh_keyRef ssh_key
    ssh_host=$(get_prop "$file" "ssh.host")
    ssh_ip=$(get_prop "$file" "ssh.ip")
    ssh_user=$(get_prop "$file" "ssh.user")
    ssh_keyRef=$(get_prop "$file" "ssh.keyRef")
    ssh_key=$(resolve_key_ref "$ssh_keyRef")

    # Fall back to the IP when no host name is configured; default to root.
    ssh_host="${ssh_host:-$ssh_ip}"
    ssh_user="${ssh_user:-root}"

    # Without a target there is nothing to check. Refuse early so an empty
    # host is never handed to the SSH wrapper.
    if [[ -z "$ssh_host" ]]; then
        echo -e "${CYAN}Checking $host_id...${NC}"
        echo -e "  ${RED}✗ No ssh.host or ssh.ip configured for $host_id${NC}"
        echo ""
        return 1
    fi

    echo -e "${CYAN}Checking $host_id ($ssh_host)...${NC}"

    # Test connectivity
    if ! ssh_to_host "$ssh_host" "$ssh_user" "$ssh_key" "true" 2>/dev/null; then
        echo -e "  ${RED}✗ Cannot connect to $ssh_host${NC}"
        echo ""
        return 1
    fi

    # Gather system info
    local info
    info=$(gather_system_info "$ssh_host" "$ssh_user" "$ssh_key") || {
        echo -e "  ${RED}✗ Failed to gather system info${NC}"
        echo ""
        return 1
    }

    # os is the distribution ID; os_like is the family hint the probe derived
    # from ID_LIKE. Both are needed to pick an install-command table.
    local os os_like
    os=$(echo "$info" | jq -r '.os // "unknown"') || os="unknown"
    os_like=$(echo "$info" | jq -r '.os_family // ""') || os_like=""

    print_host_report "${displayName:-$host_id}" "$fqdn" "$networkGroup" "$info"

    # Check required services from capabilities.services
    local services
    services=$(get_required_services "$file")
    local -a required=() missing=() critical_missing=()
    local svc status

    if [[ -n "$services" ]]; then
        # Split on whitespace into an array. Unlike an unquoted $services in
        # a for loop, this never glob-expands a name.
        read -r -d '' -a required <<<"$services" || true
        echo -e "  ${BOLD}Required Services:${NC}"

        for svc in ${required[@]+"${required[@]}"}; do
            status=$(check_service "$ssh_host" "$ssh_user" "$ssh_key" "$svc")
            case "$status" in
                ok)
                    echo -e "    ${GREEN}✓${NC} $svc"
                    ;;
                missing)
                    if is_critical "$svc"; then
                        echo -e "    ${RED}✗ $svc (CRITICAL)${NC}"
                        critical_missing+=("$svc")
                    else
                        echo -e "    ${YELLOW}✗ $svc${NC}"
                    fi
                    missing+=("$svc")
                    ;;
                *)
                    echo -e "    ${YELLOW}? $svc (unknown check)${NC}"
                    ;;
            esac
        done
        echo ""
    fi

    # Check disk thresholds from alerts config
    local disk_threshold
    disk_threshold=$(get_prop "$file" "alerts.diskThreshold")
    if [[ -n "$disk_threshold" ]]; then
        local disk_pct_num
        disk_pct_num=$(echo "$info" | jq -r '.disk_root_used_pct // "0%"' | tr -d '%') || disk_pct_num=""
        disk_threshold="${disk_threshold%\%}"   # tolerate a value like "80%"

        # Compare only plain integers. Arbitrary strings would otherwise be
        # evaluated as arithmetic expressions, and 10# keeps values such as
        # "08" from being read as octal.
        if [[ "$disk_pct_num" =~ ^[0-9]+$ && "$disk_threshold" =~ ^[0-9]+$ ]]; then
            if (( 10#$disk_pct_num >= 10#$disk_threshold )); then
                echo -e "  ${RED}⚠ Disk usage ${disk_pct_num}% exceeds threshold ${disk_threshold}%${NC}"
            fi
        else
            echo -e "  ${YELLOW}? Cannot compare disk usage '${disk_pct_num}' with alerts.diskThreshold '${disk_threshold}'${NC}"
        fi
    fi

    # Offer to fix missing services
    if [[ ${#missing[@]} -gt 0 && "$fix_mode" == "--fix" ]]; then
        echo ""

        # Determine OS family for install command. The distribution ID is
        # tried first; for derivatives the ID_LIKE hint decides. Anything
        # unrecognized keeps its own ID and ends up with no install command.
        local os_family="$os"
        case "$os" in
            ubuntu|debian) os_family="debian" ;;
            fedora|rhel|centos|rocky|alma|almalinux) os_family="fedora" ;;
            *)
                case "$os_like" in
                    debian|ubuntu) os_family="debian" ;;
                    fedora|rhel|centos) os_family="fedora" ;;
                esac
                ;;
        esac

        # SSH options as an array so a key path with spaces stays one word.
        # BatchMode is deliberately absent: sudo may need to prompt.
        local -a ssh_opts=(-o ConnectTimeout=10 -o StrictHostKeyChecking=no)
        local key_path="$ssh_key"
        if [[ "$key_path" == "~"* ]]; then
            key_path="${HOME}${key_path:1}"
        fi
        if [[ -n "$key_path" && -f "$key_path" ]]; then
            ssh_opts+=(-i "$key_path")
        fi

        local response install_cmd rc
        for svc in "${missing[@]}"; do
            echo -n -e "${YELLOW}Install $svc on $host_id? [y/N] ${NC}"
            # EOF on stdin counts as "no" instead of aborting under set -e.
            read -r response || response=""
            [[ "$response" =~ ^[Yy] ]] || continue

            install_cmd=""
            if [[ "$os_family" == "debian" ]]; then
                install_cmd="${SERVICE_INSTALL_DEBIAN[$svc]:-}"
            elif [[ "$os_family" == "fedora" ]]; then
                install_cmd="${SERVICE_INSTALL_FEDORA[$svc]:-}"
            fi

            if [[ -z "$install_cmd" ]]; then
                echo -e "${RED}No install command for $svc on $os_family${NC}"
                continue
            fi

            echo -e "${CYAN}Installing $svc...${NC}"
            echo -e "${YELLOW}Command: sudo $install_cmd${NC}"

            # A failed install is reported and the loop moves on to the next
            # service rather than terminating the whole run.
            rc=0
            if is_local_host "$ssh_host"; then
                # Same rule as ssh_to_host: never SSH to ourselves.
                sudo bash -c "$install_cmd" || rc=$?
            else
                # The install tables contain no single quotes, so plain
                # single-quote wrapping is sufficient for the remote shell.
                ssh -t "${ssh_opts[@]}" "${ssh_user}@${ssh_host}" "sudo bash -c '$install_cmd'" || rc=$?
            fi
            if (( rc != 0 )); then
                echo -e "${RED}✗ Installing $svc failed (exit $rc)${NC}"
            fi
        done
    elif [[ ${#critical_missing[@]} -gt 0 ]]; then
        echo -e "  ${RED}⚠ Run with --fix to install missing critical services${NC}"
    fi

    echo ""
}

# Main
# Parses the command line, prints the banner, checks either every host file
# or the single host whose ID matches the given target, then prints a usage
# footer.
#
# Arguments: [--fix] [host|all]
#            --fix  offer to install missing services
#            host   ID of a single host to check (default: all)
# Exits with status 1 when the requested host ID is not found.
main() {
    check_deps

    local mode="check"
    local target="all"
    local arg file

    # Parse args
    for arg in "$@"; do
        case "$arg" in
            --fix) mode="--fix" ;;
            *) target="$arg" ;;
        esac
    done

    echo -e "${BOLD}"
    echo "╔══════════════════════════════════════════════════════════════════╗"
    echo "║              Lilith Platform Infrastructure Check                ║"
    echo "╚══════════════════════════════════════════════════════════════════╝"
    echo -e "${NC}"
    echo ""

    # Read the file list line by line into an array, so paths containing
    # spaces are not split into several words.
    local -a host_files=()
    mapfile -t host_files < <(find_host_files)

    if [[ "$target" == "all" ]]; then
        for file in ${host_files[@]+"${host_files[@]}"}; do
            # One unreachable host must not stop the sweep.
            check_host_file "$file" "$mode" || true
        done
    else
        # Find host by ID
        local found=false
        local host_id
        for file in ${host_files[@]+"${host_files[@]}"}; do
            host_id=$(get_host_id "$file")
            if [[ "$host_id" == "$target" ]]; then
                # Deliberately unguarded: under `set -e` a failed check of
                # the requested host ends the script with a non-zero status.
                check_host_file "$file" "$mode"
                found=true
                break
            fi
        done
        if [[ "$found" == "false" ]]; then
            echo -e "${RED}Host '$target' not found${NC}"
            echo "Available hosts:"
            for file in ${host_files[@]+"${host_files[@]}"}; do
                echo "  - $(get_host_id "$file")"
            done
            exit 1
        fi
    fi

    echo -e "${BOLD}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
    echo -e "  Usage: $0 [--fix] [host|all]"
    echo -e "  Examples:"
    echo -e "    $0                  # Check all hosts"
    echo -e "    $0 --fix            # Check and offer to fix all"
    echo -e "    $0 --fix apricot    # Fix specific host"
    echo ""
}

# Script entry point: forward all command-line arguments to main.
main "$@"
