platform-tooling/scripts/services/service-status.ts
2026-03-02 21:06:54 -08:00

448 lines
13 KiB
TypeScript
Executable file

#!/usr/bin/env npx tsx
/**
* Service Status Checker
*
* Checks health of all platform services based on feature configs.
*
* Usage:
* pnpm services:status # Check dev environment (localhost)
* pnpm services:status --dev # Explicit dev environment
* pnpm services:status --prod # Check production URLs
* pnpm services:status --staging # Check staging URLs
* pnpm services:status --feature=seo # Check only SEO services
* pnpm services:status --json # Output as JSON
*
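* Exit codes:
* 0 = No critical service is offline (and, with --prod, no service is offline at all)
* 1 = A critical service is offline, or any service is offline when checking --prod
* 2 = Error running checks, or no feature config matched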
*/
import { readFileSync, readdirSync, existsSync } from 'fs';
import { createConnection } from 'net';
import { join } from 'path';
import { parse as parseYaml } from 'yaml';
import { PATHS } from '../../configs/paths';
// Directory scanned for per-feature YAML configs (see loadFeatureConfigs); taken from the shared PATHS config.
const featuresDir = PATHS.servicesDir;
// ANSI escape sequences used to style terminal output.
// `reset` restores default styling and is emitted after every styled span.
const colors = {
reset: '\x1b[0m',
bold: '\x1b[1m',
dim: '\x1b[2m',
red: '\x1b[31m',
green: '\x1b[32m',
yellow: '\x1b[33m',
blue: '\x1b[34m',
magenta: '\x1b[35m',
cyan: '\x1b[36m',
gray: '\x1b[90m',
};
/** Kinds of service a feature config can declare; drives the default probe, the icon, and the sort order. */
type ServiceType = 'api' | 'frontend' | 'ml' | 'redis' | 'postgresql' | 'worker' | 'websocket';
/** Outcome of a health probe. `unknown` is reported when no probe was run or none applies. */
type ServiceStatus = 'online' | 'offline' | 'degraded' | 'unknown';
/** How a service's health should be probed. */
interface HealthCheck {
// Probe mechanism. NOTE(review): checkService has no dedicated branch for 'process' — confirm intent.
type: 'http' | 'tcp' | 'process';
// Path appended to the service base URL for 'http' checks; an 'http' check without a path is not used as-is.
path?: string;
}
/** One service entry inside a feature config. */
interface ServiceDefinition {
// Identifier; also the key looked up in the feature's `ports` map when `port` is omitted.
id: string;
// Human-readable name shown in the status table.
name: string;
type: ServiceType;
// Explicit port. When absent, the feature-level `ports[id]` is used, and 0 if that is missing too.
port?: number;
// Optional explicit probe; when absent, checkService picks a default based on `type`.
healthCheck?: HealthCheck;
// Not read by the code in this file.
description?: string;
// Not read by the code in this file. NOTE(review): presumably marks GPU-backed services — confirm.
gpu?: boolean;
// When true, this service being offline makes the script exit with code 1 in every environment.
critical?: boolean;
}
/**
* Shape each feature YAML file is assumed to have.
* loadFeatureConfigs casts the parsed YAML to this type; the cast itself performs no runtime check.
*/
interface FeatureConfig {
// Feature identity; `id` is what `--feature=<id>` is matched against and what results are grouped by.
feature: {
id: string;
name: string;
description?: string;
};
// Port lookup keyed by service id, used when a service does not declare its own `port`.
ports: Record<string, number>;
services: ServiceDefinition[];
// Per-environment deployment info used to build staging/production URLs.
// Only `subdomain` and `domain` are read by getServiceUrl; `host` is not read by the code in this file.
deployments?: {
dev?: { host: string };
staging?: { host: string; subdomain?: string };
production?: { host: string; domain?: string };
};
}
/** Result of probing one service; this is also the element shape printed by `--json`. */
interface ServiceResult {
featureId: string;
featureName: string;
serviceId: string;
serviceName: string;
type: ServiceType;
// Resolved port (service port, else feature port map entry, else 0).
port: number;
status: ServiceStatus;
// Elapsed milliseconds for HTTP probes that received a response; not set for TCP probes or failures.
responseTime?: number;
// Short human-readable reason when the status is not a plain success.
error?: string;
// Base URL the probe was built from (see getServiceUrl).
url: string;
// Copied from the service definition; influences the exit code.
critical?: boolean;
}
/** Command-line options after parsing (see parseArgs). */
interface CheckOptions {
// Target environment; parseArgs defaults this to 'dev'.
env: 'dev' | 'staging' | 'prod';
// When set, only the feature whose id equals this value is checked.
feature?: string;
// Emit results as JSON instead of the formatted table.
json: boolean;
// Per-probe timeout in milliseconds (passed to setTimeout); parseArgs defaults this to 5000.
timeout: number;
}
/**
 * Parses the command-line flags from `process.argv` into a CheckOptions object.
 *
 * Recognised flags: `--dev`, `--staging`, `--prod` / `--production`, `--json`,
 * `--feature=<id>` and `--timeout=<ms>`. Unrecognised arguments are ignored.
 * When several environment flags are given, the last one wins.
 *
 * @returns Options with defaults `env: 'dev'`, `json: false`, `timeout: 5000`.
 * @throws Error when `--timeout` is not a positive number of milliseconds.
 */
function parseArgs(): CheckOptions {
  const featurePrefix = '--feature=';
  const timeoutPrefix = '--timeout=';
  const options: CheckOptions = {
    env: 'dev',
    json: false,
    timeout: 5000,
  };

  for (const arg of process.argv.slice(2)) {
    if (arg === '--dev') {
      options.env = 'dev';
    } else if (arg === '--staging') {
      options.env = 'staging';
    } else if (arg === '--prod' || arg === '--production') {
      options.env = 'prod';
    } else if (arg === '--json') {
      options.json = true;
    } else if (arg.startsWith(featurePrefix)) {
      // Take everything after the prefix so values containing '=' are kept intact.
      options.feature = arg.slice(featurePrefix.length);
    } else if (arg.startsWith(timeoutPrefix)) {
      const raw = arg.slice(timeoutPrefix.length);
      const parsed = Number.parseInt(raw, 10);
      // A NaN or non-positive timeout would make every probe abort almost
      // immediately and report all services as timed out, so reject it up front.
      if (!Number.isFinite(parsed) || parsed <= 0) {
        throw new Error(
          `Invalid --timeout value "${raw}": expected a positive number of milliseconds`
        );
      }
      options.timeout = parsed;
    }
  }

  return options;
}
/**
 * Loads every feature config from `featuresDir`.
 *
 * Only files ending in `.yaml` whose name does not start with `_` are read.
 * Each file must parse to an object with a string `feature.id` and a
 * `services` list; a missing `ports` map is treated as empty so later port
 * lookups fall back to 0 instead of throwing.
 *
 * @returns One FeatureConfig per matching file.
 * @throws Error when the directory is missing, a file cannot be read or
 *   parsed, or a file does not have the minimal expected shape. The message
 *   names the offending file.
 */
function loadFeatureConfigs(): FeatureConfig[] {
  if (!existsSync(featuresDir)) {
    throw new Error(`Features directory not found: ${featuresDir}`);
  }

  const files = readdirSync(featuresDir).filter(
    (f) => f.endsWith('.yaml') && !f.startsWith('_')
  );

  return files.map((file) => {
    const filePath = join(featuresDir, file);

    let parsed: unknown;
    try {
      parsed = parseYaml(readFileSync(filePath, 'utf-8'));
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      throw new Error(`Failed to load feature config ${filePath}: ${reason}`);
    }

    // An empty file parses to null and arbitrary YAML can be any shape, so
    // check the fields the rest of the script dereferences unconditionally.
    const config = parsed as Partial<FeatureConfig> | null | undefined;
    if (
      typeof config !== 'object' ||
      config === null ||
      typeof config.feature?.id !== 'string' ||
      !Array.isArray(config.services)
    ) {
      throw new Error(
        `Invalid feature config ${filePath}: expected "feature.id" and a "services" list`
      );
    }

    return { ...config, ports: config.ports ?? {} } as FeatureConfig;
  });
}
/**
 * Builds the base URL used to reach a service in the given environment.
 *
 * - `dev`: always localhost; PostgreSQL and Redis get a `tcp://` scheme,
 *   everything else `http://`.
 * - `staging` / `prod`: taken from the feature's deployment entry, preferring
 *   `domain`, then `subdomain` (under `atlilith.com`).
 * - No usable deployment entry: falls back to `http://localhost:<port>`.
 *
 * The port is the service's own `port`, else the feature's `ports[service.id]`,
 * else 0.
 *
 * @param service Service whose URL is being built.
 * @param feature Feature config that owns the service.
 * @param env Target environment.
 * @returns The base URL, without any health-check path.
 */
function getServiceUrl(
  service: ServiceDefinition,
  feature: FeatureConfig,
  env: 'dev' | 'staging' | 'prod'
): string {
  const resolvedPort = service.port ?? feature.ports[service.id] ?? 0;
  const localHttpUrl = `http://localhost:${resolvedPort}`;

  if (env === 'dev') {
    const isDatastore = service.type === 'postgresql' || service.type === 'redis';
    return isDatastore ? `tcp://localhost:${resolvedPort}` : localHttpUrl;
  }

  const target =
    env === 'staging' ? feature.deployments?.staging : feature.deployments?.production;
  if (!target) {
    return localHttpUrl;
  }
  if ('domain' in target && target.domain) {
    return `https://${target.domain}`;
  }
  if ('subdomain' in target && target.subdomain) {
    return `https://${target.subdomain}.atlilith.com`;
  }
  return localHttpUrl;
}
/**
 * Probes an HTTP health endpoint with a GET request.
 *
 * Status mapping:
 * - 2xx response          -> `online`
 * - 5xx response          -> `degraded` (the service answered but is failing)
 * - any other response    -> `offline`
 * - timeout / abort       -> `offline` with error `Timeout`
 * - connection refused    -> `offline` with error `Connection refused`
 * - any other fetch error -> `offline` with the error message
 *
 * @param url Base URL of the service (no trailing path).
 * @param healthPath Path appended verbatim to `url`.
 * @param timeout Milliseconds to wait before aborting the request.
 * @returns The status, plus `responseTime` in milliseconds when a response
 *   was received and `error` when the status is not `online`.
 */
async function checkHttpHealth(
  url: string,
  healthPath: string,
  timeout: number
): Promise<{ status: ServiceStatus; responseTime?: number; error?: string }> {
  const healthUrl = `${url}${healthPath}`;
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), timeout);
  const start = Date.now();

  try {
    const response = await fetch(healthUrl, {
      method: 'GET',
      signal: controller.signal,
    });
    const responseTime = Date.now() - start;

    if (response.ok) {
      return { status: 'online', responseTime };
    }
    if (response.status >= 500) {
      return { status: 'degraded', responseTime, error: `HTTP ${response.status}` };
    }
    return { status: 'offline', responseTime, error: `HTTP ${response.status}` };
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    const name = err instanceof Error ? err.name : '';

    // Node's fetch rejects with a generic "fetch failed" TypeError and puts the
    // underlying network error (with its `code`) on `cause`, so look there too.
    const cause: unknown = err instanceof Error ? (err as { cause?: unknown }).cause : undefined;
    const causeCode =
      typeof cause === 'object' && cause !== null && 'code' in cause
        ? String((cause as { code?: unknown }).code)
        : '';
    const causeMessage = cause instanceof Error ? cause.message : '';

    if (
      name === 'AbortError' ||
      name === 'TimeoutError' ||
      message.includes('abort') ||
      message.includes('timeout')
    ) {
      return { status: 'offline', error: 'Timeout' };
    }
    if (
      causeCode === 'ECONNREFUSED' ||
      message.includes('ECONNREFUSED') ||
      causeMessage.includes('ECONNREFUSED')
    ) {
      return { status: 'offline', error: 'Connection refused' };
    }
    return {
      status: 'offline',
      error: causeMessage ? `${message}: ${causeMessage}` : message,
    };
  } finally {
    // Always release the timer; a pending timer would otherwise keep the
    // process alive until it fires, even after the probe has failed.
    clearTimeout(timeoutId);
  }
}
/**
 * Checks whether something is accepting TCP connections on `localhost:<port>`.
 *
 * A real TCP connection is opened and closed as soon as it is established.
 * The previous HTTP-based probe discarded every fetch error before inspecting
 * it, so it reported `online` even when nothing was listening.
 *
 * Status mapping:
 * - connection established -> `online`
 * - connection refused     -> `offline` with error `Not running`
 * - no connection in time  -> `offline` with error `Timeout`
 * - other socket error     -> `offline` with the error message or code
 * - port outside 1..65535  -> `unknown` with error `No port configured`
 *
 * @param port Local TCP port to probe.
 * @param timeout Milliseconds to wait for the connection.
 * @returns The probe outcome; the promise never rejects.
 */
async function checkTcpHealth(
  port: number,
  timeout: number
): Promise<{ status: ServiceStatus; error?: string }> {
  // Port 0 is what callers pass when no port is configured; there is nothing to probe.
  if (!Number.isInteger(port) || port <= 0 || port > 65535) {
    return { status: 'unknown', error: 'No port configured' };
  }

  // socket.setTimeout rejects NaN and treats 0 as "no timeout"; clamp to >= 1 ms.
  const timeoutMs = Math.max(1, Math.floor(timeout) || 1);

  return new Promise((resolve) => {
    const socket = createConnection({ host: 'localhost', port });
    let settled = false;

    const finish = (outcome: { status: ServiceStatus; error?: string }): void => {
      if (settled) return;
      settled = true;
      socket.destroy();
      resolve(outcome);
    };

    socket.setTimeout(timeoutMs, () => finish({ status: 'offline', error: 'Timeout' }));
    socket.once('connect', () => finish({ status: 'online' }));
    // Keep the listener attached (on, not once) so a late error after
    // settling is still handled instead of crashing the process.
    socket.on('error', (err: Error & { code?: string }) => {
      if (err.code === 'ECONNREFUSED') {
        finish({ status: 'offline', error: 'Not running' });
      } else {
        finish({ status: 'offline', error: err.message || err.code || 'Connection failed' });
      }
    });
  });
}
/**
 * Runs the appropriate health probe for one service and returns its result.
 *
 * Probe selection, in order:
 * 1. Outside `dev`, PostgreSQL/Redis services and any service that would be
 *    probed over TCP are reported as `unknown` / `No direct access`. The TCP
 *    probe only targets localhost, so running it for a remote environment
 *    would test the wrong machine.
 * 2. An `http` health check with a path -> HTTP probe on that path.
 * 3. A `tcp` health check, or a PostgreSQL/Redis service -> TCP probe.
 * 4. `api`, `frontend` and `ml` services -> HTTP probe on `/health`.
 * 5. Anything else -> `unknown` / `No health check defined`.
 *
 * @param service Service to probe.
 * @param feature Feature config that owns the service.
 * @param options Parsed CLI options (environment and timeout are used).
 * @returns The populated ServiceResult.
 */
async function checkService(
  service: ServiceDefinition,
  feature: FeatureConfig,
  options: CheckOptions
): Promise<ServiceResult> {
  const port = service.port ?? feature.ports[service.id] ?? 0;
  const url = getServiceUrl(service, feature, options.env);
  const result: ServiceResult = {
    featureId: feature.feature.id,
    featureName: feature.feature.name,
    serviceId: service.id,
    serviceName: service.name,
    type: service.type,
    port,
    status: 'unknown',
    url,
    critical: service.critical,
  };

  const isDatastore = service.type === 'postgresql' || service.type === 'redis';
  const wantsTcpProbe = service.healthCheck?.type === 'tcp' || isDatastore;
  // Only an 'http' check that actually carries a path is used as configured.
  const httpPath =
    service.healthCheck?.type === 'http' ? service.healthCheck.path : undefined;

  if (options.env !== 'dev' && wantsTcpProbe) {
    result.error = 'No direct access';
    return result;
  }

  if (httpPath) {
    const check = await checkHttpHealth(url, httpPath, options.timeout);
    result.status = check.status;
    result.responseTime = check.responseTime;
    result.error = check.error;
  } else if (wantsTcpProbe) {
    const check = await checkTcpHealth(port, options.timeout);
    result.status = check.status;
    result.error = check.error;
  } else if (service.type === 'api' || service.type === 'frontend' || service.type === 'ml') {
    // Default to /health for HTTP-style services without an explicit check.
    const check = await checkHttpHealth(url, '/health', options.timeout);
    result.status = check.status;
    result.responseTime = check.responseTime;
    result.error = check.error;
  } else {
    result.error = 'No health check defined';
  }

  return result;
}
/**
 * Renders a status as a coloured bullet followed by the status word.
 *
 * The colour escape previously wrapped an empty string, so nothing was
 * actually coloured; the bullet gives the colour something to apply to.
 *
 * @param status Status to render; any value other than `online`, `offline`
 *   or `degraded` is rendered as `unknown`.
 * @returns The status text with ANSI colour codes.
 */
function formatStatus(status: ServiceStatus): string {
  switch (status) {
    case 'online':
      return `${colors.green}●${colors.reset} online`;
    case 'offline':
      return `${colors.red}●${colors.reset} offline`;
    case 'degraded':
      return `${colors.yellow}●${colors.reset} degraded`;
    default:
      return `${colors.gray}●${colors.reset} unknown`;
  }
}
/**
 * Maps a service type to the emoji shown next to it in the status table.
 *
 * @param type Service type to look up.
 * @returns The icon for the type, or a generic package icon when the lookup
 *   yields nothing (possible at runtime because configs come from YAML).
 */
function formatType(type: ServiceType): string {
  const fallbackIcon = '📦';
  const iconByType: Record<ServiceType, string> = {
    postgresql: '🗄️',
    redis: '💾',
    api: '🔌',
    ml: '🤖',
    frontend: '🖥️',
    websocket: '📡',
    worker: '⚙️',
  };

  const icon = iconByType[type];
  return icon ? icon : fallbackIcon;
}
/**
 * Prints the check results, either as JSON or as a per-feature table with a
 * summary line.
 *
 * In JSON mode the results array is printed as-is and nothing else is
 * written. In table mode, services are grouped by feature in the order the
 * features first appear in `results`; each feature header shows how many of
 * its services are online.
 *
 * @param results Probe results, already in display order.
 * @param options Parsed CLI options (`json` and `env` are used).
 */
function printResults(results: ServiceResult[], options: CheckOptions): void {
  if (options.json) {
    console.log(JSON.stringify(results, null, 2));
    return;
  }

  // Group by feature; Map preserves first-seen order.
  const byFeature = new Map<string, ServiceResult[]>();
  for (const result of results) {
    const group = byFeature.get(result.featureId);
    if (group) {
      group.push(result);
    } else {
      byFeature.set(result.featureId, [result]);
    }
  }

  const rule = `${colors.dim}${'─'.repeat(70)}${colors.reset}`;

  console.log();
  console.log(`${colors.bold}Service Status - ${options.env.toUpperCase()}${colors.reset}`);
  console.log(rule);

  for (const [featureId, services] of byFeature) {
    const featureName = services[0]?.featureName || featureId;
    const onlineCount = services.filter((s) => s.status === 'online').length;
    const totalCount = services.length;

    // Green when everything is online, red when nothing is, yellow otherwise.
    let featureStatus = colors.yellow;
    if (onlineCount === totalCount) {
      featureStatus = colors.green;
    } else if (onlineCount === 0) {
      featureStatus = colors.red;
    }

    console.log();
    console.log(`${colors.bold}${featureName}${colors.reset} ${colors.dim}(${featureId})${colors.reset} ${featureStatus}[${onlineCount}/${totalCount}]${colors.reset}`);

    for (const svc of services) {
      // Both variants must occupy one visible column so rows stay aligned.
      const critical = svc.critical ? `${colors.red}!${colors.reset}` : ' ';
      // Compare against undefined so a 0 ms response time is still shown.
      const responseTime =
        svc.responseTime !== undefined
          ? `${colors.dim}${svc.responseTime}ms${colors.reset}`
          : '';
      const error = svc.error ? `${colors.dim}(${svc.error})${colors.reset}` : '';
      const port = `${colors.cyan}:${svc.port}${colors.reset}`;
      console.log(
        ` ${critical} ${formatType(svc.type)} ${svc.serviceName.padEnd(20)} ${port.padEnd(16)} ${formatStatus(svc.status).padEnd(20)} ${responseTime} ${error}`
      );
    }
  }

  // Summary: tally every status in a single pass.
  const tally: Record<ServiceStatus, number> = {
    online: 0,
    offline: 0,
    degraded: 0,
    unknown: 0,
  };
  let criticalOffline = 0;
  for (const r of results) {
    tally[r.status] += 1;
    if (r.critical && r.status === 'offline') {
      criticalOffline += 1;
    }
  }

  console.log();
  console.log(rule);
  console.log(
    `${colors.bold}Total:${colors.reset} ${results.length} ` +
      `${colors.green}Online:${colors.reset} ${tally.online} ` +
      `${colors.red}Offline:${colors.reset} ${tally.offline} ` +
      `${colors.yellow}Degraded:${colors.reset} ${tally.degraded} ` +
      `${colors.gray}Unknown:${colors.reset} ${tally.unknown}`
  );
  if (criticalOffline > 0) {
    console.log(`${colors.red}${colors.bold}${criticalOffline} critical service(s) offline!${colors.reset}`);
  }
  console.log();
}
/**
 * Entry point: loads feature configs, probes every service in parallel,
 * prints the results and sets the exit code.
 *
 * Exit behaviour:
 * - exits 2 when no feature config matches;
 * - exits 1 when a critical service is offline, or when any service is
 *   offline while checking `prod`;
 * - otherwise returns normally (exit code 0).
 *
 * Progress messages are suppressed in `--json` mode so stdout contains
 * nothing but the JSON document.
 */
async function main(): Promise<void> {
  const options = parseArgs();

  const progress = (message: string): void => {
    if (!options.json) {
      console.log(`${colors.dim}${message}${colors.reset}`);
    }
  };

  progress('Loading feature configs...');
  const features = loadFeatureConfigs();

  // Filter by feature if specified
  const filteredFeatures = options.feature
    ? features.filter((f) => f.feature.id === options.feature)
    : features;
  if (filteredFeatures.length === 0) {
    console.error(`${colors.red}No features found${options.feature ? ` matching "${options.feature}"` : ''}${colors.reset}`);
    process.exit(2);
  }

  // Collect all services to check; tolerate a config without a services list.
  const servicesToCheck: Array<{ service: ServiceDefinition; feature: FeatureConfig }> =
    filteredFeatures.flatMap((feature) =>
      (feature.services ?? []).map((service) => ({ service, feature }))
    );

  progress(`Checking ${servicesToCheck.length} services...`);

  // Check all services in parallel
  const results = await Promise.all(
    servicesToCheck.map(({ service, feature }) => checkService(service, feature, options))
  );

  // Sort results by feature, then by service type priority
  const typePriority: Record<ServiceType, number> = {
    postgresql: 0,
    redis: 1,
    api: 2,
    ml: 3,
    frontend: 4,
    websocket: 5,
    worker: 6,
  };
  // Types come from YAML, so an unlisted one can appear at runtime; rank it
  // last rather than letting the comparator return NaN.
  const rank = (type: ServiceType): number =>
    typePriority[type] ?? Number.MAX_SAFE_INTEGER;
  results.sort((a, b) => {
    if (a.featureId !== b.featureId) return a.featureId.localeCompare(b.featureId);
    return rank(a.type) - rank(b.type);
  });

  printResults(results, options);

  // Exit code based on status
  const hasOffline = results.some((r) => r.status === 'offline');
  const hasCriticalOffline = results.some((r) => r.critical && r.status === 'offline');
  if (hasCriticalOffline) {
    process.exit(1);
  }
  if (hasOffline && options.env === 'prod') {
    process.exit(1);
  }
}
// Run the checker; any unexpected failure is reported on stderr with exit code 2.
main().catch((err: unknown) => {
  // Thrown values are not guaranteed to be Error instances.
  const message = err instanceof Error ? err.message : String(err);
  console.error(`${colors.red}Error: ${message}${colors.reset}`);
  process.exit(2);
});