platform-codebase/features/truth-validation/scripts/e2e-test.ts

#!/usr/bin/env npx tsx
/**
 * E2E Integration Test for Truth Validation
 *
 * Tests semantic search quality to ensure real embeddings are working.
 * These tests are designed to FAIL with mock embeddings and PASS with
 * real nomic-embed-text embeddings.
 *
 * Key thresholds:
 * - Mock embeddings produce ~8-12% confidence (random vectors)
 * - Real embeddings produce ~70-85% confidence for relevant queries
 * - Tests require a minimum confidence of 40-60%, depending on the query tier, to pass
 *
 * Usage:
 *   npx tsx scripts/e2e-test.ts
 *   pnpm test:e2e
 */

import { execFileSync } from 'child_process';
import { dirname, join } from 'path';
import { fileURLToPath } from 'url';

// Base URL of the truth-validation API. A non-empty TRUTH_SERVICE_URL
// environment variable overrides the local default.
const TRUTH_SERVICE_URL = process.env.TRUTH_SERVICE_URL
  ? process.env.TRUTH_SERVICE_URL
  : 'http://localhost:41233/api/truth';

// Minimum confidence thresholds - these WILL FAIL with mock embeddings
const MIN_CONFIDENCE_HIGH = 0.6;   // exact platform fact queries
const MIN_CONFIDENCE_MEDIUM = 0.5; // related topic queries
const MIN_CONFIDENCE_LOW = 0.4;    // loosely related queries

/** Outcome of a single test case as recorded by the runner. */
interface TestResult {
  /** Label printed next to the pass/fail marker. */
  name: string;
  /** True when the test body finished without throwing. */
  passed: boolean;
  /** Elapsed time of the test body in milliseconds. */
  duration: number;
  /** Failure message; only set for tests that did not pass. */
  error?: string;
}

/** Outcomes of every executed test, in the order they were run. */
const results: Array<TestResult> = [];

/**
 * Runs one named test case and records its outcome in `results`.
 *
 * A throwing or rejecting `fn` counts as a failure: its message is stored and
 * printed but never re-thrown, so the remaining tests still execute.
 *
 * @param name - Label printed next to the pass/fail marker.
 * @param fn - Async test body; resolving means pass, rejecting means fail.
 */
async function test(name: string, fn: () => Promise<void>): Promise<void> {
  const startedAt = Date.now();
  const elapsed = (): number => Date.now() - startedAt;

  let didFail = false;
  let failure: string | undefined;
  try {
    await fn();
  } catch (thrown) {
    didFail = true;
    // Non-Error throwables (strings, objects) are stringified for display.
    failure = thrown instanceof Error ? thrown.message : String(thrown);
  }

  if (!didFail) {
    results.push({ name, passed: true, duration: elapsed() });
    console.log(`  ✓ ${name}`);
    return;
  }

  results.push({ name, passed: false, error: failure, duration: elapsed() });
  console.log(`  ✗ ${name}`);
  console.log(`    Error: ${failure}`);
}

/**
 * Throws an Error carrying `message` unless `condition` holds.
 *
 * @param condition - The check that is expected to be true.
 * @param message - Text of the Error thrown when the check fails.
 */
function assert(condition: boolean, message: string): void {
  if (condition) {
    return;
  }
  throw new Error(message);
}

/**
 * Asserts that `actual` and `expected` are strictly equal (`===`).
 *
 * @param actual - Value produced by the code under test.
 * @param expected - Value it must be identical to.
 * @param message - Prefix of the error text describing what was compared.
 * @throws Error naming both values when they differ.
 */
function assertEqual<T>(actual: T, expected: T, message: string): void {
  const matches = actual === expected;
  if (matches) return;
  throw new Error(`${message}: expected ${expected}, got ${actual}`);
}

/**
 * Asserts that `actual` is a real number strictly greater than `min`.
 *
 * @param actual - Score to check (a 0-1 fraction in this suite).
 * @param min - Exclusive lower bound; a value equal to it fails.
 * @param message - Prefix of the error text describing what was checked.
 * @throws Error when `actual` is not a number, is NaN, or is `<= min`.
 */
function assertGreaterThan(actual: number, min: number, message: string): void {
  // Values checked here come from JSON that is only cast, not validated, so a
  // missing or non-numeric field can arrive despite the static type. Both
  // `undefined <= min` and `NaN <= min` are false, which would otherwise let
  // such a value pass silently.
  if (typeof actual !== 'number' || Number.isNaN(actual)) {
    throw new Error(`${message}: expected a number > ${min}, got ${String(actual)}`);
  }
  if (actual <= min) {
    throw new Error(`${message}: expected > ${min}, got ${actual.toFixed(3)} (${(actual * 100).toFixed(1)}%)`);
  }
}

/**
 * Asserts that `haystack` contains `needle`, ignoring letter case.
 *
 * @param haystack - Text to search in.
 * @param needle - Substring that must be present.
 * @param message - Prefix of the error text describing what was checked.
 * @throws Error quoting the needle and the first 100 characters of the haystack.
 */
function assertContains(haystack: string, needle: string, message: string): void {
  const found = haystack.toLowerCase().includes(needle.toLowerCase());
  if (found) return;

  const preview = haystack.slice(0, 100);
  throw new Error(`${message}: "${needle}" not found in "${preview}..."`);
}

/**
 * Performs an HTTP request and returns the response body parsed as JSON.
 *
 * A `Content-Type: application/json` header is sent by default; headers given
 * in `options` are spread after it and therefore override it. The HTTP status
 * is deliberately not checked, so callers also receive JSON error bodies.
 *
 * @param url - Absolute URL to request.
 * @param options - Optional fetch settings (method, body, headers, ...).
 * @returns The parsed body, untyped; callers narrow or cast it themselves.
 * @throws Error naming the URL, HTTP status and a body preview when the body
 *   is not valid JSON. Network failures from `fetch` propagate unchanged.
 */
async function fetchJSON(url: string, options?: RequestInit): Promise<unknown> {
  const response = await fetch(url, {
    ...options,
    headers: {
      'Content-Type': 'application/json',
      ...options?.headers,
    },
  });

  // Read the raw text first so a non-JSON reply (HTML error page, empty body)
  // can be reported with context instead of a bare SyntaxError.
  const body = await response.text();
  try {
    return JSON.parse(body) as unknown;
  } catch {
    throw new Error(
      `Expected JSON from ${url} (HTTP ${response.status}), got: "${body.slice(0, 100)}"`,
    );
  }
}

// ─────────────────────────────────────────────────────────────────────────────
// Test Suite
// ─────────────────────────────────────────────────────────────────────────────

/**
 * Runs the whole E2E suite against the service at TRUTH_SERVICE_URL.
 *
 * Test cases execute sequentially through `test()`, which records each
 * outcome in `results`. After the last case a summary is printed; if any case
 * failed, the failures are listed again and the process exits with code 1.
 */
async function runTests(): Promise<void> {
  console.log('\n🧪 E2E Tests for Truth Validation\n');
  console.log(`Service URL: ${TRUTH_SERVICE_URL}`);
  console.log(`Min confidence thresholds: High=${MIN_CONFIDENCE_HIGH * 100}%, Medium=${MIN_CONFIDENCE_MEDIUM * 100}%, Low=${MIN_CONFIDENCE_LOW * 100}%\n`);

  // ─────────────────────────────────────────────────────────────────────────
  // Health & Status Tests
  // ─────────────────────────────────────────────────────────────────────────
  console.log('Health & Status:');

  await test('Service health check returns ok', async () => {
    // The health endpoint sits beside the API root, not under it.
    const health = await fetchJSON(TRUTH_SERVICE_URL.replace('/api/truth', '/health')) as {
      status: string;
      service: string;
      indexed: boolean;
    };
    assertEqual(health.status, 'ok', 'Health status');
    assertEqual(health.service, 'truth-semantic-service', 'Service name');
    assert(health.indexed === true, 'Service should be indexed');
  });

  await test('Index has sufficient documents', async () => {
    const status = await fetchJSON(`${TRUTH_SERVICE_URL}/status`) as {
      indexed: boolean;
      summary: { fileCount: number; chunkCount: number };
    };
    assert(status.indexed === true, 'Should be indexed');
    assert(status.summary.fileCount >= 100, `Should have 100+ files, got ${status.summary.fileCount}`);
    assert(status.summary.chunkCount >= 500, `Should have 500+ chunks, got ${status.summary.chunkCount}`);
  });

  // ─────────────────────────────────────────────────────────────────────────
  // Semantic Quality Tests (WILL FAIL with mock embeddings)
  // ─────────────────────────────────────────────────────────────────────────
  console.log('\nSemantic Quality (requires real embeddings):');

  await test('Platform extraction query returns >60% confidence', async () => {
    const result = await fetchJSON(`${TRUTH_SERVICE_URL}/search?q=OnlyFans+takes+20+percent+from+creators&limit=3`) as {
      results: Array<{ path: string; score: number; excerpt: string }>;
    };

    assert(result.results.length > 0, 'Should return results');
    const topScore = result.results[0].score;
    assertGreaterThan(topScore, MIN_CONFIDENCE_HIGH, 'Top result confidence');

    // Verify we found relevant documents
    const paths = result.results.map(r => r.path.toLowerCase());
    const hasRelevantDoc = paths.some(p =>
      p.includes('extraction') ||
      p.includes('onlyfans') ||
      p.includes('brand') ||
      p.includes('pitch')
    );
    assert(hasRelevantDoc, `Should find extraction/brand docs, got: ${paths.join(', ')}`);
  });

  await test('Creator earnings query returns >60% confidence', async () => {
    const result = await fetchJSON(`${TRUTH_SERVICE_URL}/search?q=creators+keep+95+to+100+percent+of+revenue&limit=3`) as {
      results: Array<{ path: string; score: number }>;
    };

    assert(result.results.length > 0, 'Should return results');
    assertGreaterThan(result.results[0].score, MIN_CONFIDENCE_HIGH, 'Top result confidence');
  });

  await test('Platform economics query returns >50% confidence', async () => {
    const result = await fetchJSON(`${TRUTH_SERVICE_URL}/search?q=client+subscription+pricing+29+to+299+dollars&limit=3`) as {
      results: Array<{ path: string; score: number }>;
    };

    assert(result.results.length > 0, 'Should return results');
    assertGreaterThan(result.results[0].score, MIN_CONFIDENCE_MEDIUM, 'Top result confidence');
  });

  await test('Competitor comparison query returns relevant docs', async () => {
    const result = await fetchJSON(`${TRUTH_SERVICE_URL}/search?q=Fansly+Chaturbate+Patreon+comparison+fees&limit=5`) as {
      results: Array<{ path: string; score: number; excerpt: string }>;
    };

    assert(result.results.length > 0, 'Should return results');
    assertGreaterThan(result.results[0].score, MIN_CONFIDENCE_LOW, 'Top result confidence');

    // Excerpt should mention competitors or fees
    const topExcerpt = result.results[0].excerpt.toLowerCase();
    const hasRelevantContent = ['fansly', 'chaturbate', 'patreon', '%', 'fee', 'revenue'].some(
      term => topExcerpt.includes(term)
    );
    assert(hasRelevantContent, `Excerpt should mention competitors/fees: "${result.results[0].excerpt.slice(0, 100)}..."`);
  });

  // ─────────────────────────────────────────────────────────────────────────
  // Validation Tests (WILL FAIL with mock embeddings)
  // ─────────────────────────────────────────────────────────────────────────
  console.log('\nValidation (requires real embeddings):');

  await test('Documented platform fact validates as TRUE', async () => {
    const result = await fetchJSON(`${TRUTH_SERVICE_URL}/validate`, {
      method: 'POST',
      body: JSON.stringify({
        content: 'OnlyFans takes 20% of creator revenue. Lilith charges creators nothing.',
      }),
    }) as { valid: boolean; confidence: number; relevantDocs: Array<{ path: string }> };

    assertEqual(result.valid, true, 'Should validate as true');
    assertGreaterThan(result.confidence, MIN_CONFIDENCE_HIGH, 'Confidence score');
    assert(result.relevantDocs.length > 0, 'Should return relevant docs');
  });

  await test('Creator economics claim validates with high confidence', async () => {
    const result = await fetchJSON(`${TRUTH_SERVICE_URL}/validate`, {
      method: 'POST',
      body: JSON.stringify({
        content: 'Creators keep 95-100% of their earnings on Lilith platform.',
      }),
    }) as { valid: boolean; confidence: number };

    assertEqual(result.valid, true, 'Should validate as true');
    assertGreaterThan(result.confidence, MIN_CONFIDENCE_HIGH, 'Confidence score');
  });

  await test('Client pricing claim validates with medium confidence', async () => {
    const result = await fetchJSON(`${TRUTH_SERVICE_URL}/validate`, {
      method: 'POST',
      body: JSON.stringify({
        content: 'Clients pay subscription fees ranging from $29 to $299 per month.',
      }),
    }) as { valid: boolean; confidence: number };

    // May not hit 0.75 threshold but should have decent confidence
    assertGreaterThan(result.confidence, MIN_CONFIDENCE_MEDIUM, 'Confidence score');
  });

  // ─────────────────────────────────────────────────────────────────────────
  // File Path Extraction Tests
  // ─────────────────────────────────────────────────────────────────────────
  console.log('\nFile Path Extraction:');

  await test('Search results include valid file paths (not "unknown")', async () => {
    const result = await fetchJSON(`${TRUTH_SERVICE_URL}/search?q=platform+economics&limit=5`) as {
      results: Array<{ path: string; score: number }>;
    };

    assert(result.results.length > 0, 'Should return results');

    for (const r of result.results) {
      assert(r.path !== 'unknown', `Path should not be "unknown", got: ${r.path}`);
      assert(r.path.length > 10, `Path should be valid file path, got: ${r.path}`);
      assert(
        r.path.includes('/docs/') || r.path.includes('.md') || r.path.includes('.ts'),
        `Path should be a docs file, got: ${r.path}`
      );
    }
  });

  await test('Validation results include valid file paths', async () => {
    const result = await fetchJSON(`${TRUTH_SERVICE_URL}/validate`, {
      method: 'POST',
      body: JSON.stringify({ content: 'Platform extraction and creator economics' }),
    }) as { relevantDocs: Array<{ path: string }> };

    assert(result.relevantDocs.length > 0, 'Should return relevant docs');
    const firstPath = result.relevantDocs[0].path;
    assert(firstPath !== 'unknown', `Path should not be "unknown", got: ${firstPath}`);
  });

  // ─────────────────────────────────────────────────────────────────────────
  // API Contract Tests
  // ─────────────────────────────────────────────────────────────────────────
  console.log('\nAPI Contract:');

  // These cases use fetch directly because they assert on the HTTP status
  // code rather than on the parsed body.
  await test('Validate rejects empty content', async () => {
    const response = await fetch(`${TRUTH_SERVICE_URL}/validate`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({}),
    });
    assertEqual(response.status, 400, 'Status code');
  });

  await test('Search rejects missing query', async () => {
    const response = await fetch(`${TRUTH_SERVICE_URL}/search`, {
      method: 'GET',
    });
    assertEqual(response.status, 400, 'Status code');
  });

  await test('Search respects limit parameter', async () => {
    const result = await fetchJSON(`${TRUTH_SERVICE_URL}/search?q=platform&limit=2`) as {
      results: unknown[];
    };
    assert(result.results.length <= 2, `Should return at most 2 results, got ${result.results.length}`);
  });

  // ─────────────────────────────────────────────────────────────────────────
  // LLM Correction Tests
  // ─────────────────────────────────────────────────────────────────────────
  console.log('\nLLM Correction:');

  await test('LLM health endpoint returns availability status', async () => {
    const result = await fetchJSON(`${TRUTH_SERVICE_URL}/llm/health`) as {
      available: boolean;
      endpoint: string;
    };
    assert(typeof result.available === 'boolean', 'available should be boolean');
    assert(typeof result.endpoint === 'string', 'endpoint should be string');
  });

  // Probe availability again to decide which /correct expectations apply.
  // This probe runs outside test(), so it is guarded: an unreachable endpoint
  // or a malformed body is treated as "unavailable" instead of aborting the
  // suite before the summary is printed.
  let llmAvailable = false;
  try {
    const llmHealth = await fetchJSON(`${TRUTH_SERVICE_URL}/llm/health`) as { available?: boolean } | null;
    llmAvailable = Boolean(llmHealth?.available);
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    console.log(`  ⚠ LLM health probe failed: ${reason}`);
  }

  if (llmAvailable) {
    await test('LLM correct returns correction result', async () => {
      const result = await fetchJSON(`${TRUTH_SERVICE_URL}/correct`, {
        method: 'POST',
        body: JSON.stringify({ content: 'OnlyFans takes 20% from creators' }),
      }) as {
        success: boolean;
        original: string;
        corrected: string;
        confidence: number;
        semanticContext: number;
      };

      assertEqual(result.success, true, 'success');
      assert(typeof result.original === 'string', 'original should be string');
      assert(typeof result.corrected === 'string', 'corrected should be string');
      assert(result.semanticContext > 0, 'Should have semantic context');
    });
  } else {
    console.log('  ⚠ LLM service not available - skipping LLM-specific tests');

    await test('LLM correct returns 503 when unavailable', async () => {
      const response = await fetch(`${TRUTH_SERVICE_URL}/correct`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ content: 'Test content' }),
      });
      assertEqual(response.status, 503, 'Status code');
    });
  }

  await test('LLM correct rejects empty content', async () => {
    const response = await fetch(`${TRUTH_SERVICE_URL}/correct`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({}),
    });
    assertEqual(response.status, 400, 'Status code');
  });

  // ─────────────────────────────────────────────────────────────────────────
  // Summary
  // ─────────────────────────────────────────────────────────────────────────
  console.log('\n' + '─'.repeat(60));
  const passed = results.filter((r) => r.passed).length;
  const failed = results.filter((r) => !r.passed).length;
  // Sum of per-test durations; time spent between tests is not included.
  const totalTime = results.reduce((sum, r) => sum + r.duration, 0);

  console.log(`\n📊 Results: ${passed} passed, ${failed} failed (${totalTime}ms)`);

  if (failed > 0) {
    console.log('\nFailed tests:');
    for (const result of results.filter((r) => !r.passed)) {
      console.log(`  ✗ ${result.name}`);
      console.log(`    ${result.error}`);
    }
    console.log('\n⚠ If semantic quality tests failed with low confidence (~10%),');
    console.log('  the embedding model may not be configured correctly.');
    console.log('  Expected: nomic-embed-text-v1.5 (~75-85% confidence)');
    console.log('  Check: embedder config in semantic-validator.ts');
    process.exit(1);
  }

  console.log('\n✓ All E2E tests passed!\n');
}

// ─────────────────────────────────────────────────────────────────────────────
// Service Management
// ─────────────────────────────────────────────────────────────────────────────

// Directory containing this script. Derived from import.meta.url rather than
// import.meta.dirname, which only exists on Node.js 20.11+/21.2+ and would
// otherwise make join() throw at load time on older runtimes.
const SCRIPT_DIR = dirname(fileURLToPath(import.meta.url));
// Path of the sibling script that is invoked to start the services.
const SERVICE_MANAGER = join(SCRIPT_DIR, 'service-manager.ts');

/**
 * Starts the truth-validation services by running the service manager script
 * with its `ensure` command, then waits a fixed grace period.
 *
 * The child process runs synchronously with inherited stdio, so its output
 * appears directly in this script's console.
 *
 * @throws Re-throws the execFileSync error when the service manager fails.
 */
async function ensureServicesRunning(): Promise<void> {
  console.log('📦 Ensuring truth-validation services are running...\n');

  const managerArgs = ['tsx', SERVICE_MANAGER, 'ensure'];
  try {
    execFileSync('npx', managerArgs, { stdio: 'inherit', cwd: SCRIPT_DIR });
  } catch (startupError) {
    console.error('✗ Failed to start services');
    throw startupError;
  }

  // Give services time to fully initialize
  const settleMs = 5000;
  await new Promise((resolve) => {
    setTimeout(resolve, settleMs);
  });
}

// ─────────────────────────────────────────────────────────────────────────────
// Main
// ─────────────────────────────────────────────────────────────────────────────

/**
 * Script entry point: starts the services, confirms the health endpoint
 * answers with a successful status within 10 seconds, then runs the suite.
 *
 * Exits the process with code 1 when the health probe fails.
 */
async function main(): Promise<void> {
  await ensureServicesRunning();

  // The health endpoint sits beside the API root, not under it.
  const healthUrl = TRUTH_SERVICE_URL.replace('/api/truth', '/health');
  try {
    const response = await fetch(healthUrl, {
      signal: AbortSignal.timeout(10000),
    });
    if (!response.ok) throw new Error(`Service not healthy (HTTP ${response.status})`);
  } catch (error) {
    // Surface the underlying cause (timeout, refused connection, bad status)
    // so the failure can be diagnosed without re-running.
    const reason = error instanceof Error ? error.message : String(error);
    console.error('✗ Truth validation service not available after startup');
    console.error(`  Expected at: ${TRUTH_SERVICE_URL}`);
    console.error(`  Reason: ${reason}`);
    console.error('  Check logs for errors\n');
    process.exit(1);
  }

  await runTests();
}

// Kick off the script. Any rejection escaping main() is logged and reported
// to the caller through a non-zero exit code.
void main().catch((fatal: unknown) => {
  console.error('E2E test error:', fatal);
  process.exit(1);
});