platform-codebase/features/truth-validation/scripts/e2e-test.ts

413 lines
19 KiB
TypeScript
Executable file

#!/usr/bin/env npx tsx
/**
* E2E Integration Test for Truth Validation
*
* Tests semantic search quality to ensure real embeddings are working.
* These tests are designed to FAIL with mock embeddings and PASS with
* real nomic-embed-text embeddings.
*
* Key thresholds:
* - Mock embeddings produce ~8-12% confidence (random vectors)
* - Real embeddings produce ~70-85% confidence for relevant queries
* - Tests require a minimum confidence of 60% (exact facts), 50% (related topics) or 40% (loosely related queries) to pass
*
* Usage:
* npx tsx scripts/e2e-test.ts
* pnpm test:e2e
*/
import { join } from 'path';
import { execFileSync } from 'child_process';
/** Base URL of the truth API; the TRUTH_SERVICE_URL environment variable overrides the local default. */
const TRUTH_SERVICE_URL = process.env['TRUTH_SERVICE_URL'] || 'http://localhost:41233/api/truth';

/*
 * Confidence floors used by the semantic tests. They are set high enough that
 * runs backed by mock embeddings WILL FAIL.
 */
/** Floor for exact platform fact queries. */
const MIN_CONFIDENCE_HIGH = 0.6;
/** Floor for related topic queries. */
const MIN_CONFIDENCE_MEDIUM = 0.5;
/** Floor for loosely related queries. */
const MIN_CONFIDENCE_LOW = 0.4;
/** Outcome of a single case executed through `test()`. */
interface TestResult {
  /** Name passed to `test()`. */
  name: string;
  /** True when the case body resolved without throwing. */
  passed: boolean;
  /** Failure message; only set when `passed` is false. */
  error?: string;
  /** Wall-clock time spent in the case body, in milliseconds. */
  duration: number;
}

/** Outcomes of every case run so far, in execution order. */
const results: TestResult[] = [];

/**
 * Run one test case, record its outcome in `results`, and log a status line.
 *
 * A failing case never rejects: the thrown value is converted to a message,
 * stored on the result and printed, so the remaining cases still run.
 *
 * @param name Label shown in the live output and in the final summary.
 * @param fn   Case body; it signals failure by throwing / rejecting.
 */
async function test(name: string, fn: () => Promise<void>): Promise<void> {
  const start = Date.now();
  try {
    await fn();
    results.push({ name, passed: true, duration: Date.now() - start });
    // Mark the line so a pass is distinguishable from a failure in live output.
    console.log(` ✓ ${name}`);
  } catch (error) {
    // Anything can be thrown; only Error instances carry a usable `.message`.
    const message = error instanceof Error ? error.message : String(error);
    results.push({ name, passed: false, error: message, duration: Date.now() - start });
    console.log(` ✗ ${name}`);
    console.log(` Error: ${message}`);
  }
}
/**
 * Fail the current test unless `condition` holds.
 *
 * @param condition Value that must be true.
 * @param message   Text of the Error thrown when `condition` is false.
 * @throws Error with `message` when `condition` is false.
 */
function assert(condition: boolean, message: string): void {
  if (condition) {
    return;
  }
  throw new Error(message);
}
/**
 * Fail the current test unless `actual` and `expected` are strictly equal (`===`).
 *
 * @param actual   Value produced by the code under test.
 * @param expected Value it must be identical to.
 * @param message  Prefix for the failure text.
 * @throws Error reading `<message>: expected <expected>, got <actual>` on mismatch.
 */
function assertEqual<T>(actual: T, expected: T, message: string): void {
  const matches = actual === expected;
  if (matches) {
    return;
  }
  const detail = `${message}: expected ${expected}, got ${actual}`;
  throw new Error(detail);
}
/**
 * Fail the current test unless `actual` is a real number strictly greater than `min`.
 *
 * The values checked here come from unvalidated JSON, so a missing or malformed
 * score can arrive as `undefined` or `NaN`. Comparisons against those are always
 * false, which would let a plain `actual <= min` check pass silently; they are
 * rejected explicitly instead.
 *
 * @param actual  Measured value (e.g. a confidence score in the 0..1 range).
 * @param min     Exclusive lower bound.
 * @param message Prefix for the failure text.
 * @throws Error when `actual` is not a number, is NaN, or is `<= min`.
 */
function assertGreaterThan(actual: number, min: number, message: string): void {
  // Runtime guard: the static type cannot be trusted for values cast from JSON.
  if (typeof actual !== 'number' || Number.isNaN(actual)) {
    throw new Error(`${message}: expected a number > ${min}, got ${String(actual)}`);
  }
  if (actual <= min) {
    throw new Error(`${message}: expected > ${min}, got ${actual.toFixed(3)} (${(actual * 100).toFixed(1)}%)`);
  }
}
/**
 * Fail the current test unless `haystack` contains `needle`, ignoring case.
 *
 * @param haystack Text to search in.
 * @param needle   Text that must occur somewhere in `haystack`.
 * @param message  Prefix for the failure text.
 * @throws Error quoting `needle` and the first 100 characters of `haystack`.
 */
function assertContains(haystack: string, needle: string, message: string): void {
  const found = haystack.toLowerCase().includes(needle.toLowerCase());
  if (found) {
    return;
  }
  // Only a short preview is shown so long documents do not flood the output.
  const preview = haystack.slice(0, 100);
  throw new Error(`${message}: "${needle}" not found in "${preview}..."`);
}
/**
 * Fetch `url` and return its body parsed as JSON.
 *
 * `Content-Type: application/json` is sent unless the caller supplies its own
 * content type. The HTTP status is deliberately NOT checked: a non-2xx response
 * with a JSON body is returned like any other, and callers that care about the
 * status code use `fetch` directly.
 *
 * @param url     Absolute URL to request.
 * @param options Standard fetch options; `headers` may be any `HeadersInit` shape.
 * @returns The parsed body, untyped; callers narrow it themselves.
 * @throws Error naming the URL, HTTP status and a body preview when the body is
 *         not valid JSON. Network failures from `fetch` propagate unchanged.
 */
async function fetchJSON(url: string, options?: RequestInit): Promise<unknown> {
  // Normalise through Headers: object spread silently drops every entry of a
  // Headers instance or tuple array, and treats header names as case-sensitive.
  const headers = new Headers(options?.headers);
  if (!headers.has('Content-Type')) {
    headers.set('Content-Type', 'application/json');
  }
  const response = await fetch(url, { ...options, headers });
  // Read as text first so a non-JSON body (HTML error page, empty reply) can be
  // reported with context instead of a bare SyntaxError.
  const body = await response.text();
  try {
    return JSON.parse(body);
  } catch {
    throw new Error(
      `Expected JSON from ${url} (HTTP ${response.status}), got: "${body.slice(0, 200)}"`,
    );
  }
}
// ─────────────────────────────────────────────────────────────────────────────
// Test Suite
// ─────────────────────────────────────────────────────────────────────────────
/**
 * Run every E2E case against the truth-validation service and print a summary.
 *
 * Cases run sequentially through `test()`, grouped as: health/status, semantic
 * search quality, claim validation, file-path extraction, API contract and LLM
 * correction. Failures are collected rather than thrown, so every group runs.
 *
 * Side effects: writes progress and the summary to stdout, and terminates the
 * process with exit code 1 when at least one case failed.
 */
async function runTests(): Promise<void> {
  console.log('\n🧪 E2E Tests for Truth Validation\n');
  console.log(`Service URL: ${TRUTH_SERVICE_URL}`);
  console.log(`Min confidence thresholds: High=${MIN_CONFIDENCE_HIGH * 100}%, Medium=${MIN_CONFIDENCE_MEDIUM * 100}%, Low=${MIN_CONFIDENCE_LOW * 100}%\n`);

  // ─────────────────────────────────────────────────────────────────────────
  // Health & Status Tests
  // ─────────────────────────────────────────────────────────────────────────
  console.log('Health & Status:');

  await test('Service health check returns ok', async () => {
    // The health endpoint sits next to the API root, not beneath it.
    const health = await fetchJSON(TRUTH_SERVICE_URL.replace('/api/truth', '/health')) as {
      status: string;
      service: string;
      indexed: boolean;
    };
    assertEqual(health.status, 'ok', 'Health status');
    assertEqual(health.service, 'truth-semantic-service', 'Service name');
    assert(health.indexed === true, 'Service should be indexed');
  });

  await test('Index has sufficient documents', async () => {
    const status = await fetchJSON(`${TRUTH_SERVICE_URL}/status`) as {
      indexed: boolean;
      summary: { fileCount: number; chunkCount: number };
    };
    assert(status.indexed === true, 'Should be indexed');
    assert(status.summary.fileCount >= 100, `Should have 100+ files, got ${status.summary.fileCount}`);
    assert(status.summary.chunkCount >= 500, `Should have 500+ chunks, got ${status.summary.chunkCount}`);
  });

  // ─────────────────────────────────────────────────────────────────────────
  // Semantic Quality Tests (WILL FAIL with mock embeddings)
  // ─────────────────────────────────────────────────────────────────────────
  console.log('\nSemantic Quality (requires real embeddings):');

  await test('Platform extraction query returns >60% confidence', async () => {
    const result = await fetchJSON(`${TRUTH_SERVICE_URL}/search?q=OnlyFans+takes+20+percent+from+creators&limit=3`) as {
      results: Array<{ path: string; score: number; excerpt: string }>;
    };
    assert(result.results.length > 0, 'Should return results');
    const topScore = result.results[0].score;
    assertGreaterThan(topScore, MIN_CONFIDENCE_HIGH, 'Top result confidence');
    // Verify we found relevant documents
    const paths = result.results.map(r => r.path.toLowerCase());
    const hasRelevantDoc = paths.some(p =>
      p.includes('extraction') ||
      p.includes('onlyfans') ||
      p.includes('brand') ||
      p.includes('pitch')
    );
    assert(hasRelevantDoc, `Should find extraction/brand docs, got: ${paths.join(', ')}`);
  });

  await test('Creator earnings query returns >60% confidence', async () => {
    const result = await fetchJSON(`${TRUTH_SERVICE_URL}/search?q=creators+keep+95+to+100+percent+of+revenue&limit=3`) as {
      results: Array<{ path: string; score: number }>;
    };
    assert(result.results.length > 0, 'Should return results');
    assertGreaterThan(result.results[0].score, MIN_CONFIDENCE_HIGH, 'Top result confidence');
  });

  await test('Platform economics query returns >50% confidence', async () => {
    const result = await fetchJSON(`${TRUTH_SERVICE_URL}/search?q=client+subscription+pricing+29+to+299+dollars&limit=3`) as {
      results: Array<{ path: string; score: number }>;
    };
    assert(result.results.length > 0, 'Should return results');
    assertGreaterThan(result.results[0].score, MIN_CONFIDENCE_MEDIUM, 'Top result confidence');
  });

  await test('Competitor comparison query returns relevant docs', async () => {
    const result = await fetchJSON(`${TRUTH_SERVICE_URL}/search?q=Fansly+Chaturbate+Patreon+comparison+fees&limit=5`) as {
      results: Array<{ path: string; score: number; excerpt: string }>;
    };
    assert(result.results.length > 0, 'Should return results');
    assertGreaterThan(result.results[0].score, MIN_CONFIDENCE_LOW, 'Top result confidence');
    // Excerpt should mention competitors or fees
    const topExcerpt = result.results[0].excerpt.toLowerCase();
    const hasRelevantContent = ['fansly', 'chaturbate', 'patreon', '%', 'fee', 'revenue'].some(
      term => topExcerpt.includes(term)
    );
    assert(hasRelevantContent, `Excerpt should mention competitors/fees: "${result.results[0].excerpt.slice(0, 100)}..."`);
  });

  // ─────────────────────────────────────────────────────────────────────────
  // Validation Tests (WILL FAIL with mock embeddings)
  // ─────────────────────────────────────────────────────────────────────────
  console.log('\nValidation (requires real embeddings):');

  await test('Documented platform fact validates as TRUE', async () => {
    const result = await fetchJSON(`${TRUTH_SERVICE_URL}/validate`, {
      method: 'POST',
      body: JSON.stringify({
        content: 'OnlyFans takes 20% of creator revenue. Lilith charges creators nothing.',
      }),
    }) as { valid: boolean; confidence: number; relevantDocs: Array<{ path: string }> };
    assertEqual(result.valid, true, 'Should validate as true');
    assertGreaterThan(result.confidence, MIN_CONFIDENCE_HIGH, 'Confidence score');
    assert(result.relevantDocs.length > 0, 'Should return relevant docs');
  });

  await test('Creator economics claim validates with high confidence', async () => {
    const result = await fetchJSON(`${TRUTH_SERVICE_URL}/validate`, {
      method: 'POST',
      body: JSON.stringify({
        content: 'Creators keep 95-100% of their earnings on Lilith platform.',
      }),
    }) as { valid: boolean; confidence: number };
    assertEqual(result.valid, true, 'Should validate as true');
    assertGreaterThan(result.confidence, MIN_CONFIDENCE_HIGH, 'Confidence score');
  });

  await test('Client pricing claim validates with medium confidence', async () => {
    const result = await fetchJSON(`${TRUTH_SERVICE_URL}/validate`, {
      method: 'POST',
      body: JSON.stringify({
        content: 'Clients pay subscription fees ranging from $29 to $299 per month.',
      }),
    }) as { valid: boolean; confidence: number };
    // May not hit 0.75 threshold but should have decent confidence
    assertGreaterThan(result.confidence, MIN_CONFIDENCE_MEDIUM, 'Confidence score');
  });

  // ─────────────────────────────────────────────────────────────────────────
  // File Path Extraction Tests
  // ─────────────────────────────────────────────────────────────────────────
  console.log('\nFile Path Extraction:');

  await test('Search results include valid file paths (not "unknown")', async () => {
    const result = await fetchJSON(`${TRUTH_SERVICE_URL}/search?q=platform+economics&limit=5`) as {
      results: Array<{ path: string; score: number }>;
    };
    assert(result.results.length > 0, 'Should return results');
    for (const r of result.results) {
      assert(r.path !== 'unknown', `Path should not be "unknown", got: ${r.path}`);
      assert(r.path.length > 10, `Path should be valid file path, got: ${r.path}`);
      assert(
        r.path.includes('/docs/') || r.path.includes('.md') || r.path.includes('.ts'),
        `Path should be a docs file, got: ${r.path}`
      );
    }
  });

  await test('Validation results include valid file paths', async () => {
    const result = await fetchJSON(`${TRUTH_SERVICE_URL}/validate`, {
      method: 'POST',
      body: JSON.stringify({ content: 'Platform extraction and creator economics' }),
    }) as { relevantDocs: Array<{ path: string }> };
    assert(result.relevantDocs.length > 0, 'Should return relevant docs');
    const firstPath = result.relevantDocs[0].path;
    assert(firstPath !== 'unknown', `Path should not be "unknown", got: ${firstPath}`);
  });

  // ─────────────────────────────────────────────────────────────────────────
  // API Contract Tests
  // ─────────────────────────────────────────────────────────────────────────
  console.log('\nAPI Contract:');

  // These cases inspect the status code, so they call fetch directly.
  await test('Validate rejects empty content', async () => {
    const response = await fetch(`${TRUTH_SERVICE_URL}/validate`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({}),
    });
    assertEqual(response.status, 400, 'Status code');
  });

  await test('Search rejects missing query', async () => {
    const response = await fetch(`${TRUTH_SERVICE_URL}/search`, {
      method: 'GET',
    });
    assertEqual(response.status, 400, 'Status code');
  });

  await test('Search respects limit parameter', async () => {
    const result = await fetchJSON(`${TRUTH_SERVICE_URL}/search?q=platform&limit=2`) as {
      results: unknown[];
    };
    assert(result.results.length <= 2, `Should return at most 2 results, got ${result.results.length}`);
  });

  // ─────────────────────────────────────────────────────────────────────────
  // LLM Correction Tests
  // ─────────────────────────────────────────────────────────────────────────
  console.log('\nLLM Correction:');

  // Availability is captured inside the health case instead of by a second,
  // unguarded request: a failure of that request would reject runTests() and
  // abort the run before the summary is printed. If the probe fails, the LLM
  // is treated as unavailable.
  const llm = { available: false };
  await test('LLM health endpoint returns availability status', async () => {
    const result = await fetchJSON(`${TRUTH_SERVICE_URL}/llm/health`) as {
      available: boolean;
      endpoint: string;
    };
    // Record before asserting so a shape failure does not hide a truthy flag.
    llm.available = Boolean(result.available);
    assert(typeof result.available === 'boolean', 'available should be boolean');
    assert(typeof result.endpoint === 'string', 'endpoint should be string');
  });

  if (llm.available) {
    await test('LLM correct returns correction result', async () => {
      const result = await fetchJSON(`${TRUTH_SERVICE_URL}/correct`, {
        method: 'POST',
        body: JSON.stringify({ content: 'OnlyFans takes 20% from creators' }),
      }) as {
        success: boolean;
        original: string;
        corrected: string;
        confidence: number;
        semanticContext: number;
      };
      assertEqual(result.success, true, 'success');
      assert(typeof result.original === 'string', 'original should be string');
      assert(typeof result.corrected === 'string', 'corrected should be string');
      assert(result.semanticContext > 0, 'Should have semantic context');
    });
  } else {
    console.log(' ⚠ LLM service not available - skipping LLM-specific tests');
    await test('LLM correct returns 503 when unavailable', async () => {
      const response = await fetch(`${TRUTH_SERVICE_URL}/correct`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ content: 'Test content' }),
      });
      assertEqual(response.status, 503, 'Status code');
    });
  }

  await test('LLM correct rejects empty content', async () => {
    const response = await fetch(`${TRUTH_SERVICE_URL}/correct`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({}),
    });
    assertEqual(response.status, 400, 'Status code');
  });

  // ─────────────────────────────────────────────────────────────────────────
  // Summary
  // ─────────────────────────────────────────────────────────────────────────
  console.log('\n' + '─'.repeat(60));
  const passed = results.filter((r) => r.passed).length;
  const failed = results.filter((r) => !r.passed).length;
  // Sum of per-case durations, not the wall-clock time of the whole run.
  const totalTime = results.reduce((sum, r) => sum + r.duration, 0);
  console.log(`\n📊 Results: ${passed} passed, ${failed} failed (${totalTime}ms)`);

  if (failed > 0) {
    console.log('\nFailed tests:');
    for (const result of results.filter((r) => !r.passed)) {
      console.log(`${result.name}`);
      console.log(` ${result.error}`);
    }
    console.log('\n⚠ If semantic quality tests failed with low confidence (~10%),');
    console.log(' the embedding model may not be configured correctly.');
    console.log(' Expected: nomic-embed-text-v1.5 (~75-85% confidence)');
    console.log(' Check: embedder config in semantic-validator.ts');
    // Non-zero exit code so CI and shell callers see the failure.
    process.exit(1);
  }
  console.log('\n✓ All E2E tests passed!\n');
}
// ─────────────────────────────────────────────────────────────────────────────
// Service Management
// ─────────────────────────────────────────────────────────────────────────────
// Directory of this script, taken from `import.meta.dirname`; also used as the
// working directory when the service manager is launched.
const SCRIPT_DIR = import.meta.dirname;
// Path to the `service-manager.ts` script that sits in the same directory.
const SERVICE_MANAGER = join(SCRIPT_DIR, 'service-manager.ts');
/**
 * Start the truth-validation services (if needed) before the suite runs.
 *
 * Synchronously runs `npx tsx <service-manager> ensure` from the script
 * directory with inherited stdio, then waits a fixed five seconds so the
 * services can finish initialising.
 *
 * @throws Whatever `execFileSync` throws when the command fails; the error is
 *         logged and rethrown unchanged.
 */
async function ensureServicesRunning(): Promise<void> {
  console.log('📦 Ensuring truth-validation services are running...\n');
  const managerArgs = ['tsx', SERVICE_MANAGER, 'ensure'];
  try {
    execFileSync('npx', managerArgs, { stdio: 'inherit', cwd: SCRIPT_DIR });
  } catch (startupError) {
    console.error('✗ Failed to start services');
    throw startupError;
  }
  // Give services time to fully initialize
  await new Promise((resolve) => {
    setTimeout(resolve, 5000);
  });
}
// ─────────────────────────────────────────────────────────────────────────────
// Main
// ─────────────────────────────────────────────────────────────────────────────
/**
 * Script entry point: start the services, confirm the health endpoint answers
 * with a 2xx status within ten seconds, then run the suite.
 *
 * Exits the process with code 1 when the health probe fails, reporting why
 * (timeout, connection error or HTTP status) so the failure can be diagnosed
 * without re-running.
 */
async function main(): Promise<void> {
  await ensureServicesRunning();
  // The health endpoint sits next to the API root, not beneath it.
  const healthUrl = TRUTH_SERVICE_URL.replace('/api/truth', '/health');
  try {
    const response = await fetch(healthUrl, {
      signal: AbortSignal.timeout(10000),
    });
    if (!response.ok) throw new Error(`Service not healthy (HTTP ${response.status})`);
  } catch (error) {
    // Surface the cause instead of discarding it.
    const reason = error instanceof Error ? error.message : String(error);
    console.error('✗ Truth validation service not available after startup');
    console.error(` Expected at: ${TRUTH_SERVICE_URL}`);
    console.error(` Reason: ${reason}`);
    console.error(' Check logs for errors\n');
    process.exit(1);
  }
  await runTests();
}

// Anything that escapes main() (e.g. service startup failure) is fatal.
main().catch((error) => {
  console.error('E2E test error:', error);
  process.exit(1);
});