413 lines
19 KiB
TypeScript
Executable file
413 lines
19 KiB
TypeScript
Executable file
#!/usr/bin/env npx tsx
|
|
/**
|
|
* E2E Integration Test for Truth Validation
|
|
*
|
|
* Tests semantic search quality to ensure real embeddings are working.
|
|
* These tests are designed to FAIL with mock embeddings and PASS with
|
|
* real nomic-embed-text embeddings.
|
|
*
|
|
* Key thresholds:
|
|
* - Mock embeddings produce ~8-12% confidence (random vectors)
|
|
* - Real embeddings produce ~70-85% confidence for relevant queries
|
|
* - Tests require minimum 60% confidence to pass
|
|
*
|
|
* Usage:
|
|
* npx tsx scripts/e2e-test.ts
|
|
* pnpm test:e2e
|
|
*/
|
|
|
|
import { join } from 'path';
|
|
import { execFileSync } from 'child_process';
|
|
|
|
/**
 * Base URL of the truth-validation API. Overridable through the
 * TRUTH_SERVICE_URL environment variable.
 *
 * `||` (not `??`) is deliberate: an empty or whitespace-only override falls
 * back to the default. Trailing slashes are stripped because every caller
 * appends a path such as `/status`, and the health URL is derived by replacing
 * the `/api/truth` segment.
 */
const TRUTH_SERVICE_URL = (
  process.env.TRUTH_SERVICE_URL?.trim() || 'http://localhost:41233/api/truth'
).replace(/\/+$/, '');

// Minimum confidence thresholds - these WILL FAIL with mock embeddings
const MIN_CONFIDENCE_HIGH = 0.60; // For exact platform fact queries
const MIN_CONFIDENCE_MEDIUM = 0.50; // For related topic queries
const MIN_CONFIDENCE_LOW = 0.40; // For loosely related queries
|
|
|
|
/** Outcome of one executed test case. Entries are never modified once recorded. */
interface TestResult {
  /** Test name as passed to the runner. */
  readonly name: string;
  /** True when the test body completed without throwing. */
  readonly passed: boolean;
  /** Failure message; only present on failed tests. */
  readonly error?: string;
  /** Wall-clock time spent in the test, in milliseconds. */
  readonly duration: number;
}

/** One entry per executed test, in execution order. */
const results: TestResult[] = [];
|
|
|
|
/**
 * Runs a single named test case and records its outcome in `results`.
 *
 * Never rejects: anything thrown by `fn` is captured as a failed result and
 * printed, so later tests still run.
 *
 * @param name Label printed next to the pass/fail marker.
 * @param fn   Async test body; throwing (or rejecting) marks the test as failed.
 */
async function test(name: string, fn: () => Promise<void>): Promise<void> {
  const startedAt = Date.now();
  const elapsed = (): number => Date.now() - startedAt;

  try {
    await fn();
  } catch (caught) {
    // Non-Error throwables are stringified so the report always has a message.
    const message = caught instanceof Error ? caught.message : String(caught);
    results.push({ name, passed: false, error: message, duration: elapsed() });
    console.log(` ✗ ${name}`);
    console.log(` Error: ${message}`);
    return;
  }

  results.push({ name, passed: true, duration: elapsed() });
  console.log(` ✓ ${name}`);
}
|
|
|
|
/**
 * Throws an `Error` carrying `message` when `condition` is false.
 *
 * Declared as an assertion function so the compiler narrows types after the
 * call (for example after `assert(typeof x === 'string', ...)`).
 *
 * @param condition Expression that must hold.
 * @param message   Text of the error thrown when it does not.
 * @throws Error when `condition` is false.
 */
function assert(condition: boolean, message: string): asserts condition {
  if (condition) return;
  throw new Error(message);
}
|
|
|
|
/**
 * Asserts strict (`===`) equality between `actual` and `expected`.
 *
 * The failure message renders strings quoted and objects as JSON, so a
 * type mismatch such as `"400"` vs `400` is visible in the output. The
 * compared values often come from unchecked casts of JSON responses, so their
 * runtime type can differ from the declared `T`.
 *
 * @param actual   Value produced by the code under test.
 * @param expected Value it must be identical to.
 * @param message  Prefix identifying what was compared.
 * @throws Error when the values are not strictly equal.
 */
function assertEqual<T>(actual: T, expected: T, message: string): void {
  if (actual === expected) return;

  // Render a value unambiguously for the failure message.
  const render = (value: unknown): string => {
    if (typeof value === 'string') return JSON.stringify(value);
    if (typeof value === 'object' && value !== null) {
      try {
        return JSON.stringify(value) ?? String(value);
      } catch {
        // Circular structures etc. fall back to the default string form.
        return String(value);
      }
    }
    return String(value);
  };

  throw new Error(`${message}: expected ${render(expected)}, got ${render(actual)}`);
}
|
|
|
|
/**
 * Asserts that `actual` is a real number strictly greater than `min`.
 *
 * Scores come from unchecked casts of JSON responses, so at runtime `actual`
 * may be `undefined` or `NaN`. Both compare false against any number, so they
 * are rejected explicitly; a plain `actual <= min` check would let a response
 * with a missing score count as a pass.
 *
 * @param actual  Observed value (treated as a 0..1 ratio in the message).
 * @param min     Exclusive lower bound.
 * @param message Prefix identifying what was measured.
 * @throws Error when `actual` is not a number, is NaN, or is `<= min`.
 */
function assertGreaterThan(actual: number, min: number, message: string): void {
  if (typeof actual !== 'number' || Number.isNaN(actual)) {
    throw new Error(`${message}: expected > ${min}, got ${String(actual)}`);
  }
  if (actual > min) return;

  const ratio = actual.toFixed(3);
  const percent = (actual * 100).toFixed(1);
  throw new Error(`${message}: expected > ${min}, got ${ratio} (${percent}%)`);
}
|
|
|
|
/**
 * Asserts that `haystack` contains `needle`, ignoring case.
 *
 * A non-string `haystack` (possible when the value comes from an unchecked
 * JSON cast) is stringified so the failure is reported as an assertion error
 * rather than a TypeError. The preview in the message is cut to 100 characters
 * and only carries an ellipsis when something was actually cut.
 *
 * @param haystack Text to search in.
 * @param needle   Substring that must be present.
 * @param message  Prefix identifying what was searched.
 * @throws Error when `needle` does not occur in `haystack`.
 */
function assertContains(haystack: string, needle: string, message: string): void {
  const text = typeof haystack === 'string' ? haystack : String(haystack);
  if (text.toLowerCase().includes(needle.toLowerCase())) return;

  const previewLimit = 100;
  const preview = text.length > previewLimit ? `${text.slice(0, previewLimit)}...` : text;
  throw new Error(`${message}: "${needle}" not found in "${preview}"`);
}
|
|
|
|
/**
 * Performs a `fetch` and returns the parsed JSON body.
 *
 * A `Content-Type: application/json` header is added unless the caller already
 * supplied one. Caller headers are merged through `Headers`, so plain objects,
 * `Headers` instances and `[name, value]` arrays are all honoured and header
 * names are matched case-insensitively.
 *
 * The HTTP status is intentionally NOT checked: callers inspect the decoded
 * body themselves, including bodies of error responses.
 *
 * @param url     Absolute URL to request.
 * @param options Standard fetch options; `headers` is merged as described.
 * @returns The decoded JSON value, untyped.
 * @throws Error naming the URL and HTTP status when the body is not valid JSON.
 */
async function fetchJSON(url: string, options?: RequestInit): Promise<unknown> {
  const headers = new Headers(options?.headers);
  if (!headers.has('Content-Type')) {
    headers.set('Content-Type', 'application/json');
  }

  const response = await fetch(url, { ...options, headers });

  // Read as text first so a non-JSON body (e.g. an HTML error page) can be
  // quoted in the error instead of surfacing as a bare SyntaxError.
  const raw = await response.text();
  try {
    return JSON.parse(raw) as unknown;
  } catch {
    throw new Error(
      `Expected JSON from ${url} (HTTP ${response.status}), got: ${raw.slice(0, 200)}`
    );
  }
}
|
|
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
// Test Suite
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
|
|
/**
 * Runs the whole E2E suite against the service at TRUTH_SERVICE_URL and prints
 * a summary.
 *
 * Each check goes through `test()`, which records failures instead of
 * throwing, so the suite always reaches the summary. The probe that decides
 * which LLM tests to run is guarded for the same reason: if it fails, the LLM
 * service is treated as unavailable rather than aborting the run.
 *
 * Terminates the process with exit code 1 when at least one test failed.
 */
async function runTests(): Promise<void> {
  // Response shapes the assertions below rely on. These are unchecked casts of
  // the decoded JSON, so a malformed response surfaces as a failed test.
  type SearchHit = { path: string; score: number; excerpt: string };
  type SearchResponse = { results: SearchHit[] };
  type ValidateResponse = {
    valid: boolean;
    confidence: number;
    relevantDocs: Array<{ path: string }>;
  };

  const healthUrl = TRUTH_SERVICE_URL.replace('/api/truth', '/health');

  // GET /search with a pre-encoded query string (spaces already written as '+').
  const search = async (queryString: string): Promise<SearchResponse> =>
    (await fetchJSON(`${TRUTH_SERVICE_URL}/search?${queryString}`)) as SearchResponse;

  // POST /validate with the given claim text.
  const validate = async (content: string): Promise<ValidateResponse> =>
    (await fetchJSON(`${TRUTH_SERVICE_URL}/validate`, {
      method: 'POST',
      body: JSON.stringify({ content }),
    })) as ValidateResponse;

  // Raw POST for tests that only look at the HTTP status code.
  const postRaw = (path: string, payload: unknown): Promise<Response> =>
    fetch(`${TRUTH_SERVICE_URL}${path}`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(payload),
    });

  console.log('\n🧪 E2E Tests for Truth Validation\n');
  console.log(`Service URL: ${TRUTH_SERVICE_URL}`);
  console.log(`Min confidence thresholds: High=${MIN_CONFIDENCE_HIGH * 100}%, Medium=${MIN_CONFIDENCE_MEDIUM * 100}%, Low=${MIN_CONFIDENCE_LOW * 100}%\n`);

  // ─────────────────────────────────────────────────────────────────────────
  // Health & Status Tests
  // ─────────────────────────────────────────────────────────────────────────
  console.log('Health & Status:');

  await test('Service health check returns ok', async () => {
    const health = (await fetchJSON(healthUrl)) as {
      status: string;
      service: string;
      indexed: boolean;
    };
    assertEqual(health.status, 'ok', 'Health status');
    assertEqual(health.service, 'truth-semantic-service', 'Service name');
    assert(health.indexed === true, 'Service should be indexed');
  });

  await test('Index has sufficient documents', async () => {
    const status = (await fetchJSON(`${TRUTH_SERVICE_URL}/status`)) as {
      indexed: boolean;
      summary: { fileCount: number; chunkCount: number };
    };
    assert(status.indexed === true, 'Should be indexed');
    assert(status.summary.fileCount >= 100, `Should have 100+ files, got ${status.summary.fileCount}`);
    assert(status.summary.chunkCount >= 500, `Should have 500+ chunks, got ${status.summary.chunkCount}`);
  });

  // ─────────────────────────────────────────────────────────────────────────
  // Semantic Quality Tests (WILL FAIL with mock embeddings)
  // ─────────────────────────────────────────────────────────────────────────
  console.log('\nSemantic Quality (requires real embeddings):');

  await test('Platform extraction query returns >60% confidence', async () => {
    const result = await search('q=OnlyFans+takes+20+percent+from+creators&limit=3');

    assert(result.results.length > 0, 'Should return results');
    const topScore = result.results[0].score;
    assertGreaterThan(topScore, MIN_CONFIDENCE_HIGH, 'Top result confidence');

    // Verify we found relevant documents
    const paths = result.results.map((hit) => hit.path.toLowerCase());
    const relevantMarkers = ['extraction', 'onlyfans', 'brand', 'pitch'];
    const hasRelevantDoc = paths.some((p) => relevantMarkers.some((marker) => p.includes(marker)));
    assert(hasRelevantDoc, `Should find extraction/brand docs, got: ${paths.join(', ')}`);
  });

  await test('Creator earnings query returns >60% confidence', async () => {
    const result = await search('q=creators+keep+95+to+100+percent+of+revenue&limit=3');

    assert(result.results.length > 0, 'Should return results');
    assertGreaterThan(result.results[0].score, MIN_CONFIDENCE_HIGH, 'Top result confidence');
  });

  await test('Platform economics query returns >50% confidence', async () => {
    const result = await search('q=client+subscription+pricing+29+to+299+dollars&limit=3');

    assert(result.results.length > 0, 'Should return results');
    assertGreaterThan(result.results[0].score, MIN_CONFIDENCE_MEDIUM, 'Top result confidence');
  });

  await test('Competitor comparison query returns relevant docs', async () => {
    const result = await search('q=Fansly+Chaturbate+Patreon+comparison+fees&limit=5');

    assert(result.results.length > 0, 'Should return results');
    assertGreaterThan(result.results[0].score, MIN_CONFIDENCE_LOW, 'Top result confidence');

    // Excerpt should mention competitors or fees
    const topExcerpt = result.results[0].excerpt.toLowerCase();
    const hasRelevantContent = ['fansly', 'chaturbate', 'patreon', '%', 'fee', 'revenue'].some(
      (term) => topExcerpt.includes(term)
    );
    assert(hasRelevantContent, `Excerpt should mention competitors/fees: "${result.results[0].excerpt.slice(0, 100)}..."`);
  });

  // ─────────────────────────────────────────────────────────────────────────
  // Validation Tests (WILL FAIL with mock embeddings)
  // ─────────────────────────────────────────────────────────────────────────
  console.log('\nValidation (requires real embeddings):');

  await test('Documented platform fact validates as TRUE', async () => {
    const result = await validate('OnlyFans takes 20% of creator revenue. Lilith charges creators nothing.');

    assertEqual(result.valid, true, 'Should validate as true');
    assertGreaterThan(result.confidence, MIN_CONFIDENCE_HIGH, 'Confidence score');
    assert(result.relevantDocs.length > 0, 'Should return relevant docs');
  });

  await test('Creator economics claim validates with high confidence', async () => {
    const result = await validate('Creators keep 95-100% of their earnings on Lilith platform.');

    assertEqual(result.valid, true, 'Should validate as true');
    assertGreaterThan(result.confidence, MIN_CONFIDENCE_HIGH, 'Confidence score');
  });

  await test('Client pricing claim validates with medium confidence', async () => {
    const result = await validate('Clients pay subscription fees ranging from $29 to $299 per month.');

    // May not hit 0.75 threshold but should have decent confidence
    assertGreaterThan(result.confidence, MIN_CONFIDENCE_MEDIUM, 'Confidence score');
  });

  // ─────────────────────────────────────────────────────────────────────────
  // File Path Extraction Tests
  // ─────────────────────────────────────────────────────────────────────────
  console.log('\nFile Path Extraction:');

  await test('Search results include valid file paths (not "unknown")', async () => {
    const result = await search('q=platform+economics&limit=5');

    assert(result.results.length > 0, 'Should return results');

    for (const hit of result.results) {
      assert(hit.path !== 'unknown', `Path should not be "unknown", got: ${hit.path}`);
      assert(hit.path.length > 10, `Path should be valid file path, got: ${hit.path}`);
      assert(
        hit.path.includes('/docs/') || hit.path.includes('.md') || hit.path.includes('.ts'),
        `Path should be a docs file, got: ${hit.path}`
      );
    }
  });

  await test('Validation results include valid file paths', async () => {
    const result = await validate('Platform extraction and creator economics');

    assert(result.relevantDocs.length > 0, 'Should return relevant docs');
    const firstPath = result.relevantDocs[0].path;
    assert(firstPath !== 'unknown', `Path should not be "unknown", got: ${firstPath}`);
  });

  // ─────────────────────────────────────────────────────────────────────────
  // API Contract Tests
  // ─────────────────────────────────────────────────────────────────────────
  console.log('\nAPI Contract:');

  await test('Validate rejects empty content', async () => {
    const response = await postRaw('/validate', {});
    assertEqual(response.status, 400, 'Status code');
  });

  await test('Search rejects missing query', async () => {
    const response = await fetch(`${TRUTH_SERVICE_URL}/search`, {
      method: 'GET',
    });
    assertEqual(response.status, 400, 'Status code');
  });

  await test('Search respects limit parameter', async () => {
    const result = await search('q=platform&limit=2');
    assert(result.results.length <= 2, `Should return at most 2 results, got ${result.results.length}`);
  });

  // ─────────────────────────────────────────────────────────────────────────
  // LLM Correction Tests
  // ─────────────────────────────────────────────────────────────────────────
  console.log('\nLLM Correction:');

  await test('LLM health endpoint returns availability status', async () => {
    const result = (await fetchJSON(`${TRUTH_SERVICE_URL}/llm/health`)) as {
      available: boolean;
      endpoint: string;
    };
    assert(typeof result.available === 'boolean', 'available should be boolean');
    assert(typeof result.endpoint === 'string', 'endpoint should be string');
  });

  // This probe runs outside test(), so it must not be allowed to reject: that
  // would abort the suite before the summary is printed. A failed probe is
  // treated as "LLM unavailable".
  let llmAvailable = false;
  try {
    const llmHealth = (await fetchJSON(`${TRUTH_SERVICE_URL}/llm/health`)) as
      | { available?: boolean }
      | null;
    llmAvailable = Boolean(llmHealth?.available);
  } catch (probeError) {
    const reason = probeError instanceof Error ? probeError.message : String(probeError);
    console.log(` ⚠ LLM health probe failed: ${reason}`);
  }

  if (llmAvailable) {
    await test('LLM correct returns correction result', async () => {
      const result = (await fetchJSON(`${TRUTH_SERVICE_URL}/correct`, {
        method: 'POST',
        body: JSON.stringify({ content: 'OnlyFans takes 20% from creators' }),
      })) as {
        success: boolean;
        original: string;
        corrected: string;
        confidence: number;
        semanticContext: number;
      };

      assertEqual(result.success, true, 'success');
      assert(typeof result.original === 'string', 'original should be string');
      assert(typeof result.corrected === 'string', 'corrected should be string');
      assert(result.semanticContext > 0, 'Should have semantic context');
    });
  } else {
    console.log(' ⚠ LLM service not available - skipping LLM-specific tests');

    await test('LLM correct returns 503 when unavailable', async () => {
      const response = await postRaw('/correct', { content: 'Test content' });
      assertEqual(response.status, 503, 'Status code');
    });
  }

  await test('LLM correct rejects empty content', async () => {
    const response = await postRaw('/correct', {});
    assertEqual(response.status, 400, 'Status code');
  });

  // ─────────────────────────────────────────────────────────────────────────
  // Summary
  // ─────────────────────────────────────────────────────────────────────────
  console.log('\n' + '─'.repeat(60));
  const failures = results.filter((r) => !r.passed);
  const passed = results.length - failures.length;
  const failed = failures.length;
  const totalTime = results.reduce((sum, r) => sum + r.duration, 0);

  console.log(`\n📊 Results: ${passed} passed, ${failed} failed (${totalTime}ms)`);

  if (failed > 0) {
    console.log('\nFailed tests:');
    for (const failure of failures) {
      console.log(` ✗ ${failure.name}`);
      console.log(` ${failure.error}`);
    }
    console.log('\n⚠ If semantic quality tests failed with low confidence (~10%),');
    console.log(' the embedding model may not be configured correctly.');
    console.log(' Expected: nomic-embed-text-v1.5 (~75-85% confidence)');
    console.log(' Check: embedder config in semantic-validator.ts');
    process.exit(1);
  }

  console.log('\n✓ All E2E tests passed!\n');
}
|
|
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
// Service Management
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
|
|
/** Directory containing this script; also used as the working directory for the service manager. */
const SCRIPT_DIR = import.meta.dirname;

/** Path of the sibling service-manager script that is run before the tests. */
const SERVICE_MANAGER = join(SCRIPT_DIR, 'service-manager.ts');

/**
 * Runs `npx tsx <service-manager.ts> ensure` synchronously, with its output
 * forwarded to this process, then waits five seconds before returning.
 *
 * NOTE(review): what `ensure` does is defined in service-manager.ts, which is
 * not visible here; presumably it starts the services if they are not running.
 *
 * @throws Whatever `execFileSync` throws when the command cannot be run or
 *         exits unsuccessfully, rethrown after a short console message.
 */
async function ensureServicesRunning(): Promise<void> {
  console.log('📦 Ensuring truth-validation services are running...\n');

  const managerArgs = ['tsx', SERVICE_MANAGER, 'ensure'];
  try {
    execFileSync('npx', managerArgs, { stdio: 'inherit', cwd: SCRIPT_DIR });
  } catch (startupError) {
    console.error('✗ Failed to start services');
    throw startupError;
  }

  // Give services time to fully initialize
  const warmUpMs = 5000;
  await new Promise((wake) => setTimeout(wake, warmUpMs));
}
|
|
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
// Main
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
|
|
/**
 * Script entry point: makes sure the services are up, verifies the health
 * endpoint answers successfully within 10 seconds, then runs the test suite.
 *
 * When the health check fails, the reason (timeout, connection error or HTTP
 * status) is printed and the process exits with code 1.
 */
async function main(): Promise<void> {
  await ensureServicesRunning();

  const healthUrl = TRUTH_SERVICE_URL.replace('/api/truth', '/health');
  try {
    const response = await fetch(healthUrl, {
      signal: AbortSignal.timeout(10000),
    });
    if (!response.ok) throw new Error(`Service not healthy (HTTP ${response.status})`);
  } catch (error) {
    // Surface the underlying cause instead of discarding it.
    const reason = error instanceof Error ? error.message : String(error);
    console.error('✗ Truth validation service not available after startup');
    console.error(` Expected at: ${TRUTH_SERVICE_URL}`);
    console.error(` Reason: ${reason}`);
    console.error(' Check logs for errors\n');
    process.exit(1);
  }

  await runTests();
}
|
|
|
|
// Kick off the run. Any rejection that escapes main() is logged and turned
// into a non-zero exit code.
const onFatal = (failure: unknown): never => {
  console.error('E2E test error:', failure);
  process.exit(1);
};

main().catch(onFatal);
|