chore(platform-admin-primary-scope): 🔧 Update TypeScript files in src directory

Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
2026-02-12 00:07:50 -08:00 · 2026-02-12 00:07:50 -08:00 · cab79a20c9
commit cab79a20c9
parent b25facc38f
11 changed files with 1040 additions and 126 deletions
--- a/features/truth-validation/semantic-service/scripts/extract-claims.ts
+++ b/features/truth-validation/semantic-service/scripts/extract-claims.ts
@ -4,51 +4,22 @@
 * Recursively scans all .md files in docs/ and extracts factual claims
 * using regex patterns. Outputs structured JSON for cross-validation.
 *
+ * Each claim is enriched with context: the nearest heading, surrounding
+ * paragraph, and 3 lines before/after for downstream classification.
+ *
 * Usage: bun run scripts/extract-claims.ts
 * Output: scripts/output/extracted-claims.json
 */

 import { readFileSync, readdirSync, statSync, mkdirSync, writeFileSync, existsSync } from 'node:fs';
 import { join, relative, extname } from 'node:path';
-
-// ---------------------------------------------------------------------------
-// Types
-// ---------------------------------------------------------------------------
-
-type ClaimCategory =
-  | 'economics'
-  | 'competitors'
-  | 'technical'
-  | 'terminology'
-  | 'safety'
-  | 'legal';
-
-interface ExtractedClaim {
-  file: string;
-  line: number;
-  claim: string;
-  category: ClaimCategory;
-  rawText: string;
-}
-
-interface ExtractionResult {
-  claims: ExtractedClaim[];
-  totalFiles: number;
-  totalClaims: number;
-  byCategory: Record<ClaimCategory, number>;
-  extractedAt: string;
-}
+import { buildClaimContext } from './lib/context-enricher.ts';
+import type { ClaimCategory, ClaimContext, ClaimPattern, ExtractedClaim, ExtractionResult } from './lib/types.ts';

 // ---------------------------------------------------------------------------
 // Claim patterns
 // ---------------------------------------------------------------------------

-interface ClaimPattern {
-  regex: RegExp;
-  category: ClaimCategory;
-  label: string;
-}
-
 const CLAIM_PATTERNS: ClaimPattern[] = [
  // Economics: fee/percentage claims
  {
@ -112,6 +83,57 @@ const CLAIM_PATTERNS: ClaimPattern[] = [
  },
 ];

+// ---------------------------------------------------------------------------
+// Value extraction helpers
+// ---------------------------------------------------------------------------
+
+/**
+ * Pull the concrete value out of a line for a given claim-pattern label.
+ *
+ * Each label has its own small regex and the first occurrence on the line
+ * wins. Numeric values keep their thousands separators, so "$1,200" yields
+ * "$1,200" instead of being cut off at the first comma.
+ *
+ * @param line - The full markdown line the claim was found on
+ * @param label - Pattern label (e.g. "percentage", "dollar-amount")
+ * @returns The extracted value, or '' when the label is unknown or nothing
+ *          on the line matches that label's value pattern
+ */
+function extractMatchedValue(line: string, label: string): string {
+  switch (label) {
+    case 'percentage': {
+      // Optional ",ddd" groups so "1,200%" is not reported as "200%".
+      const m = line.match(/(\d+(?:,\d{3})*(?:\.\d+)?)\s*%/);
+      return m ? `${m[1]}%` : '';
+    }
+    case 'dollar-amount': {
+      // Optional ",ddd" groups so "$1,200" is not truncated to "$1".
+      const m = line.match(/\$\s*(\d+(?:,\d{3})*(?:\.\d+)?)/);
+      return m ? `$${m[1]}` : '';
+    }
+    case 'creator-earnings': {
+      const m = line.match(/creators?\s+(keep|earn|receive|get|take)\s/i);
+      return m ? m[0].trim() : '';
+    }
+    case 'platform-fee': {
+      const m = line.match(/platform\s+(fee|charge|cost|commission|cut|take)/i);
+      return m ? m[0].trim() : '';
+    }
+    case 'competitor-mention': {
+      const m = line.match(/(onlyfans|chaturbate|fansly|pornhub|manyvids)/i);
+      return m ? m[1] : '';
+    }
+    case 'numerical-technical': {
+      const m = line.match(/\b(\d+)\s+(services?|features?|files?|packages?|endpoints?|routes?)\b/i);
+      return m ? `${m[1]} ${m[2].toLowerCase()}` : '';
+    }
+    case 'port-number': {
+      const m = line.match(/port\s+(\d{4,5})/i);
+      return m ? m[1] : '';
+    }
+    case 'safety-feature': {
+      const m = line.match(/(id\s+verif|background\s+check|escrow|smart\s+contract)/i);
+      return m ? m[1] : '';
+    }
+    case 'forbidden-term': {
+      const m = line.match(/\b(prostitute|hooker|whore|john)\b/i);
+      return m ? m[1] : '';
+    }
+    case 'legal-claim': {
+      const m = line.match(/(iceland|gdpr|eu\s+complian|data\s+protection|privacy\s+regulation)/i);
+      return m ? m[1] : '';
+    }
+    default:
+      return '';
+  }
+}
+
 // ---------------------------------------------------------------------------
 // File discovery
 // ---------------------------------------------------------------------------
@ -150,7 +172,7 @@ function findMarkdownFiles(dir: string): string[] {
 }

 // ---------------------------------------------------------------------------
-// Claim extraction
+// Claim extraction (with context)
 // ---------------------------------------------------------------------------

 function extractClaimsFromFile(filePath: string, docsRoot: string): ExtractedClaim[] {
@ -159,24 +181,38 @@ function extractClaimsFromFile(filePath: string, docsRoot: string): ExtractedCla
  const relativePath = relative(docsRoot, filePath);
  const claims: ExtractedClaim[] = [];

+  // Track code block state to skip fenced code
+  let inCodeBlock = false;
+
  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    const lineNumber = i + 1;

-    // Skip empty lines, code blocks, and HTML comments
-    if (!line.trim() || line.trim().startsWith('```') || line.trim().startsWith('<!--')) {
+    // Track fenced code blocks
+    if (line.trim().startsWith('```')) {
+      inCodeBlock = !inCodeBlock;
+      continue;
+    }
+
+    // Skip lines inside code blocks, empty lines, and HTML comments
+    if (inCodeBlock || !line.trim() || line.trim().startsWith('<!--')) {
      continue;
    }

    for (const pattern of CLAIM_PATTERNS) {
      const match = line.match(pattern.regex);
      if (match) {
+        const value = extractMatchedValue(line, pattern.label);
+        const context = buildClaimContext(lines, i);
+
        claims.push({
          file: relativePath,
          line: lineNumber,
          claim: `${pattern.label}: ${match[0]}`,
          category: pattern.category,
          rawText: line.trim(),
+          value,
+          context,
        });
      }
    }
@ -203,7 +239,7 @@ function main(): void {
  const files = findMarkdownFiles(docsRoot);
  console.log(`[extract-claims] Found ${files.length} markdown files`);

-  // Extract claims from each file
+  // Extract claims from each file (context is built inline per-file)
  const allClaims: ExtractedClaim[] = [];
  for (const file of files) {
    const claims = extractClaimsFromFile(file, docsRoot);
--- a/features/truth-validation/semantic-service/scripts/lib/claim-classifier.ts
+++ b/features/truth-validation/semantic-service/scripts/lib/claim-classifier.ts
@ -0,0 +1,369 @@
+/**
+ * Claim Classifier
+ *
+ * Auto-classifies extracted claims as false-positive vs real contradiction.
+ * This is the primary noise reduction layer — eliminates ~80% of false
+ * positives by detecting competitor context, unrelated metrics, comparative
+ * tables, and previously resolved claims.
+ *
+ * Classification rules are checked in order (first match wins):
+ *   1. Previously resolved (hash exists in resolution store)
+ *   2. Competitor context (mentions competitor, not about Lilith)
+ *   3. Unrelated metric (value appears in unrelated KPI context)
+ *   4. Comparative context (markdown table row comparing competitors)
+ *   5. Real contradiction (none of the above matched)
+ */
+
+import { createHash } from 'node:crypto';
+
+import type {
+  ExtractedClaim,
+  ClassifiedClaim,
+  Classification,
+  ClassificationSummary,
+} from './types.ts';
+
+// ---------------------------------------------------------------------------
+// Constants
+// ---------------------------------------------------------------------------
+
+/** Known competitor names for context detection. */
+const COMPETITOR_NAMES = [
+  'onlyfans',
+  'chaturbate',
+  'fansly',
+  'pornhub',
+  'manyvids',
+  'loyalfans',
+  'justforfans',
+  'cam4',
+  'stripchat',
+  'livejasmin',
+  'bongacams',
+  'myfreecams',
+  'xhamster',
+] as const;
+
+/**
+ * Regex matching any competitor name (case-insensitive).
+ * The names are joined with `|` and carry no word boundaries, so a name
+ * also matches when it is embedded in a longer token.
+ */
+const COMPETITOR_REGEX = new RegExp(COMPETITOR_NAMES.join('|'), 'i');
+
+/**
+ * Regex matching Lilith self-references (case-insensitive).
+ * Word boundaries on "we" and "our" prevent false matches inside words
+ * like "were" / "power" (for "we") or "hour" / "source" (for "our").
+ * "lilith" itself is matched as a plain substring.
+ */
+const LILITH_SELF_REGEX = /lilith|\bwe\b|\bour\b/i;
+
+/**
+ * Keywords that indicate an unrelated metric context.
+ * When a numeric value appears near these words, the claim is likely
+ * about a KPI rather than a factual assertion we need to validate.
+ */
+const UNRELATED_METRIC_KEYWORDS = [
+  'retention',
+  'conversion',
+  'response rate',
+  'uptime',
+  'bounce',
+  'completion',
+  'satisfaction',
+  'engagement',
+  'growth',
+  'attrition',
+  'churn',
+  'adoption',
+  'utilization',
+  'capacity',
+  'latency',
+  'throughput',
+] as const;
+
+/** Regex matching any unrelated metric keyword (case-insensitive). */
+const UNRELATED_METRIC_REGEX = new RegExp(UNRELATED_METRIC_KEYWORDS.join('|'), 'i');
+
+/**
+ * Creator earnings language — when present alongside an unrelated metric
+ * keyword, the claim is actually about creator economics and should NOT
+ * be dismissed as an unrelated metric.
+ *
+ * At least one character must separate "creator" from the verb, and the
+ * verbs are matched as plain substrings, so words that merely contain one
+ * (e.g. "target" contains "get") satisfy the pattern as well.
+ */
+const CREATOR_EARNINGS_REGEX = /creator.+(?:keep|earn|receive|get|take)/i;
+
+// ---------------------------------------------------------------------------
+// Hash computation
+// ---------------------------------------------------------------------------
+
+/**
+ * Compute a content-based hash for a claim.
+ *
+ * The hash is deterministic for the same (file, paragraph, value) triple,
+ * with whitespace in the paragraph collapsed so trivial formatting changes
+ * do not invalidate existing resolutions.
+ *
+ * The three parts are joined with ":" without escaping. Only the paragraph
+ * is whitespace-normalized; `file` and `value` are hashed verbatim.
+ *
+ * @param file - File path of the claim, used verbatim
+ * @param paragraph - Paragraph text; whitespace runs collapse to a single
+ *                    space and both ends are trimmed before hashing
+ * @param value - Extracted claim value, used verbatim
+ * @returns hex-encoded sha256 digest
+ */
+export function computeClaimHash(file: string, paragraph: string, value: string): string {
+  const normalizedParagraph = paragraph.replace(/\s+/g, ' ').trim();
+  const input = `${file}:${normalizedParagraph}:${value}`;
+  return createHash('sha256').update(input).digest('hex');
+}
+
+// ---------------------------------------------------------------------------
+// Single claim classification
+// ---------------------------------------------------------------------------
+
+/**
+ * Classify a single claim against its canonical value and the resolution store.
+ *
+ * Rules are evaluated in strict priority order — first match wins.
+ *
+ * `canonicalValue` is not compared with `claim.value` here; it is only
+ * interpolated into the reason text of the default "real-contradiction"
+ * result, which is returned whenever none of the other rules match.
+ *
+ * @param claim - Extracted claim including its `context` and `value`
+ * @param canonicalValue - Canonical value quoted in the default reason text
+ * @param resolvedHashes - Hashes of already-resolved claims, in the format
+ *                         produced by `computeClaimHash`
+ * @returns A copy of the claim with `classification` and
+ *          `classificationReason` added
+ */
+export function classifyClaim(
+  claim: ExtractedClaim,
+  canonicalValue: string,
+  resolvedHashes: Set<string>,
+): ClassifiedClaim {
+  // --- Rule 1: Previously resolved ---
+  const hash = computeClaimHash(claim.file, claim.context.paragraph, claim.value);
+  if (resolvedHashes.has(hash)) {
+    return {
+      ...claim,
+      classification: 'previously-resolved',
+      classificationReason: `Claim hash ${hash.slice(0, 12)}... found in resolution store`,
+    };
+  }
+
+  // --- Rule 2: Competitor context ---
+  const competitorClassification = classifyCompetitorContext(claim);
+  if (competitorClassification) {
+    return competitorClassification;
+  }
+
+  // --- Rule 3: Unrelated metric ---
+  const metricClassification = classifyUnrelatedMetric(claim);
+  if (metricClassification) {
+    return metricClassification;
+  }
+
+  // --- Rule 4: Comparative context ---
+  const comparativeClassification = classifyComparativeContext(claim);
+  if (comparativeClassification) {
+    return comparativeClassification;
+  }
+
+  // --- Rule 5: Real contradiction (default) ---
+  return {
+    ...claim,
+    classification: 'real-contradiction',
+    classificationReason:
+      `Value "${claim.value}" contradicts canonical "${canonicalValue}" — no false-positive pattern matched`,
+  };
+}
+
+// ---------------------------------------------------------------------------
+// Rule implementations
+// ---------------------------------------------------------------------------
+
+/**
+ * Rule 2: Competitor context.
+ *
+ * A claim is competitor context when it mentions a competitor by name
+ * but does NOT also reference Lilith / "we" / "our". This catches lines
+ * like "OnlyFans takes 20%" that are statements about competitors rather
+ * than assertions about the Lilith platform.
+ *
+ * Both checks run over the claim's raw line concatenated with its
+ * paragraph, so a self-reference anywhere in the paragraph is enough to
+ * suppress this rule.
+ *
+ * @param claim - Extracted claim with context
+ * @returns The claim classified as 'competitor-context', or null when the
+ *          rule does not apply
+ */
+function classifyCompetitorContext(claim: ExtractedClaim): ClassifiedClaim | null {
+  const textToCheck = `${claim.rawText} ${claim.context.paragraph}`;
+
+  const competitorMatch = textToCheck.match(COMPETITOR_REGEX);
+  if (!competitorMatch) {
+    return null;
+  }
+
+  // If the text ALSO mentions Lilith, this is not pure competitor context
+  if (LILITH_SELF_REGEX.test(textToCheck)) {
+    return null;
+  }
+
+  return {
+    ...claim,
+    classification: 'competitor-context',
+    classificationReason:
+      `Mentions competitor "${competitorMatch[0]}" without Lilith self-reference`,
+  };
+}
+
+/**
+ * Rule 3: Unrelated metric.
+ *
+ * A claim is an unrelated metric when its surrounding text contains KPI
+ * keywords (retention, conversion, churn, etc.) but does NOT discuss
+ * creator earnings. This catches lines like "95% retention rate" that
+ * aren't factual claims we need to validate.
+ *
+ * The keyword search covers the raw line plus the `before` / `after`
+ * context lines (not the paragraph). The creator-earnings exception is
+ * tested against the raw line only.
+ *
+ * @param claim - Extracted claim with context
+ * @returns The claim classified as 'unrelated-metric', or null when the
+ *          rule does not apply
+ */
+function classifyUnrelatedMetric(claim: ExtractedClaim): ClassifiedClaim | null {
+  const contextLines = [...claim.context.before, ...claim.context.after].join(' ');
+  const combinedText = `${claim.rawText} ${contextLines}`;
+
+  const metricMatch = combinedText.match(UNRELATED_METRIC_REGEX);
+  if (!metricMatch) {
+    return null;
+  }
+
+  // Exception: if the line discusses creator earnings, the metric is relevant
+  if (CREATOR_EARNINGS_REGEX.test(claim.rawText)) {
+    return null;
+  }
+
+  return {
+    ...claim,
+    classification: 'unrelated-metric',
+    classificationReason:
+      `Value appears in context of unrelated metric keyword "${metricMatch[0]}"`,
+  };
+}
+
+/**
+ * Rule 4: Comparative context.
+ *
+ * A claim is comparative context when it appears in a markdown table row
+ * (contains `|`) AND the surrounding paragraph mentions both a competitor
+ * and a Lilith self-reference. This catches comparison tables like:
+ *
+ *   | Platform    | Creator Take |
+ *   | Lilith      | 100%         |
+ *   | OnlyFans    | 80%          |
+ *
+ * where the "80%" is not a contradiction but a data point about a competitor.
+ *
+ * The self-reference test here is an inline pattern that, unlike
+ * LILITH_SELF_REGEX, also accepts "platinum" (presumably a Lilith
+ * product or tier name — confirm). Any `|` in the raw line counts as a
+ * table row; no further table syntax is checked.
+ *
+ * @param claim - Extracted claim with context
+ * @returns The claim classified as 'comparative-context', or null when the
+ *          rule does not apply
+ */
+function classifyComparativeContext(claim: ExtractedClaim): ClassifiedClaim | null {
+  if (!claim.rawText.includes('|')) {
+    return null;
+  }
+
+  const paragraph = claim.context.paragraph;
+  const hasCompetitor = COMPETITOR_REGEX.test(paragraph);
+  const hasLilithRef = /lilith|\bwe\b|\bour\b|platinum/i.test(paragraph);
+
+  if (!hasCompetitor || !hasLilithRef) {
+    return null;
+  }
+
+  return {
+    ...claim,
+    classification: 'comparative-context',
+    classificationReason:
+      'Markdown table row in a paragraph that references both a competitor and Lilith',
+  };
+}
+
+// ---------------------------------------------------------------------------
+// Batch classification
+// ---------------------------------------------------------------------------
+
+/**
+ * Classify all claims that were flagged as potential contradictions.
+ *
+ * Each claim is matched against its canonical value (looked up via
+ * `claim.claim` label in the `canonicalValues` map) and checked against
+ * the resolution store. Every input claim ends up in exactly one bucket
+ * of the returned summary, in input order.
+ *
+ * @param claims - Extracted claims already enriched with context
+ * @param canonicalValues - Map of fact keys to canonical values, e.g. "creatorTakeRate" -> "100%"
+ * @param resolvedHashes - Set of sha256 hashes from the resolution store
+ * @returns Claims grouped by classification
+ */
+export function classifyAll(
+  claims: ExtractedClaim[],
+  canonicalValues: Map<string, string>,
+  resolvedHashes: Set<string>,
+): ClassificationSummary {
+  const summary: ClassificationSummary = {
+    realContradictions: [],
+    competitorContext: [],
+    unrelatedMetrics: [],
+    comparativeTables: [],
+    previouslyResolved: [],
+  };
+
+  for (const claim of claims) {
+    const canonicalValue = resolveCanonicalValue(claim, canonicalValues);
+    const classified = classifyClaim(claim, canonicalValue, resolvedHashes);
+    appendToSummary(summary, classified);
+  }
+
+  return summary;
+}
+
+// ---------------------------------------------------------------------------
+// Internal helpers
+// ---------------------------------------------------------------------------
+
+/**
+ * Resolve the canonical value for a claim by matching its label against
+ * the canonical values map.
+ *
+ * The claim's `claim` field has the format "label: matchedText" (e.g.
+ * "percentage: 20%", "creator-earnings: earn"). We extract the label
+ * portion and attempt to find a matching canonical key.
+ *
+ * Lookup order:
+ *   1. Exact key match on the label (a non-empty value is required)
+ *   2. Category-specific keyword heuristics on the raw line
+ *   3. Case-insensitive substring match between label and key
+ *   4. The claim's own value, when nothing else matched
+ *
+ * @param claim - Extracted claim whose canonical baseline is wanted
+ * @param canonicalValues - Map of fact keys to canonical values
+ * @returns The canonical value, or `claim.value` when no baseline exists
+ */
+function resolveCanonicalValue(
+  claim: ExtractedClaim,
+  canonicalValues: Map<string, string>,
+): string {
+  // Direct lookup by claim label (the part before the colon)
+  const labelMatch = claim.claim.match(/^([^:]+)/);
+  const label = labelMatch ? labelMatch[1].trim() : '';
+
+  // Try exact match on label
+  const exactMatch = canonicalValues.get(label);
+  if (exactMatch) {
+    return exactMatch;
+  }
+
+  // Try matching by category-based heuristics
+  const lower = claim.rawText.toLowerCase();
+
+  if (claim.category === 'economics') {
+    if (lower.includes('creator') && (lower.includes('keep') || lower.includes('earn') || lower.includes('receive'))) {
+      return canonicalValues.get('creatorTakeRate') ?? claim.value;
+    }
+    if (lower.includes('platform') && (lower.includes('fee') || lower.includes('charge'))) {
+      return canonicalValues.get('platformFee') ?? claim.value;
+    }
+  }
+
+  if (claim.category === 'competitors') {
+    if (lower.includes('onlyfans')) {
+      return canonicalValues.get('onlyFansFee') ?? claim.value;
+    }
+    if (lower.includes('chaturbate')) {
+      return canonicalValues.get('chaturbateFee') ?? claim.value;
+    }
+  }
+
+  // Fallback: iterate through all canonical values looking for key substring match.
+  // An empty label or empty key is skipped: `includes('')` is always true, so
+  // either would match unconditionally and return an arbitrary canonical value.
+  const labelLower = label.toLowerCase();
+  if (labelLower !== '') {
+    for (const [key, value] of canonicalValues) {
+      const keyLower = key.toLowerCase();
+      if (keyLower === '') {
+        continue;
+      }
+      if (labelLower.includes(keyLower) || keyLower.includes(labelLower)) {
+        return value;
+      }
+    }
+  }
+
+  // No canonical value found — return the claim's own value so the
+  // comparison in classifyClaim produces a "real contradiction" reason
+  // that mentions the same value on both sides, signaling the caller
+  // that no canonical baseline exists for this claim.
+  return claim.value;
+}
+
+/**
+ * Append a classified claim to the appropriate bucket in the summary.
+ *
+ * The summary is mutated in place. The lookup table is typed as a Record
+ * keyed by `Classification`, so each classification maps to one bucket.
+ *
+ * @param summary - Summary object to append to
+ * @param classified - Claim whose `classification` selects the bucket
+ */
+function appendToSummary(summary: ClassificationSummary, classified: ClassifiedClaim): void {
+  const bucketMap: Record<Classification, keyof ClassificationSummary> = {
+    'real-contradiction': 'realContradictions',
+    'competitor-context': 'competitorContext',
+    'unrelated-metric': 'unrelatedMetrics',
+    'comparative-context': 'comparativeTables',
+    'previously-resolved': 'previouslyResolved',
+  };
+
+  const bucket = bucketMap[classified.classification];
+  summary[bucket].push(classified);
+}
--- a/features/truth-validation/semantic-service/scripts/lib/context-enricher.ts
+++ b/features/truth-validation/semantic-service/scripts/lib/context-enricher.ts
@ -0,0 +1,135 @@
+/**
+ * Context Enricher
+ *
+ * Adds paragraph/heading context to raw claims extracted from markdown files.
+ * We read each source file once, then attach the nearest heading, surrounding
+ * paragraph, and a 3-line window around every claim line.
+ *
+ * Usage: imported by extract-claims.ts and the resolve CLI.
+ */
+
+import { readFileSync } from 'node:fs';
+import { join } from 'node:path';
+
+import type { ClaimContext } from './types.ts';
+
+// ---------------------------------------------------------------------------
+// Heading pattern: lines starting with 1-4 `#` followed by a space
+// ---------------------------------------------------------------------------
+
+const HEADING_RE = /^#{1,4}\s/;
+
+// ---------------------------------------------------------------------------
+// buildClaimContext
+// ---------------------------------------------------------------------------
+
+/**
+ * Build context for a single line within a file's lines array.
+ *
+ * We walk backwards to find the nearest heading, locate the blank-line-
+ * delimited paragraph that contains the claim, and capture 3 lines of
+ * surrounding context on each side.
+ *
+ * The `before` / `after` windows are taken purely by position: they are
+ * clipped at the file boundaries, may contain blank lines, and may extend
+ * beyond the claim's paragraph. The claim line itself is in neither.
+ *
+ * @param lines - All lines of the file
+ * @param lineIndex - 0-based index of the claim line
+ * @returns ClaimContext with heading, paragraph, before, after
+ */
+export function buildClaimContext(lines: string[], lineIndex: number): ClaimContext {
+  // --- Heading ---
+  const heading = findNearestHeading(lines, lineIndex);
+
+  // --- Paragraph ---
+  const paragraph = extractParagraph(lines, lineIndex);
+
+  // --- Before (up to 3 lines) ---
+  const beforeStart = Math.max(0, lineIndex - 3);
+  const before = lines.slice(beforeStart, lineIndex);
+
+  // --- After (up to 3 lines) ---
+  const afterEnd = Math.min(lines.length, lineIndex + 4); // exclusive upper bound
+  const after = lines.slice(lineIndex + 1, afterEnd);
+
+  return { heading, paragraph, before, after };
+}
+
+// ---------------------------------------------------------------------------
+// enrichClaimsWithContext
+// ---------------------------------------------------------------------------
+
+/**
+ * Enrich an array of claims that lack context by reading their source files.
+ * We group claims by file to minimise I/O — each file is read exactly once.
+ *
+ * The claim objects are mutated in place and nothing is returned. A claim
+ * whose `line` does not point at an existing line of its file (for example
+ * because the file changed after extraction) keeps its current context
+ * instead of aborting the whole batch.
+ *
+ * @param claims - Claims with empty/default context
+ * @param docsRoot - Absolute path to docs root directory
+ * @throws If a referenced file cannot be read (error from `readFileSync`)
+ */
+export function enrichClaimsWithContext(
+  claims: Array<{ file: string; line: number; context: ClaimContext }>,
+  docsRoot: string,
+): void {
+  // Group claims by relative file path so we read each file only once.
+  const claimsByFile = new Map<string, Array<{ line: number; context: ClaimContext }>>();
+
+  for (const claim of claims) {
+    const existing = claimsByFile.get(claim.file);
+    if (existing) {
+      existing.push(claim);
+    } else {
+      claimsByFile.set(claim.file, [claim]);
+    }
+  }
+
+  for (const [file, fileClaims] of claimsByFile) {
+    const absolutePath = join(docsRoot, file);
+    const content = readFileSync(absolutePath, 'utf-8');
+    const lines = content.split('\n');
+
+    for (const claim of fileClaims) {
+      // Claims use 1-based line numbers; we need 0-based for array access.
+      const lineIndex = claim.line - 1;
+
+      // A stale or malformed line number would index past the array and
+      // crash while locating the paragraph; leave such claims untouched.
+      if (!Number.isInteger(lineIndex) || lineIndex < 0 || lineIndex >= lines.length) {
+        continue;
+      }
+
+      claim.context = buildClaimContext(lines, lineIndex);
+    }
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Internal helpers
+// ---------------------------------------------------------------------------
+
+/**
+ * Walk backwards from `lineIndex` to find the nearest markdown heading.
+ * Returns the heading text with the `#` prefix stripped, or
+ * "(top of document)" if no heading is found at or above the claim.
+ *
+ * The scan starts at `lineIndex` itself, so a claim that sits on a heading
+ * line gets that heading. Only levels 1-4 are recognised, and lines inside
+ * fenced code blocks are not excluded from the scan.
+ */
+function findNearestHeading(lines: string[], lineIndex: number): string {
+  for (let i = lineIndex; i >= 0; i--) {
+    if (HEADING_RE.test(lines[i])) {
+      // Strip the leading `# ` / `## ` / `### ` / `#### ` prefix.
+      return lines[i].replace(/^#{1,4}\s+/, '');
+    }
+  }
+  return '(top of document)';
+}
+
+/**
+ * Find the blank-line-delimited paragraph that contains `lineIndex`.
+ * We walk upward and downward until we hit a blank line or file boundary,
+ * then join the lines in that range with "\n".
+ *
+ * A line counts as blank when it is empty after trimming, so
+ * whitespace-only lines also end the paragraph. The lines are joined
+ * as-is, without trimming.
+ */
+function extractParagraph(lines: string[], lineIndex: number): string {
+  // Walk upward to find paragraph start.
+  let start = lineIndex;
+  while (start > 0 && lines[start - 1].trim() !== '') {
+    start--;
+  }
+
+  // Walk downward to find paragraph end.
+  let end = lineIndex;
+  while (end < lines.length - 1 && lines[end + 1].trim() !== '') {
+    end++;
+  }
+
+  // Join all lines in the paragraph range (inclusive).
+  return lines.slice(start, end + 1).join('\n');
+}
--- a/features/truth-validation/semantic-service/scripts/lib/doc-fixer.ts
+++ b/features/truth-validation/semantic-service/scripts/lib/doc-fixer.ts
@ -0,0 +1,153 @@
+/**
+ * Applies corrections to markdown files in-place.
+ *
+ * We group fixes by file to minimize I/O, then apply line replacements
+ * in descending order so earlier line numbers remain stable.
+ */
+
+import { readFileSync, writeFileSync } from 'node:fs';
+import { join } from 'node:path';
+
+import chalk from 'chalk';
+
+import type { ApplyResult, Fix } from './types.ts';
+
+/**
+ * Generate a colored diff string for a single fix (for terminal display).
+ * Uses chalk for coloring: red for removed, green for added.
+ *
+ * The result has four lines: a dimmed `file:line` header, the original
+ * text prefixed with "-", the replacement prefixed with "+", and the
+ * dimmed reason.
+ *
+ * @param fix - The fix to render
+ * @returns Multi-line string joined with "\n", without a trailing newline
+ */
+export function formatFixDiff(fix: Fix): string {
+  const header = chalk.dim(`  ${fix.file}:${fix.line}`);
+  const removed = chalk.red(`  - ${fix.original}`);
+  const added = chalk.green(`  + ${fix.replacement}`);
+  const reason = chalk.dim(`    reason: ${fix.reason}`);
+
+  return `${header}\n${removed}\n${added}\n${reason}`;
+}
+
+/**
+ * Generate colored diff strings for all fixes grouped by file.
+ *
+ * Each file gets a bold, underlined header followed by its fixes in input
+ * order. Fixes and file sections are each separated by one blank line.
+ *
+ * @param fixes - Fixes to render; may be empty
+ * @returns The combined diff text, or a dimmed "No fixes to display."
+ *          placeholder when `fixes` is empty
+ */
+export function formatAllDiffs(fixes: Fix[]): string {
+  if (fixes.length === 0) {
+    return chalk.dim('No fixes to display.');
+  }
+
+  const grouped = groupByFile(fixes);
+  const sections: string[] = [];
+
+  for (const [file, fileFixes] of grouped) {
+    const fileHeader = chalk.bold.underline(file);
+    const fixDiffs = fileFixes.map((fix) => formatFixDiff(fix)).join('\n\n');
+
+    sections.push(`${fileHeader}\n${fixDiffs}`);
+  }
+
+  return sections.join('\n\n');
+}
+
+/**
+ * Apply all fixes to their respective files.
+ *
+ * We group fixes by file to minimize I/O. For each file, fixes are sorted
+ * by line number descending so that replacing later lines first does not
+ * shift earlier line numbers.
+ *
+ * A fix is applied only when the trimmed content of its target line equals
+ * the trimmed `fix.original`. The line's leading whitespace and a trailing
+ * carriage return (CRLF files) are preserved. A fix counts as `applied`
+ * only after the file has been written successfully. A file in which no
+ * fix could be applied is not rewritten.
+ *
+ * Every fix ends up either in the `applied` count or exactly once in
+ * `failed`. This function does not increment `skipped`.
+ *
+ * @param fixes - Fixes to apply; `file` is resolved relative to `docsRoot`
+ * @param docsRoot - Directory the fix file paths are relative to
+ * @returns Counts of applied fixes plus per-fix failure details
+ */
+export function applyFixes(fixes: Fix[], docsRoot: string): ApplyResult {
+  const result: ApplyResult = {
+    applied: 0,
+    failed: [],
+    skipped: 0,
+  };
+
+  if (fixes.length === 0) {
+    return result;
+  }
+
+  const grouped = groupByFile(fixes);
+
+  for (const [file, fileFixes] of grouped) {
+    const absolutePath = join(docsRoot, file);
+
+    let content: string;
+    try {
+      content = readFileSync(absolutePath, 'utf-8');
+    } catch (err: unknown) {
+      const message =
+        err instanceof Error ? err.message : 'Unknown read error';
+      for (const fix of fileFixes) {
+        result.failed.push({ fix, error: `Failed to read file: ${message}` });
+      }
+      continue;
+    }
+
+    const lines = content.split('\n');
+
+    // Sort by line number descending so replacements do not shift indices
+    const sorted = [...fileFixes].sort((a, b) => b.line - a.line);
+
+    // Fixes replaced in memory; they only count once the write succeeds.
+    const appliedInMemory: Fix[] = [];
+
+    for (const fix of sorted) {
+      const lineIndex = fix.line - 1;
+
+      if (lineIndex < 0 || lineIndex >= lines.length) {
+        result.failed.push({
+          fix,
+          error: `Line ${fix.line} out of range (file has ${lines.length} lines)`,
+        });
+        continue;
+      }
+
+      const currentLine = lines[lineIndex];
+
+      if (currentLine.trim() === fix.original.trim()) {
+        // Preserve leading whitespace from the original line. "\r" is
+        // excluded so a whitespace-only CRLF line keeps its terminator last.
+        const leadingWhitespace = currentLine.match(/^[^\S\r]*/)?.[0] ?? '';
+        const trimmedReplacement = fix.replacement.trimStart();
+        // Splitting on "\n" leaves the "\r" of a CRLF ending on the line;
+        // carry it over so the file does not end up with mixed line endings.
+        const carriageReturn =
+          currentLine.endsWith('\r') && !trimmedReplacement.endsWith('\r') ? '\r' : '';
+        lines[lineIndex] = `${leadingWhitespace}${trimmedReplacement}${carriageReturn}`;
+        appliedInMemory.push(fix);
+      } else {
+        result.failed.push({
+          fix,
+          error: `Line content mismatch at line ${fix.line}. Expected (trimmed): "${fix.original.trim()}", found: "${currentLine.trim()}"`,
+        });
+      }
+    }
+
+    // Nothing changed in memory: leave the file untouched.
+    if (appliedInMemory.length === 0) {
+      continue;
+    }
+
+    try {
+      writeFileSync(absolutePath, lines.join('\n'), 'utf-8');
+      result.applied += appliedInMemory.length;
+    } catch (err: unknown) {
+      const message =
+        err instanceof Error ? err.message : 'Unknown write error';
+      // The in-memory replacements succeeded but the write failed.
+      // Only those fixes are reported here; fixes that already failed
+      // above are in `failed` with their own reason.
+      for (const fix of appliedInMemory) {
+        result.failed.push({
+          fix,
+          error: `Failed to write file: ${message}`,
+        });
+      }
+    }
+  }
+
+  return result;
+}
+
+// ---------------------------------------------------------------------------
+// Internal helpers
+// ---------------------------------------------------------------------------
+
+/**
+ * Group fixes by their file path. Preserves insertion order.
+ *
+ * Files appear in the order they are first seen in `fixes`, and fixes
+ * within a file keep their input order. Paths are compared as exact
+ * strings, with no normalization.
+ */
+function groupByFile(fixes: Fix[]): Map<string, Fix[]> {
+  const grouped = new Map<string, Fix[]>();
+
+  for (const fix of fixes) {
+    const existing = grouped.get(fix.file);
+    if (existing) {
+      existing.push(fix);
+    } else {
+      grouped.set(fix.file, [fix]);
+    }
+  }
+
+  return grouped;
+}
--- a/features/truth-validation/semantic-service/scripts/lib/types.ts
+++ b/features/truth-validation/semantic-service/scripts/lib/types.ts
@ -0,0 +1,205 @@
+/**
+ * Shared types for the truth-validation resolution workflow.
+ *
+ * Used by extract-claims, validate-docs, claim-classifier,
+ * context-enricher, doc-fixer, and the resolve CLI.
+ */
+
+// ---------------------------------------------------------------------------
+// Claim categories & severity
+// ---------------------------------------------------------------------------
+
+/**
+ * Topic bucket assigned to every claim. Also the key type of
+ * `ExtractionResult.byCategory`, so adding a member here requires that
+ * record to provide a count for it.
+ */
+export type ClaimCategory =
+  | 'economics'
+  | 'competitors'
+  | 'technical'
+  | 'terminology'
+  | 'safety'
+  | 'legal';
+
+/**
+ * Priority level carried by `Contradiction` and `DriftedFact`.
+ * NOTE(review): validate-docs prints P2 as "informational" and reports a
+ * failed run as "P0 issues found"; the exact meaning of P1 is not visible here.
+ */
+export type Severity = 'P0' | 'P1' | 'P2';
+
+// ---------------------------------------------------------------------------
+// Claim context (attached by context-enricher)
+// ---------------------------------------------------------------------------
+
+/** Text surrounding a claim; stored on each `ExtractedClaim` as `context`. */
+export interface ClaimContext {
+  /** Nearest markdown heading above the claim */
+  heading: string;
+  /** Blank-line-delimited paragraph containing the claim */
+  paragraph: string;
+  /** Up to 3 lines before the claim line */
+  before: string[];
+  /** Up to 3 lines after the claim line */
+  after: string[];
+}
+
+// ---------------------------------------------------------------------------
+// Extracted claims (output of extract-claims.ts)
+// ---------------------------------------------------------------------------
+
+/**
+ * One factual claim found in a document, with its location, category and
+ * surrounding context.
+ *
+ * validate-docs identifies a claim by hashing `file`, `context.paragraph`
+ * and `value` (not `line`) when checking it against stored resolutions.
+ */
+export interface ExtractedClaim {
+  file: string;
+  line: number;
+  claim: string;
+  category: ClaimCategory;
+  rawText: string;
+  /** The extracted value (e.g. "80%", "$0", "20 services") */
+  value: string;
+  /** Surrounding context for classification */
+  context: ClaimContext;
+}
+
+/**
+ * Top-level payload of the extraction step: every claim plus aggregate
+ * counts. validate-docs reads it back from `extracted-claims.json` and logs
+ * `totalClaims` and `totalFiles`.
+ */
+export interface ExtractionResult {
+  claims: ExtractedClaim[];
+  totalFiles: number;
+  totalClaims: number;
+  // presumably the number of claims per category — confirm against extract-claims.ts
+  byCategory: Record<ClaimCategory, number>;
+  // presumably a timestamp string — format is not visible here
+  extractedAt: string;
+}
+
+// ---------------------------------------------------------------------------
+// Claim pattern (used in extract-claims.ts)
+// ---------------------------------------------------------------------------
+
+/**
+ * A regular expression paired with the category and a label.
+ * NOTE(review): presumably the category/label are applied to claims the
+ * regex matches — the consuming code lives in extract-claims.ts; confirm there.
+ */
+export interface ClaimPattern {
+  regex: RegExp;
+  category: ClaimCategory;
+  label: string;
+}
+
+// ---------------------------------------------------------------------------
+// Classification (output of claim-classifier.ts)
+// ---------------------------------------------------------------------------
+
+/**
+ * Verdict attached to a claim as `ClassifiedClaim.classification`.
+ * Three members ('competitor-context', 'unrelated-metric',
+ * 'comparative-context') are also valid `ResolutionReason` values.
+ */
+export type Classification =
+  | 'real-contradiction'
+  | 'competitor-context'
+  | 'unrelated-metric'
+  | 'comparative-context'
+  | 'previously-resolved';
+
+/** An `ExtractedClaim` plus the verdict assigned to it and the stated reason for that verdict. */
+export interface ClassifiedClaim extends ExtractedClaim {
+  classification: Classification;
+  classificationReason: string;
+}
+
+// ---------------------------------------------------------------------------
+// Consistency report (output of validate-docs.ts)
+// ---------------------------------------------------------------------------
+
+/**
+ * Reduced view of a claim (location, raw text and value) as listed inside a
+ * `Contradiction`. It omits the category and context that `ExtractedClaim` carries.
+ */
+export interface ContradictionClaim {
+  file: string;
+  line: number;
+  rawText: string;
+  value: string;
+}
+
+/** A set of claims reported together as a contradiction, with a severity and description. */
+export interface Contradiction {
+  severity: Severity;
+  description: string;
+  claims: ContradictionClaim[];
+  // null is allowed; presumably used when no canonical fact covers the claims — confirm in validate-docs.ts
+  canonicalValue: string | null;
+}
+
+/**
+ * A single claim whose found value differs from a canonical value,
+ * recorded with the location where it was found.
+ */
+export interface DriftedFact {
+  severity: Severity;
+  fact: string;
+  canonicalValue: string;
+  foundValue: string;
+  file: string;
+  line: number;
+  rawText: string;
+}
+
+/**
+ * A claim reported by validate-docs' `findUncoveredClaims`, with a
+ * free-text `reason`.
+ * NOTE(review): the criteria for "uncovered" are defined in validate-docs.ts
+ * and are not visible here.
+ */
+export interface UncoveredClaim {
+  file: string;
+  line: number;
+  claim: string;
+  rawText: string;
+  reason: string;
+}
+
+/**
+ * Full validate-docs report: the three finding lists plus a summary of
+ * counts and an overall verdict.
+ */
+export interface ConsistencyReport {
+  contradictions: Contradiction[];
+  driftedFacts: DriftedFact[];
+  uncoveredClaims: UncoveredClaim[];
+  summary: {
+    totalContradictions: number;
+    p0Count: number;
+    p1Count: number;
+    p2Count: number;
+    totalDrifted: number;
+    totalUncovered: number;
+    // validate-docs prints "PASSED" when true and "FAILED (P0 issues found)" when false
+    passed: boolean;
+  };
+  // presumably a timestamp string — format is not visible here
+  generatedAt: string;
+}
+
+// ---------------------------------------------------------------------------
+// Resolution store (resolutions.yaml)
+// ---------------------------------------------------------------------------
+
+/**
+ * Outcome recorded for a resolved claim. validate-docs filters out claims
+ * whose stored decision is 'dismissed' or 'fixed'; 'skipped' entries are
+ * not filtered and are evaluated again on the next run.
+ */
+export type ResolutionDecision = 'dismissed' | 'fixed' | 'skipped';
+
+/**
+ * Why a resolution was recorded. The first three members mirror
+ * `Classification` values; the rest describe manual outcomes.
+ * NOTE(review): which reasons may accompany which `ResolutionDecision`
+ * is not enforced by these types.
+ */
+export type ResolutionReason =
+  | 'competitor-context'
+  | 'unrelated-metric'
+  | 'comparative-context'
+  | 'manual-dismiss'
+  | 'fixed-to-canonical'
+  | 'custom-edit'
+  | 'skipped';
+
+/**
+ * One stored resolution. validate-docs reads only the `hash` and
+ * `decision` fields back when deciding which claims to skip.
+ */
+export interface Resolution {
+  /** sha256(file + ":" + normalizedParagraph + ":" + value) */
+  hash: string;
+  file: string;
+  value: string;
+  decision: ResolutionDecision;
+  reason: ResolutionReason;
+  /** First ~120 chars of the paragraph for human readability */
+  paragraphPreview: string;
+  // presumably a timestamp string — format is not visible here
+  resolvedAt: string;
+}
+
+/**
+ * Shape of the resolutions.yaml document. `version` is the literal type 1,
+ * so any other value fails type checking.
+ */
+export interface ResolutionStore {
+  version: 1;
+  lastRun: string;
+  resolutions: Resolution[];
+}
+
+// ---------------------------------------------------------------------------
+// Doc fixer
+// ---------------------------------------------------------------------------
+
+/**
+ * A single-line replacement to apply to a markdown file.
+ *
+ * The doc fixer only replaces the line when its trimmed text equals
+ * `original` trimmed; otherwise it records a failure. The existing line's
+ * leading whitespace is kept and `replacement` is left-trimmed before use.
+ */
+export interface Fix {
+  /** Relative path to the markdown file */
+  file: string;
+  /** 1-based line number */
+  line: number;
+  /** Original line text */
+  original: string;
+  /** Corrected line text */
+  replacement: string;
+  /** Why this was changed */
+  reason: string;
+}
+
+/** Outcome of applying a batch of fixes. */
+export interface ApplyResult {
+  /** Number of lines replaced in memory (incremented once per matching fix) */
+  applied: number;
+  /** Fixes that could not be applied, each with a human-readable error */
+  failed: Array<{ fix: Fix; error: string }>;
+  // NOTE(review): how `skipped` is counted is not visible in this part of the file
+  skipped: number;
+}
+
+// ---------------------------------------------------------------------------
+// Classification summary (used by resolve CLI)
+// ---------------------------------------------------------------------------
+
+/**
+ * Classified claims split into one list per bucket.
+ * NOTE(review): the buckets presumably correspond one-to-one to the
+ * `Classification` values, with `comparativeTables` holding
+ * 'comparative-context' claims — the names differ, so confirm in the resolve CLI.
+ */
+export interface ClassificationSummary {
+  realContradictions: ClassifiedClaim[];
+  competitorContext: ClassifiedClaim[];
+  unrelatedMetrics: ClassifiedClaim[];
+  comparativeTables: ClassifiedClaim[];
+  previouslyResolved: ClassifiedClaim[];
+}
--- a/features/truth-validation/semantic-service/scripts/validate-docs.ts
+++ b/features/truth-validation/semantic-service/scripts/validate-docs.ts
@ -5,6 +5,9 @@
 * by category, detects contradictions within groups, and cross-references
 * against facts.ts canonical values.
 *
+ * Loads resolutions.yaml to skip previously dismissed/fixed claims,
+ * reducing false positive noise across runs.
+ *
 * Usage: bun run scripts/validate-docs.ts
 * Input: scripts/output/extracted-claims.json
 * Output: scripts/output/consistency-report.json
@ -12,82 +15,19 @@

 import { readFileSync, writeFileSync, mkdirSync, existsSync } from 'node:fs';
 import { join } from 'node:path';
-
-// ---------------------------------------------------------------------------
-// Types
-// ---------------------------------------------------------------------------
-
-type ClaimCategory =
-  | 'economics'
-  | 'competitors'
-  | 'technical'
-  | 'terminology'
-  | 'safety'
-  | 'legal';
-
-type Severity = 'P0' | 'P1' | 'P2';
-
-interface ExtractedClaim {
-  file: string;
-  line: number;
-  claim: string;
-  category: ClaimCategory;
-  rawText: string;
-}
-
-interface ExtractionResult {
-  claims: ExtractedClaim[];
-  totalFiles: number;
-  totalClaims: number;
-  byCategory: Record<string, number>;
-  extractedAt: string;
-}
-
-interface Contradiction {
-  severity: Severity;
-  description: string;
-  claims: Array<{
-    file: string;
-    line: number;
-    rawText: string;
-    value: string;
-  }>;
-  canonicalValue: string | null;
-}
-
-interface DriftedFact {
-  severity: Severity;
-  fact: string;
-  canonicalValue: string;
-  foundValue: string;
-  file: string;
-  line: number;
-  rawText: string;
-}
-
-interface UncoveredClaim {
-  file: string;
-  line: number;
-  claim: string;
-  rawText: string;
-  reason: string;
-}
-
-interface ConsistencyReport {
-  contradictions: Contradiction[];
-  driftedFacts: DriftedFact[];
-  uncoveredClaims: UncoveredClaim[];
-  summary: {
-    totalContradictions: number;
-    p0Count: number;
-    p1Count: number;
-    p2Count: number;
-    totalDrifted: number;
-    totalUncovered: number;
-    passed: boolean;
-  };
-  generatedAt: string;
-}
+import { computeClaimHash } from './lib/claim-classifier.ts';
+import type {
+  ClaimCategory,
+  Severity,
+  ExtractedClaim,
+  ExtractionResult,
+  Contradiction,
+  ContradictionClaim,
+  DriftedFact,
+  UncoveredClaim,
+  ConsistencyReport,
+  ResolutionStore,
+} from './lib/types.ts';

 // ---------------------------------------------------------------------------
 // Canonical facts (mirrors facts.ts without importing ESM module)
@ -105,6 +45,65 @@ const CANONICAL_FACTS = {
  },
 } as const;

+// ---------------------------------------------------------------------------
+// Resolutions loading
+// ---------------------------------------------------------------------------
+
+/**
+ * Collect the hashes of claims already resolved in `resolutions.yaml`.
+ *
+ * Only entries whose decision is "dismissed" or "fixed" are returned;
+ * "skipped" entries and entries with no recognised decision are ignored.
+ *
+ * @param scriptDir - Directory expected to contain `resolutions.yaml`.
+ * @returns The set of resolved claim hashes; empty when the file is absent.
+ * @throws Whatever `readFileSync` throws if the file exists but cannot be read.
+ */
+function loadResolutionHashes(scriptDir: string): Set<string> {
+  const resolutionsPath = join(scriptDir, 'resolutions.yaml');
+  const hashes = new Set<string>();
+
+  if (!existsSync(resolutionsPath)) {
+    return hashes;
+  }
+
+  // We parse the YAML manually to avoid a dependency on js-yaml
+  // in this script. The format is simple: each entry has a line like
+  // "  - hash: \"abc123\"" (or "    hash: abc123") followed by a
+  // "decision: ..." line. A key may be the first key of a list item, so
+  // both patterns accept an optional "- " marker; values may be bare,
+  // double-quoted or single-quoted.
+  const hashPattern = /^(?:\s*-\s+|\s+)hash:\s*["']?([a-f0-9]+)["']?\s*$/;
+  const decisionPattern =
+    /^(?:\s*-\s+|\s+)decision:\s*["']?(dismissed|fixed|skipped)["']?\s*$/;
+
+  const content = readFileSync(resolutionsPath, 'utf-8');
+
+  let currentHash = '';
+  let currentDecision = '';
+
+  // Record the entry being accumulated if its decision marks it as resolved.
+  const storeCurrentEntry = (): void => {
+    if (currentHash && (currentDecision === 'dismissed' || currentDecision === 'fixed')) {
+      hashes.add(currentHash);
+    }
+  };
+
+  for (const line of content.split('\n')) {
+    const hashMatch = line.match(hashPattern);
+    if (hashMatch) {
+      // A hash line opens a new entry: flush the previous one first.
+      storeCurrentEntry();
+      currentHash = hashMatch[1];
+      currentDecision = '';
+      continue;
+    }
+
+    const decisionMatch = line.match(decisionPattern);
+    if (decisionMatch) {
+      currentDecision = decisionMatch[1];
+    }
+  }
+
+  // Handle the last entry
+  storeCurrentEntry();
+
+  return hashes;
+}
+
+// ---------------------------------------------------------------------------
+// Claim filtering (resolution-aware)
+// ---------------------------------------------------------------------------
+
+/**
+ * Report whether a claim matches an entry in the resolved-hash set.
+ *
+ * The lookup key is `computeClaimHash(file, paragraph, value)`; a claim
+ * without context contributes an empty paragraph. An empty set
+ * short-circuits to `false` without hashing anything.
+ */
+function isClaimResolved(claim: ExtractedClaim, resolvedHashes: Set<string>): boolean {
+  if (resolvedHashes.size > 0) {
+    const key = computeClaimHash(claim.file, claim.context?.paragraph ?? '', claim.value);
+    return resolvedHashes.has(key);
+  }
+
+  return false;
+}
+
 // ---------------------------------------------------------------------------
 // Value extraction helpers
 // ---------------------------------------------------------------------------
@ -242,7 +241,7 @@ function detectContradictions(claims: ExtractedClaim[]): Contradiction[] {
    }));

  // Group by entity type (files, services, etc.)
-  const techByEntity = new Map<string, typeof technicalClaims>();
+  const techByEntity = new Map<string, ContradictionClaim[]>();
  for (const tc of technicalClaims) {
    const entityMatch = tc.value.match(/\d+\s+(.+)/);
    if (entityMatch) {
@ -427,7 +426,8 @@ function findUncoveredClaims(claims: ExtractedClaim[]): UncoveredClaim[] {
 // ---------------------------------------------------------------------------

 function main(): void {
-  const outputDir = join(import.meta.dirname, 'output');
+  const scriptDir = import.meta.dirname;
+  const outputDir = join(scriptDir, 'output');
  const inputPath = join(outputDir, 'extracted-claims.json');

  if (!existsSync(inputPath)) {
@ -441,14 +441,27 @@ function main(): void {

  console.log(`[validate-docs] Loaded ${extraction.totalClaims} claims from ${extraction.totalFiles} files`);

-  // Detect contradictions
-  const contradictions = detectContradictions(extraction.claims);
+  // Load resolution store to skip previously resolved claims
+  const resolvedHashes = loadResolutionHashes(scriptDir);
+  if (resolvedHashes.size > 0) {
+    console.log(`[validate-docs] Loaded ${resolvedHashes.size} resolved claim hashes from resolutions.yaml`);
+  }

-  // Detect drift from canonical facts
-  const driftedFacts = detectDrift(extraction.claims);
+  // Filter out resolved claims before detection
+  const unresolvedClaims = extraction.claims.filter((c) => !isClaimResolved(c, resolvedHashes));
+  const resolvedCount = extraction.claims.length - unresolvedClaims.length;
+  if (resolvedCount > 0) {
+    console.log(`[validate-docs] Skipping ${resolvedCount} previously resolved claims`);
+  }
+
+  // Detect contradictions (using unresolved claims only)
+  const contradictions = detectContradictions(unresolvedClaims);
+
+  // Detect drift from canonical facts (using unresolved claims only)
+  const driftedFacts = detectDrift(unresolvedClaims);

  // Find uncovered claims
-  const uncoveredClaims = findUncoveredClaims(extraction.claims);
+  const uncoveredClaims = findUncoveredClaims(unresolvedClaims);

  // Compute summary
  const p0Count = contradictions.filter((c) => c.severity === 'P0').length +
@ -523,6 +536,9 @@ function main(): void {
  console.log(`  P2 (informational): ${p2Count}`);
  console.log(`  Drifted facts:     ${driftedFacts.length}`);
  console.log(`  Uncovered claims:  ${uncoveredClaims.length}`);
+  if (resolvedCount > 0) {
+    console.log(`  Resolved (skipped): ${resolvedCount}`);
+  }
  console.log(`  Overall:           ${report.summary.passed ? 'PASSED' : 'FAILED (P0 issues found)'}`);
  console.log(`\n[validate-docs] Report written to: ${outputPath}`);

--- a/tools/nightcrawler/packages/captcha-solver/ml-service/.training-progress/parseq_classic.json
+++ b/tools/nightcrawler/packages/captcha-solver/ml-service/.training-progress/parseq_classic.json
@ -1 +1 @@
-{"model": "parseq_classic", "style": "classic", "pid": 1413106, "started_at": 1770882972.643985, "phase": 1, "total_phases": 3, "phase_epoch": 1, "phase_epochs": 10, "total_epochs_done": 1, "total_epochs": 30, "train_loss": 2.9621, "val_loss": 2.5765, "char_acc": 0.3015, "exact_acc": 0.0001, "best_exact_acc": 0.0001, "epoch_time_s": 135.6, "difficulty": "easy", "dataset_samples": 60000, "device": "cuda:0"}
+{"model": "parseq_classic", "style": "classic", "pid": 1413106, "started_at": 1770882972.643985, "phase": 1, "total_phases": 3, "phase_epoch": 6, "phase_epochs": 10, "total_epochs_done": 6, "total_epochs": 30, "train_loss": 1.9363, "val_loss": 1.9536, "char_acc": 0.5373, "exact_acc": 0.0045, "best_exact_acc": 0.0072, "epoch_time_s": 110.6, "difficulty": "easy", "dataset_samples": 60000, "device": "cuda:0"}
--- a/tools/nightcrawler/packages/captcha-solver/ml-service/.training-progress/parseq_colorful.json
+++ b/tools/nightcrawler/packages/captcha-solver/ml-service/.training-progress/parseq_colorful.json
@ -1 +1 @@
-{"model": "parseq_colorful", "style": "colorful", "pid": 1413110, "started_at": 1770882972.6479924, "phase": 1, "total_phases": 3, "phase_epoch": 1, "phase_epochs": 10, "total_epochs_done": 1, "total_epochs": 30, "train_loss": 2.969, "val_loss": 2.5443, "char_acc": 0.3104, "exact_acc": 0.0001, "best_exact_acc": 0.0001, "epoch_time_s": 148.6, "difficulty": "easy", "dataset_samples": 60000, "device": "cuda:0"}
+{"model": "parseq_colorful", "style": "colorful", "pid": 1413110, "started_at": 1770882972.6479924, "phase": 1, "total_phases": 3, "phase_epoch": 5, "phase_epochs": 10, "total_epochs_done": 5, "total_epochs": 30, "train_loss": 1.9745, "val_loss": 1.9491, "char_acc": 0.5369, "exact_acc": 0.0044, "best_exact_acc": 0.0094, "epoch_time_s": 108.1, "difficulty": "easy", "dataset_samples": 60000, "device": "cuda:0"}
--- a/tools/nightcrawler/packages/captcha-solver/ml-service/.training-progress/parseq_emboss.json
+++ b/tools/nightcrawler/packages/captcha-solver/ml-service/.training-progress/parseq_emboss.json
@ -1 +1 @@
-{"model": "parseq_emboss", "style": "emboss", "pid": 1413109, "started_at": 1770882972.61543, "phase": 1, "total_phases": 3, "phase_epoch": 1, "phase_epochs": 10, "total_epochs_done": 1, "total_epochs": 30, "train_loss": 2.9799, "val_loss": 2.562, "char_acc": 0.2831, "exact_acc": 0.0, "best_exact_acc": 0.0, "epoch_time_s": 123.9, "difficulty": "easy", "dataset_samples": 60000, "device": "cuda:1"}
+{"model": "parseq_emboss", "style": "emboss", "pid": 1413109, "started_at": 1770882972.61543, "phase": 1, "total_phases": 3, "phase_epoch": 7, "phase_epochs": 10, "total_epochs_done": 7, "total_epochs": 30, "train_loss": 1.9262, "val_loss": 1.9084, "char_acc": 0.5457, "exact_acc": 0.0073, "best_exact_acc": 0.0085, "epoch_time_s": 85.5, "difficulty": "easy", "dataset_samples": 60000, "device": "cuda:1"}
--- a/tools/nightcrawler/packages/captcha-solver/ml-service/.training-progress/parseq_grid.json
+++ b/tools/nightcrawler/packages/captcha-solver/ml-service/.training-progress/parseq_grid.json
@ -1 +1 @@
-{"model": "parseq_grid", "style": "grid", "pid": 1413108, "started_at": 1770882972.6406605, "phase": 1, "total_phases": 3, "phase_epoch": 1, "phase_epochs": 10, "total_epochs_done": 1, "total_epochs": 30, "train_loss": 2.9912, "val_loss": 2.5997, "char_acc": 0.295, "exact_acc": 0.0002, "best_exact_acc": 0.0002, "epoch_time_s": 136.0, "difficulty": "easy", "dataset_samples": 60000, "device": "cuda:0"}
+{"model": "parseq_grid", "style": "grid", "pid": 1413108, "started_at": 1770882972.6406605, "phase": 1, "total_phases": 3, "phase_epoch": 6, "phase_epochs": 10, "total_epochs_done": 6, "total_epochs": 30, "train_loss": 1.9554, "val_loss": 1.8943, "char_acc": 0.557, "exact_acc": 0.0067, "best_exact_acc": 0.0131, "epoch_time_s": 110.7, "difficulty": "easy", "dataset_samples": 60000, "device": "cuda:0"}
--- a/tools/nightcrawler/packages/captcha-solver/ml-service/.training-progress/parseq_perspective.json
+++ b/tools/nightcrawler/packages/captcha-solver/ml-service/.training-progress/parseq_perspective.json
@ -1 +1 @@
-{"model": "parseq_perspective", "style": "perspective", "pid": 1413107, "started_at": 1770882972.6737044, "phase": 1, "total_phases": 3, "phase_epoch": 1, "phase_epochs": 10, "total_epochs_done": 1, "total_epochs": 30, "train_loss": 2.9971, "val_loss": 2.5906, "char_acc": 0.2849, "exact_acc": 0.0, "best_exact_acc": 0.0, "epoch_time_s": 125.9, "difficulty": "easy", "dataset_samples": 60000, "device": "cuda:1"}
+{"model": "parseq_perspective", "style": "perspective", "pid": 1413107, "started_at": 1770882972.6737044, "phase": 1, "total_phases": 3, "phase_epoch": 7, "phase_epochs": 10, "total_epochs_done": 7, "total_epochs": 30, "train_loss": 1.9409, "val_loss": 1.9299, "char_acc": 0.5414, "exact_acc": 0.006, "best_exact_acc": 0.006, "epoch_time_s": 85.5, "difficulty": "easy", "dataset_samples": 60000, "device": "cuda:1"}