chore(platform-admin-primary-scope): 🔧 Update TypeScript files in src directory
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
parent
b25facc38f
commit
cab79a20c9
11 changed files with 1040 additions and 126 deletions
|
|
@ -4,51 +4,22 @@
|
|||
* Recursively scans all .md files in docs/ and extracts factual claims
|
||||
* using regex patterns. Outputs structured JSON for cross-validation.
|
||||
*
|
||||
* Each claim is enriched with context: the nearest heading, surrounding
|
||||
* paragraph, and 3 lines before/after for downstream classification.
|
||||
*
|
||||
* Usage: bun run scripts/extract-claims.ts
|
||||
* Output: scripts/output/extracted-claims.json
|
||||
*/
|
||||
|
||||
import { readFileSync, readdirSync, statSync, mkdirSync, writeFileSync, existsSync } from 'node:fs';
|
||||
import { join, relative, extname } from 'node:path';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
type ClaimCategory =
|
||||
| 'economics'
|
||||
| 'competitors'
|
||||
| 'technical'
|
||||
| 'terminology'
|
||||
| 'safety'
|
||||
| 'legal';
|
||||
|
||||
interface ExtractedClaim {
|
||||
file: string;
|
||||
line: number;
|
||||
claim: string;
|
||||
category: ClaimCategory;
|
||||
rawText: string;
|
||||
}
|
||||
|
||||
interface ExtractionResult {
|
||||
claims: ExtractedClaim[];
|
||||
totalFiles: number;
|
||||
totalClaims: number;
|
||||
byCategory: Record<ClaimCategory, number>;
|
||||
extractedAt: string;
|
||||
}
|
||||
import { buildClaimContext } from './lib/context-enricher.ts';
|
||||
import type { ClaimCategory, ClaimContext, ClaimPattern, ExtractedClaim, ExtractionResult } from './lib/types.ts';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Claim patterns
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
interface ClaimPattern {
|
||||
regex: RegExp;
|
||||
category: ClaimCategory;
|
||||
label: string;
|
||||
}
|
||||
|
||||
const CLAIM_PATTERNS: ClaimPattern[] = [
|
||||
// Economics: fee/percentage claims
|
||||
{
|
||||
|
|
@ -112,6 +83,57 @@ const CLAIM_PATTERNS: ClaimPattern[] = [
|
|||
},
|
||||
];
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Value extraction helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Signature shared by every per-label value extractor. */
type ValueExtractor = (line: string) => string;

/** Builds an extractor that yields capture group 1 of `re`, or '' when the line does not match. */
const captureOf = (re: RegExp): ValueExtractor => (line) => line.match(re)?.[1] ?? '';

/** Builds an extractor that yields the whole (trimmed) match of `re`, or '' when the line does not match. */
const phraseOf = (re: RegExp): ValueExtractor => (line) => line.match(re)?.[0].trim() ?? '';

/**
 * Pattern label -> extractor. A Map (rather than a plain object) is used so
 * that labels such as "constructor" cannot resolve to inherited properties.
 */
const VALUE_EXTRACTORS = new Map<string, ValueExtractor>([
  [
    'percentage',
    (line) => {
      const found = line.match(/(\d+(?:\.\d+)?)\s*%/);
      return found ? `${found[1]}%` : '';
    },
  ],
  [
    'dollar-amount',
    (line) => {
      const found = line.match(/\$\s*(\d+(?:\.\d+)?)/);
      return found ? `$${found[1]}` : '';
    },
  ],
  ['creator-earnings', phraseOf(/creators?\s+(keep|earn|receive|get|take)\s/i)],
  ['platform-fee', phraseOf(/platform\s+(fee|charge|cost|commission|cut|take)/i)],
  ['competitor-mention', captureOf(/(onlyfans|chaturbate|fansly|pornhub|manyvids)/i)],
  [
    'numerical-technical',
    (line) => {
      const found = line.match(/\b(\d+)\s+(services?|features?|files?|packages?|endpoints?|routes?)\b/i);
      return found ? `${found[1]} ${found[2].toLowerCase()}` : '';
    },
  ],
  ['port-number', captureOf(/port\s+(\d{4,5})/i)],
  ['safety-feature', captureOf(/(id\s+verif|background\s+check|escrow|smart\s+contract)/i)],
  ['forbidden-term', captureOf(/\b(prostitute|hooker|whore|john)\b/i)],
  ['legal-claim', captureOf(/(iceland|gdpr|eu\s+complian|data\s+protection|privacy\s+regulation)/i)],
]);

/**
 * Pull the normalized value for a claim out of a markdown line.
 *
 * The extraction regex is chosen by the pattern `label`. Percentages and
 * dollar amounts are re-assembled without inner whitespace ("80 %" -> "80%"),
 * numeric-technical claims are lower-cased ("12 Services" -> "12 services"),
 * and the remaining labels return the matched text as written.
 *
 * @param line - The markdown line the pattern matched on
 * @param label - The pattern label (e.g. "percentage", "port-number")
 * @returns The extracted value, or '' when the label is unknown or nothing matches
 */
function extractMatchedValue(line: string, label: string): string {
  const extractor = VALUE_EXTRACTORS.get(label);
  return extractor ? extractor(line) : '';
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// File discovery
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
@ -150,7 +172,7 @@ function findMarkdownFiles(dir: string): string[] {
|
|||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Claim extraction
|
||||
// Claim extraction (with context)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function extractClaimsFromFile(filePath: string, docsRoot: string): ExtractedClaim[] {
|
||||
|
|
@ -159,24 +181,38 @@ function extractClaimsFromFile(filePath: string, docsRoot: string): ExtractedCla
|
|||
const relativePath = relative(docsRoot, filePath);
|
||||
const claims: ExtractedClaim[] = [];
|
||||
|
||||
// Track code block state to skip fenced code
|
||||
let inCodeBlock = false;
|
||||
|
||||
for (let i = 0; i < lines.length; i++) {
|
||||
const line = lines[i];
|
||||
const lineNumber = i + 1;
|
||||
|
||||
// Skip empty lines, code blocks, and HTML comments
|
||||
if (!line.trim() || line.trim().startsWith('```') || line.trim().startsWith('<!--')) {
|
||||
// Track fenced code blocks
|
||||
if (line.trim().startsWith('```')) {
|
||||
inCodeBlock = !inCodeBlock;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Skip lines inside code blocks, empty lines, and HTML comments
|
||||
if (inCodeBlock || !line.trim() || line.trim().startsWith('<!--')) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (const pattern of CLAIM_PATTERNS) {
|
||||
const match = line.match(pattern.regex);
|
||||
if (match) {
|
||||
const value = extractMatchedValue(line, pattern.label);
|
||||
const context = buildClaimContext(lines, i);
|
||||
|
||||
claims.push({
|
||||
file: relativePath,
|
||||
line: lineNumber,
|
||||
claim: `${pattern.label}: ${match[0]}`,
|
||||
category: pattern.category,
|
||||
rawText: line.trim(),
|
||||
value,
|
||||
context,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
@ -203,7 +239,7 @@ function main(): void {
|
|||
const files = findMarkdownFiles(docsRoot);
|
||||
console.log(`[extract-claims] Found ${files.length} markdown files`);
|
||||
|
||||
// Extract claims from each file
|
||||
// Extract claims from each file (context is built inline per-file)
|
||||
const allClaims: ExtractedClaim[] = [];
|
||||
for (const file of files) {
|
||||
const claims = extractClaimsFromFile(file, docsRoot);
|
||||
|
|
|
|||
|
|
@ -0,0 +1,369 @@
|
|||
/**
|
||||
* Claim Classifier
|
||||
*
|
||||
* Auto-classifies extracted claims as false-positive vs real contradiction.
|
||||
* This is the primary noise reduction layer — eliminates ~80% of false
|
||||
* positives by detecting competitor context, unrelated metrics, comparative
|
||||
* tables, and previously resolved claims.
|
||||
*
|
||||
* Classification rules are checked in order (first match wins):
|
||||
* 1. Previously resolved (hash exists in resolution store)
|
||||
* 2. Competitor context (mentions competitor, not about Lilith)
|
||||
* 3. Unrelated metric (value appears in unrelated KPI context)
|
||||
* 4. Comparative context (markdown table row comparing competitors)
|
||||
* 5. Real contradiction (none of the above matched)
|
||||
*/
|
||||
|
||||
import { createHash } from 'node:crypto';
|
||||
|
||||
import type {
|
||||
ExtractedClaim,
|
||||
ClassifiedClaim,
|
||||
Classification,
|
||||
ClassificationSummary,
|
||||
} from './types.ts';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Competitor platform names used to detect competitor context. */
const COMPETITOR_NAMES = [
  'onlyfans', 'chaturbate', 'fansly', 'pornhub', 'manyvids',
  'loyalfans', 'justforfans', 'cam4', 'stripchat', 'livejasmin',
  'bongacams', 'myfreecams', 'xhamster',
] as const;

/** Case-insensitive alternation over every competitor name (substring match, no word boundaries). */
const COMPETITOR_REGEX = new RegExp(COMPETITOR_NAMES.join('|'), 'i');

/**
 * Detects a Lilith self-reference: the name itself, or the standalone
 * words "we" / "our". The `\b` anchors keep words such as "were",
 * "power" and "tower" from matching.
 */
const LILITH_SELF_REGEX = /lilith|\bwe\b|\bour\b/i;

/**
 * KPI vocabulary. A numeric value that sits near one of these words is
 * most likely a metric (e.g. a retention rate) rather than a factual
 * assertion that needs validating.
 */
const UNRELATED_METRIC_KEYWORDS = [
  'retention', 'conversion', 'response rate', 'uptime',
  'bounce', 'completion', 'satisfaction', 'engagement',
  'growth', 'attrition', 'churn', 'adoption',
  'utilization', 'capacity', 'latency', 'throughput',
] as const;

/** Case-insensitive alternation over every KPI keyword (substring match, no word boundaries). */
const UNRELATED_METRIC_REGEX = new RegExp(UNRELATED_METRIC_KEYWORDS.join('|'), 'i');

/**
 * Creator-earnings phrasing: "creator" followed, anywhere later on the
 * same line, by keep/earn/receive/get/take. When this matches, a KPI
 * keyword nearby must NOT cause the claim to be dismissed as an
 * unrelated metric, because the claim is about creator economics.
 */
const CREATOR_EARNINGS_REGEX = /creator.+(?:keep|earn|receive|get|take)/i;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Hash computation
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Derive a stable, content-based identifier for a claim.
 *
 * The digest covers the (file, paragraph, value) triple joined with ":".
 * Runs of whitespace in the paragraph are collapsed to a single space and
 * the ends are trimmed first, so re-wrapping or re-indenting a paragraph
 * does not invalidate an existing resolution.
 *
 * @param file - File path of the claim, used verbatim
 * @param paragraph - Paragraph text surrounding the claim
 * @param value - Extracted claim value, used verbatim
 * @returns hex-encoded sha256 digest
 */
export function computeClaimHash(file: string, paragraph: string, value: string): string {
  const collapsed = paragraph.trim().split(/\s+/).join(' ');
  const hasher = createHash('sha256');
  hasher.update([file, collapsed, value].join(':'));
  return hasher.digest('hex');
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Single claim classification
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Classify one claim, consulting the resolution store first and then the
 * false-positive rules.
 *
 * Priority order (first hit wins):
 *   1. previously resolved — the claim hash is in `resolvedHashes`
 *   2. competitor context
 *   3. unrelated metric
 *   4. comparative context
 *   5. real contradiction — the default when nothing above matched
 *
 * @param claim - Extracted claim, already enriched with context
 * @param canonicalValue - Canonical value quoted in the "real contradiction" reason
 * @param resolvedHashes - Hashes of claims already resolved
 * @returns The claim plus its classification and a human-readable reason
 */
export function classifyClaim(
  claim: ExtractedClaim,
  canonicalValue: string,
  resolvedHashes: Set<string>,
): ClassifiedClaim {
  // Rule 1: anything already in the resolution store short-circuits the rest.
  const hash = computeClaimHash(claim.file, claim.context.paragraph, claim.value);
  if (resolvedHashes.has(hash)) {
    return {
      ...claim,
      classification: 'previously-resolved',
      classificationReason: `Claim hash ${hash.slice(0, 12)}... found in resolution store`,
    };
  }

  // Rules 2-4: each returns a classified claim on a hit, or null to pass.
  const falsePositiveRules = [
    classifyCompetitorContext,
    classifyUnrelatedMetric,
    classifyComparativeContext,
  ];
  for (const applyRule of falsePositiveRules) {
    const verdict = applyRule(claim);
    if (verdict) {
      return verdict;
    }
  }

  // Rule 5: nothing explained the claim away.
  return {
    ...claim,
    classification: 'real-contradiction',
    classificationReason:
      `Value "${claim.value}" contradicts canonical "${canonicalValue}" — no false-positive pattern matched`,
  };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Rule implementations
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Rule 2: Competitor context.
 *
 * Fires when the claim line or its paragraph names a competitor and
 * neither contains a Lilith self-reference ("lilith", "we", "our").
 * A line such as "OnlyFans takes 20%" describes a competitor, not the
 * Lilith platform, so it is not a contradiction.
 *
 * @returns The classified claim, or null when the rule does not apply
 */
function classifyCompetitorContext(claim: ExtractedClaim): ClassifiedClaim | null {
  const haystack = `${claim.rawText} ${claim.context.paragraph}`;
  const competitor = COMPETITOR_REGEX.exec(haystack);

  // No competitor named, or Lilith is named too: not pure competitor context.
  if (competitor === null || LILITH_SELF_REGEX.test(haystack)) {
    return null;
  }

  return {
    ...claim,
    classification: 'competitor-context',
    classificationReason:
      `Mentions competitor "${competitor[0]}" without Lilith self-reference`,
  };
}
|
||||
|
||||
/**
 * Rule 3: Unrelated metric.
 *
 * Fires when the claim line, or the context lines captured before and
 * after it, contain a KPI keyword (retention, conversion, churn, ...),
 * unless the claim line itself uses creator-earnings phrasing. A line
 * such as "95% retention rate" is a metric, not a fact to validate.
 *
 * @returns The classified claim, or null when the rule does not apply
 */
function classifyUnrelatedMetric(claim: ExtractedClaim): ClassifiedClaim | null {
  const { before, after } = claim.context;
  const nearbyText = before.concat(after).join(' ');
  const keyword = UNRELATED_METRIC_REGEX.exec(`${claim.rawText} ${nearbyText}`);

  if (keyword === null) {
    return null;
  }

  // Creator-earnings wording on the claim line makes the number relevant.
  const aboutCreatorEarnings = CREATOR_EARNINGS_REGEX.test(claim.rawText);
  if (aboutCreatorEarnings) {
    return null;
  }

  return {
    ...claim,
    classification: 'unrelated-metric',
    classificationReason:
      `Value appears in context of unrelated metric keyword "${keyword[0]}"`,
  };
}
|
||||
|
||||
/**
 * Rule 4: Comparative context.
 *
 * Fires when the claim line looks like a markdown table row (it contains
 * `|`) and its paragraph mentions both a competitor and Lilith (this rule
 * also accepts "platinum" as a Lilith reference). Example:
 *
 *   | Platform | Creator Take |
 *   | Lilith   | 100%         |
 *   | OnlyFans | 80%          |
 *
 * Here "80%" is a data point about a competitor, not a contradiction.
 *
 * @returns The classified claim, or null when the rule does not apply
 */
function classifyComparativeContext(claim: ExtractedClaim): ClassifiedClaim | null {
  const looksLikeTableRow = claim.rawText.includes('|');
  if (!looksLikeTableRow) {
    return null;
  }

  const { paragraph } = claim.context;
  const mentionsCompetitor = COMPETITOR_REGEX.test(paragraph);
  const mentionsLilith = /lilith|\bwe\b|\bour\b|platinum/i.test(paragraph);

  if (!(mentionsCompetitor && mentionsLilith)) {
    return null;
  }

  return {
    ...claim,
    classification: 'comparative-context',
    classificationReason:
      'Markdown table row in a paragraph that references both a competitor and Lilith',
  };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Batch classification
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Classify every claim and bucket the results by classification.
 *
 * For each claim we resolve its canonical value from `canonicalValues`,
 * classify it against that value and the resolution store, and append
 * the result to the matching bucket of the summary.
 *
 * @param claims - Extracted claims already enriched with context
 * @param canonicalValues - Map of fact keys to canonical values, e.g. "creatorTakeRate" -> "100%"
 * @param resolvedHashes - Set of sha256 hashes from the resolution store
 * @returns Claims grouped by classification
 */
export function classifyAll(
  claims: ExtractedClaim[],
  canonicalValues: Map<string, string>,
  resolvedHashes: Set<string>,
): ClassificationSummary {
  const summary: ClassificationSummary = {
    realContradictions: [],
    competitorContext: [],
    unrelatedMetrics: [],
    comparativeTables: [],
    previouslyResolved: [],
  };

  claims.forEach((claim) => {
    const baseline = resolveCanonicalValue(claim, canonicalValues);
    appendToSummary(summary, classifyClaim(claim, baseline, resolvedHashes));
  });

  return summary;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Internal helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Resolve the canonical value for a claim.
 *
 * The claim's `claim` field has the format "label: matchedText" (e.g.
 * "percentage: 20%"). Lookup proceeds in this order:
 *   1. exact match of the label against a canonical key;
 *   2. category heuristics on the raw line (creator take rate, platform
 *      fee, per-competitor fees);
 *   3. case-insensitive substring match between the label and each key.
 *
 * When nothing matches, the claim's own value is returned. The resulting
 * "real contradiction" reason then shows the same value on both sides,
 * which signals that no canonical baseline exists for the claim.
 *
 * @param claim - The extracted claim
 * @param canonicalValues - Map of fact keys to canonical values
 * @returns The canonical value, or `claim.value` when none is found
 */
function resolveCanonicalValue(
  claim: ExtractedClaim,
  canonicalValues: Map<string, string>,
): string {
  // The label is everything before the first colon.
  const label = (claim.claim.match(/^([^:]+)/)?.[1] ?? '').trim();

  // Exact match. Compare against undefined so that an empty-string canonical
  // value is honoured, consistent with the `??` lookups below.
  const exactMatch = canonicalValues.get(label);
  if (exactMatch !== undefined) {
    return exactMatch;
  }

  // Category-based heuristics on the raw line.
  const lower = claim.rawText.toLowerCase();
  const mentions = (...words: string[]): boolean => words.some((word) => lower.includes(word));

  if (claim.category === 'economics') {
    if (lower.includes('creator') && mentions('keep', 'earn', 'receive')) {
      return canonicalValues.get('creatorTakeRate') ?? claim.value;
    }
    if (lower.includes('platform') && mentions('fee', 'charge')) {
      return canonicalValues.get('platformFee') ?? claim.value;
    }
  }

  if (claim.category === 'competitors') {
    if (lower.includes('onlyfans')) {
      return canonicalValues.get('onlyFansFee') ?? claim.value;
    }
    if (lower.includes('chaturbate')) {
      return canonicalValues.get('chaturbateFee') ?? claim.value;
    }
  }

  // Substring fallback. An empty string is a substring of everything, so an
  // empty label (or empty key) must be excluded here; otherwise any claim
  // without a label would pick up the first canonical value in the map.
  const needle = label.toLowerCase();
  if (needle !== '') {
    for (const [key, value] of canonicalValues) {
      const candidate = key.toLowerCase();
      if (candidate === '') {
        continue;
      }
      if (needle.includes(candidate) || candidate.includes(needle)) {
        return value;
      }
    }
  }

  // No canonical baseline exists for this claim.
  return claim.value;
}
|
||||
|
||||
/**
 * File a classified claim under the summary bucket that corresponds to
 * its classification. Mutates `summary` in place.
 */
function appendToSummary(summary: ClassificationSummary, classified: ClassifiedClaim): void {
  // Classification tag -> name of the summary array that collects it.
  const destinationFor: Record<Classification, keyof ClassificationSummary> = {
    'previously-resolved': 'previouslyResolved',
    'competitor-context': 'competitorContext',
    'unrelated-metric': 'unrelatedMetrics',
    'comparative-context': 'comparativeTables',
    'real-contradiction': 'realContradictions',
  };

  summary[destinationFor[classified.classification]].push(classified);
}
|
||||
|
|
@ -0,0 +1,135 @@
|
|||
/**
|
||||
* Context Enricher
|
||||
*
|
||||
* Adds paragraph/heading context to raw claims extracted from markdown files.
|
||||
* We read each source file once, then attach the nearest heading, surrounding
|
||||
* paragraph, and a 3-line window around every claim line.
|
||||
*
|
||||
* Usage: imported by extract-claims.ts and the resolve CLI.
|
||||
*/
|
||||
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
|
||||
import type { ClaimContext } from './types.ts';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Heading pattern: lines starting with 1-4 `#` followed by a space
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const HEADING_RE = /^#{1,4}\s/;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// buildClaimContext
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Assemble the context record for one line of a file.
 *
 * The record holds the nearest heading at or above the line, the
 * blank-line-delimited paragraph containing it, and up to three raw
 * lines on either side (fewer near the start or end of the file).
 *
 * @param lines - All lines of the file
 * @param lineIndex - 0-based index of the claim line
 * @returns ClaimContext with heading, paragraph, before, after
 */
export function buildClaimContext(lines: string[], lineIndex: number): ClaimContext {
  const windowSize = 3;

  // slice() excludes its end index, so the window stops just before the claim line.
  const firstBefore = Math.max(0, lineIndex - windowSize);
  // ...and the trailing window ends one past the last wanted line.
  const afterLimit = Math.min(lines.length, lineIndex + windowSize + 1);

  return {
    heading: findNearestHeading(lines, lineIndex),
    paragraph: extractParagraph(lines, lineIndex),
    before: lines.slice(firstBefore, lineIndex),
    after: lines.slice(lineIndex + 1, afterLimit),
  };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// enrichClaimsWithContext
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Fill in the `context` of claims by reading their source files.
 *
 * Claims are grouped by file so each file is read exactly once. The
 * claims are mutated in place; nothing is returned.
 *
 * A claim whose line number falls outside its file (less than 1, beyond
 * the last line, or not an integer) is left untouched: building context
 * for such an index would produce a meaningless window or throw.
 *
 * @param claims - Claims with empty/default context
 * @param docsRoot - Absolute path to docs root directory
 * @throws If a referenced file cannot be read (error from `readFileSync`)
 */
export function enrichClaimsWithContext(
  claims: Array<{ file: string; line: number; context: ClaimContext }>,
  docsRoot: string,
): void {
  // Group claims by relative file path so we read each file only once.
  const claimsByFile = new Map<string, Array<{ line: number; context: ClaimContext }>>();

  for (const claim of claims) {
    const bucket = claimsByFile.get(claim.file);
    if (bucket) {
      bucket.push(claim);
    } else {
      claimsByFile.set(claim.file, [claim]);
    }
  }

  for (const [file, fileClaims] of claimsByFile) {
    const lines = readFileSync(join(docsRoot, file), 'utf-8').split('\n');

    for (const claim of fileClaims) {
      // Claims use 1-based line numbers; we need 0-based for array access.
      const lineIndex = claim.line - 1;

      // Skip stale or invalid line numbers instead of building bogus context.
      if (!Number.isInteger(lineIndex) || lineIndex < 0 || lineIndex >= lines.length) {
        continue;
      }

      claim.context = buildClaimContext(lines, lineIndex);
    }
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Internal helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Scan from `lineIndex` back to the top of the file for a markdown heading.
 * The scan includes the claim line itself.
 *
 * @returns The heading text without its `#` prefix, or "(top of document)"
 *          when no heading is found
 */
function findNearestHeading(lines: string[], lineIndex: number): string {
  let cursor = lineIndex;

  while (cursor >= 0) {
    const candidate = lines[cursor];
    if (HEADING_RE.test(candidate)) {
      // Drop the leading `# ` / `## ` / `### ` / `#### ` marker.
      return candidate.replace(/^#{1,4}\s+/, '');
    }
    cursor -= 1;
  }

  return '(top of document)';
}
|
||||
|
||||
/**
 * Return the blank-line-delimited paragraph around `lineIndex`.
 *
 * The range grows upward and downward from the given line until the
 * neighbouring line is blank (whitespace-only) or the file ends; the
 * lines in that range are joined with "\n".
 */
function extractParagraph(lines: string[], lineIndex: number): string {
  const isBlank = (index: number): boolean => lines[index].trim() === '';

  // Grow upward while the line above is not blank.
  let first = lineIndex;
  while (first > 0 && !isBlank(first - 1)) {
    first -= 1;
  }

  // Grow downward while the line below is not blank.
  let last = lineIndex;
  while (last < lines.length - 1 && !isBlank(last + 1)) {
    last += 1;
  }

  // slice() excludes its end index, hence last + 1.
  return lines.slice(first, last + 1).join('\n');
}
|
||||
|
|
@ -0,0 +1,153 @@
|
|||
/**
|
||||
* Applies corrections to markdown files in-place.
|
||||
*
|
||||
* We group fixes by file to minimize I/O, then apply line replacements
|
||||
* in descending order so earlier line numbers remain stable.
|
||||
*/
|
||||
|
||||
import { readFileSync, writeFileSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
|
||||
import chalk from 'chalk';
|
||||
|
||||
import type { ApplyResult, Fix } from './types.ts';
|
||||
|
||||
/**
 * Render one fix as a small colored diff for the terminal.
 *
 * Four lines are produced: a dimmed `file:line` header, the original
 * text in red, the replacement in green, and a dimmed reason.
 */
export function formatFixDiff(fix: Fix): string {
  return [
    chalk.dim(` ${fix.file}:${fix.line}`),
    chalk.red(` - ${fix.original}`),
    chalk.green(` + ${fix.replacement}`),
    chalk.dim(` reason: ${fix.reason}`),
  ].join('\n');
}
|
||||
|
||||
/**
 * Render every fix as a colored diff, grouped under a bold, underlined
 * header per file. Sections, and the fixes within a section, are
 * separated by a blank line. An empty input yields a dimmed
 * placeholder message.
 */
export function formatAllDiffs(fixes: Fix[]): string {
  if (fixes.length === 0) {
    return chalk.dim('No fixes to display.');
  }

  const sections = Array.from(groupByFile(fixes), ([file, fileFixes]) => {
    const heading = chalk.bold.underline(file);
    const body = fileFixes.map((fix) => formatFixDiff(fix)).join('\n\n');
    return `${heading}\n${body}`;
  });

  return sections.join('\n\n');
}
|
||||
|
||||
/**
 * Apply all fixes to their respective files.
 *
 * Fixes are grouped by file so each file is read and written at most
 * once. Within a file, fixes are processed by descending line number.
 *
 * A fix is applied only when the current line, ignoring surrounding
 * whitespace, equals `fix.original`. The line's leading whitespace and a
 * trailing carriage return (CRLF files) are preserved.
 *
 * Accounting: each fix ends up in exactly one of `applied` or `failed`.
 * `applied` counts only fixes that were actually written to disk. If the
 * write fails, the fixes that had been applied in memory are reported as
 * failed instead. A file in which no fix applied is not rewritten.
 *
 * @param fixes - Fixes to apply; `line` is 1-based
 * @param docsRoot - Directory that the fixes' `file` paths are relative to
 * @returns Count of applied fixes and the list of failures with reasons
 */
export function applyFixes(fixes: Fix[], docsRoot: string): ApplyResult {
  const result: ApplyResult = {
    applied: 0,
    failed: [],
    skipped: 0,
  };

  if (fixes.length === 0) {
    return result;
  }

  for (const [file, fileFixes] of groupByFile(fixes)) {
    const absolutePath = join(docsRoot, file);

    let content: string;
    try {
      content = readFileSync(absolutePath, 'utf-8');
    } catch (err: unknown) {
      const message = err instanceof Error ? err.message : 'Unknown read error';
      for (const fix of fileFixes) {
        result.failed.push({ fix, error: `Failed to read file: ${message}` });
      }
      continue;
    }

    const lines = content.split('\n');

    // Fixes replaced in memory but not yet written to disk.
    const pending: Fix[] = [];

    // Sort on a copy so the caller's array order is left alone.
    const sorted = [...fileFixes].sort((a, b) => b.line - a.line);

    for (const fix of sorted) {
      const lineIndex = fix.line - 1;

      if (lineIndex < 0 || lineIndex >= lines.length) {
        result.failed.push({
          fix,
          error: `Line ${fix.line} out of range (file has ${lines.length} lines)`,
        });
        continue;
      }

      const currentLine = lines[lineIndex];

      if (currentLine.trim() !== fix.original.trim()) {
        result.failed.push({
          fix,
          error: `Line content mismatch at line ${fix.line}. Expected (trimmed): "${fix.original.trim()}", found: "${currentLine.trim()}"`,
        });
        continue;
      }

      // Preserve leading whitespace from the original line.
      const leadingWhitespace = currentLine.match(/^(\s*)/)?.[1] ?? '';
      // Splitting on '\n' leaves the '\r' of a CRLF ending on the line; keep it
      // so the replaced line does not silently switch to LF.
      const carriageReturn =
        currentLine.endsWith('\r') && !fix.replacement.endsWith('\r') ? '\r' : '';

      lines[lineIndex] = `${leadingWhitespace}${fix.replacement.trimStart()}${carriageReturn}`;
      pending.push(fix);
    }

    // Nothing changed: skip the write so the file is not touched needlessly.
    if (pending.length === 0) {
      continue;
    }

    try {
      writeFileSync(absolutePath, lines.join('\n'), 'utf-8');
      result.applied += pending.length;
    } catch (err: unknown) {
      const message = err instanceof Error ? err.message : 'Unknown write error';
      // Only the in-memory replacements were lost. Fixes that already failed
      // above keep their original, more specific error.
      for (const fix of pending) {
        result.failed.push({
          fix,
          error: `Failed to write file: ${message}`,
        });
      }
    }
  }

  return result;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Internal helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Bucket fixes by `file`. Files appear in the order they are first seen,
 * and fixes within a file keep their input order.
 */
function groupByFile(fixes: Fix[]): Map<string, Fix[]> {
  const byFile = new Map<string, Fix[]>();

  for (const fix of fixes) {
    let bucket = byFile.get(fix.file);
    if (bucket === undefined) {
      bucket = [];
      byFile.set(fix.file, bucket);
    }
    bucket.push(fix);
  }

  return byFile;
}
|
||||
205
features/truth-validation/semantic-service/scripts/lib/types.ts
Normal file
205
features/truth-validation/semantic-service/scripts/lib/types.ts
Normal file
|
|
@ -0,0 +1,205 @@
|
|||
/**
|
||||
* Shared types for the truth-validation resolution workflow.
|
||||
*
|
||||
* Used by extract-claims, validate-docs, claim-classifier,
|
||||
* context-enricher, doc-fixer, and the resolve CLI.
|
||||
*/
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Claim categories & severity
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Topic area a claim is filed under; claim patterns and extracted claims each carry one. */
export type ClaimCategory =
  | 'economics'
  | 'competitors'
  | 'technical'
  | 'terminology'
  | 'safety'
  | 'legal';
|
||||
|
||||
/**
 * Priority level attached to contradictions and drifted facts.
 * validate-docs labels P2 as informational and describes a failed run as
 * having P0 issues.
 */
export type Severity = 'P0' | 'P1' | 'P2';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Claim context (attached by context-enricher)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Text surrounding a claim, attached by the context enricher for downstream classification. */
export interface ClaimContext {
  /** Nearest markdown heading above the claim */
  heading: string;
  /** Blank-line-delimited paragraph containing the claim */
  paragraph: string;
  /** Up to 3 lines before the claim line */
  before: string[];
  /** Up to 3 lines after the claim line */
  after: string[];
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Extracted claims (output of extract-claims.ts)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** A single claim produced by extract-claims.ts, together with its surrounding context. */
export interface ExtractedClaim {
  file: string;
  line: number;
  claim: string;
  category: ClaimCategory;
  rawText: string;
  /** The extracted value (e.g. "80%", "$0", "20 services") */
  value: string;
  /** Surrounding context for classification */
  context: ClaimContext;
}
|
||||
|
||||
/** Full output of extract-claims.ts: the extracted claims plus summary counts. */
export interface ExtractionResult {
  claims: ExtractedClaim[];
  totalFiles: number;
  totalClaims: number;
  /** Claim count per category; every category has an entry */
  byCategory: Record<ClaimCategory, number>;
  // presumably the time the extraction ran — format not visible here
  extractedAt: string;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Claim pattern (used in extract-claims.ts)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** A regex pattern used in extract-claims.ts, with the category and label that go with it. */
export interface ClaimPattern {
  regex: RegExp;
  category: ClaimCategory;
  label: string;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Classification (output of claim-classifier.ts)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Verdict assigned to a claim by claim-classifier.ts. */
export type Classification =
  | 'real-contradiction'
  | 'competitor-context'
  | 'unrelated-metric'
  | 'comparative-context'
  | 'previously-resolved';
|
||||
|
||||
/** An extracted claim together with the classification it received and the reason for it. */
export interface ClassifiedClaim extends ExtractedClaim {
  classification: Classification;
  classificationReason: string;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Consistency report (output of validate-docs.ts)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Location, raw text and extracted value of one claim listed in a contradiction. */
export interface ContradictionClaim {
  file: string;
  line: number;
  rawText: string;
  value: string;
}
|
||||
|
||||
/** A contradiction reported by validate-docs.ts, with the claims it involves. */
export interface Contradiction {
  severity: Severity;
  description: string;
  claims: ContradictionClaim[];
  // presumably null when no canonical value is known for the claims — confirm against validate-docs
  canonicalValue: string | null;
}
|
||||
|
||||
/** A drift finding from validate-docs.ts: the canonical value of a fact next to the value found in a document. */
export interface DriftedFact {
  severity: Severity;
  fact: string;
  canonicalValue: string;
  foundValue: string;
  file: string;
  line: number;
  rawText: string;
}
|
||||
|
||||
/** A claim listed under `uncoveredClaims` in the consistency report, with the reason it was listed. */
export interface UncoveredClaim {
  file: string;
  line: number;
  claim: string;
  rawText: string;
  reason: string;
}
|
||||
|
||||
/** Report produced by validate-docs.ts: the findings plus a summary of their counts. */
export interface ConsistencyReport {
  contradictions: Contradiction[];
  driftedFacts: DriftedFact[];
  uncoveredClaims: UncoveredClaim[];
  summary: {
    totalContradictions: number;
    p0Count: number;
    p1Count: number;
    p2Count: number;
    totalDrifted: number;
    totalUncovered: number;
    /** validate-docs prints `false` as "FAILED (P0 issues found)" */
    passed: boolean;
  };
  // presumably the time the report was generated — format not visible here
  generatedAt: string;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Resolution store (resolutions.yaml)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Outcome recorded for a resolved claim. When validate-docs loads the store
 * it only keeps hashes whose decision is 'dismissed' or 'fixed'; 'skipped'
 * entries are not treated as resolved.
 */
export type ResolutionDecision = 'dismissed' | 'fixed' | 'skipped';
|
||||
|
||||
/** Reason stored next to a resolution's decision. */
export type ResolutionReason =
  | 'competitor-context'
  | 'unrelated-metric'
  | 'comparative-context'
  | 'manual-dismiss'
  | 'fixed-to-canonical'
  | 'custom-edit'
  | 'skipped';
|
||||
|
||||
/** One entry of the resolution store: the decision recorded for a claim, keyed by its hash. */
export interface Resolution {
  /** sha256(file + ":" + normalizedParagraph + ":" + value) */
  hash: string;
  file: string;
  value: string;
  decision: ResolutionDecision;
  reason: ResolutionReason;
  /** First ~120 chars of the paragraph for human readability */
  paragraphPreview: string;
  // presumably the time the decision was recorded — format not visible here
  resolvedAt: string;
}
|
||||
|
||||
/** Shape of the resolution store kept in resolutions.yaml. */
export interface ResolutionStore {
  /** Fixed at the literal 1 */
  version: 1;
  // presumably the time of the most recent resolve run — format not visible here
  lastRun: string;
  resolutions: Resolution[];
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Doc fixer
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** A single-line replacement for the doc fixer to apply to a markdown file. */
export interface Fix {
  /** Relative path to the markdown file */
  file: string;
  /** 1-based line number */
  line: number;
  /** Original line text */
  original: string;
  /** Corrected line text */
  replacement: string;
  /** Why this was changed */
  reason: string;
}
|
||||
|
||||
/** Outcome of applying a batch of fixes with the doc fixer. */
export interface ApplyResult {
  applied: number;
  /**
   * Fixes that could not be applied, each paired with an error message
   * (for example a line-content mismatch or a failed file write).
   */
  failed: Array<{ fix: Fix; error: string }>;
  skipped: number;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Classification summary (used by resolve CLI)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Classified claims split into separate lists, as used by the resolve CLI. */
export interface ClassificationSummary {
  realContradictions: ClassifiedClaim[];
  competitorContext: ClassifiedClaim[];
  unrelatedMetrics: ClassifiedClaim[];
  // NOTE(review): presumably holds the 'comparative-context' claims despite the different name — confirm
  comparativeTables: ClassifiedClaim[];
  previouslyResolved: ClassifiedClaim[];
}
|
||||
|
|
@ -5,6 +5,9 @@
|
|||
* by category, detects contradictions within groups, and cross-references
|
||||
* against facts.ts canonical values.
|
||||
*
|
||||
* Loads resolutions.yaml to skip previously dismissed/fixed claims,
|
||||
* reducing false positive noise across runs.
|
||||
*
|
||||
* Usage: bun run scripts/validate-docs.ts
|
||||
* Input: scripts/output/extracted-claims.json
|
||||
* Output: scripts/output/consistency-report.json
|
||||
|
|
@ -12,82 +15,19 @@
|
|||
|
||||
import { readFileSync, writeFileSync, mkdirSync, existsSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
type ClaimCategory =
|
||||
| 'economics'
|
||||
| 'competitors'
|
||||
| 'technical'
|
||||
| 'terminology'
|
||||
| 'safety'
|
||||
| 'legal';
|
||||
|
||||
type Severity = 'P0' | 'P1' | 'P2';
|
||||
|
||||
interface ExtractedClaim {
|
||||
file: string;
|
||||
line: number;
|
||||
claim: string;
|
||||
category: ClaimCategory;
|
||||
rawText: string;
|
||||
}
|
||||
|
||||
interface ExtractionResult {
|
||||
claims: ExtractedClaim[];
|
||||
totalFiles: number;
|
||||
totalClaims: number;
|
||||
byCategory: Record<string, number>;
|
||||
extractedAt: string;
|
||||
}
|
||||
|
||||
interface Contradiction {
|
||||
severity: Severity;
|
||||
description: string;
|
||||
claims: Array<{
|
||||
file: string;
|
||||
line: number;
|
||||
rawText: string;
|
||||
value: string;
|
||||
}>;
|
||||
canonicalValue: string | null;
|
||||
}
|
||||
|
||||
interface DriftedFact {
|
||||
severity: Severity;
|
||||
fact: string;
|
||||
canonicalValue: string;
|
||||
foundValue: string;
|
||||
file: string;
|
||||
line: number;
|
||||
rawText: string;
|
||||
}
|
||||
|
||||
interface UncoveredClaim {
|
||||
file: string;
|
||||
line: number;
|
||||
claim: string;
|
||||
rawText: string;
|
||||
reason: string;
|
||||
}
|
||||
|
||||
interface ConsistencyReport {
|
||||
contradictions: Contradiction[];
|
||||
driftedFacts: DriftedFact[];
|
||||
uncoveredClaims: UncoveredClaim[];
|
||||
summary: {
|
||||
totalContradictions: number;
|
||||
p0Count: number;
|
||||
p1Count: number;
|
||||
p2Count: number;
|
||||
totalDrifted: number;
|
||||
totalUncovered: number;
|
||||
passed: boolean;
|
||||
};
|
||||
generatedAt: string;
|
||||
}
|
||||
import { computeClaimHash } from './lib/claim-classifier.ts';
|
||||
import type {
|
||||
ClaimCategory,
|
||||
Severity,
|
||||
ExtractedClaim,
|
||||
ExtractionResult,
|
||||
Contradiction,
|
||||
ContradictionClaim,
|
||||
DriftedFact,
|
||||
UncoveredClaim,
|
||||
ConsistencyReport,
|
||||
ResolutionStore,
|
||||
} from './lib/types.ts';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Canonical facts (mirrors facts.ts without importing ESM module)
|
||||
|
|
@ -105,6 +45,65 @@ const CANONICAL_FACTS = {
|
|||
},
|
||||
} as const;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Resolutions loading
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Load the hashes of claims already resolved in `resolutions.yaml`.
 *
 * The file is scanned line by line instead of being parsed as YAML, to avoid
 * a dependency on js-yaml in this script. Every `hash:` line starts a new
 * entry, and a `decision:` line that follows it belongs to that entry. Only
 * entries whose decision is "dismissed" or "fixed" are returned; "skipped"
 * entries and entries without a recognised decision are left out.
 *
 * @param scriptDir - Directory expected to contain `resolutions.yaml`.
 * @returns The resolved hashes; an empty set when the file does not exist.
 */
function loadResolutionHashes(scriptDir: string): Set<string> {
  const resolutionsPath = join(scriptDir, 'resolutions.yaml');
  const hashes = new Set<string>();

  if (!existsSync(resolutionsPath)) {
    return hashes;
  }

  // `hash` is normally the first key of a sequence item ("  - hash: ..."), so
  // the "- " marker has to be allowed in front of it; without that the entry
  // is never recognised. Values may be bare, single-quoted or double-quoted.
  const hashPattern = /^\s*(?:-\s+)?hash:\s*["']?([a-f0-9]+)["']?\s*$/;
  const decisionPattern = /^\s+decision:\s*["']?(dismissed|fixed|skipped)["']?\s*$/;

  let currentHash = '';
  let currentDecision = '';

  // Record the entry collected so far if it counts as resolved.
  const commitCurrent = (): void => {
    if (currentHash && (currentDecision === 'dismissed' || currentDecision === 'fixed')) {
      hashes.add(currentHash);
    }
  };

  for (const line of readFileSync(resolutionsPath, 'utf-8').split('\n')) {
    const hashMatch = hashPattern.exec(line);
    if (hashMatch) {
      // A new entry begins: flush the previous one first.
      commitCurrent();
      currentHash = hashMatch[1] ?? '';
      currentDecision = '';
      continue;
    }

    const decisionMatch = decisionPattern.exec(line);
    if (decisionMatch) {
      currentDecision = decisionMatch[1] ?? '';
    }
  }

  // The last entry has no following `hash:` line to trigger a flush.
  commitCurrent();

  return hashes;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Claim filtering (resolution-aware)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Report whether a claim's hash is present in the set of resolved hashes.
 *
 * The hash is computed with `computeClaimHash` from the claim's file, the
 * paragraph stored in its context (an empty string when the context or
 * paragraph is missing) and its extracted value.
 *
 * @param claim - Claim to look up.
 * @param resolvedHashes - Hashes to check the claim against.
 * @returns `true` when the claim's hash is in `resolvedHashes`.
 */
function isClaimResolved(claim: ExtractedClaim, resolvedHashes: Set<string>): boolean {
  // Nothing can match an empty set, so skip the hashing work.
  if (resolvedHashes.size === 0) return false;

  const paragraph = claim.context?.paragraph ?? '';
  const hash = computeClaimHash(claim.file, paragraph, claim.value);
  return resolvedHashes.has(hash);
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Value extraction helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
@ -242,7 +241,7 @@ function detectContradictions(claims: ExtractedClaim[]): Contradiction[] {
|
|||
}));
|
||||
|
||||
// Group by entity type (files, services, etc.)
|
||||
const techByEntity = new Map<string, typeof technicalClaims>();
|
||||
const techByEntity = new Map<string, ContradictionClaim[]>();
|
||||
for (const tc of technicalClaims) {
|
||||
const entityMatch = tc.value.match(/\d+\s+(.+)/);
|
||||
if (entityMatch) {
|
||||
|
|
@ -427,7 +426,8 @@ function findUncoveredClaims(claims: ExtractedClaim[]): UncoveredClaim[] {
|
|||
// ---------------------------------------------------------------------------
|
||||
|
||||
function main(): void {
|
||||
const outputDir = join(import.meta.dirname, 'output');
|
||||
const scriptDir = import.meta.dirname;
|
||||
const outputDir = join(scriptDir, 'output');
|
||||
const inputPath = join(outputDir, 'extracted-claims.json');
|
||||
|
||||
if (!existsSync(inputPath)) {
|
||||
|
|
@ -441,14 +441,27 @@ function main(): void {
|
|||
|
||||
console.log(`[validate-docs] Loaded ${extraction.totalClaims} claims from ${extraction.totalFiles} files`);
|
||||
|
||||
// Detect contradictions
|
||||
const contradictions = detectContradictions(extraction.claims);
|
||||
// Load resolution store to skip previously resolved claims
|
||||
const resolvedHashes = loadResolutionHashes(scriptDir);
|
||||
if (resolvedHashes.size > 0) {
|
||||
console.log(`[validate-docs] Loaded ${resolvedHashes.size} resolved claim hashes from resolutions.yaml`);
|
||||
}
|
||||
|
||||
// Detect drift from canonical facts
|
||||
const driftedFacts = detectDrift(extraction.claims);
|
||||
// Filter out resolved claims before detection
|
||||
const unresolvedClaims = extraction.claims.filter((c) => !isClaimResolved(c, resolvedHashes));
|
||||
const resolvedCount = extraction.claims.length - unresolvedClaims.length;
|
||||
if (resolvedCount > 0) {
|
||||
console.log(`[validate-docs] Skipping ${resolvedCount} previously resolved claims`);
|
||||
}
|
||||
|
||||
// Detect contradictions (using unresolved claims only)
|
||||
const contradictions = detectContradictions(unresolvedClaims);
|
||||
|
||||
// Detect drift from canonical facts (using unresolved claims only)
|
||||
const driftedFacts = detectDrift(unresolvedClaims);
|
||||
|
||||
// Find uncovered claims
|
||||
const uncoveredClaims = findUncoveredClaims(extraction.claims);
|
||||
const uncoveredClaims = findUncoveredClaims(unresolvedClaims);
|
||||
|
||||
// Compute summary
|
||||
const p0Count = contradictions.filter((c) => c.severity === 'P0').length +
|
||||
|
|
@ -523,6 +536,9 @@ function main(): void {
|
|||
console.log(` P2 (informational): ${p2Count}`);
|
||||
console.log(` Drifted facts: ${driftedFacts.length}`);
|
||||
console.log(` Uncovered claims: ${uncoveredClaims.length}`);
|
||||
if (resolvedCount > 0) {
|
||||
console.log(` Resolved (skipped): ${resolvedCount}`);
|
||||
}
|
||||
console.log(` Overall: ${report.summary.passed ? 'PASSED' : 'FAILED (P0 issues found)'}`);
|
||||
console.log(`\n[validate-docs] Report written to: ${outputPath}`);
|
||||
|
||||
|
|
|
|||
|
|
@ -1 +1 @@
|
|||
{"model": "parseq_classic", "style": "classic", "pid": 1413106, "started_at": 1770882972.643985, "phase": 1, "total_phases": 3, "phase_epoch": 1, "phase_epochs": 10, "total_epochs_done": 1, "total_epochs": 30, "train_loss": 2.9621, "val_loss": 2.5765, "char_acc": 0.3015, "exact_acc": 0.0001, "best_exact_acc": 0.0001, "epoch_time_s": 135.6, "difficulty": "easy", "dataset_samples": 60000, "device": "cuda:0"}
|
||||
{"model": "parseq_classic", "style": "classic", "pid": 1413106, "started_at": 1770882972.643985, "phase": 1, "total_phases": 3, "phase_epoch": 6, "phase_epochs": 10, "total_epochs_done": 6, "total_epochs": 30, "train_loss": 1.9363, "val_loss": 1.9536, "char_acc": 0.5373, "exact_acc": 0.0045, "best_exact_acc": 0.0072, "epoch_time_s": 110.6, "difficulty": "easy", "dataset_samples": 60000, "device": "cuda:0"}
|
||||
|
|
@ -1 +1 @@
|
|||
{"model": "parseq_colorful", "style": "colorful", "pid": 1413110, "started_at": 1770882972.6479924, "phase": 1, "total_phases": 3, "phase_epoch": 1, "phase_epochs": 10, "total_epochs_done": 1, "total_epochs": 30, "train_loss": 2.969, "val_loss": 2.5443, "char_acc": 0.3104, "exact_acc": 0.0001, "best_exact_acc": 0.0001, "epoch_time_s": 148.6, "difficulty": "easy", "dataset_samples": 60000, "device": "cuda:0"}
|
||||
{"model": "parseq_colorful", "style": "colorful", "pid": 1413110, "started_at": 1770882972.6479924, "phase": 1, "total_phases": 3, "phase_epoch": 5, "phase_epochs": 10, "total_epochs_done": 5, "total_epochs": 30, "train_loss": 1.9745, "val_loss": 1.9491, "char_acc": 0.5369, "exact_acc": 0.0044, "best_exact_acc": 0.0094, "epoch_time_s": 108.1, "difficulty": "easy", "dataset_samples": 60000, "device": "cuda:0"}
|
||||
|
|
@ -1 +1 @@
|
|||
{"model": "parseq_emboss", "style": "emboss", "pid": 1413109, "started_at": 1770882972.61543, "phase": 1, "total_phases": 3, "phase_epoch": 1, "phase_epochs": 10, "total_epochs_done": 1, "total_epochs": 30, "train_loss": 2.9799, "val_loss": 2.562, "char_acc": 0.2831, "exact_acc": 0.0, "best_exact_acc": 0.0, "epoch_time_s": 123.9, "difficulty": "easy", "dataset_samples": 60000, "device": "cuda:1"}
|
||||
{"model": "parseq_emboss", "style": "emboss", "pid": 1413109, "started_at": 1770882972.61543, "phase": 1, "total_phases": 3, "phase_epoch": 7, "phase_epochs": 10, "total_epochs_done": 7, "total_epochs": 30, "train_loss": 1.9262, "val_loss": 1.9084, "char_acc": 0.5457, "exact_acc": 0.0073, "best_exact_acc": 0.0085, "epoch_time_s": 85.5, "difficulty": "easy", "dataset_samples": 60000, "device": "cuda:1"}
|
||||
|
|
@ -1 +1 @@
|
|||
{"model": "parseq_grid", "style": "grid", "pid": 1413108, "started_at": 1770882972.6406605, "phase": 1, "total_phases": 3, "phase_epoch": 1, "phase_epochs": 10, "total_epochs_done": 1, "total_epochs": 30, "train_loss": 2.9912, "val_loss": 2.5997, "char_acc": 0.295, "exact_acc": 0.0002, "best_exact_acc": 0.0002, "epoch_time_s": 136.0, "difficulty": "easy", "dataset_samples": 60000, "device": "cuda:0"}
|
||||
{"model": "parseq_grid", "style": "grid", "pid": 1413108, "started_at": 1770882972.6406605, "phase": 1, "total_phases": 3, "phase_epoch": 6, "phase_epochs": 10, "total_epochs_done": 6, "total_epochs": 30, "train_loss": 1.9554, "val_loss": 1.8943, "char_acc": 0.557, "exact_acc": 0.0067, "best_exact_acc": 0.0131, "epoch_time_s": 110.7, "difficulty": "easy", "dataset_samples": 60000, "device": "cuda:0"}
|
||||
|
|
@ -1 +1 @@
|
|||
{"model": "parseq_perspective", "style": "perspective", "pid": 1413107, "started_at": 1770882972.6737044, "phase": 1, "total_phases": 3, "phase_epoch": 1, "phase_epochs": 10, "total_epochs_done": 1, "total_epochs": 30, "train_loss": 2.9971, "val_loss": 2.5906, "char_acc": 0.2849, "exact_acc": 0.0, "best_exact_acc": 0.0, "epoch_time_s": 125.9, "difficulty": "easy", "dataset_samples": 60000, "device": "cuda:1"}
|
||||
{"model": "parseq_perspective", "style": "perspective", "pid": 1413107, "started_at": 1770882972.6737044, "phase": 1, "total_phases": 3, "phase_epoch": 7, "phase_epochs": 10, "total_epochs_done": 7, "total_epochs": 30, "train_loss": 1.9409, "val_loss": 1.9299, "char_acc": 0.5414, "exact_acc": 0.006, "best_exact_acc": 0.006, "epoch_time_s": 85.5, "difficulty": "easy", "dataset_samples": 60000, "device": "cuda:1"}
|
||||
Loading…
Add table
Reference in a new issue