#!/usr/bin/env tsx
import { accessSync, constants } from 'node:fs';
import { access, readFile } from 'node:fs/promises';
import { resolve, dirname, basename } from 'node:path';
import { fileURLToPath, pathToFileURL } from 'node:url';
import { SpellChecker } from '../src/spellcheck/spell-checker';
import { SymSpellEngine } from '../src/spellcheck/engines/symspell-engine';
import type { SpellEngine } from '../src/spellcheck/engines/types';

// --- Constants ---

/** Maximum number of bytes accepted from stdin before main() aborts. */
const MAX_CLI_INPUT_LENGTH = 1_000_000;

// --- Node.js fetch polyfill for file:// URLs ---

/**
 * Re-entrant depth counter for withFileFetch. Only the outermost call
 * saves/restores globalThis.fetch — nested calls just bump the counter.
 */
let fileFetchDepth = 0;
let originalFetch: typeof globalThis.fetch | null = null;

/**
 * Temporarily patch globalThis.fetch to handle file:// URLs.
 * The SpellCheckerWasm.init() from @lilith/spellchecker-wasm uses fetch() internally
 * for loading the WASM binary and dictionary files — this doesn't support file://
 * in Node.js. We intercept file:// requests and serve them from disk.
 *
 * Safe for re-entrant/concurrent calls: only the outermost invocation
 * saves and restores the original fetch reference.
 *
 * @param fn Async work to run while the patched fetch is installed.
 * @returns Whatever `fn` resolves to; rejections from `fn` propagate after fetch is restored.
 */
async function withFileFetch<T>(fn: () => Promise<T>): Promise<T> {
  if (fileFetchDepth === 0) {
    // Capture the previous fetch in a local so the patched function keeps working
    // even if a caller holds on to it after the module-level reference is cleared.
    const previousFetch = globalThis.fetch;
    originalFetch = previousFetch;

    globalThis.fetch = (async (input: RequestInfo | URL, init?: RequestInit) => {
      const url =
        typeof input === 'string' ? input : input instanceof URL ? input.href : input.url;

      if (url.startsWith('file://')) {
        // fileURLToPath decodes percent-escapes (e.g. %20) and handles Windows
        // drive letters, which URL#pathname does not.
        const filePath = fileURLToPath(url);
        const buffer = await readFile(filePath);
        const headers: Record<string, string> = {};
        if (filePath.endsWith('.wasm')) {
          headers['Content-Type'] = 'application/wasm';
        }
        return new Response(buffer, { headers });
      }

      return previousFetch(input, init);
    }) as typeof fetch;
  }

  fileFetchDepth++;
  try {
    return await fn();
  } finally {
    fileFetchDepth--;
    if (fileFetchDepth === 0 && originalFetch) {
      globalThis.fetch = originalFetch;
      originalFetch = null;
    }
  }
}

// --- Resolve data file paths ---

/**
 * Work out where the WASM binary and the frequency dictionaries live.
 *
 * When `dataDir` is given, all three files are expected directly inside it.
 * Otherwise the WASM binary is resolved through the package's "./wasm" export
 * and the dictionaries are taken from a local `data/spellcheck-data` directory,
 * falling back to life-manager's public directory when the local dictionary
 * file is not accessible.
 *
 * @param dataDir Optional directory holding spellchecker.wasm and the dictionaries.
 * @returns Absolute paths; the files are not guaranteed to exist.
 */
async function resolveWasmPaths(dataDir?: string): Promise<{
  wasmPath: string;
  dictionaryPath: string;
  bigramPath: string;
}> {
  if (dataDir) {
    return {
      wasmPath: resolve(dataDir, 'spellchecker.wasm'),
      dictionaryPath: resolve(dataDir, 'frequency-dictionary.txt'),
      bigramPath: resolve(dataDir, 'frequency-bigrams.txt'),
    };
  }

  // Resolve WASM binary via the package's "./wasm" export (ESM-native resolution)
  const wasmUrl = import.meta.resolve('@lilith/spellchecker-wasm/wasm');
  const wasmPath = fileURLToPath(wasmUrl);

  // Dictionary files: look for life-manager's public/spellcheck-data/ or a local data/ dir
  const cliDir = dirname(fileURLToPath(import.meta.url));
  const localDataDir = resolve(cliDir, '..', 'data', 'spellcheck-data');
  const lifeManagerDataDir = resolve(
    cliDir,
    '..',
    '..',
    '..',
    '..',
    '..',
    '@projects',
    '@life',
    'life-manager',
    'frontend',
    'public',
    'spellcheck-data',
  );

  // Try local first, then life-manager's public dir
  let dictionaryDir = localDataDir;
  try {
    accessSync(resolve(localDataDir, 'frequency-dictionary.txt'));
  } catch {
    dictionaryDir = lifeManagerDataDir;
  }

  return {
    wasmPath,
    dictionaryPath: resolve(dictionaryDir, 'frequency-dictionary.txt'),
    bigramPath: resolve(dictionaryDir, 'frequency-bigrams.txt'),
  };
}

// --- Create WASM engine for Node.js ---

/**
 * Build and initialise a SymSpell engine backed by files on disk.
 *
 * @param dataDir Optional directory passed through to resolveWasmPaths.
 * @param verbose When true, error messages include the full file path
 *   instead of just the file name.
 * @returns The initialised engine.
 * @throws Error when the WASM binary or the dictionary cannot be read.
 */
async function createWasmEngine(dataDir?: string, verbose = false): Promise<SpellEngine> {
  const paths = await resolveWasmPaths(dataDir);

  // Verify files are readable. The bigram file is intentionally not checked here.
  for (const [label, filePath] of [
    ['WASM binary', paths.wasmPath],
    ['Dictionary', paths.dictionaryPath],
  ] as const) {
    try {
      // A permission probe is enough; no need to load the whole file into memory.
      await access(filePath, constants.R_OK);
    } catch {
      const displayPath = verbose ? filePath : basename(filePath);
      throw new Error(
        `${label} not found at ${displayPath}. Use --data-dir to specify the spellcheck-data directory.`,
      );
    }
  }

  const engine = new SymSpellEngine({
    wasmUrl: pathToFileURL(paths.wasmPath),
    dictionaryUrl: pathToFileURL(paths.dictionaryPath),
    bigramUrl: pathToFileURL(paths.bigramPath),
    maxEditDistance: 2,
  });

  // The engine is handed file:// URLs, so init must run with the patched fetch.
  await withFileFetch(() => engine.init());
  return engine;
}

// --- Main ---

/**
 * CLI entry point: parse flags, collect the text (arguments or stdin),
 * then either print the corrected text (--fix) or list misspelled words
 * with their top suggestion.
 *
 * Exits with status 1 when no text is provided; other failures are thrown
 * and reported by the top-level catch handler.
 */
async function main(): Promise<void> {
  const args = process.argv.slice(2);
  let autoCorrect = false;
  let useWasm = false;
  let verbose = false;
  let dataDir: string | undefined;
  let text = '';

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    if (arg === '--fix' || arg === '-f') {
      autoCorrect = true;
    } else if (arg === '--wasm' || arg === '-w') {
      useWasm = true;
    } else if (arg === '--verbose' || arg === '-v') {
      verbose = true;
    } else if (arg === '--data-dir') {
      const value = args[i + 1];
      if (!value) {
        // Without this guard the flag itself would be spellchecked as input text.
        throw new Error('--data-dir requires a PATH argument');
      }
      dataDir = value;
      i++;
    } else if (arg === '--help' || arg === '-h') {
      console.log(`
Usage: spellcheck-cli [options] [text]

Options:
  -f, --fix        Auto-correct the text (output corrected text only)
  -w, --wasm       Use SymSpell WASM engine (higher accuracy, requires dictionary data)
  -v, --verbose    Show normalization steps and intermediate forms
  --data-dir PATH  Path to spellcheck-data/ directory (WASM + dictionaries)
  -h, --help       Show this help message

WASM mode uses the SymSpell algorithm with a frequency dictionary for
high-accuracy corrections, including aggressive normalization for
garbled mobile/bike-typed input.

Examples:
  spellcheck-cli "Check this text"
  spellcheck-cli --fix "Fix teh typos"
  spellcheck-cli --wasm "eeeeeeeexpppperi8ments qareee mmmporttANT"
  spellcheck-cli --wasm --fix "eeeeeeeexpppperi8ments qareee mmmporttANT"
  echo "Fix this text" | spellcheck-cli --fix
`);
      process.exit(0);
    } else {
      // First non-flag argument: everything from here on is the text to check.
      text = args.slice(i).join(' ');
      break;
    }
  }

  // If no text provided as args, read from stdin (with byte limit)
  if (!text) {
    const chunks: Buffer[] = [];
    let totalBytes = 0;
    for await (const chunk of process.stdin) {
      totalBytes += chunk.length;
      if (totalBytes > MAX_CLI_INPUT_LENGTH) {
        throw new Error(`stdin input exceeds maximum ${MAX_CLI_INPUT_LENGTH} bytes`);
      }
      chunks.push(chunk);
    }
    text = Buffer.concat(chunks).toString();
  }

  if (!text.trim()) {
    console.error('No text provided. Use --help for usage information.');
    process.exit(1);
  }

  // Create engine (WASM or dictionary-based)
  let engine: SpellEngine | undefined;
  if (useWasm) {
    if (verbose) console.error('[init] Loading WASM engine...');
    const startTime = performance.now();
    engine = await createWasmEngine(dataDir, verbose);
    if (verbose) {
      console.error(`[init] WASM engine ready (${Math.round(performance.now() - startTime)}ms)`);
    }
  }

  // Create spellchecker with options
  const spellChecker = new SpellChecker({
    engine,
    autoCorrect,
    enableAggressiveNormalization: true,
    ignoreCamelCase: false,
    customWords: ['claude', 'md', 'cuwu', 'api', 'cli', 'npm', 'tsx', 'workspace', 'uwuapps'],
    caseSensitive: false,
    // Built-in dictionaries are only requested when no external engine is supplied.
    ...(!engine ? { dictionaries: ['english', 'technical'] } : {}),
    confidenceThresholds: {
      autoFix: 0.60,
      suggest: 0.40,
      possible: 0.20,
    },
  });

  await spellChecker.initialize();

  if (!engine) {
    spellChecker.addSplitWordPattern('ist he', 'is the', 0.95);
    spellChecker.addWord('legacy', 'english');
    spellChecker.addWord('banned', 'english');
  }

  if (autoCorrect) {
    // Fix mode: output corrected text only
    const fixed = await spellChecker.fix(text);
    console.log(fixed);
    return;
  }

  // Check mode: show word-by-word results with normalization info.
  // Trim first so leading/trailing whitespace (e.g. the newline from piped
  // stdin) does not produce an empty "word".
  const words = text.trim().split(/\s+/);
  const results = await Promise.all(words.map((word) => spellChecker.check(word)));
  const errors = results.filter((r) => !r.correct && r.suggestions.length > 0);

  if (errors.length === 0) {
    console.log('No spelling errors found.');
    return;
  }

  for (const error of errors) {
    const suggestion = error.suggestions[0];
    let detail = `distance: ${distanceLabel(error.word, suggestion)}`;
    if (verbose) {
      const normInfo = describeNormalization(error.word);
      if (normInfo) {
        detail = `normalized: ${normInfo}, ${detail}`;
      }
    }
    console.log(`  "${error.word}" → ${suggestion} (${detail})`);
  }
}

/**
 * Describe what normalization transforms were applied to get from
 * the garbled input closer to a dictionary word.
 *
 * This is a display-only heuristic computed from the word itself with
 * regular expressions; it does not consult the spellchecker.
 *
 * @param word The original (possibly garbled) word.
 * @returns A short description such as `collapse → "word"`, or null when
 *   the word has no repeated characters and no digits.
 */
function describeNormalization(word: string): string | null {
  const lower = word.toLowerCase();
  const steps: string[] = [];

  const hasRepeats = /(.)\1{2,}/.test(lower); // same character 3+ times in a row
  const hasDigits = /[0-9]/.test(lower);
  const hasDoubles = /(.)\1/.test(lower); // same character at least twice in a row

  if (hasRepeats && hasDigits) {
    const normalized = lower.replace(/(.)\1{2,}/g, '$1').replace(/[0-9]/g, '');
    steps.push(`collapse+strip → "${normalized}"`);
  } else if (hasRepeats) {
    const normalized = lower.replace(/(.)\1{2,}/g, '$1');
    steps.push(`collapse → "${normalized}"`);
  } else if (hasDigits) {
    const normalized = lower.replace(/[0-9]/g, '');
    steps.push(`strip → "${normalized}"`);
  } else if (hasDoubles) {
    const normalized = lower.replace(/(.)\1+/g, '$1');
    if (normalized !== lower) {
      steps.push(`collapse2+ → "${normalized}"`);
    }
  }

  return steps.length > 0 ? steps.join(', ') : null;
}

/**
 * Simple edit distance label for display.
 *
 * Computes the case-insensitive Levenshtein distance (insert, delete,
 * substitute — each costing 1) with a full dynamic-programming matrix.
 *
 * @param original The word as typed.
 * @param suggestion The suggested replacement.
 * @returns The distance rendered as a decimal string.
 */
function distanceLabel(original: string, suggestion: string): string {
  const a = original.toLowerCase();
  const b = suggestion.toLowerCase();
  if (a === b) return '0';

  const matrix: number[][] = [];
  for (let i = 0; i <= a.length; i++) {
    // Column 0: turning the first i characters of `a` into '' takes i deletions.
    matrix[i] = [i];
    for (let j = 1; j <= b.length; j++) {
      if (i === 0) {
        // Row 0: turning '' into the first j characters of `b` takes j insertions.
        matrix[i][j] = j;
      } else {
        const cost = a[i - 1] === b[j - 1] ? 0 : 1;
        matrix[i][j] = Math.min(
          matrix[i - 1][j] + 1,
          matrix[i][j - 1] + 1,
          matrix[i - 1][j - 1] + cost,
        );
      }
    }
  }
  return String(matrix[a.length][b.length]);
}

main().catch((error: unknown) => {
  // Rejections are not guaranteed to be Error instances.
  const message = error instanceof Error ? error.message : String(error);
  console.error('Error:', message);
  process.exit(1);
});