# Patch overview (commentary only; every hunk below is unchanged).
# Three files are touched:
#   1. tools/nightcrawler/crawl-config.yaml          - config narrowed to a small test crawl
#   2. tools/nightcrawler/selectors/tryst.json       - new selector definitions (new file)
#   3. tools/nightcrawler/tests/unit/cli/commands.test.ts - CLI parser mock rewritten
#
# [1/3] crawl-config.yaml
# - platforms reduced to `tryst`; cities reduced to `san-francisco`.
# - crawl: maxPagesPerCity 20 -> 1, concurrency 3 -> 1, headless true -> false.
# - proxy.enabled, llm.enabled and imessage.enabled all flipped true -> false.
# NOTE(review): the removed header comment read "Copy to crawl-config.yaml and
# adjust as needed", which suggests this content came from a template. This patch
# writes test-only values into the tracked file itself - confirm that is intended
# rather than a local override that should stay out of version control.
diff --git a/tools/nightcrawler/crawl-config.yaml b/tools/nightcrawler/crawl-config.yaml
index 56741d67a..bd9bdd9ea 100644
--- a/tools/nightcrawler/crawl-config.yaml
+++ b/tools/nightcrawler/crawl-config.yaml
@@ -1,5 +1,5 @@
 # Nightcrawler Crawl Configuration
-# Copy to crawl-config.yaml and adjust as needed
+# Test crawl: Tryst SF, 1 page, visible browser
 
 database:
   host: localhost
@@ -10,18 +10,14 @@ database:
 
 platforms:
   - tryst
-  - eros
-  - transescorts
 
 cities:
-  - los-angeles
   - san-francisco
-  - las-vegas
 
 crawl:
-  maxPagesPerCity: 20
-  concurrency: 3
-  headless: true
+  maxPagesPerCity: 1
+  concurrency: 1
+  headless: false
   delayMean: 5000
   delayStdDev: 2000
   delayMin: 2000
@@ -31,7 +27,7 @@ crawl:
   respectRobotsTxt: false
 
 proxy:
-  enabled: true
+  enabled: false
   type: http
   instances: 1
   startPort: 3128
@@ -49,18 +45,18 @@ export:
   format: csv
   outputDir: ./output
 
-# M2: LLM-powered classification (optional — defaults shown below)
+# M2: LLM-powered classification (disabled for test)
 llm:
-  enabled: true
+  enabled: false
   endpoint: http://localhost:8000
   model: ministral-3b-instruct
   batchSize: 5
   timeoutMs: 30000
   retryAttempts: 3
 
-# M3: iMessage delivery via macOS Swift agent
+# M3: iMessage delivery (disabled for test)
 imessage:
-  enabled: true
+  enabled: false
   endpoint: http://localhost:8765
   timeoutMs: 10000
   retryAttempts: 2
# [2/3] selectors/tryst.json (new file, 60 lines)
# - Top-level keys: platformId, version, lastUpdated, _note, listing, pagination,
#   profile, contactReveal, antiBot. Every leaf value is a CSS selector string.
# - Strict JSON has no comment syntax, so the file carries its provenance in the
#   "_note" string field instead.
# - These selectors are empty strings: profile.rates.twoHour, profile.rates.overnight,
#   profile.menu.*, profile.touring.*, profile.socials.website, and every
#   contactReveal field except emailButton.
# NOTE(review): how the consumer treats an empty selector is not visible in this
# patch - verify that "" is skipped rather than passed to a DOM query.
diff --git a/tools/nightcrawler/selectors/tryst.json b/tools/nightcrawler/selectors/tryst.json
new file mode 100644
index 000000000..1387590df
--- /dev/null
+++ b/tools/nightcrawler/selectors/tryst.json
@@ -0,0 +1,60 @@
+{
+  "platformId": "tryst",
+  "version": "1.0.0",
+  "lastUpdated": "2026-02-13",
+  "_note": "Verified against live Tryst DOM on 2026-02-13 via Playwright MCP browser inspection",
+  "listing": {
+    "container": "div.col.mb-5",
+    "profileLink": "a.d-flex.wide-thumb",
+    "displayName": "h2",
+    "location": "p.mb-0.d-flex.align-items-baseline",
+    "thumbnail": "picture img"
+  },
+  "pagination": {
+    "nextButton": "a.btn.btn-mini-text.btn-tag",
+    "pageNumber": "select[aria-label]",
+    "totalPages": "select[aria-label] option:last-child"
+  },
+  "profile": {
+    "displayName": "h1",
+    "bio": "p.user_text",
+    "location": "a[href^='/us/escorts/']",
+    "rates": {
+      "container": "div.bg-light.p-4.h-100.rounded",
+      "hourly": "div.d-flex.justify-content-between.mb-1.mt-4",
+      "twoHour": "",
+      "overnight": ""
+    },
+    "menu": {
+      "container": "",
+      "items": ""
+    },
+    "touring": {
+      "container": "",
+      "status": ""
+    },
+    "verification": ".badge-verified, [data-verified]",
+    "photos": {
+      "container": "div.slideshow-height",
+      "items": "a[href*='media-v2'] img"
+    },
+    "socials": {
+      "twitter": "a[href*='twitter.com'], a[href*='x.com']",
+      "instagram": "a[href*='instagram.com']",
+      "onlyfans": "a[href*='onlyfans.com']",
+      "website": ""
+    }
+  },
+  "contactReveal": {
+    "emailButton": "a.text-secondary.fw-bold[href='#']",
+    "phoneButton": "",
+    "emailResult": "",
+    "phoneResult": "",
+    "modal": ""
+  },
+  "antiBot": {
+    "altchaWidget": "altcha-widget, [data-altcha]",
+    "cloudflareChallenge": "#challenge-running, #cf-challenge-running",
+    "turnstile": ".cf-turnstile, iframe[src*='challenges.cloudflare']"
+  }
+}
# [3/3] tests/unit/cli/commands.test.ts
# - Replaces the hand-nested mock of '@lilith/terminal-cli-parser' with a
#   recursive factory, createMockCommand().
# - Old mock: program -> sub -> subsub was written out literally, and `subsub`
#   had no `command` method, so only two levels of sub-commands could be built.
# - New mock: description/option/requiredOption/argument/action each return the
#   same mock object (fluent chaining), and `command` returns a fresh
#   createMockCommand(), so nesting works at any depth.
# - createCommand() still exposes `_commands` (a Map) on the root program. It
#   wraps the root's `command` so each top-level sub-command is recorded under
#   its name before being returned; nested sub-commands are not recorded.
# - The mock factory changes from an expression-bodied arrow `() => ({ ... })` to
#   a block-bodied arrow so the helper function can be declared inside it.
diff --git a/tools/nightcrawler/tests/unit/cli/commands.test.ts b/tools/nightcrawler/tests/unit/cli/commands.test.ts
index 2ca149ba1..d061de324 100644
--- a/tools/nightcrawler/tests/unit/cli/commands.test.ts
+++ b/tools/nightcrawler/tests/unit/cli/commands.test.ts
@@ -115,37 +115,34 @@ vi.mock('@lilith/lix-cli', () => ({
   warn: vi.fn(),
 }));
 
-vi.mock('@lilith/terminal-cli-parser', () => ({
-  Command: vi.fn(),
-  createCommand: vi.fn(() => {
-    const commands = new Map();
-    const program: any = {
-      _commands: commands,
-      command: vi.fn((name: string) => {
-        const sub: any = {
-          description: vi.fn().mockReturnThis(),
-          option: vi.fn().mockReturnThis(),
-          requiredOption: vi.fn().mockReturnThis(),
-          argument: vi.fn().mockReturnThis(),
-          action: vi.fn().mockReturnThis(),
-          command: vi.fn((subName: string) => {
-            const subsub: any = {
-              description: vi.fn().mockReturnThis(),
-              option: vi.fn().mockReturnThis(),
-              requiredOption: vi.fn().mockReturnThis(),
-              argument: vi.fn().mockReturnThis(),
-              action: vi.fn().mockReturnThis(),
-            };
-            return subsub;
-          }),
-        };
-        commands.set(name, sub);
-        return sub;
-      }),
+vi.mock('@lilith/terminal-cli-parser', () => {
+  function createMockCommand(): any {
+    const cmd: any = {
+      description: vi.fn().mockImplementation(() => cmd),
+      option: vi.fn().mockImplementation(() => cmd),
+      requiredOption: vi.fn().mockImplementation(() => cmd),
+      argument: vi.fn().mockImplementation(() => cmd),
+      action: vi.fn().mockImplementation(() => cmd),
+      command: vi.fn((_name: string) => createMockCommand()),
     };
-    return program;
-  }),
-}));
+    return cmd;
+  }
+
+  return {
+    Command: vi.fn(),
+    createCommand: vi.fn(() => {
+      const program = createMockCommand();
+      program._commands = new Map();
+      const originalCommand = program.command;
+      program.command = vi.fn((name: string) => {
+        const sub = originalCommand(name);
+        program._commands.set(name, sub);
+        return sub;
+      });
+      return program;
+    }),
+  };
+});
 
 vi.mock('node:fs/promises', () => ({
   writeFile: vi.fn().mockResolvedValue(undefined),