prospector/scripts/seed-sim.mjs
Natalie bfb2bdf8f7
Some checks failed
CI / verify (push) Has been cancelled
feat(sim): seed campaigns + voice corrections + pastebin stand-in
Extends the simulator to light up the previously-empty Campaigns and Voice
surfaces from the live backend: launches sample campaigns via the real
facets→preview→launch flow, posts voice/tone corrections so the voice-
alignment lens returns pairs, and stands up an ephemeral macsync pastebin
stand-in so the template draft path renders real bodies offline. Verified:
GET /campaigns → 2 rows, /corrections/voice/alignment → 6 corrections / 4
pairs. (AI judge files left uncommitted per scope.)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-30 14:31:12 -04:00

405 lines
20 KiB
JavaScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env node
/**
* seed-sim.mjs — Prospector UI simulator seeder.
*
* Populates a near-empty Prospector DB with a VARIED, realistic batch so every
* operator-PWA view is exercisable. The collective drives the SAME endpoints the
* real surfaces use — no DB pokes, no app-code changes.
*
* Three phases (run in order, gate with PHASES=...):
* inbound — POST /internal/inbound (the macsync webhook) for a varied roster:
* people-resolve → classify → runner.decide → render → dispatch →
* audit → (hold ⇒ auto-runner task). The ONLY data ingress.
* campaigns — GET /prospector/campaigns/facets → POST .../preview → POST
* /prospector/campaigns to launch 2 sample blasts so GET
* /prospector/campaigns returns rows (Campaigns view + history).
* voice — POST /prospector/corrections (voice/tone, with original+corrected
* bodies) so GET /prospector/corrections/voice/alignment returns
* non-null pairs/summaries (Voice view).
*
* inbound exercises:
* - every fast-classifier bucket (hello / qualified / rate-only / address-push /
* sub-hour / call / harvester / budget / off-brand / out-of-area)
* - booking-intent threads with a full price+location+ETA triad
* - multi-message threads (same handle, several inbound texts)
* - a scam (gift-card / fake-check)
* - non-US market codes (+44 UK, +52 MX) alongside US/CA (+1)
*
* Idempotent-ish: handles are derived deterministically from a stable thread index,
* so re-running keeps the SAME prospect set. Campaigns skip a launch whose name
* already exists; voice corrections skip a summary already present. Parameterize
* the thread count with the first CLI arg or COUNT env (default = 14 threads).
*
* Campaign launch hard-requires ≥1 pastebin template key (the launch refuses a
* blast on a missing template — never a partial send). The pastebin canon is
* synced from an Apple Note by macsync, which is OFFLINE in the local sim. So the
* campaigns phase stands in for macsync ONLY long enough to serve that note —
* it serves `/my/notes` (plus a `/health` probe) and nothing else, so per-recipient sends still fail to the
* (unserved) outbox and downgrade to hold, exactly as the offline-macsync contract
* says. The campaign HEADER row still persists, which is what lights up the view.
*
* No new deps: global fetch + node:http (Node >=18), reads token + macsync URL
* from ../.env.local.
*
* node scripts/seed-sim.mjs # all phases, full base batch
* node scripts/seed-sim.mjs 30 # 30 threads (base + scaled variants)
* PHASES=voice node scripts/seed-sim.mjs # only the voice phase
* PHASES=inbound,campaigns node scripts/seed-sim.mjs
* BASE_URL=http://127.0.0.1:3210 node scripts/seed-sim.mjs
*/
import { createServer } from 'node:http';
import { readFileSync } from 'node:fs';
import { dirname, join } from 'node:path';
import { fileURLToPath } from 'node:url';
// Directory that holds this script; ../.env.local is resolved relative to it.
const here = dirname(fileURLToPath(import.meta.url));

// Backend the seeder talks to (override with BASE_URL=...).
const BASE_URL = process.env.BASE_URL ?? 'http://127.0.0.1:3210';

// Requested thread count: first CLI arg wins, then the COUNT env var.
// 0 => run the full base script.
const [, , cliCount] = process.argv;
const COUNT = Number(cliCount ?? process.env.COUNT ?? 0);

// Phases to run, taken from the comma-separated PHASES env var. Names are
// trimmed and lower-cased; blank entries are dropped.
const PHASES = new Set();
for (const rawPhase of (process.env.PHASES ?? 'inbound,campaigns,voice').split(',')) {
  const phase = rawPhase.trim().toLowerCase();
  if (phase) PHASES.add(phase);
}
// ─── env (token + macsync URL) ──────────────────────────────────────────────
/**
 * Look up a single `KEY=value` entry in ../.env.local (relative to this script).
 *
 * The file is re-read on every call. Only lines that start exactly with `KEY=`
 * match, and the first match wins. The value is trimmed, then one pair of
 * matching surrounding quotes is removed (the usual dotenv convention), so
 * `KEY="abc"` yields `abc` rather than `"abc"`.
 *
 * @param {string} key Variable name to look up.
 * @returns {string|null} The value, or null when the key is absent or its value
 *   is empty — so `readEnvVal(k) ?? fallback` also falls back on a blank `KEY=`.
 * @throws {Error} Whatever readFileSync throws when ../.env.local is unreadable.
 */
function readEnvVal(key) {
  const envPath = join(here, '..', '.env.local');
  const prefix = `${key}=`;
  const line = readFileSync(envPath, 'utf8')
    .split(/\r?\n/)
    .find((l) => l.startsWith(prefix));
  if (line === undefined) return null;

  let value = line.slice(prefix.length).trim();
  const quote = value[0];
  if (value.length >= 2 && (quote === '"' || quote === "'") && value.endsWith(quote)) {
    // Quotes are .env syntax, not part of the secret/URL itself.
    value = value.slice(1, -1);
  }
  return value === '' ? null : value;
}
// Service token sent as the Bearer credential on every request. Mandatory: the
// script aborts at load time when ../.env.local does not provide it.
const TOKEN = (() => {
  const token = readEnvVal('PROSPECTOR_SERVICE_TOKEN');
  if (token) return token;
  throw new Error('PROSPECTOR_SERVICE_TOKEN not found in ../.env.local');
})();

// macsync base URL from MACSYNC_BASE_URL in ../.env.local; falls back to the
// local default when the key is absent.
const MACSYNC_URL = readEnvVal('MACSYNC_BASE_URL') ?? 'http://127.0.0.1:9999';

/** Build a fresh JSON + Bearer header object for one backend request. */
function authHeaders() {
  return {
    'content-type': 'application/json',
    authorization: `Bearer ${TOKEN}`,
  };
}

/** Resolve (with no value) after `ms` milliseconds. */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
/**
 * GET a backend path (relative to BASE_URL) with the service-token headers and
 * return the parsed JSON body.
 *
 * Deliberately best-effort: the parsed body is returned whatever the HTTP
 * status, and a body that is not valid JSON becomes `{}`, so callers can keep
 * using `?? []` defaults. A non-2xx status is no longer silent, though — it is
 * reported on stderr, because an auth or routing failure would otherwise look
 * exactly like "the backend has no data".
 *
 * @param {string} path Path plus optional query string, e.g. '/prospector/campaigns'.
 * @returns {Promise<object>} Parsed JSON body, or `{}` when the body is not JSON.
 * @throws Whatever fetch rejects with (e.g. backend unreachable).
 */
async function getJson(path) {
  const res = await fetch(`${BASE_URL}${path}`, { headers: authHeaders() });
  if (!res.ok) {
    console.warn(` ! GET ${path} -> HTTP ${res.status} (continuing with whatever body was returned)`);
  }
  return res.json().catch(() => ({}));
}
/**
 * POST `body` as JSON to a backend path (relative to BASE_URL) with the
 * service-token headers.
 *
 * @param {string} path Backend path, e.g. '/prospector/campaigns'.
 * @param {*} body Value serialized with JSON.stringify as the request body.
 * @returns {Promise<{status: number, body: object}>} HTTP status plus the parsed
 *   response body (`{}` when the response is not valid JSON).
 */
async function postJson(path, body) {
  const request = {
    method: 'POST',
    headers: authHeaders(),
    body: JSON.stringify(body),
  };
  const response = await fetch(`${BASE_URL}${path}`, request);

  let payload;
  try {
    payload = await response.json();
  } catch {
    // Non-JSON (or empty) response bodies are reported as an empty object.
    payload = {};
  }
  return { status: response.status, body: payload };
}
// ─── thread scripts ─────────────────────────────────────────────────────────
// Each thread = one prospect (one handle). `cc` is the country-code lead that
// drives marketOf() (segment.ts): +1888000 => US/CA, +4488000 => UK,
// +5288000 => MX. All handles share the reserved-ish '888000' band so seeded
// rows are obviously synthetic. `msgs` are sent in order against the same handle
// (multi-message threads). `call` marks an inbound carrying a call signal.
//
// Entry shape: { cc, label, channel, msgs: [{ text, call? }] }. `label` only
// shows up in the seeder's console output; `channel` and each `text` are passed
// to postInbound() unchanged, and `call` (default false) is sent as
// `hasCallSignal`.
//
// ORDER IS LOAD-BEARING: buildBatch() derives each handle from the array index
// (cc + String(1000 + index)) and VOICE_CORRECTIONS.handleIdx points at these
// positions (1, 2, 11, 12, 13). Inserting or reordering entries changes which
// script each handle gets on the next run.
const THREADS = [
// 0 — bare hello pings (multi-message)
{ cc: '+1888000', label: 'hello-ping', channel: 'imessage', msgs: [{ text: 'hey' }, { text: 'you around?' }, { text: 'wyd' }] },
// 1 — warm booking, full triad → qualified (multi-message)
{ cc: '+1888000', label: 'booking-triad', channel: 'imessage', msgs: [
{ text: 'hi saw your ad, are you available tomorrow at 8pm?' },
{ text: '$400 for the hour works for me, incall in williamsburg?' },
] },
// 2 — rate-only
{ cc: '+1888000', label: 'rate-only', channel: 'imessage', msgs: [{ text: 'how much for an hour?' }] },
// 3 — address push before time
{ cc: '+1888000', label: 'address-push', channel: 'imessage', msgs: [{ text: "what's your address? are you incall?" }] },
// 4 — sub-hour minimum probe
{ cc: '+1888000', label: 'sub-hour', channel: 'imessage', msgs: [{ text: 'do you do 30 min sessions?' }] },
// 5 — call / FaceTime (call signal, empty-body path + explicit)
{ cc: '+1888000', label: 'call-signal', channel: 'imessage', msgs: [
{ text: 'can we facetime first?' },
{ text: '', call: true },
] },
// 6 — harvester / content pressure (pics before booking) → OF redirect
{ cc: '+1888000', label: 'harvester', channel: 'imessage', msgs: [
{ text: 'send me more pics' },
{ text: 'show me before I book' },
] },
// 7 — budget / lowball self-filter
{ cc: '+1888000', label: 'budget-lowball', channel: 'imessage', msgs: [{ text: "that's too expensive, can you do $100?" }] },
// 8 — off-brand / party (monetize via OF)
{ cc: '+1888000', label: 'off-brand-party', channel: 'imessage', msgs: [{ text: 'wanna party and pnp tonight? 420 friendly?' }] },
// 9 — out-of-area
{ cc: '+1888000', label: 'out-of-area', channel: 'imessage', msgs: [{ text: "I'm visiting from out of town, just passing through napa" }] },
// 10 — scam (fake check / gift cards)
{ cc: '+1888000', label: 'scam-giftcard', channel: 'imessage', msgs: [
{ text: "I'll mail you a $2000 cashier's check upfront" },
{ text: 'just send back $500 in gift cards to confirm you are real' },
] },
// 11 — warm GFE qualifier
{ cc: '+1888000', label: 'warm-gfe', channel: 'imessage', msgs: [{ text: "you're beautiful, love all the pics — real gfe? when can I see you?" }] },
// 12 — UK market, booking intent (non-US market code)
{ cc: '+4488000', label: 'uk-booking', channel: 'imessage', msgs: [
{ text: 'hiya, are you free this evening?' },
{ text: 'happy with 400 outcall to soho, around 9pm?' },
] },
// 13 — MX market, booking intent (non-US market code)
{ cc: '+5288000', label: 'mx-booking', channel: 'imessage', msgs: [{ text: 'hola, disponible manana? $350 outcall hotel ~10pm' }] },
];
/**
 * Synthetic US booking-intent prospect used once the roster index runs past the
 * hand-written THREADS, so COUNT can stretch the batch.
 *
 * Fully determined by `i` (keeps re-runs idempotent-ish per handle): the asked
 * hour cycles 6pm..11pm via `i % 6`, the offered price cycles $300..$500 in
 * $50 steps via `i % 5`.
 *
 * @param {number} i Roster index of the thread being generated.
 * @returns {{cc: string, label: string, channel: string, msgs: {text: string}[]}}
 */
function scaledThread(i) {
  const hour = 6 + (i % 6);
  const price = 300 + (i % 5) * 50;
  const opener = { text: `hey, you free tomorrow at ${hour}pm? saw your post` };
  const offer = { text: `$${price} for the hour, incall williamsburg works` };
  return {
    cc: '+1888000',
    label: 'scaled-booking',
    channel: 'imessage',
    msgs: [opener, offer],
  };
}
/**
 * Build the roster to seed: the first entries come from THREADS, anything past
 * its end comes from scaledThread(index). Size is COUNT when positive,
 * otherwise the whole THREADS script.
 *
 * Each entry is a shallow copy of its script plus a deterministic `handle`:
 * the script's `cc` followed by the serial `1000 + index` (reserved 888000 band).
 *
 * @returns {object[]} Thread scripts, each extended with `handle`.
 */
function buildBatch() {
  const wanted = COUNT > 0 ? COUNT : THREADS.length;
  const roster = [];
  while (roster.length < wanted) {
    const idx = roster.length;
    const script = idx >= THREADS.length ? scaledThread(idx) : THREADS[idx];
    roster.push({ ...script, handle: `${script.cc}${1000 + idx}` });
  }
  return roster;
}
// ─── phase 1: inbound ────────────────────────────────────────────────────────
/**
 * POST one inbound message to /internal/inbound, stamped with the current time
 * as `occurredAt`.
 *
 * Goes through the shared postJson() helper rather than repeating its fetch /
 * header / JSON-fallback logic, so every POST the seeder makes is built and
 * parsed the same way.
 *
 * @param {string} handle Prospect handle the message is attributed to.
 * @param {string} channel Channel name, e.g. 'imessage'.
 * @param {string} text Message body (may be empty).
 * @param {boolean} hasCallSignal Whether the inbound carries a call signal.
 * @returns {Promise<{status: number, body: object}>} HTTP status plus the parsed
 *   response body (`{}` when the response is not valid JSON).
 */
async function postInbound(handle, channel, text, hasCallSignal) {
  return postJson('/internal/inbound', {
    handle,
    channel,
    text,
    hasCallSignal,
    occurredAt: new Date().toISOString(),
  });
}
/**
 * Phase 1 — replay every thread's messages through postInbound(), strictly in
 * order and one request at a time, printing one line per message and a closing
 * tally (outcomes, hold reasons, per-market counts, errors).
 *
 * A non-200 reply or a thrown error counts as an error for that message only;
 * the remaining messages are still sent.
 *
 * @param {object[]} batch Roster from buildBatch(): each entry has `handle`,
 *   `cc`, `label`, `channel` and `msgs`.
 * @returns {Promise<void>}
 */
async function seedInbound(batch) {
  // Country-code lead -> market label used in the summary; anything else counts as MX.
  const marketByLead = new Map([
    ['+1888000', 'US/CA'],
    ['+4488000', 'UK'],
  ]);
  const bump = (bag, key) => {
    bag[key] = (bag[key] ?? 0) + 1;
  };
  // Render the price/location/ETA triad only when at least one part is present.
  const bookingSuffix = (booking) => {
    if (!booking) return '';
    const { price, location, eta } = booking;
    if (!(price || location || eta)) return '';
    return ` booking[$${price ?? '-'}/${location ?? '-'}/${eta ?? '-'}]`;
  };

  let totalMsgs = 0;
  for (const thread of batch) totalMsgs += thread.msgs.length;
  console.log(`\n=== inbound === threads (prospects): ${batch.length} inbound messages: ${totalMsgs}\n`);

  const tally = { send: 0, hold: 0, byHold: {}, byMarket: {}, errors: 0 };
  for (const thread of batch) {
    for (const msg of thread.msgs) {
      try {
        const { status, body: reply } = await postInbound(thread.handle, thread.channel, msg.text, msg.call ?? false);
        if (status !== 200) {
          tally.errors += 1;
          console.log(` x ${thread.handle} [${thread.label}] HTTP ${status}`);
          continue;
        }
        const outcome = reply.outcome ?? '?';
        bump(tally, outcome);
        if (outcome === 'hold') bump(tally.byHold, reply.holdReason ?? '?');
        bump(tally.byMarket, marketByLead.get(thread.cc) ?? 'MX');
        const reason = reply.holdReason ? ` (${reply.holdReason})` : '';
        console.log(` ok ${thread.handle} [${thread.label}] -> ${outcome}${reason}${bookingSuffix(reply.booking)}`);
      } catch (err) {
        tally.errors += 1;
        console.log(` x ${thread.handle} [${thread.label}] ${String(err)}`);
      }
    }
  }

  const summary = [
    '\n--- inbound summary ---',
    `sent: ${tally.send ?? 0} held: ${tally.hold ?? 0} errors: ${tally.errors}`,
    `hold reasons: ${JSON.stringify(tally.byHold)}`,
    `by market: ${JSON.stringify(tally.byMarket)}`,
    'Note: macsync is down locally -> real sends downgrade to hold (expected, not a failure).',
  ];
  for (const line of summary) console.log(line);
}
// ─── phase 2: campaigns ──────────────────────────────────────────────────────
// Body of the 🌹 Pastebin note that the offline-macsync stand-in serves. One
// template per line, written as `<key> <copy>` (key ≤16 chars per the launch
// DTO); the backend's PastebinService fills in `{handle}`. The copy itself is
// incidental — the keys only have to EXIST so that launch clears its
// template-exists gate and the campaign header gets persisted.
const SIM_PASTEBIN_NOTE = `camp-reengage Hey {handle}, rose here 🌹 back in your city soon — want me to hold you a slot?
camp-tour Hiya {handle} 🌹 touring soon, shall I pencil you in before it fills?`;
/**
 * Work out where the stand-in must listen so that it answers at `macsyncUrl`.
 *
 * @param {string} macsyncUrl Absolute macsync base URL.
 * @returns {{host: string, port: number}} Listen address for server.listen().
 * @throws {TypeError} When `macsyncUrl` is not a valid absolute URL.
 */
function standinListenTarget(macsyncUrl) {
  const { protocol, hostname, port } = new URL(macsyncUrl);
  return {
    // URL.hostname keeps the brackets around IPv6 literals ('[::1]'); listen()
    // needs the bare address.
    host: hostname.replace(/^\[(.*)\]$/, '$1'),
    // URL.port is '' when the URL relies on the scheme default. Number('') is 0,
    // which listen() treats as "any free port" — a port the backend would never
    // look at — so fall back to the scheme's well-known port instead.
    port: port === '' ? (protocol === 'https:' ? 443 : 80) : Number(port),
  };
}

/**
 * Stand in for the offline macsync — serve ONLY the pastebin note (and /health).
 * Everything else (incl. /my/outbox) 404s, so campaign sends faithfully downgrade
 * to hold while the campaign header still persists.
 *
 * Routes (query strings are ignored when matching):
 *   GET /my/notes -> 200 { data: { notes: [{ title: '🌹 Pastebin', body: SIM_PASTEBIN_NOTE }] } }
 *   GET /health   -> 200 { ok: true }
 *   anything else -> 404 { error: 'sim-standin: not served' }
 *
 * @param {string} macsyncUrl Base URL the stand-in should answer at; the plain
 *   HTTP server binds to its host and port.
 * @returns {Promise<import('node:http').Server>} Resolves with the listening
 *   server; rejects when binding fails (e.g. the port is already taken).
 *   The caller is responsible for closing it.
 */
function startMacsyncStandin(macsyncUrl) {
  const { host, port } = standinListenTarget(macsyncUrl);
  const sendJson = (res, status, payload) => {
    res.writeHead(status, { 'content-type': 'application/json' });
    res.end(JSON.stringify(payload));
  };
  const server = createServer((req, res) => {
    const path = (req.url ?? '').split('?')[0];
    if (req.method === 'GET' && path === '/my/notes') {
      sendJson(res, 200, { data: { notes: [{ title: '🌹 Pastebin', body: SIM_PASTEBIN_NOTE }] } });
      return;
    }
    if (req.method === 'GET' && path === '/health') {
      sendJson(res, 200, { ok: true });
      return;
    }
    sendJson(res, 404, { error: 'sim-standin: not served' });
  });
  return new Promise((resolve, reject) => {
    server.once('error', reject);
    server.listen(port, host, () => resolve(server));
  });
}
/**
 * Poll GET /prospector/pastebin until it reports at least one template, i.e.
 * until the backend's 5-min template cache has expired and reloaded from the
 * stand-in. Polling before expiry is harmless (returns cached without resetting
 * the TTL).
 *
 * Each empty poll rewrites a one-line progress message on stdout, then sleeps.
 * The deadline is only checked after a poll, so at least one poll always runs.
 *
 * @param {number} [timeoutMs] Give-up budget in milliseconds (default 6 minutes).
 * @param {number} [intervalMs] Pause between polls in milliseconds (default 10 s).
 * @returns {Promise<object[]>} The non-empty template list.
 * @throws {Error} When the deadline passes without any template appearing.
 */
async function waitForPastebinTemplates(timeoutMs = 6 * 60_000, intervalMs = 10_000) {
  const giveUpAt = Date.now() + timeoutMs;
  while (true) {
    const reply = await getJson('/prospector/pastebin');
    const found = reply.templates ?? [];
    if (found.length > 0) {
      // Terminate the \r-rewritten progress line before normal logging resumes.
      process.stdout.write('\n');
      return found;
    }
    if (Date.now() > giveUpAt) {
      throw new Error('pastebin templates never appeared — backend template cache (5-min TTL) did not reload from the stand-in');
    }
    process.stdout.write(' waiting for backend pastebin-cache (≤5min TTL) to reload from stand-in…\r');
    await sleep(intervalMs);
  }
}
/**
 * Phase 2 — launch the sample campaigns through the real
 * facets → preview → launch endpoints so GET /prospector/campaigns returns rows.
 *
 * Launch needs at least one pastebin template. When the backend reports none,
 * an ephemeral macsync stand-in is started at MACSYNC_URL and the function
 * waits for the backend to pick its note up. The stand-in is started INSIDE the
 * try block so that it is closed on every exit path — including a timeout while
 * waiting for the templates, which previously left the server listening.
 *
 * Idempotent-ish: a campaign whose name already exists in the before-list is
 * skipped rather than launched again.
 *
 * @returns {Promise<void>}
 * @throws Whatever the stand-in startup, the template wait, or fetch rejects with.
 */
async function seedCampaigns() {
  const beforeList = await getJson('/prospector/campaigns');
  const beforeCount = (beforeList.items ?? []).length;
  console.log(`\n=== campaigns === (before: ${beforeCount} campaigns)`);

  let standin = null;
  try {
    // Launch hard-requires ≥1 pastebin template. Use live canon if present; else
    // stand in for offline macsync just long enough to provide it.
    let templates = (await getJson('/prospector/pastebin')).templates ?? [];
    if (templates.length === 0) {
      console.log(` pastebin empty (macsync offline) — starting ephemeral macsync stand-in at ${MACSYNC_URL}`);
      standin = await startMacsyncStandin(MACSYNC_URL);
      templates = await waitForPastebinTemplates();
    }
    console.log(` pastebin templates available: ${templates.map((t) => t.key).join(', ')}`);

    const facets = await getJson('/prospector/campaigns/facets');
    const markets = (facets.markets ?? []).map((m) => m.key);
    // Prefer the requested market; otherwise fall back to the first one the
    // backend reports.
    const pickMarket = (pref) => (markets.includes(pref) ? pref : markets[0]);
    const tplA = templates[0].key;
    // With a single template available both campaigns share it.
    const tplB = (templates[1] ?? templates[0]).key;
    const defs = [
      {
        name: 'Re-engage · dates · US/CA · logistics',
        templateKey: tplA,
        filter: { segments: ['dates'], markets: [pickMarket('US/CA')], classifications: ['logistics'] },
      },
      {
        name: 'Tour tease · dates · UK',
        templateKey: tplB,
        filter: { segments: ['dates'], markets: [pickMarket('UK')] },
      },
    ];

    const existingNames = new Set((beforeList.items ?? []).map((c) => c.name));
    let launched = 0;
    let skippedExisting = 0;
    for (const d of defs) {
      if (existingNames.has(d.name)) {
        console.log(` - skip launch [${d.name}] already present`);
        skippedExisting += 1;
        continue;
      }
      const preview = await postJson('/prospector/campaigns/preview', { filter: d.filter });
      console.log(` preview [${d.name}] -> matched=${preview.body?.matched ?? '?'}`);
      const res = await postJson('/prospector/campaigns', d);
      if (res.status === 200) {
        launched += 1;
        console.log(
          ` ok launch [${d.name}] matched=${res.body.matched} enqueued=${res.body.enqueued} skipped=${res.body.skipped?.length ?? 0}`,
        );
      } else {
        console.log(` x launch [${d.name}] HTTP ${res.status} ${JSON.stringify(res.body)}`);
      }
    }

    const afterList = await getJson('/prospector/campaigns');
    console.log(`campaigns: launched=${launched} skipped-existing=${skippedExisting}`);
    console.log(` after: ${(afterList.items ?? []).length} campaigns (enqueued=0 is expected — macsync outbox offline)`);
  } finally {
    if (standin) await new Promise((r) => standin.close(() => r()));
  }
}
// ─── phase 3: voice corrections ──────────────────────────────────────────────
// Teach-loop voice/tone corrections (original → corrected) so the Voice view's
// alignment lens returns non-null pairs/summaries. `handleIdx` references a
// seeded prospect (buildBatch) so corrections attach to real handles.
//
// Entry shape: { handleIdx, category, summary, originalBody, correctedBody }.
// seedVoiceCorrections() resolves `handleIdx` against the batch (an entry is
// skipped when the batch has no thread at that index, e.g. a small COUNT) and
// posts the other four fields as-is. `summary` doubles as the idempotency key:
// an entry is skipped when a correction with the same summary already exists,
// so keep summaries unique and stable across edits.
const VOICE_CORRECTIONS = [
{ handleIdx: 1, category: 'voice', summary: 'warm up the booking ack, lead with the slot', originalBody: 'Rate is $400/hr. Incall only.', correctedBody: 'love that 🌹 $400 for the hour, incall in Williamsburg — want me to hold 8pm for you?' },
{ handleIdx: 2, category: 'voice', summary: 'rate first, then the rose, then a nudge', originalBody: 'one hour is four hundred dollars', correctedBody: '$400 for the hour 🌹 incall — want a slot tonight?' },
{ handleIdx: 11, category: 'tone', summary: 'soften the GFE qualifier, stay flirty', originalBody: 'I do not discuss services over text.', correctedBody: 'mwah, thank you 🌹 yes — real GFE. when were you thinking, babe?' },
{ handleIdx: 12, category: 'voice', summary: 'keep UK spelling + warmth, £ not $', originalBody: '400 outcall soho 9pm ok', correctedBody: 'hiya 🌹 yes — £400 outcall to Soho around 9, shall I pop you in?' },
{ handleIdx: 13, category: 'tone', summary: 'mirror Spanish, keep it warm', originalBody: '$350 outcall hotel 10pm', correctedBody: 'hola guapo 🌹 sí — $350 outcall al hotel ~10pm, ¿te aparto?' },
];
/**
 * Count the alignment pairs that carry at least one body, i.e. whose `orig` or
 * `corr` is neither null nor undefined (empty strings still count).
 *
 * @param {{pairs?: Array<{orig?: *, corr?: *}>}} alignment Voice-alignment response.
 * @returns {number} Number of pairs with a body; 0 when `pairs` is missing.
 */
function pairsWithBody(alignment) {
  let withBody = 0;
  for (const pair of alignment.pairs ?? []) {
    if (pair.orig != null || pair.corr != null) withBody += 1;
  }
  return withBody;
}
/**
 * Phase 3 — post the VOICE_CORRECTIONS teach-loop entries so the voice
 * alignment endpoint has pairs to return, logging before/after counts.
 *
 * An entry is skipped (and counted as skipped) when the batch has no handle at
 * its `handleIdx`, or when a correction with the same summary already exists.
 * Only an HTTP 201 counts as posted; any other status is logged and ignored.
 *
 * @param {object[]} batch Roster from buildBatch(); `handleIdx` indexes into it.
 * @returns {Promise<void>}
 */
async function seedVoiceCorrections(batch) {
  const known = await getJson('/prospector/corrections?limit=500');
  const seenSummaries = new Set();
  for (const item of known.items ?? []) {
    if (item.summary) seenSummaries.add(item.summary);
  }

  const before = await getJson('/prospector/corrections/voice/alignment');
  console.log(`\n=== voice corrections === (before: corrCount=${before.corrCount ?? 0}, pairs-with-body=${pairsWithBody(before)})`);

  const counts = { posted: 0, skipped: 0 };
  for (const { handleIdx, category, summary, originalBody, correctedBody } of VOICE_CORRECTIONS) {
    const handle = batch[handleIdx]?.handle;

    // Missing handle takes precedence over the duplicate-summary check.
    let skipReason = null;
    if (!handle) {
      skipReason = `no seeded handle at idx ${handleIdx} (raise COUNT)`;
    } else if (seenSummaries.has(summary)) {
      skipReason = 'already present';
    }
    if (skipReason !== null) {
      console.log(` - skip [${summary}] ${skipReason}`);
      counts.skipped += 1;
      continue;
    }

    const { status, body } = await postJson('/prospector/corrections', {
      handle,
      category,
      summary,
      originalBody,
      correctedBody,
    });
    if (status !== 201) {
      console.log(` x ${handle} [${category}] HTTP ${status} ${JSON.stringify(body)}`);
      continue;
    }
    counts.posted += 1;
    console.log(` ok ${handle} [${category}] ${summary}`);
  }

  const after = await getJson('/prospector/corrections/voice/alignment');
  console.log(`voice corrections: posted=${counts.posted} skipped=${counts.skipped}`);
  console.log(` after: corrCount=${after.corrCount ?? 0}, pairs-with-body=${pairsWithBody(after)}`);
}
// ─── orchestrate ──────────────────────────────────────────────────────────────
/**
 * Build the roster once, announce the target and selected phases, then run the
 * selected phases in the fixed order inbound → campaigns → voice, regardless of
 * the order they were listed in PHASES. Phase names outside these three are
 * ignored.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const batch = buildBatch();
  console.log(`seed-sim -> ${BASE_URL} phases: ${[...PHASES].join(', ')}`);

  const steps = [
    ['inbound', () => seedInbound(batch)],
    ['campaigns', () => seedCampaigns()],
    ['voice', () => seedVoiceCorrections(batch)],
  ];
  for (const [phase, run] of steps) {
    if (PHASES.has(phase)) await run();
  }
}
// Entry point: run the selected phases; any rejection is reported on stderr and
// turned into exit code 1.
main().then(undefined, (failure) => {
  console.error('seed-sim failed:', failure);
  process.exit(1);
});