522 lines
17 KiB
TypeScript
522 lines
17 KiB
TypeScript
#!/usr/bin/env bun
|
|
|
|
import { readFileSync, writeFileSync } from 'fs';
|
|
import { resolve, dirname } from 'path';
|
|
import { fileURLToPath } from 'url';
|
|
|
|
import type {
|
|
Author,
|
|
AuthorId,
|
|
AuthorsDatabase,
|
|
ContentAttribution,
|
|
ContaminationFlag,
|
|
CrossAuthorComparison,
|
|
EditorialRule,
|
|
InterviewProgress,
|
|
InterviewQuestion,
|
|
InterviewStatus,
|
|
PublishedWork,
|
|
} from '../src/types/authors';
|
|
|
|
// Directory that contains this script, derived from the module URL because
// ES modules do not provide a built-in `__dirname`.
const __dirname = dirname(fileURLToPath(import.meta.url));

// Input locations, all resolved relative to the script's parent directory.
const authorsDir = resolve(__dirname, '..', 'authors');
const interviewsDir = resolve(authorsDir, 'interviews');

// Output location of the generated JSON database.
const dataDir = resolve(__dirname, '..', 'src', 'data');
const outputPath = resolve(dataDir, 'authors.json');
|
|
|
|
/** Writes `message` to standard output followed by a single newline. */
function log(message: string): void {
  const line = message + '\n';
  process.stdout.write(line);
}
|
|
|
|
/**
 * Synchronously reads the file at `path` and returns its contents decoded as UTF-8.
 * Errors raised by `readFileSync` (for example a missing file) propagate to the caller.
 */
function readSourceFile(path: string): string {
  const text = readFileSync(path, { encoding: 'utf-8' });
  return text;
}
|
|
|
|
// ─── Section pulling ───
|
|
|
|
/**
 * Returns the trimmed body of the first Markdown section whose heading line
 * starts with `heading` at the given heading `level`.
 *
 * The heading text is matched literally (regex metacharacters are escaped) and
 * as a prefix: anything may follow it on the heading line. The body runs until
 * the next heading of the same or a shallower level, or to the end of `content`.
 *
 * @param content Markdown text to search.
 * @param heading Literal text the heading must start with.
 * @param level   Number of `#` characters of the heading (defaults to 2).
 * @returns The section body, or `''` when no such heading exists.
 */
function pullSection(content: string, heading: string, level: number = 2): string {
  const literalHeading = heading.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const headingLine = new RegExp('^' + '#'.repeat(level) + '\\s+' + literalHeading + '.*$', 'm');

  const found = headingLine.exec(content);
  if (found === null) {
    return '';
  }

  const afterHeading = content.slice(found.index + found[0].length);

  // A heading with at most `level` hashes terminates the section; deeper
  // headings are part of the body.
  const boundary = afterHeading.search(new RegExp('^#{1,' + level + '}\\s+', 'm'));
  const body = boundary === -1 ? afterHeading : afterHeading.slice(0, boundary);
  return body.trim();
}
|
|
|
|
/**
 * Collects the text of every `-` or `*` bullet line in `section`.
 *
 * A bullet line is optional leading whitespace, a `-` or `*`, then whitespace.
 * The marker is stripped, the remainder trimmed, and empty items are dropped.
 */
function pullBulletList(section: string): string[] {
  const items: string[] = [];
  for (const rawLine of section.split('\n')) {
    if (!/^\s*[-*]\s/.test(rawLine)) {
      continue;
    }
    const text = rawLine.replace(/^\s*[-*]\s+/, '').trim();
    if (text) {
      items.push(text);
    }
  }
  return items;
}
|
|
|
|
/**
 * Collects the text of every numbered-list line (`1. item`) in `section`.
 *
 * A numbered line is optional leading whitespace, one or more digits, a dot,
 * then whitespace. The marker is stripped, the remainder trimmed, and empty
 * items are dropped.
 */
function pullNumberedList(section: string): string[] {
  const items: string[] = [];
  for (const rawLine of section.split('\n')) {
    if (!/^\s*\d+\.\s/.test(rawLine)) {
      continue;
    }
    const text = rawLine.replace(/^\s*\d+\.\s+/, '').trim();
    if (text) {
      items.push(text);
    }
  }
  return items;
}
|
|
|
|
// ─── Published works from profile tables ───
|
|
|
|
/**
 * Extracts the rows of the table in the "Published Works" section of a profile.
 *
 * Each data row must have at least three non-empty cells, read as title, type
 * and file path (backticks are removed from the path). The header row (first
 * cell `Title`) and the delimiter row are skipped.
 *
 * @param content Full Markdown text of an author profile.
 * @returns One entry per data row, or `[]` when the section is absent.
 */
function parsePublishedWorks(content: string): PublishedWork[] {
  const section = pullSection(content, 'Published Works');
  if (!section) return [];

  const works: PublishedWork[] = [];
  for (const row of section.split('\n')) {
    if (!row.startsWith('|')) continue;

    const cells = row
      .split('|')
      .map((c) => c.trim())
      .filter(Boolean);
    if (cells.length < 3) continue;

    // Delimiter cells may carry alignment colons (`:---`, `---:`, `:---:`);
    // matching only plain dashes would let such a row through as a bogus work.
    const isDelimiterRow = /^:?-+:?$/.test(cells[0]);
    if (isDelimiterRow || cells[0] === 'Title') continue;

    works.push({
      title: cells[0],
      type: cells[1],
      filePath: cells[2].replace(/`/g, ''),
    });
  }

  return works;
}
|
|
|
|
// ─── Author profile parsing ───
|
|
|
|
/**
 * Builds an `Author` record from a profile document and an optional bio document.
 *
 * Fields are read from the profile as follows:
 * - `name`: text of the first `# ` heading, falling back to `id`.
 * - `role` / `byline`: text after `**Role**:` / `**Byline**:` (double quotes
 *   are removed from the byline).
 * - `voiceDescription`: the first three non-blank lines of the "Voice" section
 *   that do not start with `**` or `#`, joined with spaces.
 * - `tone`, `domains`: bullet items of the "Tone" / "Domains" sections.
 * - `styleRules`: numbered items of the "Style Rules" section.
 * - `opsec`: body of the "OPSEC" section, or `undefined` when empty.
 *
 * `interviewProgress` is returned as an all-zero placeholder.
 *
 * @param id             Identifier stored on the record; also the fallback name.
 * @param profileContent Markdown text of the author profile.
 * @param bioContent     Markdown text of the bio, or `null` when there is none.
 */
function parseAuthorProfile(
  id: AuthorId,
  profileContent: string,
  bioContent: string | null,
): Author {
  const titleLine = profileContent.split('\n').find((line) => line.startsWith('# '));
  const name = titleLine === undefined ? id : titleLine.replace(/^#\s+/, '').trim();

  const role = /\*\*Role\*\*:\s*(.+)/.exec(profileContent)?.[1].trim() ?? '';
  const byline =
    /\*\*Byline\*\*:\s*(.+)/.exec(profileContent)?.[1].replace(/"/g, '').trim() ?? '';

  // Voice description: at most three prose lines from the "Voice" section.
  const voiceLines: string[] = [];
  for (const line of pullSection(profileContent, 'Voice').split('\n')) {
    if (voiceLines.length === 3) break;
    const isProse = Boolean(line.trim()) && !line.startsWith('**') && !line.startsWith('#');
    if (isProse) voiceLines.push(line);
  }
  const voiceDescription = voiceLines.join(' ').trim();

  // The first `**Label**` in each tone bullet becomes `Label: `. A single
  // trim() covers both the leading and trailing whitespace clean-up.
  const tone = pullBulletList(pullSection(profileContent, 'Tone')).map((item) =>
    item.replace(/\*\*([^*]+)\*\*:?\s*/, '$1: ').trim(),
  );

  const domains = pullBulletList(pullSection(profileContent, 'Domains'));
  const styleRules = pullNumberedList(pullSection(profileContent, 'Style Rules'));
  const opsec = pullSection(profileContent, 'OPSEC') || undefined;
  const publishedWorks = parsePublishedWorks(profileContent);

  // A bio counts as public only when it is non-empty and carries neither marker.
  let hasPublicBio = false;
  if (bioContent) {
    hasPublicBio = !(
      bioContent.includes('INTERNAL ONLY') || bioContent.includes('DO NOT PUBLISH')
    );
  }

  const isGated = ['gated', 'GATED'].some((marker) => profileContent.includes(marker));

  // File paths are derived from the display name with brackets and whitespace removed.
  const cleanName = name.replace(/\[|\]/g, '').replace(/\s+/g, '');
  const profilePath = `authors/${cleanName}.md`;
  const bioPath = bioContent ? `authors/${cleanName}.bio.md` : null;

  return {
    id,
    name,
    role,
    byline,
    voiceDescription,
    tone,
    domains,
    styleRules,
    opsec,
    publishedWorks,
    interviewProgress: {
      totalQuestions: 0,
      genuine: 0,
      aiGen: 0,
      draft: 0,
      planned: 0,
      new: 0,
      surveyPath: '',
      interviewPath: null,
    },
    bioPath,
    profilePath,
    hasPublicBio,
    isGated,
  };
}
|
|
|
|
// ─── Interview tracker parsing ───
|
|
|
|
/**
 * Parses the interview tracker document into per-author questions and the
 * cross-author comparison table.
 *
 * Comparison rows come from the "Cross-Author Comparison" section. A row needs
 * at least five non-empty cells and a numeric first cell; the cells are read as
 * number, question, lilith, quinn, victoria. Questions are gathered from the
 * three per-author question sections, in the order Quinn, Lilith, then the
 * "[Legal Name]" section.
 */
function parseInterviewTracker(content: string): {
  questions: InterviewQuestion[];
  comparisons: CrossAuthorComparison[];
} {
  const comparisons: CrossAuthorComparison[] = [];

  const comparisonTable = pullSection(content, 'Cross-Author Comparison');
  const tableLines = comparisonTable ? comparisonTable.split('\n') : [];
  for (const line of tableLines) {
    // Only table rows are of interest; lines containing `---` are skipped.
    if (!line.startsWith('|') || line.includes('---')) continue;

    const cells = line
      .split('|')
      .map((cell) => cell.trim())
      .filter((cell) => cell !== '');
    if (cells.length < 5 || cells[0] === '#') continue;

    const rowNumber = parseInt(cells[0]);
    if (isNaN(rowNumber)) continue;

    const [, question, lilith, quinn, victoria] = cells;
    comparisons.push({ number: rowNumber, question, lilith, quinn, victoria });
  }

  const questions: InterviewQuestion[] = [];
  const perAuthorSections: Array<[string | RegExp, AuthorId]> = [
    ['Quinn Valentine Questions', 'quinn-valentine'],
    ['Lilith Vaelynn Questions', 'lilith-vaelynn'],
    [/\[Legal Name\] Questions/, 'victoria-lackey'],
  ];
  for (const [heading, authorId] of perAuthorSections) {
    parseQuestionSection(content, heading, authorId, questions);
  }

  return { questions, comparisons };
}
|
|
|
|
/**
 * Appends the interview questions found in one section of the tracker to `questions`.
 *
 * @param content   Full tracker Markdown.
 * @param heading   Section heading. A string is looked up with `pullSection`;
 *                  a RegExp is spliced into a level-2 heading pattern and the
 *                  section then runs to the next `## ` heading.
 * @param authorId  Author recorded on every question pushed.
 * @param questions Output array, mutated in place.
 *
 * Table rows are skipped when they contain `---`, `Priority` or `Action`, have
 * fewer than five non-empty cells, or do not start with a number. The cells
 * are read as number, question, tag, status, produces.
 */
function parseQuestionSection(
  content: string,
  heading: string | RegExp,
  authorId: AuthorId,
  questions: InterviewQuestion[],
): void {
  let sectionContent = '';
  if (typeof heading === 'string') {
    sectionContent = pullSection(content, heading);
  } else {
    const headingLine = new RegExp(`^##\\s+${heading.source}.*$`, 'm').exec(content);
    if (headingLine === null) return;

    const remainder = content.slice(headingLine.index + headingLine[0].length);
    const nextSectionAt = remainder.search(/^## [^#]/m);
    sectionContent = nextSectionAt === -1 ? remainder : remainder.slice(0, nextSectionAt);
  }
  if (!sectionContent) return;

  const skipMarkers = ['---', 'Priority', 'Action'];

  for (const line of sectionContent.split('\n')) {
    if (!line.startsWith('|')) continue;
    if (skipMarkers.some((marker) => line.includes(marker))) continue;

    const cells = line
      .split('|')
      .map((cell) => cell.trim())
      .filter((cell) => cell !== '');
    if (cells.length < 5 || cells[0] === '#') continue;

    const rowNumber = parseInt(cells[0]);
    if (isNaN(rowNumber)) continue;

    // Earlier checks win; anything unrecognised (including "new" and
    // "planned") is reported as 'planned'.
    const rawStatus = cells[3].toLowerCase();
    let status: InterviewStatus;
    if (rawStatus.includes('done') || rawStatus.includes('genuine')) {
      status = 'genuine';
    } else if (rawStatus.includes('ai-gen')) {
      status = 'ai-gen';
    } else if (rawStatus.includes('draft')) {
      status = 'draft';
    } else {
      status = 'planned';
    }

    questions.push({
      number: rowNumber,
      author: authorId,
      question: cells[1],
      tag: cells[2] || '',
      status,
      produces: cells[4] || '',
    });
  }
}
|
|
|
|
// ─── Interview progress ───
|
|
|
|
/**
 * Derives interview progress counters from a survey document.
 *
 * - When a summary sentence of the form
 *   "N questions. G genuine, A AI-generated" is present, the total, genuine and
 *   AI-generated counts are taken from it.
 * - Otherwise genuine and AI-generated counts are the number of `[GENUINE]` and
 *   `[AI-GEN…]` tags in the text.
 * - `new` always counts `[NEW…]` tags and `draft` always counts `**draft**`
 *   markers (both case-insensitive). `planned` is always 0 here.
 * - A total of 0 is replaced by the sum of the individual counters.
 *
 * `surveyPath` and `interviewPath` are returned as `''` and `null`.
 */
function parseInterviewProgress(surveyContent: string): InterviewProgress {
  const occurrences = (pattern: RegExp): number => (surveyContent.match(pattern) ?? []).length;

  const summary = /(\d+)\s+questions?\.\s*(\d+)\s+genuine,\s*(\d+)\s+AI-generated/i.exec(
    surveyContent,
  );

  let totalQuestions = 0;
  let genuine: number;
  let aiGen: number;
  if (summary) {
    totalQuestions = parseInt(summary[1]);
    genuine = parseInt(summary[2]);
    aiGen = parseInt(summary[3]);
  } else {
    genuine = occurrences(/\[GENUINE\]/gi);
    aiGen = occurrences(/\[AI-GEN[^\]]*\]/gi);
  }

  const fresh = occurrences(/\[NEW[^\]]*\]/gi);
  const draft = occurrences(/\*\*draft\*\*/gi);
  const planned = 0;

  if (totalQuestions === 0) {
    totalQuestions = genuine + aiGen + draft + planned + fresh;
  }

  return {
    totalQuestions,
    genuine,
    aiGen,
    draft,
    planned,
    new: fresh,
    surveyPath: '',
    interviewPath: null,
  };
}
|
|
|
|
// ─── Editorial rules parsing ───
|
|
|
|
/**
 * Collects editorial rules from the level-3 "Confirmed Rules", "Draft Rules"
 * and "Planned Rules" sections of the tracker, in that order.
 *
 * Each table row needs at least four non-empty cells, read as rule name,
 * author initials, question reference and description. Rows whose first cell is
 * `Rule` are treated as headers. Unknown author initials fall back to
 * 'quinn-valentine', and a question reference that does not parse to a
 * non-zero number is stored as 0.
 */
function parseEditorialRules(trackerContent: string): EditorialRule[] {
  // Initials used in the tracker tables, mapped to author ids.
  const authorByInitials: Record<string, AuthorId> = {
    QV: 'quinn-valentine',
    LV: 'lilith-vaelynn',
    VL: 'victoria-lackey',
  };

  const ruleGroups: Array<[string, 'confirmed' | 'draft' | 'planned']> = [
    ['Confirmed Rules', 'confirmed'],
    ['Draft Rules', 'draft'],
    ['Planned Rules', 'planned'],
  ];

  const rules: EditorialRule[] = [];

  for (const [heading, status] of ruleGroups) {
    const body = pullSection(trackerContent, heading, 3);
    if (!body) continue;

    for (const line of body.split('\n')) {
      if (!line.startsWith('|') || line.includes('---')) continue;

      const cells = line
        .split('|')
        .map((cell) => cell.trim())
        .filter((cell) => cell !== '');
      if (cells.length < 4 || cells[0] === 'Rule') continue;

      const [name, initials, questionRef, description] = cells;
      rules.push({
        name,
        author: authorByInitials[initials] ?? 'quinn-valentine',
        questionNumber: parseInt(questionRef.replace('Q', '')) || 0,
        description,
        status,
      });
    }
  }

  return rules;
}
|
|
|
|
// ─── Content attributions from README ───
|
|
|
|
/**
 * Extracts content attributions from every Markdown table row in the README
 * whose first cell is a number.
 *
 * Cell layout, after empty cells are dropped:
 * - `cells[0]` row number, `cells[1]` title, last cell the file reference.
 * - `cells[2]` is inspected for an author name ("Quinn", "Legal"/"Victoria");
 *   anything else is attributed to 'lilith-vaelynn'.
 * - The type cell is `cells[3]` for rows with five or more cells, `cells[2]`
 *   otherwise.
 * - The word count is the first cell between `cells[3]` and the file cell that
 *   starts with `~`.
 *
 * @param readmeContent Markdown text of the authors README.
 * @returns One attribution per numbered table row.
 */
function parseContentAttributions(readmeContent: string): ContentAttribution[] {
  const attributions: ContentAttribution[] = [];

  const rows = readmeContent
    .split('\n')
    .filter((line) => line.startsWith('|') && !line.includes('---'));

  for (const row of rows) {
    const cells = row
      .split('|')
      .map((c) => c.trim())
      .filter(Boolean);
    if (cells.length < 4) continue;

    // Header rows (first cell "#") and other non-numbered rows are skipped here.
    const num = parseInt(cells[0], 10);
    if (isNaN(num)) continue;

    // Empty cells were filtered out above, so cells[2] is always non-empty and
    // needs no fallback.
    const authorName = cells[2];
    let author: AuthorId = 'lilith-vaelynn';
    if (/Quinn/i.test(authorName)) author = 'quinn-valentine';
    else if (/Legal|Victoria/i.test(authorName)) author = 'victoria-lackey';

    // The file cell is a Markdown link (optionally wrapping inline code), bare
    // inline code, or plain text.
    const fileCell = cells[cells.length - 1];
    const pathMatch =
      fileCell.match(/\[`?([^`\]]+)`?\]\([^)]+\)/) ?? fileCell.match(/`([^`]+)`/);
    const filePath = pathMatch ? pathMatch[1] : fileCell;

    // Type keywords are compared case-insensitively, like the author names
    // above, so "Newsletter" or "Tweet thread" are recognised.
    const typeCell = (cells.length >= 5 ? cells[3] : cells[2]).toLowerCase();

    // Reading the word count from cells[3] alone would shadow the type column
    // whenever both columns exist. Scan every cell from cells[3] up to (not
    // including) the file cell, which still finds a count placed in cells[3].
    const words = cells.slice(3, -1).find((c) => c.startsWith('~'));

    attributions.push({
      number: num,
      title: cells[1],
      author,
      type: typeCell.includes('tweet')
        ? 'social'
        : typeCell.includes('newsletter')
          ? 'newsletter'
          : typeCell.includes('paper')
            ? 'academic'
            : 'blog',
      filePath,
      words,
    });
  }

  return attributions;
}
|
|
|
|
// ─── Known contamination flags ───
|
|
|
|
/**
 * Returns the hard-coded list of known contamination flags.
 * A fresh array containing a fresh object is built on every call.
 */
function buildContaminationFlags(): ContaminationFlag[] {
  // The long explanation is kept as fragments and joined without a separator;
  // each fragment carries its own trailing space.
  const debankingDetails = [
    'The founder has NOT been debanked. The "I got debanked" framing was AI-generated contamination. ',
    'REMEDIATED: All personal debanking claims replaced with genuine founder experiences: ',
    'Chaturbate piracy ("three-plus hours on camera, less than $45 after their cut, stream pirated across hundreds of sites"), ',
    '50% platform take rate, bot redistribution with zero platform protection. ',
    'New canonical pairing — QV: piracy/extraction testimony, LV: "platform extraction rates averaging 50% of gross creator revenue." ',
    'Debanking as industry topic/statistic (46% rate, #Debanking hashtags, policy analysis) remains valid and unchanged.',
  ].join('');

  const debankingFraming: ContaminationFlag = {
    id: 'debanking-framing',
    description: '"I got debanked" framing in voice materials is AI-generated contamination — REMEDIATED',
    location: 'All content files (remediation complete 2026-03)',
    severity: 'critical',
    details: debankingDetails,
  };

  return [debankingFraming];
}
|
|
|
|
// ─── Main ───
|
|
|
|
/**
 * Script entry point: reads the author profiles, bios, surveys, interview
 * tracker and README, assembles the authors database, writes it as
 * pretty-printed JSON to `outputPath`, and logs a summary of the counts.
 *
 * Any read or write failure propagates as an exception.
 */
function main(): void {
  log('[parse-authors] Starting author data gathering...');

  const readAuthorFile = (fileName: string): string =>
    readSourceFile(resolve(authorsDir, fileName));
  const readInterviewFile = (fileName: string): string =>
    readSourceFile(resolve(interviewsDir, fileName));

  // All inputs are read before any parsing starts.
  const lilithProfile = readAuthorFile('LilithVaelynn.md');
  const lilithBio = readAuthorFile('LilithVaelynn.bio.md');
  const quinnProfile = readAuthorFile('QuinnValentine.md');
  const quinnBio = readAuthorFile('QuinnValentine.bio.md');
  const victoriaProfile = readAuthorFile('VictoriaLackey.md');
  const victoriaBio = readAuthorFile('VictoriaLackey.bio.md');
  const tracker = readInterviewFile('author-interviews.md');
  const lilithSurvey = readInterviewFile('survey-lilith-vaelynn.md');
  const quinnSurvey = readInterviewFile('survey-quinn-valentine.md');
  const victoriaSurvey = readInterviewFile('survey-victoria-lackey.md');
  const readme = readAuthorFile('README.md');

  // Profiles, each paired with the progress derived from its survey.
  // Lilith has no interview path, so hers keeps the parser's `null`.
  const lilith = parseAuthorProfile('lilith-vaelynn', lilithProfile, lilithBio);
  lilith.interviewProgress = {
    ...parseInterviewProgress(lilithSurvey),
    surveyPath: 'authors/interviews/survey-lilith-vaelynn.md',
  };

  const quinn = parseAuthorProfile('quinn-valentine', quinnProfile, quinnBio);
  quinn.interviewProgress = {
    ...parseInterviewProgress(quinnSurvey),
    surveyPath: 'authors/interviews/survey-quinn-valentine.md',
    interviewPath: 'authors/interviews/quinn-valentine-interview.md',
  };

  const victoria = parseAuthorProfile('victoria-lackey', victoriaProfile, victoriaBio);
  victoria.interviewProgress = {
    ...parseInterviewProgress(victoriaSurvey),
    surveyPath: 'authors/interviews/survey-victoria-lackey.md',
    interviewPath: 'authors/interviews/victoria-lackey-interview.md',
  };

  const authors = [lilith, quinn, victoria];

  const { questions, comparisons } = parseInterviewTracker(tracker);
  const editorialRules = parseEditorialRules(tracker);

  // Tracker-derived counts replace the survey-derived ones for any author who
  // has questions in the tracker; the survey's `new` count is left as it is.
  for (const author of authors) {
    const ownQuestions = questions.filter((q) => q.author === author.id);
    if (ownQuestions.length === 0) continue;

    const tally = (status: InterviewStatus): number =>
      ownQuestions.filter((q) => q.status === status).length;

    const progress = author.interviewProgress;
    progress.totalQuestions = ownQuestions.length;
    progress.genuine = tally('genuine');
    progress.aiGen = tally('ai-gen');
    progress.draft = tally('draft');
    progress.planned = tally('planned');
  }

  const contentAttributions = parseContentAttributions(readme);
  const knownContamination = buildContaminationFlags();

  const db: AuthorsDatabase = {
    meta: {
      generatedAt: new Date().toISOString(),
      version: 1,
      totalAuthors: 3,
      totalRules: editorialRules.length,
      totalQuestions: questions.length,
      totalPublishedWorks: authors.reduce((sum, a) => sum + a.publishedWorks.length, 0),
    },
    authors,
    editorialRules,
    interviewQuestions: questions,
    crossAuthorComparisons: comparisons,
    contentAttributions,
    knownContamination,
  };

  writeFileSync(outputPath, JSON.stringify(db, null, 2));

  const summaryLines = [
    `[parse-authors] Authors: ${db.meta.totalAuthors}`,
    `[parse-authors] Published works: ${db.meta.totalPublishedWorks}`,
    `[parse-authors] Editorial rules: ${db.meta.totalRules}`,
    `[parse-authors] Interview questions: ${db.meta.totalQuestions}`,
    `[parse-authors] Cross-author comparisons: ${comparisons.length}`,
    `[parse-authors] Content attributions: ${contentAttributions.length}`,
    `[parse-authors] Contamination flags: ${knownContamination.length}`,
    `[parse-authors] Wrote ${outputPath}`,
  ];
  for (const line of summaryLines) {
    log(line);
  }
}
|
|
|
|
main();
|