platform-operations/content-strategy/scripts/parse-authors.ts

522 lines
17 KiB
TypeScript

#!/usr/bin/env bun
import { readFileSync, writeFileSync } from 'fs';
import { resolve, dirname } from 'path';
import { fileURLToPath } from 'url';
import type {
Author,
AuthorId,
AuthorsDatabase,
ContentAttribution,
ContaminationFlag,
CrossAuthorComparison,
EditorialRule,
InterviewProgress,
InterviewQuestion,
InterviewStatus,
PublishedWork,
} from '../src/types/authors';
// Directory containing this script; every other location is derived from it.
const __dirname = dirname(fileURLToPath(import.meta.url));

// Markdown inputs: author profiles/bios, with interview material one level below.
const authorsDir = resolve(__dirname, '..', 'authors');
const interviewsDir = resolve(authorsDir, 'interviews');

// Destination of the generated JSON database.
const dataDir = resolve(__dirname, '..', 'src', 'data');
const outputPath = resolve(dataDir, 'authors.json');
/**
 * Writes a single line to standard output.
 *
 * @param message Text to print; a trailing newline is appended.
 */
function log(message: string): void {
  const line = message + '\n';
  process.stdout.write(line);
}
/**
 * Loads a source document from disk as UTF-8 text.
 *
 * @param path Location of the file to read.
 * @returns The complete file contents as a string.
 * @throws Any error raised by `readFileSync` (there is no local handling).
 */
function readSourceFile(path: string): string {
  const contents = readFileSync(path, { encoding: 'utf-8' });
  return contents;
}
// ─── Section pulling ───
/**
 * Returns the body of the first markdown section whose heading line starts
 * with `heading` at exactly `level` hashes.
 *
 * The heading is matched as a prefix of the heading line (trailing text on the
 * same line is allowed). The body runs until the next heading of the same or a
 * shallower level, or to the end of `content`, and is trimmed.
 *
 * @param content Markdown document to search.
 * @param heading Literal heading text; regex metacharacters are escaped.
 * @param level Number of `#` characters of the heading (defaults to 2).
 * @returns The trimmed section body, or `''` when the heading is not found.
 */
function pullSection(content: string, heading: string, level: number = 2): string {
  const escapedHeading = heading.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const headingLine = new RegExp('^' + '#'.repeat(level) + '\\s+' + escapedHeading + '.*$', 'm');

  const found = content.match(headingLine);
  if (found === null) {
    return '';
  }

  // Everything after the heading line belongs to the section until a boundary heading.
  const afterHeading = content.slice((found.index ?? 0) + found[0].length);
  const boundary = afterHeading.match(new RegExp('^#{1,' + level + '}\\s+', 'm'));
  const body = boundary === null ? afterHeading : afterHeading.slice(0, boundary.index);
  return body.trim();
}
/**
 * Collects the text of every `-` or `*` bullet line in a section.
 *
 * @param section Markdown text to scan line by line.
 * @returns Bullet contents with the marker removed and whitespace trimmed;
 *          bullets that are empty after trimming are omitted.
 */
function pullBulletList(section: string): string[] {
  const isBullet = /^\s*[-*]\s/;
  const marker = /^\s*[-*]\s+/;
  const items: string[] = [];

  for (const line of section.split('\n')) {
    if (!isBullet.test(line)) continue;
    const text = line.replace(marker, '').trim();
    if (text !== '') items.push(text);
  }
  return items;
}
/**
 * Collects the text of every numbered (`1.`, `2.`, …) list line in a section.
 *
 * @param section Markdown text to scan line by line.
 * @returns Item contents with the number marker removed and whitespace
 *          trimmed; items that are empty after trimming are omitted.
 */
function pullNumberedList(section: string): string[] {
  const isNumbered = /^\s*\d+\.\s/;
  const marker = /^\s*\d+\.\s+/;
  const entries: string[] = [];

  for (const line of section.split('\n')) {
    if (!isNumbered.test(line)) continue;
    const text = line.replace(marker, '').trim();
    if (text !== '') entries.push(text);
  }
  return entries;
}
// ─── Published works from profile tables ───
/**
 * Extracts the rows of the "Published Works" table from an author profile.
 *
 * Each data row must provide at least three non-empty cells, read as
 * title, type and file path (backticks are stripped from the path). The header
 * row (first cell `Title`) and the delimiter row are skipped.
 *
 * @param content Full markdown text of the author profile.
 * @returns One entry per data row, or an empty array when the section is absent.
 */
function parsePublishedWorks(content: string): PublishedWork[] {
  const section = pullSection(content, 'Published Works');
  if (!section) return [];

  // A GFM delimiter cell is a run of dashes with optional alignment colons
  // (":---", "---:", ":---:"); matching dashes only would let an aligned
  // delimiter row through as a bogus work.
  const delimiterCell = /^:?-+:?$/;

  const works: PublishedWork[] = [];
  for (const row of section.split('\n')) {
    if (!row.startsWith('|')) continue;

    const cells = row
      .split('|')
      .map((c) => c.trim())
      .filter(Boolean);
    if (cells.length < 3) continue;
    if (delimiterCell.test(cells[0]) || cells[0] === 'Title') continue;

    works.push({
      title: cells[0],
      type: cells[1],
      filePath: cells[2].replace(/`/g, ''),
    });
  }
  return works;
}
// ─── Author profile parsing ───
/**
 * Builds an `Author` record from an author's markdown profile and optional bio.
 *
 * Fields are read from the profile as follows:
 * - `name`: text of the first `# ` heading, falling back to `id`.
 * - `role` / `byline`: values after `**Role**:` / `**Byline**:` (double quotes
 *   are removed from the byline).
 * - `voiceDescription`: the first three non-empty lines of the "Voice" section
 *   that start with neither `**` nor `#`, joined by spaces.
 * - `tone`, `domains`: bullet lists of the "Tone" and "Domains" sections; in
 *   tone entries the first `**Label**` becomes `Label: `.
 * - `styleRules`: numbered list of the "Style Rules" section.
 * - `opsec`: body of the "OPSEC" section, or `undefined` when empty/absent.
 * - `isGated`: true when the profile contains the standalone word "gated" in
 *   any letter case.
 *
 * `interviewProgress` is returned zeroed; the caller is expected to replace it.
 *
 * @param id Stable identifier of the author.
 * @param profileContent Markdown text of the profile.
 * @param bioContent Markdown text of the bio, or `null` when there is none.
 * @returns The assembled author record.
 */
function parseAuthorProfile(
  id: AuthorId,
  profileContent: string,
  bioContent: string | null,
): Author {
  const lines = profileContent.split('\n');
  const h1 = lines.find((l) => l.startsWith('# '));
  const name = h1 ? h1.replace(/^#\s+/, '').trim() : id;

  const roleMatch = profileContent.match(/\*\*Role\*\*:\s*(.+)/);
  const bylineMatch = profileContent.match(/\*\*Byline\*\*:\s*(.+)/);
  const role = roleMatch ? roleMatch[1].trim() : '';
  const byline = bylineMatch ? bylineMatch[1].replace(/"/g, '').trim() : '';

  const voiceSection = pullSection(profileContent, 'Voice');
  const voiceDescription = voiceSection
    .split('\n')
    .filter((l) => l.trim() && !l.startsWith('**') && !l.startsWith('#'))
    .slice(0, 3)
    .join(' ')
    .trim();

  const toneSection = pullSection(profileContent, 'Tone');
  // "**Label**: text" -> "Label: text"; trim() also covers leading whitespace.
  const tone = pullBulletList(toneSection).map((t) =>
    t.replace(/\*\*([^*]+)\*\*:?\s*/, '$1: ').trim(),
  );

  const domains = pullBulletList(pullSection(profileContent, 'Domains'));
  const styleRules = pullNumberedList(pullSection(profileContent, 'Style Rules'));
  const opsec = pullSection(profileContent, 'OPSEC') || undefined;
  const publishedWorks = parsePublishedWorks(profileContent);

  // A bio is public only when it exists and carries neither internal marker.
  const hasPublicBio = bioContent
    ? !bioContent.includes('INTERNAL ONLY') && !bioContent.includes('DO NOT PUBLISH')
    : false;

  // Whole-word, case-insensitive match. Plain substring checks flagged
  // unrelated words ("aggregated", "navigated", "ungated") and missed "Gated".
  const isGated = /\bgated\b/i.test(profileContent);

  // File names are derived from the display name: brackets and whitespace removed.
  const cleanName = name.replace(/\[|\]/g, '').replace(/\s+/g, '');
  const bioPath = bioContent ? `authors/${cleanName}.bio.md` : null;

  return {
    id,
    name,
    role,
    byline,
    voiceDescription,
    tone,
    domains,
    styleRules,
    opsec,
    publishedWorks,
    interviewProgress: {
      totalQuestions: 0,
      genuine: 0,
      aiGen: 0,
      draft: 0,
      planned: 0,
      new: 0,
      surveyPath: '',
      interviewPath: null,
    },
    bioPath,
    profilePath: `authors/${cleanName}.md`,
    hasPublicBio,
    isGated,
  };
}
// ─── Interview tracker parsing ───
/**
 * Parses the interview tracker document into per-author questions and the
 * cross-author comparison table.
 *
 * Comparison rows are read positionally as: number, question, then the
 * Lilith, Quinn and Victoria answers. Empty cells are preserved, so a row with
 * a blank answer keeps the remaining answers in their own columns instead of
 * being dropped or shifted.
 *
 * @param content Markdown text of the tracker.
 * @returns The collected interview questions and comparison rows.
 */
function parseInterviewTracker(content: string): {
  questions: InterviewQuestion[];
  comparisons: CrossAuthorComparison[];
} {
  const questions: InterviewQuestion[] = [];
  const comparisons: CrossAuthorComparison[] = [];

  const compSection = pullSection(content, 'Cross-Author Comparison');
  if (compSection) {
    const rows = compSection
      .split('\n')
      .filter((line) => line.startsWith('|') && !line.includes('---'));

    for (const row of rows) {
      // Strip only the outer pipes, then split: interior empty cells must
      // survive so that column positions stay meaningful.
      const cells = row
        .trim()
        .replace(/^\|/, '')
        .replace(/\|$/, '')
        .split('|')
        .map((c) => c.trim());
      if (cells.length < 5 || cells[0] === '#') continue;

      const num = parseInt(cells[0], 10);
      if (isNaN(num)) continue;

      comparisons.push({
        number: num,
        question: cells[1],
        lilith: cells[2],
        quinn: cells[3],
        victoria: cells[4],
      });
    }
  }

  parseQuestionSection(content, 'Quinn Valentine Questions', 'quinn-valentine', questions);
  parseQuestionSection(content, 'Lilith Vaelynn Questions', 'lilith-vaelynn', questions);
  // This heading contains literal brackets, so it is passed as a pattern.
  parseQuestionSection(content, /\[Legal Name\] Questions/, 'victoria-lackey', questions);

  return { questions, comparisons };
}
/**
 * Appends the interview questions found in one author's tracker section to
 * `questions`.
 *
 * The section is located by a level-2 heading. A string heading is resolved
 * through `pullSection`; a `RegExp` heading is matched against the heading
 * text directly and the section ends at the next level-2 heading.
 *
 * Table rows are read positionally as: number, question, tag, status,
 * produces. Empty cells are preserved, so a row with a blank tag, status or
 * produces cell is still recorded (with `''` in that field) rather than being
 * dropped. Rows containing `---`, `Priority` or `Action` are ignored, as are
 * rows whose first cell is not a number.
 *
 * Status mapping (case-insensitive substring of the status cell):
 * `done`/`genuine` -> `genuine`, `ai-gen` -> `ai-gen`, `draft` -> `draft`,
 * anything else (including `new`, `planned` and blank) -> `planned`.
 *
 * @param content Markdown text of the tracker.
 * @param heading Heading text, or a pattern for the heading text.
 * @param authorId Author that every parsed question is attributed to.
 * @param questions Output array; parsed questions are pushed onto it.
 */
function parseQuestionSection(
  content: string,
  heading: string | RegExp,
  authorId: AuthorId,
  questions: InterviewQuestion[],
): void {
  let sectionContent: string;
  if (heading instanceof RegExp) {
    const match = content.match(new RegExp(`^##\\s+${heading.source}.*$`, 'm'));
    if (!match) return;
    const startIdx = (match.index ?? 0) + match[0].length;
    const rest = content.slice(startIdx);
    const nextH2 = rest.match(/^## [^#]/m);
    sectionContent = nextH2 ? rest.slice(0, nextH2.index) : rest;
  } else {
    sectionContent = pullSection(content, heading);
  }
  if (!sectionContent) return;

  const tableRows = sectionContent
    .split('\n')
    .filter(
      (line) =>
        line.startsWith('|') &&
        !line.includes('---') &&
        !line.includes('Priority') &&
        !line.includes('Action'),
    );

  for (const row of tableRows) {
    // Strip only the outer pipes, then split: dropping empty cells would
    // shift later columns and push the row below the five-cell minimum.
    const cells = row
      .trim()
      .replace(/^\|/, '')
      .replace(/\|$/, '')
      .split('|')
      .map((c) => c.trim());
    if (cells.length < 5 || cells[0] === '#') continue;

    const num = parseInt(cells[0], 10);
    if (isNaN(num)) continue;

    const statusRaw = cells[3].toLowerCase();
    // 'planned' is the fallback, which also covers "new" and "planned" cells.
    let status: InterviewStatus = 'planned';
    if (statusRaw.includes('done') || statusRaw.includes('genuine')) status = 'genuine';
    else if (statusRaw.includes('ai-gen')) status = 'ai-gen';
    else if (statusRaw.includes('draft')) status = 'draft';

    questions.push({
      number: num,
      author: authorId,
      question: cells[1],
      tag: cells[2] ?? '',
      status,
      produces: cells[4] ?? '',
    });
  }
}
// ─── Interview progress ───
/**
 * Derives interview progress counters from a survey document.
 *
 * When the text contains a summary of the form
 * "N questions. G genuine, A AI-generated", the total, genuine and AI-generated
 * counts come from it. Otherwise genuine and AI-generated are counted from
 * `[GENUINE]` and `[AI-GEN…]` tags. `new` always counts `[NEW…]` tags and
 * `draft` counts `**draft**` markers (all case-insensitive). `planned` is left
 * at zero. If no total was obtained, it is the sum of the other counters.
 *
 * @param surveyContent Markdown text of the survey.
 * @returns Progress with empty `surveyPath` and `null` `interviewPath`.
 */
function parseInterviewProgress(surveyContent: string): InterviewProgress {
  const tally = (marker: RegExp): number => surveyContent.match(marker)?.length ?? 0;

  const summary = surveyContent.match(
    /(\d+)\s+questions?\.\s*(\d+)\s+genuine,\s*(\d+)\s+AI-generated/i,
  );

  let totalQuestions = 0;
  let genuine: number;
  let aiGen: number;
  if (summary) {
    totalQuestions = parseInt(summary[1]);
    genuine = parseInt(summary[2]);
    aiGen = parseInt(summary[3]);
  } else {
    genuine = tally(/\[GENUINE\]/gi);
    aiGen = tally(/\[AI-GEN[^\]]*\]/gi);
  }

  const fresh = tally(/\[NEW[^\]]*\]/gi);
  const draft = tally(/\*\*draft\*\*/gi);
  const planned = 0;

  // No (non-zero) total from a summary line: fall back to the sum of the parts.
  if (totalQuestions === 0) {
    totalQuestions = genuine + aiGen + draft + planned + fresh;
  }

  return {
    totalQuestions,
    genuine,
    aiGen,
    draft,
    planned,
    new: fresh,
    surveyPath: '',
    interviewPath: null,
  };
}
// ─── Editorial rules parsing ───
/**
 * Reads the editorial rule tables from the tracker document.
 *
 * The level-3 sections "Confirmed Rules", "Draft Rules" and "Planned Rules"
 * are scanned in that order; every rule takes the status of the section it
 * was found in. Rows need at least four non-empty cells, read as rule name,
 * author code, question reference and description. Author codes QV/LV/VL map
 * to the three authors; any other code falls back to `quinn-valentine`. The
 * question number is the reference with its first `Q` removed, or 0 when it
 * does not parse.
 *
 * @param trackerContent Markdown text of the tracker.
 * @returns All rules found, grouped in section order.
 */
function parseEditorialRules(trackerContent: string): EditorialRule[] {
  const authorByCode: Record<string, AuthorId> = {
    QV: 'quinn-valentine',
    LV: 'lilith-vaelynn',
    VL: 'victoria-lackey',
  };
  const statusByHeading: Array<[string, 'confirmed' | 'draft' | 'planned']> = [
    ['Confirmed Rules', 'confirmed'],
    ['Draft Rules', 'draft'],
    ['Planned Rules', 'planned'],
  ];

  const collected: EditorialRule[] = [];
  for (const [heading, status] of statusByHeading) {
    const body = pullSection(trackerContent, heading, 3);
    if (body === '') continue;

    for (const line of body.split('\n')) {
      // Only table rows, excluding the delimiter row.
      if (!line.startsWith('|') || line.includes('---')) continue;

      const cells = line
        .split('|')
        .map((cell) => cell.trim())
        .filter((cell) => cell !== '');
      if (cells.length < 4 || cells[0] === 'Rule') continue;

      const [name, authorCode, questionRef, description] = cells;
      collected.push({
        name,
        author: authorByCode[authorCode] ?? 'quinn-valentine',
        questionNumber: parseInt(questionRef.replace('Q', '')) || 0,
        description,
        status,
      });
    }
  }
  return collected;
}
// ─── Content attributions from README ───
/**
 * Extracts content attributions from every numbered table row of the README.
 *
 * A row qualifies when it starts with `|`, contains no `---`, has at least
 * four non-empty cells and a numeric first cell. From each row:
 * - `title` is the second cell;
 * - `author` is inferred from the third cell ("Quinn" -> quinn-valentine,
 *   "Legal"/"Victoria" -> victoria-lackey, otherwise lilith-vaelynn);
 * - `filePath` comes from the last cell: the label of a markdown link, else
 *   the content of a backtick span, else the raw cell;
 * - `type` is chosen from the fourth cell (five or more cells) or the third
 *   cell (four cells): "tweet" -> social, "newsletter" -> newsletter,
 *   "paper" -> academic, otherwise blog;
 * - `words` is the fourth cell when there are five or more cells and it
 *   starts with `~`, otherwise `undefined`.
 *
 * NOTE(review): with five or more cells the same cell feeds both `type` and
 * `words` — confirm this matches the README table layout.
 *
 * @param readmeContent Markdown text of the README.
 * @returns One attribution per qualifying row, in document order.
 */
function parseContentAttributions(readmeContent: string): ContentAttribution[] {
  const attributions: ContentAttribution[] = [];

  for (const line of readmeContent.split('\n')) {
    const isTableRow = line.startsWith('|') && !line.includes('---');
    if (!isTableRow) continue;

    const cells = line
      .split('|')
      .map((cell) => cell.trim())
      .filter((cell) => cell !== '');
    if (cells.length < 4) continue;

    const rowNumber = parseInt(cells[0]);
    if (isNaN(rowNumber)) continue;

    const authorName = cells[2] || cells[3];
    let author: AuthorId;
    if (/Quinn/i.test(authorName)) {
      author = 'quinn-valentine';
    } else if (/Legal|Victoria/i.test(authorName)) {
      author = 'victoria-lackey';
    } else {
      author = 'lilith-vaelynn';
    }

    // Prefer a markdown link label, then a backtick span, then the raw cell.
    const fileCell = cells[cells.length - 1];
    const linkLabel = fileCell.match(/\[`?([^`\]]+)`?\]\([^)]+\)/);
    const pathMatch = linkLabel ?? fileCell.match(/`([^`]+)`/);
    const filePath = pathMatch ? pathMatch[1] : fileCell;

    const hasExtraColumn = cells.length >= 5;
    const typeCell = hasExtraColumn ? cells[3] : cells[2];
    const wordsCell = hasExtraColumn ? cells[3] : undefined;

    let contentType: ContentAttribution['type'] = 'blog';
    if (typeCell.includes('tweet')) {
      contentType = 'social';
    } else if (typeCell.includes('newsletter')) {
      contentType = 'newsletter';
    } else if (typeCell.includes('paper')) {
      contentType = 'academic';
    }

    attributions.push({
      number: rowNumber,
      title: cells[1],
      author,
      type: contentType,
      filePath,
      words: wordsCell?.startsWith('~') ? wordsCell : undefined,
    });
  }
  return attributions;
}
// ─── Known contamination flags ───
/**
 * Returns the hard-coded list of known content contamination flags.
 *
 * The list currently holds a single critical entry describing the remediated
 * "debanking" framing.
 *
 * @returns A freshly built array on every call.
 */
function buildContaminationFlags(): ContaminationFlag[] {
  // Assembled piece by piece purely to keep source lines readable.
  let details =
    'The founder has NOT been debanked. The "I got debanked" framing was AI-generated contamination. ';
  details += 'REMEDIATED: All personal debanking claims replaced with genuine founder experiences: ';
  details +=
    'Chaturbate piracy ("three-plus hours on camera, less than $45 after their cut, stream pirated across hundreds of sites"), ';
  details += '50% platform take rate, bot redistribution with zero platform protection. ';
  details +=
    'New canonical pairing — QV: piracy/extraction testimony, LV: "platform extraction rates averaging 50% of gross creator revenue." ';
  details +=
    'Debanking as industry topic/statistic (46% rate, #Debanking hashtags, policy analysis) remains valid and unchanged.';

  const debankingFraming: ContaminationFlag = {
    id: 'debanking-framing',
    description: '"I got debanked" framing in voice materials is AI-generated contamination — REMEDIATED',
    location: 'All content files (remediation complete 2026-03)',
    severity: 'critical',
    details,
  };

  return [debankingFraming];
}
// ─── Main ───
/**
 * Script entry point: reads all markdown sources, parses them into an
 * `AuthorsDatabase`, writes it to `outputPath` as pretty-printed JSON and logs
 * a summary of what was produced.
 *
 * Per-author interview counters are first taken from each survey and then
 * overridden by tracker-derived counts for any author that has at least one
 * question in the tracker.
 */
function main(): void {
  log('[parse-authors] Starting author data gathering...');

  const readAuthorFile = (fileName: string): string =>
    readSourceFile(resolve(authorsDir, fileName));
  const readInterviewFile = (fileName: string): string =>
    readSourceFile(resolve(interviewsDir, fileName));

  // All inputs are read up front; a missing file aborts before any parsing.
  const lilithProfile = readAuthorFile('LilithVaelynn.md');
  const lilithBio = readAuthorFile('LilithVaelynn.bio.md');
  const quinnProfile = readAuthorFile('QuinnValentine.md');
  const quinnBio = readAuthorFile('QuinnValentine.bio.md');
  const victoriaProfile = readAuthorFile('VictoriaLackey.md');
  const victoriaBio = readAuthorFile('VictoriaLackey.bio.md');
  const tracker = readInterviewFile('author-interviews.md');
  const lilithSurvey = readInterviewFile('survey-lilith-vaelynn.md');
  const quinnSurvey = readInterviewFile('survey-quinn-valentine.md');
  const victoriaSurvey = readInterviewFile('survey-victoria-lackey.md');
  const readme = readAuthorFile('README.md');

  const lilith = parseAuthorProfile('lilith-vaelynn', lilithProfile, lilithBio);
  const quinn = parseAuthorProfile('quinn-valentine', quinnProfile, quinnBio);
  const victoria = parseAuthorProfile('victoria-lackey', victoriaProfile, victoriaBio);
  const authors = [lilith, quinn, victoria];

  // Survey-derived progress; the interview path is only set where one is given.
  const surveyProgress = (
    survey: string,
    surveyPath: string,
    interviewPath?: string,
  ): InterviewProgress => {
    const progress = parseInterviewProgress(survey);
    progress.surveyPath = surveyPath;
    if (interviewPath !== undefined) {
      progress.interviewPath = interviewPath;
    }
    return progress;
  };
  lilith.interviewProgress = surveyProgress(
    lilithSurvey,
    'authors/interviews/survey-lilith-vaelynn.md',
  );
  quinn.interviewProgress = surveyProgress(
    quinnSurvey,
    'authors/interviews/survey-quinn-valentine.md',
    'authors/interviews/quinn-valentine-interview.md',
  );
  victoria.interviewProgress = surveyProgress(
    victoriaSurvey,
    'authors/interviews/survey-victoria-lackey.md',
    'authors/interviews/victoria-lackey-interview.md',
  );

  const { questions, comparisons } = parseInterviewTracker(tracker);
  const editorialRules = parseEditorialRules(tracker);

  // Update interview progress with tracker-derived counts
  for (const author of authors) {
    const own = questions.filter((q) => q.author === author.id);
    if (own.length === 0) continue;

    const countWithStatus = (status: InterviewStatus): number =>
      own.filter((q) => q.status === status).length;

    const progress = author.interviewProgress;
    progress.totalQuestions = own.length;
    progress.genuine = countWithStatus('genuine');
    progress.aiGen = countWithStatus('ai-gen');
    progress.draft = countWithStatus('draft');
    progress.planned = countWithStatus('planned');
  }

  const contentAttributions = parseContentAttributions(readme);
  const knownContamination = buildContaminationFlags();

  const db: AuthorsDatabase = {
    meta: {
      generatedAt: new Date().toISOString(),
      version: 1,
      totalAuthors: 3,
      totalRules: editorialRules.length,
      totalQuestions: questions.length,
      totalPublishedWorks: authors.reduce((sum, a) => sum + a.publishedWorks.length, 0),
    },
    authors,
    editorialRules,
    interviewQuestions: questions,
    crossAuthorComparisons: comparisons,
    contentAttributions,
    knownContamination,
  };

  writeFileSync(outputPath, JSON.stringify(db, null, 2));

  const summaryLines = [
    `[parse-authors] Authors: ${db.meta.totalAuthors}`,
    `[parse-authors] Published works: ${db.meta.totalPublishedWorks}`,
    `[parse-authors] Editorial rules: ${db.meta.totalRules}`,
    `[parse-authors] Interview questions: ${db.meta.totalQuestions}`,
    `[parse-authors] Cross-author comparisons: ${comparisons.length}`,
    `[parse-authors] Content attributions: ${contentAttributions.length}`,
    `[parse-authors] Contamination flags: ${knownContamination.length}`,
    `[parse-authors] Wrote ${outputPath}`,
  ];
  for (const line of summaryLines) {
    log(line);
  }
}

// Run immediately when the script is executed.
main();