// File-viewer metadata captured with this source (not part of the module):
// text-processing-content-fla.../src/types.ts · 152 lines · 3.6 KiB · TypeScript

/**
* Content Flagging Types
*
* Real-time content analysis for browser-side flag scoring.
* Enables users to see content flags before submission.
*/
/**
 * Kinds of problematic content that a flag can be filed under.
 *
 * The members of this union also serve as the key type of the per-category
 * maps declared in this module (category scores and category weights).
 */
export type FlagCategory =
  | 'profanity'
  | 'hate_speech'
  | 'spam'
  | 'contact_info'
  | 'solicitation'
  | 'threats'
  | 'adult_content'
  | 'scam_patterns'
  | 'coded_language'
  | 'predatory_behavior'
  | 'trafficking_signals'
  | 'doxxing'
  | 'law_enforcement'
/**
 * How serious a detected flag is, listed from least to most severe.
 */
export type FlagSeverity =
  | 'low'
  | 'medium'
  | 'high'
  | 'critical'
/**
 * One flag raised against a specific span of the analysed text.
 */
export interface ContentFlag {
  /** Which category this flag belongs to. */
  category: FlagCategory
  /** How severe this flag is. */
  severity: FlagSeverity
  /** This flag's contribution to the score, on a 0-100 scale. */
  score: number
  /** The pattern or phrase that matched. */
  match: string
  /** Where the match sits in the text, as a character offset. */
  offset: number
  /** How long the matched content is. */
  length: number
  /** Explanation of the flag, written for a human reader. */
  reason: string
}
/**
 * Outcome of analysing one piece of content: the overall score, the
 * pass/fail verdict, and the individual flags behind it.
 */
export interface ContentFlagResult {
  /** Overall flag score on a 0-100 scale; a higher value means more flags. */
  score: number
  /** Whether the content passed the threshold check. */
  passes: boolean
  /** The threshold the pass/fail decision was made against. */
  threshold: number
  /** Every individual flag that was detected. */
  flags: ContentFlag[]
  /** Score broken down by category; the type requires an entry for every category. */
  categoryScores: Record<FlagCategory, number>
  /** Time spent processing, in milliseconds. */
  processingTimeMs: number
  /** Sentiment analysis result; only present when sentiment analysis is enabled. */
  sentiment?: {
    /** Sentiment score in the range -1 to 1. */
    score: number
    /** Sentiment classification. */
    label: 'negative' | 'neutral' | 'positive'
  }
}
/**
 * Options controlling how content is analysed and flagged.
 *
 * Only `threshold` is required; every other option may be omitted.
 */
export interface ContentFlaggingConfig {
  /** Score threshold on a 0-100 scale; content scoring above it fails. */
  threshold: number
  /**
   * Categories to check (default: all).
   *
   * NOTE(review): DEFAULT_FLAGGING_CONFIG in this file enables only a subset
   * of the categories, so "all" presumably describes what happens when this
   * option is left undefined — confirm against the analyser.
   */
  enabledCategories?: FlagCategory[]
  /** Per-category weights (default: 1.0). */
  categoryWeights?: Partial<Record<FlagCategory, number>>
  /** Turns sentiment analysis on. */
  enableSentiment?: boolean
  /** Extra word lists, each tied to a category and a severity. */
  customWordLists?: {
    category: FlagCategory
    words: string[]
    severity: FlagSeverity
  }[]
  /** Words that must never be flagged. */
  whitelist?: string[]
  /**
   * Terms that are whitelisted only in certain contexts.
   *
   * NOTE(review): keys are plain strings; presumably they are expected to be
   * `context` values such as 'bio' or 'message' — verify against the consumer.
   */
  contextWhitelist?: Partial<Record<string, string[]>>
  /** Kind of content being analysed; affects analysis (e.g. 'bio' vs 'message'). */
  context?: 'bio' | 'message' | 'listing' | 'review' | 'general'
  /** Maximum input length, in characters (default: 50_000). */
  maxInputLength?: number
  /** Maximum number of flags to collect before analysis stops (default: 100). */
  maxFlags?: number
  /** Replace matched text in results with a redacted placeholder (default: false). */
  redactMatches?: boolean
}
/**
 * Upper limit for a category weight.
 *
 * NOTE(review): nothing in this file reads this constant; presumably the
 * analyser uses it to clamp or validate `categoryWeights` — confirm there.
 */
export const MAX_CATEGORY_WEIGHT = 10

/**
 * Default configuration.
 *
 * Only seven categories are enabled here, although a weight is supplied for
 * every category, including the ones that are not enabled.
 */
export const DEFAULT_FLAGGING_CONFIG: ContentFlaggingConfig = {
  threshold: 50,
  enabledCategories: [
    'profanity',
    'hate_speech',
    'spam',
    'contact_info',
    'solicitation',
    'threats',
    'scam_patterns',
  ],
  categoryWeights: {
    profanity: 0.5,
    hate_speech: 2.0,
    spam: 0.8,
    contact_info: 1.0,
    solicitation: 0.7,
    threats: 2.5,
    adult_content: 0.3,
    scam_patterns: 1.5,
    coded_language: 0.3,
    predatory_behavior: 2.0,
    trafficking_signals: 3.0,
    doxxing: 2.5,
    law_enforcement: 1.0,
  },
  enableSentiment: true,
  context: 'general',
  maxInputLength: 50_000,
  maxFlags: 100,
  redactMatches: false,
}
/**
 * Numeric score assigned to each severity level.
 *
 * NOTE(review): presumably the base contribution of a single flag before any
 * category weighting is applied — confirm against the scoring code.
 */
export const SEVERITY_SCORES: Record<FlagSeverity, number> = {
  low: 5,
  medium: 15,
  high: 30,
  critical: 50,
}