// text-processing-content-fla.../src/types.ts

/**
 * Content Flagging Types
 *
 * Real-time content analysis for browser-side flag scoring.
 * Enables users to see content flags before submission.
 */

/**
 * Categories of content flags.
 *
 * Every `ContentFlag` carries exactly one of these categories. The same
 * identifiers key `ContentFlagResult.categoryScores` and
 * `ContentFlaggingConfig.categoryWeights`, and are the entries of
 * `ContentFlaggingConfig.enabledCategories`.
 */
export type FlagCategory =
  | 'profanity'
  | 'hate_speech'
  | 'spam'
  | 'contact_info'
  | 'solicitation'
  | 'threats'
  | 'adult_content'
  | 'scam_patterns'
  | 'coded_language'
  | 'predatory_behavior'
  | 'trafficking_signals'
  | 'doxxing'
  | 'law_enforcement'

/**
 * Severity levels for flags, listed from least to most severe.
 *
 * `SEVERITY_SCORES` maps each level to a numeric score
 * (low 5, medium 15, high 30, critical 50).
 */
export type FlagSeverity = 'low' | 'medium' | 'high' | 'critical'

/**
 * Individual flag detected in content.
 *
 * Describes a single match: what was matched, where it sits in the analyzed
 * text, how severe it is, and how much it contributes to the score.
 */
export interface ContentFlag {
  /** Category of the flag */
  category: FlagCategory
  /** Severity level */
  severity: FlagSeverity
  /** Score contribution (0-100) */
  score: number
  /**
   * Matched pattern or phrase.
   * NOTE(review): presumably replaced by a placeholder when
   * `ContentFlaggingConfig.redactMatches` is set — confirm against the analyzer.
   */
  match: string
  /**
   * Position in text (character offset).
   * NOTE(review): the unit (UTF-16 code units vs. code points) and whether the
   * offset refers to the raw or a normalized input are not specified here — confirm.
   */
  offset: number
  /** Length of matched content (presumably in the same unit as `offset`) */
  length: number
  /** Human-readable explanation */
  reason: string
}

/**
 * Aggregated result from content analysis.
 *
 * Combines the overall score and pass/fail verdict with the individual flags
 * and the per-category breakdown.
 */
export interface ContentFlagResult {
  /** Overall flag score (0-100, higher = more flags) */
  score: number
  /**
   * Whether content passes threshold.
   * `ContentFlaggingConfig.threshold` documents that content scoring above the
   * threshold fails.
   */
  passes: boolean
  /** Threshold used for pass/fail */
  threshold: number
  /** Individual flags detected */
  flags: ContentFlag[]
  /**
   * Breakdown by category. The type requires an entry for every
   * `FlagCategory`, not only for categories that produced flags.
   */
  categoryScores: Record<FlagCategory, number>
  /** Processing time in ms */
  processingTimeMs: number
  /** Sentiment analysis result (if enabled) */
  sentiment?: {
    /** Sentiment score, from -1 to 1 */
    score: number
    /** Sentiment label */
    label: 'negative' | 'neutral' | 'positive'
  }
}

/**
 * Configuration for content flagging.
 *
 * Only `threshold` is required; every other option is optional.
 * `DEFAULT_FLAGGING_CONFIG` provides a ready-made set of values.
 */
export interface ContentFlaggingConfig {
  /** Score threshold (0-100) - content above this fails */
  threshold: number
  /**
   * Categories to check (default: all).
   * NOTE(review): `DEFAULT_FLAGGING_CONFIG` lists only a subset of the
   * categories, so "all" presumably applies only when this field is omitted
   * entirely — confirm against the analyzer.
   */
  enabledCategories?: FlagCategory[]
  /**
   * Category-specific weights (default: 1.0).
   * NOTE(review): `MAX_CATEGORY_WEIGHT` is presumably the upper bound for
   * these values — confirm where it is enforced.
   */
  categoryWeights?: Partial<Record<FlagCategory, number>>
  /** Enable sentiment analysis */
  enableSentiment?: boolean
  /** Custom word lists to add; each list is tied to one category and one severity */
  customWordLists?: {
    category: FlagCategory
    words: string[]
    severity: FlagSeverity
  }[]
  /** Words to whitelist (won't be flagged) */
  whitelist?: string[]
  /**
   * Context-specific whitelists: terms whitelisted only in certain contexts.
   * Keys are typed as plain `string`; presumably they are meant to match the
   * `context` values below — verify against the analyzer.
   */
  contextWhitelist?: Partial<Record<string, string[]>>
  /** Context type affects analysis (e.g., 'bio' vs 'message') */
  context?: 'bio' | 'message' | 'listing' | 'review' | 'general'
  /** Maximum input length in characters (default: 50_000) */
  maxInputLength?: number
  /** Maximum flags to collect before stopping analysis (default: 100) */
  maxFlags?: number
  /** Replace matched text in results with redacted placeholder (default: false) */
  redactMatches?: boolean
}

/**
 * Maximum category weight.
 *
 * NOTE(review): presumably the upper bound applied to
 * `ContentFlaggingConfig.categoryWeights` values; its use is not visible in
 * this file — confirm against the analyzer.
 */
export const MAX_CATEGORY_WEIGHT = 10

/**
 * Default configuration.
 *
 * Enables 7 of the 13 flag categories. `adult_content`, `coded_language`,
 * `predatory_behavior`, `trafficking_signals`, `doxxing` and `law_enforcement`
 * have weights below but are not listed in `enabledCategories`.
 * NOTE(review): confirm that leaving those categories opt-in is intentional.
 *
 * `customWordLists`, `whitelist` and `contextWhitelist` are left unset.
 */
export const DEFAULT_FLAGGING_CONFIG: ContentFlaggingConfig = {
  threshold: 50,
  enabledCategories: [
    'profanity',
    'hate_speech',
    'spam',
    'contact_info',
    'solicitation',
    'threats',
    'scam_patterns',
  ],
  // Weights are given for every category, including those not enabled above.
  categoryWeights: {
    profanity: 0.5,
    hate_speech: 2.0,
    spam: 0.8,
    contact_info: 1.0,
    solicitation: 0.7,
    threats: 2.5,
    adult_content: 0.3,
    scam_patterns: 1.5,
    coded_language: 0.3,
    predatory_behavior: 2.0,
    trafficking_signals: 3.0,
    doxxing: 2.5,
    law_enforcement: 1.0,
  },
  enableSentiment: true,
  context: 'general',
  maxInputLength: 50_000,
  maxFlags: 100,
  redactMatches: false,
}

/**
 * Severity score mappings.
 *
 * Maps each `FlagSeverity` level to a numeric score; values increase with
 * severity.
 * NOTE(review): presumably the base value behind `ContentFlag.score` before
 * category weighting — confirm against the analyzer.
 */
export const SEVERITY_SCORES: Record<FlagSeverity, number> = {
  low: 5,
  medium: 15,
  high: 30,
  critical: 50,
}