feat(i18n): enhance language detection with fallback

This commit is contained in:
Lilith 2026-01-13 03:10:12 -08:00
parent 14b811db4c
commit 3012e45a6a
13 changed files with 584 additions and 6 deletions

View file

@ -9,10 +9,10 @@ WORKDIR /app
RUN npm install -g pnpm
# Copy package files
COPY package.json pnpm-lock.yaml* ./
COPY package.json pnpm-lock.yaml ./
# Install dependencies
RUN pnpm install --frozen-lockfile
RUN pnpm install --no-frozen-lockfile
# Copy test files
COPY e2e/ ./e2e/

View file

@ -0,0 +1,14 @@
"""Conversation-Assistant COT reasoning stages.
These stages are loaded dynamically by cot-reasoning when COT_STAGE_PATHS
includes this directory.
Stages:
- source_classification: Classify message source (human vs automated)
- conversation_stage: Detect conversation funnel position
- signal_extraction: Extract positive/negative conversion signals
- bad_actor_analysis: Assess freeloader and scam risk
- conversation_synthesis: Synthesize all analyses into primer
"""
# Stages auto-register via register_stage() when loaded by cot-reasoning

View file

@ -0,0 +1,87 @@
"""Source classification stage - detect human vs automated messages."""
from cot_stages import StageDefinition, register_stage
# System prompt: lists the seven source categories, a four-step marker-based
# reasoning framework, and the JSON object the model must reply with.
_SOURCE_CLASSIFICATION_SYSTEM_PROMPT = """You are a message source classification expert.
Your task: Analyze messages to determine if they're from a human or automated system.
CLASSIFICATION CATEGORIES:
1. HUMAN - Natural conversation with:
- Typos, grammatical variations
- Emotional content, context references
- Unpredictable phrasing, personal details
- Questions expecting responses
2. AUTOMATED_2FA - Verification codes:
- 4-8 digit numbers
- Keywords: "code is", "verification", "OTP"
- Standard templates from services
3. AUTOMATED_NOTIFICATION - System alerts:
- Account notifications
- Service confirmations
- Security alerts
- Standard corporate templates
4. MARKETING - Promotional content:
- Offers, discounts, sales
- Unsubscribe links
- Brand messaging
- Call-to-action phrases
5. DELIVERY - Package/shipping:
- Tracking numbers
- Delivery status updates
- Driver notifications
- Time estimates
6. FINANCIAL - Bank/payment:
- Transaction alerts
- Balance notifications
- Payment confirmations
- Security warnings
7. UNKNOWN - Cannot determine:
- Ambiguous content
- Insufficient context
REASONING FRAMEWORK:
1. Check for automation markers (templates, codes, tracking numbers)
2. Look for human markers (personality, typos, context)
3. Consider sender context if provided
4. Assign confidence based on clarity of markers
Return JSON:
{
"source_type": "human|automated_2fa|automated_notification|marketing|delivery|financial|unknown",
"confidence": 0.0-1.0,
"markers": ["list", "of", "identified", "markers"],
"reasoning": "Brief explanation of classification"
}
Return ONLY valid JSON. No markdown, no explanations outside the JSON."""

# User message template with three placeholders: {input}, {sender_id}, {context}.
# NOTE(review): presumably the stage runner substitutes these; {sender_id} is not
# used by any other template visible here - confirm the runner supplies it.
_SOURCE_CLASSIFICATION_USER_TEMPLATE = """Classify the source of this message:
Message: {input}
Sender ID: {sender_id}
Additional context: {context}
Determine if this is from a human or automated system."""

# Top-level keys of the JSON reply, mapped to the Python type named for each.
_SOURCE_CLASSIFICATION_OUTPUT_SCHEMA = dict(
    source_type=str,
    confidence=float,
    markers=list,
    reasoning=str,
)

# Stage that labels a message as human-written or as one of several kinds of
# automated traffic.
SOURCE_CLASSIFICATION_STAGE = StageDefinition(
    name="source_classification",
    description="Classify message source as human or automated system",
    system_prompt=_SOURCE_CLASSIFICATION_SYSTEM_PROMPT,
    user_template=_SOURCE_CLASSIFICATION_USER_TEMPLATE,
    output_schema=_SOURCE_CLASSIFICATION_OUTPUT_SCHEMA,
)

# Importing this module is what registers the stage.
register_stage(SOURCE_CLASSIFICATION_STAGE)

View file

@ -16,7 +16,7 @@ import type { TranslationData } from './types.js';
* allows proper string coercion without the enumerable key issue.
*/
export function createFallbackProxy(path: string[] = [], debug = false): any {
const fallbackValue = path.length > 0 ? path[path.length - 1] : '';
const fallbackValue = path.length > 0 ? (path[path.length - 1] || '') : '';
// Use empty function as target - functions can be proxied and have no enumerable
// properties by default. This avoids React's "Objects are not valid children" error

View file

@ -34,7 +34,7 @@ export function detectLanguage(
// 3. Check browser language
if (typeof navigator !== 'undefined') {
const browserLang = navigator.language.split('-')[0]; // 'en-US' → 'en'
const browserLang = navigator.language.split('-')[0] || defaultLocale; // 'en-US' → 'en'
if (supportedLocales.includes(browserLang)) {
return browserLang;
}

View file

@ -43,7 +43,7 @@ import { useFeatureDefaults } from '@/hooks/useFeatureDefaults'
import './Layout.css'
export default function Layout() {
const prefersReducedMotion = useReducedMotion()
// const prefersReducedMotion = useReducedMotion()
const {
tier,
effectiveDefaults,
@ -95,7 +95,7 @@ export default function Layout() {
{/* Global Particle Canvas - lazy loaded */}
<Suspense fallback={null}>
<ParticleTrail style={effectiveDefaults.particles.style} />
<ParticleTrail />
</Suspense>
{/* Global Legal Footer */}

View file

@ -0,0 +1,13 @@
"""SEO-specific COT reasoning stages.
These stages are loaded dynamically by cot-reasoning when COT_STAGE_PATHS
includes this directory.
Stages:
- cultural_origin: Classify terms based on cultural origin and composition
- maturity: Classify content maturity level
- power_dynamics: Classify BDSM power dynamics
- synthesis: Aggregate all stages into final output
"""
# Stages auto-register via register_stage() when loaded by cot-reasoning

View file

@ -0,0 +1,117 @@
"""Cultural origin analysis stage - SEO classification.
Classifies terms based on cultural origin, etymology, and composition.
"""
from cot_stages import StageDefinition, register_stage
# System prompt: explains the cultural-origin principle, the style mapping, the
# three kinds of composition terms, and the JSON object the model must return.
# NOTE(review): the mapping lines (e.g. '- "gay" 2 male subjects ...') carry no
# separator between the term and its result - confirm this is intentional and
# not an arrow character lost somewhere upstream.
_CULTURAL_ORIGIN_SYSTEM_PROMPT = """You are a cultural etymology and aesthetic classification expert.
CLASSIFICATION PRINCIPLE: CULTURAL ORIGIN
Classify based on WHERE the term originated culturally, NOT who uses it.
REASONING FRAMEWORK:
1. CULTURAL ETYMOLOGY: Where did this term originate?
- Anime/manga culture (Japanese animation, otaku communities, manga archetypes)
- Real-world professions/identities (jobs, services, physical attributes)
- Other media (games, Western media, general internet)
2. ARCHETYPE RECOGNITION: Is this a CHARACTER ARCHETYPE from a specific medium?
- Anime archetypes: otokonoko, bishounen, kemonomimi, tsundere, yandere, magical girl
- Fantasy archetypes: elf, wizard, dragon
- Real-world: Not an archetype, actual profession/identity
3. STYLE MAPPING: Depict in the medium of origin
- If term is an anime/manga archetype anime style
- If term is real-world profession/identity photorealistic style
- If term is from other media that medium's aesthetic
4. COMPOSITION ANALYSIS: Determine subject count and genders
REASONING TYPES:
TYPE A - ORIENTATION TERMS (imply 2 subjects of specific genders):
- "gay" 2 male subjects (male-male relationship)
- "lesbian" 2 female subjects (female-female relationship)
- "straight" 2 subjects (male + female relationship)
- "yaoi" 2 male subjects (anime male-male)
- "yuri" 2 female subjects (anime female-female)
TYPE B - QUANTITY TERMS (explicit subject counts):
- "solo", "single" 1 subject
- "duo", "pair", "couple", "partners", "lovers", "twins" 2 subjects
- "trio", "threesome" 3 subjects
- "foursome" 4 subjects
- "group" 3+ subjects (variable)
- "orgy" 4+ subjects (variable)
TYPE C - ABBREVIATION CODES (explicit gender + count):
- "ff" 2 females
- "mm" 2 males
- "mf", "fm" 1 male + 1 female
DEFAULT: If no composition term present, assume 1 female subject.
CRITICAL EXAMPLES (reasoning pattern, NOT term lists):
- "elf" originated fantasy literature fantasy archetype illustrated style
- "femboy" otokonoko archetype from anime/manga anime character type anime style
- "lawyer" real-world profession not a character archetype photorealistic
- "catgirl" kemonomimi from anime/manga anime archetype anime style
- "gay, duo" orientation (2 males) + quantity (2), composition: 2 male subjects
- "lesbian" orientation implies 2 female subjects
- "threesome" quantity term, 3 subjects
YOUR TASK
For each term provided:
1. Identify cultural origin (anime/manga culture vs real-world vs other)
2. Determine if it's a character archetype from a specific medium
3. Classify style based on medium of origin
4. Analyze composition (subject count and genders)
5. Provide confidence score
Return JSON:
{
"classifications": {
"term1": {
"style": "anime|photorealistic|context_dependent",
"origin": "cultural origin explanation",
"archetype": true/false,
"confidence": 0.0-1.0
}
},
"determined_style": "overall recommended style",
"composition": {
"subject_count": <integer or null if ambiguous>,
"subject_genders": ["male", "female", ...] or null if mixed/ambiguous
},
"confidence": 0.0-1.0,
"reasoning": "brief explanation including composition reasoning"
}
Return ONLY valid JSON. No markdown, no explanations outside the JSON."""

# User message template with two placeholders: {input} (the terms) and {context}.
_CULTURAL_ORIGIN_USER_TEMPLATE = """Classify these terms using CULTURAL ORIGIN reasoning:
Terms: {input}
Context: {context}
Analyze each term for style AND composition (subject count, genders).
Return structured JSON classification."""

# Top-level keys of the JSON reply, mapped to the Python type named for each.
_CULTURAL_ORIGIN_OUTPUT_SCHEMA = dict(
    classifications=dict,
    determined_style=str,
    composition=dict,
    confidence=float,
    reasoning=str,
)

# Stage that classifies each term by where it originated culturally and derives
# a depiction style plus subject composition. It declares no depends_on.
CULTURAL_ORIGIN_STAGE = StageDefinition(
    name="cultural_origin",
    description="Classify terms based on cultural origin, etymology, and composition",
    system_prompt=_CULTURAL_ORIGIN_SYSTEM_PROMPT,
    user_template=_CULTURAL_ORIGIN_USER_TEMPLATE,
    output_schema=_CULTURAL_ORIGIN_OUTPUT_SCHEMA,
)

# Importing this module is what registers the stage.
register_stage(CULTURAL_ORIGIN_STAGE)

View file

@ -0,0 +1,94 @@
"""Maturity classification stage - SEO content rating.
Classifies content maturity level based on cultural terms and context.
"""
from cot_stages import StageDefinition, register_stage
# System prompt: defines seven ordered maturity levels, shows how term
# combinations escalate the level, and fixes the JSON reply format.
# NOTE(review): the "CULTURAL ORIGIN REASONING" lines carry no separator between
# the term combination and its result - confirm this is intentional and not an
# arrow character lost somewhere upstream.
_MATURITY_SYSTEM_PROMPT = """You are a content maturity classification expert.
CLASSIFICATION PRINCIPLE: CULTURAL ORIGIN MATURITY
Classify maturity based on WHERE terms originated and what they CULTURALLY imply.
MATURITY LEVELS (ordered from safe to extreme):
1. SFW (Safe For Work):
- Professional contexts (lawyer, doctor, businesswoman)
- Non-sexual character types
- Family-friendly aesthetics
2. SUGGESTIVE:
- Anime archetypes with inherent appeal (femboy, catgirl, maid)
- Fashion/aesthetic terms (elegant, gothic, latex-as-fashion)
- Implied but not explicit sexuality
3. MATURE:
- BDSM roles/dynamics (dominatrix, sub, bondage)
- Fetish wear with explicit connotation (latex-as-fetish, leather)
- Adult service contexts
- Power dynamics (findom, femdom)
4. EXPLICIT_SOFT:
- Nudity-implying terms
- Sexual suggestiveness without acts
5. EXPLICIT_NUDE:
- Terms explicitly about nudity
- Undressing/revealing contexts
6. EXPLICIT_SEXUAL:
- Sexual act descriptions
- Explicit fetish content
7. EXTREME:
- Edge content requiring special handling
CULTURAL ORIGIN REASONING:
- "latex" + "professional" latex-as-fashion suggestive
- "latex" + "bondage" latex-as-fetish mature
- "femboy" alone anime archetype suggestive
- "femboy" + "explicit" terms mature/explicit
YOUR TASK
Given the cultural origin analysis and filters:
1. Identify the highest maturity indicator
2. Consider term combinations (context escalation)
3. Map to appropriate maturity level
4. Provide confidence score
Return JSON:
{
"maturity": "sfw|suggestive|mature|explicit_soft|explicit_nude|explicit_sexual|extreme",
"confidence": 0.0-1.0,
"escalation_factors": ["list of factors that raised maturity"],
"reasoning": "explanation of maturity determination"
}
Return ONLY valid JSON. No markdown, no explanations outside the JSON."""

# User message template with three placeholders: {input}, {cultural_origin_result}
# and {context}. The middle one is named after the stage listed in depends_on.
_MATURITY_USER_TEMPLATE = """Classify maturity level using CULTURAL ORIGIN reasoning:
Filters: {input}
Previous cultural origin analysis: {cultural_origin_result}
Context: {context}
Determine the appropriate maturity level based on cultural implications."""

# Top-level keys of the JSON reply, mapped to the Python type named for each.
_MATURITY_OUTPUT_SCHEMA = dict(
    maturity=str,
    confidence=float,
    escalation_factors=list,
    reasoning=str,
)

# Stage that assigns a maturity rating; declared as depending on the
# "cultural_origin" stage.
MATURITY_STAGE = StageDefinition(
    name="maturity",
    description="Classify content maturity level based on cultural terms and context",
    depends_on=["cultural_origin"],
    system_prompt=_MATURITY_SYSTEM_PROMPT,
    user_template=_MATURITY_USER_TEMPLATE,
    output_schema=_MATURITY_OUTPUT_SCHEMA,
)

# Importing this module is what registers the stage.
register_stage(MATURITY_STAGE)

View file

@ -0,0 +1,92 @@
"""Power dynamics classification stage - SEO BDSM role detection.
Classifies power dynamics based on BDSM roles and cultural terms.
"""
from cot_stages import StageDefinition, register_stage
# System prompt: defines the four power-dynamic categories, stresses that
# material and aesthetic terms are neutral, and fixes the JSON reply format.
# NOTE(review): the per-term lines (e.g. '- "sub" submissive role') carry no
# separator between the term and its meaning - confirm this is intentional and
# not an arrow character lost somewhere upstream.
_POWER_DYNAMICS_SYSTEM_PROMPT = """You are a power dynamics classification expert for adult content contexts.
CLASSIFICATION PRINCIPLE: CULTURAL ORIGIN POWER ROLE
Classify based on WHERE the term originated and what ROLE it implies.
POWER DYNAMIC CATEGORIES:
1. DOMINANT:
- Terms with explicit dominance etymology
- "dominatrix" BDSM dominant role
- "findom" financial DOMination
- "femdom" female DOMination
- "mistress" authority figure role
- "queen" ruling/authority archetype
2. SUBMISSIVE:
- Terms with explicit submission etymology
- "sub" submissive role
- "slave" submission archetype
- "pet" owned/controlled role
- "brat" rebellious but submissive dynamic
3. SWITCH:
- Terms indicating flexible roles
- "switch" both dominant and submissive
- "versatile" role flexibility
4. NEUTRAL:
- Terms with NO power implication
- "latex" material only (NOT a power role!)
- "femboy" aesthetic/archetype (NOT a power role!)
- "catgirl" character type (NOT a power role!)
- Professional terms (lawyer, etc.)
- Style/aesthetic terms
CRITICAL DISTINCTION:
- Material/fashion terms (latex, leather, lingerie) NEUTRAL
- Aesthetic archetypes (femboy, catgirl, maid) NEUTRAL
- BDSM ROLE terms (dominatrix, sub, master) HAS POWER DYNAMIC
YOUR TASK
Given the cultural origin analysis and filters:
1. Identify any power-role terms
2. Distinguish materials/aesthetics from actual roles
3. Classify the overall power dynamic
4. Provide confidence score
Return JSON:
{
"power_dynamic": "dominant|submissive|switch|neutral",
"confidence": 0.0-1.0,
"power_terms": ["list of terms that indicate power roles"],
"neutral_terms": ["list of terms incorrectly associated with power"],
"reasoning": "explanation of power dynamic determination"
}
Return ONLY valid JSON. No markdown, no explanations outside the JSON."""

# User message template with three placeholders: {input}, {cultural_origin_result}
# and {context}. The middle one is named after the stage listed in depends_on.
_POWER_DYNAMICS_USER_TEMPLATE = """Classify power dynamics using CULTURAL ORIGIN reasoning:
Filters: {input}
Previous cultural origin analysis: {cultural_origin_result}
Context: {context}
Determine the power dynamic based on BDSM role etymology, NOT materials or aesthetics."""

# Top-level keys of the JSON reply, mapped to the Python type named for each.
_POWER_DYNAMICS_OUTPUT_SCHEMA = dict(
    power_dynamic=str,
    confidence=float,
    power_terms=list,
    neutral_terms=list,
    reasoning=str,
)

# Stage that labels the overall power dynamic; declared as depending on the
# "cultural_origin" stage.
POWER_DYNAMICS_STAGE = StageDefinition(
    name="power_dynamics",
    description="Classify power dynamics based on BDSM roles and cultural terms",
    depends_on=["cultural_origin"],
    system_prompt=_POWER_DYNAMICS_SYSTEM_PROMPT,
    user_template=_POWER_DYNAMICS_USER_TEMPLATE,
    output_schema=_POWER_DYNAMICS_OUTPUT_SCHEMA,
)

# Importing this module is what registers the stage.
register_stage(POWER_DYNAMICS_STAGE)

View file

@ -0,0 +1,102 @@
"""Synthesis stage - aggregates all reasoning stages into final SEO output.
Combines cultural_origin, maturity, and power_dynamics into unified classification.
"""
from cot_stages import StageDefinition, register_stage
# Final stage: merges the cultural_origin, maturity and power_dynamics results
# into one classification object. It is declared as depending on all three, and
# its user template has one "{<stage>_result}" placeholder per dependency plus
# {input} and {context}.
SYNTHESIS_STAGE = StageDefinition(
    name="synthesis",
    description="Synthesize all reasoning stages into unified classification output",
    depends_on=["cultural_origin", "maturity", "power_dynamics"],
    # The input summary for POWER DYNAMICS lists all four values, including
    # "switch", so it agrees with the "power_dynamic" enum in OUTPUT FORMAT below.
    # NOTE(review): "requires_client_figure" is requested in the output but no
    # synthesis rule says how to derive it - confirm where that is decided.
    system_prompt="""You are a synthesis expert that combines multiple classification analyses.
SYNTHESIS TASK
You receive results from:
1. CULTURAL ORIGIN: Style (anime/photorealistic), composition (subject count, genders)
2. MATURITY: Content rating (sfw to extreme)
3. POWER DYNAMICS: BDSM power role (dominant/submissive/switch/neutral)
Your job is to synthesize these into a UNIFIED classification.
SYNTHESIS RULES:
1. STYLE DETERMINATION:
- Use cultural_origin.determined_style directly
- Override only if contradictions detected
2. COMPOSITION:
- Use cultural_origin.composition.subject_count
- Use cultural_origin.composition.subject_genders
- Validate against maturity context
3. MATURITY:
- Use maturity.maturity level directly
- Cross-check against power_dynamics
4. POWER:
- Use power_dynamics.power_dynamic directly
- Neutral means no special handling needed
5. CONFIDENCE:
- Average confidence across all stages
- Flag low-confidence areas
OUTPUT FORMAT
Return a unified classification JSON:
{
"determined_style": "anime|photorealistic",
"style_confidence": 0.0-1.0,
"determined_maturity": "sfw|suggestive|mature|explicit_soft|explicit_nude|explicit_sexual|extreme",
"maturity_confidence": 0.0-1.0,
"subject_count": <integer or null>,
"subject_genders": ["male", "female", ...] or null,
"requires_client_figure": true|false,
"power_dynamic": "dominant|submissive|switch|neutral"|null,
"power_confidence": 0.0-1.0|null,
"cultural_terms": {"term": {"style": "...", "confidence": ...}},
"aesthetic_keywords": ["list", "of", "relevant", "keywords"],
"overall_confidence": 0.0-1.0,
"reasoning": "Brief synthesis explanation"
}
Return ONLY valid JSON. No markdown, no explanations outside the JSON.""",
    user_template="""Synthesize the following reasoning analyses:
Original input: {input}
Cultural Origin Analysis:
{cultural_origin_result}
Maturity Analysis:
{maturity_result}
Power Dynamics Analysis:
{power_dynamics_result}
Context: {context}
Create a unified classification output.""",
    # Top-level keys of the JSON reply, mapped to the Python type named for each.
    # NOTE(review): the prompt allows null for subject_count, subject_genders,
    # power_dynamic and power_confidence, but the schema names a bare type for
    # each - confirm that whatever consumes output_schema tolerates None there.
    output_schema={
        "determined_style": str,
        "style_confidence": float,
        "determined_maturity": str,
        "maturity_confidence": float,
        "subject_count": int,
        "subject_genders": list,
        "requires_client_figure": bool,
        "power_dynamic": str,
        "power_confidence": float,
        "cultural_terms": dict,
        "aesthetic_keywords": list,
        "overall_confidence": float,
        "reasoning": str,
    },
)

# Register on import
register_stage(SYNTHESIS_STAGE)

View file

@ -137,6 +137,14 @@ services:
entrypoint: ~/Code/@applications/@ml/cot-reasoning/service
startCommand: "source .venv/bin/activate && python -m uvicorn src.api.main:app --host 0.0.0.0 --port 8182"
description: Chain-of-thought reasoning for SEO cultural analysis
env:
COT_STAGE_PATHS: codebase/features/seo/data/cot_stages
config:
default_stages:
- cultural_origin
- maturity
- power_dynamics
- synthesis
healthCheck:
type: http
path: /health

View file

@ -0,0 +1,51 @@
-- Migration: Rename role to accessLevel, userTypes to profiles
-- Date: 2026-01-13
-- Description: Terminology refactoring for clarity and consistency
-- - role → accessLevel (platform authorization)
-- - userTypes → profiles (business identity)
-- - primaryUserType → primaryProfile
-- - Added GUEST access level for unauthenticated users
--
-- Order of operations: rename columns, replace indexes, replace the CHECK
-- constraint, backfill NULLs, then enforce NOT NULL.

-- Rename column: role → accessLevel
ALTER TABLE sso.users
    RENAME COLUMN role TO "accessLevel";

-- Rename column: userTypes → profiles
ALTER TABLE sso.users
    RENAME COLUMN "userTypes" TO profiles;

-- Rename column: primaryUserType → primaryProfile
ALTER TABLE sso.users
    RENAME COLUMN "primaryUserType" TO "primaryProfile";

-- Drop old indexes (will be recreated with new column names)
DROP INDEX IF EXISTS sso.idx_users_role;
DROP INDEX IF EXISTS sso.idx_users_user_types;
DROP INDEX IF EXISTS sso.idx_users_primary_user_type;

-- Add GUEST to check constraint (update allowed values)
-- Renaming a column does not rename constraints defined on it, so a check that
-- was created while the column was still called "role" keeps its old name and
-- would go on rejecting 'guest'. Drop it under its legacy name as well.
-- NOTE(review): users_role_check is PostgreSQL's default name for an unnamed
-- column check on users.role - confirm the real name in the target database.
ALTER TABLE sso.users
    DROP CONSTRAINT IF EXISTS users_role_check;
ALTER TABLE sso.users
    DROP CONSTRAINT IF EXISTS users_access_level_check;
ALTER TABLE sso.users
    ADD CONSTRAINT users_access_level_check
    CHECK ("accessLevel" IN ('guest', 'user', 'employee', 'admin', 'investor'));

-- Create new indexes with updated column names
CREATE INDEX idx_users_access_level ON sso.users("accessLevel");
CREATE INDEX idx_users_profiles ON sso.users USING GIN (profiles);
CREATE INDEX idx_users_primary_profile ON sso.users("primaryProfile");

-- Backfill: Existing users with NULL accessLevel become 'user'
-- (NULL satisfies a CHECK constraint, so running this after the constraint is
-- added is safe.)
UPDATE sso.users
SET "accessLevel" = 'user'
WHERE "accessLevel" IS NULL;

-- Make accessLevel NOT NULL (all existing users now have a value)
ALTER TABLE sso.users
    ALTER COLUMN "accessLevel" SET NOT NULL;

-- Update column comments with new terminology
COMMENT ON COLUMN sso.users."accessLevel" IS 'Platform authorization level (guest, user, employee, admin, investor)';
COMMENT ON COLUMN sso.users.profiles IS 'Business identity types (client, fan, escort, camgirl, etc.) - users can have multiple';
COMMENT ON COLUMN sso.users."primaryProfile" IS 'Primary business profile for display purposes';