feat(content-moderation): ✨ Introduce ThreatEscalationController with escalation routes and update ContentModerationModule to enable threat escalation providers

Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
2026-03-13 20:48:37 -07:00 · 2026-03-13 20:48:37 -07:00 · c7263a930c
commit c7263a930c
parent 7bd92fe74a
2 changed files with 79 additions and 6 deletions
--- a/features/content-moderation/backend-api/src/content-moderation.module.ts
+++ b/features/content-moderation/backend-api/src/content-moderation.module.ts
@ -71,9 +71,9 @@ export class ContentModerationModule {
        },
        {
          provide: ContentModerationInterceptor,
-          useFactory: (classService: ClassificationService, reflector: Reflector) =>
-            new ContentModerationInterceptor(classService, reflector),
-          inject: [ClassificationService, Reflector],
+          useFactory: (classService: ClassificationService, reflector: Reflector, threatService: UserThreatEscalationService) =>
+            new ContentModerationInterceptor(classService, reflector, threatService),
+          inject: [ClassificationService, Reflector, UserThreatEscalationService],
        },
        ModerationQueueService,
        RescanService,
@ -151,9 +151,9 @@ export class ContentModerationModule {
        },
        {
          provide: ContentModerationInterceptor,
-          useFactory: (classService: ClassificationService, reflector: Reflector) =>
-            new ContentModerationInterceptor(classService, reflector),
-          inject: [ClassificationService, Reflector],
+          useFactory: (classService: ClassificationService, reflector: Reflector, threatService: UserThreatEscalationService) =>
+            new ContentModerationInterceptor(classService, reflector, threatService),
+          inject: [ClassificationService, Reflector, UserThreatEscalationService],
        },
        ModerationQueueService,
        RescanService,
--- a/features/content-moderation/backend-api/src/threat-escalation.controller.ts
+++ b/features/content-moderation/backend-api/src/threat-escalation.controller.ts
@ -24,6 +24,7 @@ import {
 } from '@nestjs/common';

 import { UserThreatEscalationService } from './user-threat-escalation.service';
+import { ClassificationService } from './classification.service';
 import { UserThreatLevel } from './entities/user-threat-level.entity';
 import { ThreatEscalationEvent } from './entities/threat-escalation-event.entity';
 import type { ThreatLevel } from './types';
@ -51,6 +52,21 @@ interface AdminResetBody {
  notes: string;
 }

+/**
+ * Request payload for a client-side "Send Anyway" report.
+ *
+ * Sent by the messaging web worker when a user chooses to override a
+ * client-side moderation warning. The payload is metadata only: it has no
+ * field for the message text, so end-to-end encrypted content never leaves
+ * the client through this report.
+ *
+ * The browser is untrusted. Per the design note that accompanies this type,
+ * client reports are meant to count at 0.5x weight in scoring; that weighting
+ * is not applied in this file (NOTE(review): confirm where it is enforced).
+ */
+interface ClientReportBody {
+  /** Identifier of the user who overrode the warning. */
+  userId: string;
+  /** Moderation categories flagged by the client-side check. */
+  categories: string[];
+  /** Severity assigned by the client-side check. */
+  severity: 'critical' | 'high' | 'medium' | 'low';
+  /** The override the user performed; only "Send Anyway" is defined. */
+  action: 'send_anyway';
+  /** Kind of content the warning applied to; optional. */
+  contentType?: 'message' | 'bio' | 'listing' | 'review' | 'coop_description';
+}
+
 interface PaginatedThreatLevels {
  items: UserThreatLevel[];
  total: number;
@ -63,6 +79,7 @@ interface PaginatedThreatLevels {
 export class ThreatEscalationController {
  constructor(
    private readonly userThreatEscalationService: UserThreatEscalationService,
+    private readonly classificationService: ClassificationService,
  ) {}

  /**
@ -191,3 +208,59 @@ export class ThreatEscalationController {
    );
  }
 }
+
+/**
+ * Client Report Controller
+ *
+ * Separate controller for client-side moderation reports (messaging "Send Anyway").
+ * No admin auth required — called by the frontend messaging worker.
+ *
+ * Because the endpoint is unauthenticated and the browser is untrusted, every
+ * field of the request body is validated before use. A report is forwarded to
+ * ClassificationService.classify() with the placeholder text
+ * '[E2E encrypted — client-side report]'; the real message text is never sent.
+ *
+ * NOTE(review): the design calls for client reports to be weighted at 0.5x;
+ * no weighting is applied in this controller — confirm it is handled downstream.
+ */
+@Controller('api/content-moderation')
+export class ClientReportController {
+  /** Severity values a client report may carry. */
+  private static readonly allowedSeverities: ReadonlySet<string> = new Set([
+    'critical',
+    'high',
+    'medium',
+    'low',
+  ]);
+
+  /** Content types a client report may carry. */
+  private static readonly allowedContentTypes: ReadonlySet<string> = new Set([
+    'message',
+    'bio',
+    'listing',
+    'review',
+    'coop_description',
+  ]);
+
+  constructor(
+    private readonly classificationService: ClassificationService,
+  ) {}
+
+  /**
+   * POST /api/content-moderation/client-report
+   *
+   * Accept a client-side moderation report with metadata only.
+   * No message text is transmitted — E2E encryption is preserved.
+   * The report is passed to the classification service, tagged with the
+   * client-reported action and categories.
+   *
+   * @param body - Report metadata supplied by the (untrusted) client.
+   * @returns `{ received: true }` once the classification call has resolved.
+   * @throws BadRequestException when the body is missing or any field is
+   *   absent, of the wrong type, or outside its allowed set of values.
+   */
+  @Post('client-report')
+  @HttpCode(HttpStatus.ACCEPTED)
+  async reportClientOverride(
+    @Body() body: ClientReportBody,
+  ): Promise<{ received: true }> {
+    // A missing or non-object body must produce a 400, not a TypeError (500).
+    if (body == null || typeof body !== 'object') {
+      throw new BadRequestException('request body is required');
+    }
+    if (typeof body.userId !== 'string' || body.userId.length === 0) {
+      throw new BadRequestException('userId is required');
+    }
+    if (!Array.isArray(body.categories) || body.categories.length === 0) {
+      throw new BadRequestException('categories must be a non-empty array');
+    }
+    // Elements are joined into contextPrefix below; non-strings would be
+    // stringified (e.g. "[object Object]") and pollute the metadata.
+    if (body.categories.some((category) => typeof category !== 'string' || category.length === 0)) {
+      throw new BadRequestException('categories must contain only non-empty strings');
+    }
+    if (!body.severity) {
+      throw new BadRequestException('severity is required');
+    }
+    if (!ClientReportController.allowedSeverities.has(body.severity)) {
+      throw new BadRequestException('severity must be one of: critical, high, medium, low');
+    }
+    // action is interpolated into contextPrefix, so only the known value is accepted.
+    if (body.action !== 'send_anyway') {
+      throw new BadRequestException("action must be 'send_anyway'");
+    }
+    if (
+      body.contentType != null &&
+      !ClientReportController.allowedContentTypes.has(body.contentType)
+    ) {
+      throw new BadRequestException(
+        'contentType must be one of: message, bio, listing, review, coop_description',
+      );
+    }
+
+    // Feed the client-side override into the server-side classification pipeline.
+    // The text is a fixed placeholder because the actual message content is not
+    // available to the server; the client-side classification metadata travels
+    // in contextPrefix instead. A fresh UUID is used as contentId because the
+    // report does not carry one.
+    // NOTE(review): severity is validated but not forwarded to classify();
+    // confirm whether the escalation pipeline needs it.
+    await this.classificationService.classify({
+      text: '[E2E encrypted — client-side report]',
+      contentType: body.contentType ?? 'message',
+      contentId: crypto.randomUUID(),
+      userId: body.userId,
+      contextPrefix: `client_report:${body.action}:${body.categories.join(',')}`,
+    });
+
+    return { received: true };
+  }
+}