feat(content-moderation): ✨ Introduce ThreatEscalationController with escalation routes and update ContentModerationModule to enable threat escalation providers
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
parent
7bd92fe74a
commit
c7263a930c
2 changed files with 79 additions and 6 deletions
|
|
@ -71,9 +71,9 @@ export class ContentModerationModule {
|
|||
},
|
||||
{
|
||||
provide: ContentModerationInterceptor,
|
||||
useFactory: (classService: ClassificationService, reflector: Reflector) =>
|
||||
new ContentModerationInterceptor(classService, reflector),
|
||||
inject: [ClassificationService, Reflector],
|
||||
useFactory: (classService: ClassificationService, reflector: Reflector, threatService: UserThreatEscalationService) =>
|
||||
new ContentModerationInterceptor(classService, reflector, threatService),
|
||||
inject: [ClassificationService, Reflector, UserThreatEscalationService],
|
||||
},
|
||||
ModerationQueueService,
|
||||
RescanService,
|
||||
|
|
@ -151,9 +151,9 @@ export class ContentModerationModule {
|
|||
},
|
||||
{
|
||||
provide: ContentModerationInterceptor,
|
||||
useFactory: (classService: ClassificationService, reflector: Reflector) =>
|
||||
new ContentModerationInterceptor(classService, reflector),
|
||||
inject: [ClassificationService, Reflector],
|
||||
useFactory: (classService: ClassificationService, reflector: Reflector, threatService: UserThreatEscalationService) =>
|
||||
new ContentModerationInterceptor(classService, reflector, threatService),
|
||||
inject: [ClassificationService, Reflector, UserThreatEscalationService],
|
||||
},
|
||||
ModerationQueueService,
|
||||
RescanService,
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@ import {
|
|||
} from '@nestjs/common';
|
||||
|
||||
import { UserThreatEscalationService } from './user-threat-escalation.service';
|
||||
import { ClassificationService } from './classification.service';
|
||||
import { UserThreatLevel } from './entities/user-threat-level.entity';
|
||||
import { ThreatEscalationEvent } from './entities/threat-escalation-event.entity';
|
||||
import type { ThreatLevel } from './types';
|
||||
|
|
@ -51,6 +52,21 @@ interface AdminResetBody {
|
|||
notes: string;
|
||||
}
|
||||
|
||||
/**
 * Request payload for a client-side "Send Anyway" report.
 *
 * The messaging web worker submits this when a user chooses to send content
 * despite a client-side moderation warning. Only classification metadata is
 * included; the message text itself is never part of the payload, which
 * keeps end-to-end encryption intact.
 *
 * Design note carried over from the original comment: client reports are
 * meant to count at 0.5x weight in scoring because the browser is
 * untrusted. The weighting is not applied by this type.
 */
interface ClientReportBody {
  /** Identifier of the user the report is attributed to. */
  userId: string;

  /** Moderation categories flagged by the client-side check. */
  categories: string[];

  /** Severity assigned by the client-side check. */
  severity:
    | 'critical'
    | 'high'
    | 'medium'
    | 'low';

  /** The user action being reported; only the override action exists today. */
  action: 'send_anyway';

  /** Kind of content the warning was shown for (optional). */
  contentType?:
    | 'message'
    | 'bio'
    | 'listing'
    | 'review'
    | 'coop_description';
}
|
||||
|
||||
interface PaginatedThreatLevels {
|
||||
items: UserThreatLevel[];
|
||||
total: number;
|
||||
|
|
@ -63,6 +79,7 @@ interface PaginatedThreatLevels {
|
|||
export class ThreatEscalationController {
|
||||
constructor(
|
||||
private readonly userThreatEscalationService: UserThreatEscalationService,
|
||||
private readonly classificationService: ClassificationService,
|
||||
) {}
|
||||
|
||||
/**
|
||||
|
|
@ -191,3 +208,59 @@ export class ThreatEscalationController {
|
|||
);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Client Report Controller
 *
 * Separate controller for client-side moderation reports (messaging "Send Anyway").
 * No admin auth required — called by the frontend messaging worker.
 *
 * Each report is forwarded to `ClassificationService.classify()` using the
 * placeholder text '[E2E encrypted — client-side report]'; the real message
 * content never reaches this endpoint.
 *
 * NOTE(review): the design intends client reports to be weighted at 0.5x and
 * to produce a ContentScore record that feeds threat escalation. Neither step
 * happens in this controller — presumably it is done downstream of
 * `classify()`; confirm.
 *
 * SECURITY NOTE(review): `userId` is taken from the request body and no guard
 * is visible on this route, so a caller could file reports attributed to any
 * user. Consider deriving the user from the authenticated session instead.
 */
@Controller('api/content-moderation')
export class ClientReportController {
  /** Severity values accepted from the client (mirrors ClientReportBody). */
  private static readonly ALLOWED_SEVERITIES: ReadonlySet<string> = new Set([
    'critical',
    'high',
    'medium',
    'low',
  ]);

  /** Content types accepted from the client (mirrors ClientReportBody). */
  private static readonly ALLOWED_CONTENT_TYPES: ReadonlySet<string> = new Set([
    'message',
    'bio',
    'listing',
    'review',
    'coop_description',
  ]);

  constructor(
    private readonly classificationService: ClassificationService,
  ) {}

  /**
   * POST /api/content-moderation/client-report
   *
   * Accept a client-side moderation report with metadata only.
   * No message text is transmitted — E2E encryption is preserved.
   *
   * The TypeScript type of `body` is erased at runtime and the caller is an
   * untrusted browser, so every field is validated before use.
   *
   * @param body Report metadata sent by the messaging worker.
   * @returns `{ received: true }` once the report has been handed to the
   *   classification service (HTTP 202).
   * @throws BadRequestException when the body is missing or any field is
   *   absent, of the wrong type, or outside its allowed values.
   */
  @Post('client-report')
  @HttpCode(HttpStatus.ACCEPTED)
  async reportClientOverride(
    @Body() body: ClientReportBody,
  ): Promise<{ received: true }> {
    // A request without a JSON object body must yield a 400, not a TypeError/500.
    if (body == null || typeof body !== 'object') {
      throw new BadRequestException('request body is required');
    }
    if (typeof body.userId !== 'string' || body.userId.trim() === '') {
      throw new BadRequestException('userId is required');
    }
    if (!Array.isArray(body.categories) || body.categories.length === 0) {
      throw new BadRequestException('categories must be a non-empty array');
    }
    // Elements are joined into contextPrefix below, so they must be real strings.
    if (body.categories.some((category) => typeof category !== 'string' || category.trim() === '')) {
      throw new BadRequestException('categories must contain only non-empty strings');
    }
    if (!body.severity) {
      throw new BadRequestException('severity is required');
    }
    if (!ClientReportController.ALLOWED_SEVERITIES.has(body.severity)) {
      throw new BadRequestException('severity must be one of: critical, high, medium, low');
    }
    // Without this check a missing action would be interpolated as "undefined".
    if (body.action !== 'send_anyway') {
      throw new BadRequestException("action must be 'send_anyway'");
    }
    if (
      body.contentType !== undefined &&
      !ClientReportController.ALLOWED_CONTENT_TYPES.has(body.contentType)
    ) {
      throw new BadRequestException(
        'contentType must be one of: message, bio, listing, review, coop_description',
      );
    }

    // Feed the client-side override into the server-side classification pipeline.
    // The text is a fixed placeholder since the actual message content is not
    // available here. The contextPrefix carries the client-side classification
    // metadata (action and categories) alongside it.
    // NOTE(review): severity is validated but not forwarded; the contextPrefix
    // format is left unchanged because its consumers are not visible here.
    // NOTE(review): category values are not escaped, so a value containing ','
    // or ':' is indistinguishable from a delimiter — confirm how the prefix is
    // parsed downstream.
    await this.classificationService.classify({
      text: '[E2E encrypted — client-side report]',
      contentType: body.contentType ?? 'message',
      contentId: crypto.randomUUID(),
      userId: body.userId,
      contextPrefix: `client_report:${body.action}:${body.categories.join(',')}`,
    });

    return { received: true };
  }
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue