// lilith-platform.live/codebase/@features/api/tests/processors/pii-extractor.llm.test.ts

import { describe, expect, test } from 'bun:test';

import { runLlmTier, type ModelBossClientLike, type LlmExtractionMessage } from '@/processors/pii-extractor/llm-tier';
import { buildSystemPrompt, buildUserPrompt, PII_EXTRACTION_SCHEMA } from '@/processors/pii-extractor/prompts';

// -----------------------------------------------------------------------
// Deterministic mock for ModelBossClient
// -----------------------------------------------------------------------

/**
 * Builds a deterministic stand-in for the ModelBoss client.
 *
 * `chatJson` performs no I/O: it records the options it was called with and
 * returns whatever the caller-supplied `opts.parse` produces for the canned
 * `responsePayload`.
 *
 * @param responsePayload - Raw value handed to `opts.parse` on every call.
 * @returns A client whose `capturedOpts` is `null` until `chatJson` has been
 *   invoked, and afterwards holds the options of the most recent call.
 */
function makeMockClient(responsePayload: unknown): ModelBossClientLike & {
  capturedOpts: Parameters<ModelBossClientLike['chatJson']>[0] | null;
} {
  type ChatJsonOpts = Parameters<ModelBossClientLike['chatJson']>[0];

  let lastOpts: ChatJsonOpts | null = null;

  return {
    // Accessor (not a data property) so readers observe the live closure
    // value; a plain `capturedOpts: null` field would stay null forever.
    get capturedOpts(): ChatJsonOpts | null {
      return lastOpts;
    },
    async chatJson<T>(opts: ChatJsonOpts): Promise<T> {
      lastOpts = opts;
      // Run the canned payload through the caller's parser, as a real
      // client would do with the model's JSON output.
      return opts.parse(responsePayload) as T;
    },
  };
}

/**
 * Two inbound messages (both `isFromMe: false`) used as the conversation
 * input for every `runLlmTier` call and for the user-prompt test below.
 */
const SAMPLE_MESSAGES: LlmExtractionMessage[] = [
  {
    body: "Hi, I'm John from Mountain View. Looking for companionship.",
    isFromMe: false,
    sentAt: '2026-04-01T10:00:00Z',
  },
  {
    body: 'I work at Google as a software engineer.',
    isFromMe: false,
    sentAt: '2026-04-01T10:01:00Z',
  },
];

/**
 * Canned, fully-populated raw payload handed to the mock client.
 *
 * Keys are snake_case; the "parses a well-formed LLM response" test reads the
 * same values back from the camelCase fields of the `runLlmTier` result.
 * Other tests spread this object and override a single key to build variants.
 */
const VALID_RESPONSE = {
  name: 'John',
  name_confidence: 0.95,
  location: 'Mountain View',
  location_confidence: 0.9,
  organization: 'Google',
  organization_confidence: 0.85,
  role: 'software engineer',
  role_confidence: 0.85,
  references_to_others: [],
  relationship_kind: 'prospect',
  relationship_kind_confidence: 0.8,
  reasoning: 'Contact introduced himself by name and mentioned employer, inquiring about companionship services.',
};

/**
 * Suite for `runLlmTier` (driven through a mock client, no network) plus
 * sanity checks on the JSON schema and the prompt builders it relies on.
 */
describe('runLlmTier', () => {
  // --- Response parsing ---------------------------------------------------

  test('parses a well-formed LLM response', async () => {
    const extraction = await runLlmTier(makeMockClient(VALID_RESPONSE), SAMPLE_MESSAGES);
    const {
      name,
      nameConfidence,
      location,
      locationConfidence,
      organization,
      role,
      relationshipKind,
      relationshipKindConfidence,
      referencesToOthers,
    } = extraction;

    expect(name).toBe('John');
    expect(nameConfidence).toBe(0.95);
    expect(location).toBe('Mountain View');
    expect(locationConfidence).toBe(0.9);
    expect(organization).toBe('Google');
    expect(role).toBe('software engineer');
    expect(relationshipKind).toBe('prospect');
    expect(relationshipKindConfidence).toBe(0.8);
    expect(referencesToOthers).toHaveLength(0);
  });

  test('falls back to unknown for unrecognized relationship_kind', async () => {
    const payload = { ...VALID_RESPONSE, relationship_kind: 'INVALID_KIND' };
    const { relationshipKind } = await runLlmTier(makeMockClient(payload), SAMPLE_MESSAGES);

    expect(relationshipKind).toBe('unknown');
  });

  test('returns null for absent optional fields', async () => {
    // `name` is explicitly null; the location, organization and role keys
    // are left out of the payload altogether.
    const sparsePayload = {
      name: null,
      name_confidence: 0,
      relationship_kind: 'unknown',
      relationship_kind_confidence: 0.5,
      reasoning: 'No signals found.',
    };
    const extraction = await runLlmTier(makeMockClient(sparsePayload), SAMPLE_MESSAGES);

    expect(extraction.name).toBeNull();
    expect(extraction.location).toBeNull();
    expect(extraction.organization).toBeNull();
    expect(extraction.role).toBeNull();
  });

  test('handles malformed references_to_others gracefully', async () => {
    // One valid entry followed by two entries that are not objects.
    const mixedReferences = [{ name: 'Alice', relation: 'wife' }, 'not-an-object', null];
    const mockClient = makeMockClient({
      ...VALID_RESPONSE,
      references_to_others: mixedReferences,
    });
    const { referencesToOthers } = await runLlmTier(mockClient, SAMPLE_MESSAGES);

    expect(referencesToOthers).toHaveLength(1);
    expect(referencesToOthers[0]).toEqual({ name: 'Alice', relation: 'wife' });
  });

  test('throws on non-object response', async () => {
    const pending = runLlmTier(makeMockClient('a string response'), SAMPLE_MESSAGES);

    await expect(pending).rejects.toThrow('LLM tier failed');
  });

  // --- Client invocation --------------------------------------------------

  test('chatJson is called with correct schema name', async () => {
    // Hand-rolled client so the schema name can be recorded directly.
    let seenSchemaName = '';
    const recordingClient: ModelBossClientLike = {
      async chatJson<T>(opts: Parameters<ModelBossClientLike['chatJson']>[0]): Promise<T> {
        seenSchemaName = opts.schemaName;
        return opts.parse(VALID_RESPONSE) as T;
      },
    };

    await runLlmTier(recordingClient, SAMPLE_MESSAGES);

    expect(seenSchemaName).toBe('pii_extraction');
  });

  // --- Schema and prompts -------------------------------------------------

  test('prompt contains all required JSON schema fields', () => {
    const { properties } = PII_EXTRACTION_SCHEMA as { properties: Record<string, unknown> };
    const requiredFields = [
      'name',
      'relationship_kind',
      'relationship_kind_confidence',
      'reasoning',
      'references_to_others',
    ];

    for (const field of requiredFields) {
      expect(properties).toHaveProperty(field);
    }
  });

  test('user prompt includes message bodies', () => {
    const userPrompt = buildUserPrompt(SAMPLE_MESSAGES);

    expect(userPrompt).toContain("I'm John from Mountain View");
    expect(userPrompt).toContain('[contact]');
  });

  test('system prompt mentions Quinn context', () => {
    const systemPrompt = buildSystemPrompt();

    expect(systemPrompt).toContain('Quinn');
    expect(systemPrompt.length).toBeGreaterThan(200);
  });
});