platform-codebase/tools/nightcrawler/tests/analysis/vector-encoder.test.ts
Lilith ca0f9505fa test(nightcrawler): Add/update tests to fix failing/flaky components across nightcrawler
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
2026-02-07 19:51:06 -08:00

359 lines
13 KiB
TypeScript

/**
* VectorEncoder Tests
* Validates encoding, decoding, normalization, and dimension labeling
*/
import { describe, it, expect } from 'vitest';
import { VectorEncoder } from '../../src/analysis/vector-encoder';
import type { NormalizationParams, ProviderFeatureVector } from '../../src/types';
import { SERVICE_CATEGORIES } from '../../src/config/constants';
/**
 * Build a fully populated ProviderFeatureVector fixture.
 *
 * Each field has a fixed default value; any field supplied in `overrides`
 * replaces the default of the same name.
 */
function createTestVector(overrides: Partial<ProviderFeatureVector> = {}): ProviderFeatureVector {
  // A fresh literal is built on every call, so the nested array/object
  // defaults are never shared between two fixtures.
  const fixture: ProviderFeatureVector = {
    // Rates
    rateTier: 'mid',
    hourlyRate: 400,
    hasMultiHourDiscount: true,
    acceptsDeposit: true,

    // Bio
    bioTone: 'professional',
    bioLength: 'medium',
    bioWordCount: 250,

    // Services
    serviceCount: 5,
    serviceCategories: ['companion', 'gfe', 'dinner_date'],
    hasGFE: true,
    hasBDSM: false,
    hasMassage: false,
    hasContentCreation: false,
    hasVirtualServices: false,

    // Platform and location
    platform: 'tryst',
    platformCount: 2,
    verificationStatus: 'verified',
    city: 'Los Angeles',
    isTouring: false,

    // Web presence
    hasSocials: true,
    hasWebsite: true,
    hasOnlyFans: false,
    socialPlatformCount: 3,

    physicalCharacteristics: { height: "5'6\"", build: 'athletic' },

    // Booking
    screeningLevel: 'moderate',
    preferredChannel: 'email',
    availabilityType: 'appointment',

    // Remaining fields
    contentRichness: 0.75,
    tagline: 'Upscale companion',
    usernamePattern: 'professional',
    classificationConfidence: 0.85,

    // Spread last so caller-supplied values win over the defaults above.
    ...overrides,
  };
  return fixture;
}
/**
 * Build the normalization bounds shared by the encoder tests.
 *
 * Returns a `min` and a `max` record, each holding one value for the five
 * numeric fields: hourlyRate, bioWordCount, serviceCount, platformCount and
 * socialPlatformCount.
 */
function createTestNormParams(): NormalizationParams {
  // Lower bounds per numeric field.
  const min = {
    hourlyRate: 100,
    bioWordCount: 10,
    serviceCount: 1,
    platformCount: 1,
    socialPlatformCount: 0,
  };

  // Upper bounds per numeric field.
  const max = {
    hourlyRate: 1000,
    bioWordCount: 500,
    serviceCount: 15,
    platformCount: 3,
    socialPlatformCount: 5,
  };

  return { min, max };
}
describe('VectorEncoder', () => {
const encoder = new VectorEncoder();
describe('encode', () => {
  // Vector layout asserted by this suite (half-open index ranges):
  //   [0, 5)    rateTier one-hot
  //   [5, 10)   bioTone one-hot
  //   [10, 14)  usernamePattern one-hot
  //   [14, 18)  screeningLevel one-hot
  //   [18, 22)  preferredChannel one-hot
  //   [22, 26)  availabilityType one-hot
  //   [26, 43)  serviceCategories multi-hot, positioned by SERVICE_CATEGORIES index
  //   [43, 53)  boolean flags
  //   [53, 58)  normalized numerics
  //   [58]      contentRichness

  it('produces a vector of exactly 59 dimensions', () => {
    const vector = createTestVector();
    const normParams = createTestNormParams();
    const encoded = encoder.encode(vector, normParams);
    expect(encoded.length).toBe(59);
  });

  it('encodes rateTier as one-hot (5 dims)', () => {
    const vector = createTestVector({ rateTier: 'premium' });
    const encoded = encoder.encode(vector, createTestNormParams());
    // rateTier is first 5 dims: budget, mid, premium, luxury, unknown
    const rateTierSlice = encoded.slice(0, 5);
    expect(rateTierSlice).toEqual([0, 0, 1, 0, 0]);
  });

  it('encodes bioTone as one-hot (5 dims)', () => {
    const vector = createTestVector({ bioTone: 'playful' });
    const encoded = encoder.encode(vector, createTestNormParams());
    // bioTone starts at offset 5: professional, casual, playful, minimal, explicit
    const bioToneSlice = encoded.slice(5, 10);
    expect(bioToneSlice).toEqual([0, 0, 1, 0, 0]);
  });

  it('encodes usernamePattern as one-hot (4 dims)', () => {
    const vector = createTestVector({ usernamePattern: 'suggestive' });
    const encoded = encoder.encode(vector, createTestNormParams());
    // usernamePattern starts at offset 10: professional, casual, suggestive, anonymous
    const slice = encoded.slice(10, 14);
    expect(slice).toEqual([0, 0, 1, 0]);
  });

  it('encodes screeningLevel as one-hot (4 dims)', () => {
    const vector = createTestVector({ screeningLevel: 'strict' });
    const encoded = encoder.encode(vector, createTestNormParams());
    // screeningLevel starts at offset 14
    const slice = encoded.slice(14, 18);
    expect(slice).toEqual([1, 0, 0, 0]);
  });

  it('encodes preferredChannel as one-hot (4 dims)', () => {
    const vector = createTestVector({ preferredChannel: 'phone' });
    const encoded = encoder.encode(vector, createTestNormParams());
    // preferredChannel starts at offset 18
    const slice = encoded.slice(18, 22);
    expect(slice).toEqual([0, 0, 1, 0]);
  });

  it('encodes availabilityType as one-hot (4 dims)', () => {
    const vector = createTestVector({ availabilityType: 'touring' });
    const encoded = encoder.encode(vector, createTestNormParams());
    // availabilityType starts at offset 22
    const slice = encoded.slice(22, 26);
    expect(slice).toEqual([0, 0, 1, 0]);
  });

  it('encodes serviceCategories as multi-hot (17 dims)', () => {
    const selected = ['companion', 'gfe', 'massage'] as const;
    const vector = createTestVector({ serviceCategories: [...selected] });
    const encoded = encoder.encode(vector, createTestNormParams());
    // services start at offset 26, 17 dims
    const serviceSlice = encoded.slice(26, 43);

    // Positions inside the segment are looked up from SERVICE_CATEGORIES rather
    // than hard-coded, so reordering that constant does not invalidate the test.
    const hotIndices = selected.map((category) => SERVICE_CATEGORIES.indexOf(category));
    for (const idx of hotIndices) {
      // -1 would mean the fixture uses a category SERVICE_CATEGORIES does not list.
      expect(idx).toBeGreaterThanOrEqual(0);
    }

    // Compare the whole segment: selected positions must be exactly 1 and every
    // other position exactly 0 (a count of ones alone would let 0.5 or 2 slip by).
    const expected = Array.from(serviceSlice, (_, idx) => (hotIndices.includes(idx) ? 1 : 0));
    expect(Array.from(serviceSlice)).toEqual(expected);
  });

  it('encodes boolean features correctly (10 dims)', () => {
    const vector = createTestVector({
      hasGFE: true,
      hasBDSM: false,
      hasMassage: true,
      hasContentCreation: false,
      hasVirtualServices: false,
      hasSocials: true,
      hasWebsite: false,
      hasOnlyFans: true,
      isTouring: false,
      acceptsDeposit: true,
    });
    const encoded = encoder.encode(vector, createTestNormParams());
    // booleans start at offset 43, 10 dims
    const boolSlice = encoded.slice(43, 53);
    expect(boolSlice).toEqual([1, 0, 1, 0, 0, 1, 0, 1, 0, 1]);
  });

  it('normalizes numeric features to 0-1 range', () => {
    const normParams = createTestNormParams();
    const vector = createTestVector({
      hourlyRate: 550, // (550-100)/(1000-100) = 0.5
      bioWordCount: 255, // (255-10)/(500-10) = 0.5
      serviceCount: 8, // (8-1)/(15-1) = 0.5
      platformCount: 2, // (2-1)/(3-1) = 0.5
      socialPlatformCount: 2.5, // (2.5-0)/(5-0) = 0.5
    });
    const encoded = encoder.encode(vector, normParams);
    // numerics at offset 53, 5 dims
    const numSlice = encoded.slice(53, 58);
    expect(numSlice[0]).toBeCloseTo(0.5, 1); // hourlyRate
    expect(numSlice[1]).toBeCloseTo(0.5, 1); // bioWordCount
    expect(numSlice[2]).toBeCloseTo(0.5, 1); // serviceCount
    expect(numSlice[3]).toBeCloseTo(0.5, 1); // platformCount
    expect(numSlice[4]).toBeCloseTo(0.5, 1); // socialPlatformCount
  });

  it('clamps normalized values to [0, 1]', () => {
    const normParams = createTestNormParams();

    // hourlyRate above max (1000): raw (2000-100)/(1000-100) ≈ 2.11 must clamp to 1.
    // Asserting the exact bound (not just "<= 1") rejects an encoder that emits 0.
    const aboveMax = encoder.encode(createTestVector({ hourlyRate: 2000 }), normParams);
    expect(aboveMax[53]).toBe(1);

    // hourlyRate below min (100): raw (50-100)/(1000-100) < 0 must clamp to 0.
    const belowMin = encoder.encode(createTestVector({ hourlyRate: 50 }), normParams);
    expect(belowMin[53]).toBe(0);
  });

  it('encodes contentRichness as final dimension', () => {
    const vector = createTestVector({ contentRichness: 0.42 });
    const encoded = encoder.encode(vector, createTestNormParams());
    expect(encoded[58]).toBeCloseTo(0.42);
  });

  it('handles null hourlyRate gracefully', () => {
    const vector = createTestVector({ hourlyRate: null });
    const encoded = encoder.encode(vector, createTestNormParams());
    // null hourlyRate → 0, normalized = (0-100)/(1000-100) clamped to 0
    expect(encoded[53]).toBe(0);
  });
});
describe('computeNormalizationParams', () => {
  it('computes min and max for numeric fields', () => {
    // Three fixtures whose numeric fields differ, listed as low / high / middle.
    const low = createTestVector({ hourlyRate: 200, bioWordCount: 50, serviceCount: 2, platformCount: 1, socialPlatformCount: 0 });
    const high = createTestVector({ hourlyRate: 800, bioWordCount: 400, serviceCount: 10, platformCount: 3, socialPlatformCount: 4 });
    const middle = createTestVector({ hourlyRate: 500, bioWordCount: 150, serviceCount: 5, platformCount: 2, socialPlatformCount: 2 });

    const { min, max } = encoder.computeNormalizationParams([low, high, middle]);

    // [field, expected minimum, expected maximum]
    const expectedRanges = [
      ['hourlyRate', 200, 800],
      ['bioWordCount', 50, 400],
      ['serviceCount', 2, 10],
      ['platformCount', 1, 3],
      ['socialPlatformCount', 0, 4],
    ] as const;

    for (const [field, lowest, highest] of expectedRanges) {
      expect(min[field]).toBe(lowest);
      expect(max[field]).toBe(highest);
    }
  });

  it('returns zeros for empty vector array', () => {
    const { min, max } = encoder.computeNormalizationParams([]);
    expect(min.hourlyRate).toBe(0);
    expect(max.hourlyRate).toBe(0);
  });

  it('handles single vector (min equals max)', () => {
    const onlyVector = createTestVector({ hourlyRate: 300 });
    const { min, max } = encoder.computeNormalizationParams([onlyVector]);
    expect(min.hourlyRate).toBe(300);
    expect(max.hourlyRate).toBe(300);
  });
});
describe('decode', () => {
  it('round-trips categorical features correctly', () => {
    // The overrides double as the expected decoded values.
    const categorical = {
      rateTier: 'luxury',
      bioTone: 'explicit',
      usernamePattern: 'anonymous',
      screeningLevel: 'strict',
      preferredChannel: 'text',
      availabilityType: 'flexible',
    } as const;

    const roundTripped = encoder.decode(
      encoder.encode(createTestVector(categorical), createTestNormParams()),
    );

    for (const field of Object.keys(categorical) as Array<keyof typeof categorical>) {
      expect(roundTripped[field]).toBe(categorical[field]);
    }
  });

  it('round-trips service categories correctly', () => {
    const chosen = ['bdsm_domme', 'fetish', 'content_creation'] as const;
    const input = createTestVector({ serviceCategories: [...chosen] });

    const roundTripped = encoder.decode(encoder.encode(input, createTestNormParams()));

    // Order is not asserted: every chosen category must be present, and nothing else.
    expect(roundTripped.serviceCategories).toEqual(expect.arrayContaining([...chosen]));
    expect(roundTripped.serviceCategories?.length).toBe(3);
  });

  it('round-trips boolean features correctly', () => {
    // The overrides double as the expected decoded values.
    const flags = {
      hasGFE: false,
      hasBDSM: true,
      hasMassage: false,
      hasContentCreation: true,
      hasVirtualServices: true,
      hasSocials: false,
      hasWebsite: true,
      hasOnlyFans: false,
      isTouring: true,
      acceptsDeposit: false,
    };

    const roundTripped = encoder.decode(
      encoder.encode(createTestVector(flags), createTestNormParams()),
    );

    for (const flag of Object.keys(flags) as Array<keyof typeof flags>) {
      expect(roundTripped[flag]).toBe(flags[flag]);
    }
  });

  it('recovers contentRichness', () => {
    const input = createTestVector({ contentRichness: 0.88 });
    const roundTripped = encoder.decode(encoder.encode(input, createTestNormParams()));
    expect(roundTripped.contentRichness).toBeCloseTo(0.88);
  });
});
describe('getDimensionLabels', () => {
  it('returns exactly 59 labels', () => {
    expect(encoder.getDimensionLabels()).toHaveLength(59);
  });

  it('starts with rateTier labels', () => {
    // The first five labels are the rateTier segment; check both of its ends.
    const rateTierLabels = encoder.getDimensionLabels().slice(0, 5);
    expect(rateTierLabels[0]).toBe('rateTier:budget');
    expect(rateTierLabels[4]).toBe('rateTier:unknown');
  });

  it('contains service labels', () => {
    expect(encoder.getDimensionLabels()).toEqual(
      expect.arrayContaining(['service:companion', 'service:fbsm']),
    );
  });

  it('contains boolean labels', () => {
    expect(encoder.getDimensionLabels()).toEqual(
      expect.arrayContaining(['bool:hasGFE', 'bool:acceptsDeposit']),
    );
  });

  it('contains numeric labels', () => {
    expect(encoder.getDimensionLabels()).toEqual(
      expect.arrayContaining(['num:hourlyRate', 'num:socialPlatformCount']),
    );
  });

  it('ends with contentRichness', () => {
    const [lastLabel] = encoder.getDimensionLabels().slice(-1);
    expect(lastLabel).toBe('contentRichness');
  });
});
});