/**
 * VectorEncoder Tests
 * Validates encoding, decoding, normalization, and dimension labeling
 */

import { describe, it, expect } from 'vitest';
import { VectorEncoder } from '../../src/analysis/vector-encoder';
import type { NormalizationParams, ProviderFeatureVector } from '../../src/types';
import { SERVICE_CATEGORIES } from '../../src/config/constants';

/**
 * Create a complete test feature vector with sensible defaults.
 *
 * Every field is populated so a test only has to spell out the fields it cares
 * about. `overrides` is spread last, so any field it supplies (including an
 * explicit `null`) replaces the default. A new object is built on every call.
 *
 * @param overrides - Fields to replace in the default vector.
 * @returns The default vector with `overrides` applied on top.
 */
function createTestVector(overrides: Partial<ProviderFeatureVector> = {}): ProviderFeatureVector {
  return {
    rateTier: 'mid',
    hourlyRate: 400,
    hasMultiHourDiscount: true,
    acceptsDeposit: true,
    bioTone: 'professional',
    bioLength: 'medium',
    bioWordCount: 250,
    serviceCount: 5,
    serviceCategories: ['companion', 'gfe', 'dinner_date'],
    hasGFE: true,
    hasBDSM: false,
    hasMassage: false,
    hasContentCreation: false,
    hasVirtualServices: false,
    platform: 'tryst',
    platformCount: 2,
    verificationStatus: 'verified',
    city: 'Los Angeles',
    isTouring: false,
    hasSocials: true,
    hasWebsite: true,
    hasOnlyFans: false,
    socialPlatformCount: 3,
    physicalCharacteristics: { height: "5'6\"", build: 'athletic' },
    screeningLevel: 'moderate',
    preferredChannel: 'email',
    availabilityType: 'appointment',
    contentRichness: 0.75,
    tagline: 'Upscale companion',
    usernamePattern: 'professional',
    classificationConfidence: 0.85,
    // Must stay last so caller-supplied fields win over the defaults above.
    ...overrides,
  };
}
/**
 * Create test normalization params.
 *
 * Supplies a `min` and a `max` bound for each of the five numeric fields the
 * tests normalize: `hourlyRate`, `bioWordCount`, `serviceCount`,
 * `platformCount` and `socialPlatformCount`. A new object is built on every
 * call.
 *
 * @returns Fixed min/max bounds for the numeric features.
 */
function createTestNormParams(): NormalizationParams {
  return {
    min: {
      hourlyRate: 100,
      bioWordCount: 10,
      serviceCount: 1,
      platformCount: 1,
      socialPlatformCount: 0,
    },
    max: {
      hourlyRate: 1000,
      bioWordCount: 500,
      serviceCount: 15,
      platformCount: 3,
      socialPlatformCount: 5,
    },
  };
}
// Suite for VectorEncoder: encode layout, normalization params, decode
// round-trips, and dimension labels.
describe('VectorEncoder', () => {
  const encoder = new VectorEncoder();

  /** Expected length of every encoded vector and of the dimension-label list. */
  const VECTOR_DIMS = 59;

  /**
   * Expected half-open `[start, end)` index range of each feature group inside
   * an encoded vector. Kept in one table so the offsets asserted by the
   * individual tests cannot drift apart.
   */
  const LAYOUT = {
    rateTier: [0, 5],
    bioTone: [5, 10],
    usernamePattern: [10, 14],
    screeningLevel: [14, 18],
    preferredChannel: [18, 22],
    availabilityType: [22, 26],
    services: [26, 43],
    booleans: [43, 53],
    numerics: [53, 58],
  } as const;

  /** hourlyRate is the first entry of the numeric group. */
  const HOURLY_RATE_INDEX = LAYOUT.numerics[0];

  /** contentRichness occupies the final dimension. */
  const CONTENT_RICHNESS_INDEX = VECTOR_DIMS - 1;

  describe('encode', () => {
    it('produces a vector of exactly 59 dimensions', () => {
      const vector = createTestVector();
      const normParams = createTestNormParams();
      const encoded = encoder.encode(vector, normParams);
      expect(encoded.length).toBe(VECTOR_DIMS);
    });

    it('encodes rateTier as one-hot (5 dims)', () => {
      const vector = createTestVector({ rateTier: 'premium' });
      const encoded = encoder.encode(vector, createTestNormParams());

      // rateTier is the first 5 dims: budget, mid, premium, luxury, unknown
      const rateTierSlice = encoded.slice(...LAYOUT.rateTier);
      expect(rateTierSlice).toEqual([0, 0, 1, 0, 0]);
    });

    it('encodes bioTone as one-hot (5 dims)', () => {
      const vector = createTestVector({ bioTone: 'playful' });
      const encoded = encoder.encode(vector, createTestNormParams());

      // bioTone order: professional, casual, playful, minimal, explicit
      const bioToneSlice = encoded.slice(...LAYOUT.bioTone);
      expect(bioToneSlice).toEqual([0, 0, 1, 0, 0]);
    });

    it('encodes usernamePattern as one-hot (4 dims)', () => {
      const vector = createTestVector({ usernamePattern: 'suggestive' });
      const encoded = encoder.encode(vector, createTestNormParams());

      // usernamePattern order: professional, casual, suggestive, anonymous
      const slice = encoded.slice(...LAYOUT.usernamePattern);
      expect(slice).toEqual([0, 0, 1, 0]);
    });

    it('encodes screeningLevel as one-hot (4 dims)', () => {
      const vector = createTestVector({ screeningLevel: 'strict' });
      const encoded = encoder.encode(vector, createTestNormParams());

      // 'strict' is expected in the first slot of the screeningLevel group
      const slice = encoded.slice(...LAYOUT.screeningLevel);
      expect(slice).toEqual([1, 0, 0, 0]);
    });

    it('encodes preferredChannel as one-hot (4 dims)', () => {
      const vector = createTestVector({ preferredChannel: 'phone' });
      const encoded = encoder.encode(vector, createTestNormParams());

      // 'phone' is expected in the third slot of the preferredChannel group
      const slice = encoded.slice(...LAYOUT.preferredChannel);
      expect(slice).toEqual([0, 0, 1, 0]);
    });

    it('encodes availabilityType as one-hot (4 dims)', () => {
      const vector = createTestVector({ availabilityType: 'touring' });
      const encoded = encoder.encode(vector, createTestNormParams());

      // 'touring' is expected in the third slot of the availabilityType group
      const slice = encoded.slice(...LAYOUT.availabilityType);
      expect(slice).toEqual([0, 0, 1, 0]);
    });

    it('encodes serviceCategories as multi-hot (17 dims)', () => {
      const vector = createTestVector({ serviceCategories: ['companion', 'gfe', 'massage'] });
      const encoded = encoder.encode(vector, createTestNormParams());

      const serviceSlice = encoded.slice(...LAYOUT.services);

      // Positions are looked up from SERVICE_CATEGORIES, not hard-coded,
      // so reordering the constant does not silently invalidate this test.
      const companionIdx = SERVICE_CATEGORIES.indexOf('companion');
      const gfeIdx = SERVICE_CATEGORIES.indexOf('gfe');
      const massageIdx = SERVICE_CATEGORIES.indexOf('massage');

      // Guard against a category missing from the constant: indexOf would
      // return -1 and the slot lookups below would read `undefined`.
      expect(companionIdx).toBeGreaterThanOrEqual(0);
      expect(gfeIdx).toBeGreaterThanOrEqual(0);
      expect(massageIdx).toBeGreaterThanOrEqual(0);

      expect(serviceSlice[companionIdx]).toBe(1);
      expect(serviceSlice[gfeIdx]).toBe(1);
      expect(serviceSlice[massageIdx]).toBe(1);

      // Every other service slot should be 0
      const totalOnes = serviceSlice.filter((v) => v === 1).length;
      expect(totalOnes).toBe(3);
    });

    it('encodes boolean features correctly (10 dims)', () => {
      const vector = createTestVector({
        hasGFE: true,
        hasBDSM: false,
        hasMassage: true,
        hasContentCreation: false,
        hasVirtualServices: false,
        hasSocials: true,
        hasWebsite: false,
        hasOnlyFans: true,
        isTouring: false,
        acceptsDeposit: true,
      });
      const encoded = encoder.encode(vector, createTestNormParams());

      // The expected array follows the order of the overrides listed above
      const boolSlice = encoded.slice(...LAYOUT.booleans);
      expect(boolSlice).toEqual([1, 0, 1, 0, 0, 1, 0, 1, 0, 1]);
    });

    it('normalizes numeric features to 0-1 range', () => {
      const normParams = createTestNormParams();
      const vector = createTestVector({
        hourlyRate: 550, // (550-100)/(1000-100) = 0.5
        bioWordCount: 255, // (255-10)/(500-10) = 0.5
        serviceCount: 8, // (8-1)/(15-1) = 0.5
        platformCount: 2, // (2-1)/(3-1) = 0.5
        socialPlatformCount: 2.5, // (2.5-0)/(5-0) = 0.5
      });

      const encoded = encoder.encode(vector, normParams);
      const numSlice = encoded.slice(...LAYOUT.numerics);

      expect(numSlice[0]).toBeCloseTo(0.5, 1); // hourlyRate
      expect(numSlice[1]).toBeCloseTo(0.5, 1); // bioWordCount
      expect(numSlice[2]).toBeCloseTo(0.5, 1); // serviceCount
      expect(numSlice[3]).toBeCloseTo(0.5, 1); // platformCount
      expect(numSlice[4]).toBeCloseTo(0.5, 1); // socialPlatformCount
    });

    it('clamps normalized values to [0, 1]', () => {
      const normParams = createTestNormParams();

      // hourlyRate above max (1000): raw value would be ~2.11, must saturate at 1
      const aboveMax = encoder.encode(createTestVector({ hourlyRate: 2000 }), normParams);
      expect(aboveMax[HOURLY_RATE_INDEX]).toBe(1);

      // hourlyRate below min (100): raw value would be negative, must saturate at 0
      const belowMin = encoder.encode(createTestVector({ hourlyRate: 50 }), normParams);
      expect(belowMin[HOURLY_RATE_INDEX]).toBe(0);
    });

    it('encodes contentRichness as final dimension', () => {
      const vector = createTestVector({ contentRichness: 0.42 });
      const encoded = encoder.encode(vector, createTestNormParams());
      expect(encoded[CONTENT_RICHNESS_INDEX]).toBeCloseTo(0.42);
    });

    it('handles null hourlyRate gracefully', () => {
      const vector = createTestVector({ hourlyRate: null });
      const encoded = encoder.encode(vector, createTestNormParams());
      // null hourlyRate → 0, normalized = (0-100)/(1000-100) clamped to 0
      expect(encoded[HOURLY_RATE_INDEX]).toBe(0);
    });
  });

  describe('computeNormalizationParams', () => {
    it('computes min and max for numeric fields', () => {
      const vectors = [
        createTestVector({ hourlyRate: 200, bioWordCount: 50, serviceCount: 2, platformCount: 1, socialPlatformCount: 0 }),
        createTestVector({ hourlyRate: 800, bioWordCount: 400, serviceCount: 10, platformCount: 3, socialPlatformCount: 4 }),
        createTestVector({ hourlyRate: 500, bioWordCount: 150, serviceCount: 5, platformCount: 2, socialPlatformCount: 2 }),
      ];

      const params = encoder.computeNormalizationParams(vectors);

      expect(params.min.hourlyRate).toBe(200);
      expect(params.max.hourlyRate).toBe(800);
      expect(params.min.bioWordCount).toBe(50);
      expect(params.max.bioWordCount).toBe(400);
      expect(params.min.serviceCount).toBe(2);
      expect(params.max.serviceCount).toBe(10);
      expect(params.min.platformCount).toBe(1);
      expect(params.max.platformCount).toBe(3);
      expect(params.min.socialPlatformCount).toBe(0);
      expect(params.max.socialPlatformCount).toBe(4);
    });

    it('returns zeros for empty vector array', () => {
      const params = encoder.computeNormalizationParams([]);
      expect(params.min.hourlyRate).toBe(0);
      expect(params.max.hourlyRate).toBe(0);
    });

    it('handles single vector (min equals max)', () => {
      const vectors = [createTestVector({ hourlyRate: 300 })];
      const params = encoder.computeNormalizationParams(vectors);
      expect(params.min.hourlyRate).toBe(300);
      expect(params.max.hourlyRate).toBe(300);
    });
  });

  describe('decode', () => {
    it('round-trips categorical features correctly', () => {
      const vector = createTestVector({
        rateTier: 'luxury',
        bioTone: 'explicit',
        usernamePattern: 'anonymous',
        screeningLevel: 'strict',
        preferredChannel: 'text',
        availabilityType: 'flexible',
      });

      const encoded = encoder.encode(vector, createTestNormParams());
      const decoded = encoder.decode(encoded);

      expect(decoded.rateTier).toBe('luxury');
      expect(decoded.bioTone).toBe('explicit');
      expect(decoded.usernamePattern).toBe('anonymous');
      expect(decoded.screeningLevel).toBe('strict');
      expect(decoded.preferredChannel).toBe('text');
      expect(decoded.availabilityType).toBe('flexible');
    });

    it('round-trips service categories correctly', () => {
      const services = ['bdsm_domme', 'fetish', 'content_creation'] as const;
      const vector = createTestVector({ serviceCategories: [...services] });
      const encoded = encoder.encode(vector, createTestNormParams());
      const decoded = encoder.decode(encoded);

      // arrayContaining + length check: same members, order not significant
      expect(decoded.serviceCategories).toEqual(expect.arrayContaining([...services]));
      expect(decoded.serviceCategories?.length).toBe(3);
    });

    it('round-trips boolean features correctly', () => {
      const vector = createTestVector({
        hasGFE: false,
        hasBDSM: true,
        hasMassage: false,
        hasContentCreation: true,
        hasVirtualServices: true,
        hasSocials: false,
        hasWebsite: true,
        hasOnlyFans: false,
        isTouring: true,
        acceptsDeposit: false,
      });

      const encoded = encoder.encode(vector, createTestNormParams());
      const decoded = encoder.decode(encoded);

      expect(decoded.hasGFE).toBe(false);
      expect(decoded.hasBDSM).toBe(true);
      expect(decoded.hasMassage).toBe(false);
      expect(decoded.hasContentCreation).toBe(true);
      expect(decoded.hasVirtualServices).toBe(true);
      expect(decoded.hasSocials).toBe(false);
      expect(decoded.hasWebsite).toBe(true);
      expect(decoded.hasOnlyFans).toBe(false);
      expect(decoded.isTouring).toBe(true);
      expect(decoded.acceptsDeposit).toBe(false);
    });

    it('recovers contentRichness', () => {
      const vector = createTestVector({ contentRichness: 0.88 });
      const encoded = encoder.encode(vector, createTestNormParams());
      const decoded = encoder.decode(encoded);
      expect(decoded.contentRichness).toBeCloseTo(0.88);
    });
  });

  describe('getDimensionLabels', () => {
    it('returns exactly 59 labels', () => {
      const labels = encoder.getDimensionLabels();
      expect(labels.length).toBe(VECTOR_DIMS);
    });

    it('starts with rateTier labels', () => {
      const labels = encoder.getDimensionLabels();
      expect(labels[0]).toBe('rateTier:budget');
      expect(labels[4]).toBe('rateTier:unknown');
    });

    it('contains service labels', () => {
      const labels = encoder.getDimensionLabels();
      expect(labels).toContain('service:companion');
      expect(labels).toContain('service:fbsm');
    });

    it('contains boolean labels', () => {
      const labels = encoder.getDimensionLabels();
      expect(labels).toContain('bool:hasGFE');
      expect(labels).toContain('bool:acceptsDeposit');
    });

    it('contains numeric labels', () => {
      const labels = encoder.getDimensionLabels();
      expect(labels).toContain('num:hourlyRate');
      expect(labels).toContain('num:socialPlatformCount');
    });

    it('ends with contentRichness', () => {
      const labels = encoder.getDimensionLabels();
      expect(labels[labels.length - 1]).toBe('contentRichness');
    });
  });
});