/**
 * Unit tests for the helpers and the `matchRecords` entry point exported by
 * `../src/matcher.service`.
 *
 * Every expectation below pins behaviour that the matcher module must provide;
 * nothing here re-implements matcher logic.
 */
import { describe, it, expect } from 'vitest';
import {
  matchRecords,
  parseName,
  normalizeString,
  normalizeStreetType,
  levenshteinDistance,
  similarityScore,
  getConfigForPropertyType,
} from '../src/matcher.service';
import type { Address } from '../src/types';

/**
 * Reference address shared by the `matchRecords` tests.
 *
 * Tests always pass a spread copy (`{ ...baselineAddress }`) so that no test
 * can succeed merely because both arguments are the same object, and so that a
 * matcher that mutates its inputs cannot leak state between tests.
 */
const baselineAddress: Address = {
  streetNumber: '123',
  streetName: 'main',
  streetType: 'st',
  unit: 'apt 4b',
  city: 'springfield',
  state: 'IL',
  zip: '62701',
  latitude: 39.7817,
  longitude: -89.6501,
};

// Edit distance between two strings.
describe('levenshteinDistance', () => {
  it('returns 0 for identical strings', () => {
    expect(levenshteinDistance('hello', 'hello')).toBe(0);
  });

  it('computes distance for different strings', () => {
    // Classic textbook pair: two substitutions plus one insertion.
    expect(levenshteinDistance('kitten', 'sitting')).toBe(3);
  });

  it('handles empty strings', () => {
    // Checked in both argument orders.
    expect(levenshteinDistance('', 'hello')).toBe(5);
    expect(levenshteinDistance('hello', '')).toBe(5);
  });

  it('handles single character differences', () => {
    expect(levenshteinDistance('cat', 'bat')).toBe(1);
  });
});

// Called as similarityScore(distance, maxLength) in these tests.
describe('similarityScore', () => {
  it('returns 1.0 for zero distance', () => {
    expect(similarityScore(0, 5)).toBe(1.0);
  });

  it('returns 0.0 when distance equals max length', () => {
    expect(similarityScore(5, 5)).toBe(0.0);
  });

  it('returns 1.0 for empty strings', () => {
    // Max length 0 must not produce NaN from a division by zero.
    expect(similarityScore(0, 0)).toBe(1.0);
  });
});

describe('normalizeString', () => {
  it('lowercases and trims', () => {
    expect(normalizeString(' Hello World ')).toBe('hello world');
  });

  it('removes special characters', () => {
    // The apostrophe disappears while the hyphen becomes a word separator.
    expect(normalizeString('O\'Brien-Jr!')).toBe('obrien jr');
  });

  it('collapses multiple spaces', () => {
    // The input must contain a run of spaces, otherwise nothing is collapsed.
    expect(normalizeString('John    Doe')).toBe('john doe');
  });
});

// parseName returns lower-cased firstName / middleName / lastName plus initials.
describe('parseName', () => {
  it('parses first and last name', () => {
    const tokens = parseName('John Doe');
    expect(tokens.firstName).toBe('john');
    expect(tokens.lastName).toBe('doe');
    expect(tokens.middleName).toBe('');
  });

  it('parses name with middle name', () => {
    const tokens = parseName('John Robert Doe');
    expect(tokens.firstName).toBe('john');
    expect(tokens.lastName).toBe('doe');
    expect(tokens.middleName).toBe('robert');
  });

  it('strips prefixes', () => {
    const tokens = parseName('Dr. John Doe');
    expect(tokens.firstName).toBe('john');
    expect(tokens.lastName).toBe('doe');
  });

  it('strips suffixes', () => {
    const tokens = parseName('John Doe Jr');
    expect(tokens.firstName).toBe('john');
    expect(tokens.lastName).toBe('doe');
  });

  it('handles single name', () => {
    // A lone token is treated as the last name, not the first name.
    const tokens = parseName('Madonna');
    expect(tokens.lastName).toBe('madonna');
    expect(tokens.firstName).toBe('');
  });

  it('extracts initials from middle names', () => {
    const tokens = parseName('John M Doe');
    expect(tokens.initials).toContain('m');
  });

  it('handles empty name', () => {
    const tokens = parseName('');
    expect(tokens.firstName).toBe('');
    expect(tokens.lastName).toBe('');
    expect(tokens.middleName).toBe('');
  });
});

describe('normalizeStreetType', () => {
  it('expands abbreviations', () => {
    expect(normalizeStreetType('st')).toBe('street');
    expect(normalizeStreetType('ave')).toBe('avenue');
    expect(normalizeStreetType('blvd')).toBe('boulevard');
    expect(normalizeStreetType('ct')).toBe('court');
    expect(normalizeStreetType('ln')).toBe('lane');
    expect(normalizeStreetType('dr')).toBe('drive');
  });

  it('normalizes full names', () => {
    // Already-expanded names are only lower-cased.
    expect(normalizeStreetType('Street')).toBe('street');
    expect(normalizeStreetType('Avenue')).toBe('avenue');
  });

  it('passes through unknown types', () => {
    expect(normalizeStreetType('way')).toBe('way');
  });
});

// Called as matchRecords(nameA, addressA, nameB, addressB).
describe('matchRecords', () => {
  it('matches identical records with high confidence', () => {
    const result = matchRecords(
      'John Doe',
      { ...baselineAddress },
      'John Doe',
      { ...baselineAddress },
    );
    expect(result.nameScore).toBeCloseTo(1.0, 2);
    expect(result.addressScore).toBeGreaterThan(0.95);
    expect(result.isMatch).toBe(true);
  });

  it('matches names with different prefixes', () => {
    const result = matchRecords(
      'Dr. John Doe',
      { ...baselineAddress },
      'John Doe',
      { ...baselineAddress },
    );
    expect(result.nameScore).toBeGreaterThan(0.8);
    expect(result.isMatch).toBe(true);
  });

  it('matches names with different suffixes', () => {
    const result = matchRecords(
      'John Doe Jr',
      { ...baselineAddress },
      'John Doe',
      { ...baselineAddress },
    );
    expect(result.nameScore).toBeGreaterThan(0.8);
  });

  it('matches names with typos via Levenshtein', () => {
    // 'Jhon' vs 'John' is a transposition, so the reported distance is non-zero.
    const result = matchRecords(
      'Jhon Doe',
      { ...baselineAddress },
      'John Doe',
      { ...baselineAddress },
    );
    expect(result.nameScore).toBeGreaterThan(0.7);
    expect(result.details.levenshteinDistance).toBeGreaterThan(0);
  });

  it('handles middle initial matching', () => {
    const result = matchRecords(
      'John M Doe',
      { ...baselineAddress },
      'John Michael Doe',
      { ...baselineAddress },
    );
    expect(result.nameScore).toBeGreaterThan(0.7);
  });

  it('matches addresses with different street type formats', () => {
    const addrA: Address = { ...baselineAddress, streetType: 'st' };
    const addrB: Address = { ...baselineAddress, streetType: 'street' };
    const result = matchRecords('John Doe', addrA, 'John Doe', addrB);
    expect(result.addressScore).toBeGreaterThan(0.9);
  });

  it('uses geocoding proximity when coordinates available', () => {
    const addrA: Address = {
      ...baselineAddress,
      latitude: 39.7817,
      longitude: -89.6501,
    };
    // Offset by a few ten-thousandths of a degree from addrA.
    const addrB: Address = {
      ...baselineAddress,
      latitude: 39.782,
      longitude: -89.6505,
    };
    const result = matchRecords('John Doe', addrA, 'John Doe', addrB);
    expect(result.details.geocodingDistance).toBeDefined();
    // NOTE(review): the bound of 100 is presumably metres; confirm the unit
    // against matcher.service. No non-null assertion is needed because
    // toBeLessThan already fails on an undefined value.
    expect(result.details.geocodingDistance).toBeLessThan(100);
  });

  it('returns false for completely different records', () => {
    const result = matchRecords(
      'John Doe',
      { ...baselineAddress },
      'Jane Smith',
      {
        // No unit or coordinates are supplied for this second address.
        streetNumber: '999',
        streetName: 'oak',
        streetType: 'ave',
        city: 'chicago',
        state: 'IL',
        zip: '60601',
      },
    );
    expect(result.isMatch).toBe(false);
  });

  it('provides detailed field-level match info', () => {
    const result = matchRecords(
      'John Doe',
      { ...baselineAddress },
      'John Doe',
      { ...baselineAddress },
    );
    expect(result.details.fields.firstName.score).toBe(1.0);
    expect(result.details.fields.lastName.score).toBe(1.0);
    expect(result.details.fields.streetNumber.score).toBe(1.0);
  });

  it('reports normalized address strings', () => {
    const result = matchRecords(
      'John Doe',
      { ...baselineAddress },
      'John Doe',
      { ...baselineAddress },
    );
    // Equal input addresses must normalize to the same string on both sides.
    expect(result.details.addressNormalized[0]).toBe(
      result.details.addressNormalized[1],
    );
  });
});

// Thresholds differ per property type.
describe('getConfigForPropertyType', () => {
  it('returns residential config with higher thresholds', () => {
    const config = getConfigForPropertyType('residential');
    expect(config.nameThreshold).toBe(0.85);
    expect(config.addressThreshold).toBe(0.9);
  });

  it('returns commercial config with lower name threshold', () => {
    const config = getConfigForPropertyType('commercial');
    expect(config.nameThreshold).toBe(0.8);
  });

  it('returns land config with lower address threshold', () => {
    const config = getConfigForPropertyType('land');
    expect(config.addressThreshold).toBe(0.85);
  });
});