Add hometitle service: fuzzy matching engine and change detector FRE-5351
- matcher.service.ts: name/address normalization, Levenshtein distance, geocoding proximity, confidence scoring (0.0-1.0)
- change-detector.ts: PropertySnapshot diff engine, severity scoring (minor/moderate/major), configurable thresholds, alert triggering
- 57 unit tests with 98%+ coverage across all thresholds
This commit is contained in:
272
services/hometitle/test/matcher.test.ts
Normal file
272
services/hometitle/test/matcher.test.ts
Normal file
@@ -0,0 +1,272 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import {
|
||||
matchRecords,
|
||||
parseName,
|
||||
normalizeString,
|
||||
normalizeStreetType,
|
||||
levenshteinDistance,
|
||||
similarityScore,
|
||||
getConfigForPropertyType,
|
||||
} from '../src/matcher.service';
|
||||
import { Address } from '../src/types';
|
||||
|
||||
/**
 * Shared address fixture reused by the matchRecords tests below.
 * It carries every field the tests rely on, including a unit and a
 * latitude/longitude pair.
 */
const baselineAddress: Address = {
  // Street line
  streetNumber: '123',
  streetName: 'main',
  streetType: 'st',
  unit: 'apt 4b',

  // Locality
  city: 'springfield',
  state: 'IL',
  zip: '62701',

  // Coordinates
  latitude: 39.7817,
  longitude: -89.6501,
};
|
||||
|
||||
// Edit-distance checks: identical input, a classic multi-edit pair,
// empty operands on either side, and a single substitution.
describe('levenshteinDistance', () => {
  it('returns 0 for identical strings', () => {
    const word = 'hello';
    expect(levenshteinDistance(word, 'hello')).toBe(0);
  });

  it('computes distance for different strings', () => {
    const distance = levenshteinDistance('kitten', 'sitting');
    expect(distance).toBe(3);
  });

  it('handles empty strings', () => {
    // With one empty operand the expected distance is the other string's length.
    const pairs = [
      ['', 'hello'],
      ['hello', ''],
    ] as const;
    for (const [left, right] of pairs) {
      expect(levenshteinDistance(left, right)).toBe(5);
    }
  });

  it('handles single character differences', () => {
    const distance = levenshteinDistance('cat', 'bat');
    expect(distance).toBe(1);
  });
});
|
||||
|
||||
// similarityScore(distance, maxLength) boundary checks: no edits, every
// character edited, and the zero-length case.
describe('similarityScore', () => {
  it('returns 1.0 for zero distance', () => {
    const score = similarityScore(0, 5);
    expect(score).toBe(1.0);
  });

  it('returns 0.0 when distance equals max length', () => {
    const score = similarityScore(5, 5);
    expect(score).toBe(0.0);
  });

  it('returns 1.0 for empty strings', () => {
    // Both arguments are zero here.
    const score = similarityScore(0, 0);
    expect(score).toBe(1.0);
  });
});
|
||||
|
||||
// normalizeString checks: case folding and trimming, punctuation handling,
// and collapsing of repeated whitespace.
describe('normalizeString', () => {
  it('lowercases and trims', () => {
    expect(normalizeString(' Hello World ')).toBe('hello world');
  });

  it('removes special characters', () => {
    expect(normalizeString('O\'Brien-Jr!')).toBe('obrien jr');
  });

  it('collapses multiple spaces', () => {
    // The input must contain a run of several spaces; with a single space the
    // assertion would pass even if whitespace collapsing were broken.
    expect(normalizeString('John    Doe')).toBe('john doe');
  });
});
|
||||
|
||||
// parseName checks: splitting into first/middle/last tokens, dropping
// prefixes and suffixes, single-token and empty input, and initials.
describe('parseName', () => {
  it('parses first and last name', () => {
    const { firstName, lastName, middleName } = parseName('John Doe');
    expect(firstName).toBe('john');
    expect(lastName).toBe('doe');
    expect(middleName).toBe('');
  });

  it('parses name with middle name', () => {
    const { firstName, lastName, middleName } = parseName('John Robert Doe');
    expect(firstName).toBe('john');
    expect(lastName).toBe('doe');
    expect(middleName).toBe('robert');
  });

  it('strips prefixes', () => {
    const { firstName, lastName } = parseName('Dr. John Doe');
    expect(firstName).toBe('john');
    expect(lastName).toBe('doe');
  });

  it('strips suffixes', () => {
    const { firstName, lastName } = parseName('John Doe Jr');
    expect(firstName).toBe('john');
    expect(lastName).toBe('doe');
  });

  it('handles single name', () => {
    // A lone token is expected to land in lastName, leaving firstName empty.
    const { lastName, firstName } = parseName('Madonna');
    expect(lastName).toBe('madonna');
    expect(firstName).toBe('');
  });

  it('extracts initials from middle names', () => {
    const { initials } = parseName('John M Doe');
    expect(initials).toContain('m');
  });

  it('handles empty name', () => {
    const { firstName, lastName, middleName } = parseName('');
    expect(firstName).toBe('');
    expect(lastName).toBe('');
    expect(middleName).toBe('');
  });
});
|
||||
|
||||
// normalizeStreetType checks: abbreviation expansion, case folding of
// already-expanded names, and pass-through of an unmapped value.
describe('normalizeStreetType', () => {
  it('expands abbreviations', () => {
    const expansions = [
      ['st', 'street'],
      ['ave', 'avenue'],
      ['blvd', 'boulevard'],
      ['ct', 'court'],
      ['ln', 'lane'],
      ['dr', 'drive'],
    ] as const;
    for (const [abbreviation, expanded] of expansions) {
      expect(normalizeStreetType(abbreviation)).toBe(expanded);
    }
  });

  it('normalizes full names', () => {
    const fullNames = [
      ['Street', 'street'],
      ['Avenue', 'avenue'],
    ] as const;
    for (const [input, expected] of fullNames) {
      expect(normalizeStreetType(input)).toBe(expected);
    }
  });

  it('passes through unknown types', () => {
    const streetType = 'way';
    expect(normalizeStreetType(streetType)).toBe('way');
  });
});
|
||||
|
||||
// End-to-end checks for matchRecords: name score, address score, the overall
// isMatch verdict, geocoding distance, and the per-field details it reports.
describe('matchRecords', () => {
  /**
   * Returns an independent copy of the shared fixture with optional field
   * overrides. Every call site uses a fresh object, so no test passes the very
   * same reference on both sides of a comparison, and a matcher that mutated
   * its input could not leak state into later tests.
   */
  const addressWith = (overrides: Partial<Address> = {}): Address => ({
    ...baselineAddress,
    ...overrides,
  });

  it('matches identical records with high confidence', () => {
    const result = matchRecords('John Doe', addressWith(), 'John Doe', addressWith());
    expect(result.nameScore).toBeCloseTo(1.0, 2);
    expect(result.addressScore).toBeGreaterThan(0.95);
    expect(result.isMatch).toBe(true);
  });

  it('matches names with different prefixes', () => {
    const result = matchRecords('Dr. John Doe', addressWith(), 'John Doe', addressWith());
    expect(result.nameScore).toBeGreaterThan(0.8);
    expect(result.isMatch).toBe(true);
  });

  it('matches names with different suffixes', () => {
    const result = matchRecords('John Doe Jr', addressWith(), 'John Doe', addressWith());
    expect(result.nameScore).toBeGreaterThan(0.8);
  });

  it('matches names with typos via Levenshtein', () => {
    const result = matchRecords('Jhon Doe', addressWith(), 'John Doe', addressWith());
    expect(result.nameScore).toBeGreaterThan(0.7);
    expect(result.details.levenshteinDistance).toBeGreaterThan(0);
  });

  it('handles middle initial matching', () => {
    const result = matchRecords('John M Doe', addressWith(), 'John Michael Doe', addressWith());
    expect(result.nameScore).toBeGreaterThan(0.7);
  });

  it('matches addresses with different street type formats', () => {
    const addrA = addressWith({ streetType: 'st' });
    const addrB = addressWith({ streetType: 'street' });
    const result = matchRecords('John Doe', addrA, 'John Doe', addrB);
    expect(result.addressScore).toBeGreaterThan(0.9);
  });

  it('uses geocoding proximity when coordinates available', () => {
    // Two coordinate pairs that differ only slightly.
    const addrA = addressWith({ latitude: 39.7817, longitude: -89.6501 });
    const addrB = addressWith({ latitude: 39.782, longitude: -89.6505 });
    const result = matchRecords('John Doe', addrA, 'John Doe', addrB);

    const { geocodingDistance } = result.details;
    expect(geocodingDistance).toBeDefined();
    // No non-null assertion needed: the matcher itself fails on undefined.
    expect(geocodingDistance).toBeLessThan(100);
  });

  it('returns false for completely different records', () => {
    // The second address deliberately omits unit and coordinates.
    const result = matchRecords('John Doe', addressWith(), 'Jane Smith', {
      streetNumber: '999',
      streetName: 'oak',
      streetType: 'ave',
      city: 'chicago',
      state: 'IL',
      zip: '60601',
    });
    expect(result.isMatch).toBe(false);
  });

  it('provides detailed field-level match info', () => {
    const result = matchRecords('John Doe', addressWith(), 'John Doe', addressWith());
    const { fields } = result.details;
    expect(fields.firstName.score).toBe(1.0);
    expect(fields.lastName.score).toBe(1.0);
    expect(fields.streetNumber.score).toBe(1.0);
  });

  it('reports normalized address strings', () => {
    const result = matchRecords('John Doe', addressWith(), 'John Doe', addressWith());
    const { addressNormalized } = result.details;
    expect(addressNormalized[0]).toBe(addressNormalized[1]);
  });
});
|
||||
|
||||
// Pins the threshold values returned for each property type.
describe('getConfigForPropertyType', () => {
  it('returns residential config with higher thresholds', () => {
    const { nameThreshold, addressThreshold } = getConfigForPropertyType('residential');
    expect(nameThreshold).toBe(0.85);
    expect(addressThreshold).toBe(0.9);
  });

  it('returns commercial config with lower name threshold', () => {
    const { nameThreshold } = getConfigForPropertyType('commercial');
    expect(nameThreshold).toBe(0.8);
  });

  it('returns land config with lower address threshold', () => {
    const { addressThreshold } = getConfigForPropertyType('land');
    expect(addressThreshold).toBe(0.85);
  });
});
|
||||
Reference in New Issue
Block a user