Add hometitle service: fuzzy matching engine and change detector FRE-5351

- matcher.service.ts: name/address normalization, Levenshtein distance,
  geocoding proximity, confidence scoring (0.0-1.0)
- change-detector.ts: PropertySnapshot diff engine, severity scoring
  (minor/moderate/major), configurable thresholds, alert triggering
- 57 unit tests with 98%+ coverage across all thresholds
This commit is contained in:
2026-05-14 09:09:23 -04:00
parent 1b917321cf
commit 74949d9bcc
35 changed files with 7716 additions and 0 deletions

View File

@@ -0,0 +1,309 @@
import {
Address,
MatchResult,
MatchDetails,
FieldMatch,
MatchingConfig,
NormalizedTokens,
PropertyType,
} from './types';
/**
 * Baseline matching configuration. `matchRecords` and
 * `getConfigForPropertyType` layer caller / property-type overrides on top
 * of these values.
 *
 * NOTE(review): in the code visible in this file only `overallThreshold`
 * (the `isMatch` cut-off) and `geocodingRadiusMeters` (full geo credit
 * radius) are read; `nameThreshold` and `addressThreshold` are not consulted
 * by any scoring path here — confirm whether they are used elsewhere.
 */
const DEFAULT_CONFIG: MatchingConfig = {
nameThreshold: 0.85,
addressThreshold: 0.9,
// Minimum overallConfidence for matchRecords to report isMatch = true.
overallThreshold: 0.85,
// Coordinates within this many meters receive the full geocoding score.
geocodingRadiusMeters: 100,
};
/**
 * Lower-case tokens that `parseName` skips when they appear at the start of
 * a normalized name. Besides courtesy titles, the list also carries the
 * generational markers (jr, sr, ii, ...) that are present in the suffix set.
 */
const COMMON_PREFIXES = new Set(
  'mr mrs ms miss dr prof jr sr junior senior ii iii iv rev st hon esq'.split(' '),
);
/**
 * Lower-case tokens that `parseName` drops when they trail a normalized
 * name: generational markers followed by credential abbreviations.
 */
const COMMON_SUFFIXES = new Set(
  'jr sr junior senior ii iii iv v esq phd md llm cpa'.split(' '),
);
/**
 * Lookup from a normalized street-type token (abbreviation or full word) to
 * its canonical full word. Built from [abbreviation, canonical] pairs; each
 * canonical word also maps to itself so already-expanded input is accepted.
 */
const STREET_TYPE_MAP: Record<string, string> = (() => {
  const pairs: ReadonlyArray<readonly [string, string]> = [
    ['st', 'street'],
    ['ave', 'avenue'],
    ['blvd', 'boulevard'],
    ['dr', 'drive'],
    ['ln', 'lane'],
    ['ct', 'court'],
    ['pl', 'place'],
    ['rd', 'road'],
    ['way', 'way'], // no abbreviation listed; the word maps to itself
    ['trl', 'trail'],
    ['hwy', 'highway'],
    ['pkwy', 'parkway'],
    ['cir', 'circle'],
    ['sq', 'square'],
    ['ter', 'terrace'],
  ];

  const table: Record<string, string> = {};
  for (const [abbreviation, canonical] of pairs) {
    table[abbreviation] = canonical;
    table[canonical] = canonical;
  }
  return table;
})();
/**
 * Per-property-type overrides that `getConfigForPropertyType` spreads over
 * `DEFAULT_CONFIG`.
 *
 * NOTE(review): every entry overrides only `nameThreshold` and
 * `addressThreshold`. No scoring code visible in this file reads those two
 * fields, so these overrides may currently have no effect on `isMatch` —
 * confirm against other consumers.
 */
const PROPERTY_TYPE_CONFIGS: Record<PropertyType, Partial<MatchingConfig>> = {
'residential': { nameThreshold: 0.85, addressThreshold: 0.9 },
'commercial': { nameThreshold: 0.8, addressThreshold: 0.9 },
'land': { nameThreshold: 0.8, addressThreshold: 0.85 },
'multi-family': { nameThreshold: 0.8, addressThreshold: 0.9 },
};
/**
 * Computes the Levenshtein edit distance between two strings: the minimum
 * number of single-unit insertions, deletions and substitutions needed to
 * turn one into the other. Comparison is per UTF-16 code unit.
 *
 * Uses two rolling rows instead of a full (|a|+1) x (|b|+1) matrix, so memory
 * is proportional to the shorter input rather than to the product of both
 * lengths.
 *
 * @param a First string.
 * @param b Second string.
 * @returns The edit distance (0 when the strings are identical).
 */
function levenshteinDistance(a: string, b: string): number {
  // Fast paths: identical strings, or one side empty (distance = other length).
  if (a === b) return 0;
  if (a.length === 0) return b.length;
  if (b.length === 0) return a.length;

  // The distance is symmetric, so put the shorter string on the row axis to
  // keep the row buffers as small as possible.
  const shorter = a.length <= b.length ? a : b;
  const longer = a.length <= b.length ? b : a;
  const width = shorter.length;

  let previousRow: number[] = [];
  let currentRow: number[] = [];
  for (let col = 0; col <= width; col++) {
    previousRow.push(col); // distance from the empty prefix
    currentRow.push(0);
  }

  for (let row = 1; row <= longer.length; row++) {
    currentRow[0] = row;
    const rowUnit = longer.charCodeAt(row - 1);
    for (let col = 1; col <= width; col++) {
      const substitutionCost = shorter.charCodeAt(col - 1) === rowUnit ? 0 : 1;
      currentRow[col] = Math.min(
        previousRow[col] + 1, // deletion
        currentRow[col - 1] + 1, // insertion
        previousRow[col - 1] + substitutionCost, // substitution / match
      );
    }
    // Recycle the buffers: the row just filled becomes "previous".
    const recycled = previousRow;
    previousRow = currentRow;
    currentRow = recycled;
  }

  return previousRow[width];
}
/**
 * Converts an edit distance into a similarity ratio relative to `maxLen`.
 *
 * @param distance Edit distance between the two compared strings.
 * @param maxLen Length used as the denominator (callers pass the longer
 *   string's length).
 * @returns `1` when `maxLen` is 0, otherwise `1 - distance / maxLen`. The
 *   value is not clamped.
 */
function similarityScore(distance: number, maxLen: number): number {
  return maxLen === 0 ? 1.0 : 1.0 - distance / maxLen;
}
/**
 * Canonicalizes free text for fuzzy comparison.
 *
 * Steps, in order:
 *  1. Decompose to NFD and drop combining diacritical marks, so accented
 *     Latin letters fold to their base letter ("José" -> "jose") instead of
 *     being turned into a space by step 4.
 *  2. Lower-case.
 *  3. Delete apostrophes — ASCII and the typographic variants — so
 *     "O'Brien" and "O’Brien" both become "obrien".
 *  4. Replace every remaining character that is not a-z, 0-9 or whitespace
 *     with a space.
 *  5. Collapse whitespace runs to one space and trim.
 *
 * Pure-ASCII input produces the same output as before steps 1 and 3 were
 * widened.
 *
 * @param str Raw text.
 * @returns The normalized text; may be the empty string.
 */
function normalizeString(str: string): string {
  return str
    .normalize('NFD')
    .replace(/[\u0300-\u036f]/g, '')
    .toLowerCase()
    // ASCII apostrophe, left/right single quotation marks, modifier apostrophe.
    .replace(/['\u2018\u2019\u02bc]/g, '')
    .replace(/[^a-z0-9\s]/g, ' ')
    .replace(/\s+/g, ' ')
    .trim();
}
/**
 * Splits a personal name into first / middle / last tokens plus any
 * single-letter initials.
 *
 * The name is normalized with `normalizeString`, leading tokens found in
 * `COMMON_PREFIXES` and trailing tokens found in `COMMON_SUFFIXES` are
 * skipped, and the remaining "core" tokens are assigned as:
 *  - one token: last name only;
 *  - two tokens: first, last;
 *  - three or more: first, everything in between as the middle name, last.
 *
 * Both stripping loops always leave at least one token. Without that guard a
 * name made up solely of prefix tokens (e.g. "Dr") produced an empty core
 * list and the function threw when reading `.length` of an undefined name
 * part; such a name now yields its final token as the last name.
 *
 * @param name Raw name text.
 * @returns The parsed tokens; all fields are empty for blank input.
 */
function parseName(name: string): NormalizedTokens {
  const parts = normalizeString(name).split(' ').filter(Boolean);

  let firstName = '';
  let lastName = '';
  let middleName = '';
  const initials: string[] = [];
  if (parts.length === 0) return { firstName, lastName, middleName, initials };

  // Skip leading prefix tokens, but never consume the final token.
  let startIdx = 0;
  while (startIdx < parts.length - 1 && COMMON_PREFIXES.has(parts[startIdx])) {
    startIdx++;
  }

  // Skip trailing suffix tokens, again keeping at least one core token.
  let endIdx = parts.length;
  while (endIdx > startIdx + 1 && COMMON_SUFFIXES.has(parts[endIdx - 1])) {
    endIdx--;
  }

  const coreParts = parts.slice(startIdx, endIdx);
  if (coreParts.length === 1) {
    lastName = coreParts[0];
  } else if (coreParts.length === 2) {
    firstName = coreParts[0];
    lastName = coreParts[1];
  } else if (coreParts.length > 2) {
    firstName = coreParts[0];
    lastName = coreParts[coreParts.length - 1];
    middleName = coreParts.slice(1, -1).join(' ');
  }

  // Single-letter first / middle tokens are recorded as initials.
  if (firstName.length === 1) {
    initials.push(firstName);
  }
  if (middleName) {
    for (const middlePart of middleName.split(' ')) {
      if (middlePart.length === 1) initials.push(middlePart);
    }
  }

  return { firstName, lastName, middleName, initials };
}
/**
 * Maps a street-type token to its canonical full word via `STREET_TYPE_MAP`
 * (e.g. "Ave." -> "avenue"). Tokens that are not in the map are returned in
 * their normalized form.
 *
 * The lookup is restricted to the map's own keys: a plain index would also
 * see inherited `Object.prototype` members, so an input such as
 * "constructor" would return a function rather than a string.
 *
 * @param type Raw street-type text.
 * @returns The canonical street type, or the normalized input when unknown.
 */
function normalizeStreetType(type: string): string {
  const clean = normalizeString(type);
  const isKnown = Object.prototype.hasOwnProperty.call(STREET_TYPE_MAP, clean);
  return (isKnown ? STREET_TYPE_MAP[clean] : '') || clean;
}
/**
 * Renders an address as one space-separated comparison string.
 *
 * Components appear in the order: street number, street name, street type,
 * unit, city, state, zip. Street name, unit and city go through
 * `normalizeString`; the street type goes through `normalizeStreetType`; the
 * state is lower-cased; street number and zip are used as given. Empty or
 * missing components are omitted.
 *
 * @param addr Address to render.
 * @returns The joined, normalized address text.
 */
function normalizeAddress(addr: Address): string {
  const components = [
    addr.streetNumber,
    normalizeString(addr.streetName),
    addr.streetType ? normalizeStreetType(addr.streetType) : '',
    addr.unit ? normalizeString(addr.unit) : '',
    normalizeString(addr.city),
    addr.state.toLowerCase(),
    addr.zip,
  ];
  const present = components.filter((component) => Boolean(component));
  return present.join(' ');
}
/**
 * Scores how closely two field values agree after normalization.
 *
 * Scoring rules:
 *  - both normalized values empty: 1.0;
 *  - exactly one empty: 0.0;
 *  - identical: 1.0;
 *  - otherwise: `similarityScore` of their Levenshtein distance against the
 *    longer normalized length, rounded to three decimals.
 *
 * @param valueA First raw value.
 * @param valueB Second raw value.
 * @param normalizeFn Optional normalizer; `normalizeString` is used when
 *   omitted.
 * @returns The raw values, their normalized forms and the score.
 */
function computeFieldMatch(valueA: string, valueB: string, normalizeFn?: (v: string) => string): FieldMatch {
  const normalize = normalizeFn || normalizeString;
  const normalizedA = normalize(valueA);
  const normalizedB = normalize(valueB);
  const withScore = (score: number): FieldMatch => ({ valueA, valueB, normalizedA, normalizedB, score });

  const aIsEmpty = !normalizedA;
  const bIsEmpty = !normalizedB;
  if (aIsEmpty && bIsEmpty) return withScore(1.0);
  if (aIsEmpty || bIsEmpty) return withScore(0.0);
  if (normalizedA === normalizedB) return withScore(1.0);

  const longest = Math.max(normalizedA.length, normalizedB.length);
  const rawScore = similarityScore(levenshteinDistance(normalizedA, normalizedB), longest);
  return withScore(Math.round(rawScore * 1000) / 1000);
}
/**
 * Great-circle distance between two points using the haversine formula on a
 * sphere of radius 6,371,000 (so the result is in meters).
 *
 * @param lat1 Latitude of the first point, in degrees.
 * @param lon1 Longitude of the first point, in degrees.
 * @param lat2 Latitude of the second point, in degrees.
 * @param lon2 Longitude of the second point, in degrees.
 * @returns Distance between the two points in meters.
 */
function haversineDistance(lat1: number, lon1: number, lat2: number, lon2: number): number {
  const sphereRadius = 6371000;
  const toRadians = (degrees: number): number => (degrees * Math.PI) / 180;

  const sinHalfLat = Math.sin(toRadians(lat2 - lat1) / 2);
  const sinHalfLon = Math.sin(toRadians(lon2 - lon1) / 2);

  // Haversine term: sin²(Δφ/2) + cos φ1 · cos φ2 · sin²(Δλ/2).
  const haversine =
    sinHalfLat * sinHalfLat +
    Math.cos(toRadians(lat1)) * Math.cos(toRadians(lat2)) * sinHalfLon * sinHalfLon;

  const centralAngle = 2 * Math.atan2(Math.sqrt(haversine), Math.sqrt(1 - haversine));
  return sphereRadius * centralAngle;
}
/**
 * Blends per-token name similarity into one score, rounded to three
 * decimals.
 *
 * Weights: last name 0.45, first name 0.35, middle name 0.10, initials 0.10.
 * The initials component is the number of initials common to both names
 * divided by the size of the larger initials set; it stays at 1.0 when
 * neither name has any initials.
 *
 * @param tokensA Parsed tokens of the first name.
 * @param tokensB Parsed tokens of the second name.
 * @returns Weighted name similarity.
 */
function computeNameScore(tokensA: NormalizedTokens, tokensB: NormalizedTokens): number {
  const scoreOf = (left: string, right: string): number => computeFieldMatch(left, right).score;
  const firstScore = scoreOf(tokensA.firstName, tokensB.firstName);
  const lastScore = scoreOf(tokensA.lastName, tokensB.lastName);
  const middleScore = scoreOf(tokensA.middleName, tokensB.middleName);

  let initialMatchScore = 1.0;
  const eitherHasInitials = tokensA.initials.length > 0 || tokensB.initials.length > 0;
  if (eitherHasInitials) {
    const lower = (initial: string): string => initial.toLowerCase();
    const initialsA = new Set(tokensA.initials.map(lower));
    const initialsB = new Set(tokensB.initials.map(lower));
    const shared = Array.from(initialsA).filter((initial) => initialsB.has(initial)).length;
    const larger = Math.max(initialsA.size, initialsB.size);
    initialMatchScore = larger > 0 ? shared / larger : 1.0;
  }

  const blended = lastScore * 0.45 + firstScore * 0.35 + middleScore * 0.1 + initialMatchScore * 0.1;
  return Math.round(blended * 1000) / 1000;
}
/**
 * Blends per-field address similarity, plus optional geocoding proximity,
 * into one score rounded to three decimals.
 *
 * Weights: street number 0.20, street name 0.25, and 0.10 each for street
 * type, unit, city, state and zip. When all four coordinates are present a
 * geocoding component with weight 0.05 is added: 1.0 within
 * `config.geocodingRadiusMeters`, then falling linearly to 0 at six times
 * that radius.
 *
 * Coordinates are tested for presence with a null check rather than
 * truthiness, so a latitude or longitude of exactly 0 is treated as a real
 * coordinate instead of as missing.
 * NOTE(review): if upstream data uses 0/0 as a "no geocode" placeholder,
 * filter it out before calling — confirm against the data source.
 *
 * NOTE(review): the textual weights sum to 0.95, so without coordinates a
 * perfect textual match scores 0.95, not 1.0. Left unchanged because callers
 * and thresholds may rely on the current scale.
 *
 * @param addrA First address.
 * @param addrB Second address.
 * @param config Effective matching configuration (`geocodingRadiusMeters`
 *   is the only field read here).
 * @returns The weighted score and, when computed, the distance in meters.
 */
function computeAddressScore(addrA: Address, addrB: Address, config: MatchingConfig): { score: number; geocodingDistance?: number } {
  const numberMatch = computeFieldMatch(addrA.streetNumber, addrB.streetNumber).score;
  const streetMatch = computeFieldMatch(addrA.streetName, addrB.streetName, normalizeString).score;
  const typeMatch = computeFieldMatch(
    addrA.streetType ? normalizeStreetType(addrA.streetType) : '',
    addrB.streetType ? normalizeStreetType(addrB.streetType) : '',
  ).score;
  const unitMatch = computeFieldMatch(addrA.unit || '', addrB.unit || '').score;
  const cityMatch = computeFieldMatch(addrA.city, addrB.city).score;
  const stateMatch = computeFieldMatch(addrA.state, addrB.state).score;
  const zipMatch = computeFieldMatch(addrA.zip, addrB.zip).score;

  let geocodingDistance: number | undefined;
  let geoScore = 0.0;
  if (
    addrA.latitude != null &&
    addrA.longitude != null &&
    addrB.latitude != null &&
    addrB.longitude != null
  ) {
    geocodingDistance = haversineDistance(addrA.latitude, addrA.longitude, addrB.latitude, addrB.longitude);
    const maxDist = config.geocodingRadiusMeters;
    // Full credit inside the radius; linear fall-off over the next 5 radii.
    geoScore = geocodingDistance <= maxDist ? 1.0 : Math.max(0, 1.0 - (geocodingDistance - maxDist) / (maxDist * 5));
  }

  const weighted =
    (numberMatch * 0.2) +
    (streetMatch * 0.25) +
    (typeMatch * 0.1) +
    (unitMatch * 0.1) +
    (cityMatch * 0.1) +
    (stateMatch * 0.1) +
    (zipMatch * 0.1) +
    // The geo component only contributes when a distance was computed.
    (geoScore * (geocodingDistance !== undefined ? 0.05 : 0));
  return { score: Math.round(weighted * 1000) / 1000, geocodingDistance };
}
/**
 * Compares two (name, address) records and reports how likely they refer to
 * the same owner and property.
 *
 * The overall confidence is the mean of the name score and the address
 * score, rounded to three decimals; `isMatch` is true when it reaches
 * `overallThreshold`. The result also carries the per-field comparisons and
 * the normalized inputs for diagnostics.
 *
 * Overrides in `config` are layered over `DEFAULT_CONFIG`. Keys whose value
 * is `undefined` are ignored: spreading them directly would replace the
 * default with `undefined`, which makes the threshold comparison always
 * false and the geocoding score NaN.
 *
 * NOTE(review): only `overallThreshold` gates `isMatch` here;
 * `nameThreshold` / `addressThreshold` are not consulted — confirm that is
 * intended.
 *
 * @param nameA Owner name of the first record.
 * @param addressA Address of the first record.
 * @param nameB Owner name of the second record.
 * @param addressB Address of the second record.
 * @param config Optional overrides for the default matching configuration.
 * @returns Scores, match decision and per-field details.
 */
export function matchRecords(
  nameA: string,
  addressA: Address,
  nameB: string,
  addressB: Address,
  config?: Partial<MatchingConfig>,
): MatchResult {
  const effectiveConfig: MatchingConfig = { ...DEFAULT_CONFIG };
  if (config) {
    for (const key of Object.keys(config) as Array<keyof MatchingConfig>) {
      const value = config[key];
      // Skip explicit `undefined` so the default for that key survives.
      if (value !== undefined) Object.assign(effectiveConfig, { [key]: value });
    }
  }

  const tokensA = parseName(nameA);
  const tokensB = parseName(nameB);
  const nameScore = computeNameScore(tokensA, tokensB);
  const { score: addressScore, geocodingDistance } = computeAddressScore(addressA, addressB, effectiveConfig);
  const overallConfidence = Math.round((nameScore * 0.5 + addressScore * 0.5) * 1000) / 1000;

  // Per-field breakdown reported in the details; mirrors the comparisons the
  // two score functions perform internally.
  const firstMatch = computeFieldMatch(tokensA.firstName, tokensB.firstName);
  const lastMatch = computeFieldMatch(tokensA.lastName, tokensB.lastName);
  const middleMatch = computeFieldMatch(tokensA.middleName, tokensB.middleName);
  const numberMatch = computeFieldMatch(addressA.streetNumber, addressB.streetNumber);
  const streetMatch = computeFieldMatch(addressA.streetName, addressB.streetName, normalizeString);
  const typeMatch = computeFieldMatch(
    addressA.streetType ? normalizeStreetType(addressA.streetType) : '',
    addressB.streetType ? normalizeStreetType(addressB.streetType) : '',
  );
  const unitMatch = computeFieldMatch(addressA.unit || '', addressB.unit || '');
  const cityMatch = computeFieldMatch(addressA.city, addressB.city);
  const stateMatch = computeFieldMatch(addressA.state, addressB.state);
  const zipMatch = computeFieldMatch(addressA.zip, addressB.zip);

  // Normalize each full name once and reuse it for both the distance and the
  // reported normalized pair.
  const normalizedNameA = normalizeString(nameA);
  const normalizedNameB = normalizeString(nameB);

  const details: MatchDetails = {
    nameNormalized: [normalizedNameA, normalizedNameB],
    addressNormalized: [normalizeAddress(addressA), normalizeAddress(addressB)],
    levenshteinDistance: levenshteinDistance(normalizedNameA, normalizedNameB),
    geocodingDistance,
    fields: {
      firstName: firstMatch,
      lastName: lastMatch,
      middleName: middleMatch,
      streetNumber: numberMatch,
      streetName: streetMatch,
      streetType: typeMatch,
      unit: unitMatch,
      city: cityMatch,
      state: stateMatch,
      zip: zipMatch,
    },
  };

  return {
    nameScore,
    addressScore,
    overallConfidence,
    isMatch: overallConfidence >= effectiveConfig.overallThreshold,
    details,
  };
}
/**
 * Builds the matching configuration for a property type by applying that
 * type's entry in `PROPERTY_TYPE_CONFIGS` on top of `DEFAULT_CONFIG`.
 *
 * @param type Property type whose overrides should be applied.
 * @returns A new configuration object; the shared defaults are not mutated.
 */
export function getConfigForPropertyType(type: PropertyType): MatchingConfig {
  const overrides = PROPERTY_TYPE_CONFIGS[type];
  return { ...DEFAULT_CONFIG, ...overrides };
}
// Internal helpers exported alongside matchRecords / getConfigForPropertyType
// (presumably so they can be unit-tested directly — confirm with the test suite).
export { parseName, normalizeString, normalizeStreetType, levenshteinDistance, similarityScore };