import type {
  Address,
  MatchResult,
  MatchDetails,
  FieldMatch,
  MatchingConfig,
  NormalizedTokens,
  PropertyType,
} from './types';

/** Thresholds and geocoding radius used when the caller supplies no overrides. */
const DEFAULT_CONFIG: MatchingConfig = {
  nameThreshold: 0.85,
  addressThreshold: 0.9,
  overallThreshold: 0.85,
  geocodingRadiusMeters: 100,
};

/**
 * Leading tokens that `parseName` strips from a name.
 *
 * NOTE(review): this set also contains suffix-like tokens ('jr', 'sr', 'ii', ...)
 * and 'st'; that looks like a copy/paste from the suffix list, but it is kept
 * as-is because removing entries would change how existing names are parsed.
 */
const COMMON_PREFIXES: ReadonlySet<string> = new Set([
  'mr', 'mrs', 'ms', 'miss', 'dr', 'prof',
  'jr', 'sr', 'junior', 'senior', 'ii', 'iii', 'iv',
  'rev', 'st', 'hon', 'esq',
]);

/** Trailing tokens that `parseName` strips from a name. */
const COMMON_SUFFIXES: ReadonlySet<string> = new Set([
  'jr', 'sr', 'junior', 'senior', 'ii', 'iii', 'iv', 'v',
  'esq', 'phd', 'md', 'llm', 'cpa',
]);

/**
 * Street-type abbreviations and full forms mapped to one canonical spelling.
 *
 * A Map is used instead of an object literal so that inherited keys such as
 * 'constructor' can never be returned as a "canonical street type".
 */
const STREET_TYPE_MAP: ReadonlyMap<string, string> = new Map([
  ['st', 'street'], ['street', 'street'],
  ['ave', 'avenue'], ['avenue', 'avenue'],
  ['blvd', 'boulevard'], ['boulevard', 'boulevard'],
  ['dr', 'drive'], ['drive', 'drive'],
  ['ln', 'lane'], ['lane', 'lane'],
  ['ct', 'court'], ['court', 'court'],
  ['pl', 'place'], ['place', 'place'],
  ['rd', 'road'], ['road', 'road'],
  ['way', 'way'],
  ['trl', 'trail'], ['trail', 'trail'],
  ['hwy', 'highway'], ['highway', 'highway'],
  ['pkwy', 'parkway'], ['parkway', 'parkway'],
  ['cir', 'circle'], ['circle', 'circle'],
  ['sq', 'square'], ['square', 'square'],
  ['ter', 'terrace'], ['terrace', 'terrace'],
]);

/** Per-property-type overrides layered on top of DEFAULT_CONFIG. */
const PROPERTY_TYPE_CONFIGS: Record<PropertyType, Partial<MatchingConfig>> = {
  'residential': { nameThreshold: 0.85, addressThreshold: 0.9 },
  'commercial': { nameThreshold: 0.8, addressThreshold: 0.9 },
  'land': { nameThreshold: 0.8, addressThreshold: 0.85 },
  'multi-family': { nameThreshold: 0.8, addressThreshold: 0.9 },
};

/** Field comparisons produced for the three parsed name components. */
interface NameFieldMatches {
  firstName: FieldMatch;
  lastName: FieldMatch;
  middleName: FieldMatch;
}

/** Field comparisons produced for the textual address components. */
interface AddressFieldMatches {
  streetNumber: FieldMatch;
  streetName: FieldMatch;
  streetType: FieldMatch;
  unit: FieldMatch;
  city: FieldMatch;
  state: FieldMatch;
  zip: FieldMatch;
}

/** Weight of each textual address field in the address score. */
const ADDRESS_FIELD_WEIGHTS: ReadonlyArray<readonly [keyof AddressFieldMatches, number]> = [
  ['streetNumber', 0.2],
  ['streetName', 0.25],
  ['streetType', 0.1],
  ['unit', 0.1],
  ['city', 0.1],
  ['state', 0.1],
  ['zip', 0.1],
];

/** Weight of the geocoding component; applied only when both records are geocoded. */
const GEOCODING_WEIGHT = 0.05;

/**
 * Edit distance between two strings (insert, delete, substitute each cost 1).
 *
 * Only two rows of the dynamic-programming table are kept, so memory is
 * proportional to `a.length` rather than `a.length * b.length`.
 *
 * @param a - First string.
 * @param b - Second string.
 * @returns The number of single-character edits needed to turn `a` into `b`.
 */
function levenshteinDistance(a: string, b: string): number {
  if (a === b) return 0;
  if (a.length === 0) return b.length;
  if (b.length === 0) return a.length;

  // previous[j] = distance between the first j chars of `a` and the first i-1 chars of `b`.
  let previous: number[] = Array.from({ length: a.length + 1 }, (_, j) => j);
  let current: number[] = new Array<number>(a.length + 1).fill(0);

  for (let i = 1; i <= b.length; i++) {
    current[0] = i;
    for (let j = 1; j <= a.length; j++) {
      const cost = a[j - 1] === b[i - 1] ? 0 : 1;
      current[j] = Math.min(
        previous[j] + 1, // deletion
        current[j - 1] + 1, // insertion
        previous[j - 1] + cost, // substitution
      );
    }
    [previous, current] = [current, previous];
  }

  // After the final swap the completed row lives in `previous`.
  return previous[a.length];
}

/**
 * Converts an edit distance into a similarity in which 1 means identical.
 *
 * @param distance - Edit distance between the two strings.
 * @param maxLen - Length of the longer string.
 * @returns `1 - distance / maxLen`, or 1 when `maxLen` is 0.
 */
function similarityScore(distance: number, maxLen: number): number {
  if (maxLen === 0) return 1.0;
  return 1.0 - distance / maxLen;
}

/**
 * Canonicalizes free text for comparison.
 *
 * Diacritics are folded to their base letter, the text is lower-cased,
 * straight and typographic apostrophes are removed (so "O'Brien" and
 * "O’Brien" both become "obrien"), every other character outside
 * `[a-z0-9]` becomes a space, and whitespace is collapsed and trimmed.
 *
 * @param str - Raw input text.
 * @returns The normalized text.
 */
function normalizeString(str: string): string {
  return str
    .normalize('NFD')
    .replace(/[\u0300-\u036f]/g, '') // combining marks left behind by NFD
    .toLowerCase()
    .replace(/['\u2018\u2019]/g, '')
    .replace(/[^a-z0-9\s]/g, ' ')
    .replace(/\s+/g, ' ')
    .trim();
}

/**
 * Splits a personal name into first / middle / last components.
 *
 * Leading tokens found in COMMON_PREFIXES and trailing tokens found in
 * COMMON_SUFFIXES are dropped, but at least one token is always kept so a
 * name consisting solely of such tokens (e.g. "Dr") yields a last name
 * instead of failing. A single remaining token is treated as the last name.
 *
 * @param name - Raw name text.
 * @returns Normalized tokens; `initials` holds every one-letter first or
 *   middle name part.
 */
function parseName(name: string): NormalizedTokens {
  const parts = normalizeString(name).split(' ').filter(Boolean);
  const initials: string[] = [];

  if (parts.length === 0) {
    return { firstName: '', lastName: '', middleName: '', initials };
  }

  // Stop one short of the end: the final token must survive prefix stripping.
  let startIdx = 0;
  while (startIdx < parts.length - 1 && COMMON_PREFIXES.has(parts[startIdx])) {
    startIdx++;
  }

  let endIdx = parts.length;
  while (endIdx > startIdx + 1 && COMMON_SUFFIXES.has(parts[endIdx - 1])) {
    endIdx--;
  }

  const coreParts = parts.slice(startIdx, endIdx);

  let firstName = '';
  let middleName = '';
  const lastName = coreParts[coreParts.length - 1];
  if (coreParts.length >= 2) {
    firstName = coreParts[0];
    middleName = coreParts.slice(1, -1).join(' ');
  }

  if (firstName.length === 1) {
    initials.push(firstName);
  }
  if (middleName) {
    for (const middlePart of middleName.split(' ')) {
      if (middlePart.length === 1) initials.push(middlePart);
    }
  }

  return { firstName, lastName, middleName, initials };
}

/**
 * Maps a street type to its canonical full form.
 *
 * @param type - Raw street type such as "St", "Ave." or "Boulevard".
 * @returns The canonical form from STREET_TYPE_MAP, or the normalized input
 *   when the type is not in the map.
 */
function normalizeStreetType(type: string): string {
  const clean = normalizeString(type);
  return STREET_TYPE_MAP.get(clean) ?? clean;
}

/**
 * Builds a single normalized line for an address, skipping empty components.
 *
 * @param addr - Address to render.
 * @returns Space-joined street number, street name, street type, unit, city,
 *   state and zip.
 */
function normalizeAddress(addr: Address): string {
  const parts = [
    addr.streetNumber,
    normalizeString(addr.streetName),
    addr.streetType ? normalizeStreetType(addr.streetType) : '',
    addr.unit ? normalizeString(addr.unit) : '',
    normalizeString(addr.city),
    addr.state.toLowerCase(),
    addr.zip,
  ].filter(Boolean);
  return parts.join(' ');
}

/**
 * Compares two field values after normalization.
 *
 * Two empty values score 1, exactly one empty value scores 0, equal
 * normalized values score 1, and anything else scores the Levenshtein
 * similarity rounded to three decimals.
 *
 * @param valueA - Value from the first record.
 * @param valueB - Value from the second record.
 * @param normalizeFn - Normalizer to apply; defaults to `normalizeString`.
 * @returns The raw values, their normalized forms and the score.
 */
function computeFieldMatch(
  valueA: string,
  valueB: string,
  normalizeFn?: (v: string) => string,
): FieldMatch {
  const normFn = normalizeFn || normalizeString;
  const normalizedA = normFn(valueA);
  const normalizedB = normFn(valueB);

  let score: number;
  if (!normalizedA && !normalizedB) {
    score = 1.0;
  } else if (!normalizedA || !normalizedB) {
    score = 0.0;
  } else if (normalizedA === normalizedB) {
    score = 1.0;
  } else {
    const dist = levenshteinDistance(normalizedA, normalizedB);
    const maxLen = Math.max(normalizedA.length, normalizedB.length);
    score = Math.round(similarityScore(dist, maxLen) * 1000) / 1000;
  }

  return { valueA, valueB, normalizedA, normalizedB, score };
}

/**
 * Great-circle distance between two coordinates using the haversine formula.
 *
 * @param lat1 - Latitude of the first point, in degrees.
 * @param lon1 - Longitude of the first point, in degrees.
 * @param lat2 - Latitude of the second point, in degrees.
 * @param lon2 - Longitude of the second point, in degrees.
 * @returns Distance in metres on a sphere of radius 6,371,000 m.
 */
function haversineDistance(lat1: number, lon1: number, lat2: number, lon2: number): number {
  const R = 6371000;
  const toRadians = (degrees: number): number => (degrees * Math.PI) / 180;

  const dLat = toRadians(lat2 - lat1);
  const dLon = toRadians(lon2 - lon1);
  const rawA =
    Math.sin(dLat / 2) * Math.sin(dLat / 2) +
    Math.cos(toRadians(lat1)) * Math.cos(toRadians(lat2)) *
    Math.sin(dLon / 2) * Math.sin(dLon / 2);

  // Rounding can push the value a hair outside [0, 1] (near-antipodal points),
  // which would make sqrt(1 - a) NaN.
  const a = Math.min(1, Math.max(0, rawA));
  const c = 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1 - a));
  return R * c;
}

/** Compares the first, last and middle name components of two parsed names. */
function compareNameFields(tokensA: NormalizedTokens, tokensB: NormalizedTokens): NameFieldMatches {
  return {
    firstName: computeFieldMatch(tokensA.firstName, tokensB.firstName),
    lastName: computeFieldMatch(tokensA.lastName, tokensB.lastName),
    middleName: computeFieldMatch(tokensA.middleName, tokensB.middleName),
  };
}

/** Combines name field comparisons and initial overlap into one weighted score. */
function scoreNameFields(
  fields: NameFieldMatches,
  tokensA: NormalizedTokens,
  tokensB: NormalizedTokens,
): number {
  let initialMatchScore = 1.0;
  if (tokensA.initials.length > 0 || tokensB.initials.length > 0) {
    const initialsA = new Set(tokensA.initials.map(i => i.toLowerCase()));
    const initialsB = new Set(tokensB.initials.map(i => i.toLowerCase()));
    const matched = [...initialsA].filter(initial => initialsB.has(initial)).length;
    const total = Math.max(initialsA.size, initialsB.size);
    initialMatchScore = total > 0 ? matched / total : 1.0;
  }

  const weighted =
    fields.lastName.score * 0.45 +
    fields.firstName.score * 0.35 +
    fields.middleName.score * 0.1 +
    initialMatchScore * 0.1;
  return Math.round(weighted * 1000) / 1000;
}

/**
 * Scores how closely two parsed names agree.
 *
 * Weights: last name 0.45, first name 0.35, middle name 0.1, shared
 * initials 0.1.
 *
 * @param tokensA - Parsed name from the first record.
 * @param tokensB - Parsed name from the second record.
 * @returns A score rounded to three decimals.
 */
function computeNameScore(tokensA: NormalizedTokens, tokensB: NormalizedTokens): number {
  return scoreNameFields(compareNameFields(tokensA, tokensB), tokensA, tokensB);
}

/** Compares every textual address component of two addresses. */
function compareAddressFields(addrA: Address, addrB: Address): AddressFieldMatches {
  return {
    streetNumber: computeFieldMatch(addrA.streetNumber, addrB.streetNumber),
    streetName: computeFieldMatch(addrA.streetName, addrB.streetName, normalizeString),
    streetType: computeFieldMatch(
      addrA.streetType ? normalizeStreetType(addrA.streetType) : '',
      addrB.streetType ? normalizeStreetType(addrB.streetType) : '',
    ),
    unit: computeFieldMatch(addrA.unit || '', addrB.unit || ''),
    city: computeFieldMatch(addrA.city, addrB.city),
    state: computeFieldMatch(addrA.state, addrB.state),
    zip: computeFieldMatch(addrA.zip, addrB.zip),
  };
}

/**
 * Extracts a usable coordinate pair from an address.
 *
 * A latitude or longitude of exactly 0 is a legitimate coordinate, so
 * presence is decided by type and finiteness rather than truthiness. The
 * pair (0, 0) is still treated as "not geocoded" — this keeps the previous
 * behaviour for that input, on the assumption that it is a placeholder
 * rather than a real location (TODO confirm against the geocoder in use).
 */
function readGeocode(addr: Address): { lat: number; lon: number } | undefined {
  const { latitude, longitude } = addr;
  if (typeof latitude !== 'number' || typeof longitude !== 'number') return undefined;
  if (!Number.isFinite(latitude) || !Number.isFinite(longitude)) return undefined;
  if (latitude === 0 && longitude === 0) return undefined;
  return { lat: latitude, lon: longitude };
}

/** Turns address field comparisons plus optional geocoding into one score. */
function scoreAddressFields(
  fields: AddressFieldMatches,
  addrA: Address,
  addrB: Address,
  config: MatchingConfig,
): { score: number; geocodingDistance?: number } {
  let weightedSum = 0;
  let totalWeight = 0;
  for (const [field, weight] of ADDRESS_FIELD_WEIGHTS) {
    weightedSum += fields[field].score * weight;
    totalWeight += weight;
  }

  let geocodingDistance: number | undefined;
  const geoA = readGeocode(addrA);
  const geoB = readGeocode(addrB);
  if (geoA && geoB) {
    geocodingDistance = haversineDistance(geoA.lat, geoA.lon, geoB.lat, geoB.lon);
    const maxDist = config.geocodingRadiusMeters;
    // Full credit inside the radius, then a linear fall-off to 0 at six times the radius.
    const geoScore =
      geocodingDistance <= maxDist
        ? 1.0
        : Math.max(0, 1.0 - (geocodingDistance - maxDist) / (maxDist * 5));
    weightedSum += geoScore * GEOCODING_WEIGHT;
    totalWeight += GEOCODING_WEIGHT;
  }

  // Divide by the weight actually applied so that records without geocoding
  // can still reach 1.0 instead of being capped at 0.95.
  const score = Math.round((weightedSum / totalWeight) * 1000) / 1000;
  return { score, geocodingDistance };
}

/**
 * Scores how closely two addresses agree.
 *
 * @param addrA - Address from the first record.
 * @param addrB - Address from the second record.
 * @param config - Supplies `geocodingRadiusMeters`.
 * @returns The score rounded to three decimals and, when both addresses
 *   carry coordinates, the distance between them in metres.
 */
function computeAddressScore(
  addrA: Address,
  addrB: Address,
  config: MatchingConfig,
): { score: number; geocodingDistance?: number } {
  return scoreAddressFields(compareAddressFields(addrA, addrB), addrA, addrB, config);
}

/**
 * Compares two name/address records and decides whether they match.
 *
 * The overall confidence is the mean of the name score and the address
 * score; `isMatch` is true when it reaches `overallThreshold`.
 *
 * NOTE(review): `nameThreshold` and `addressThreshold` are part of the
 * configuration but are not consulted here, so the per-property-type
 * overrides currently have no effect on `isMatch` — confirm whether that is
 * intended.
 *
 * @param nameA - Name on the first record.
 * @param addressA - Address on the first record.
 * @param nameB - Name on the second record.
 * @param addressB - Address on the second record.
 * @param config - Optional overrides merged over DEFAULT_CONFIG.
 * @returns Scores, the match decision and per-field details.
 */
export function matchRecords(
  nameA: string,
  addressA: Address,
  nameB: string,
  addressB: Address,
  config?: Partial<MatchingConfig>,
): MatchResult {
  const effectiveConfig: MatchingConfig = { ...DEFAULT_CONFIG, ...config };

  const tokensA = parseName(nameA);
  const tokensB = parseName(nameB);

  // Each field comparison is computed once and reused for both the scores
  // and the details payload.
  const nameFields = compareNameFields(tokensA, tokensB);
  const addressFields = compareAddressFields(addressA, addressB);

  const nameScore = scoreNameFields(nameFields, tokensA, tokensB);
  const { score: addressScore, geocodingDistance } = scoreAddressFields(
    addressFields,
    addressA,
    addressB,
    effectiveConfig,
  );
  const overallConfidence = Math.round((nameScore * 0.5 + addressScore * 0.5) * 1000) / 1000;

  const normalizedNameA = normalizeString(nameA);
  const normalizedNameB = normalizeString(nameB);

  const details: MatchDetails = {
    nameNormalized: [normalizedNameA, normalizedNameB],
    addressNormalized: [normalizeAddress(addressA), normalizeAddress(addressB)],
    levenshteinDistance: levenshteinDistance(normalizedNameA, normalizedNameB),
    geocodingDistance,
    fields: {
      firstName: nameFields.firstName,
      lastName: nameFields.lastName,
      middleName: nameFields.middleName,
      streetNumber: addressFields.streetNumber,
      streetName: addressFields.streetName,
      streetType: addressFields.streetType,
      unit: addressFields.unit,
      city: addressFields.city,
      state: addressFields.state,
      zip: addressFields.zip,
    },
  };

  return {
    nameScore,
    addressScore,
    overallConfidence,
    isMatch: overallConfidence >= effectiveConfig.overallThreshold,
    details,
  };
}

/**
 * Returns the matching configuration for a property type.
 *
 * @param type - Property type whose overrides should be applied.
 * @returns DEFAULT_CONFIG with that type's overrides layered on top.
 */
export function getConfigForPropertyType(type: PropertyType): MatchingConfig {
  return { ...DEFAULT_CONFIG, ...PROPERTY_TYPE_CONFIGS[type] };
}

export { parseName, normalizeString, normalizeStreetType, levenshteinDistance, similarityScore };