// ---------------------------------------------------------------------------
// Scraper health monitoring and breakage detection
// Tracks success/failure rate per county, auto-disables broken scrapers,
// and provides the query functions a monitoring dashboard needs.
// ---------------------------------------------------------------------------

import type { ScraperHealthEntry, ScraperHealthSummary } from "./types";

// ---------------------------------------------------------------------------
// In-memory health store (could be persisted to DB for production)
// ---------------------------------------------------------------------------

/** Why and when a county scraper was taken out of rotation. */
interface DisabledCountyInfo {
  reason: string;
  disabledAt: Date;
}

interface HealthStore {
  /** Recorded attempts, in the order they were recorded. */
  entries: ScraperHealthEntry[];
  /** Currently disabled counties, keyed by county ID. */
  disabledCounties: Map<string, DisabledCountyInfo>;
}

const healthStore: HealthStore = {
  entries: [],
  disabledCounties: new Map<string, DisabledCountyInfo>(),
};

const MAX_ENTRIES = 100_000; // Keep last 100k entries in memory

// ---------------------------------------------------------------------------
// Configuration
// ---------------------------------------------------------------------------

const FAILURE_THRESHOLD_PERCENT = 20; // Disable if >20% failure in window
const HEALTH_WINDOW_MS = 24 * 60 * 60 * 1000; // 24 hours
const MIN_ATTEMPTS_FOR_DISABLE = 5; // Need at least 5 attempts before disabling
// Success rate (percent) at which a disabled county is switched back on.
// NOTE(review): this is lower than the disable threshold
// (100 - FAILURE_THRESHOLD_PERCENT = 80), so a county re-enabled between 70%
// and 80% is disabled again by its next failure. Confirm this is intended.
const REENABLE_SUCCESS_RATE_PERCENT = 70;

// ---------------------------------------------------------------------------
// Internal helpers
// ---------------------------------------------------------------------------

/** Entries for `countyId` whose timestamp falls within the last `windowMs`. */
function entriesInWindow(
  countyId: string,
  windowMs: number,
): ScraperHealthEntry[] {
  const windowStart = Date.now() - windowMs;
  return healthStore.entries.filter(
    (e) => e.countyId === countyId && e.timestamp.getTime() >= windowStart,
  );
}

/**
 * Most recent timestamp among `entries`, or null when there are none.
 * Compares timestamps instead of trusting array order, so attempts recorded
 * out of chronological order are still reported correctly.
 */
function latestTimestamp(entries: readonly ScraperHealthEntry[]): Date | null {
  let latest: Date | null = null;
  for (const e of entries) {
    if (latest === null || e.timestamp.getTime() >= latest.getTime()) {
      latest = e.timestamp;
    }
  }
  return latest;
}

// ---------------------------------------------------------------------------
// Record a scraper attempt
// ---------------------------------------------------------------------------

/**
 * Record the outcome of one scraper run and update the county's
 * enabled/disabled state.
 *
 * A failed attempt may auto-disable the county (see `checkAndDisable`).
 * A successful attempt re-enables a disabled county once its success rate in
 * the monitoring window reaches `REENABLE_SUCCESS_RATE_PERCENT`.
 *
 * NOTE(review): the re-enable path does not distinguish manual from automatic
 * disables, so a success can undo `manuallyDisableCounty`.
 *
 * @param entry - The attempt to record.
 */
export async function recordScraperAttempt(
  entry: ScraperHealthEntry,
): Promise<void> {
  healthStore.entries.push(entry);

  // Trim old entries so the store never holds more than MAX_ENTRIES.
  if (healthStore.entries.length > MAX_ENTRIES) {
    healthStore.entries = healthStore.entries.slice(-MAX_ENTRIES);
  }

  if (!entry.success) {
    // Check if this county should be auto-disabled.
    await checkAndDisable(entry.countyId);
    return;
  }

  // Re-enable a previously disabled county if it's now working.
  if (healthStore.disabledCounties.has(entry.countyId)) {
    const summary = getCountyHealth(entry.countyId);
    if (summary.successRate >= REENABLE_SUCCESS_RATE_PERCENT) {
      healthStore.disabledCounties.delete(entry.countyId);
    }
  }
}

/**
 * Disable a county when it has at least `MIN_ATTEMPTS_FOR_DISABLE` attempts in
 * the monitoring window and its success rate is below
 * `100 - FAILURE_THRESHOLD_PERCENT`.
 */
async function checkAndDisable(countyId: string): Promise<void> {
  const summary = getCountyHealth(countyId);

  if (
    summary.totalAttempts >= MIN_ATTEMPTS_FOR_DISABLE &&
    summary.successRate < 100 - FAILURE_THRESHOLD_PERCENT
  ) {
    healthStore.disabledCounties.set(countyId, {
      reason: `Auto-disabled: ${summary.failedAttempts}/${summary.totalAttempts} attempts failed in last 24h (${summary.successRate.toFixed(1)}% success rate)`,
      disabledAt: new Date(),
    });
  }
}

// ---------------------------------------------------------------------------
// Query health
// ---------------------------------------------------------------------------

/**
 * Get the health summary for a specific county within the monitoring window.
 *
 * With no attempts in the window the summary reports a 100% success rate, a
 * zero average duration, null timestamps and empty `county` / `state`.
 *
 * @param countyId - County to summarise.
 * @returns Counts, rates and timestamps for the window, plus disabled state.
 */
export function getCountyHealth(countyId: string): ScraperHealthSummary {
  const countyEntries = entriesInWindow(countyId, HEALTH_WINDOW_MS);
  const successful = countyEntries.filter((e) => e.success);
  const failed = countyEntries.filter((e) => !e.success);
  const disabled = healthStore.disabledCounties.get(countyId);

  const total = countyEntries.length;
  const totalDurationMs = countyEntries.reduce(
    (sum, e) => sum + e.durationMs,
    0,
  );

  return {
    countyId,
    county: countyEntries[0]?.county ?? "",
    state: countyEntries[0]?.state ?? "",
    totalAttempts: total,
    successfulAttempts: successful.length,
    failedAttempts: failed.length,
    // No data is treated as healthy so an idle county is never penalised.
    successRate: total > 0 ? (successful.length / total) * 100 : 100,
    averageDurationMs: total > 0 ? totalDurationMs / total : 0,
    lastAttempt: latestTimestamp(countyEntries),
    lastSuccess: latestTimestamp(successful),
    lastFailure: latestTimestamp(failed),
    isDisabled: disabled !== undefined,
    disabledReason: disabled?.reason,
  };
}

/**
 * Get health summaries for all counties that have been scraped, i.e. every
 * county with at least one entry still held in the store.
 */
export function getAllCountyHealth(): ScraperHealthSummary[] {
  const countyIds = [...new Set(healthStore.entries.map((e) => e.countyId))];
  return countyIds.map((id) => getCountyHealth(id));
}

/**
 * Get the list of disabled counties with the reason and time of disabling.
 */
export function getDisabledCounties(): Array<{
  countyId: string;
  reason: string;
  disabledAt: Date;
}> {
  return Array.from(healthStore.disabledCounties.entries()).map(
    ([countyId, info]) => ({
      countyId,
      reason: info.reason,
      disabledAt: info.disabledAt,
    }),
  );
}

/**
 * Check if a specific county scraper is currently disabled.
 */
export function isCountyDisabled(countyId: string): boolean {
  return healthStore.disabledCounties.has(countyId);
}

/**
 * Manually disable a county scraper (e.g., after a site redesign).
 * Overwrites any existing disable record for the county.
 */
export function manuallyDisableCounty(countyId: string, reason: string): void {
  healthStore.disabledCounties.set(countyId, {
    reason,
    disabledAt: new Date(),
  });
}

/**
 * Manually re-enable a county scraper. Does nothing if it is not disabled.
 */
export function manuallyEnableCounty(countyId: string): void {
  healthStore.disabledCounties.delete(countyId);
}

/**
 * Get the overall scraper system health summary.
 *
 * Counties are counted over the union of scraped counties and disabled
 * counties. A county can be disabled without having any recorded attempts
 * (manual disable, or its entries were trimmed), so subtracting the disabled
 * count from the scraped count alone could yield a negative `activeCounties`.
 * With the union, `totalCounties === activeCounties + disabledCounties`.
 */
export function getOverallHealth(): {
  totalCounties: number;
  activeCounties: number;
  disabledCounties: number;
  overallSuccessRate: number;
  totalAttempts: number;
  totalSuccessful: number;
  totalFailed: number;
} {
  const allCounties = getAllCountyHealth();

  const knownCountyIds = new Set(allCounties.map((c) => c.countyId));
  for (const countyId of healthStore.disabledCounties.keys()) {
    knownCountyIds.add(countyId);
  }
  const disabledCount = healthStore.disabledCounties.size;

  const totalAttempts = allCounties.reduce((s, c) => s + c.totalAttempts, 0);
  const totalSuccessful = allCounties.reduce(
    (s, c) => s + c.successfulAttempts,
    0,
  );

  return {
    totalCounties: knownCountyIds.size,
    activeCounties: knownCountyIds.size - disabledCount,
    disabledCounties: disabledCount,
    overallSuccessRate:
      totalAttempts > 0 ? (totalSuccessful / totalAttempts) * 100 : 100,
    totalAttempts,
    totalSuccessful,
    totalFailed: totalAttempts - totalSuccessful,
  };
}

/**
 * Reset all health data (useful for testing).
 */
export function resetHealthData(): void {
  healthStore.entries = [];
  healthStore.disabledCounties.clear();
}

/**
 * Get the raw entries for a given county within a time window.
 *
 * @param countyId - County whose entries to return.
 * @param windowMs - Window length in milliseconds, ending now. Defaults to
 *   the monitoring window.
 */
export function getRawEntries(
  countyId: string,
  windowMs = HEALTH_WINDOW_MS,
): ScraperHealthEntry[] {
  return entriesInWindow(countyId, windowMs);
}