Address deep research findings
This commit is contained in:
228
web/src/server/services/hometitle/county-scrapers/monitor.ts
Normal file
228
web/src/server/services/hometitle/county-scrapers/monitor.ts
Normal file
@@ -0,0 +1,228 @@
|
||||
// ---------------------------------------------------------------------------
|
||||
// Scraper health monitoring and breakage detection
|
||||
// Tracks success/failure rate per county, auto-disables broken scrapers,
|
||||
// and provides a dashboard for monitoring.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
import type { ScraperHealthEntry, ScraperHealthSummary } from "./types";
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// In-memory health store (could be persisted to DB for production)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Shape of the module-level, in-memory health store. */
interface HealthStore {
  /** Recorded scraper attempts, in the order they were pushed. */
  entries: Array<ScraperHealthEntry>;
  /** Currently disabled counties, keyed by county ID. */
  disabledCounties: Map<
    string,
    {
      reason: string;
      disabledAt: Date;
    }
  >;
}
|
||||
|
||||
/** Single shared store holding every recorded attempt and disable record. */
const healthStore: HealthStore = {
  disabledCounties: new Map(),
  entries: [],
};

/** Cap on retained attempts; only the most recent 100k are kept in memory. */
const MAX_ENTRIES = 100_000;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Configuration
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** A county is auto-disabled when more than this percentage of attempts fail. */
const FAILURE_THRESHOLD_PERCENT = 20;

/** Length of the rolling health window: 24 hours, expressed in milliseconds. */
const HEALTH_WINDOW_MS = 1000 * 60 * 60 * 24;

/** Minimum attempts inside the window before auto-disable is considered. */
const MIN_ATTEMPTS_FOR_DISABLE = 5;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Record a scraper attempt
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Record the outcome of one scraper run and update the county's disabled state.
 *
 * The entry is appended to the in-memory store, which is capped at
 * `MAX_ENTRIES` (the oldest entries are dropped first). A failed attempt
 * triggers the auto-disable check. A successful attempt for a disabled county
 * re-enables it once its success rate within the health window is back at or
 * above the level below which it would be auto-disabled.
 *
 * @param entry - The attempt to record.
 * @returns A promise that resolves once the store has been updated.
 */
export async function recordScraperAttempt(
  entry: ScraperHealthEntry,
): Promise<void> {
  healthStore.entries.push(entry);

  // Trim old entries so the store never holds more than MAX_ENTRIES.
  if (healthStore.entries.length > MAX_ENTRIES) {
    healthStore.entries = healthStore.entries.slice(-MAX_ENTRIES);
  }

  if (!entry.success) {
    await checkAndDisable(entry.countyId);
    return;
  }

  if (!healthStore.disabledCounties.has(entry.countyId)) {
    return;
  }

  // Re-enable using the same cut-off that auto-disable uses. A lower
  // re-enable threshold would let a county sitting between the two values
  // be disabled by a failure and re-enabled by the very next success.
  // NOTE(review): this also re-enables counties disabled through
  // manuallyDisableCounty, because the store does not distinguish manual
  // from automatic disables — confirm that is intended.
  const reEnableThreshold = 100 - FAILURE_THRESHOLD_PERCENT;
  const summary = getCountyHealth(entry.countyId);
  if (summary.successRate >= reEnableThreshold) {
    healthStore.disabledCounties.delete(entry.countyId);
  }
}
|
||||
|
||||
/**
 * Auto-disable a county when its recent failure rate is too high.
 *
 * The county is disabled when it has at least `MIN_ATTEMPTS_FOR_DISABLE`
 * attempts in the health window and its success rate is below
 * `100 - FAILURE_THRESHOLD_PERCENT`. A county that is already disabled is
 * left untouched so its original reason and `disabledAt` are preserved.
 *
 * @param countyId - County whose recent attempts should be evaluated.
 */
async function checkAndDisable(countyId: string): Promise<void> {
  // Do not overwrite an existing record: repeated failures would otherwise
  // replace a manually supplied reason and keep resetting disabledAt.
  if (healthStore.disabledCounties.has(countyId)) {
    return;
  }

  const summary = getCountyHealth(countyId);
  const minimumSuccessRate = 100 - FAILURE_THRESHOLD_PERCENT;

  if (
    summary.totalAttempts >= MIN_ATTEMPTS_FOR_DISABLE &&
    summary.successRate < minimumSuccessRate
  ) {
    healthStore.disabledCounties.set(countyId, {
      reason: `Auto-disabled: ${summary.failedAttempts}/${summary.totalAttempts} attempts failed in last 24h (${summary.successRate.toFixed(1)}% success rate)`,
      disabledAt: new Date(),
    });
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Query health
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Summarise one county's scraper attempts inside the monitoring window.
 *
 * Only entries for `countyId` whose timestamp is within the last
 * `HEALTH_WINDOW_MS` are counted. With no matching entries the success rate
 * is reported as 100, the average duration as 0 and the timestamps as `null`.
 * The "last" timestamps come from the most recently stored matching entry.
 *
 * @param countyId - County to summarise.
 * @returns Counts, rates, timestamps and the current disabled state.
 */
export function getCountyHealth(countyId: string): ScraperHealthSummary {
  const cutoff = Date.now() - HEALTH_WINDOW_MS;

  let firstInWindow: ScraperHealthEntry | undefined;
  let attemptCount = 0;
  let successCount = 0;
  let failureCount = 0;
  let durationTotal = 0;
  let lastAttempt: ScraperHealthEntry["timestamp"] | null = null;
  let lastSuccess: ScraperHealthEntry["timestamp"] | null = null;
  let lastFailure: ScraperHealthEntry["timestamp"] | null = null;

  // Single pass over the store, visiting entries in stored order.
  for (const attempt of healthStore.entries) {
    if (
      attempt.countyId !== countyId ||
      !(attempt.timestamp.getTime() >= cutoff)
    ) {
      continue;
    }

    if (attemptCount === 0) {
      firstInWindow = attempt;
    }
    attemptCount += 1;
    durationTotal += attempt.durationMs;
    lastAttempt = attempt.timestamp;

    if (attempt.success) {
      successCount += 1;
      lastSuccess = attempt.timestamp;
    } else {
      failureCount += 1;
      lastFailure = attempt.timestamp;
    }
  }

  const disabledInfo = healthStore.disabledCounties.get(countyId);
  const hasAttempts = attemptCount > 0;

  return {
    countyId,
    county: firstInWindow?.county ?? "",
    state: firstInWindow?.state ?? "",
    totalAttempts: attemptCount,
    successfulAttempts: successCount,
    failedAttempts: failureCount,
    successRate: hasAttempts ? (successCount / attemptCount) * 100 : 100,
    averageDurationMs: hasAttempts ? durationTotal / attemptCount : 0,
    lastAttempt,
    lastSuccess,
    lastFailure,
    isDisabled: disabledInfo !== undefined,
    disabledReason: disabledInfo?.reason,
  };
}
|
||||
|
||||
/**
 * Build a health summary for every county that has at least one stored entry.
 *
 * Counties appear in the order of their first entry in the store.
 *
 * @returns One summary per distinct county ID found in the store.
 */
export function getAllCountyHealth(): ScraperHealthSummary[] {
  const distinctIds = new Set<string>();
  for (const attempt of healthStore.entries) {
    distinctIds.add(attempt.countyId);
  }

  const summaries: ScraperHealthSummary[] = [];
  for (const id of distinctIds) {
    summaries.push(getCountyHealth(id));
  }
  return summaries;
}
|
||||
|
||||
/**
 * List every county that is currently disabled.
 *
 * @returns One record per disabled county with its ID, reason and the time
 *   it was disabled, in the order the counties were added to the store.
 */
export function getDisabledCounties(): Array<{
  countyId: string;
  reason: string;
  disabledAt: Date;
}> {
  const rows: Array<{ countyId: string; reason: string; disabledAt: Date }> =
    [];
  for (const [countyId, { reason, disabledAt }] of healthStore.disabledCounties) {
    rows.push({ countyId, reason, disabledAt });
  }
  return rows;
}
|
||||
|
||||
/**
 * Report whether the scraper for a county is currently disabled.
 *
 * @param countyId - County to look up.
 * @returns `true` when the store holds a disable record for the county.
 */
export function isCountyDisabled(countyId: string): boolean {
  const record = healthStore.disabledCounties.get(countyId);
  return record !== undefined;
}
|
||||
|
||||
/**
 * Disable a county scraper by hand (e.g., after a site redesign).
 *
 * Any existing disable record for the county is replaced.
 *
 * @param countyId - County to disable.
 * @param reason - Explanation stored with the disable record.
 */
export function manuallyDisableCounty(
  countyId: string,
  reason: string,
): void {
  const disabledAt = new Date();
  healthStore.disabledCounties.set(countyId, { reason, disabledAt });
}
|
||||
|
||||
/**
 * Re-enable a county scraper by hand.
 *
 * Removes the county's disable record; does nothing if there is none.
 *
 * @param countyId - County to re-enable.
 */
export function manuallyEnableCounty(countyId: string): void {
  const { disabledCounties } = healthStore;
  disabledCounties.delete(countyId);
}
|
||||
|
||||
/**
 * Summarise the health of the whole scraper system.
 *
 * Attempt totals are summed from the per-county summaries, so they cover the
 * monitoring window only. County counts are taken over the union of counties
 * that have stored entries and counties that are disabled, so a county that
 * is disabled without any stored entries is still counted and
 * `activeCounties` can never be negative.
 *
 * @returns County counts, attempt totals and the overall success rate
 *   (100 when there are no attempts).
 */
export function getOverallHealth(): {
  totalCounties: number;
  activeCounties: number;
  disabledCounties: number;
  overallSuccessRate: number;
  totalAttempts: number;
  totalSuccessful: number;
  totalFailed: number;
} {
  const allCounties = getAllCountyHealth();
  const disabled = getDisabledCounties();

  // A disabled county may have no stored entries (for example, disabled by
  // hand before its first attempt), so count it explicitly.
  const knownCountyIds = new Set<string>();
  for (const summary of allCounties) {
    knownCountyIds.add(summary.countyId);
  }
  for (const record of disabled) {
    knownCountyIds.add(record.countyId);
  }

  let totalAttempts = 0;
  let totalSuccessful = 0;
  for (const summary of allCounties) {
    totalAttempts += summary.totalAttempts;
    totalSuccessful += summary.successfulAttempts;
  }

  return {
    totalCounties: knownCountyIds.size,
    activeCounties: knownCountyIds.size - disabled.length,
    disabledCounties: disabled.length,
    overallSuccessRate:
      totalAttempts > 0 ? (totalSuccessful / totalAttempts) * 100 : 100,
    totalAttempts,
    totalSuccessful,
    totalFailed: totalAttempts - totalSuccessful,
  };
}
|
||||
|
||||
/**
 * Wipe all recorded attempts and disable records (useful for testing).
 */
export function resetHealthData(): void {
  healthStore.disabledCounties.clear();
  healthStore.entries = [];
}
|
||||
|
||||
/**
 * Return the stored entries for one county that fall inside a time window.
 *
 * @param countyId - County whose entries are wanted.
 * @param windowMs - Window length in milliseconds, counted back from now;
 *   defaults to `HEALTH_WINDOW_MS`.
 * @returns A new array of matching entries in stored order.
 */
export function getRawEntries(
  countyId: string,
  windowMs = HEALTH_WINDOW_MS,
): ScraperHealthEntry[] {
  const cutoff = Date.now() - windowMs;
  const matches: ScraperHealthEntry[] = [];

  for (const attempt of healthStore.entries) {
    if (attempt.countyId === countyId && attempt.timestamp.getTime() >= cutoff) {
      matches.push(attempt);
    }
  }

  return matches;
}
|
||||
Reference in New Issue
Block a user