From bc4843fb88b33d6baf2007b347c0f26e9f670db4 Mon Sep 17 00:00:00 2001 From: Michael Freno Date: Sun, 7 Jun 2026 16:41:33 -0400 Subject: [PATCH] herm --- apps/web/scripts/fill-training-dataset.ts | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/apps/web/scripts/fill-training-dataset.ts b/apps/web/scripts/fill-training-dataset.ts index 163d51d..8757455 100644 --- a/apps/web/scripts/fill-training-dataset.ts +++ b/apps/web/scripts/fill-training-dataset.ts @@ -667,11 +667,23 @@ function scanDataset(): ScanResult { return { diseaseCounts, healthyCount }; } +// ─── CLI Flags ────────────────────────────────────────────────────────────── + +function parseFlags(): { reverse: boolean } { + const args = process.argv.slice(2); + return { + reverse: args.includes("--reverse") || args.includes("-r"), + }; +} + // ─── Main ─────────────────────────────────────────────────────────────────── async function main() { + const flags = parseFlags(); + console.log("=".repeat(60)); console.log("TRAINING DATASET FILL — Parallelized gap-filling download"); + if (flags.reverse) console.log(" (reverse order — processing from lowest deficit first)"); console.log("=".repeat(60)); // Ensure dataset directory exists @@ -717,6 +729,10 @@ async function main() { // Sort by deficit size (largest first) so we prioritize the neediest diseases deficits.sort((a, b) => b.needed - a.needed); + // Reverse order if --reverse/-r flag is set (useful to try a different + // direction when the front of the queue keeps hitting dead URLs) + if (flags.reverse) deficits.reverse(); + const healthyDeficit = TARGET_HEALTHY - healthyCount; console.log(`\n${"=".repeat(60)}`); @@ -727,6 +743,9 @@ async function main() { console.log(` Healthy deficit: ${Math.max(0, healthyDeficit)}`); console.log(` Parallelism: ${DISEASE_CONCURRENCY} diseases at once`); console.log(` DDG rate limit: ${DDG_RATE_LIMIT_RPS} req/s (shared)`); + console.log( + ` Order: ${flags.reverse ? "reverse (--reverse)" : "normal (deficit-first)"}`, + ); console.log(`${"=".repeat(60)}`); if (deficits.length === 0 && healthyDeficit <= 0) {