This commit is contained in:
2026-06-07 16:41:33 -04:00
parent c61540fc63
commit bc4843fb88

View File

@@ -667,11 +667,23 @@ function scanDataset(): ScanResult {
return { diseaseCounts, healthyCount };
}
// ─── CLI Flags ──────────────────────────────────────────────────────────────
/**
 * Reads the command-line switches understood by this script.
 *
 * Only the entries of `process.argv` from index 2 onward are inspected;
 * the first two entries are dropped before matching.
 *
 * @returns An object whose `reverse` field is `true` when either
 *   `--reverse` or `-r` appears among those arguments, `false` otherwise.
 */
function parseFlags(): { reverse: boolean } {
  // Both spellings of the switch are matched exactly (no prefix matching).
  const reverseSwitches = ["--reverse", "-r"];
  const cliArgs = process.argv.slice(2);
  const reverse = cliArgs.some((arg) => reverseSwitches.includes(arg));
  return { reverse };
}
// ─── Main ───────────────────────────────────────────────────────────────────
async function main() {
const flags = parseFlags();
console.log("=".repeat(60));
console.log("TRAINING DATASET FILL — Parallelized gap-filling download");
if (flags.reverse) console.log(" (reverse order — processing from lowest deficit first)");
console.log("=".repeat(60));
// Ensure dataset directory exists
@@ -717,6 +729,10 @@ async function main() {
// Sort by deficit size (largest first) so we prioritize the neediest diseases
deficits.sort((a, b) => b.needed - a.needed);
// Reverse order if --reverse/-r flag is set (useful to try a different
// direction when the front of the queue keeps hitting dead URLs)
if (flags.reverse) deficits.reverse();
const healthyDeficit = TARGET_HEALTHY - healthyCount;
console.log(`\n${"=".repeat(60)}`);
@@ -727,6 +743,9 @@ async function main() {
console.log(` Healthy deficit: ${Math.max(0, healthyDeficit)}`);
console.log(` Parallelism: ${DISEASE_CONCURRENCY} diseases at once`);
console.log(` DDG rate limit: ${DDG_RATE_LIMIT_RPS} req/s (shared)`);
console.log(
` Order: ${flags.reverse ? "reverse (--reverse)" : "normal (deficit-first)"}`,
);
console.log(`${"=".repeat(60)}`);
if (deficits.length === 0 && healthyDeficit <= 0) {