herm
This commit is contained in:
@@ -667,11 +667,23 @@ function scanDataset(): ScanResult {
|
||||
return { diseaseCounts, healthyCount };
|
||||
}
|
||||
|
||||
// ─── CLI Flags ──────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Reads the command-line switches understood by this script.
 *
 * Only the entries of `process.argv` from index 2 onward are inspected;
 * the first two entries are skipped.
 *
 * @returns An object whose `reverse` field is `true` when either the long
 *          form `--reverse` or the short form `-r` appears among those
 *          arguments, and `false` otherwise.
 */
function parseFlags(): { reverse: boolean } {
  const cliArgs = process.argv.slice(2);
  // Exact-match comparison: a flag must appear as its own argument.
  const reverse = cliArgs.some((arg) => arg === "--reverse" || arg === "-r");
  return { reverse };
}
|
||||
|
||||
// ─── Main ───────────────────────────────────────────────────────────────────
|
||||
|
||||
async function main() {
|
||||
const flags = parseFlags();
|
||||
|
||||
console.log("=".repeat(60));
|
||||
console.log("TRAINING DATASET FILL — Parallelized gap-filling download");
|
||||
if (flags.reverse) console.log(" (reverse order — processing from lowest deficit first)");
|
||||
console.log("=".repeat(60));
|
||||
|
||||
// Ensure dataset directory exists
|
||||
@@ -717,6 +729,10 @@ async function main() {
|
||||
// Sort by deficit size (largest first) so we prioritize the neediest diseases
|
||||
deficits.sort((a, b) => b.needed - a.needed);
|
||||
|
||||
// Reverse order if --reverse/-r flag is set (useful to try a different
|
||||
// direction when the front of the queue keeps hitting dead URLs)
|
||||
if (flags.reverse) deficits.reverse();
|
||||
|
||||
const healthyDeficit = TARGET_HEALTHY - healthyCount;
|
||||
|
||||
console.log(`\n${"=".repeat(60)}`);
|
||||
@@ -727,6 +743,9 @@ async function main() {
|
||||
console.log(` Healthy deficit: ${Math.max(0, healthyDeficit)}`);
|
||||
console.log(` Parallelism: ${DISEASE_CONCURRENCY} diseases at once`);
|
||||
console.log(` DDG rate limit: ${DDG_RATE_LIMIT_RPS} req/s (shared)`);
|
||||
console.log(
|
||||
` Order: ${flags.reverse ? "reverse (--reverse)" : "normal (deficit-first)"}`,
|
||||
);
|
||||
console.log(`${"=".repeat(60)}`);
|
||||
|
||||
if (deficits.length === 0 && healthyDeficit <= 0) {
|
||||
|
||||
Reference in New Issue
Block a user