No longer rely on JSON

This commit is contained in:
2026-06-08 09:40:01 -04:00
parent 9f9b88c8db
commit edfe2a3331
9 changed files with 148 additions and 645 deletions

View File

@@ -59,7 +59,7 @@ const TARGET_HEALTHY = 400;
* Each disease is I/O-bound (HTTP requests), so high concurrency is safe.
* The global DDG rate limiter prevents us from overwhelming DuckDuckGo.
*/
const DISEASE_CONCURRENCY = 30;
const DISEASE_CONCURRENCY = 60;
/**
* Max DDG requests per second (shared across all concurrent diseases).
@@ -68,10 +68,10 @@ const DISEASE_CONCURRENCY = 30;
* parallel pages = 9 parallel DDG requests per disease at peak.
* The rate limiter serializes these requests so we don't get banned.
*/
const DDG_RATE_LIMIT_RPS = 15;
const DDG_RATE_LIMIT_RPS = 3;
/** Max concurrent image downloads per disease */
const CONCURRENT_DOWNLOADS = 30;
const CONCURRENT_DOWNLOADS = 3;
/** Minimum image size in bytes to accept */
const MIN_IMAGE_SIZE = 10_000; // 10KB
@@ -84,7 +84,7 @@ const ALLOWED_EXTENSIONS = [".jpg", ".jpeg", ".png", ".webp"];
/** User agent for requests */
const UA =
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36";
"Mozilla/5.0 (iPhone; CPU iPhone OS 17_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4 Mobile/15E148 Safari/604.1";
/** Healthy class directory name */
const HEALTHY_CLASS = "healthy";