#!/usr/bin/env node
/**
 * fill-brave-images-v2.ts — Brave Image Search for remaining disease images.
 *
 * Prioritizes by severity (critical → high → moderate → low).
 * Runs at 1 request/sec (Brave free tier rate limit).
 * Updates Turso DB directly with found images.
 * When the current key is exhausted, rotates to the next supplied key.
 * Falls back to duckduckgo-images-api when all keys are spent.
 *
 * Progress is checkpointed to `.brave-progress.json` next to this script so
 * an interrupted run can resume without re-querying diseases already tried.
 *
 * Usage:
 *   cd apps/web && npx tsx scripts/fill-brave-images-v2.ts
 *
 * Pass additional API keys as args:
 *   npx tsx scripts/fill-brave-images-v2.ts KEY2 KEY3
 */

import { readFileSync, writeFileSync } from "fs";
import { resolve } from "path";

// Load env. This runs before the project imports below so that any module
// reading process.env at load time sees these values. Variables that are
// already set in the environment are never overwritten.
const envPath = resolve(__dirname, "../.env.development");
try {
  const env = readFileSync(envPath, "utf-8");
  for (const line of env.split("\n")) {
    const trimmed = line.trim();
    if (trimmed && !trimmed.startsWith("#")) {
      const eqIdx = trimmed.indexOf("=");
      if (eqIdx > 0) {
        const key = trimmed.slice(0, eqIdx).trim();
        const val = trimmed.slice(eqIdx + 1).trim();
        if (!process.env[key]) process.env[key] = val;
      }
    }
  }
} catch {
  // Best-effort: a missing or unreadable .env.development is not an error.
}

// Also try .env.local for BRAVE_API_KEY
try {
  const envLocal = readFileSync(resolve(__dirname, "../.env.local"), "utf-8");
  for (const line of envLocal.split("\n")) {
    const trimmed = line.trim();
    if (trimmed.startsWith("BRAVE_API_KEY=")) {
      const val = trimmed.slice("BRAVE_API_KEY=".length).trim();
      if (!process.env.BRAVE_API_KEY) process.env.BRAVE_API_KEY = val;
    }
  }
} catch {
  // Best-effort: .env.local is optional.
}

import { getDb, closeDb } from "../src/lib/db/index";
import { diseases } from "../src/lib/db/schema";
import { createClient } from "@libsql/client";
import { sql } from "drizzle-orm";

/** Columns selected for each disease that still lacks an image. */
interface DiseaseRow {
  id: string;
  name: string;
  scientificName: string;
  severity: string;
  plantId: string;
}

/** A pending `image_url` write for one disease. */
interface ImageUpdate {
  id: string;
  url: string;
}

type LibsqlClient = ReturnType<typeof createClient>;

// ─── Config ──────────────────────────────────────────────────────────────────

const BRAVE_DELAY = 1100; // ms between calls (1 req/sec)
const DDG_DELAY = 500; // ms between DuckDuckGo calls (to be polite)
const DB_FLUSH_BATCH = 50;
const STATE_SAVE_INTERVAL = 50; // checkpoint the state file every N diseases
const MAX_PER_KEY = 1800; // Leave 200 buffer of the 2000/mo limit
const STATE_FILE = resolve(__dirname, ".brave-progress.json");

/** Sentinel returned by `braveImageSearch` when the API answers HTTP 429. */
const RATE_LIMITED = "RATE_LIMITED";

/** Hosts we would rather not link to when another result is available. */
const STOCK_SITE_PATTERN = /(dreamstime|shutterstock|alamy|istock|123rf)/i;

/** Sort rank per severity; anything not listed sorts after `low`. */
const SEVERITY_RANK: Record<string, number> = { critical: 0, high: 1, moderate: 2, low: 3 };
const UNKNOWN_SEVERITY_RANK = 99;

let currentKeyIndex = 0;
let braveKeys: string[] = [];
let callsThisKey = 0;
let totalFound = 0;
// totalSkipped tracking removed — not needed for v2

/** Resolves after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => setTimeout(done, ms));
}

// ─── State persistence ───────────────────────────────────────────────────────

/** Shape of the JSON checkpoint stored in `STATE_FILE`. */
interface RunState {
  processedIds: string[];
  currentKeyIndex: number;
  callsThisKey: number;
  totalFound: number;
}

/** Runtime check that a parsed JSON value has the `RunState` shape. */
function isRunState(value: unknown): value is RunState {
  if (typeof value !== "object" || value === null) return false;
  const candidate = value as Record<string, unknown>;
  const isCount = (n: unknown): boolean =>
    typeof n === "number" && Number.isInteger(n) && n >= 0;
  return (
    Array.isArray(candidate.processedIds) &&
    candidate.processedIds.every((id) => typeof id === "string") &&
    isCount(candidate.currentKeyIndex) &&
    isCount(candidate.callsThisKey) &&
    isCount(candidate.totalFound)
  );
}

/**
 * Reads the checkpoint written by a previous run.
 *
 * @returns The saved state, or `null` when the file is missing, is not valid
 *   JSON, or does not have the expected shape.
 */
function loadState(): RunState | null {
  try {
    const parsed: unknown = JSON.parse(readFileSync(STATE_FILE, "utf-8"));
    return isRunState(parsed) ? parsed : null;
  } catch {
    return null;
  }
}

/**
 * Writes the checkpoint file from `processedIds` plus the module-level
 * key/usage counters.
 *
 * @param processedIds IDs of every disease attempted so far (found or not).
 */
function saveState(processedIds: string[]) {
  const snapshot: RunState = {
    processedIds,
    currentKeyIndex,
    callsThisKey,
    totalFound,
  };
  writeFileSync(STATE_FILE, JSON.stringify(snapshot, null, 2), "utf-8");
}

// ─── Brave API ───────────────────────────────────────────────────────────────

/**
 * Queries the Brave image search endpoint with the currently selected key.
 *
 * Network errors are retried up to three times with a 2 s pause between
 * attempts. `callsThisKey` is incremented for each successful (2xx) response.
 *
 * @param query Free-text search query.
 * @returns An image URL (thumbnail preferred, non-stock hosts preferred), the
 *   string `"RATE_LIMITED"` when the API answers 429, or `null` when there is
 *   no key, the response is not OK, there are no results, or every attempt
 *   threw.
 */
async function braveImageSearch(query: string): Promise<string | null> {
  const key = braveKeys[currentKeyIndex];
  if (!key) return null;

  const url = new URL("https://api.search.brave.com/res/v1/images/search");
  url.searchParams.set("q", query);
  url.searchParams.set("count", "3");

  for (let attempt = 0; attempt < 3; attempt++) {
    try {
      const res = await fetch(url.toString(), {
        headers: { "X-Subscription-Token": key, Accept: "application/json" },
      });
      if (res.status === 429) {
        console.log("\n [RATE LIMITED] Key " + (currentKeyIndex + 1) + " exhausted!");
        return RATE_LIMITED;
      }
      if (!res.ok) return null;
      callsThisKey++;

      const data = (await res.json()) as {
        results?: Array<{ url: string; thumbnail?: { src?: string } }>;
      };
      const results = data?.results ?? [];
      const first = results[0];
      if (!first) return null;

      // Prefer non-stock images
      for (const r of results) {
        const src = r.thumbnail?.src ?? r.url;
        if (src && !STOCK_SITE_PATTERN.test(src)) {
          return src;
        }
      }
      // Every result is from a stock site: settle for the first one.
      return first.thumbnail?.src ?? first.url;
    } catch {
      // fetch or JSON parsing failed; wait before the next attempt.
      await sleep(2000);
    }
  }
  return null;
}

// ─── DuckDuckGo fallback ────────────────────────────────────────────────────

/**
 * Searches images through the optional `duckduckgo-images-api` package.
 *
 * @param query Free-text search query.
 * @returns The first non-stock image URL, else the first result's image, else
 *   `null` (package not installed, no results, or the search threw).
 */
async function ddgFallbackSearch(query: string): Promise<string | null> {
  try {
    // Try to use duckduckgo-images-api if installed
    const ddg = await import("duckduckgo-images-api").catch(() => null);
    if (ddg) {
      const results = await ddg.image_search({ query, moderate: true });
      if (results && results.length > 0) {
        for (const r of results) {
          if (r.image && !STOCK_SITE_PATTERN.test(r.image)) {
            return r.image;
          }
        }
        return results[0].image || null;
      }
    }
  } catch {
    // duckduckgo-images-api not installed
  }
  return null;
}

/**
 * Makes sure `duckduckgo-images-api` can be imported, running `npm install`
 * in the parent directory of this script when it cannot.
 *
 * An install failure is allowed to propagate: continuing without the package
 * would mark every remaining disease as "attempted" without searching for it.
 */
async function ensureDdgInstalled(): Promise<void> {
  try {
    await import("duckduckgo-images-api");
    return;
  } catch {
    // Not importable yet — install it below.
  }
  console.log(" Installing duckduckgo-images-api...");
  const { execSync } = await import("child_process");
  execSync("npm install duckduckgo-images-api", {
    cwd: resolve(__dirname, ".."),
    stdio: "pipe",
  });
  console.log(" Done.\n");
}

// ─── DB helpers ──────────────────────────────────────────────────────────────

/**
 * Writes the given image URLs to the `diseases` table in one write batch.
 * Does nothing when `updates` is empty.
 */
async function flushUpdates(
  client: LibsqlClient,
  updates: ReadonlyArray<ImageUpdate>,
): Promise<void> {
  if (updates.length === 0) return;
  await client.batch(
    updates.map((u) => ({
      sql: "UPDATE diseases SET image_url = ?, updated_at = datetime() WHERE id = ?",
      args: [u.url, u.id],
    })),
    "write",
  );
  console.log(` → Flushed ${updates.length} to DB`);
}

// ─── Main ────────────────────────────────────────────────────────────────────

/**
 * Entry point: collects API keys, resumes from the checkpoint if present,
 * searches an image for every disease without one (highest severity first),
 * writes results to the database, and prints/writes a final report.
 */
async function main(): Promise<void> {
  console.log("\n🔍 Brave Disease Image Filler v2\n");

  // Parse keys from args + env
  const argsKeys = process.argv.slice(2).filter((a) => !a.startsWith("-"));
  const envKey = process.env.BRAVE_API_KEY;
  const suppliedKeys = [envKey, ...argsKeys].filter((k): k is string => Boolean(k));
  braveKeys = [...new Set(suppliedKeys)]; // dedup

  if (braveKeys.length === 0) {
    console.log("❌ No Brave API keys found.");
    console.log(" Set BRAVE_API_KEY in .env.local or pass as argument.\n");
    process.exit(1);
  }
  console.log(`🔑 ${braveKeys.length} Brave API key(s) available\n`);

  // Load state
  const state = loadState();
  if (state) {
    currentKeyIndex = state.currentKeyIndex;
    callsThisKey = state.callsThisKey;
    totalFound = state.totalFound;
    if (currentKeyIndex >= braveKeys.length) {
      // Fewer keys were supplied than in the saved run; the saved index would
      // select no key at all and every search would silently return null.
      console.log(" ⚠️ Saved key index is out of range for this run; restarting from key 1.\n");
      currentKeyIndex = 0;
      callsThisKey = 0;
    }
    console.log(
      `📋 Resuming from previous run (${state.processedIds.length} processed, ${totalFound} found)\n`,
    );
  }

  // Get diseases from DB
  const db = getDb();
  const allDiseases = (await db
    .select({
      id: diseases.id,
      name: diseases.name,
      scientificName: diseases.scientificName,
      severity: diseases.severity,
      plantId: diseases.plantId,
    })
    .from(diseases)
    .where(sql`(image_url IS NULL OR image_url = '')`)
    .all()) as DiseaseRow[];

  console.log(`📋 ${allDiseases.length} diseases need images\n`);
  if (allDiseases.length === 0) {
    console.log("✅ All diseases already have images!\n");
    closeDb();
    return;
  }

  // Sort by severity priority. `??` (not `||`) so that rank 0 ("critical")
  // is kept instead of being replaced by the unknown-severity rank.
  const rankOf = (row: DiseaseRow): number =>
    SEVERITY_RANK[row.severity] ?? UNKNOWN_SEVERITY_RANK;
  allDiseases.sort((a, b) => rankOf(a) - rankOf(b));

  // Filter out already-processed from state
  const processedSet = new Set<string>(state?.processedIds ?? []);
  const pending = allDiseases.filter((d) => !processedSet.has(d.id));

  const pendingWith = (level: string): number =>
    pending.filter((d) => d.severity === level).length;
  console.log(
    `📊 Prioritization: critical=${pendingWith("critical")}, high=${pendingWith("high")}, moderate=${pendingWith("moderate")}, low=${pendingWith("low")}\n`,
  );

  if (pending.length === 0) {
    console.log("✅ All remaining diseases already attempted\n");
    closeDb();
    return;
  }

  const databaseUrl = process.env.DATABASE_URL;
  if (!databaseUrl) {
    throw new Error("DATABASE_URL is not set");
  }
  const raw = createClient({
    url: databaseUrl,
    authToken: process.env.DATABASE_TOKEN,
  });

  let updates: ImageUpdate[] = [];
  const processedIds: string[] = [...(state?.processedIds ?? [])];
  let foundThisRun = 0;
  let ddgMode = false;

  try {
    for (const [i, d] of pending.entries()) {
      // Check if current key needs rotating
      if (!ddgMode && callsThisKey >= MAX_PER_KEY) {
        if (currentKeyIndex < braveKeys.length - 1) {
          currentKeyIndex++;
          callsThisKey = 0;
          console.log(`\n 🔄 Rotating to key ${currentKeyIndex + 1}/${braveKeys.length}\n`);
        } else {
          console.log(
            `\n ⚠️ All ${braveKeys.length} Brave keys exhausted. Switching to DuckDuckGo fallback.\n`,
          );
          ddgMode = true;
          await ensureDdgInstalled();
        }
      }

      // Build search query ("some-plant-id" → "Some Plant Id")
      const plantName = d.plantId.replace(/-/g, " ").replace(/\b\w/g, (c) => c.toUpperCase());
      const query = `${d.name} ${d.scientificName} ${plantName} plant disease`;

      const sev = d.severity.padEnd(8);
      process.stdout.write(
        ` [${String(i + 1).padStart(4)}/${pending.length}] [${sev}] ${d.name.substring(0, 40).padEnd(42)} `,
      );

      let url: string | null = null;

      if (!ddgMode) {
        let result = await braveImageSearch(query);
        // Key exhausted mid-query: keep rotating until a key answers or none
        // are left, so the sentinel can never be stored as an image URL.
        while (result === RATE_LIMITED && currentKeyIndex < braveKeys.length - 1) {
          currentKeyIndex++;
          callsThisKey = 0;
          console.log("\n 🔄 Rotating key...");
          result = await braveImageSearch(query);
        }
        if (result === RATE_LIMITED) {
          console.log("\n ⚠️ All keys exhausted mid-batch!");
          ddgMode = true;
          await ensureDdgInstalled();
        } else {
          url = result;
        }
      }

      if (ddgMode) {
        url = await ddgFallbackSearch(query);
        if (!url) {
          // Try a simpler query
          url = await ddgFallbackSearch(`${d.name} disease`);
        }
      }

      // Mark as attempted even if not found
      processedIds.push(d.id);
      if (url) {
        updates.push({ id: d.id, url });
        foundThisRun++;
        totalFound++; // persisted by saveState
        console.log("✅");
      } else {
        console.log("❌");
      }

      // Flush to DB when the batch is full, and always before a checkpoint so
      // the state file never records an ID whose image has not been written.
      const checkpointDue = (i + 1) % STATE_SAVE_INTERVAL === 0;
      if (updates.length >= DB_FLUSH_BATCH || checkpointDue) {
        await flushUpdates(raw, updates);
        updates = [];
      }
      if (checkpointDue) {
        saveState(processedIds);
      }

      // Rate limit (even for DDG to be polite)
      await sleep(ddgMode ? DDG_DELAY : BRAVE_DELAY);
    }

    // Final flush
    await flushUpdates(raw, updates);
    updates = [];
    saveState(processedIds);
  } finally {
    raw.close();
  }

  // Final report
  const finalList = await db
    .select({ id: diseases.id, name: diseases.name, imageUrl: diseases.imageUrl })
    .from(diseases)
    .all();
  const w = finalList.filter((d) => d.imageUrl);
  const wo = finalList.filter((d) => !d.imageUrl);

  console.log(`\n${"═".repeat(50)}`);
  console.log(`📊 BRAVE IMAGE SEARCH COMPLETE`);
  console.log(`${"═".repeat(50)}`);
  console.log(` Processed: ${pending.length}`);
  console.log(` Found this run: ${foundThisRun}`);
  console.log(` Total with images: ${w.length}/${finalList.length}`);
  console.log(` Still missing: ${wo.length}`);
  console.log(` Brave keys used: ${currentKeyIndex + 1}`);
  console.log(` Calls on current key: ${callsThisKey}`);
  console.log(` DuckDuckGo mode: ${ddgMode}`);

  if (wo.length > 0) {
    const rp = resolve(__dirname, ".disease-image-review-needed.md");
    let report = "# Disease Images - Still Missing\n\n";
    report += `Generated: ${new Date().toISOString()}\n\n`;
    report += `## Summary\n\n`;
    report += `- Total: ${finalList.length}\n`;
    report += `- With images: ${w.length}\n`;
    report += `- Still missing: ${wo.length}\n\n`;
    report += `## Missing Diseases\n\n`;
    for (const d of wo) {
      report += `- ${d.name} (\`${d.id}\`)\n`;
    }
    writeFileSync(rp, report, "utf-8");
    console.log(`\n📝 Report: ${rp}`);
  } else {
    console.log("\n✅ ALL diseases now have images!");
  }

  closeDb();
  console.log("\n");
}

main().catch((err) => {
  console.error("\n❌", err);
  process.exit(1);
});