re-init
This commit is contained in:
152
scripts/fill-brave-images.ts
Normal file
152
scripts/fill-brave-images.ts
Normal file
@@ -0,0 +1,152 @@
|
||||
#!/usr/bin/env node
|
||||
/**
|
||||
* fill-brave-images.ts — Brave-only pass for remaining disease images.
|
||||
*
|
||||
* Runs at 1 request/sec (Brave rate limit).
|
||||
* Updates diseases.json and Turso DB.
|
||||
*
|
||||
* Usage: cd apps/web && npx tsx scripts/fill-brave-images.ts
|
||||
*/
|
||||
|
||||
import dotenv from "dotenv"; dotenv.config({ path: resolve(__dirname, "../.env.local") });
|
||||
import { readFileSync, writeFileSync } from "fs";
|
||||
import { resolve } from "path";
|
||||
import { createClient } from "@libsql/client";
|
||||
import { closeDb } from "../src/lib/db/index";
|
||||
|
||||
// Absolute path of the disease seed file that this script reads and rewrites.
const DISEASES_JSON: string = resolve(__dirname, "../src/data/diseases.json");

// Brave Search subscription token; an empty string means "not configured".
const BRAVE_KEY: string = process.env.BRAVE_API_KEY ?? "";
|
||||
|
||||
/**
 * One record of the disease seed file as this script sees it.
 *
 * Only the fields the script touches are spelled out; any other property found
 * in the JSON is accepted through the index signature.
 */
interface DiseaseSeed {
  /** Identifier of the disease record. */
  id: string;
  /** Identifier of the plant this disease is associated with. */
  plantId: string;
  /** Display name of the disease. */
  name: string;
  /** Scientific name of the disease. */
  scientificName: string;
  /** Image URL for the disease; optional. */
  imageUrl?: string;
  /** Any additional properties present in the seed data. */
  [key: string]: unknown;
}
|
||||
|
||||
/**
 * Reads and parses the disease seed file at `DISEASES_JSON`.
 *
 * @returns The parsed list of disease records.
 * @throws Error when the file's top-level JSON value is not an array
 *   (read and JSON syntax errors propagate unchanged).
 */
function load(): DiseaseSeed[] {
  const parsed: unknown = JSON.parse(readFileSync(DISEASES_JSON, "utf-8"));
  // Fail here with a clear message instead of a later "filter is not a function".
  if (!Array.isArray(parsed)) {
    throw new Error(`Expected a JSON array in ${DISEASES_JSON}`);
  }
  // Element shape is trusted, not validated.
  return parsed as DiseaseSeed[];
}
|
||||
|
||||
/**
 * Looks up one image URL for `query` through the Brave image-search endpoint.
 *
 * Up to three results are requested. The first result whose image URL and page
 * URL both avoid the known stock-photo hosts is returned; when every result
 * looks like stock, the first usable URL is returned anyway.
 *
 * The request is tried at most three times: an HTTP 429 backs off
 * exponentially (5s, then 10s) and a thrown fetch/parse error waits 2s.
 * No delay is spent after the final attempt.
 *
 * @param query - Free-text search terms.
 * @returns An image URL, or `null` when the response is a non-429 error,
 *   no usable result exists, or all attempts are exhausted.
 */
async function searchBraveImage(query: string): Promise<string | null> {
  const MAX_ATTEMPTS = 3;
  const STOCK_HOSTS = /(dreamstime|shutterstock|alamy|istock|123rf)/i;
  const sleep = (ms: number): Promise<void> =>
    new Promise<void>((done) => setTimeout(done, ms));

  const endpoint = new URL("https://api.search.brave.com/res/v1/images/search");
  endpoint.searchParams.set("q", query);
  endpoint.searchParams.set("count", "3");

  for (let attempt = 0; attempt < MAX_ATTEMPTS; attempt++) {
    const isLastAttempt = attempt === MAX_ATTEMPTS - 1;
    try {
      const res = await fetch(endpoint.toString(), {
        headers: { "X-Subscription-Token": BRAVE_KEY, Accept: "application/json" },
      });

      if (res.status === 429) {
        // Rate limited: back off, but do not wait when no retry will follow.
        if (!isLastAttempt) await sleep(5000 * 2 ** attempt);
        continue;
      }
      if (!res.ok) return null;

      const data = (await res.json()) as {
        results?: Array<{ url?: string; thumbnail?: { src?: string } }>;
      } | null;
      const results = data?.results ?? [];

      let fallback: string | null = null;
      for (const r of results) {
        // `||` (not `??`) so an empty thumbnail src falls through to the url.
        const candidate = r.thumbnail?.src || r.url;
        if (!candidate) continue;
        if (fallback === null) fallback = candidate;

        // Thumbnails may be served from a proxy/CDN host rather than the
        // source site, so screen the result's own url for stock hosts as well.
        const looksLikeStock =
          STOCK_HOSTS.test(candidate) || STOCK_HOSTS.test(r.url ?? "");
        if (!looksLikeStock) return candidate;
      }

      // Everything looked like stock (or nothing was usable): take the first
      // usable URL, or null when there was none.
      return fallback;
    } catch {
      // Best-effort lookup: network/parse failures are retried after a pause.
      if (!isLastAttempt) await sleep(2000);
    }
  }

  return null;
}
|
||||
|
||||
/**
 * Script entry point.
 *
 * 1. Exits with status 1 when no Brave API key is configured.
 * 2. Loads the disease list and, for every entry without an `imageUrl`, runs
 *    one Brave image search, pausing about 1.1s between requests.
 * 3. Writes the whole list back to `DISEASES_JSON`.
 * 4. If `DATABASE_URL` and `DATABASE_TOKEN` are set, pushes the new image URLs
 *    to the `diseases` table in batches of 50; DB errors are logged, not fatal.
 * 5. Re-reads the JSON file and prints a summary of what is still missing.
 */
async function main(): Promise<void> {
  console.log("\n🔍 Brave Image Search — remaining disease images\n");

  if (!BRAVE_KEY) {
    console.log("❌ No BRAVE_API_KEY in .env.local\n");
    process.exit(1);
  }

  const diseases = load();
  const pending = diseases.filter((d) => !d.imageUrl);
  console.log(`📋 ${pending.length} diseases need images\n`);

  let found = 0;

  for (const [i, d] of pending.entries()) {
    // NOTE(review): this searches the disease list itself for the plant, so it
    // only matches when some disease id equals this plantId; otherwise the raw
    // plantId ends up in the query. Confirm whether a plant list was intended.
    const plant = diseases.find((p) => p.id === d.plantId);
    const plantName = plant?.name ?? d.plantId;
    const query = `${d.name} ${plantName} plant disease symptom`;

    process.stdout.write(` [${String(i + 1).padStart(2, " ")}/${pending.length}] ${d.name.padEnd(35)} `);

    const url = await searchBraveImage(query);
    if (url) {
      // `pending` holds the same object references as `diseases`, so this
      // assignment is what ends up in the rewritten JSON file.
      d.imageUrl = url;
      found++;
      console.log(`✅`);
    } else {
      console.log(`❌`);
    }

    // 1 req/sec rate limit — only needed when another request follows.
    if (i < pending.length - 1) {
      await new Promise((r) => setTimeout(r, 1100));
    }
  }

  // Write updated JSON
  writeFileSync(DISEASES_JSON, JSON.stringify(diseases, null, 2) + "\n", "utf-8");
  console.log(`\n✅ diseases.json updated: ${found}/${pending.length} images found\n`);

  // Update DB
  try {
    const dbUrl = process.env.DATABASE_URL;
    const dbToken = process.env.DATABASE_TOKEN;
    if (dbUrl && dbToken) {
      // One parameterized UPDATE per disease that received an image this run.
      const statements = pending.flatMap((d) =>
        d.imageUrl
          ? [{ sql: "UPDATE diseases SET image_url = ? WHERE id = ?", args: [d.imageUrl, d.id] }]
          : [],
      );

      const raw = createClient({ url: dbUrl, authToken: dbToken });
      try {
        for (let i = 0; i < statements.length; i += 50) {
          await raw.batch(statements.slice(i, i + 50), "write");
        }
      } finally {
        // Release the client even when a batch fails.
        raw.close();
      }
      console.log(`✅ Turso DB updated: ${statements.length} rows`);
    } else {
      console.log("⏭️ Skipping DB — no DATABASE_URL/TOKEN");
    }
  } catch (err) {
    // The JSON file is already written; a DB failure is reported but not fatal.
    console.log(` ⚠️ DB: ${err instanceof Error ? err.message : err}`);
  }

  // Summary — computed from the file on disk rather than the in-memory list.
  const finalDiseases = JSON.parse(readFileSync(DISEASES_JSON, "utf-8")) as DiseaseSeed[];
  const stillMissing = finalDiseases.filter((d) => !d.imageUrl);
  console.log(`\n${"═".repeat(50)}`);
  console.log(`📊 FINAL: ${finalDiseases.length} total`);
  console.log(` With images: ${finalDiseases.length - stillMissing.length}`);
  console.log(` Still missing: ${stillMissing.length}`);
  if (stillMissing.length > 0) {
    console.log(`\nStill need human curation:`);
    for (const d of stillMissing) {
      console.log(` ❌ ${d.name} (${d.id})`);
    }
  }
  console.log(`${"═".repeat(50)}\n`);

  closeDb();
}
|
||||
|
||||
// Run the script; a rejection from main() is printed and turned into exit code 1.
const reportFatal = (reason: unknown): never => {
  console.error("\n❌ Fatal:", reason);
  process.exit(1);
};

main().catch(reportFatal);
|
||||
Reference in New Issue
Block a user