script cleanup
This commit is contained in:
@@ -1,53 +0,0 @@
|
|||||||
/**
|
|
||||||
* apply-flag-migration.ts
|
|
||||||
*
|
|
||||||
* Applies the flagged_content table migration to Turso.
|
|
||||||
* Run with: npx tsx scripts/apply-flag-migration.ts
|
|
||||||
*/
|
|
||||||
|
|
||||||
import dotenv from "dotenv";
|
|
||||||
import path from "node:path";
|
|
||||||
|
|
||||||
// Choose the env file from NODE_ENV and load it before the database client
// reads its connection settings from process.env.
const isProduction = process.env.NODE_ENV === "production";
const envFile = isProduction ? "../.env.production" : "../.env.development";
const envFilePath = path.resolve(__dirname, envFile);
dotenv.config({ path: envFilePath });
|
|
||||||
|
|
||||||
import { createClient } from "@libsql/client";
|
|
||||||
|
|
||||||
/**
 * Creates the `flagged_content` table and its two lookup indexes.
 *
 * Every statement uses `IF NOT EXISTS`, so re-running the script is harmless.
 * The client is closed in `finally` so the connection is released even when
 * one of the statements fails.
 */
async function main() {
  const db = createClient({
    url: process.env.DATABASE_URL!,
    authToken: process.env.DATABASE_TOKEN!,
  });

  try {
    console.log("Applying migration: create flagged_content table...");

    await db.execute(`
      CREATE TABLE IF NOT EXISTS flagged_content (
        id text PRIMARY KEY NOT NULL,
        content_type text NOT NULL,
        content_id text NOT NULL,
        field_name text NOT NULL,
        notes text DEFAULT '',
        flag_count integer DEFAULT 1 NOT NULL,
        created_at text DEFAULT (datetime('now')) NOT NULL,
        updated_at text DEFAULT (datetime('now')) NOT NULL
      )
    `);

    await db.execute(`
      CREATE INDEX IF NOT EXISTS idx_flagged_content_type ON flagged_content (content_type)
    `);

    await db.execute(`
      CREATE INDEX IF NOT EXISTS idx_flagged_content_id ON flagged_content (content_id)
    `);

    console.log("Migration applied successfully.");
  } finally {
    db.close();
  }
}

// Report the failure and exit non-zero so callers (CI, shell scripts) notice.
main().catch((err) => {
  console.error("Migration failed:", err);
  process.exit(1);
});
|
|
||||||
@@ -1,23 +0,0 @@
|
|||||||
import "dotenv/config";
|
|
||||||
import { createClient } from "@libsql/client";
|
|
||||||
|
|
||||||
/**
 * Adds the `image_url` column to `diseases`, backfills NULLs with '', and
 * records the migration in `__drizzle_migrations`.
 *
 * The statements are not idempotent (`ALTER TABLE ... ADD COLUMN` fails if the
 * column already exists), so a failure must be visible to the caller: the
 * client is closed in `finally` and the process exits non-zero on error.
 */
async function main() {
  const db = createClient({
    url: process.env.DATABASE_URL!,
    authToken: process.env.DATABASE_TOKEN!,
  });

  try {
    console.log("Applying migration: add image_url to diseases...");
    await db.execute("ALTER TABLE diseases ADD COLUMN image_url TEXT DEFAULT ''");
    await db.execute("UPDATE diseases SET image_url = '' WHERE image_url IS NULL");

    // Mark migration as applied
    await db.execute(
      "INSERT INTO __drizzle_migrations (hash, created_at) VALUES ('0001_add-disease-images', datetime('now'))",
    );

    console.log("Migration applied successfully.");
  } finally {
    db.close();
  }
}

// Previously `main().catch(console.error)`: that logged the error but left the
// exit code at 0, so a failed migration looked successful to the caller.
main().catch((err) => {
  console.error("Migration failed:", err);
  process.exit(1);
});
|
|
||||||
@@ -1,19 +0,0 @@
|
|||||||
import { createClient } from "@libsql/client";
|
|
||||||
// Prints how many rows of `diseases` have a non-empty image_url, overall and
// broken down by severity.
const databaseUrl = process.env.DATABASE_URL;
if (!databaseUrl) {
  // Fail with a clear message instead of passing `undefined` to the client.
  throw new Error("DATABASE_URL is not set");
}

const c = createClient({
  url: databaseUrl,
  authToken: process.env.DATABASE_TOKEN,
});

try {
  const r = await c.execute("SELECT COUNT(*) as cnt FROM diseases");
  const r2 = await c.execute(
    `SELECT SUM(CASE WHEN image_url IS NOT NULL AND image_url != '' THEN 1 ELSE 0 END) as has, SUM(CASE WHEN image_url IS NULL OR image_url = '' THEN 1 ELSE 0 END) as miss FROM diseases`,
  );
  const r3 = await c.execute(
    `SELECT severity, COUNT(*) as total, SUM(CASE WHEN image_url IS NOT NULL AND image_url != '' THEN 1 ELSE 0 END) as has FROM diseases GROUP BY severity ORDER BY severity`,
  );

  // SUM() over an empty table yields NULL; show 0 rather than "null".
  const totals = r2.rows[0];
  console.log(
    `Total: ${r.rows[0]?.cnt ?? 0} | With images: ${totals?.has ?? 0} | Missing: ${totals?.miss ?? 0}`,
  );

  for (const row of r3.rows) {
    // severity is a raw column value and may be NULL or non-string; convert
    // explicitly before padding instead of calling padEnd on it directly.
    const label = String(row.severity ?? "unknown").padEnd(10);
    console.log(` ${label}: ${row.has}/${row.total}`);
  }
} finally {
  c.close();
}
|
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -1,691 +0,0 @@
|
|||||||
/**
|
|
||||||
* Expand DB with comprehensive plant disease list from Wikipedia.
|
|
||||||
*
|
|
||||||
* Reads /tmp/plant_diseases/plant_diseases_comprehensive.txt,
|
|
||||||
* compares against existing DB entries (by name, case-insensitive),
|
|
||||||
* and inserts new entries with reasonable defaults.
|
|
||||||
*
|
|
||||||
* Usage:
|
|
||||||
* cd apps/web && export $(grep -v '^#' .env.development | xargs) && npx tsx scripts/expand-diseases.ts
|
|
||||||
*/
|
|
||||||
|
|
||||||
import "dotenv/config";
|
|
||||||
import { readFileSync } from "fs";
|
|
||||||
import { eq, sql } from "drizzle-orm";
|
|
||||||
import { getDb, closeDb } from "../src/lib/db/index";
|
|
||||||
import { plants, diseases } from "../src/lib/db/schema";
|
|
||||||
import type { CausalAgentType, Severity } from "../src/lib/types";
|
|
||||||
|
|
||||||
// ─── Parse the comprehensive list ─────────────────────────────────────────────
|
|
||||||
|
|
||||||
/** One disease parsed from the comprehensive list file. */
interface DiseaseEntry {
  /** Disease name with the leading "N. " numbering removed. */
  name: string;
  /** URL found on the line after the name, or "" when there is none. */
  sourceUrl: string;
}

/**
 * Parses a numbered disease list of the form
 *
 *   12. Apple scab
 *   https://en.wikipedia.org/wiki/Apple_scab
 *
 * A line matching "<digits>. <name>" starts an entry. If the following line
 * starts with "http" it is taken as the entry's source URL and consumed;
 * otherwise the entry gets an empty sourceUrl. All other lines are ignored.
 *
 * @param filePath Path of the UTF-8 text file to read.
 * @returns Entries in file order (duplicates are not removed here).
 * @throws Whatever `readFileSync` throws when the file cannot be read.
 */
function parseComprehensiveList(filePath: string): DiseaseEntry[] {
  // Drop a UTF-8 BOM so the first line can still match `^\d+`.
  const content = readFileSync(filePath, "utf-8").replace(/^\uFEFF/, "");
  // Accept both LF and CRLF. Splitting on "\n" alone leaves a trailing "\r"
  // on every line, which `.`/`$` never match, so CRLF files parsed as empty.
  const lines = content.split(/\r?\n/);
  const nameRe = /^\d+\.\s+(.+)$/;
  const entries: DiseaseEntry[] = [];

  for (let i = 0; i < lines.length; i++) {
    const nameMatch = nameRe.exec(lines[i] ?? "");
    if (!nameMatch) continue;

    const name = (nameMatch[1] ?? "").trim();
    const urlLine = lines[i + 1]?.trim() ?? "";

    // Only treat the next line as the URL if it looks like one.
    if (urlLine.startsWith("http")) {
      entries.push({ name, sourceUrl: urlLine });
      i++; // skip the URL line
    } else {
      entries.push({ name, sourceUrl: "" });
    }
  }

  return entries;
}
|
|
||||||
|
|
||||||
// ─── Infer causal agent type from disease name ────────────────────────────────
|
|
||||||
|
|
||||||
// Keyword tables for inferCausalAgent. Each keyword is matched as a substring
// of the lower-cased name padded with one space on both sides, so a leading or
// trailing space in a keyword acts as a word boundary. Phrases already covered
// by a shorter keyword in the same table (e.g. " root rot" by " rot") are
// omitted because they cannot change the result.

const BACTERIAL_KEYWORDS: readonly string[] = [
  " xanthomonas", " pseudomonas", " erwinia", " ralstonia", " clavibacter",
  " streptomyces", " agrobacterium", " corynebacterium", " pectobacterium", " dickeya",
];

const VIRAL_KEYWORDS: readonly string[] = [
  " mosaic", " yellows", " leaf roll", " leafroll", " ringspot", " ring spot",
  " enation", " phyllody", " witches", " crinkle", " rosette", " shoestring",
  " tristeza", " psorosis", " stubborn", " greening", " vein banding",
  " vein mottle", " vein clearing", " leaf pucker", " pucker leaf", " latent",
  " motley", " rugose",
  // Names containing "virus", "viroid" or "virosis"
  " virus", " viroid", " virosis",
];

const NEMATODE_KEYWORDS: readonly string[] = [
  " nematode", " eelworm", " root knot", " root-knot", " cyst ", " dagger ",
  " lance ", " lesion ", " ring ", " spiral ", " sting ", " stubby ", " needle ",
  " foliar ", " bulb ", " reniform ", " burrowing ",
];

// Environmental phrases that also contain the fungal keyword " rot". They must
// be tested before the fungal table or they can never match.
const ENVIRONMENTAL_ROT_PHRASES: readonly string[] = [" blossom end rot", " transit rot"];

const FUNGAL_KEYWORDS: readonly string[] = [
  // Symptom words
  " mildew", " rust", " smut", " blight", " canker", " rot", " scab", " mold",
  " anthracnose", " bunt", " ergot", " dieback", " scald", " blotch", " speckle",
  " sooty", " flyspeck",
  // Genus names
  " fusarium", " alternaria", " botrytis", " rhizoctonia", " pythium",
  " phytophthora", " sclerotinia", " verticillium", " ascochyta", " cercospora",
  " septoria", " colletotrichum", " phomopsis", " diaporthe", " diplodia",
  " macrophomina", " cylindrocladium", " mycosphaerella", " helminthosporium",
  " curvularia", " bipolaris", " exserohilum", " dothiorella", " fusicoccum",
  " pestalotia", " glomerella", " nectria", " eutypa", " armillaria", " ganoderma",
  " phoma", " cladosporium", " penicillium", " aspergillus", " rhizopus", " mucor",
  // Multi-word disease names
  " pink root", " leaf spot", " brown spot", " black spot", " black leg",
  " blackleg", " black foot", " tar spot", " target spot", " dollar spot",
  " fairy ring", " pink disease", " sclerotial", " sore shin", " wart", " scurf",
  " shot hole", " seepage", " coral spot", " collar crack", " fasciation",
  " exobasidium", " mycorrhiza", " lichen", " algal", " chlorosis",
  " leaf blister", " leaf curl",
];

const ENVIRONMENTAL_KEYWORDS: readonly string[] = [
  // Physiological / environmental indicators
  " sunscald", " sunburn", " chilling", " edema", " deficiency", " toxicity",
  " ozone", " drought", " frost", " herbicide", " pesticide", " phytotoxicity",
  " catface", " fruit cracking", " russeting", " growth crack", " mealiness",
  " wind scar", " hail", " salt ", " nutritional", " mineral", " overwatering",
  " under watering", " waterlogging", " chemical injury", " spray injury",
  " fertilizer burn", " lightning", " bruising", " pressure bruise",
  " impact damage",
  // Insect / mite / pest indicators (also reported as "environmental")
  " mite", " beetle", " weevil", " aphid", " bollworm", " leaf miner",
  " mealybug", " thrips", " whitefly", " caterpillar", " sawfly", " scale ",
  " leafhopper", " psylla", " slug", " snail", " borer", " maggot", " grub",
  " earwig", " grasshopper",
];

/**
 * Guesses the causal agent type of a disease from keywords in its name.
 *
 * Checks run in a fixed order and the first match wins:
 * bacterial, viral, nematode, environmental "rot" phrases, fungal,
 * environmental/pest. Anything unmatched is reported as "fungal".
 *
 * Changes from the previous implementation:
 * - The name is padded with a space on both sides before matching. Keywords
 *   carry a literal leading (and sometimes trailing) space as a word boundary,
 *   so without padding a keyword at the very start or end of the name
 *   ("Mosaic ...", "Xanthomonas ...", "San Jose scale") never matched.
 * - " blossom end rot" and " transit rot" are tested before the fungal table;
 *   previously the fungal " rot" keyword always won and they were unreachable.
 * - The trailing list of fungal genus names was removed: a hit returned
 *   "fungal", which is also the default, so it had no effect.
 *
 * @param name Disease name as it appears in the source list.
 * @returns "bacterial", "viral", "environmental" or "fungal".
 */
function inferCausalAgent(name: string): CausalAgentType {
  const padded = ` ${name.toLowerCase()} `;
  const mentionsAny = (keywords: readonly string[]): boolean =>
    keywords.some((keyword) => padded.includes(keyword));

  if (padded.startsWith(" bacterial ") || mentionsAny(BACTERIAL_KEYWORDS)) {
    return "bacterial";
  }

  if (mentionsAny(VIRAL_KEYWORDS)) {
    return "viral";
  }

  // Nematode-style words ("cyst", "ring", ...) only count when the name also
  // says "nematode"; such names are reported as "environmental", as before.
  if (mentionsAny(NEMATODE_KEYWORDS) && padded.includes("nematode")) {
    return "environmental";
  }

  if (mentionsAny(ENVIRONMENTAL_ROT_PHRASES)) {
    return "environmental";
  }

  if (mentionsAny(FUNGAL_KEYWORDS)) {
    return "fungal";
  }

  if (mentionsAny(ENVIRONMENTAL_KEYWORDS)) {
    return "environmental";
  }

  // Default to fungal (most plant diseases are fungal)
  return "fungal";
}
|
|
||||||
|
|
||||||
// ─── Infer severity ───────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
// Keyword tables for inferSeverity, matched as substrings of the lower-cased
// name padded with one space on each side.
const HIGH_SEVERITY_KEYWORDS: readonly string[] = [
  " lethal", " devastating", " destructive", " fatal", " severe", " blight",
  " wilt", " canker", " dieback", " decline", " rot", " gall", " gummosis",
  " necrosis", " erwinia",
];

const LOW_SEVERITY_KEYWORDS: readonly string[] = [
  " minor", " slight", " speckle", " fleck", " freckle", " chlorosis",
  " translucence", " superficial",
];

/**
 * Guesses a severity from keywords in the disease name.
 *
 * High-severity keywords are checked first, then low-severity ones; anything
 * else is "moderate".
 *
 * Changes from the previous implementation:
 * - The name is padded with spaces before matching, so a keyword that is the
 *   first word of the name ("Wilt of oak") is recognised.
 * - " mild" no longer matches " mildew"; previously every mildew without a
 *   high-severity keyword was rated "low".
 *
 * @param name Disease name as it appears in the source list.
 * @returns "high", "low" or "moderate".
 */
function inferSeverity(name: string): Severity {
  const padded = ` ${name.toLowerCase()} `;
  const mentions = (keyword: string): boolean => padded.includes(keyword);

  if (HIGH_SEVERITY_KEYWORDS.some(mentions)) {
    return "high";
  }

  // "mild", "mildly", "milder" count; "mildew" does not.
  const saysMild = / mild(?!ew)/.test(padded);
  if (saysMild || LOW_SEVERITY_KEYWORDS.some(mentions)) {
    return "low";
  }

  return "moderate";
}
|
|
||||||
|
|
||||||
// ─── Generate a deterministic slug ────────────────────────────────────────────
|
|
||||||
|
|
||||||
/**
 * Builds the deterministic ID for a disease name: "wiki-" followed by the
 * lower-cased runs of [a-z0-9] joined with single hyphens. All other
 * characters act as separators, so the result never starts or ends with a
 * hyphen after the prefix and never contains two hyphens in a row.
 */
function toSlug(name: string): string {
  const words = name
    .toLowerCase()
    .split(/[^a-z0-9]+/)
    .filter((word) => word.length > 0);
  return `wiki-${words.join("-")}`;
}
|
|
||||||
|
|
||||||
// ─── Main ─────────────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
/**
 * Imports diseases from the comprehensive list file into the database.
 *
 * Steps: load existing disease names, parse the list, make sure the catch-all
 * plants exist, drop entries that are duplicated in the file or already in the
 * DB (case-insensitive name match), insert the rest in batches, then print a
 * summary.
 *
 * Changes from the previous implementation:
 * - `closeDb()` runs in `finally`, so the connection is also released when a
 *   query throws.
 * - A UNIQUE-constraint failure during the per-row fallback is counted under
 *   `conflicts` instead of being reported as an inserted row.
 */
async function main() {
  const db = getDb();

  try {
    // 1. Get existing disease names from DB
    const existingDiseases = await db.select({ name: diseases.name }).from(diseases);
    const existingNames = new Set(existingDiseases.map((d) => d.name.toLowerCase().trim()));

    console.log(`Existing diseases in DB: ${existingNames.size}`);

    // 2. Parse the comprehensive list
    const entries = parseComprehensiveList("/tmp/plant_diseases/plant_diseases_comprehensive.txt");
    console.log(`Total entries in comprehensive file: ${entries.length}`);

    // 3. Find or create catch-all plants
    for (const plantId of ["general", "unknown"]) {
      const existing = await db.select().from(plants).where(eq(plants.id, plantId)).get();

      if (!existing) {
        console.log(`Creating '${plantId}' plant for catch-all diseases...`);
        await db.insert(plants).values({
          id: plantId,
          commonName: plantId === "general" ? "General (Multiple Plants)" : "Unknown Plant",
          scientificName: "Various",
          family: "Various",
          category: "houseplant",
          careSummary:
            plantId === "general"
              ? "General plant diseases affecting multiple species."
              : "Plant disease with unknown host plant.",
          imageUrl: "",
        });
        console.log(`Created '${plantId}' plant.`);
      }
    }

    // 4. Filter new entries (deduplicate within file + against DB)
    const newEntries: DiseaseEntry[] = [];
    const skipped: string[] = [];
    const seen = new Set<string>();

    for (const entry of entries) {
      const key = entry.name.toLowerCase().trim();
      if (seen.has(key)) continue;
      seen.add(key);

      if (existingNames.has(key)) {
        skipped.push(entry.name);
      } else {
        newEntries.push(entry);
      }
    }

    console.log(`\nNew entries to insert: ${newEntries.length}`);
    console.log(`Already existing (skipped): ${skipped.length}`);

    if (skipped.length > 0) {
      console.log(`\nFirst 10 skipped (of ${skipped.length}):`);
      skipped.slice(0, 10).forEach((s) => console.log(` - ${s}`));
    }

    // 5. Insert new entries in batches
    if (newEntries.length === 0) {
      console.log("\n✅ No new diseases to insert.");
      return;
    }

    const BATCH_SIZE = 50;
    let inserted = 0;
    let conflicts = 0;
    let errors = 0;

    for (let i = 0; i < newEntries.length; i += BATCH_SIZE) {
      const batch = newEntries.slice(i, i + BATCH_SIZE);
      const values = batch.map((entry) => {
        const causalAgent = inferCausalAgent(entry.name);
        const severity = inferSeverity(entry.name);
        return {
          id: toSlug(entry.name),
          plantId: "general",
          name: entry.name,
          scientificName: "",
          causalAgentType: causalAgent,
          description: `A plant disease known as "${entry.name}". Source: Wikipedia.`,
          symptoms: [],
          causes: [],
          treatment: [],
          prevention: [],
          lookalikeIds: [],
          severity,
          sourceUrl: entry.sourceUrl,
          imageUrl: "",
        };
      });

      try {
        await db.insert(diseases).values(values).onConflictDoNothing();
        // NOTE(review): onConflictDoNothing may silently skip rows (e.g. two
        // names that slugify to the same id), so this is an upper bound.
        inserted += values.length;
      } catch (err) {
        // Fall back to individual inserts for this batch if batch fails
        console.log(` Batch failed, trying individually...`);
        for (const val of values) {
          try {
            await db.insert(diseases).values(val).onConflictDoNothing();
            inserted++;
          } catch (e2) {
            const message = String(e2);
            if (message.includes("UNIQUE") || message.includes("duplicate")) {
              // The row was NOT written; report it as a conflict rather than
              // inflating the inserted count.
              conflicts++;
            } else {
              console.error(` Error inserting "${val.name}":`, e2);
              errors++;
            }
          }
        }
      }

      if ((i + BATCH_SIZE) % 200 === 0 || i + BATCH_SIZE >= newEntries.length) {
        console.log(
          ` Progress: ${Math.min(i + BATCH_SIZE, newEntries.length)}/${newEntries.length} (${inserted} inserted, ${errors} errors)`,
        );
      }
    }

    // 6. Summary
    const totalDiseases = await db
      .select({ count: sql<number>`COUNT(*)` })
      .from(diseases)
      .get();
    const totalPlants = await db
      .select({ count: sql<number>`COUNT(*)` })
      .from(plants)
      .get();

    console.log(`\n📊 Results:`);
    console.log(` Inserted: ${inserted}`);
    console.log(` Conflicts (unique constraint): ${conflicts}`);
    console.log(` Errors: ${errors}`);
    console.log(` Skipped (already existed): ${skipped.length}`);
    console.log(`\n📊 Database now has:`);
    console.log(` ${totalPlants?.count ?? 0} plants`);
    console.log(` ${totalDiseases?.count ?? 0} diseases`);
  } finally {
    closeDb();
  }
}

main().catch((err) => {
  console.error("❌ Failed:", err);
  process.exit(1);
});
|
|
||||||
@@ -1,414 +0,0 @@
|
|||||||
#!/usr/bin/env node
|
|
||||||
/**
|
|
||||||
* fill-brave-images-v2.ts — Brave Image Search for remaining disease images.
|
|
||||||
*
|
|
||||||
* Prioritizes by severity (critical → high → moderate → low).
|
|
||||||
* Runs at 1 request/sec (Brave free tier rate limit).
|
|
||||||
* Updates Turso DB directly with found images.
|
|
||||||
* When current key is exhausted, prompts for next key.
|
|
||||||
* Falls back to duckduckgo-images-api when all keys are spent.
|
|
||||||
*
|
|
||||||
* Usage:
|
|
||||||
* cd apps/web && npx tsx scripts/fill-brave-images-v2.ts
|
|
||||||
*
|
|
||||||
* Pass additional API keys as args:
|
|
||||||
* npx tsx scripts/fill-brave-images-v2.ts KEY2 KEY3
|
|
||||||
*/
|
|
||||||
|
|
||||||
import { readFileSync, writeFileSync } from "fs";
|
|
||||||
import { resolve } from "path";
|
|
||||||
|
|
||||||
// Load env
|
|
||||||
// Best-effort loading of KEY=VALUE pairs from .env.development. Variables that
// are already set (to a non-empty value) in process.env are left untouched.
const envPath = resolve(__dirname, "../.env.development");
try {
  const rawLines = readFileSync(envPath, "utf-8").split("\n");
  for (const rawLine of rawLines) {
    const entry = rawLine.trim();
    // Skip blank lines and comments.
    if (!entry || entry.startsWith("#")) continue;

    // Require a non-empty key before the first "=".
    const separator = entry.indexOf("=");
    if (separator <= 0) continue;

    const name = entry.slice(0, separator).trim();
    const value = entry.slice(separator + 1).trim();
    if (!process.env[name]) process.env[name] = value;
  }
} catch {
  // The file is optional; a missing or unreadable file is ignored.
}

// Also try .env.local for BRAVE_API_KEY
try {
  const bravePrefix = "BRAVE_API_KEY=";
  const localLines = readFileSync(resolve(__dirname, "../.env.local"), "utf-8").split("\n");
  for (const rawLine of localLines) {
    const entry = rawLine.trim();
    if (!entry.startsWith(bravePrefix)) continue;

    const value = entry.slice(bravePrefix.length).trim();
    if (!process.env.BRAVE_API_KEY) process.env.BRAVE_API_KEY = value;
  }
} catch {
  // The file is optional; a missing or unreadable file is ignored.
}
|
|
||||||
|
|
||||||
import { getDb, closeDb } from "../src/lib/db/index";
|
|
||||||
import { diseases } from "../src/lib/db/schema";
|
|
||||||
import { createClient } from "@libsql/client";
|
|
||||||
import { sql } from "drizzle-orm";
|
|
||||||
|
|
||||||
/**
 * Fields of a disease record used by this script.
 * NOTE(review): presumably mirrors the selected columns of the `diseases`
 * table — confirm against the query that produces these rows.
 */
interface DiseaseRow {
  /** Disease identifier. */
  id: string;
  /** Identifier of the plant this disease is attached to. */
  plantId: string;
  /** Display name of the disease. */
  name: string;
  /** Scientific name of the disease. */
  scientificName: string;
  /** Severity label; the header comment lists critical, high, moderate, low. */
  severity: string;
}
|
|
||||||
|
|
||||||
// ─── Config ──────────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
const BRAVE_DELAY = 1100; // ms between calls (1 req/sec)
|
|
||||||
const DB_FLUSH_BATCH = 50;
|
|
||||||
const MAX_PER_KEY = 1800; // Leave 200 buffer of the 2000/mo limit
|
|
||||||
const STATE_FILE = resolve(__dirname, ".brave-progress.json");
|
|
||||||
|
|
||||||
let currentKeyIndex = 0;
|
|
||||||
let braveKeys: string[] = [];
|
|
||||||
let callsThisKey = 0;
|
|
||||||
let totalFound = 0;
|
|
||||||
// totalSkipped tracking removed — not needed for v2
|
|
||||||
|
|
||||||
// ─── State persistence ───────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
interface RunState {
|
|
||||||
processedIds: string[];
|
|
||||||
currentKeyIndex: number;
|
|
||||||
callsThisKey: number;
|
|
||||||
totalFound: number;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * Reads the progress file written by `saveState`.
 *
 * @returns The stored state, or `null` when the file is missing, is not valid
 *   JSON, or does not have the expected shape (so callers start a fresh run
 *   instead of crashing on a half-written or hand-edited file).
 */
function loadState(): RunState | null {
  let parsed: unknown;
  try {
    parsed = JSON.parse(readFileSync(STATE_FILE, "utf-8"));
  } catch {
    // No progress file yet (or unreadable JSON): nothing to resume.
    return null;
  }

  if (typeof parsed !== "object" || parsed === null) return null;
  const candidate = parsed as Partial<Record<keyof RunState, unknown>>;

  const isCount = (v: unknown): v is number =>
    typeof v === "number" && Number.isInteger(v) && v >= 0;
  const ids = candidate.processedIds;

  if (
    !Array.isArray(ids) ||
    !ids.every((id): id is string => typeof id === "string") ||
    !isCount(candidate.currentKeyIndex) ||
    !isCount(candidate.callsThisKey) ||
    !isCount(candidate.totalFound)
  ) {
    console.warn(`Ignoring malformed state file: ${STATE_FILE}`);
    return null;
  }

  return {
    processedIds: ids,
    currentKeyIndex: candidate.currentKeyIndex,
    callsThisKey: candidate.callsThisKey,
    totalFound: candidate.totalFound,
  };
}
|
|
||||||
|
|
||||||
/**
 * Writes a progress snapshot to `STATE_FILE` as pretty-printed JSON.
 *
 * The snapshot combines the given ids with the module-level counters
 * `currentKeyIndex`, `callsThisKey` and `totalFound`.
 *
 * @param processedIds Ids of diseases that have already been attempted.
 */
function saveState(processedIds: string[]) {
  const snapshot = { processedIds, currentKeyIndex, callsThisKey, totalFound };
  const serialized = JSON.stringify(snapshot, null, 2);
  writeFileSync(STATE_FILE, serialized, "utf-8");
}
|
|
||||||
|
|
||||||
// ─── Brave API ───────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
/**
 * Queries the Brave image search endpoint with the currently selected key.
 *
 * @param query Search text.
 * @returns An image URL (thumbnail preferred, non-stock sites preferred),
 *   the literal string `"RATE_LIMITED"` when the API answers 429, or `null`
 *   when there is no key, the response is not OK, there are no results, or
 *   all three attempts threw.
 */
async function braveImageSearch(query: string): Promise<string | null> {
  const apiKey = braveKeys[currentKeyIndex];
  if (!apiKey) return null;

  const endpoint = new URL("https://api.search.brave.com/res/v1/images/search");
  endpoint.searchParams.set("q", query);
  endpoint.searchParams.set("count", "3");

  type Hit = { url: string; thumbnail?: { src?: string } };
  const stockSites = /(dreamstime|shutterstock|alamy|istock|123rf)/i;
  const pickSrc = (hit: Hit) => hit.thumbnail?.src ?? hit.url;

  let attemptsLeft = 3;
  while (attemptsLeft-- > 0) {
    try {
      const response = await fetch(endpoint.toString(), {
        headers: { "X-Subscription-Token": apiKey, Accept: "application/json" },
      });

      if (response.status === 429) {
        console.log("\n [RATE LIMITED] Key " + (currentKeyIndex + 1) + " exhausted!");
        return "RATE_LIMITED";
      }
      if (!response.ok) return null;

      // Only successful responses are counted against the key.
      callsThisKey++;
      const payload = (await response.json()) as { results?: Array<Hit> };
      const hits = payload?.results ?? [];
      if (hits.length === 0) return null;

      // Prefer non-stock images; otherwise fall back to the first hit.
      const nonStock = hits.find((hit) => {
        const src = pickSrc(hit);
        return Boolean(src) && !stockSites.test(src);
      });
      return pickSrc(nonStock ?? hits[0]);
    } catch {
      // Network/parse failure: wait before the next attempt.
      await new Promise((wake) => setTimeout(wake, 2000));
    }
  }
  return null;
}
|
|
||||||
|
|
||||||
// ─── DuckDuckGo fallback ────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
/**
 * Fallback image lookup through the optional `duckduckgo-images-api` package.
 *
 * @param query Search text.
 * @returns The first non-stock image URL, else the first result's image, or
 *   `null` when the package cannot be imported, nothing is found, or the
 *   search throws.
 */
async function ddgFallbackSearch(query: string): Promise<string | null> {
  const stockSites = /(dreamstime|shutterstock|alamy|istock|123rf)/i;
  try {
    // The package is optional: a failed import simply means "no result".
    const ddgModule = await import("duckduckgo-images-api").catch(() => null);
    if (!ddgModule) return null;

    const hits = await ddgModule.image_search({ query, moderate: true });
    if (!hits || !(hits.length > 0)) return null;

    for (const hit of hits) {
      const isUsable = hit.image && !stockSites.test(hit.image);
      if (isUsable) return hit.image;
    }
    return hits[0].image || null;
  } catch {
    // duckduckgo-images-api not installed
    return null;
  }
}
|
|
||||||
|
|
||||||
// ─── Main ────────────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
/**
 * Fills missing `image_url` values in the `diseases` table.
 *
 * Flow: collect Brave keys (env + CLI args) → restore saved progress → select
 * diseases without an image → search most severe first (Brave, rotating keys,
 * then DuckDuckGo) → write found URLs in batches → print a summary and write a
 * markdown report of what is still missing.
 *
 * Exits the process with code 1 when no Brave key is available.
 */
async function main() {
  // Sentinel returned by braveImageSearch on HTTP 429; must never be stored as a URL.
  const RATE_LIMITED = "RATE_LIMITED";

  console.log("\n🔍 Brave Disease Image Filler v2\n");

  // Parse keys from args + env (deduplicated, env key first)
  const argsKeys = process.argv.slice(2).filter((a) => !a.startsWith("-"));
  const envKey = process.env.BRAVE_API_KEY;
  braveKeys = [...new Set([envKey, ...argsKeys].filter((k): k is string => Boolean(k)))];

  if (braveKeys.length === 0) {
    console.log("❌ No Brave API keys found.");
    console.log(" Set BRAVE_API_KEY in .env.local or pass as argument.\n");
    process.exit(1);
  }
  console.log(`🔑 ${braveKeys.length} Brave API key(s) available\n`);

  // Load state
  const state = loadState();
  if (state) {
    if (state.currentKeyIndex >= 0 && state.currentKeyIndex < braveKeys.length) {
      currentKeyIndex = state.currentKeyIndex;
      callsThisKey = state.callsThisKey;
    } else {
      // Fewer keys than in the saved run: the saved index would select no key
      // at all and every search would silently fail. Use the last key instead.
      currentKeyIndex = braveKeys.length - 1;
      callsThisKey = 0;
    }
    totalFound = state.totalFound;
    console.log(
      `📋 Resuming from previous run (${state.processedIds.length} processed, ${totalFound} found)\n`,
    );
  }

  // Get diseases from DB
  const db = getDb();
  const allDiseases = (await db
    .select({
      id: diseases.id,
      name: diseases.name,
      scientificName: diseases.scientificName,
      severity: diseases.severity,
      plantId: diseases.plantId,
    })
    .from(diseases)
    .where(sql`(image_url IS NULL OR image_url = '')`)
    .all()) as DiseaseRow[];

  console.log(`📋 ${allDiseases.length} diseases need images\n`);

  if (allDiseases.length === 0) {
    console.log("✅ All diseases already have images!\n");
    closeDb();
    return;
  }

  // Sort by severity priority. `??` (not `||`) so that rank 0 ("critical")
  // is kept instead of being replaced by the unknown-severity rank.
  const severityOrder: Record<string, number> = { critical: 0, high: 1, moderate: 2, low: 3 };
  allDiseases.sort(
    (a, b) => (severityOrder[a.severity] ?? 99) - (severityOrder[b.severity] ?? 99),
  );

  // Filter out already-processed from state
  const processedSet = new Set(state?.processedIds ?? []);
  const pending = allDiseases.filter((d) => !processedSet.has(d.id));
  const remaining = (severity: string) => pending.filter((d) => d.severity === severity).length;

  console.log(
    `📊 Prioritization: critical=${remaining("critical")}, high=${remaining("high")}, moderate=${remaining("moderate")}, low=${remaining("low")}\n`,
  );

  if (pending.length === 0) {
    console.log("✅ All remaining diseases already attempted\n");
    closeDb();
    return;
  }

  const raw = createClient({
    url: process.env.DATABASE_URL!,
    authToken: process.env.DATABASE_TOKEN!,
  });

  const processedIds: string[] = state?.processedIds ?? [];
  const foundAtStart = totalFound;
  let updates: Array<{ id: string; url: string }> = [];
  let ddgMode = false;

  /** Writes all queued image URLs in one batch and clears the queue. */
  const flushUpdates = async () => {
    if (updates.length === 0) return;
    await raw.batch(
      updates.map((u) => ({
        sql: "UPDATE diseases SET image_url = ?, updated_at = datetime() WHERE id = ?",
        args: [u.url, u.id],
      })),
      "write",
    );
    console.log(` → Flushed ${updates.length} to DB`);
    updates = [];
  };

  /** Switches to DuckDuckGo and makes sure its package can be imported. */
  const enableDdgMode = async () => {
    ddgMode = true;
    // Install duckduckgo-images-api if not available
    try {
      await import("duckduckgo-images-api");
    } catch {
      console.log(" Installing duckduckgo-images-api...");
      const { execSync } = await import("child_process");
      execSync("npm install duckduckgo-images-api", {
        cwd: resolve(__dirname, ".."),
        stdio: "pipe",
      });
      console.log(" Done.\n");
    }
  };

  try {
    for (let i = 0; i < pending.length; i++) {
      const d = pending[i];

      // Check if current key needs rotating
      if (!ddgMode && callsThisKey >= MAX_PER_KEY) {
        if (currentKeyIndex < braveKeys.length - 1) {
          currentKeyIndex++;
          callsThisKey = 0;
          console.log(`\n 🔄 Rotating to key ${currentKeyIndex + 1}/${braveKeys.length}\n`);
        } else {
          console.log(
            `\n ⚠️ All ${braveKeys.length} Brave keys exhausted. Switching to DuckDuckGo fallback.\n`,
          );
          await enableDdgMode();
        }
      }

      // Build search query
      const plantName = d.plantId.replace(/-/g, " ").replace(/\b\w/g, (c) => c.toUpperCase());
      const query = `${d.name} ${d.scientificName} ${plantName} plant disease`;
      const sev = d.severity.padEnd(8);

      process.stdout.write(
        ` [${String(i + 1).padStart(4)}/${pending.length}] [${sev}] ${d.name.substring(0, 40).padEnd(42)} `,
      );

      let url: string | null = null;

      if (!ddgMode) {
        url = await braveImageSearch(query);
        // Key exhausted mid-query: keep rotating until a key answers or none are left.
        while (url === RATE_LIMITED && currentKeyIndex < braveKeys.length - 1) {
          currentKeyIndex++;
          callsThisKey = 0;
          console.log("\n 🔄 Rotating key...");
          url = await braveImageSearch(query);
        }
        if (url === RATE_LIMITED) {
          console.log("\n ⚠️ All keys exhausted mid-batch!");
          url = null;
          await enableDdgMode();
        }
      }

      if (ddgMode && !url) {
        url = await ddgFallbackSearch(query);
        if (!url) {
          // Try a simpler query
          url = await ddgFallbackSearch(`${d.name} disease`);
        }
      }

      processedIds.push(d.id); // Mark as attempted even if not found
      if (url) {
        updates.push({ id: d.id, url });
        totalFound++;
        console.log("✅");
      } else {
        console.log("❌");
      }

      // Flush to DB
      if (updates.length >= DB_FLUSH_BATCH) {
        await flushUpdates();
      }

      // Save state every 50. Flush first so an id is never recorded as
      // processed while its URL exists only in memory.
      if ((i + 1) % 50 === 0) {
        await flushUpdates();
        saveState(processedIds);
      }

      // Rate limit (even for DDG to be polite)
      await new Promise((r) => setTimeout(r, ddgMode ? 500 : BRAVE_DELAY));
    }

    // Final flush
    await flushUpdates();
    saveState(processedIds);
  } finally {
    raw.close();
  }

  // Final report
  const finalList = await db
    .select({ id: diseases.id, name: diseases.name, imageUrl: diseases.imageUrl })
    .from(diseases)
    .all();
  const w = finalList.filter((d) => d.imageUrl);
  const wo = finalList.filter((d) => !d.imageUrl);

  console.log(`\n${"═".repeat(50)}`);
  console.log(`📊 BRAVE IMAGE SEARCH COMPLETE`);
  console.log(`${"═".repeat(50)}`);
  console.log(` Processed: ${pending.length}`);
  console.log(` Found this run: ${totalFound - foundAtStart}`);
  console.log(` Total with images: ${w.length}/${finalList.length}`);
  console.log(` Still missing: ${wo.length}`);
  console.log(` Brave keys used: ${currentKeyIndex + 1}`);
  console.log(` Calls on current key: ${callsThisKey}`);
  console.log(` DuckDuckGo mode: ${ddgMode}`);

  if (wo.length > 0) {
    const rp = resolve(__dirname, ".disease-image-review-needed.md");
    let report = "# Disease Images - Still Missing\n\n";
    report += `Generated: ${new Date().toISOString()}\n\n`;
    report += `## Summary\n\n`;
    report += `- Total: ${finalList.length}\n`;
    report += `- With images: ${w.length}\n`;
    report += `- Still missing: ${wo.length}\n\n`;
    report += `## Missing Diseases\n\n`;
    for (const d of wo) {
      report += `- ${d.name} (\`${d.id}\`)\n`;
    }
    writeFileSync(rp, report, "utf-8");
    console.log(`\n📝 Report: ${rp}`);
  } else {
    console.log("\n✅ ALL diseases now have images!");
  }

  closeDb();
  console.log("\n");
}

main().catch((err) => {
  console.error("\n❌", err);
  process.exit(1);
});
|
|
||||||
@@ -1,152 +0,0 @@
|
|||||||
#!/usr/bin/env node
|
|
||||||
/**
|
|
||||||
* fill-brave-images.ts — Brave-only pass for remaining disease images.
|
|
||||||
*
|
|
||||||
* Runs at 1 request/sec (Brave rate limit).
|
|
||||||
* Updates diseases.json and Turso DB.
|
|
||||||
*
|
|
||||||
* Usage: cd apps/web && npx tsx scripts/fill-brave-images.ts
|
|
||||||
*/
|
|
||||||
|
|
||||||
import dotenv from "dotenv"; dotenv.config({ path: resolve(__dirname, "../.env.local") });
|
|
||||||
import { readFileSync, writeFileSync } from "fs";
|
|
||||||
import { resolve } from "path";
|
|
||||||
import { createClient } from "@libsql/client";
|
|
||||||
import { closeDb } from "../src/lib/db/index";
|
|
||||||
|
|
||||||
/** Seed file that is read and rewritten by this script. */
const DISEASES_JSON = resolve(__dirname, "../src/data/diseases.json");
/** Brave subscription token; empty string when the variable is not set. */
const BRAVE_KEY = process.env.BRAVE_API_KEY ?? "";

/** One entry of `diseases.json`; fields not listed here are carried along untouched. */
type DiseaseSeed = {
  id: string;
  plantId: string;
  name: string;
  scientificName: string;
  imageUrl?: string;
  [key: string]: unknown;
};
|
|
||||||
|
|
||||||
/**
 * Reads and parses the disease seed file at `DISEASES_JSON`.
 *
 * @returns The parsed entries (the JSON is trusted, not validated).
 */
function load(): DiseaseSeed[] {
  const rawJson = readFileSync(DISEASES_JSON, "utf-8");
  const entries = JSON.parse(rawJson) as DiseaseSeed[];
  return entries;
}
|
|
||||||
|
|
||||||
/**
 * Looks up an image for `query` through the Brave image search endpoint.
 *
 * Makes up to three attempts: a 429 response backs off (5 s, 10 s, 20 s) and a
 * thrown error waits 2 s before the next attempt.
 *
 * @returns An image URL (thumbnail preferred, non-stock sites preferred) or
 *   `null` when the response is not OK, has no results, or attempts run out.
 */
async function searchBraveImage(query: string): Promise<string | null> {
  const endpoint = new URL("https://api.search.brave.com/res/v1/images/search");
  endpoint.searchParams.set("q", query);
  endpoint.searchParams.set("count", "3");

  type Hit = { url: string; thumbnail?: { src?: string } };
  const stockSites = /(dreamstime|shutterstock|alamy|istock|123rf)/i;
  const pickSrc = (hit: Hit) => hit.thumbnail?.src ?? hit.url;
  const pause = (ms: number) => new Promise((wake) => setTimeout(wake, ms));

  for (let attempt = 0; attempt < 3; attempt++) {
    try {
      const response = await fetch(endpoint.toString(), {
        headers: { "X-Subscription-Token": BRAVE_KEY, Accept: "application/json" },
      });
      if (response.status === 429) {
        // Exponential backoff before retrying a throttled request.
        await pause(5000 * 2 ** attempt);
        continue;
      }
      if (!response.ok) return null;

      const payload = (await response.json()) as { results?: Array<Hit> };
      const hits = payload?.results ?? [];
      if (hits.length === 0) return null;

      // Prefer non-stock direct-looking images
      const nonStock = hits.find((hit) => {
        const src = pickSrc(hit);
        return Boolean(src) && !stockSites.test(src);
      });
      return pickSrc(nonStock ?? hits[0]);
    } catch {
      await pause(2000);
    }
  }
  return null;
}
|
|
||||||
|
|
||||||
/**
 * Brave-only pass over the disease seed file.
 *
 * Searches an image for every seed entry without `imageUrl`, rewrites
 * `diseases.json`, mirrors the new URLs into the database when
 * `DATABASE_URL` and `DATABASE_TOKEN` are set, and prints a summary.
 *
 * Exits the process with code 1 when no Brave key is configured.
 */
async function main() {
  console.log("\n🔍 Brave Image Search — remaining disease images\n");

  if (!BRAVE_KEY) {
    console.log("❌ No BRAVE_API_KEY in .env.local\n");
    process.exit(1);
  }

  const diseases = load();
  const pending = diseases.filter((d) => !d.imageUrl);
  console.log(`📋 ${pending.length} diseases need images\n`);

  let found = 0;

  for (let i = 0; i < pending.length; i++) {
    const d = pending[i];
    // NOTE(review): this looks the plant up in the disease list itself, so it
    // only matches when a disease id equals the plant id; otherwise the raw
    // plantId is used in the query. Confirm whether a plant list was intended.
    const plant = diseases.find((p) => p.id === d.plantId);
    const plantName = plant?.name ?? d.plantId;
    const query = `${d.name} ${plantName} plant disease symptom`;

    process.stdout.write(` [${String(i + 1).padStart(2, " ")}/${pending.length}] ${d.name.padEnd(35)} `);

    const url = await searchBraveImage(query);
    if (url) {
      d.imageUrl = url;
      found++;
      console.log(`✅`);
    } else {
      console.log(`❌`);
    }

    // 1 req/sec rate limit
    await new Promise((r) => setTimeout(r, 1100));
  }

  // Write updated JSON
  writeFileSync(DISEASES_JSON, JSON.stringify(diseases, null, 2) + "\n", "utf-8");
  console.log(`\n✅ diseases.json updated: ${found}/${pending.length} images found\n`);

  // Update DB (best effort: a DB failure is reported but does not abort the run)
  try {
    const dbUrl = process.env.DATABASE_URL;
    const dbToken = process.env.DATABASE_TOKEN;
    if (dbUrl && dbToken) {
      const updates = pending.flatMap((d) => (d.imageUrl ? [{ id: d.id, url: d.imageUrl }] : []));
      const raw = createClient({ url: dbUrl, authToken: dbToken });
      try {
        for (let i = 0; i < updates.length; i += 50) {
          await raw.batch(
            updates.slice(i, i + 50).map((u) => ({
              sql: "UPDATE diseases SET image_url = ? WHERE id = ?",
              args: [u.url, u.id],
            })),
            "write",
          );
        }
      } finally {
        // Release the connection even when a batch fails.
        raw.close();
      }
      console.log(`✅ Turso DB updated: ${updates.length} rows`);
    } else {
      console.log("⏭️ Skipping DB — no DATABASE_URL/TOKEN");
    }
  } catch (err) {
    console.log(` ⚠️ DB: ${err instanceof Error ? err.message : err}`);
  }

  // Summary (the in-memory list is exactly what was just written to disk)
  const stillMissing = diseases.filter((d) => !d.imageUrl);
  console.log(`\n${"═".repeat(50)}`);
  console.log(`📊 FINAL: ${diseases.length} total`);
  console.log(` With images: ${diseases.length - stillMissing.length}`);
  console.log(` Still missing: ${stillMissing.length}`);
  if (stillMissing.length > 0) {
    console.log(`\nStill need human curation:`);
    for (const d of stillMissing) {
      console.log(` ❌ ${d.name} (${d.id})`);
    }
  }
  console.log(`${"═".repeat(50)}\n`);

  closeDb();
}

main().catch((err) => {
  console.error("\n❌ Fatal:", err);
  process.exit(1);
});
|
|
||||||
@@ -1,268 +0,0 @@
|
|||||||
#!/usr/bin/env node
|
|
||||||
/**
|
|
||||||
* fill-ddg-images.ts — DuckDuckGo Image Search for remaining disease images.
|
|
||||||
*
|
|
||||||
* No API key needed. Searches DuckDuckGo Images API for each disease
|
|
||||||
* without an image and updates the Turso DB.
|
|
||||||
*
|
|
||||||
* Prioritizes by severity (critical → high → moderate → low).
|
|
||||||
 * Pauses 800 ms between diseases (each disease may issue up to five queries) to be polite to DuckDuckGo.
|
|
||||||
* Resumable via state file (scripts/.ddg-progress.json).
|
|
||||||
*
|
|
||||||
* Usage:
|
|
||||||
* cd apps/web && npx tsx scripts/fill-ddg-images.ts
|
|
||||||
*/
|
|
||||||
|
|
||||||
import { readFileSync, writeFileSync } from "fs";
|
|
||||||
import { resolve } from "path";
|
|
||||||
|
|
||||||
// Load .env.development for DB creds
|
|
||||||
/**
 * Parses dotenv-style text into `[key, value]` pairs.
 *
 * Blank lines, `#` comments and lines without a key before `=` are skipped.
 * Only the first `=` splits key from value. A value wrapped in matching
 * single or double quotes has the quotes removed.
 */
function parseEnvText(text: string): Array<[string, string]> {
  const pairs: Array<[string, string]> = [];
  for (const line of text.split("\n")) {
    const trimmed = line.trim();
    if (!trimmed || trimmed.startsWith("#")) continue;

    const eqIdx = trimmed.indexOf("=");
    if (eqIdx <= 0) continue;

    const key = trimmed.slice(0, eqIdx).trim();
    let val = trimmed.slice(eqIdx + 1).trim();
    const quote = val[0];
    if (val.length >= 2 && (quote === '"' || quote === "'") && val.endsWith(quote)) {
      val = val.slice(1, -1);
    }
    pairs.push([key, val]);
  }
  return pairs;
}

/**
 * Copies variables from an env file into `process.env` without overriding
 * variables that already hold a non-empty value.
 */
function loadEnvFile(filePath: string): void {
  let text: string;
  try {
    text = readFileSync(filePath, "utf-8");
  } catch (err) {
    // A missing env file is expected (best-effort load); anything else is worth surfacing.
    if ((err as { code?: string } | null)?.code !== "ENOENT") {
      console.warn(`Could not read ${filePath}:`, err);
    }
    return;
  }

  for (const [key, val] of parseEnvText(text)) {
    if (!process.env[key]) process.env[key] = val;
  }
}

const envPath = resolve(__dirname, "../.env.development");
loadEnvFile(envPath);
|
|
||||||
|
|
||||||
import { getDb, closeDb } from "../src/lib/db/index";
|
|
||||||
import { diseases } from "../src/lib/db/schema";
|
|
||||||
import { createClient } from "@libsql/client";
|
|
||||||
import { sql } from "drizzle-orm";
|
|
||||||
|
|
||||||
// DuckDuckGo
|
|
||||||
import { imageSearch } from "@mudbill/duckduckgo-images-api";
|
|
||||||
|
|
||||||
/** Columns selected from the `diseases` table for every row that still lacks an image. */
type DiseaseRow = {
  id: string;
  name: string;
  scientificName: string;
  severity: string;
  plantId: string;
};

// ─── Config ──────────────────────────────────────────────────────────────────

/** Pause after each processed disease, in ms. */
const POLITE_DELAY = 800;
/** Number of pending image updates that triggers a DB write. */
const DB_FLUSH_BATCH = 50;
/** Progress file that makes the run resumable. */
const STATE_FILE = resolve(__dirname, ".ddg-progress.json");

/** Shape of the JSON document stored in `STATE_FILE`. */
type RunState = {
  processedIds: Array<string>;
  totalFound: number;
};
|
|
||||||
|
|
||||||
/**
 * Reads the progress file written by `saveState`.
 *
 * @returns The stored state, or `null` when the file is missing, is not valid
 *   JSON, or does not have the expected shape (so callers start a fresh run
 *   instead of crashing on a half-written or hand-edited file).
 */
function loadState(): RunState | null {
  let parsed: unknown;
  try {
    parsed = JSON.parse(readFileSync(STATE_FILE, "utf-8"));
  } catch {
    // No progress file yet (or unreadable JSON): nothing to resume.
    return null;
  }

  if (typeof parsed !== "object" || parsed === null) return null;
  const candidate = parsed as Partial<Record<keyof RunState, unknown>>;
  const ids = candidate.processedIds;
  const total = candidate.totalFound;

  if (
    !Array.isArray(ids) ||
    !ids.every((id): id is string => typeof id === "string") ||
    typeof total !== "number" ||
    !Number.isInteger(total) ||
    total < 0
  ) {
    console.warn(`Ignoring malformed state file: ${STATE_FILE}`);
    return null;
  }

  return { processedIds: ids, totalFound: total };
}
|
|
||||||
|
|
||||||
/**
 * Writes a progress snapshot to `STATE_FILE` as pretty-printed JSON.
 *
 * @param processedIds Ids of diseases that have already been attempted.
 * @param totalFound Running count of images found.
 */
function saveState(processedIds: string[], totalFound: number) {
  const snapshot = { processedIds, totalFound };
  const serialized = JSON.stringify(snapshot, null, 2);
  writeFileSync(STATE_FILE, serialized, "utf-8");
}
|
|
||||||
|
|
||||||
// ─── DuckDuckGo Search ───────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
/**
 * Runs one DuckDuckGo image search and picks a usable image URL.
 *
 * @param query Search text.
 * @returns The first non-stock image URL; otherwise the first result's image
 *   or thumbnail; `null` when there are no results or the search throws.
 */
async function searchImage(query: string): Promise<string | null> {
  const stockSites = /(dreamstime|shutterstock|alamy|istock|123rf)/i;
  try {
    const hits = await imageSearch({ query, safe: true, iterations: 1, retries: 2 });
    if (!hits || hits.length === 0) return null;

    // Prefer non-stock images
    for (const hit of hits) {
      const isUsable = hit.image && !stockSites.test(hit.image);
      if (isUsable) return hit.image;
    }

    const first = hits[0];
    return first.image || first.thumbnail || null;
  } catch {
    // DuckDuckGo may block or timeout; silently skip
    return null;
  }
}
|
|
||||||
|
|
||||||
// ─── Main ────────────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
/**
 * Fills missing `image_url` values in the `diseases` table via DuckDuckGo.
 *
 * Flow: restore saved progress → select diseases without an image → search
 * most severe first, trying several query phrasings per disease → write found
 * URLs in batches → print a summary and write a markdown report of what is
 * still missing.
 */
async function main() {
  console.log("\n🦆 DuckDuckGo Disease Image Filler\n");

  const db = getDb();

  // Load state
  const state = loadState();
  const processedSet = new Set(state?.processedIds ?? []);
  const totalFoundPrev = state?.totalFound ?? 0;

  // Get all diseases that still need images
  const allDiseases = (await db
    .select({
      id: diseases.id,
      name: diseases.name,
      scientificName: diseases.scientificName,
      severity: diseases.severity,
      plantId: diseases.plantId,
    })
    .from(diseases)
    .where(sql`(image_url IS NULL OR image_url = '')`)
    .all()) as DiseaseRow[];

  console.log(`📋 ${allDiseases.length} diseases need images\n`);

  if (allDiseases.length === 0) {
    console.log("✅ All diseases already have images!\n");
    closeDb();
    return;
  }

  // Sort by severity: critical > high > moderate > low (unknown values last)
  const severityOrder: Record<string, number> = { critical: 0, high: 1, moderate: 2, low: 3 };
  allDiseases.sort((a, b) => (severityOrder[a.severity] ?? 99) - (severityOrder[b.severity] ?? 99));

  // Filter out already-processed
  const pending = allDiseases.filter((d) => !processedSet.has(d.id));
  const remaining = (severity: string) => pending.filter((d) => d.severity === severity).length;

  console.log(
    `📊 Remaining: critical=${remaining("critical")}, ` +
      `high=${remaining("high")}, ` +
      `moderate=${remaining("moderate")}, ` +
      `low=${remaining("low")}\n`,
  );

  if (pending.length === 0) {
    console.log("✅ All remaining diseases already attempted\n");
    closeDb();
    return;
  }

  const raw = createClient({
    url: process.env.DATABASE_URL!,
    authToken: process.env.DATABASE_TOKEN!,
  });

  const processedIds: string[] = state?.processedIds ?? [];
  let found = totalFoundPrev;
  let updates: Array<{ id: string; url: string }> = [];

  /** Writes all queued image URLs in one batch and clears the queue. */
  const flushUpdates = async () => {
    if (updates.length === 0) return;
    await raw.batch(
      updates.map((u) => ({
        sql: "UPDATE diseases SET image_url = ?, updated_at = datetime() WHERE id = ?",
        args: [u.url, u.id],
      })),
      "write",
    );
    console.log(` → Flushed ${updates.length} to DB`);
    updates = [];
  };

  try {
    for (let i = 0; i < pending.length; i++) {
      const d = pending[i];
      const sev = d.severity.padEnd(8);

      // Build search queries — "[disease] on [plant]" phrasing first for better
      // specificity, then progressively looser variants.
      const plantName = d.plantId.replace(/-/g, " ").replace(/\b\w/g, (c) => c.toUpperCase());
      const queries = [
        `${d.name} on ${plantName} plant disease`,
        `${d.scientificName || d.name} on ${plantName} disease`,
        `${d.name} plant disease ${plantName}`,
        `${d.name} plant`,
        `${d.name} symptom`,
      ];

      process.stdout.write(
        ` [${String(i + 1).padStart(4)}/${pending.length}] [${sev}] ${d.name.substring(0, 42).padEnd(44)} `,
      );

      // Try queries in order until we get a result
      let url: string | null = null;
      for (const q of queries) {
        url = await searchImage(q);
        if (url) break;
      }

      processedIds.push(d.id); // Mark as attempted even if not found
      if (url) {
        updates.push({ id: d.id, url });
        found++;
        console.log("✅");
      } else {
        console.log("❌");
      }

      // Flush to DB in batches
      if (updates.length >= DB_FLUSH_BATCH) {
        await flushUpdates();
      }

      // Save state every 50. Flush first so an id is never recorded as
      // processed while its URL exists only in memory.
      if ((i + 1) % 50 === 0) {
        await flushUpdates();
        saveState(processedIds, found);
      }

      // Be polite — pause after each disease
      await new Promise((r) => setTimeout(r, POLITE_DELAY));
    }

    // Final flush
    await flushUpdates();
    saveState(processedIds, found);
  } finally {
    raw.close();
  }

  // Final report
  const finalList = await db
    .select({ id: diseases.id, name: diseases.name, imageUrl: diseases.imageUrl })
    .from(diseases)
    .all();
  const w = finalList.filter((d) => d.imageUrl);
  const wo = finalList.filter((d) => !d.imageUrl);

  console.log(`\n${"═".repeat(50)}`);
  console.log(`🦆 DUCKDUCKGO SEARCH COMPLETE`);
  console.log(`${"═".repeat(50)}`);
  console.log(` Processed: ${pending.length}`);
  console.log(` Found this run: ${found - totalFoundPrev}`);
  console.log(` Total with images: ${w.length}/${finalList.length}`);
  console.log(` Still missing: ${wo.length}`);

  if (wo.length > 0) {
    const reportPath = resolve(__dirname, ".ddg-image-review-needed.md");
    let report = "# Disease Images - Still Missing (DDG)\n\n";
    report += `Generated: ${new Date().toISOString()}\n\n`;
    report += `## Summary\n\n`;
    report += `- Total: ${finalList.length}\n`;
    report += `- With images: ${w.length}\n`;
    report += `- Still missing: ${wo.length}\n\n`;
    report += `## Missing Diseases\n\n`;
    for (const d of wo) {
      report += `- ${d.name} (\`${d.id}\`)\n`;
    }
    writeFileSync(reportPath, report, "utf-8");
    console.log(`\n📝 Missing report: ${reportPath}`);
  } else {
    console.log("\n✅ ALL diseases now have images!");
  }

  closeDb();
  console.log();
}

main().catch((err) => {
  console.error("\n❌ Fatal:", err);
  process.exit(1);
});
|
|
||||||
@@ -1,440 +0,0 @@
|
|||||||
#!/usr/bin/env node
|
|
||||||
/**
|
|
||||||
* fill-disease-images.ts — Three-stage disease image pipeline
|
|
||||||
*
|
|
||||||
* For every disease without an imageUrl, tries:
|
|
||||||
* Stage 1 — Wikipedia search → pageimages
|
|
||||||
* Stage 2 — Wikimedia Commons search
|
|
||||||
* Stage 3 — Brave Image Search API (fallback, 1 req/sec, 2000/mo)
|
|
||||||
*
|
|
||||||
* Updates both diseases.json (seed) and the Turso DB.
|
|
||||||
* Flags anything found only via Brave for human review.
|
|
||||||
*
|
|
||||||
* Usage: cd apps/web && npx tsx scripts/fill-disease-images.ts
|
|
||||||
*/
|
|
||||||
|
|
||||||
import "dotenv/config";
|
|
||||||
import { readFileSync, writeFileSync, existsSync } from "fs";
|
|
||||||
import { resolve } from "path";
|
|
||||||
import { createClient } from "@libsql/client";
|
|
||||||
import { closeDb } from "../src/lib/db/index";
|
|
||||||
|
|
||||||
// ─── Types & Config ──────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
/** One entry of `diseases.json`; fields not listed here are carried along untouched. */
type DiseaseSeed = {
  id: string;
  plantId: string;
  name: string;
  scientificName: string;
  commonName?: string;
  [key: string]: unknown;
};

/** Outcome of an image lookup: the URL plus which stage produced it and how trustworthy it is. */
type ImageResult = {
  url: string;
  source: "wikipedia" | "commons" | "brave" | "missing";
  quality: "good" | "fallback" | "missing";
};

// File locations (relative to this script's directory)
const DISEASES_JSON = resolve(__dirname, "../src/data/diseases.json");
const RESULTS_FILE = resolve(__dirname, ".image-results.json");
const REPORT_FILE = resolve(__dirname, ".image-review-needed.md");

// Remote endpoints and request settings
const WIKI_API = "https://en.wikipedia.org/w/api.php";
const COMMONS_API = "https://commons.wikimedia.org/w/api.php";
const BRAVE_KEY = process.env.BRAVE_API_KEY ?? "";
const BRAVE_DELAY = 1100;
const MAX_BRAVE = 2000;
const UA = "PlantHealthKB/1.0 (plant-disease-id)";
const ORIGIN = "*";

// Brave call counter, starts at zero
let braveCount = 0;
|
|
||||||
|
|
||||||
// ─── Wikipedia Stage ─────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
/**
 * Runs a Wikipedia search and fetches page thumbnails in a single request
 * (`generator=search` combined with `prop=pageimages`).
 *
 * Makes up to three attempts: a 429 response backs off (3 s, 6 s, 12 s) and a
 * thrown error waits 2 s before the next attempt.
 *
 * @param query Search text.
 * @returns The first thumbnail URL among the returned pages, or `null` when
 *   the response is not OK, has no pages/thumbnails, or attempts run out.
 */
async function wikiSearchAndThumb(query: string): Promise<string | null> {
  const searchParams = new URLSearchParams({
    action: "query",
    generator: "search",
    gsrsearch: query,
    gsrlimit: "5",
    prop: "pageimages",
    pithumbsize: "600",
    format: "json",
    origin: ORIGIN,
  });

  type Page = { thumbnail?: { source: string } };

  for (let attempt = 0; attempt < 3; attempt++) {
    try {
      const response = await fetchWithTimeout(`${WIKI_API}?${searchParams}`, {
        headers: { "User-Agent": UA },
      });
      if (response.status === 429) {
        // Throttled: exponential backoff, then retry.
        await delay(3000 * 2 ** attempt);
        continue;
      }
      if (!response.ok) return null;

      const payload = (await response.json()) as { query?: { pages?: Record<string, Page> } };
      const pages = payload?.query?.pages;
      if (!pages) return null;

      const firstThumb = Object.values(pages)
        .map((page) => page?.thumbnail?.source)
        .find(Boolean);
      return firstThumb ?? null;
    } catch {
      await delay(2000);
    }
  }
  return null;
}
|
|
||||||
|
|
||||||
/**
 * Wikipedia stage: look for a thumbnail using the quoted disease name
 * followed by the plant name as the search query.
 *
 * @param d         Disease being processed.
 * @param plantName Host plant name appended to the query.
 * @returns Thumbnail URL, or `null` when the search yields none.
 */
async function wikiStage(d: DiseaseSeed, plantName: string): Promise<string | null> {
  const query = `"${d.name}" ${plantName}`;
  return wikiSearchAndThumb(query);
}
|
|
||||||
|
|
||||||
// ─── Commons Stage ───────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
/**
 * `fetch` wrapper that aborts the request after `ms` milliseconds.
 *
 * The abort timer is cancelled as soon as `fetch` settles, so the timeout
 * covers the wait for the response, not the later reading of its body.
 *
 * @param url  Request URL.
 * @param opts Options forwarded to `fetch`; `signal` is overridden.
 * @param ms   Timeout in milliseconds (default 15000).
 * @returns The response returned by `fetch`.
 */
async function fetchWithTimeout(url: string, opts: RequestInit, ms = 15000): Promise<Response> {
  const controller = new AbortController();
  const abortTimer = setTimeout(() => controller.abort(), ms);
  const init: RequestInit = { ...opts, signal: controller.signal };
  try {
    return await fetch(url, init);
  } finally {
    clearTimeout(abortTimer);
  }
}
|
|
||||||
|
|
||||||
/**
 * Search Wikimedia Commons' File namespace (namespace 6) and return an image
 * URL for the best-ranked hit.
 *
 * Two requests are made per attempt: a `list=search` query, then one batched
 * `prop=imageinfo` query for all hit page ids. HTTP 429 on either request
 * backs off exponentially and restarts the attempt; a thrown fetch error
 * waits 2 s. At most three attempts are made.
 *
 * @param query Search string sent as `srsearch`.
 * @returns The 600px-wide `thumburl` (or, failing that, the original `url`)
 *          of the first hit in search order that has image info; `null` when
 *          nothing is found or the requests fail.
 */
async function commonsSearchAndThumb(query: string): Promise<string | null> {
  const params = new URLSearchParams({
    action: "query",
    list: "search",
    srsearch: query,
    srnamespace: "6",
    srlimit: "5",
    format: "json",
    origin: ORIGIN,
  });
  const headers = { "User-Agent": UA };

  for (let attempt = 0; attempt < 3; attempt++) {
    try {
      const res = await fetchWithTimeout(`${COMMONS_API}?${params}`, { headers });
      if (res.status === 429) {
        await delay(3000 * 2 ** attempt);
        continue;
      }
      if (!res.ok) return null;

      const data = (await res.json()) as {
        query?: { search?: Array<{ pageid: number; title: string }> };
      };
      const hits = data?.query?.search ?? [];
      if (hits.length === 0) return null;

      // Batch-fetch imageinfo for all found page IDs.
      const imgParams = new URLSearchParams({
        action: "query",
        pageids: hits.map((h) => h.pageid).join("|"),
        prop: "imageinfo",
        iiprop: "url",
        iiurlwidth: "600",
        format: "json",
        origin: ORIGIN,
      });
      const imgRes = await fetchWithTimeout(`${COMMONS_API}?${imgParams}`, { headers });
      if (imgRes.status === 429) {
        // Rate-limited on the second request: back off and retry the attempt
        // instead of giving up on this disease.
        await delay(3000 * 2 ** attempt);
        continue;
      }
      if (!imgRes.ok) return null;

      const imgData = (await imgRes.json()) as {
        query?: {
          pages?: Record<string, { imageinfo?: Array<{ thumburl?: string; url?: string }> }>;
        };
      };
      const imgPages = imgData?.query?.pages;
      if (!imgPages) return null;

      // `pages` is keyed by page id; walk the hits instead so the search
      // ranking decides which file is picked.
      for (const hit of hits) {
        const info = imgPages[String(hit.pageid)]?.imageinfo?.[0];
        if (info?.thumburl) return info.thumburl;
        if (info?.url) return info.url;
      }
      return null;
    } catch {
      // Network error or timeout: short pause, then retry.
      await delay(2000);
    }
  }
  return null;
}
|
|
||||||
|
|
||||||
/**
 * Commons stage: search by scientific name plus plant name when the
 * scientific name is non-empty and contains neither "spp." nor "/";
 * otherwise search by disease name, plant name and the word "disease".
 *
 * @param d         Disease being processed.
 * @param plantName Host plant name appended to the query.
 * @returns Image URL, or `null` when the search yields none.
 */
async function commonsStage(d: DiseaseSeed, plantName: string): Promise<string | null> {
  const sci = d.scientificName;
  const sciIsSpecific = Boolean(sci) && !sci.includes("spp.") && !sci.includes("/");
  const query = sciIsSpecific ? `${sci} ${plantName}` : `${d.name} ${plantName} disease`;

  const found = await commonsSearchAndThumb(query);
  return found ?? null;
}
|
|
||||||
|
|
||||||
// ─── Brave Stage ─────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
/**
 * Brave stage: query the Brave image-search API for a fallback image.
 *
 * Does nothing (returns `null`) when no API key is configured or the per-run
 * budget `MAX_BRAVE` is used up. `braveCount` is incremented once per OK
 * response. HTTP 429 backs off exponentially (5 s, 10 s, 20 s); a thrown
 * fetch error waits 2 s; at most three attempts are made.
 *
 * @param d         Disease being processed.
 * @param plantName Host plant name included in the query.
 * @returns The first result that does not point at a known stock-photo site,
 *          else the first result overall, else `null`.
 */
async function braveStage(d: DiseaseSeed, plantName: string): Promise<string | null> {
  if (!BRAVE_KEY || braveCount >= MAX_BRAVE) return null;

  const stockSites = ["dreamstime", "shutterstock", "alamy", "istock", "123rf"];
  const isStock = (value: string | undefined): boolean =>
    value !== undefined && stockSites.some((site) => value.includes(site));

  const url = new URL("https://api.search.brave.com/res/v1/images/search");
  url.searchParams.set("q", `${d.name} ${plantName} plant disease symptom`);
  url.searchParams.set("count", "5");

  for (let attempt = 0; attempt < 3; attempt++) {
    try {
      const res = await fetchWithTimeout(url.toString(), {
        headers: { "X-Subscription-Token": BRAVE_KEY, Accept: "application/json" },
      });
      if (res.status === 429) {
        await delay(5000 * 2 ** attempt);
        continue;
      }
      if (!res.ok) return null;
      braveCount++;

      const data = (await res.json()) as {
        results?: Array<{ url: string; thumbnail?: { src?: string } }>;
      };
      const results = data?.results ?? [];
      if (results.length === 0) return null;

      // Prefer non-stock images. Check the result's own URL as well as the
      // thumbnail URL: when a thumbnail is present, the original code looked
      // only at the thumbnail address, so a stock site named in `url` slipped
      // through.
      for (const r of results) {
        const src = r.thumbnail?.src ?? r.url;
        if (src && !isStock(src) && !isStock(r.url)) return src;
      }
      // Everything looked like stock imagery: fall back to the top result.
      return results[0].thumbnail?.src ?? results[0].url;
    } catch {
      // Network error or timeout: short pause, then retry.
      await delay(2000);
    }
  }
  return null;
}
|
|
||||||
|
|
||||||
// ─── Helpers ─────────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
/**
 * Promise-based sleep.
 *
 * @param ms Time to wait in milliseconds.
 * @returns A promise that resolves (with no value) after `ms` milliseconds.
 */
function delay(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
|
||||||
|
|
||||||
/**
 * Read and parse the disease seed file at `DISEASES_JSON`.
 * The parsed JSON is trusted to be an array of seeds; it is not validated.
 *
 * @returns The parsed seed entries.
 */
function loadDiseases(): DiseaseSeed[] {
  const text = readFileSync(DISEASES_JSON, "utf-8");
  const parsed = JSON.parse(text) as DiseaseSeed[];
  return parsed;
}
|
|
||||||
|
|
||||||
/**
 * Resolve a display name for the given id by scanning `diseases`.
 *
 * Returns the first matching entry's `commonName`, else its `name`, else the
 * id itself (also when no entry matches).
 *
 * NOTE(review): callers in this file pass a disease's `plantId`, yet the
 * lookup matches against disease `id`s, so it presumably falls through to
 * returning the id in most cases — confirm whether a plant list was intended.
 *
 * @param diseases  Entries to search.
 * @param diseaseId Id to look up.
 * @returns The resolved name, or `diseaseId` as a fallback.
 */
function getPlantName(diseases: DiseaseSeed[], diseaseId: string): string {
  for (const entry of diseases) {
    if (entry.id === diseaseId) {
      return entry.commonName ?? entry.name ?? diseaseId;
    }
  }
  return diseaseId;
}
|
|
||||||
|
|
||||||
// ─── Main ────────────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
/**
 * Script entry point: find an image for every disease that has none.
 *
 * Diseases without an `imageUrl` and without a cached result go through
 * three stages (Wikipedia, then Wikimedia Commons, then Brave image search).
 * Results are cached in `RESULTS_FILE` so a run can resume, applied to the
 * seed JSON and the database via `applyResults`, and Brave-only or missing
 * entries are listed in `REPORT_FILE` for human review.
 *
 * `closeDb()` runs on every exit path, including the early return when
 * nothing is pending.
 */
async function main() {
  console.log("\n🔍 Plant Disease Image Filler\n");

  const diseases = loadDiseases();
  console.log(`📋 ${diseases.length} diseases loaded\n`);

  // Load existing results so an interrupted run can resume.
  let results: Record<string, ImageResult> = {};
  if (existsSync(RESULTS_FILE)) {
    try {
      results = JSON.parse(readFileSync(RESULTS_FILE, "utf-8"));
    } catch {
      /* unreadable cache: start fresh */
    }
  }

  const saveResults = (): void => {
    writeFileSync(RESULTS_FILE, JSON.stringify(results, null, 2));
  };
  const markMissing = (id: string): void => {
    results[id] = { url: "", source: "missing", quality: "missing" };
  };
  const logProgress = (i: number, total: number, d: DiseaseSeed, ok: boolean): void => {
    const pct = (((i + 1) / total) * 100).toFixed(0);
    process.stdout.write(
      ` [${pct}% ${i + 1}/${total}] ${d.name.substring(0, 40).padEnd(42)} ${ok ? "✅" : "⏭️"}\n`,
    );
  };

  try {
    const pending = diseases.filter(
      (d) => !(d.imageUrl as string)?.length && !results[d.id],
    );

    if (pending.length === 0) {
      console.log("✅ All done\n");
      await applyResults(diseases, results);
      return;
    }

    console.log(`⏳ ${pending.length} need images\n`);

    // ── Stage 1: Wikipedia ──────────────────────────────────────────────
    const s1 = pending.filter((d) => !results[d.id]);
    let s1ok = 0;
    console.log("─── Wikipedia ───\n");

    for (let i = 0; i < s1.length; i++) {
      const d = s1[i];
      const url = await wikiStage(d, getPlantName(diseases, d.plantId));
      if (url) {
        results[d.id] = { url, source: "wikipedia", quality: "good" };
        s1ok++;
      }
      logProgress(i, s1.length, d, Boolean(url));
      if ((i + 1) % 25 === 0) saveResults();
    }

    saveResults();
    console.log(`\n → ${s1ok}/${s1.length} found\n`);

    // ── Stage 2: Commons ─────────────────────────────────────────────────
    const s2 = pending.filter((d) => !results[d.id]);
    let s2ok = 0;

    if (s2.length > 0) {
      console.log("─── Wikimedia Commons ───\n");
      for (let i = 0; i < s2.length; i++) {
        const d = s2[i];
        const plantName = getPlantName(diseases, d.plantId);
        let url: string | null = null;
        let guard: ReturnType<typeof setTimeout> | undefined;
        try {
          url = await Promise.race([
            commonsStage(d, plantName),
            new Promise<null>((_, reject) => {
              guard = setTimeout(() => reject(new Error("timeout")), 25000);
            }),
          ]);
        } catch {
          /* timeout */
        } finally {
          // Without this, every lookup leaves a live 25 s timer behind that
          // keeps the process from exiting promptly.
          clearTimeout(guard);
        }
        if (url) {
          results[d.id] = { url, source: "commons", quality: "good" };
          s2ok++;
        }
        logProgress(i, s2.length, d, Boolean(url));
        if ((i + 1) % 10 === 0) saveResults();
      }
      saveResults();
      console.log(`\n → ${s2ok}/${s2.length} found\n`);
    }

    // ── Stage 3: Brave ───────────────────────────────────────────────────
    const s3 = pending.filter((d) => !results[d.id]);
    let s3ok = 0;

    if (s3.length > 0 && BRAVE_KEY) {
      console.log("─── Brave Image Search ───\n");
      let handled = 0;
      for (const d of s3) {
        if (braveCount >= MAX_BRAVE) {
          // Budget exhausted: record as missing without calling the API.
          markMissing(d.id);
          continue;
        }
        const url = await braveStage(d, getPlantName(diseases, d.plantId));
        if (url) {
          results[d.id] = { url, source: "brave", quality: "fallback" };
          s3ok++;
          process.stdout.write(` ✅ ${d.name}\n`);
        } else {
          markMissing(d.id);
          process.stdout.write(` ❌ ${d.name}\n`);
        }
        // Save periodically so an interrupted run does not lose Brave results.
        if (++handled % 10 === 0) saveResults();
        await delay(BRAVE_DELAY);
      }
      saveResults();
      console.log(`\n → ${s3ok}/${s3.length} found via Brave\n`);
    } else if (s3.length > 0) {
      console.log("─── Brave Image Search ─── → skipped (no key)\n");
      for (const d of s3) markMissing(d.id);
    }

    // ── Apply ───────────────────────────────────────────────────────────
    await applyResults(diseases, results);

    // ── Report ──────────────────────────────────────────────────────────
    const idsWithQuality = (quality: ImageResult["quality"]): string[] =>
      Object.entries(results)
        .filter(([, r]) => r.quality === quality)
        .map(([id]) => id);

    const good = idsWithQuality("good").length;
    const fallbackIds = idsWithQuality("fallback");
    const missingIds = idsWithQuality("missing");
    const fallback = fallbackIds.length;
    const missing = missingIds.length;

    let report = `# Disease Images — Human Review Needed\n\n`;
    report += `Generated: ${new Date().toISOString()}\n\n`;

    const sections = [
      { heading: "⚠️ Fallback (Brave)", ids: fallbackIds },
      { heading: "🚫 Missing", ids: missingIds },
    ];
    for (const { heading, ids } of sections) {
      if (ids.length === 0) continue;
      report += `## ${heading}\n\n`;
      for (const id of ids) {
        const d = diseases.find((x) => x.id === id);
        const r = results[id];
        report += `- **${d?.name ?? id}** (${d?.scientificName ?? ""}) on \`${d?.plantId ?? ""}\``;
        if (r?.url) report += `\n ${r.url}`;
        report += `\n\n`;
      }
    }

    // `results` only covers diseases processed by this script, so comparing
    // `good` with `diseases.length` could never hold once some diseases came
    // with an image already. Nothing left to review is the real condition.
    if (fallback === 0 && missing === 0) report += `## ✅ All images found!\n`;
    writeFileSync(REPORT_FILE, report, "utf-8");
    console.log(`📝 Review report: ${REPORT_FILE}`);

    console.log(`\n${"═".repeat(50)}`);
    console.log(`📊 Total: ${diseases.length} Good: ${good} Fallback: ${fallback} Missing: ${missing}`);
    console.log(` Brave calls: ${braveCount}`);
    console.log(`${"═".repeat(50)}\n`);
  } finally {
    closeDb();
  }
}
|
|
||||||
|
|
||||||
// ─── Apply results to JSON + DB ──────────────────────────────────────────────
|
|
||||||
|
|
||||||
/**
 * Write found image URLs back to `diseases.json` and to the database.
 *
 * Only results with a non-empty URL whose id exists in `diseases` are
 * applied. The JSON file receives `imageUrl` and `imageQuality`; the
 * database receives `image_url` only, in batches of 50 UPDATE statements.
 * Database errors are logged rather than thrown, and the client is always
 * closed.
 *
 * @param diseases All seed entries (rewritten to `DISEASES_JSON`).
 * @param results  Lookup results keyed by disease id.
 */
async function applyResults(diseases: DiseaseSeed[], results: Record<string, ImageResult>) {
  // A Set lookup replaces a linear scan of `diseases` for every result.
  const knownIds = new Set(diseases.map((d) => d.id));
  const urlMap = new Map(
    Object.entries(results).filter(([id, r]) => r.url.length > 0 && knownIds.has(id)),
  );
  if (urlMap.size === 0) {
    console.log("⏭️ No images to apply");
    return;
  }

  // JSON
  let n = 0;
  const updated = diseases.map((d) => {
    const img = urlMap.get(d.id);
    if (!img) return d;
    n++;
    return { ...d, imageUrl: img.url, imageQuality: img.quality };
  });
  writeFileSync(DISEASES_JSON, JSON.stringify(updated, null, 2) + "\n");
  console.log(`✅ diseases.json: ${n} images`);

  // DB
  const dbUrl = process.env.DATABASE_URL;
  const dbToken = process.env.DATABASE_TOKEN;
  if (!dbUrl || !dbToken) {
    console.log(" ⏭️ DB: no DATABASE_URL/TOKEN");
    return;
  }

  let raw: ReturnType<typeof createClient> | undefined;
  try {
    raw = createClient({ url: dbUrl, authToken: dbToken });
    const entries = Array.from(urlMap.entries());
    for (let i = 0; i < entries.length; i += 50) {
      await raw.batch(
        entries.slice(i, i + 50).map(([id, img]) => ({
          sql: "UPDATE diseases SET image_url = ? WHERE id = ?",
          args: [img.url, id],
        })),
        "write",
      );
    }
    console.log(`✅ Turso DB: ${entries.length} rows`);
  } catch (err) {
    console.log(` ⚠️ DB: ${err instanceof Error ? err.message : err}`);
  } finally {
    // Previously the client stayed open whenever a batch threw.
    raw?.close();
  }
}
|
|
||||||
|
|
||||||
// Run the script; any unhandled failure is printed and exits with status 1.
main().catch((err) => {
  console.error("\n❌", err);
  process.exit(1);
});
|
|
||||||
@@ -1,301 +0,0 @@
|
|||||||
#!/usr/bin/env node
|
|
||||||
/**
|
|
||||||
* fill-plant-images-v2.ts — Batch Wikipedia image fetch for remaining plants.
|
|
||||||
*
|
|
||||||
* Phase 1: Query 50 scientific names at a time via pageimages.
|
|
||||||
* Phase 2: Query 50 common names at a time.
|
|
||||||
* Phase 3: Search individually for stragglers.
|
|
||||||
*
|
|
||||||
* Usage: cd apps/web && npx tsx scripts/fill-plant-images-v2.ts
|
|
||||||
*/
|
|
||||||
|
|
||||||
import { readFileSync, writeFileSync } from "fs";
|
|
||||||
import { resolve } from "path";
|
|
||||||
|
|
||||||
// Load env: minimal KEY=VALUE reader for ../.env.development. Blank lines and
// `#` comments are skipped, variables already set in the environment are left
// alone, and an unreadable file is silently ignored.
// NOTE(review): under ES module semantics static imports are evaluated before
// this code runs — confirm the db module reads its env vars lazily.
const envPath = resolve(__dirname, "../.env.development");
try {
  const lines = readFileSync(envPath, "utf-8").split("\n");
  for (const rawLine of lines) {
    const entry = rawLine.trim();
    if (!entry || entry.startsWith("#")) continue;

    const sep = entry.indexOf("=");
    if (sep <= 0) continue; // no "=" or empty key

    const name = entry.slice(0, sep).trim();
    if (process.env[name]) continue;
    process.env[name] = entry.slice(sep + 1).trim();
  }
} catch (e) {}
|
|
||||||
|
|
||||||
import { getDb, closeDb } from "../src/lib/db/index";
|
|
||||||
import { plants } from "../src/lib/db/schema";
|
|
||||||
import { createClient } from "@libsql/client";
|
|
||||||
import { sql } from "drizzle-orm";
|
|
||||||
|
|
||||||
/** English Wikipedia Action API endpoint. */
const API = "https://en.wikipedia.org/w/api.php";
/** User-Agent header sent with every Wikipedia request. */
const UA = "PlantHealthKB/1.0";
/** Number of plants sent per batched title lookup. */
const BATCH = 50;
|
|
||||||
|
|
||||||
/** The subset of a `plants` row this script selects and works with. */
interface PlantRow {
  /** Primary key; used in the UPDATE statements. */
  id: string;
  /** Common name; used as a lookup title and in log output. */
  commonName: string;
  /** Scientific name; cleaned before being used as a lookup title. */
  scientificName: string;
}
|
|
||||||
|
|
||||||
/**
 * Normalise a scientific name into something usable as a Wikipedia title or
 * search term.
 *
 * - a standalone `X` hybrid marker becomes `x` (other capital X's, e.g. in
 *   "Xanthosoma", are no longer lower-cased);
 * - the placeholder `spp` / `spp.` is removed as a whole word, without gluing
 *   its neighbours together;
 * - periods, the multiplication sign (×) and apostrophes are dropped;
 * - runs of whitespace collapse to one space, so removing a token cannot
 *   leave a double space that breaks an exact title match.
 *
 * @param s Raw scientific name.
 * @returns The cleaned, trimmed name.
 */
function clean(s: string): string {
  return s
    .replace(/\bX\b/g, "x")
    .replace(/\bspp\b\.?/gi, " ")
    .replace(/[.\u00d7']/g, "")
    .replace(/\s+/g, " ")
    .trim();
}
|
|
||||||
|
|
||||||
/**
 * Fetch page thumbnails for a batch of Wikipedia titles in one request
 * (`prop=pageimages`, following redirects).
 *
 * HTTP 429 backs off exponentially (5 s, 10 s, 20 s); a thrown fetch error
 * waits 2 s; at most three attempts are made.
 *
 * @param titles Page titles to look up.
 * @returns Map from lower-cased title to thumbnail URL. It is keyed by the
 *          title of the page that was finally returned AND by each requested
 *          title that resolved to it, so callers can look up what they asked
 *          for. Empty when nothing was found or the request failed.
 */
async function fetchThumbs(titles: string[]): Promise<Map<string, string>> {
  if (titles.length === 0) {
    return new Map();
  }
  const p = new URLSearchParams({
    action: "query",
    titles: titles.join("|"),
    prop: "pageimages",
    pithumbsize: "400",
    redirects: "1",
    format: "json",
  });

  for (let a = 0; a < 3; a++) {
    try {
      const r = await fetch(API + "?" + p.toString(), {
        headers: { "User-Agent": UA },
      });
      if (r.status === 429) {
        await new Promise((rr) => setTimeout(rr, 5000 * Math.pow(2, a)));
        continue;
      }
      if (!r.ok) {
        return new Map();
      }

      const d = (await r.json()) as {
        query?: {
          normalized?: Array<{ from: string; to: string }>;
          redirects?: Array<{ from: string; to: string }>;
          pages?: Record<
            string,
            { title?: string; missing?: unknown; thumbnail?: { source?: string } }
          >;
        };
      };
      const pages = d?.query?.pages;
      if (!pages) {
        return new Map();
      }

      const m = new Map<string, string>();
      for (const pg of Object.values(pages)) {
        if (!pg.missing && pg.title && pg.thumbnail?.source) {
          m.set(pg.title.toLowerCase(), pg.thumbnail.source);
        }
      }

      // The API reports title normalisation and followed redirects as
      // from → to pairs. Pages come back under their final title, so without
      // this step a requested title that was normalised or redirected would
      // never be found in the map.
      const hops = new Map<string, string>();
      for (const hop of [...(d.query?.normalized ?? []), ...(d.query?.redirects ?? [])]) {
        hops.set(hop.from, hop.to);
      }
      for (const requested of titles) {
        let resolved = requested;
        // Bounded walk: normalisation, then possibly chained redirects.
        for (let step = 0; step < 5; step++) {
          const next = hops.get(resolved);
          if (next === undefined) break;
          resolved = next;
        }
        const thumb = m.get(resolved.toLowerCase());
        const key = requested.toLowerCase();
        if (thumb && !m.has(key)) {
          m.set(key, thumb);
        }
      }
      return m;
    } catch (e) {
      // Network error: short pause, then retry.
      await new Promise((rr) => setTimeout(rr, 2000));
    }
  }
  return new Map();
}
|
|
||||||
|
|
||||||
/**
 * Full-text search on Wikipedia (`generator=search`, top 3 hits) combined
 * with `prop=pageimages`, for plants the title lookups could not resolve.
 *
 * HTTP 429 backs off exponentially (5 s, 10 s, 20 s); a thrown fetch error
 * waits 2 s; at most three attempts are made.
 *
 * @param query Search string sent as `gsrsearch`.
 * @returns Thumbnail URL of the best-ranked hit that has one, or `null`.
 */
async function searchOne(query: string): Promise<string | null> {
  const p = new URLSearchParams({
    action: "query",
    generator: "search",
    gsrsearch: query,
    gsrlimit: "3",
    prop: "pageimages",
    pithumbsize: "400",
    format: "json",
  });

  for (let a = 0; a < 3; a++) {
    try {
      const r = await fetch(API + "?" + p.toString(), {
        headers: { "User-Agent": UA },
      });
      if (r.status === 429) {
        await new Promise((rr) => setTimeout(rr, 5000 * Math.pow(2, a)));
        continue;
      }
      if (!r.ok) {
        return null;
      }

      const d = (await r.json()) as {
        query?: {
          pages?: Record<string, { index?: number; thumbnail?: { source?: string } }>;
        };
      };
      const pages = d?.query?.pages;
      if (!pages) {
        return null;
      }

      // `pages` is keyed by page id, so plain iteration ignores relevance.
      // Generator results carry their search rank in `index`; honour it.
      const unranked = Number.MAX_SAFE_INTEGER;
      const ranked = Object.values(pages).sort(
        (x, y) => (x.index ?? unranked) - (y.index ?? unranked),
      );
      for (const pg of ranked) {
        if (pg.thumbnail?.source) {
          return pg.thumbnail.source;
        }
      }
      return null;
    } catch (e) {
      // Network error: short pause, then retry.
      await new Promise((rr) => setTimeout(rr, 2000));
    }
  }
  return null;
}
|
|
||||||
|
|
||||||
/**
 * Run one batched lookup phase over `plants`.
 *
 * Plants are processed `BATCH` at a time: `titleFn` yields each plant's
 * lookup title (titles of two characters or fewer are not sent),
 * `fetchThumbs` resolves thumbnails, and matches are queued as UPDATEs on
 * `plants.image_url`. The queue is written once it holds 100 or more rows and
 * again after the last chunk. A 1.5 s pause follows every chunk.
 *
 * @param plants   Plants still lacking an image.
 * @param titleFn  Derives the Wikipedia title to try for a plant.
 * @param label    Short tag shown in the progress output.
 * @param dbClient Client exposing `batch(statements, mode)`.
 * @returns The plants for which no image was found in this phase.
 */
async function batchPhase(
  plants: PlantRow[],
  titleFn: (p: PlantRow) => string,
  label: string,
  dbClient: any,
): Promise<PlantRow[]> {
  const unmatched: PlantRow[] = [];
  const queued: Array<{ id: string; url: string }> = [];

  // Write all queued updates in one batch, then empty the queue.
  const flush = async (): Promise<void> => {
    await dbClient.batch(
      queued.map((u) => ({
        sql: "UPDATE plants SET image_url = ?, updated_at = datetime('now') WHERE id = ?",
        args: [u.url, u.id],
      })),
      "write",
    );
    queued.length = 0;
  };

  for (let start = 0; start < plants.length; start += BATCH) {
    const chunk = plants.slice(start, start + BATCH);
    const titles = chunk.map(titleFn).filter((t) => t.length > 2);
    const end = Math.min(start + BATCH, plants.length);
    console.log(` [${label}] ${start + 1}-${end}/${plants.length} `);

    const thumbs = await fetchThumbs(titles);
    let hits = 0;
    for (const plant of chunk) {
      const url = thumbs.get(titleFn(plant).toLowerCase());
      if (url) {
        queued.push({ id: plant.id, url });
        hits++;
      } else {
        unmatched.push(plant);
      }
    }
    console.log(" found: " + hits);

    if (queued.length >= 100) {
      await flush();
    }
    await new Promise((r) => setTimeout(r, 1500));
  }

  if (queued.length > 0) {
    await flush();
  }

  return unmatched;
}
|
|
||||||
|
|
||||||
/**
 * Script entry point: fill `plants.image_url` for every plant without one.
 *
 * Phase 1 looks up cleaned scientific names in batches, phase 2 looks up
 * common names for what is left, and phase 3 runs an individual search per
 * remaining plant. Finally all plants are re-read, totals are printed, and a
 * markdown list of plants still without an image is written next to this
 * script.
 *
 * The raw client and the shared DB handle are released on every exit path.
 *
 * @throws Error when `DATABASE_URL` is not set and there is work to do.
 */
async function main() {
  console.log("\nPlant Image Filler v2\n");
  const db = getDb();

  try {
    const allPlants = (await db
      .select({
        id: plants.id,
        commonName: plants.commonName,
        scientificName: plants.scientificName,
      })
      .from(plants)
      .where(sql`(image_url IS NULL OR image_url = '')`)
      .all()) as PlantRow[];

    console.log("Plants needing images: " + allPlants.length + "\n");
    if (allPlants.length === 0) {
      console.log("All plants have images!\n");
      return;
    }

    // Fail with a clear message instead of handing `undefined` to the client.
    const dbUrl = process.env.DATABASE_URL;
    if (!dbUrl) {
      throw new Error("DATABASE_URL is not set");
    }
    const raw = createClient({ url: dbUrl, authToken: process.env.DATABASE_TOKEN });

    try {
      // Phase 1: Scientific name
      console.log("--- Phase 1: Scientific names ---\n");
      let remaining = await batchPhase(allPlants, (p) => clean(p.scientificName), "sci", raw);

      // Phase 2: Common name
      if (remaining.length > 0) {
        console.log("\n--- Phase 2: Common names (" + remaining.length + ") ---\n");
        remaining = await batchPhase(remaining, (p) => p.commonName, "common", raw);
      }

      // Phase 3: Search
      if (remaining.length > 0) {
        console.log("\n--- Phase 3: Search (" + remaining.length + ") ---\n");
        for (let i = 0; i < remaining.length; i++) {
          const pl = remaining[i];
          const q = clean(pl.scientificName) + " " + pl.commonName;
          console.log(" [" + (i + 1) + "/" + remaining.length + "] " + pl.commonName);
          const img = await searchOne(q);
          if (img) {
            await raw.execute({
              sql: "UPDATE plants SET image_url = ?, updated_at = datetime('now') WHERE id = ?",
              args: [img, pl.id],
            });
            console.log(" OK");
          } else {
            console.log(" MISS");
          }
          await new Promise((r) => setTimeout(r, 500));
        }
      }
    } finally {
      // Previously the client leaked whenever a phase threw.
      raw.close();
    }

    // Report
    const finalList = await db
      .select({
        id: plants.id,
        commonName: plants.commonName,
        imageUrl: plants.imageUrl,
      })
      .from(plants)
      .all();
    const w = finalList.filter((p) => p.imageUrl);
    const wo = finalList.filter((p) => !p.imageUrl);

    console.log("\n" + "=".repeat(50));
    console.log("FINAL: " + finalList.length + " plants");
    console.log(" With images: " + w.length);
    console.log(" Missing: " + wo.length);

    if (wo.length > 0) {
      const rp = resolve(__dirname, ".plant-image-review-needed.md");
      let report = "# Plant Images - Still Missing\n\n";
      report += "Generated: " + new Date().toISOString() + "\n\n";
      report += "## Missing (" + wo.length + ")\n\n";
      for (const p of wo) {
        report += "- " + p.commonName + " (" + p.id + ")\n";
      }
      writeFileSync(rp, report, "utf-8");
      console.log("Report: " + rp);
    } else {
      console.log("\nALL PLANTS HAVE IMAGES!");
    }
  } finally {
    closeDb();
  }
}
|
|
||||||
|
|
||||||
// Run the script; report any unhandled failure and exit with status 1.
main().catch((err: unknown) => {
  console.error("Error:", err);
  process.exit(1);
});
|
|
||||||
@@ -1,308 +0,0 @@
|
|||||||
#!/usr/bin/env node
|
|
||||||
/**
|
|
||||||
* fill-plant-images.ts — Fetch plant images from Wikipedia for plants missing them.
|
|
||||||
*
|
|
||||||
* Uses the Wikipedia API to search for the plant's scientific name
|
|
||||||
* and grab the page thumbnail.
|
|
||||||
*
|
|
||||||
* Usage: cd apps/web && npx tsx scripts/fill-plant-images.ts
|
|
||||||
*/
|
|
||||||
|
|
||||||
import { readFileSync, writeFileSync } from "fs";
|
|
||||||
import { resolve } from "path";
|
|
||||||
|
|
||||||
// Load env: minimal KEY=VALUE reader for ../.env.development. Blank lines and
// `#` comments are skipped, variables already present in the environment win,
// and a missing or unreadable file is silently ignored.
const envPath = resolve(__dirname, "../.env.development");
try {
  readFileSync(envPath, "utf-8")
    .split("\n")
    .map((line) => line.trim())
    .filter((line) => line.length > 0 && !line.startsWith("#"))
    .forEach((line) => {
      const eqIdx = line.indexOf("=");
      if (eqIdx <= 0) return; // no "=" or empty key
      const key = line.slice(0, eqIdx).trim();
      if (!process.env[key]) {
        process.env[key] = line.slice(eqIdx + 1).trim();
      }
    });
} catch {}
|
|
||||||
|
|
||||||
import { getDb, closeDb } from "../src/lib/db/index";
|
|
||||||
import { plants } from "../src/lib/db/schema";
|
|
||||||
import { createClient } from "@libsql/client";
|
|
||||||
import { sql } from "drizzle-orm";
|
|
||||||
|
|
||||||
/** English Wikipedia Action API endpoint. */
const WIKI_API = "https://en.wikipedia.org/w/api.php";
/** User-Agent header sent with every Wikipedia request. */
const UA = "PlantHealthKB/1.0 (plant-images)";
/** Pause between plants, in milliseconds. */
const DELAY_MS = 500;
/** Number of queued updates that triggers a DB flush. */
const BATCH_SIZE = 50;
|
|
||||||
|
|
||||||
/**
 * Direct page lookup by title — more reliable than searching when the
 * scientific name is known.
 *
 * Redirects are followed (`redirects=1`): without that, a title that is
 * itself a redirect page is returned as-is and yields no thumbnail.
 * HTTP 429 backs off exponentially (3 s, 6 s, 12 s); a thrown fetch error
 * waits 2 s; at most three attempts are made.
 *
 * @param title Wikipedia page title to look up.
 * @returns The page's 400px thumbnail URL, or `null` when the page is
 *          missing, has no image, or the request fails.
 */
async function directPageLookup(title: string): Promise<string | null> {
  const params = new URLSearchParams({
    action: "query",
    titles: title,
    prop: "pageimages",
    pithumbsize: "400",
    redirects: "1",
    format: "json",
    origin: "*",
  });

  for (let attempt = 0; attempt < 3; attempt++) {
    try {
      const res = await fetch(`${WIKI_API}?${params}`, {
        headers: { "User-Agent": UA },
      });
      if (res.status === 429) {
        await new Promise((r) => setTimeout(r, 3000 * 2 ** attempt));
        continue;
      }
      if (!res.ok) return null;

      const data = (await res.json()) as {
        query?: {
          pages?: Record<string, { thumbnail?: { source?: string }; missing?: unknown }>;
        };
      };
      const pages = data?.query?.pages;
      if (!pages) return null;

      for (const page of Object.values(pages)) {
        // Test for the key's presence: the legacy JSON format marks a missing
        // page with an empty-string `missing` value, which is falsy.
        if (!("missing" in page) && page.thumbnail?.source) {
          return page.thumbnail.source;
        }
      }
      return null;
    } catch {
      // Network error: short pause, then retry.
      await new Promise((r) => setTimeout(r, 2000));
    }
  }
  return null;
}
|
|
||||||
|
|
||||||
async function main() {
|
|
||||||
console.log("\n🌿 Fetching plant images from Wikipedia\n");
|
|
||||||
|
|
||||||
const db = getDb();
|
|
||||||
const allPlants = await db
|
|
||||||
.select({ id: plants.id, commonName: plants.commonName, scientificName: plants.scientificName })
|
|
||||||
.from(plants)
|
|
||||||
.where(sql`(image_url IS NULL OR image_url = '')`)
|
|
||||||
.all();
|
|
||||||
|
|
||||||
console.log(`📋 ${allPlants.length} plants need images\n`);
|
|
||||||
|
|
||||||
if (allPlants.length === 0) {
|
|
||||||
console.log("✅ All plants already have images!\n");
|
|
||||||
closeDb();
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
const rawClient = createClient({
|
|
||||||
url: process.env.DATABASE_URL!,
|
|
||||||
authToken: process.env.DATABASE_TOKEN!,
|
|
||||||
});
|
|
||||||
|
|
||||||
let found = 0;
|
|
||||||
const updates: { id: string; url: string }[] = [];
|
|
||||||
|
|
||||||
// Phase 1: Try direct page lookup by scientific name (most accurate)
|
|
||||||
console.log("─── Phase 1: Direct page lookup ───\n");
|
|
||||||
|
|
||||||
for (let i = 0; i < allPlants.length; i++) {
|
|
||||||
const plant = allPlants[i];
|
|
||||||
const sciName = plant.scientificName
|
|
||||||
.replace(/[×'"]/g, "")
|
|
||||||
.replace(/\s*spp\.?\s*/i, "")
|
|
||||||
.trim();
|
|
||||||
|
|
||||||
process.stdout.write(
|
|
||||||
` [${String(i + 1).padStart(3)}/${allPlants.length}] ${plant.commonName.padEnd(30)} `,
|
|
||||||
);
|
|
||||||
|
|
||||||
let url: string | null = null;
|
|
||||||
|
|
||||||
// Try scientific name first
|
|
||||||
if (sciName && sciName !== "Unknown" && sciName !== "Various") {
|
|
||||||
url = await directPageLookup(sciName);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Try common name if scientific name didn't work
|
|
||||||
if (!url) {
|
|
||||||
url = await directPageLookup(plant.commonName);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Try genus name
|
|
||||||
if (!url && sciName) {
|
|
||||||
const genus = sciName.split(/\s+/)[0];
|
|
||||||
if (genus && genus.length > 3) {
|
|
||||||
url = await directPageLookup(genus);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (url) {
|
|
||||||
updates.push({ id: plant.id, url });
|
|
||||||
found++;
|
|
||||||
process.stdout.write("✅\n");
|
|
||||||
} else {
|
|
||||||
process.stdout.write("⏭️\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Flush to DB in batches
|
|
||||||
if (updates.length >= BATCH_SIZE) {
|
|
||||||
await rawClient.batch(
|
|
||||||
updates.map((u) => ({
|
|
||||||
sql: "UPDATE plants SET image_url = ?, updated_at = datetime('now') WHERE id = ?",
|
|
||||||
args: [u.url, u.id],
|
|
||||||
})),
|
|
||||||
"write",
|
|
||||||
);
|
|
||||||
console.log(` → Flushed ${updates.length} to DB`);
|
|
||||||
updates.length = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
await new Promise((r) => setTimeout(r, DELAY_MS));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Flush remaining
|
|
||||||
if (updates.length > 0) {
|
|
||||||
await rawClient.batch(
|
|
||||||
updates.map((u) => ({
|
|
||||||
sql: "UPDATE plants SET image_url = ?, updated_at = datetime('now') WHERE id = ?",
|
|
||||||
args: [u.url, u.id],
|
|
||||||
})),
|
|
||||||
"write",
|
|
||||||
);
|
|
||||||
console.log(` → Flushed ${updates.length} to DB`);
|
|
||||||
updates.length = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
console.log(`\n✅ Phase 1 done: ${found}/${allPlants.length} plants got images\n`);
|
|
||||||
|
|
||||||
// Phase 2: Try remaining via search API
|
|
||||||
const stillMissing = await db
|
|
||||||
.select({ id: plants.id, commonName: plants.commonName, scientificName: plants.scientificName })
|
|
||||||
.from(plants)
|
|
||||||
.where(sql`(image_url IS NULL OR image_url = '')`)
|
|
||||||
.all();
|
|
||||||
|
|
||||||
if (stillMissing.length > 0) {
|
|
||||||
console.log(`─── Phase 2: Search API for ${stillMissing.length} remaining ───\n`);
|
|
||||||
|
|
||||||
for (let i = 0; i < stillMissing.length; i++) {
|
|
||||||
const plant = stillMissing[i];
|
|
||||||
const sciName = plant.scientificName.replace(/[×'"]/g, "").trim();
|
|
||||||
|
|
||||||
process.stdout.write(
|
|
||||||
` [${String(i + 1).padStart(3)}/${stillMissing.length}] ${plant.commonName.padEnd(30)} `,
|
|
||||||
);
|
|
||||||
|
|
||||||
// Search with scientific name
|
|
||||||
const searchTerm = `${sciName} ${plant.commonName}`;
|
|
||||||
const params = new URLSearchParams({
|
|
||||||
action: "query",
|
|
||||||
list: "search",
|
|
||||||
srsearch: searchTerm,
|
|
||||||
srlimit: "3",
|
|
||||||
format: "json",
|
|
||||||
origin: "*",
|
|
||||||
});
|
|
||||||
|
|
||||||
let url: string | null = null;
|
|
||||||
for (let attempt = 0; attempt < 3; attempt++) {
|
|
||||||
try {
|
|
||||||
const res = await fetch(`${WIKI_API}?${params}`, {
|
|
||||||
headers: { "User-Agent": UA },
|
|
||||||
});
|
|
||||||
if (res.status === 429) {
|
|
||||||
await new Promise((r) => setTimeout(r, 3000 * 2 ** attempt));
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (!res.ok) break;
|
|
||||||
const data = (await res.json()) as {
|
|
||||||
query?: { search?: Array<{ title: string; pageid: number }> };
|
|
||||||
};
|
|
||||||
const hits = data?.query?.search ?? [];
|
|
||||||
if (hits.length === 0) break;
|
|
||||||
|
|
||||||
// Get thumbnail for first result
|
|
||||||
for (const hit of hits) {
|
|
||||||
const pageParams = new URLSearchParams({
|
|
||||||
action: "query",
|
|
||||||
pageids: String(hit.pageid),
|
|
||||||
prop: "pageimages",
|
|
||||||
pithumbsize: "400",
|
|
||||||
format: "json",
|
|
||||||
origin: "*",
|
|
||||||
});
|
|
||||||
const pageRes = await fetch(`${WIKI_API}?${pageParams}`, {
|
|
||||||
headers: { "User-Agent": UA },
|
|
||||||
});
|
|
||||||
if (!pageRes.ok) continue;
|
|
||||||
const pageData = (await pageRes.json()) as {
|
|
||||||
query?: { pages?: Record<string, { thumbnail?: { source: string } }> };
|
|
||||||
};
|
|
||||||
const pages = pageData?.query?.pages;
|
|
||||||
if (!pages) continue;
|
|
||||||
for (const [, p] of Object.entries(pages)) {
|
|
||||||
if (p.thumbnail?.source) {
|
|
||||||
url = p.thumbnail.source;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (url) break;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
} catch {
|
|
||||||
await new Promise((r) => setTimeout(r, 2000));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (url) {
|
|
||||||
await rawClient.execute({
|
|
||||||
sql: "UPDATE plants SET image_url = ?, updated_at = datetime('now') WHERE id = ?",
|
|
||||||
args: [url, plant.id],
|
|
||||||
});
|
|
||||||
found++;
|
|
||||||
process.stdout.write("✅\n");
|
|
||||||
} else {
|
|
||||||
process.stdout.write("❌\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
await new Promise((r) => setTimeout(r, DELAY_MS));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Final count
|
|
||||||
const final = await db
|
|
||||||
.select({ id: plants.id, commonName: plants.commonName, imageUrl: plants.imageUrl })
|
|
||||||
.from(plants)
|
|
||||||
.all();
|
|
||||||
const withImg = final.filter((p) => p.imageUrl);
|
|
||||||
const withoutImg = final.filter((p) => !p.imageUrl);
|
|
||||||
|
|
||||||
console.log(`\n${"═".repeat(50)}`);
|
|
||||||
console.log(`📊 FINAL: ${final.length} plants`);
|
|
||||||
console.log(` With images: ${withImg.length}`);
|
|
||||||
console.log(` Missing images: ${withoutImg.length}`);
|
|
||||||
|
|
||||||
if (withoutImg.length > 0) {
|
|
||||||
console.log(`\n📝 Plants still needing images:`);
|
|
||||||
withoutImg.forEach((p) => console.log(` ❌ ${p.id}: ${p.commonName}`));
|
|
||||||
// Save to file for reference
|
|
||||||
const reportPath = resolve(__dirname, ".plant-image-review-needed.md");
|
|
||||||
let report = "# Plant Images — Still Missing\n\n";
|
|
||||||
report += `Generated: ${new Date().toISOString()}\n\n`;
|
|
||||||
report += `## 🚫 Plants without images (${withoutImg.length})\n\n`;
|
|
||||||
for (const p of withoutImg) {
|
|
||||||
report += `- **${p.commonName}** (\`${p.id}\`)\n`;
|
|
||||||
}
|
|
||||||
writeFileSync(reportPath, report, "utf-8");
|
|
||||||
console.log(` 📝 Review report: ${reportPath}`);
|
|
||||||
} else {
|
|
||||||
console.log("\n✅ All plants now have images!");
|
|
||||||
}
|
|
||||||
|
|
||||||
rawClient.close();
|
|
||||||
closeDb();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Script entry point: run main() and terminate with a non-zero exit code
// if it rejects, after printing the failure.
main().catch((failure: unknown) => {
  console.error("\n❌", failure);
  process.exit(1);
});
|
|
||||||
@@ -59,7 +59,7 @@ const TARGET_HEALTHY = 400;
|
|||||||
* Each disease is I/O-bound (HTTP requests), so high concurrency is safe.
|
* Each disease is I/O-bound (HTTP requests), so high concurrency is safe.
|
||||||
* The global DDG rate limiter prevents us from overwhelming DuckDuckGo.
|
* The global DDG rate limiter prevents us from overwhelming DuckDuckGo.
|
||||||
*/
|
*/
|
||||||
const DISEASE_CONCURRENCY = 20;
|
const DISEASE_CONCURRENCY = 50;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Max DDG requests per second (shared across all concurrent diseases).
|
* Max DDG requests per second (shared across all concurrent diseases).
|
||||||
@@ -68,10 +68,10 @@ const DISEASE_CONCURRENCY = 20;
|
|||||||
* parallel pages = 9 parallel DDG requests per disease at peak.
|
* parallel pages = 9 parallel DDG requests per disease at peak.
|
||||||
* The rate limiter serializes this so we don't get banned.
|
* The rate limiter serializes this so we don't get banned.
|
||||||
*/
|
*/
|
||||||
const DDG_RATE_LIMIT_RPS = 2;
|
const DDG_RATE_LIMIT_RPS = 6;
|
||||||
|
|
||||||
/** Max concurrent image downloads per disease */
|
/** Max concurrent image downloads per disease */
|
||||||
const CONCURRENT_DOWNLOADS = 2;
|
const CONCURRENT_DOWNLOADS = 50;
|
||||||
|
|
||||||
/** Minimum image size in bytes to accept */
|
/** Minimum image size in bytes to accept */
|
||||||
const MIN_IMAGE_SIZE = 10_000; // 10KB
|
const MIN_IMAGE_SIZE = 10_000; // 10KB
|
||||||
@@ -93,9 +93,10 @@ const HEALTHY_CLASS = "healthy";
|
|||||||
const SEEN_CACHE_FLUSH_INTERVAL = 20;
|
const SEEN_CACHE_FLUSH_INTERVAL = 20;
|
||||||
|
|
||||||
/** Max DDG pages to fetch per query.
|
/** Max DDG pages to fetch per query.
|
||||||
* Each page returns ~100 image results, so 3 pages × 3 queries = ~900 raw URLs
|
* Each page returns ~50 image results, so 5 pages × 3 queries = ~750 raw URLs
|
||||||
* before dedup — more than enough to find 200 unique, valid images. */
|
* before dedup. Pages beyond 3 yield progressively more novel URLs since
|
||||||
const MAX_DDG_PAGES = 3;
|
* the seen-URLs cache accumulates across runs. */
|
||||||
|
const MAX_DDG_PAGES = 5;
|
||||||
|
|
||||||
/** Healthy source queries limit */
|
/** Healthy source queries limit */
|
||||||
const MAX_HEALTHY_QUERIES = 20;
|
const MAX_HEALTHY_QUERIES = 20;
|
||||||
@@ -281,8 +282,33 @@ async function searchImagesDuckDuckGo(
|
|||||||
await sleep(5_000);
|
await sleep(5_000);
|
||||||
return searchImagesDuckDuckGo(query, vqd, page);
|
return searchImagesDuckDuckGo(query, vqd, page);
|
||||||
}
|
}
|
||||||
if (res.status === 403) return [];
|
if (res.status === 403) {
|
||||||
// Don't throw for transient errors — just return empty
|
// VQD token expired or DDG changed format — get a fresh token and retry
|
||||||
|
console.warn(` ⚠ DDG 403 on page ${page} — refreshing VQD token...`);
|
||||||
|
try {
|
||||||
|
const freshVqd = await getVqdToken(query);
|
||||||
|
await ddgLimiter.acquire();
|
||||||
|
const retryUrl = url.replace(/vqd=[^&]+/, `vqd=${freshVqd}`);
|
||||||
|
const retryRes = await fetch(retryUrl, {
|
||||||
|
headers: {
|
||||||
|
"User-Agent": UA,
|
||||||
|
Accept: "application/json",
|
||||||
|
Referer: `https://duckduckgo.com/?q=${encodeURIComponent(
|
||||||
|
query,
|
||||||
|
)}&t=h_&iax=images&ia=images`,
|
||||||
|
},
|
||||||
|
signal: AbortSignal.timeout(15_000),
|
||||||
|
});
|
||||||
|
if (retryRes.ok) {
|
||||||
|
const freshData = (await retryRes.json()) as { results: DuckDuckGoImageResult[] };
|
||||||
|
return freshData.results ?? [];
|
||||||
|
}
|
||||||
|
} catch {
|
||||||
|
// Fresh token also failed — give up on this page
|
||||||
|
}
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
console.warn(` ⚠ DDG returned ${res.status} on page ${page}`);
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -510,17 +536,19 @@ async function downloadBatch(
|
|||||||
): Promise<{ downloaded: number; failed: number; lastIndex: number }> {
|
): Promise<{ downloaded: number; failed: number; lastIndex: number }> {
|
||||||
let downloaded = 0;
|
let downloaded = 0;
|
||||||
let failed = 0;
|
let failed = 0;
|
||||||
let index = startIndex;
|
|
||||||
|
|
||||||
for (let i = 0; i < urls.length; i += CONCURRENT_DOWNLOADS) {
|
for (let i = 0; i < urls.length; i += CONCURRENT_DOWNLOADS) {
|
||||||
const chunk = urls.slice(i, i + CONCURRENT_DOWNLOADS);
|
const chunk = urls.slice(i, i + CONCURRENT_DOWNLOADS);
|
||||||
|
|
||||||
const results = await Promise.all(
|
const results = await Promise.all(
|
||||||
chunk.map(async (url) => {
|
chunk.map(async (url, chunkIdx) => {
|
||||||
const paddedIndex = String(index).padStart(4, "0");
|
// Compute index deterministically BEFORE the async download starts,
|
||||||
|
// so all parallel callbacks get a unique index (no race condition).
|
||||||
|
const fileIndex = startIndex + i + chunkIdx;
|
||||||
|
const paddedIndex = String(fileIndex).padStart(4, "0");
|
||||||
const destPath = resolve(classDir, `img_${paddedIndex}.jpg`);
|
const destPath = resolve(classDir, `img_${paddedIndex}.jpg`);
|
||||||
const success = await downloadImage(url, destPath);
|
const success = await downloadImage(url, destPath);
|
||||||
return { success, index: index++ };
|
return { success, index: fileIndex };
|
||||||
}),
|
}),
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -530,7 +558,7 @@ async function downloadBatch(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return { downloaded, failed, lastIndex: index };
|
return { downloaded, failed, lastIndex: startIndex + urls.length };
|
||||||
}
|
}
|
||||||
|
|
||||||
// ─── Query Building ─────────────────────────────────────────────────────────
|
// ─── Query Building ─────────────────────────────────────────────────────────
|
||||||
@@ -592,7 +620,10 @@ async function fillClass(
|
|||||||
indexOffset: number,
|
indexOffset: number,
|
||||||
): Promise<void> => {
|
): Promise<void> => {
|
||||||
const result = await collector();
|
const result = await collector();
|
||||||
if (result.urls.length === 0) return;
|
if (result.urls.length === 0) {
|
||||||
|
console.log(` ${label}: 0 URLs found`);
|
||||||
|
return;
|
||||||
|
}
|
||||||
console.log(` ${label}: ${result.urls.length} new URLs`);
|
console.log(` ${label}: ${result.urls.length} new URLs`);
|
||||||
|
|
||||||
// Each source writes to its own non-overlapping range
|
// Each source writes to its own non-overlapping range
|
||||||
@@ -788,7 +819,13 @@ async function main() {
|
|||||||
|
|
||||||
const classDir = resolve(DATASET_DIR, d.id);
|
const classDir = resolve(DATASET_DIR, d.id);
|
||||||
const queries = buildSearchQueries(d.name, d.plantId);
|
const queries = buildSearchQueries(d.name, d.plantId);
|
||||||
const seen = new Set<string>(seenUrlsCache[d.id] ?? []);
|
|
||||||
|
// CRITICAL: Start with a FRESH empty set for within-run search dedup.
|
||||||
|
// DO NOT pre-load the persistent cache here — it has already consumed
|
||||||
|
// most of DDG's finite result set, causing 0 new URLs per run.
|
||||||
|
// The persistent cache is still saved after processing (capped below)
|
||||||
|
// but is NOT used to filter search results on subsequent runs.
|
||||||
|
const seen = new Set<string>();
|
||||||
|
|
||||||
console.log(
|
console.log(
|
||||||
` [${d.id}] have ${d.have}, need ${d.needed} more` + ` (${d.name} / ${d.plantId})`,
|
` [${d.id}] have ${d.have}, need ${d.needed} more` + ` (${d.name} / ${d.plantId})`,
|
||||||
@@ -796,8 +833,11 @@ async function main() {
|
|||||||
|
|
||||||
const gained = await fillClass(d.id, queries, d.needed, classDir, seen);
|
const gained = await fillClass(d.id, queries, d.needed, classDir, seen);
|
||||||
|
|
||||||
// Update seen-URLs cache for this disease
|
// Update seen-URLs cache for this disease — merge with existing
|
||||||
seenUrlsCache[d.id] = Array.from(seen);
|
// and cap at 500 per disease to prevent unbounded cache growth.
|
||||||
|
const existing = seenUrlsCache[d.id] ?? [];
|
||||||
|
const merged = [...new Set([...existing, ...Array.from(seen)])];
|
||||||
|
seenUrlsCache[d.id] = merged.slice(-500);
|
||||||
return gained;
|
return gained;
|
||||||
})(),
|
})(),
|
||||||
),
|
),
|
||||||
|
|||||||
@@ -1,212 +0,0 @@
|
|||||||
#!/usr/bin/env node
|
|
||||||
/**
|
|
||||||
* fix-classifications.ts — Fix misclassified diseases in the DB.
|
|
||||||
*
|
|
||||||
* Fixes:
|
|
||||||
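 * 1. Diseases whose names contain viral indicators (mosaic, mottle, ringspot, virus, etc.)
 *    but whose stored causal agent type is not "viral"
 * 2. Diseases whose names explicitly indicate a bacterial or abiotic/environmental cause
 *    but are stored with a different causal agent type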
|
|
||||||
*
|
|
||||||
* Usage: cd apps/web && npx tsx scripts/fix-classifications.ts
|
|
||||||
*/
|
|
||||||
|
|
||||||
import { readFileSync } from "fs";
|
|
||||||
import { resolve } from "path";
|
|
||||||
|
|
||||||
// Manually load .env.development

/**
 * Parses dotenv-style text into `[key, value]` pairs.
 *
 * Blank lines, `#` comment lines, and lines without a key before `=` are
 * skipped. Keys and values are trimmed. A value wrapped in one pair of
 * matching single or double quotes has the quotes removed, so `KEY="value"`
 * does not end up carrying literal quote characters.
 *
 * @param text Raw contents of an env file.
 * @returns The pairs in file order (duplicates are kept).
 */
function parseEnvText(text: string): Array<[string, string]> {
  const pairs: Array<[string, string]> = [];
  for (const rawLine of text.split("\n")) {
    const line = rawLine.trim();
    if (!line || line.startsWith("#")) continue;

    const eqIdx = line.indexOf("=");
    if (eqIdx <= 0) continue;

    const key = line.slice(0, eqIdx).trim();
    let value = line.slice(eqIdx + 1).trim();
    const quote = value.charAt(0);
    if (value.length >= 2 && (quote === '"' || quote === "'") && value.endsWith(quote)) {
      value = value.slice(1, -1);
    }
    pairs.push([key, value]);
  }
  return pairs;
}

const envPath = resolve(__dirname, "../.env.development");
try {
  for (const [key, value] of parseEnvText(readFileSync(envPath, "utf-8"))) {
    // Variables already present (and non-empty) in the environment win.
    if (!process.env[key]) process.env[key] = value;
  }
} catch (err: unknown) {
  // Loading is best-effort: a missing file is fine, but any other failure
  // (permissions, bad encoding, ...) is reported instead of hidden.
  const code = err instanceof Error ? (err as { code?: unknown }).code : undefined;
  if (code !== "ENOENT") {
    console.warn(`⚠ Could not load ${envPath}:`, err);
  }
}
|
|
||||||
|
|
||||||
import { getDb, closeDb } from "../src/lib/db/index";
|
|
||||||
import { diseases } from "../src/lib/db/schema";
|
|
||||||
import { createClient } from "@libsql/client";
|
|
||||||
|
|
||||||
/** Causal agent categories a rule can assign. */
type AgentType = "fungal" | "bacterial" | "viral" | "environmental";

/** A name-based heuristic for deciding a disease's causal agent type. */
interface FixRule {
  /** Returns true when the disease name matches this rule. */
  test: (name: string) => boolean;
  /** The agent type a matching disease should have. */
  correctAgent: AgentType;
  /** Human-readable justification for the rule. */
  reason: string;
}

/**
 * Ordered list of reclassification rules. Callers stop at the first rule
 * whose `test` matches, so earlier entries take precedence over later ones.
 */
const FIX_RULES: FixRule[] = [
  // Diseases explicitly named as "virus" or "viral"
  {
    test: (name) => /\b(virus|viral|viroid)\b/i.test(name),
    correctAgent: "viral",
    reason: "Name explicitly indicates viral disease",
  },
  // Potexvirus, carlavirus, etc.
  // The genus prefixes may be followed by a "vir…" suffix (virus, viruses,
  // viridae). Requiring a word boundary directly after the bare prefix would
  // reject exactly the compound names this rule exists for, and the first rule
  // cannot catch them because there is no boundary before "virus" inside
  // "Potexvirus".
  {
    test: (name) =>
      /\b(?:(?:potex|carla|tobamo|poty|cucumo|ilar|nepo)(?:vir\w*)?|(?:tymo|gemini)vir\w*|tom(?:ato)? bushy stunt)\b/i.test(
        name,
      ),
    correctAgent: "viral",
    reason: "Recognized virus genus in name",
  },
  // "Mosaic" diseases (typically viral)
  {
    test: (name) => /\bmosaic\b/i.test(name),
    correctAgent: "viral",
    reason: "Mosaic symptoms are typically caused by viruses",
  },
  // "Mottle" diseases (typically viral)
  {
    test: (name) => /\bmottle\b/i.test(name),
    correctAgent: "viral",
    reason: "Mottle symptoms are typically caused by viruses",
  },
  // "Ringspot" diseases (typically viral)
  {
    test: (name) => /\bringspot\b/i.test(name),
    correctAgent: "viral",
    reason: "Ringspot symptoms are typically caused by viruses",
  },
  // "Leaf curl" (many are viral)
  {
    test: (name) => /\bleaf curl\b|\bleafroll\b|\bleaf-roll\b/i.test(name),
    correctAgent: "viral",
    reason: "Leaf curl/roll diseases are often viral",
  },
  // "Rosette" (often viral or phytoplasma)
  {
    test: (name) => /\brosette\b/i.test(name),
    correctAgent: "viral",
    reason: "Rosette diseases are typically viral or phytoplasma",
  },
  // "Yellows" (often phytoplasma/viral); names mentioning "peach" are excluded
  {
    test: (name) => /\byellows\b/i.test(name) && !/\bpeach\b/i.test(name),
    correctAgent: "viral",
    reason: "Yellows diseases are typically phytoplasma or viral",
  },
  // "Stunt" / "Dwarf" (often viral)
  {
    test: (name) => /\b(stunt|dwarf(ism)?)\b/i.test(name),
    correctAgent: "viral",
    reason: "Stunting/dwarfing diseases are often viral",
  },
  // Explicit bacterial in name
  // NOTE(review): this rule sits after the symptom heuristics above, so a name
  // such as "Bacterial mosaic" is classified viral — confirm that is intended.
  {
    test: (name) =>
      /\bbacterial\b|\bbacterium\b|\berwinia\b|\bpseudomonas\b|\bxanthomonas\b|\bralstonia\b|\bclavibacter\b|\bstreptomyces\b|\bagrobacterium\b/i.test(
        name,
      ),
    correctAgent: "bacterial",
    reason: "Name indicates bacterial disease",
  },
  // Environmental/abiotic indicators
  {
    test: (name) =>
      /\b(deficiency|abiotic|environmental|injury|damage|stress|sunscald|sunburn|chilling|freeze|frost|wind|hail|nutrient|toxicity|snow\s+(mold|scald)|winter\s+(injury|rot|kill))\b/i.test(
        name,
      ),
    correctAgent: "environmental",
    reason: "Name indicates abiotic/environmental cause",
  },
];
|
|
||||||
|
|
||||||
/**
 * Loads every disease, finds those whose stored causal agent type disagrees
 * with the first matching entry of FIX_RULES, prints the planned corrections
 * grouped by "old → new" type, applies them in batches of 50, and finally
 * prints the resulting distribution of causal agent types.
 */
async function main() {
  console.log("🔍 Fixing disease classifications\n");

  const db = getDb();
  const allDiseases = await db
    .select({ id: diseases.id, name: diseases.name, causalAgentType: diseases.causalAgentType })
    .from(diseases)
    .all();
  console.log(`📋 ${allDiseases.length} total diseases\n`);

  const rawClient = createClient({
    url: process.env.DATABASE_URL!,
    authToken: process.env.DATABASE_TOKEN!,
  });

  // Only the first rule matching a name is consulted; a disease is queued
  // only when that rule disagrees with the stored type.
  const updates: { id: string; newAgent: AgentType; rule: FixRule; oldAgent: string }[] = [];
  for (const disease of allDiseases) {
    const rule = FIX_RULES.find((candidate) => candidate.test(disease.name));
    if (rule === undefined || disease.causalAgentType === rule.correctAgent) continue;
    updates.push({
      id: disease.id,
      newAgent: rule.correctAgent,
      rule,
      oldAgent: disease.causalAgentType,
    });
  }

  console.log(`Found ${updates.length} diseases needing reclassification:\n`);

  // Group by correction type, preserving first-seen order of each transition.
  const grouped = new Map<string, { from: string; to: string; items: string[] }>();
  for (const { id, oldAgent, newAgent } of updates) {
    const transition = `${oldAgent}→${newAgent}`;
    let group = grouped.get(transition);
    if (!group) {
      group = { from: oldAgent, to: newAgent, items: [] };
      grouped.set(transition, group);
    }
    group.items.push(` ${id}`);
  }

  for (const { from, to, items } of grouped.values()) {
    console.log(`${from} → ${to} (${items.length} diseases):`);
    for (const line of items.slice(0, 10)) {
      console.log(line);
    }
    if (items.length > 10) console.log(` ... and ${items.length - 10} more`);
    console.log();
  }

  // Apply updates
  if (updates.length > 0) {
    console.log(`Applying ${updates.length} corrections...\n`);

    // Batch update in groups of 50
    const total = updates.length;
    for (let start = 0; start < total; start += 50) {
      const end = Math.min(start + 50, total);
      const statements = updates.slice(start, end).map((u) => ({
        sql: "UPDATE diseases SET causal_agent_type = ?, updated_at = datetime('now') WHERE id = ?",
        args: [u.newAgent, u.id],
      }));
      await rawClient.batch(statements, "write");
      process.stdout.write(` ${end}/${total}\n`);
    }

    console.log(`\n✅ ${updates.length} diseases reclassified`);
  } else {
    console.log("✅ No corrections needed");
  }

  // Print summary stats
  const after = await db.select({ causalAgentType: diseases.causalAgentType }).from(diseases).all();
  const counts: Record<string, number> = {};
  for (const row of after) {
    counts[row.causalAgentType] = (counts[row.causalAgentType] ?? 0) + 1;
  }
  console.log("\n📊 Updated distribution:");
  const sortedCounts = Object.entries(counts).sort();
  for (const [type, count] of sortedCounts) {
    console.log(` ${type}: ${count}`);
  }

  rawClient.close();
  closeDb();
}
|
|
||||||
|
|
||||||
// Script entry point: run main() and terminate with a non-zero exit code
// if it rejects, after printing the failure.
main().catch((failure: unknown) => {
  console.error("\n❌", failure);
  process.exit(1);
});
|
|
||||||
@@ -1,385 +0,0 @@
|
|||||||
/**
|
|
||||||
* generate-flagged-report.ts
|
|
||||||
*
|
|
||||||
* Reads all flagged content from the database and generates a pretty
|
|
||||||
* markdown report organized by content type. The report includes:
|
|
||||||
* - Summary table with counts per content type
|
|
||||||
* - Plant images flagged for review
|
|
||||||
* - Disease images flagged for review
|
|
||||||
* - Disease symptoms flagged for review
|
|
||||||
* - Disease causes flagged for review
|
|
||||||
* - Disease treatment steps flagged for review
|
|
||||||
* - Disease prevention tips flagged for review
|
|
||||||
*
|
|
||||||
* Usage:
|
|
||||||
 *   npx tsx scripts/generate-flagged-report.ts [--min-flags=N] [--output=path/to/report.md]
|
|
||||||
*
|
|
||||||
* Options:
|
|
||||||
* --min-flags Minimum flag count to include (default: 1)
|
|
||||||
* --output Output path (default: scripts/.flagged-content-review-needed.md)
|
|
||||||
*/
|
|
||||||
|
|
||||||
import dotenv from "dotenv";
|
|
||||||
import path from "node:path";
|
|
||||||
|
|
||||||
// Pick the env file next to the app root: .env.production when
// NODE_ENV=production, otherwise .env.development, and load it via dotenv.
const envFile = `../.env.${process.env.NODE_ENV === "production" ? "production" : "development"}`;
dotenv.config({ path: path.resolve(__dirname, envFile) });
|
|
||||||
import { createClient } from "@libsql/client";
|
|
||||||
import fs from "node:fs";
|
|
||||||
|
|
||||||
// ─── Config ─────────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
/**
 * Reads a command-line option given as `--name=value` or as `--name value`.
 *
 * Everything after the first `=` is returned, so values that themselves
 * contain `=` (for example file paths) are preserved intact.
 *
 * @param name Option name including the leading dashes, e.g. `--output`.
 * @param argv Argument vector to search; defaults to `process.argv`.
 * @returns The raw option value, or `undefined` when the option is absent or
 *          the space-separated form has no value after it.
 */
function readCliOption(name: string, argv: readonly string[] = process.argv): string | undefined {
  const prefix = `${name}=`;
  for (let i = 0; i < argv.length; i++) {
    const arg = argv[i];
    if (arg === undefined) continue;
    if (arg.startsWith(prefix)) return arg.slice(prefix.length);
    if (arg === name) {
      const next = argv[i + 1];
      // A following "--something" is another option, not this option's value.
      return next !== undefined && !next.startsWith("--") ? next : undefined;
    }
  }
  return undefined;
}

/**
 * Converts the raw `--min-flags` value to a number.
 *
 * @param raw Raw option text, or `undefined` when the option was not given.
 * @returns The parsed integer; 1 when the option is absent or not a number
 *          (a warning is printed in the latter case so NaN never reaches SQL).
 */
function parseMinFlags(raw: string | undefined): number {
  if (raw === undefined) return 1;
  const parsed = Number.parseInt(raw, 10);
  if (Number.isNaN(parsed)) {
    console.warn(`⚠ Ignoring invalid --min-flags value "${raw}"; using 1`);
    return 1;
  }
  return parsed;
}

/** Minimum flag count a row needs to be included in the report. */
const MIN_FLAGS = parseMinFlags(readCliOption("--min-flags"));

/** Destination of the generated markdown report. */
const OUTPUT_PATH =
  readCliOption("--output") || path.join(__dirname, ".flagged-content-review-needed.md");
|
|
||||||
|
|
||||||
// ─── DB Connection ──────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
// Fail fast with a readable message when the connection URL is missing,
// instead of handing `undefined` to the client. The auth token is passed
// through as-is; it may be unset.
if (!process.env.DATABASE_URL) {
  throw new Error(
    "DATABASE_URL is not set — check the .env file selected by NODE_ENV before running this script.",
  );
}

/** libSQL client used for every query in this script. */
const db = createClient({
  url: process.env.DATABASE_URL,
  authToken: process.env.DATABASE_TOKEN,
});
|
|
||||||
|
|
||||||
// ─── Types ──────────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
/** One row of the `flagged_content` table as returned by `SELECT *`. */
interface FlaggedRow {
  /** Primary key of the flag record. */
  id: string;
  /** Kind of content that was flagged, e.g. "plant_image" or "disease_symptoms". */
  content_type: string;
  /** Plant id when `content_type` is "plant_image"; treated as a disease id otherwise. */
  content_id: string;
  field_name: string;
  // NOTE(review): the migration declares this column with DEFAULT '' but
  // without NOT NULL, so a row could carry null here — confirm.
  notes: string;
  /** Number of times this item has been flagged. */
  flag_count: number;
  /** Timestamp text; the migration defaults it to SQLite `datetime('now')`. */
  created_at: string;
  /** Timestamp text; the migration defaults it to SQLite `datetime('now')`. */
  updated_at: string;
}
|
|
||||||
|
|
||||||
/** Columns read from the `plants` table to label flagged items. */
interface PlantRow {
  id: string;
  /** Everyday name shown first in the report label. */
  common_name: string;
  /** Scientific name, rendered in italics next to the common name. */
  scientific_name: string;
  /** Plant family, shown as "<family> family". */
  family: string;
  // NOTE(review): other scripts query plants whose image_url is NULL or '',
  // so this may not always be a usable URL — confirm before dereferencing.
  image_url: string;
}
|
|
||||||
|
|
||||||
/** Columns read from the `diseases` table to label flagged items. */
interface DiseaseRow {
  id: string;
  /** Display name of the disease. */
  name: string;
  scientific_name: string;
  /** Id of the plant this disease belongs to; used to look up the plant row. */
  plant_id: string;
  image_url: string;
}
|
|
||||||
|
|
||||||
// ─── Helpers ────────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
/**
 * Presentation metadata for each flaggable content type, keyed by the
 * `content_type` value stored in `flagged_content`: the emoji and title used
 * for the section heading and a one-sentence description printed under it.
 * Lookups for unknown content types yield `undefined`.
 */
const CONTENT_TYPE_LABELS: Record<string, { emoji: string; title: string; description: string }> = {
  plant_image: {
    title: "Plant Images Flagged for Review",
    emoji: "🪴",
    description: "Plant images that users have flagged as potentially incorrect or low quality.",
  },
  disease_image: {
    title: "Disease Images Flagged for Review",
    emoji: "📸",
    description:
      "Disease symptom images that users have flagged as potentially incorrect or misleading.",
  },
  disease_description: {
    title: "Disease Descriptions Flagged for Review",
    emoji: "📝",
    description: "Disease descriptions that users have flagged as potentially inaccurate.",
  },
  disease_symptoms: {
    title: "Disease Symptoms Flagged for Review",
    emoji: "⚠️",
    description: "Symptom descriptions that users have flagged as potentially inaccurate.",
  },
  disease_causes: {
    title: "Disease Causes Flagged for Review",
    emoji: "🔍",
    description:
      "Causes and contributing factors that users have flagged as potentially incorrect.",
  },
  disease_treatment: {
    title: "Disease Treatment Steps Flagged for Review",
    emoji: "💊",
    description:
      "Treatment instructions that users have flagged as potentially incorrect or harmful.",
  },
  disease_prevention: {
    title: "Disease Prevention Tips Flagged for Review",
    emoji: "🛡️",
    description: "Prevention tips that users have flagged as potentially incorrect or misleading.",
  },
};
|
|
||||||
|
|
||||||
/**
 * Formats a stored timestamp for display in the report
 * (en-US, e.g. "Jan 15, 2024, 12:30 PM", in the machine's local time zone).
 *
 * The `flagged_content` timestamps default to SQLite `datetime('now')`, which
 * produces `YYYY-MM-DD HH:MM:SS` in UTC with no zone marker. Passed to
 * `new Date()` unchanged, that text is read as local time and the result is
 * shifted by the local UTC offset, so such values are rewritten to an explicit
 * UTC ISO string first.
 *
 * @param iso Timestamp text: SQLite `YYYY-MM-DD HH:MM:SS` or any string
 *            `Date` can parse.
 * @returns The formatted date, or `iso` unchanged when it cannot be parsed
 *          (rather than the literal text "Invalid Date").
 */
function formatDate(iso: string): string {
  const sqliteUtc = /^(\d{4}-\d{2}-\d{2}) (\d{2}:\d{2}:\d{2}(?:\.\d+)?)$/.exec(iso);
  const d = new Date(sqliteUtc ? `${sqliteUtc[1]}T${sqliteUtc[2]}Z` : iso);
  if (Number.isNaN(d.getTime())) return iso;

  return d.toLocaleDateString("en-US", {
    year: "numeric",
    month: "short",
    day: "numeric",
    hour: "2-digit",
    minute: "2-digit",
  });
}
|
|
||||||
|
|
||||||
// ─── Main ───────────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
async function main() {
|
|
||||||
console.log(`📋 Generating flagged content report (min flags: ${MIN_FLAGS})...`);
|
|
||||||
|
|
||||||
// Fetch flagged content
|
|
||||||
const flaggedRs = await db.execute({
|
|
||||||
sql: "SELECT * FROM flagged_content WHERE flag_count >= ? ORDER BY content_type, flag_count DESC, updated_at DESC",
|
|
||||||
args: [MIN_FLAGS],
|
|
||||||
});
|
|
||||||
const flaggedRows = flaggedRs.rows as unknown as FlaggedRow[];
|
|
||||||
|
|
||||||
if (flaggedRows.length === 0) {
|
|
||||||
const report = [
|
|
||||||
"# 🚩 Flagged Content Review — Nothing to Review",
|
|
||||||
"",
|
|
||||||
`Generated: ${new Date().toISOString()}`,
|
|
||||||
"",
|
|
||||||
"**No content has been flagged for review yet.**",
|
|
||||||
"",
|
|
||||||
"Flagged items will appear here once users flag content for manual review.",
|
|
||||||
"",
|
|
||||||
"---",
|
|
||||||
"",
|
|
||||||
`_Report generated with min-flags=${MIN_FLAGS}_`,
|
|
||||||
"",
|
|
||||||
].join("\n");
|
|
||||||
|
|
||||||
fs.writeFileSync(OUTPUT_PATH, report, "utf-8");
|
|
||||||
console.log(`✅ Report written to ${OUTPUT_PATH} (no flagged items)`);
|
|
||||||
db.close();
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Collect all unique plant and disease IDs
|
|
||||||
const plantIds = new Set<string>();
|
|
||||||
const diseaseIds = new Set<string>();
|
|
||||||
|
|
||||||
for (const row of flaggedRows) {
|
|
||||||
if (row.content_type === "plant_image") {
|
|
||||||
plantIds.add(row.content_id);
|
|
||||||
} else {
|
|
||||||
diseaseIds.add(row.content_id);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fetch plant names
|
|
||||||
const plantMap = new Map<string, PlantRow>();
|
|
||||||
if (plantIds.size > 0) {
|
|
||||||
const plantRs = await db.execute({
|
|
||||||
sql: `SELECT id, common_name, scientific_name, family, image_url FROM plants WHERE id IN (${[...plantIds].map(() => "?").join(",")})`,
|
|
||||||
args: [...plantIds],
|
|
||||||
});
|
|
||||||
for (const row of plantRs.rows as unknown as PlantRow[]) {
|
|
||||||
plantMap.set(row.id, row);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fetch disease names + their plant references
|
|
||||||
const diseaseMap = new Map<string, DiseaseRow>();
|
|
||||||
if (diseaseIds.size > 0) {
|
|
||||||
const diseaseRs = await db.execute({
|
|
||||||
sql: `SELECT id, name, scientific_name, plant_id, image_url FROM diseases WHERE id IN (${[...diseaseIds].map(() => "?").join(",")})`,
|
|
||||||
args: [...diseaseIds],
|
|
||||||
});
|
|
||||||
for (const row of diseaseRs.rows as unknown as DiseaseRow[]) {
|
|
||||||
diseaseMap.set(row.id, row);
|
|
||||||
if (!plantMap.has(row.plant_id)) {
|
|
||||||
plantIds.add(row.plant_id);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Fetch any missing plant references for diseases
|
|
||||||
if (plantIds.size > 0) {
|
|
||||||
const missingPlantIds = [...plantIds].filter((id) => !plantMap.has(id));
|
|
||||||
if (missingPlantIds.length > 0) {
|
|
||||||
const plantRs = await db.execute({
|
|
||||||
sql: `SELECT id, common_name, scientific_name, family, image_url FROM plants WHERE id IN (${missingPlantIds.map(() => "?").join(",")})`,
|
|
||||||
args: missingPlantIds,
|
|
||||||
});
|
|
||||||
for (const row of plantRs.rows as unknown as PlantRow[]) {
|
|
||||||
plantMap.set(row.id, row);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Group by content type
|
|
||||||
const groups: Record<string, FlaggedRow[]> = {};
|
|
||||||
for (const row of flaggedRows) {
|
|
||||||
if (!groups[row.content_type]) groups[row.content_type] = [];
|
|
||||||
groups[row.content_type].push(row);
|
|
||||||
}
|
|
||||||
|
|
||||||
// ─── Build Report ────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
const lines: string[] = [];
|
|
||||||
const totalFlags = flaggedRows.reduce((sum, r) => sum + r.flag_count, 0);
|
|
||||||
|
|
||||||
lines.push("# 🚩 Flagged Content — Manual Review Needed");
|
|
||||||
lines.push("");
|
|
||||||
lines.push(`Generated: ${new Date().toISOString()}`);
|
|
||||||
lines.push("");
|
|
||||||
lines.push(
|
|
||||||
flaggedRows.length === 1
|
|
||||||
? `**${flaggedRows.length} item** flagged for review (${totalFlags} total flags).`
|
|
||||||
: `**${flaggedRows.length} items** flagged for review (${totalFlags} total flags).`,
|
|
||||||
);
|
|
||||||
lines.push("");
|
|
||||||
lines.push("Most data in this knowledge base is not reviewed by humans. ");
|
|
||||||
lines.push("Items listed below have been flagged by users for manual review. ");
|
|
||||||
lines.push("Please review each item and take appropriate action.");
|
|
||||||
lines.push("");
|
|
||||||
|
|
||||||
// Summary table
|
|
||||||
lines.push("## 📊 Summary");
|
|
||||||
lines.push("");
|
|
||||||
lines.push("| Content Type | Count | Total Flags |");
|
|
||||||
lines.push("|---|---|---|");
|
|
||||||
const orderedTypes = [
|
|
||||||
"plant_image",
|
|
||||||
"disease_image",
|
|
||||||
"disease_description",
|
|
||||||
"disease_symptoms",
|
|
||||||
"disease_causes",
|
|
||||||
"disease_treatment",
|
|
||||||
"disease_prevention",
|
|
||||||
];
|
|
||||||
for (const type of orderedTypes) {
|
|
||||||
const items = groups[type];
|
|
||||||
if (!items) continue;
|
|
||||||
const label = CONTENT_TYPE_LABELS[type]?.title ?? type;
|
|
||||||
const count = items.length;
|
|
||||||
const sumFlags = items.reduce((s, r) => s + r.flag_count, 0);
|
|
||||||
lines.push(`| ${label} | ${count} | ${sumFlags} |`);
|
|
||||||
}
|
|
||||||
lines.push(`| **Total** | **${flaggedRows.length}** | **${totalFlags}** |`);
|
|
||||||
lines.push("");
|
|
||||||
lines.push("---");
|
|
||||||
lines.push("");
|
|
||||||
|
|
||||||
// Detail sections per content type
|
|
||||||
for (const type of orderedTypes) {
|
|
||||||
const items = groups[type];
|
|
||||||
if (!items) continue;
|
|
||||||
|
|
||||||
const config = CONTENT_TYPE_LABELS[type];
|
|
||||||
lines.push(`## ${config?.emoji ?? "📋"} ${config?.title ?? type}`);
|
|
||||||
lines.push("");
|
|
||||||
lines.push(config?.description ?? "");
|
|
||||||
lines.push("");
|
|
||||||
lines.push(`**${items.length} item${items.length === 1 ? "" : "s"} flagged**`);
|
|
||||||
lines.push("");
|
|
||||||
|
|
||||||
for (const item of items) {
|
|
||||||
// Build label
|
|
||||||
let label = item.content_id;
|
|
||||||
let plantLabel = "";
|
|
||||||
|
|
||||||
if (type === "plant_image") {
|
|
||||||
const plant = plantMap.get(item.content_id);
|
|
||||||
if (plant) {
|
|
||||||
label = `${plant.common_name} (_${plant.scientific_name}_)`;
|
|
||||||
plantLabel = `${plant.family} family`;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
const disease = diseaseMap.get(item.content_id);
|
|
||||||
if (disease) {
|
|
||||||
const plant = plantMap.get(disease.plant_id);
|
|
||||||
const plantName = plant?.common_name ?? disease.plant_id;
|
|
||||||
label = `${disease.name} (_${disease.scientific_name}_) on **${plantName}**`;
|
|
||||||
plantLabel = `Affects: ${plantName}`;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const flagWord = item.flag_count === 1 ? "flag" : "flags";
|
|
||||||
const firstFlagged = formatDate(item.created_at);
|
|
||||||
const lastFlagged = formatDate(item.updated_at);
|
|
||||||
|
|
||||||
lines.push(`### ${label}`);
|
|
||||||
lines.push("");
|
|
||||||
lines.push(`- **Field:** \`${item.field_name}\``);
|
|
||||||
lines.push(`- **Flags:** ${item.flag_count} ${flagWord}`);
|
|
||||||
lines.push(`- **First flagged:** ${firstFlagged}`);
|
|
||||||
lines.push(`- **Last flagged:** ${lastFlagged}`);
|
|
||||||
if (plantLabel) {
|
|
||||||
lines.push(`- **${plantLabel}**`);
|
|
||||||
}
|
|
||||||
if (item.notes) {
|
|
||||||
lines.push(`- **User notes:** ${item.notes}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Show the content data if we can fetch it
|
|
||||||
if (type === "plant_image") {
|
|
||||||
const plant = plantMap.get(item.content_id);
|
|
||||||
if (plant?.image_url) {
|
|
||||||
lines.push("");
|
|
||||||
lines.push(` `);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
const disease = diseaseMap.get(item.content_id);
|
|
||||||
if (type === "disease_image" && disease?.image_url) {
|
|
||||||
lines.push("");
|
|
||||||
lines.push(` `);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
lines.push("");
|
|
||||||
}
|
|
||||||
|
|
||||||
lines.push("---");
|
|
||||||
lines.push("");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Footer
|
|
||||||
lines.push("## ℹ️ How This Works");
|
|
||||||
lines.push("");
|
|
||||||
lines.push("1. **Users** click the 🚩 Flag button on any content they believe needs review.");
|
|
||||||
lines.push("2. **The system** stores the flag in the database with a counter.");
|
|
||||||
lines.push(
|
|
||||||
"3. **This report** is generated by querying the database and formatting the results.",
|
|
||||||
);
|
|
||||||
lines.push("4. **Reviewers** go through each item and take action (fix, update, or dismiss).");
|
|
||||||
lines.push("");
|
|
||||||
lines.push("### Taking Action");
|
|
||||||
lines.push("");
|
|
||||||
lines.push("After reviewing an item, you can clear its flags by running:");
|
|
||||||
lines.push("");
|
|
||||||
lines.push("```sql");
|
|
||||||
lines.push("DELETE FROM flagged_content WHERE id = '<item-id>';");
|
|
||||||
lines.push("```");
|
|
||||||
lines.push("");
|
|
||||||
lines.push("Or clear all flags for a specific item by running:");
|
|
||||||
lines.push("");
|
|
||||||
lines.push("```sql");
|
|
||||||
lines.push(
|
|
||||||
"UPDATE flagged_content SET flag_count = 0 WHERE content_id = '<id>' AND field_name = '<field>';",
|
|
||||||
);
|
|
||||||
lines.push("```");
|
|
||||||
lines.push("");
|
|
||||||
lines.push("---");
|
|
||||||
lines.push("");
|
|
||||||
lines.push(`_Report generated with min-flags=${MIN_FLAGS}_`);
|
|
||||||
|
|
||||||
// Write report
|
|
||||||
fs.writeFileSync(OUTPUT_PATH, lines.join("\n"), "utf-8");
|
|
||||||
console.log(`✅ Report written to ${OUTPUT_PATH}`);
|
|
||||||
console.log(` ${flaggedRows.length} items, ${totalFlags} total flags`);
|
|
||||||
db.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Script entry point: log any rejection from main() and exit with status 1.
main().catch((failure) => {
  console.error("❌ Failed to generate report:", failure);
  process.exit(1);
});
|
|
||||||
@@ -1,254 +0,0 @@
|
|||||||
#!/usr/bin/env node
|
|
||||||
/**
|
|
||||||
* Full Knowledge Base Generator
|
|
||||||
*
|
|
||||||
* Combines the Wikipedia-scraped data with template-based generation
|
|
||||||
* to produce 9,300+ verified disease entries.
|
|
||||||
*
|
|
||||||
* Strategy:
|
|
||||||
* 1. Plants with Wikipedia data → use that data (already in DB)
|
|
||||||
* 2. Plants without Wikipedia data → generate from family + generic templates
|
|
||||||
* 3. All plants get generic cross-family diseases added
|
|
||||||
* 4. Target: ~30 diseases per plant → ~9,300 total
|
|
||||||
*
|
|
||||||
* Usage: cd apps/web && npx tsx scripts/generate-full-kb.ts
|
|
||||||
*/
|
|
||||||
|
|
||||||
import "dotenv/config";
|
|
||||||
import { sql } from "drizzle-orm";
|
|
||||||
import { getDb, closeDb } from "../src/lib/db/index";
|
|
||||||
import { diseases, plants } from "../src/lib/db/schema";
|
|
||||||
import PLANTS from "./plant-list";
|
|
||||||
import { GENERIC_TEMPLATES, getTemplatesForFamily, slugify } from "./disease-templates";
|
|
||||||
import type { CausalAgentType, Prevalence, Severity } from "../src/lib/types";
|
|
||||||
|
|
||||||
/**
 * One generated disease row, shaped to match the columns written by the
 * batch `INSERT` in `main()`.
 */
interface DiseaseEntry {
  /** Primary key, built in `main()` as `<plant slug>-<slugified template name>`. */
  id: string;
  /** Slug of the plant this disease belongs to. */
  plantId: string;
  name: string;
  scientificName: string;
  causalAgentType: CausalAgentType;
  description: string;

  // The list fields below are JSON-stringified before being inserted.
  symptoms: string[];
  causes: string[];
  treatment: string[];
  prevention: string[];
  /** Filled in by the lookalike-linking step of `main()`; starts out empty. */
  lookalikeIds: string[];

  severity: Severity;
  prevalence: Prevalence;
  sourceUrl: string;
}
|
|
||||||
|
|
||||||
/**
 * Builds the boilerplate three-sentence description for a generated disease.
 *
 * @param name  Common name of the disease.
 * @param sci   Scientific name of the causal agent; when empty, the text
 *              falls back to "a plant pathogen".
 * @param plant Common name of the affected plant.
 * @param type  Causal agent type, inserted verbatim before the word "disease".
 * @returns The description as a single space-separated string.
 */
function makeDesc(name: string, sci: string, plant: string, type: string): string {
  const agent = sci || "a plant pathogen";
  const sentences = [
    `${name} is a ${type} disease affecting ${plant}.`,
    `Caused by ${agent}, this disease can cause significant damage under favorable environmental conditions.`,
    "Early detection and integrated management are essential for controlling spread and minimizing crop losses.",
  ];
  return sentences.join(" ");
}
|
|
||||||
|
|
||||||
/**
 * Generates template-based disease entries for every plant in `PLANTS`,
 * inserts the plants and diseases that are missing from the database, and
 * prints the resulting table sizes.
 *
 * Steps:
 *  1-2. Load the ids of the plants and diseases already stored.
 *  3.   For each plant, instantiate its family templates plus the generic
 *       templates, skipping ids that already exist in the database or were
 *       already generated for this plant.
 *  4.   Link each non-"low" severity entry to up to three other generated
 *       entries on the same plant that share its causal agent type.
 *  5.   Insert plants that are not in the database yet.
 *  6.   Insert the generated diseases in batches through a raw libSQL client.
 *  7.   Print final counts.
 *
 * Both the raw client and the shared Drizzle connection are released in
 * `finally` blocks so that a failed query does not leave them open.
 *
 * @throws Error when `DATABASE_URL` is not set at the time of the batch insert.
 */
async function main() {
  console.log("🌱 Full Knowledge Base Generator\n");
  const db = getDb();

  try {
    // Step 1: Collect the ids of plants already in the database
    const existingPlantIds = new Set<string>();
    for (const row of await db.select({ id: plants.id }).from(plants)) {
      existingPlantIds.add(row.id);
    }
    console.log(`📊 Database has ${existingPlantIds.size} existing plants`);

    // Step 2: Get existing disease IDs to avoid duplicates
    const existingDiseaseIds = new Set<string>();
    for (const row of await db.select({ id: diseases.id }).from(diseases)) {
      existingDiseaseIds.add(row.id);
    }
    console.log(`📊 Database has ${existingDiseaseIds.size} existing diseases\n`);

    // Step 3: Generate diseases for ALL plants (both existing and new)
    const allPlants = new Map<string, (typeof PLANTS)[0]>();
    for (const p of PLANTS) allPlants.set(p.slug, p);

    const targetMin = 15; // minimum diseases per plant
    const toInsert: DiseaseEntry[] = [];
    let plantsWithEnough = 0;
    let plantsNeedingFill = 0;

    for (const [slug, plant] of allPlants) {
      // Approximate the number of diseases this plant already has by counting
      // stored ids that start with "<slug>-". A slug that is a prefix of
      // another slug will over-count; this only affects the summary counters.
      let existingCount = 0;
      if (existingPlantIds.has(slug)) {
        const prefix = slug + "-";
        for (const did of existingDiseaseIds) {
          if (did.startsWith(prefix)) existingCount++;
        }
      }

      // Family-specific templates come first, then the generic ones.
      const availableTemplates = [...getTemplatesForFamily(plant.fam), ...GENERIC_TEMPLATES];

      // Ids generated for this plant so far. Two templates that slugify to the
      // same id would otherwise yield duplicate entries (the second would be
      // dropped by INSERT OR IGNORE anyway, but it would inflate the counts and
      // could repeat an id inside lookalikeIds).
      const generatedIds = new Set<string>();
      const plantDiseases: DiseaseEntry[] = [];

      for (const tmpl of availableTemplates) {
        const diseaseId = `${slug}-${slugify(tmpl.name)}`;

        // Skip if existing in DB (from Wikipedia) or already generated above
        if (existingDiseaseIds.has(diseaseId) || generatedIds.has(diseaseId)) continue;
        generatedIds.add(diseaseId);

        plantDiseases.push({
          id: diseaseId,
          plantId: slug,
          name: tmpl.name,
          scientificName: tmpl.sciName,
          causalAgentType: tmpl.type,
          description: makeDesc(tmpl.name, tmpl.sciName, plant.name, tmpl.type),
          symptoms: tmpl.symptoms,
          causes: tmpl.causes,
          treatment: tmpl.treatment,
          prevention: tmpl.prevention,
          lookalikeIds: [],
          severity: tmpl.severity,
          prevalence: tmpl.severity === "critical" ? "uncommon" : "common",
          sourceUrl: "https://pddc.wisc.edu/ (UW-Madison PDDC extension factsheets)",
        });
      }

      // Every generated entry is inserted; the threshold only decides which
      // summary counter the plant is reported under.
      toInsert.push(...plantDiseases);
      if (existingCount + plantDiseases.length >= targetMin) {
        plantsWithEnough++;
      } else {
        plantsNeedingFill++;
      }
    }

    // Step 4: Link lookalikes (same plant, same type)
    console.log("🔗 Linking lookalike diseases...");
    const byPlant = new Map<string, DiseaseEntry[]>();
    for (const d of toInsert) {
      const list = byPlant.get(d.plantId) || [];
      list.push(d);
      byPlant.set(d.plantId, list);
    }
    for (const [, di] of byPlant) {
      for (const d of di) {
        if (d.severity === "low") continue;
        const sameType = di.filter((o) => o.causalAgentType === d.causalAgentType && o.id !== d.id);
        d.lookalikeIds = sameType.slice(0, 3).map((o) => o.id);
      }
    }

    console.log(`\n📊 Generated ${toInsert.length} new disease entries`);
    console.log(`📊 Plants with enough diseases: ${plantsWithEnough}`);
    console.log(`📊 Plants needing more sources: ${plantsNeedingFill}`);

    // Step 5: Insert plants that don't exist yet
    let newPlantsCount = 0;
    for (const [slug, p] of allPlants) {
      if (existingPlantIds.has(slug)) continue;
      await db
        .insert(plants)
        .values({
          id: slug,
          commonName: p.name,
          scientificName: p.sci,
          family: p.fam,
          category: p.cat,
          careSummary: p.care,
          imageUrl: "",
        })
        .onConflictDoNothing();
      newPlantsCount++;
    }
    console.log(`\n🌱 Added ${newPlantsCount} new plants`);

    // Step 6: Bulk insert using raw client
    if (toInsert.length > 0) {
      console.log(`\n💾 Inserting ${toInsert.length} diseases via batch...`);
      const url = process.env.DATABASE_URL;
      if (!url) throw new Error("DATABASE_URL is not set");

      const { createClient } = await import("@libsql/client");
      const rawClient = createClient({ url, authToken: process.env.DATABASE_TOKEN });

      try {
        const BATCH = 100;
        for (let i = 0; i < toInsert.length; i += BATCH) {
          const chunk = toInsert.slice(i, i + BATCH);
          const stmts = chunk.map((d) => ({
            sql: `INSERT OR IGNORE INTO diseases (id, plant_id, name, scientific_name, causal_agent_type, description, symptoms, causes, treatment, prevention, lookalike_ids, severity, prevalence, source_url) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
            args: [
              d.id,
              d.plantId,
              d.name,
              d.scientificName,
              d.causalAgentType,
              d.description,
              JSON.stringify(d.symptoms),
              JSON.stringify(d.causes),
              JSON.stringify(d.treatment),
              JSON.stringify(d.prevention),
              JSON.stringify(d.lookalikeIds),
              d.severity,
              d.prevalence ?? "uncommon",
              d.sourceUrl,
            ],
          }));
          await rawClient.batch(stmts, "write");
          process.stdout.write(` ${Math.min(i + BATCH, toInsert.length)}/${toInsert.length}\n`);
        }
      } finally {
        // Release the extra connection even when a batch fails.
        rawClient.close();
      }
    }

    // Step 7: Final stats
    const [pc] = await db.select({ c: sql<number>`COUNT(*)` }).from(plants);
    const [dc] = await db.select({ c: sql<number>`COUNT(*)` }).from(diseases);
    const byType = await db
      .select({
        type: diseases.causalAgentType,
        count: sql<number>`COUNT(*)`,
      })
      .from(diseases)
      .groupBy(diseases.causalAgentType);

    console.log(`\n✅ FINAL DATABASE STATE`);
    console.log(` ${pc.c} plants`);
    console.log(` ${dc.c} diseases`);
    for (const r of byType) {
      console.log(` ${String(r.type).padEnd(16)} ${r.count}`);
    }
  } finally {
    closeDb();
  }
}
|
|
||||||
|
|
||||||
// Script entry point: report a rejected main() and exit with status 1.
main().catch((failure) => {
  console.error("❌ Fatal:", failure);
  process.exit(1);
});
|
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -1,71 +0,0 @@
|
|||||||
#!/usr/bin/env node
|
|
||||||
/**
|
|
||||||
* Retry Wikipedia pages that got rate-limited
|
|
||||||
*
|
|
||||||
* Uses longer delays (5s) for pages that previously got 429.
|
|
||||||
*/
|
|
||||||
import "dotenv/config";
|
|
||||||
import { closeDb } from "../src/lib/db/index";
|
|
||||||
import { readFileSync, writeFileSync, existsSync, mkdirSync } from "fs";
|
|
||||||
import { resolve, dirname } from "path";
|
|
||||||
import { fileURLToPath } from "url";
|
|
||||||
|
|
||||||
// Directory containing this script; the cache folder lives next to it.
const __filedir = dirname(fileURLToPath(import.meta.url));

/**
 * Reads a cached value from `.scraper-cache/<encoded key>.json`.
 *
 * @param k Cache key; it is URI-component-encoded to form the file name.
 * @returns The file contents as UTF-8 text, or `null` when no file exists.
 */
function cacheGet(k: string): string | null {
  const entryPath = resolve(__filedir, ".scraper-cache", encodeURIComponent(k) + ".json");
  if (!existsSync(entryPath)) {
    return null;
  }
  return readFileSync(entryPath, "utf-8");
}
|
|
||||||
/**
 * Writes a value to `.scraper-cache/<encoded key>.json`, creating the cache
 * directory when needed.
 *
 * @param k Cache key; it is URI-component-encoded to form the file name.
 * @param v Text to store, written as UTF-8.
 */
function cacheSet(k: string, v: string) {
  const d = resolve(__filedir, ".scraper-cache");
  // With `recursive: true`, mkdirSync succeeds when the directory already
  // exists, so no separate existsSync check (and its race window) is needed.
  mkdirSync(d, { recursive: true });
  writeFileSync(resolve(d, encodeURIComponent(k) + ".json"), v, "utf-8");
}
|
|
||||||
|
|
||||||
// Wikipedia "List of <crop> diseases" page titles to fetch again, in the
// order they are processed by main().
const PAGES_TO_RETRY = [
  "cranberry",
  "cucurbit",
  "grape",
  "hops",
  "rice",
  "rose",
  "sorghum",
  "soybean",
  "spinach",
  "strawberry",
  "sugarcane",
  "sunflower",
  "sweet_potato",
].map((crop) => `List_of_${crop}_diseases`);
|
|
||||||
|
|
||||||
/**
 * Returns the wikitext of an English Wikipedia page, using the on-disk cache
 * when a non-empty entry exists and storing freshly fetched text in it.
 *
 * @param page Page title, e.g. "List_of_rice_diseases".
 * @returns The page's wikitext.
 * @throws Error with message `HTTP <status>` on a non-OK response, or with
 *         the API's `error.info` text when the response body reports an error.
 */
async function fetchWT(page: string): Promise<string> {
  const cacheKey = `wt-${page}`;
  const cached = cacheGet(cacheKey);
  if (cached) {
    return cached;
  }

  const endpoint = `https://en.wikipedia.org/w/api.php?action=parse&page=${encodeURIComponent(page)}&prop=wikitext&format=json&formatversion=2`;
  const response = await fetch(endpoint, {
    headers: { "User-Agent": "PlantDiseaseKB/1.0 (research)" },
  });
  if (!response.ok) {
    throw new Error(`HTTP ${response.status}`);
  }

  const payload = (await response.json()) as {
    parse: { wikitext: string };
    error?: { info: string };
  };
  if (payload.error) {
    throw new Error(payload.error.info);
  }

  const wikitext = payload.parse.wikitext;
  cacheSet(cacheKey, wikitext);
  return wikitext;
}
|
|
||||||
|
|
||||||
/**
 * Fetches every page in `PAGES_TO_RETRY` one at a time, waiting 5-7 seconds
 * before each attempt, and prints a per-page result plus a final tally.
 * A failure on one page is logged and does not stop the loop.
 */
async function main() {
  const pause = (ms: number) => new Promise((wake) => setTimeout(wake, ms));
  let fetched = 0;

  for (const page of PAGES_TO_RETRY) {
    process.stdout.write(`📋 ${page}... `);
    try {
      // Randomised delay before each request (also applied to cache hits).
      await pause(5000 + Math.random() * 2000);
      const wikitext = await fetchWT(page);
      console.log(`✅ ${wikitext.length} bytes`);
      fetched++;
    } catch (e) {
      const reason = e instanceof Error ? e.message : e;
      console.log(`❌ ${reason}`);
    }
  }

  await pause(2000);
  console.log(`\nDone. ${fetched}/${PAGES_TO_RETRY.length} pages fetched`);
  closeDb();
}
|
|
||||||
|
|
||||||
// Script entry point. Log the rejection and exit non-zero so callers (shells,
// CI) can detect the failure, matching the other scripts in this directory.
main().catch((err) => {
  console.error(err);
  process.exit(1);
});
|
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -1,91 +0,0 @@
|
|||||||
#!/usr/bin/env node
|
|
||||||
/**
|
|
||||||
* Seed Existing JSON Data into Turso
|
|
||||||
*
|
|
||||||
* Reads the existing plants.json and diseases.json files and inserts them
|
|
||||||
* into the Turso database via Drizzle ORM.
|
|
||||||
*
|
|
||||||
* Usage:
|
|
||||||
* cd apps/web && npx tsx scripts/seed-existing.ts
|
|
||||||
*
|
|
||||||
* Environment: DATABASE_URL and DATABASE_TOKEN from .env.development
|
|
||||||
*/
|
|
||||||
|
|
||||||
import "dotenv/config";
import { readFileSync } from "fs";
import { resolve } from "path";
import { fileURLToPath } from "url";
import { sql } from "drizzle-orm";
import { getDb, closeDb } from "../src/lib/db/index";
import { plants, diseases } from "../src/lib/db/schema";
import type { Plant, Disease } from "../src/lib/types";
|
|
||||||
|
|
||||||
// ─── Load JSON data ──────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
// Directory containing this script. fileURLToPath decodes percent-escapes and
// handles Windows drive letters, which reading `URL.pathname` directly does
// not, so paths with spaces or non-ASCII characters resolve correctly.
const __dirname = fileURLToPath(new URL(".", import.meta.url));

const plantsPath = resolve(__dirname, "../src/data/plants.json");
const diseasesPath = resolve(__dirname, "../src/data/diseases.json");

// NOTE(review): the parsed JSON is cast without validation; the files are
// presumably generated by this project and already match these types.
const rawPlants = JSON.parse(readFileSync(plantsPath, "utf-8")) as Plant[];
const rawDiseases = JSON.parse(readFileSync(diseasesPath, "utf-8")) as Disease[];
|
|
||||||
|
|
||||||
// ─── Seed ────────────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
/**
 * Inserts every plant and disease loaded from the JSON files, one row at a
 * time, ignoring rows whose insert conflicts with an existing row, then
 * prints the resulting row counts of both tables and closes the connection.
 */
async function main() {
  const db = getDb();

  console.log(`Seeding ${rawPlants.length} plants...`);
  for (const plant of rawPlants) {
    const { id, commonName, scientificName, family, category, careSummary, imageUrl } = plant;
    await db
      .insert(plants)
      .values({ id, commonName, scientificName, family, category, careSummary, imageUrl })
      .onConflictDoNothing();
  }
  // The count logged here is the number of rows attempted, not rows created.
  console.log(`✅ ${rawPlants.length} plants inserted`);

  console.log(`Seeding ${rawDiseases.length} diseases...`);
  for (const disease of rawDiseases) {
    const row = {
      id: disease.id,
      plantId: disease.plantId,
      name: disease.name,
      scientificName: disease.scientificName,
      causalAgentType: disease.causalAgentType,
      description: disease.description,
      symptoms: disease.symptoms,
      causes: disease.causes,
      treatment: disease.treatment,
      prevention: disease.prevention,
      // The JSON field is named lookalikeDiseaseIds; the table column is lookalikeIds.
      lookalikeIds: disease.lookalikeDiseaseIds,
      severity: disease.severity,
      prevalence: disease.prevalence ?? "uncommon",
      sourceUrl: "",
    };
    await db.insert(diseases).values(row).onConflictDoNothing();
  }
  console.log(`✅ ${rawDiseases.length} diseases inserted`);

  // Verify
  const plantTotals = await db.select({ count: sql<number>`COUNT(*)` }).from(plants);
  const diseaseTotals = await db.select({ count: sql<number>`COUNT(*)` }).from(diseases);
  const plantCount = plantTotals[0];
  const diseaseCount = diseaseTotals[0];
  console.log(`\n📊 Database now has:`);
  console.log(` ${plantCount.count} plants`);
  console.log(` ${diseaseCount.count} diseases`);

  closeDb();
}
|
|
||||||
|
|
||||||
// Script entry point: report a rejected main() and exit with status 1.
main().catch((failure) => {
  console.error("❌ Seed failed:", failure);
  process.exit(1);
});
|
|
||||||
@@ -1,218 +0,0 @@
|
|||||||
#!/usr/bin/env node
|
|
||||||
/**
|
|
||||||
* Smoke test script for the Plant Disease Knowledge Base API.
|
|
||||||
* Validates all seed data has no missing references and all API endpoints work.
|
|
||||||
*
|
|
||||||
* Usage:
|
|
||||||
* # With dev server running:
|
|
||||||
* node scripts/smoke-test.mjs
|
|
||||||
*
|
|
||||||
* # With custom base URL:
|
|
||||||
* BASE_URL=http://localhost:3001 node scripts/smoke-test.mjs
|
|
||||||
*/
|
|
||||||
|
|
||||||
import { validateKnowledgeBase, plants, diseases } from "../src/lib/api/diseases.ts";
|
|
||||||
|
|
||||||
// Server under test; override with the BASE_URL environment variable.
const BASE_URL = process.env.BASE_URL || "http://localhost:3000";

// Running tally shared by pass() and fail(); `errors` collects one
// { test, message } record per failure for the final summary.
const results = { passed: 0, failed: 0, errors: [] };

/**
 * Records a passing check and prints it.
 * @param test Human-readable description of the check.
 */
function pass(test) {
  results.passed += 1;
  console.log(` ✅ ${test}`);
}
|
|
||||||
|
|
||||||
/**
 * Records a failing check, keeps its details for the summary, and prints it.
 * @param test    Name of the check that failed.
 * @param message Explanation of what went wrong.
 */
function fail(test, message) {
  results.failed += 1;
  const record = { test, message };
  results.errors.push(record);
  console.log(` ❌ ${test}: ${message}`);
}
|
|
||||||
|
|
||||||
/**
 * Requests `BASE_URL + path` and parses the response body as JSON.
 *
 * @param path URL path (including any query string) appended to BASE_URL.
 * @returns `{ status, data, headers }` where `headers` is a plain object
 *          built from the response headers.
 * Rejects if the request fails or the body is not valid JSON.
 */
async function fetchJSON(path) {
  const response = await fetch(`${BASE_URL}${path}`);
  const body = await response.json();
  const headerMap = Object.fromEntries(response.headers);
  return { status: response.status, data: body, headers: headerMap };
}
|
|
||||||
|
|
||||||
console.log("\n🌿 Plant Disease Knowledge Base — Smoke Tests\n");

// ── Phase 1: Data Validation ──────────────────────────────────────────────
// Checks run directly against the imported seed data; no server is needed.
console.log("Phase 1: Seed Data Validation");

const validationErrors = validateKnowledgeBase();
if (validationErrors.length !== 0) {
  fail("Knowledge base validation", validationErrors.join("; "));
} else {
  pass("Knowledge base validation passed (no errors)");
}

if (plants.length < 20) {
  fail("Plant count", `Only ${plants.length} plants (need ≥20)`);
} else {
  pass(`Plant count: ${plants.length} (≥20)`);
}

if (diseases.length < 80) {
  fail("Disease count", `Only ${diseases.length} diseases (need ≥80)`);
} else {
  pass(`Disease count: ${diseases.length} (≥80)`);
}

const uniquePlantIds = new Set(diseases.map((d) => d.plantId));
if (uniquePlantIds.size < 20) {
  fail("Disease plant coverage", `Only ${uniquePlantIds.size} plants have diseases`);
} else {
  pass(`Diseases span ${uniquePlantIds.size} plants (≥20)`);
}

const causalTypes = new Set(diseases.map((d) => d.causalAgentType));
if (causalTypes.size !== 4) {
  fail("Causal agent types", `Only ${causalTypes.size}/4 types present`);
} else {
  pass(`All 4 causal agent types present: ${[...causalTypes].join(", ")}`);
}

// ── Phase 2: API Endpoint Tests ───────────────────────────────────────────
console.log("\nPhase 2: API Endpoint Tests");

// One entry per endpoint, executed in order. `ok` decides the outcome,
// `passed` / `failed` build the message for the respective branch. Anything
// thrown while fetching or evaluating an entry is reported as a failure of
// that entry with the error's message.
const endpointChecks = [
  {
    path: "/api/plants",
    ok: ({ status, data }) =>
      status === 200 && Array.isArray(data.plants) && data.plants.length >= 20,
    passed: ({ data }) => `GET /api/plants returns 200 with ${data.plants.length} plants`,
    failed: ({ status, data }) => `Status ${status}, plants: ${data.plants?.length ?? "N/A"}`,
  },
  {
    path: "/api/plants?search=tomato",
    ok: ({ status, data }) => status === 200 && data.plants.length > 0,
    passed: ({ data }) => `GET /api/plants?search=tomato returns ${data.plants.length} results`,
    failed: ({ status }) => `Status ${status}`,
  },
  {
    path: "/api/plants/tomato",
    ok: ({ status, data }) =>
      status === 200 && data.plant?.id === "tomato" && data.diseases?.length >= 3,
    passed: ({ data }) =>
      `GET /api/plants/tomato returns 200 with ${data.diseases.length} diseases`,
    failed: ({ status, data }) => `Status ${status}, plant: ${data.plant?.id ?? "N/A"}`,
  },
  {
    // Unknown plant id (should 404)
    path: "/api/plants/unknown-id",
    ok: ({ status, data }) => status === 404 && data.error === "Not Found",
    passed: () => "GET /api/plants/unknown-id returns 404",
    failed: ({ status }) => `Expected 404, got ${status}`,
  },
  {
    path: "/api/diseases",
    ok: ({ status, data }) =>
      status === 200 && Array.isArray(data.diseases) && data.diseases.length >= 80,
    passed: ({ data }) => `GET /api/diseases returns 200 with ${data.diseases.length} diseases`,
    failed: ({ status, data }) => `Status ${status}, diseases: ${data.diseases?.length ?? "N/A"}`,
  },
  {
    path: "/api/diseases?plantId=tomato",
    ok: ({ status, data }) =>
      status === 200 &&
      data.diseases.length >= 3 &&
      data.diseases.every((d) => d.plantId === "tomato"),
    passed: ({ data }) =>
      `GET /api/diseases?plantId=tomato returns ${data.diseases.length} tomato diseases`,
    failed: ({ status, data }) => `Status ${status}, count: ${data.diseases?.length ?? "N/A"}`,
  },
  {
    path: "/api/diseases?search=blight",
    ok: ({ status, data }) => status === 200 && data.diseases.length >= 2,
    passed: ({ data }) =>
      `GET /api/diseases?search=blight returns ${data.diseases.length} results (≥2)`,
    failed: ({ status, data }) => `Status ${status}, count: ${data.diseases?.length ?? "N/A"}`,
  },
  {
    path: "/api/diseases/early-blight",
    ok: ({ status, data }) =>
      status === 200 &&
      data.disease?.id === "early-blight" &&
      data.plant?.id === "tomato" &&
      Array.isArray(data.lookalikes),
    passed: () => `GET /api/diseases/early-blight returns 200 with plant and lookalikes`,
    failed: ({ status }) => `Status ${status}`,
  },
  {
    // Unknown disease id (should 404)
    path: "/api/diseases/unknown-id",
    ok: ({ status, data }) => status === 404 && data.error === "Not Found",
    passed: () => "GET /api/diseases/unknown-id returns 404",
    failed: ({ status }) => `Expected 404, got ${status}`,
  },
];

for (const check of endpointChecks) {
  const label = `GET ${check.path}`;
  try {
    const response = await fetchJSON(check.path);
    if (check.ok(response)) {
      pass(check.passed(response));
    } else {
      fail(label, check.failed(response));
    }
  } catch (e) {
    fail(label, e.message);
  }
}

// ── Phase 3: Response Headers ─────────────────────────────────────────────
console.log("\nPhase 3: Response Headers");

try {
  const { headers } = await fetchJSON("/api/plants");
  const cacheControl = headers["cache-control"] || "";
  if (!cacheControl.includes("max-age=3600")) {
    fail("Cache-Control header", `Expected max-age=3600, got: ${cacheControl}`);
  } else {
    pass(`Cache-Control header present: ${cacheControl}`);
  }
} catch (e) {
  fail("Cache-Control header", e.message);
}

// ── Summary ───────────────────────────────────────────────────────────────
console.log("\n" + "─".repeat(50));
console.log(`Results: ${results.passed} passed, ${results.failed} failed`);

if (results.failed > 0) {
  console.log("\nFailed tests:");
  results.errors.forEach(({ test, message }) => {
    console.log(` • ${test}: ${message}`);
  });
  process.exit(1);
} else {
  console.log("\n🎉 All smoke tests passed!\n");
  process.exit(0);
}
|
|
||||||
@@ -1,67 +0,0 @@
|
|||||||
/**
|
|
||||||
* Quick test of Wikipedia image API for disease search terms.
|
|
||||||
* Run: cd apps/web && npx tsx scripts/test-wiki-images.ts
|
|
||||||
*/
|
|
||||||
const API = "https://en.wikipedia.org/w/api.php";
|
|
||||||
|
|
||||||
/**
 * Sends a `list=search` query for `term` to the endpoint in `API` and returns
 * the parsed JSON body.
 *
 * At most one hit is requested (`srlimit=1`). The return type is asserted,
 * not validated, so callers should treat every level as possibly absent.
 *
 * @param term - Search phrase; URL-encoded before being placed in the query string.
 * @returns The decoded response; `query.search`, when present, holds the hits.
 */
async function search(term: string) {
  const queryParts = [
    "action=query",
    "list=search",
    `srsearch=${encodeURIComponent(term)}`,
    "format=json",
    "srlimit=1",
    "origin=*",
  ];
  const response = await fetch(`${API}?${queryParts.join("&")}`, {
    headers: { "User-Agent": "PlantHealthKB/1.0" },
  });
  const payload = await response.json();
  return payload as { query?: { search?: Array<{ title: string; pageid: number }> } };
}
|
|
||||||
|
|
||||||
/**
 * Sends a `prop=pageimages` query for the page named `title` to the endpoint
 * in `API` and returns the parsed JSON body.
 *
 * A thumbnail size of 400 is requested (`pithumbsize=400`). The return type is
 * asserted, not validated, so callers should treat every level as possibly absent.
 *
 * @param title - Page title; URL-encoded before being placed in the query string.
 * @returns The decoded response; `query.pages`, when present, is a map whose
 *   entries may carry a `thumbnail.source` URL.
 */
async function getImg(title: string) {
  const queryParts = [
    "action=query",
    `titles=${encodeURIComponent(title)}`,
    "prop=pageimages",
    "format=json",
    "pithumbsize=400",
    "origin=*",
  ];
  const response = await fetch(`${API}?${queryParts.join("&")}`, {
    headers: { "User-Agent": "PlantHealthKB/1.0" },
  });
  const payload = await response.json();
  return payload as {
    query?: { pages?: Record<string, { thumbnail?: { source: string } }> };
  };
}
|
|
||||||
|
|
||||||
/**
 * Looks up one search term and prints the outcome on a single console line.
 *
 * Calls `search` for the term, takes the first hit, asks `getImg` for that
 * page's image data, and logs either the thumbnail URL or a marker naming the
 * step that came back empty ("NO PAGE", "NO PAGES", "NO IMG").
 *
 * Every path waits 400 ms before resolving, so back-to-back calls are spaced
 * out regardless of which branch was taken.
 *
 * @param term - Search phrase to look up.
 * @returns A promise that resolves once the line is logged and the pause has elapsed.
 */
async function testOne(term: string): Promise<void> {
  const s = await search(term);
  const page = s?.query?.search?.[0];

  if (!page) {
    console.log(`${term.padEnd(40)} → NO PAGE`);
  } else {
    const img = await getImg(page.title);
    const pages = img?.query?.pages;
    if (!pages) {
      // No early return here: two requests were just made, so the pause below
      // must still run before the caller issues the next lookup.
      console.log(term, "→ NO PAGES");
    } else {
      // Only the first entry of the pages map is inspected.
      const first = Object.values(pages)[0];
      const thumb = first?.thumbnail?.source;
      console.log(`${term.padEnd(40)} → ${page.title.padEnd(50)} → ${thumb ?? "NO IMG"}`);
    }
  }

  // Fixed pause between lookups.
  await new Promise((r) => setTimeout(r, 400));
}
|
|
||||||
|
|
||||||
/**
 * Runs `testOne` for a fixed list of plant-disease search terms.
 *
 * Terms are processed strictly one after another, each awaited before the next
 * starts, so output lines appear in list order.
 */
async function main() {
  const searchTerms: readonly string[] = [
    "Phytophthora infestans Late Blight",
    "Early Blight",
    "Septoria Leaf Spot",
    "Powdery Mildew",
    "Fusarium oxysporum",
    "Citrus Canker",
    "Root Rot Pythium",
    "Downy Mildew Peronospora",
    "Bacterial Leaf Spot Xanthomonas",
    "Apple Scab Venturia inaequalis",
    "Fire Blight Erwinia amylovora",
    "Blossom End Rot",
    "Tomato Mosaic Virus",
    "Rust Puccinia",
    "Black Spot Diplocarpon rosae",
    "Sooty Mold Capnodium",
    "Clubroot Plasmodiophora brassicae",
    "Anthracnose Colletotrichum",
  ];

  console.log("Searching Wikipedia for disease images...\n");

  // Sequential on purpose: each lookup finishes before the next begins.
  for (const searchTerm of searchTerms) {
    await testOne(searchTerm);
  }
}
|
|
||||||
|
|
||||||
// Entry point: run the script, and on failure log the error and mark the
// process as failed so callers do not see exit status 0.
main().catch((err: unknown) => {
  console.error(err);
  process.exitCode = 1;
});
|
|
||||||
Reference in New Issue
Block a user