script cleanup

This commit is contained in:
2026-06-09 14:58:33 -04:00
parent 8bda14ab63
commit 6379860123
21 changed files with 57 additions and 10346 deletions

View File

@@ -1,53 +0,0 @@
/**
* apply-flag-migration.ts
*
* Applies the flagged_content table migration to Turso.
* Run with: npx tsx scripts/apply-flag-migration.ts
*/
import dotenv from "dotenv";
import path from "node:path";
// Choose which env file to load: NODE_ENV === "production" selects .env.production,
// every other value (including unset) selects .env.development. The path is resolved
// relative to this script's directory.
const envFile =
process.env.NODE_ENV === "production" ? "../.env.production" : "../.env.development";
dotenv.config({ path: path.resolve(__dirname, envFile) });
import { createClient } from "@libsql/client";
/**
 * Creates the `flagged_content` table and its two lookup indexes.
 *
 * Every statement uses `IF NOT EXISTS`, so running the script again is harmless.
 *
 * @throws Error when DATABASE_URL is not set; otherwise whatever the client
 *   raises when a statement fails.
 */
async function main(): Promise<void> {
  const url = process.env.DATABASE_URL;
  if (!url) {
    // Fail with a clear message instead of handing `undefined` to the client.
    throw new Error("DATABASE_URL is not set");
  }

  const db = createClient({
    url,
    authToken: process.env.DATABASE_TOKEN,
  });

  try {
    console.log("Applying migration: create flagged_content table...");
    await db.execute(`
      CREATE TABLE IF NOT EXISTS flagged_content (
        id text PRIMARY KEY NOT NULL,
        content_type text NOT NULL,
        content_id text NOT NULL,
        field_name text NOT NULL,
        notes text DEFAULT '',
        flag_count integer DEFAULT 1 NOT NULL,
        created_at text DEFAULT (datetime('now')) NOT NULL,
        updated_at text DEFAULT (datetime('now')) NOT NULL
      )
    `);
    await db.execute(`
      CREATE INDEX IF NOT EXISTS idx_flagged_content_type ON flagged_content (content_type)
    `);
    await db.execute(`
      CREATE INDEX IF NOT EXISTS idx_flagged_content_id ON flagged_content (content_id)
    `);
    console.log("Migration applied successfully.");
  } finally {
    // Release the connection on failure as well as on success.
    db.close();
  }
}

// Report the failure and exit non-zero so shells and CI notice it.
main().catch((err) => {
  console.error("Migration failed:", err);
  process.exit(1);
});

View File

@@ -1,23 +0,0 @@
import "dotenv/config";
import { createClient } from "@libsql/client";
/**
 * Adds the `image_url` column to `diseases`, backfills NULLs with '' and records
 * the migration in `__drizzle_migrations`.
 *
 * NOTE(review): `ALTER TABLE ... ADD COLUMN` has no IF NOT EXISTS guard here, so a
 * second run (or a re-run after a partial failure) fails on the first statement.
 *
 * @throws Error when DATABASE_URL is not set; otherwise whatever the client
 *   raises when a statement fails.
 */
async function main(): Promise<void> {
  const url = process.env.DATABASE_URL;
  if (!url) {
    throw new Error("DATABASE_URL is not set");
  }

  const db = createClient({
    url,
    authToken: process.env.DATABASE_TOKEN,
  });

  try {
    console.log("Applying migration: add image_url to diseases...");
    await db.execute("ALTER TABLE diseases ADD COLUMN image_url TEXT DEFAULT ''");
    await db.execute("UPDATE diseases SET image_url = '' WHERE image_url IS NULL");
    // Mark migration as applied
    await db.execute(
      "INSERT INTO __drizzle_migrations (hash, created_at) VALUES ('0001_add-disease-images', datetime('now'))",
    );
    console.log("Migration applied successfully.");
  } finally {
    // Release the connection on failure as well as on success.
    db.close();
  }
}

// Exit non-zero on failure; logging alone would leave the exit status at 0.
main().catch((err) => {
  console.error("Migration failed:", err);
  process.exit(1);
});

View File

@@ -1,19 +0,0 @@
import { createClient } from "@libsql/client";
// Prints how many rows of `diseases` have a non-empty image_url: one overall line,
// then one line per severity.
const c = createClient({
  url: process.env.DATABASE_URL,
  authToken: process.env.DATABASE_TOKEN,
});

try {
  const r = await c.execute("SELECT COUNT(*) as cnt FROM diseases");
  const r2 = await c.execute(
    `SELECT SUM(CASE WHEN image_url IS NOT NULL AND image_url != '' THEN 1 ELSE 0 END) as has, SUM(CASE WHEN image_url IS NULL OR image_url = '' THEN 1 ELSE 0 END) as miss FROM diseases`,
  );
  const r3 = await c.execute(
    `SELECT severity, COUNT(*) as total, SUM(CASE WHEN image_url IS NOT NULL AND image_url != '' THEN 1 ELSE 0 END) as has FROM diseases GROUP BY severity ORDER BY severity`,
  );

  console.log(
    `Total: ${r.rows[0].cnt} | With images: ${r2.rows[0].has} | Missing: ${r2.rows[0].miss}`,
  );
  for (const row of r3.rows) {
    // A NULL severity would otherwise print as "undefined"; label it instead.
    const label = String(row.severity ?? "unknown").padEnd(10);
    console.log(` ${label}: ${row.has}/${row.total}`);
  }
} finally {
  // Close the connection even when one of the queries throws.
  c.close();
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,691 +0,0 @@
/**
* Expand DB with comprehensive plant disease list from Wikipedia.
*
* Reads /tmp/plant_diseases/plant_diseases_comprehensive.txt,
* compares against existing DB entries (by name, case-insensitive),
* and inserts new entries with reasonable defaults.
*
* Usage:
* cd apps/web && export $(grep -v '^#' .env.development | xargs) && npx tsx scripts/expand-diseases.ts
*/
import "dotenv/config";
import { readFileSync } from "fs";
import { eq, sql } from "drizzle-orm";
import { getDb, closeDb } from "../src/lib/db/index";
import { plants, diseases } from "../src/lib/db/schema";
import type { CausalAgentType, Severity } from "../src/lib/types";
// ─── Parse the comprehensive list ─────────────────────────────────────────────
/** One disease read from the list file. */
interface DiseaseEntry {
  /** Text that follows the "N." numbering, trimmed. */
  name: string;
  /** The line after the name when it starts with "http", otherwise "". */
  sourceUrl: string;
}

/**
 * Reads a numbered disease list and returns its entries in file order.
 *
 * A line of the form "12. Some name" starts an entry. When the following line
 * (trimmed) starts with "http" it is taken as the entry's source URL and consumed;
 * otherwise the entry gets an empty URL and the following line is parsed normally.
 * All other lines are ignored.
 *
 * @param filePath Path of the UTF-8 text file to read.
 * @returns The parsed entries; duplicates are not removed here.
 * @throws Whatever `readFileSync` throws when the file cannot be read.
 */
function parseComprehensiveList(filePath: string): DiseaseEntry[] {
  // Drop a leading byte-order mark; it would stop the first line from matching.
  const content = readFileSync(filePath, "utf-8").replace(/^\uFEFF/, "");
  // Split on LF or CRLF. With a plain "\n" split, a trailing "\r" stays on each
  // line and the name pattern below matches nothing, because "." excludes "\r".
  const lines = content.split(/\r?\n/);
  const nameRe = /^\d+\.\s+(.+)$/;
  const entries: DiseaseEntry[] = [];

  for (let i = 0; i < lines.length; i++) {
    const nameMatch = nameRe.exec(lines[i]);
    if (!nameMatch) continue;

    const name = nameMatch[1].trim();
    const urlLine = lines[i + 1]?.trim() ?? "";
    if (urlLine.startsWith("http")) {
      entries.push({ name, sourceUrl: urlLine });
      i++; // the URL line belongs to this entry; skip it
    } else {
      entries.push({ name, sourceUrl: "" });
    }
  }
  return entries;
}
// ─── Infer causal agent type from disease name ────────────────────────────────
// Keyword tables for inferCausalAgent. Every keyword starts with a space (and some
// end with one) so it only matches at a word boundary of the space-padded name.

/** Genus names that mark a bacterial disease. */
const BACTERIAL_KEYWORDS: readonly string[] = [
  " xanthomonas", " pseudomonas", " erwinia", " ralstonia", " clavibacter",
  " streptomyces", " agrobacterium", " corynebacterium", " pectobacterium", " dickeya",
];

/** Symptom words and explicit "virus"/"viroid" mentions treated as viral. */
const VIRAL_KEYWORDS: readonly string[] = [
  " mosaic", " yellows", " leaf roll", " leafroll", " ringspot", " ring spot",
  " enation", " phyllody", " witches", " crinkle", " rosette", " shoestring",
  " tristeza", " psorosis", " stubborn", " greening", " vein banding", " vein mottle",
  " vein clearing", " leaf pucker", " pucker leaf", " latent", " motley", " rugose",
  " virus", " viroid", " virosis",
];

/** Words seen in nematode names; they only count when the name also says "nematode". */
const NEMATODE_KEYWORDS: readonly string[] = [
  " nematode", " eelworm", " root knot", " root-knot", " cyst ", " dagger ", " lance ",
  " lesion ", " ring ", " spiral ", " sting ", " stubby ", " needle ", " foliar ",
  " bulb ", " reniform ", " burrowing ",
];

/**
 * Environmental phrases that contain the generic fungal keyword " rot" and must
 * therefore be tested before the fungal table.
 */
const ENVIRONMENTAL_ROT_PHRASES: readonly string[] = [" blossom end rot", " transit rot"];

/**
 * Fungal symptom words and genus names. Longer phrases that merely extend one of
 * these (" root rot", " stem canker", " gray mold", ...) are omitted because the
 * shorter keyword already matches them.
 */
const FUNGAL_KEYWORDS: readonly string[] = [
  " mildew", " rust", " smut", " blight", " canker", " rot", " scab", " mold",
  " anthracnose", " bunt", " ergot", " dieback", " scald", " blotch", " speckle",
  " sooty", " flyspeck",
  " fusarium", " alternaria", " botrytis", " rhizoctonia", " pythium", " phytophthora",
  " sclerotinia", " verticillium", " ascochyta", " cercospora", " septoria",
  " colletotrichum", " phomopsis", " diaporthe", " diplodia", " macrophomina",
  " cylindrocladium", " mycosphaerella", " helminthosporium", " curvularia",
  " bipolaris", " exserohilum", " dothiorella", " fusicoccum", " pestalotia",
  " glomerella", " nectria", " eutypa", " armillaria", " ganoderma", " phoma",
  " cladosporium", " penicillium", " aspergillus", " rhizopus", " mucor", " exobasidium",
  " pink root", " leaf spot", " brown spot", " black spot", " black leg", " blackleg",
  " black foot", " tar spot", " target spot", " dollar spot", " fairy ring",
  " pink disease", " sclerotial", " sore shin", " wart", " scurf", " shot hole",
  " seepage", " coral spot", " collar crack", " fasciation", " mycorrhiza", " lichen",
  " algal", " chlorosis", " leaf blister", " leaf curl",
];

/** Physiological and environmental disorder words. */
const ENVIRONMENTAL_KEYWORDS: readonly string[] = [
  " sunscald", " sunburn", " chilling", " edema", " deficiency", " toxicity", " ozone",
  " drought", " frost", " herbicide", " pesticide", " phytotoxicity", " catface",
  " fruit cracking", " russeting", " growth crack", " mealiness", " wind scar", " hail",
  " salt ", " nutritional", " mineral", " overwatering", " under watering",
  " waterlogging", " chemical injury", " spray injury", " fertilizer burn", " lightning",
  " bruising", " pressure bruise", " impact damage",
];

/** Insect, mite and mollusc words. */
const PEST_KEYWORDS: readonly string[] = [
  " mite", " beetle", " weevil", " aphid", " bollworm", " leaf miner", " mealybug",
  " thrips", " whitefly", " caterpillar", " sawfly", " scale ", " leafhopper", " psylla",
  " slug", " snail", " borer", " maggot", " grub", " earwig", " grasshopper",
];

/**
 * Guesses the causal agent of a disease from keywords in its name.
 *
 * Checks run in priority order and the first hit wins: bacterial, viral, nematode,
 * the two environmental "rot" phrases, fungal, environmental, pest. Nematode and
 * pest names are reported as "environmental", as the original code did.
 * NOTE(review): presumably CausalAgentType has no dedicated member for them; confirm.
 * Names that match nothing default to "fungal".
 *
 * The name is padded with one space on each side before matching, so a keyword is
 * also recognised as the first or last word of the name.
 *
 * @param name Disease name in any letter case.
 * @returns The inferred causal agent type.
 */
function inferCausalAgent(name: string): CausalAgentType {
  const lower = name.toLowerCase();
  const padded = ` ${lower} `;
  const hasAny = (keywords: readonly string[]): boolean =>
    keywords.some((keyword) => padded.includes(keyword));

  if (lower.startsWith("bacterial ") || hasAny(BACTERIAL_KEYWORDS)) return "bacterial";
  if (hasAny(VIRAL_KEYWORDS)) return "viral";

  // Words such as " ring " or " bulb " are ambiguous on their own, so a nematode
  // is only assumed when the name also contains "nematode".
  if (lower.includes("nematode") && hasAny(NEMATODE_KEYWORDS)) return "environmental";

  // Must precede the fungal table: both phrases contain " rot".
  if (hasAny(ENVIRONMENTAL_ROT_PHRASES)) return "environmental";

  if (hasAny(FUNGAL_KEYWORDS)) return "fungal";
  if (hasAny(ENVIRONMENTAL_KEYWORDS) || hasAny(PEST_KEYWORDS)) return "environmental";

  // Default to fungal (most plant diseases are fungal). A separate fungal genus
  // lookup would add nothing here because the fallback already returns "fungal".
  return "fungal";
}
// ─── Infer severity ───────────────────────────────────────────────────────────
/** Words that mark a disease as high severity (matched at a word start). */
const HIGH_SEVERITY_KEYWORDS: readonly string[] = [
  " lethal", " devastating", " destructive", " fatal", " severe", " blight", " wilt",
  " canker", " dieback", " decline", " rot", " gall", " gummosis", " necrosis", " erwinia",
];

/** Words that mark a disease as low severity (matched at a word start). */
const LOW_SEVERITY_KEYWORDS: readonly string[] = [
  " minor", " slight", " speckle", " fleck", " freckle", " chlorosis", " translucence",
  " superficial",
];

/**
 * Guesses a severity from keywords in the disease name.
 *
 * High-severity keywords are tested first, then low-severity ones; anything else
 * is "moderate". The name is padded with a leading space before matching so that
 * a keyword is also recognised as the first word ("Lethal yellowing").
 *
 * "mild" is matched as a whole word only: a plain prefix test would classify
 * every "... mildew" as low severity by accident.
 *
 * @param name Disease name in any letter case.
 * @returns "high", "low" or "moderate".
 */
function inferSeverity(name: string): Severity {
  const lower = name.toLowerCase();
  const padded = ` ${lower}`;
  const hasAny = (keywords: readonly string[]): boolean =>
    keywords.some((keyword) => padded.includes(keyword));

  if (hasAny(HIGH_SEVERITY_KEYWORDS)) return "high";
  if (hasAny(LOW_SEVERITY_KEYWORDS) || /\bmild\b/.test(lower)) return "low";
  return "moderate";
}
// ─── Generate a deterministic slug ────────────────────────────────────────────
/**
 * Builds the deterministic id used for imported diseases.
 *
 * The name is lower-cased, cut into its runs of `a-z` / `0-9`, and those runs are
 * joined with single hyphens behind a "wiki-" prefix. A name without any such
 * character yields just "wiki-".
 *
 * @param name Disease name.
 * @returns The slug, e.g. "wiki-apple-scab" for "Apple Scab".
 */
function toSlug(name: string): string {
  const words = name
    .toLowerCase()
    .split(/[^a-z0-9]+/)
    .filter((word) => word.length > 0);
  return `wiki-${words.join("-")}`;
}
// ─── Main ─────────────────────────────────────────────────────────────────────
/**
 * Imports diseases from the comprehensive list file that are not yet in the DB.
 *
 * Steps: load existing disease names, parse the list, make sure the catch-all
 * plants exist, drop entries already present (case-insensitive name match, also
 * de-duplicating within the file), insert the rest in batches, print a summary.
 *
 * Rows are inserted with `onConflictDoNothing`, so an entry whose generated id
 * collides with an existing row is silently ignored. The reported "Inserted"
 * figure is therefore the change in the table's row count, not the number of
 * rows submitted.
 *
 * The DB handle is released through `closeDb()` on every exit path.
 */
async function main(): Promise<void> {
  const db = getDb();
  try {
    // 1. Get existing disease names from DB
    const existingDiseases = await db.select({ name: diseases.name }).from(diseases);
    const existingNames = new Set(existingDiseases.map((d) => d.name.toLowerCase().trim()));
    console.log(`Existing diseases in DB: ${existingNames.size}`);

    // 2. Parse the comprehensive list
    const entries = parseComprehensiveList("/tmp/plant_diseases/plant_diseases_comprehensive.txt");
    console.log(`Total entries in comprehensive file: ${entries.length}`);

    // 3. Find or create catch-all plants
    // NOTE(review): only "general" is referenced by the rows inserted below;
    // "unknown" is created but not used in this script.
    for (const plantId of ["general", "unknown"]) {
      const existing = await db.select().from(plants).where(eq(plants.id, plantId)).get();
      if (existing) continue;
      console.log(`Creating '${plantId}' plant for catch-all diseases...`);
      await db.insert(plants).values({
        id: plantId,
        commonName: plantId === "general" ? "General (Multiple Plants)" : "Unknown Plant",
        scientificName: "Various",
        family: "Various",
        category: "houseplant",
        careSummary:
          plantId === "general"
            ? "General plant diseases affecting multiple species."
            : "Plant disease with unknown host plant.",
        imageUrl: "",
      });
      console.log(`Created '${plantId}' plant.`);
    }

    // 4. Filter new entries (deduplicate within file + against DB)
    const newEntries: DiseaseEntry[] = [];
    const skipped: string[] = [];
    const seen = new Set<string>();
    for (const entry of entries) {
      const key = entry.name.toLowerCase().trim();
      if (seen.has(key)) continue;
      seen.add(key);
      if (existingNames.has(key)) {
        skipped.push(entry.name);
      } else {
        newEntries.push(entry);
      }
    }
    console.log(`\nNew entries to insert: ${newEntries.length}`);
    console.log(`Already existing (skipped): ${skipped.length}`);
    if (skipped.length > 0) {
      console.log(`\nFirst 10 skipped (of ${skipped.length}):`);
      skipped.slice(0, 10).forEach((s) => console.log(` - ${s}`));
    }

    // 5. Insert new entries in batches
    if (newEntries.length === 0) {
      console.log("\n✅ No new diseases to insert.");
      return;
    }

    const BATCH_SIZE = 50;
    let submitted = 0; // rows handed to the DB without an error
    let conflicts = 0; // rows rejected as duplicates during the per-row fallback
    let errors = 0;

    for (let i = 0; i < newEntries.length; i += BATCH_SIZE) {
      const batch = newEntries.slice(i, i + BATCH_SIZE);
      const values = batch.map((entry) => ({
        id: toSlug(entry.name),
        plantId: "general",
        name: entry.name,
        scientificName: "",
        causalAgentType: inferCausalAgent(entry.name),
        description: `A plant disease known as "${entry.name}". Source: Wikipedia.`,
        symptoms: [],
        causes: [],
        treatment: [],
        prevention: [],
        lookalikeIds: [],
        severity: inferSeverity(entry.name),
        sourceUrl: entry.sourceUrl,
        imageUrl: "",
      }));

      try {
        await db.insert(diseases).values(values).onConflictDoNothing();
        submitted += values.length;
      } catch {
        // Fall back to individual inserts so one bad row cannot sink the batch.
        console.log(` Batch failed, trying individually...`);
        for (const val of values) {
          try {
            await db.insert(diseases).values(val).onConflictDoNothing();
            submitted++;
          } catch (rowErr) {
            const message = String(rowErr);
            if (message.includes("UNIQUE") || message.includes("duplicate")) {
              // A duplicate is a skipped row, not an inserted one.
              conflicts++;
            } else {
              console.error(` Error inserting "${val.name}":`, rowErr);
              errors++;
            }
          }
        }
      }

      if ((i + BATCH_SIZE) % 200 === 0 || i + BATCH_SIZE >= newEntries.length) {
        console.log(
          ` Progress: ${Math.min(i + BATCH_SIZE, newEntries.length)}/${newEntries.length} (${submitted} submitted, ${errors} errors)`,
        );
      }
    }

    // 6. Summary
    const totalDiseases = await db
      .select({ count: sql<number>`COUNT(*)` })
      .from(diseases)
      .get();
    const totalPlants = await db
      .select({ count: sql<number>`COUNT(*)` })
      .from(plants)
      .get();

    // Row-count delta: unaffected by rows that onConflictDoNothing ignored.
    const inserted = (totalDiseases?.count ?? 0) - existingDiseases.length;

    console.log(`\n📊 Results:`);
    console.log(` Inserted: ${inserted}`);
    console.log(` Submitted: ${submitted}`);
    console.log(` Duplicate conflicts: ${conflicts}`);
    console.log(` Errors: ${errors}`);
    console.log(` Skipped (already existed): ${skipped.length}`);
    console.log(`\n📊 Database now has:`);
    console.log(` ${totalPlants?.count ?? 0} plants`);
    console.log(` ${totalDiseases?.count ?? 0} diseases`);
  } finally {
    closeDb();
  }
}

// Report the failure and exit non-zero so shells and CI notice it.
main().catch((err) => {
  console.error("❌ Failed:", err);
  process.exit(1);
});

View File

@@ -1,414 +0,0 @@
#!/usr/bin/env node
/**
* fill-brave-images-v2.ts — Brave Image Search for remaining disease images.
*
* Prioritizes by severity (critical → high → moderate → low).
* Runs at 1 request/sec (Brave free tier rate limit).
* Updates Turso DB directly with found images.
* When the current key is exhausted, rotates to the next available key automatically.
* Falls back to duckduckgo-images-api when all keys are spent.
*
* Usage:
* cd apps/web && npx tsx scripts/fill-brave-images-v2.ts
*
* Pass additional API keys as args:
* npx tsx scripts/fill-brave-images-v2.ts KEY2 KEY3
*/
import { readFileSync, writeFileSync } from "fs";
import { resolve } from "path";
// Load env
// Seed process.env from ../.env.development. Blank lines and "#" comments are
// skipped, a line needs a non-empty name before "=", and variables that already
// hold a non-empty value are left alone.
const envPath = resolve(__dirname, "../.env.development");
try {
  readFileSync(envPath, "utf-8")
    .split("\n")
    .map((rawLine) => rawLine.trim())
    .filter((entry) => entry !== "" && !entry.startsWith("#"))
    .forEach((entry) => {
      const separator = entry.indexOf("=");
      if (separator <= 0) return;
      const name = entry.slice(0, separator).trim();
      if (process.env[name]) return;
      process.env[name] = entry.slice(separator + 1).trim();
    });
} catch {}
// Also try .env.local, but only for BRAVE_API_KEY
try {
  const prefix = "BRAVE_API_KEY=";
  const localLines = readFileSync(resolve(__dirname, "../.env.local"), "utf-8").split("\n");
  for (const rawLine of localLines) {
    const entry = rawLine.trim();
    if (!entry.startsWith(prefix)) continue;
    const value = entry.slice(prefix.length).trim();
    if (!process.env.BRAVE_API_KEY) process.env.BRAVE_API_KEY = value;
  }
} catch {}
import { getDb, closeDb } from "../src/lib/db/index";
import { diseases } from "../src/lib/db/schema";
import { createClient } from "@libsql/client";
import { sql } from "drizzle-orm";
/** Columns selected from the diseases table for each disease that still lacks an image. */
interface DiseaseRow {
id: string;
name: string;
scientificName: string;
severity: string;
plantId: string;
}
// ─── Config ──────────────────────────────────────────────────────────────────
const BRAVE_DELAY = 1100; // ms between calls (1 req/sec)
// Number of queued image updates that triggers a batched write to the DB.
const DB_FLUSH_BATCH = 50;
const MAX_PER_KEY = 1800; // Leave 200 buffer of the 2000/mo limit
// Progress file, stored next to this script, that lets an interrupted run resume.
const STATE_FILE = resolve(__dirname, ".brave-progress.json");
// Mutable run state shared by the functions below; persisted by saveState().
let currentKeyIndex = 0; // index into braveKeys of the key currently in use
let braveKeys: string[] = []; // filled in main() from BRAVE_API_KEY and CLI args
let callsThisKey = 0; // successful Brave responses counted against the current key
let totalFound = 0; // images-found counter carried over from the saved state
// totalSkipped tracking removed — not needed for v2
// ─── State persistence ───────────────────────────────────────────────────────
/** Shape of the JSON document written to STATE_FILE. */
interface RunState {
// Ids of diseases already attempted, whether or not an image was found.
processedIds: string[];
currentKeyIndex: number;
callsThisKey: number;
totalFound: number;
}
/**
 * Reads the progress file written by a previous run.
 *
 * The file content is validated before it is trusted: `processedIds` must be an
 * array of strings, otherwise the state is discarded. A counter that is missing
 * or not a non-negative integer falls back to 0.
 *
 * @returns The saved state, or null when the file is missing, unreadable, not
 *   valid JSON, or does not have the expected shape.
 */
function loadState(): RunState | null {
  let parsed: unknown;
  try {
    parsed = JSON.parse(readFileSync(STATE_FILE, "utf-8"));
  } catch {
    // No usable progress file: the caller starts from scratch.
    return null;
  }

  if (typeof parsed !== "object" || parsed === null) return null;
  const candidate = parsed as Partial<Record<keyof RunState, unknown>>;

  const ids = candidate.processedIds;
  if (!Array.isArray(ids) || !ids.every((id) => typeof id === "string")) return null;

  const toCount = (value: unknown): number =>
    typeof value === "number" && Number.isInteger(value) && value >= 0 ? value : 0;

  return {
    processedIds: ids as string[],
    currentKeyIndex: toCount(candidate.currentKeyIndex),
    callsThisKey: toCount(candidate.callsThisKey),
    totalFound: toCount(candidate.totalFound),
  };
}
/**
 * Writes the current progress to STATE_FILE as pretty-printed JSON.
 *
 * Besides the given ids, the snapshot captures the module-level counters
 * `currentKeyIndex`, `callsThisKey` and `totalFound` as they are at call time.
 *
 * @param processedIds Ids of every disease attempted so far.
 */
function saveState(processedIds: string[]) {
  const snapshot = { processedIds, currentKeyIndex, callsThisKey, totalFound };
  const serialized = JSON.stringify(snapshot, null, 2);
  writeFileSync(STATE_FILE, serialized, "utf-8");
}
// ─── Brave API ───────────────────────────────────────────────────────────────
/**
 * Asks the Brave image search API for up to three results and picks one URL.
 *
 * For each result the thumbnail `src` is used when present, otherwise the
 * result's `url`. The first candidate that does not point at a known stock-photo
 * site wins; if all do, the first result is returned.
 *
 * @param query Search text.
 * @returns The chosen URL; the literal string "RATE_LIMITED" on HTTP 429; or null
 *   when there is no current key, the response is not OK, there are no results,
 *   or three attempts in a row threw.
 */
async function braveImageSearch(query: string): Promise<string | null> {
  const token = braveKeys[currentKeyIndex];
  if (!token) return null;

  const endpoint = new URL("https://api.search.brave.com/res/v1/images/search");
  endpoint.searchParams.set("q", query);
  endpoint.searchParams.set("count", "3");

  let attemptsLeft = 3;
  while (attemptsLeft > 0) {
    attemptsLeft -= 1;
    try {
      const response = await fetch(endpoint.toString(), {
        headers: { "X-Subscription-Token": token, Accept: "application/json" },
      });
      if (response.status === 429) {
        console.log("\n [RATE LIMITED] Key " + (currentKeyIndex + 1) + " exhausted!");
        return "RATE_LIMITED";
      }
      if (!response.ok) return null;

      // Only OK responses are counted against the key.
      callsThisKey++;
      const payload = (await response.json()) as {
        results?: Array<{ url: string; thumbnail?: { src?: string } }>;
      };
      const hits = payload?.results ?? [];
      if (hits.length === 0) return null;

      // Prefer non-stock images
      for (const hit of hits) {
        const candidate = hit.thumbnail?.src ?? hit.url;
        if (!candidate) continue;
        if (/(dreamstime|shutterstock|alamy|istock|123rf)/i.test(candidate)) continue;
        return candidate;
      }
      const first = hits[0];
      return first.thumbnail?.src ?? first.url;
    } catch {
      // Network or parse failure: pause two seconds, then use the next attempt.
      await new Promise((wake) => setTimeout(wake, 2000));
    }
  }
  return null;
}
// ─── DuckDuckGo fallback ────────────────────────────────────────────────────
/**
 * Looks up an image through the optional `duckduckgo-images-api` package.
 *
 * The first result whose `image` does not point at a known stock-photo site is
 * preferred; otherwise the first result's `image` is used.
 *
 * @param query Search text.
 * @returns An image URL, or null when the package cannot be imported, the search
 *   throws, or nothing usable comes back.
 */
async function ddgFallbackSearch(query: string): Promise<string | null> {
  try {
    // Try to use duckduckgo-images-api if installed
    const ddg = await import("duckduckgo-images-api").catch(() => null);
    if (!ddg) return null;

    const hits = await ddg.image_search({ query, moderate: true });
    if (!hits || hits.length === 0) return null;

    for (const hit of hits) {
      const isStock = /(dreamstime|shutterstock|alamy|istock|123rf)/i.test(hit.image);
      if (hit.image && !isStock) return hit.image;
    }
    return hits[0].image || null;
  } catch {
    // duckduckgo-images-api not installed, or the search itself failed
    return null;
  }
}
// ─── Main ────────────────────────────────────────────────────────────────────
/** Sort rank per severity; lower ranks are searched first. */
const SEVERITY_RANK = new Map<string, number>([
  ["critical", 0],
  ["high", 1],
  ["moderate", 2],
  ["low", 3],
]);

/** One image URL waiting to be written to the diseases table. */
interface PendingImageUpdate {
  id: string;
  url: string;
}

/**
 * Writes the queued image URLs in a single batch and empties the queue.
 * The queue is only cleared after the batch succeeded, so a failed write keeps
 * its entries.
 */
async function flushImageUpdates(
  client: ReturnType<typeof createClient>,
  queue: PendingImageUpdate[],
): Promise<void> {
  if (queue.length === 0) return;
  await client.batch(
    queue.map((u) => ({
      sql: "UPDATE diseases SET image_url = ?, updated_at = datetime() WHERE id = ?",
      args: [u.url, u.id],
    })),
    "write",
  );
  console.log(` → Flushed ${queue.length} to DB`);
  queue.length = 0;
}

/** Makes sure the optional DuckDuckGo package can be imported, installing it on demand. */
async function ensureDdgPackage(): Promise<void> {
  try {
    await import("duckduckgo-images-api");
  } catch {
    // NOTE(review): this installs a dependency at runtime via npm. Kept from the
    // original flow; consider declaring the package as a regular dependency.
    console.log(" Installing duckduckgo-images-api...");
    const { execSync } = await import("child_process");
    execSync("npm install duckduckgo-images-api", {
      cwd: resolve(__dirname, ".."),
      stdio: "pipe",
    });
    console.log(" Done.\n");
  }
}

/**
 * Finds an image for every disease without one and stores the URL in the DB.
 *
 * Diseases are processed by severity (critical first). Brave keys are rotated
 * when the per-key budget is used up or a 429 is received; once all keys are
 * spent the DuckDuckGo fallback takes over. Every `DB_FLUSH_BATCH` diseases the
 * queued updates are written and the progress file is saved, in that order, so
 * the progress file never marks a disease as done whose image is still unsaved.
 *
 * Both DB handles are released on every exit path.
 */
async function main(): Promise<void> {
  console.log("\n🔍 Brave Disease Image Filler v2\n");

  // Keys: BRAVE_API_KEY first, then positional CLI args (anything starting with "-" is ignored).
  const argsKeys = process.argv.slice(2).filter((a) => !a.startsWith("-"));
  const candidateKeys = [process.env.BRAVE_API_KEY, ...argsKeys];
  braveKeys = [...new Set(candidateKeys.filter((k): k is string => Boolean(k)))];
  if (braveKeys.length === 0) {
    console.log("❌ No Brave API keys found.");
    console.log(" Set BRAVE_API_KEY in .env.local or pass as argument.\n");
    process.exit(1);
  }
  console.log(`🔑 ${braveKeys.length} Brave API key(s) available\n`);

  // Load state
  const state = loadState();
  if (state) {
    currentKeyIndex = state.currentKeyIndex;
    callsThisKey = state.callsThisKey;
    totalFound = state.totalFound;
    // A saved index can point past the keys supplied for this run; every search
    // would then return null and the disease would still be marked as attempted.
    if (!(currentKeyIndex >= 0 && currentKeyIndex < braveKeys.length)) {
      currentKeyIndex = 0;
      callsThisKey = 0;
    }
    console.log(
      `📋 Resuming from previous run (${state.processedIds.length} processed, ${totalFound} found)\n`,
    );
  }

  const db = getDb();
  try {
    // Get diseases from DB
    const allDiseases = (await db
      .select({
        id: diseases.id,
        name: diseases.name,
        scientificName: diseases.scientificName,
        severity: diseases.severity,
        plantId: diseases.plantId,
      })
      .from(diseases)
      .where(sql`(image_url IS NULL OR image_url = '')`)
      .all()) as DiseaseRow[];
    console.log(`📋 ${allDiseases.length} diseases need images\n`);
    if (allDiseases.length === 0) {
      console.log("✅ All diseases already have images!\n");
      return;
    }

    // Sort by severity priority. `??` (not `||`) keeps rank 0, so "critical"
    // really sorts first; unknown severities go last.
    const rankOf = (row: DiseaseRow): number => SEVERITY_RANK.get(row.severity) ?? 99;
    allDiseases.sort((a, b) => rankOf(a) - rankOf(b));

    // Filter out already-processed from state
    const processedSet = new Set(state?.processedIds ?? []);
    const pending = allDiseases.filter((d) => !processedSet.has(d.id));
    const pendingWith = (severity: string): number =>
      pending.filter((d) => d.severity === severity).length;
    console.log(
      `📊 Prioritization: critical=${pendingWith("critical")}, high=${pendingWith("high")}, moderate=${pendingWith("moderate")}, low=${pendingWith("low")}\n`,
    );
    if (pending.length === 0) {
      console.log("✅ All remaining diseases already attempted\n");
      return;
    }

    const databaseUrl = process.env.DATABASE_URL;
    if (!databaseUrl) {
      throw new Error("DATABASE_URL is not set");
    }
    const raw = createClient({
      url: databaseUrl,
      authToken: process.env.DATABASE_TOKEN,
    });

    const updates: PendingImageUpdate[] = [];
    const processedIds: string[] = state?.processedIds ?? [];
    let foundThisRun = 0;
    let ddgMode = false;

    try {
      for (let i = 0; i < pending.length; i++) {
        const d = pending[i];

        // Check if current key needs rotating
        if (!ddgMode && callsThisKey >= MAX_PER_KEY) {
          if (currentKeyIndex < braveKeys.length - 1) {
            currentKeyIndex++;
            callsThisKey = 0;
            console.log(`\n 🔄 Rotating to key ${currentKeyIndex + 1}/${braveKeys.length}\n`);
          } else {
            console.log(
              `\n ⚠️ All ${braveKeys.length} Brave keys exhausted. Switching to DuckDuckGo fallback.\n`,
            );
            ddgMode = true;
            await ensureDdgPackage();
          }
        }

        // Build search query
        const plantName = d.plantId.replace(/-/g, " ").replace(/\b\w/g, (c) => c.toUpperCase());
        const query = `${d.name} ${d.scientificName} ${plantName} plant disease`;
        const sev = d.severity.padEnd(8);
        process.stdout.write(
          ` [${String(i + 1).padStart(4)}/${pending.length}] [${sev}] ${d.name.substring(0, 40).padEnd(42)} `,
        );

        let url: string | null = null;
        if (!ddgMode) {
          url = await braveImageSearch(query);
          // A 429 means the current key is spent. Keep rotating until a key
          // answers or none are left, so the "RATE_LIMITED" marker can never be
          // stored as an image URL.
          while (url === "RATE_LIMITED" && currentKeyIndex < braveKeys.length - 1) {
            currentKeyIndex++;
            callsThisKey = 0;
            console.log("\n 🔄 Rotating key...");
            url = await braveImageSearch(query);
          }
          if (url === "RATE_LIMITED") {
            console.log("\n ⚠️ All keys exhausted mid-batch!");
            ddgMode = true;
            url = null;
          }
        }
        if (ddgMode) {
          url = await ddgFallbackSearch(query);
          if (!url) {
            // Try a simpler query
            url = await ddgFallbackSearch(`${d.name} disease`);
          }
        }

        // Mark as attempted even if nothing was found.
        processedIds.push(d.id);
        if (url) {
          updates.push({ id: d.id, url });
          foundThisRun++;
          totalFound++; // keeps the persisted counter current
          console.log("✅");
        } else {
          console.log("❌");
        }

        // Checkpoint: write the images first, then record the progress.
        if ((i + 1) % DB_FLUSH_BATCH === 0) {
          await flushImageUpdates(raw, updates);
          saveState(processedIds);
        }

        // Rate limit (even for DDG to be polite)
        await new Promise((r) => setTimeout(r, ddgMode ? 500 : BRAVE_DELAY));
      }
    } finally {
      // Runs after a normal finish and after an abort: persist what was
      // gathered, and save the progress only if the write went through.
      try {
        await flushImageUpdates(raw, updates);
        saveState(processedIds);
      } finally {
        raw.close();
      }
    }

    // Final report
    const finalList = await db
      .select({ id: diseases.id, name: diseases.name, imageUrl: diseases.imageUrl })
      .from(diseases)
      .all();
    const w = finalList.filter((d) => d.imageUrl);
    const wo = finalList.filter((d) => !d.imageUrl);
    console.log(`\n${"═".repeat(50)}`);
    console.log(`📊 BRAVE IMAGE SEARCH COMPLETE`);
    console.log(`${"═".repeat(50)}`);
    console.log(` Processed: ${pending.length}`);
    console.log(` Found this run: ${foundThisRun}`);
    console.log(` Total with images: ${w.length}/${finalList.length}`);
    console.log(` Still missing: ${wo.length}`);
    console.log(` Brave keys used: ${currentKeyIndex + 1}`);
    console.log(` Calls on current key: ${callsThisKey}`);
    console.log(` DuckDuckGo mode: ${ddgMode}`);

    if (wo.length > 0) {
      const rp = resolve(__dirname, ".disease-image-review-needed.md");
      const report = [
        "# Disease Images - Still Missing\n\n",
        `Generated: ${new Date().toISOString()}\n\n`,
        `## Summary\n\n`,
        `- Total: ${finalList.length}\n`,
        `- With images: ${w.length}\n`,
        `- Still missing: ${wo.length}\n\n`,
        `## Missing Diseases\n\n`,
        ...wo.map((d) => `- ${d.name} (\`${d.id}\`)\n`),
      ].join("");
      writeFileSync(rp, report, "utf-8");
      console.log(`\n📝 Report: ${rp}`);
    } else {
      console.log("\n✅ ALL diseases now have images!");
    }
  } finally {
    closeDb();
  }
  console.log("\n");
}
// Entry point: run main() and, if it rejects, print the error and exit with status 1.
main().catch((err) => {
console.error("\n❌", err);
process.exit(1);
});

View File

@@ -1,152 +0,0 @@
#!/usr/bin/env node
/**
* fill-brave-images.ts — Brave-only pass for remaining disease images.
*
* Runs at 1 request/sec (Brave rate limit).
* Updates diseases.json and Turso DB.
*
* Usage: cd apps/web && npx tsx scripts/fill-brave-images.ts
*/
import dotenv from "dotenv"; dotenv.config({ path: resolve(__dirname, "../.env.local") });
import { readFileSync, writeFileSync } from "fs";
import { resolve } from "path";
import { createClient } from "@libsql/client";
import { closeDb } from "../src/lib/db/index";
/** Absolute path of the seed file this script reads and rewrites. */
const DISEASES_JSON = resolve(__dirname, "../src/data/diseases.json");
/** Brave Search subscription token; empty string when BRAVE_API_KEY is unset. */
const BRAVE_KEY = process.env.BRAVE_API_KEY ?? "";
/** One entry of diseases.json, limited to the fields this script touches. */
interface DiseaseSeed {
id: string;
plantId: string;
name: string;
scientificName: string;
// Absent or empty while no image has been found for the disease.
imageUrl?: string;
// Remaining seed fields; main() writes the whole object back, so they round-trip unchanged.
[key: string]: unknown;
}
/** Read diseases.json from disk and parse it into seed records (no validation is performed). */
function load(): DiseaseSeed[] {
  const rawJson = readFileSync(DISEASES_JSON, "utf-8");
  return JSON.parse(rawJson) as DiseaseSeed[];
}
/**
 * Query the Brave Image Search API and pick one image URL for `query`.
 *
 * Up to three attempts are made: HTTP 429 backs off exponentially
 * (5 s, 10 s, 20 s), a thrown fetch/parse error waits 2 s, and any other
 * non-OK status gives up immediately.
 *
 * @param query Free-text search phrase.
 * @returns An image URL (thumbnail preferred), or null when nothing usable was found.
 */
async function searchBraveImage(query: string): Promise<string | null> {
  // On a Brave image result `url` is the page hosting the image; the image
  // itself is exposed as `thumbnail.src` (proxied) and `properties.url` (original).
  type BraveImageHit = {
    url?: string;
    thumbnail?: { src?: string };
    properties?: { url?: string };
  };
  const stockSites = /(dreamstime|shutterstock|alamy|istock|123rf)/i;
  // `||` on purpose: an empty string is treated the same as a missing field.
  const imageOf = (hit: BraveImageHit): string | null =>
    hit.thumbnail?.src || hit.properties?.url || null;
  // The thumbnail is served from Brave's proxy, so the stock-site name can only
  // be spotted reliably on the page URL or the original image URL.
  const isStock = (hit: BraveImageHit): boolean =>
    [hit.url, hit.thumbnail?.src, hit.properties?.url].some(
      (link) => link !== undefined && stockSites.test(link),
    );

  const endpoint = new URL("https://api.search.brave.com/res/v1/images/search");
  endpoint.searchParams.set("q", query);
  endpoint.searchParams.set("count", "3");

  for (let attempt = 0; attempt < 3; attempt++) {
    try {
      const res = await fetch(endpoint.toString(), {
        headers: { "X-Subscription-Token": BRAVE_KEY, Accept: "application/json" },
      });
      if (res.status === 429) {
        await new Promise((r) => setTimeout(r, 5000 * 2 ** attempt));
        continue;
      }
      if (!res.ok) return null;

      const data = (await res.json()) as { results?: BraveImageHit[] };
      const hits = data?.results ?? [];

      // Prefer the first hit that has an image and is not from a stock site.
      const preferred = hits.find((hit) => imageOf(hit) !== null && !isStock(hit));
      if (preferred) return imageOf(preferred);

      // Everything looked like stock: settle for the first hit that has any image.
      for (const hit of hits) {
        const image = imageOf(hit);
        if (image) return image;
      }
      return null;
    } catch {
      // Network or JSON failure: brief pause, then retry.
      await new Promise((r) => setTimeout(r, 2000));
    }
  }
  return null;
}
/**
 * Script entry point.
 *
 * For every seed disease without an `imageUrl`, runs one Brave image search,
 * then rewrites diseases.json and mirrors the new URLs to the Turso database
 * in batches of 50. Exits with status 1 when BRAVE_API_KEY is not set.
 * Database errors are logged and do not abort the run.
 */
async function main() {
console.log("\n🔍 Brave Image Search — remaining disease images\n");
if (!BRAVE_KEY) {
console.log("❌ No BRAVE_API_KEY in .env.local\n");
process.exit(1);
}
const diseases = load();
const pending = diseases.filter((d) => !d.imageUrl);
console.log(`📋 ${pending.length} diseases need images\n`);
let found = 0;
for (let i = 0; i < pending.length; i++) {
const d = pending[i];
// NOTE(review): this searches the *disease* list for an entry whose id equals
// the plantId; unless such an id exists the query falls back to the raw plantId.
const plant = diseases.find((p) => p.id === d.plantId);
const plantName = plant?.name ?? d.plantId;
const query = `${d.name} ${plantName} plant disease symptom`;
process.stdout.write(` [${String(i + 1).padStart(2, " ")}/${pending.length}] ${d.name.padEnd(35)} `);
const url = await searchBraveImage(query);
if (url) {
// `d` is the same object held in `diseases`, so this also updates the array written below.
d.imageUrl = url;
found++;
console.log(``);
} else {
console.log(``);
}
// Stay under Brave's 1 request/second limit (1.1 s between diseases).
await new Promise((r) => setTimeout(r, 1100));
}
// Write updated JSON (only once, after the whole loop has finished)
writeFileSync(DISEASES_JSON, JSON.stringify(diseases, null, 2) + "\n", "utf-8");
console.log(`\n✅ diseases.json updated: ${found}/${pending.length} images found\n`);
// Update DB — skipped when either credential is missing
try {
const dbUrl = process.env.DATABASE_URL;
const dbToken = process.env.DATABASE_TOKEN;
if (dbUrl && dbToken) {
const raw = createClient({ url: dbUrl, authToken: dbToken });
const updates = pending.filter((d) => d.imageUrl);
// 50 UPDATE statements per batch call
for (let i = 0; i < updates.length; i += 50) {
await raw.batch(
updates.slice(i, i + 50).map((d) => ({
sql: "UPDATE diseases SET image_url = ? WHERE id = ?",
args: [d.imageUrl!, d.id],
})),
"write",
);
}
// NOTE(review): not reached when a batch throws, so the client stays open in that case.
raw.close();
console.log(`✅ Turso DB updated: ${updates.length} rows`);
} else {
console.log("⏭️ Skipping DB — no DATABASE_URL/TOKEN");
}
} catch (err) {
console.log(` ⚠️ DB: ${err instanceof Error ? err.message : err}`);
}
// Summary — re-read the file that was just written and count what is still missing
const finalDiseases = JSON.parse(readFileSync(DISEASES_JSON, "utf-8")) as DiseaseSeed[];
const stillMissing = finalDiseases.filter((d) => !d.imageUrl);
console.log(`\n${"═".repeat(50)}`);
console.log(`📊 FINAL: ${finalDiseases.length} total`);
console.log(` With images: ${finalDiseases.length - stillMissing.length}`);
console.log(` Still missing: ${stillMissing.length}`);
if (stillMissing.length > 0) {
console.log(`\nStill need human curation:`);
for (const d of stillMissing) {
console.log(`${d.name} (${d.id})`);
}
}
console.log(`${"═".repeat(50)}\n`);
closeDb();
}
// Entry point: run the script; on any unhandled failure print it and exit non-zero.
main().catch((failure) => {
  console.error("\n❌ Fatal:", failure);
  process.exit(1);
});

View File

@@ -1,268 +0,0 @@
#!/usr/bin/env node
/**
* fill-ddg-images.ts — DuckDuckGo Image Search for remaining disease images.
*
* No API key needed. Searches DuckDuckGo Images API for each disease
* without an image and updates the Turso DB.
*
* Prioritizes by severity (critical → high → moderate → low).
* Pauses 800 ms after each disease to be polite to DuckDuckGo; a single
* disease may issue up to five search queries back-to-back.
* Resumable via state file (scripts/.ddg-progress.json).
*
* Usage:
* cd apps/web && npx tsx scripts/fill-ddg-images.ts
*/
import { readFileSync, writeFileSync } from "fs";
import { resolve } from "path";
// Load .env.development for DB creds.
// Minimal dotenv-style reader: one KEY=VALUE per line, `#` comment lines are
// skipped, and variables already set (non-empty) in process.env win.
const envPath = resolve(__dirname, "../.env.development");
try {
  for (const rawLine of readFileSync(envPath, "utf-8").split("\n")) {
    const line = rawLine.trim();
    if (!line || line.startsWith("#")) continue;
    const eqIdx = line.indexOf("=");
    if (eqIdx <= 0) continue;
    const key = line.slice(0, eqIdx).trim();
    let val = line.slice(eqIdx + 1).trim();
    // Strip one pair of matching surrounding quotes, as dotenv does, so that
    // KEY="value" does not leak the quote characters into the URL or token.
    const quote = val.charAt(0);
    if (val.length >= 2 && (quote === '"' || quote === "'") && val.endsWith(quote)) {
      val = val.slice(1, -1);
    }
    if (!process.env[key]) process.env[key] = val;
  }
} catch {
  // Best effort: when the file is missing or unreadable, rely on the ambient environment.
}
import { getDb, closeDb } from "../src/lib/db/index";
import { diseases } from "../src/lib/db/schema";
import { createClient } from "@libsql/client";
import { sql } from "drizzle-orm";
// DuckDuckGo
import { imageSearch } from "@mudbill/duckduckgo-images-api";
/** Columns main() selects from the `diseases` table for rows that still lack an image. */
interface DiseaseRow {
id: string;
name: string;
scientificName: string;
// Used as a sort key by main(): critical, high, moderate, low; anything else sorts last.
severity: string;
// Turned into a display name by main() (hyphens become spaces, words are capitalised).
plantId: string;
}
// ─── Config ──────────────────────────────────────────────────────────────────
const POLITE_DELAY = 800; // ms slept after each disease (not after each individual query)
const DB_FLUSH_BATCH = 50; // number of found URLs buffered before they are written to the DB
const STATE_FILE = resolve(__dirname, ".ddg-progress.json"); // resume file, next to this script
/** Progress persisted between runs so that already-attempted diseases are skipped. */
interface RunState {
// Ids of every disease attempted so far, whether or not an image was found.
processedIds: string[];
// Running count of images found across runs.
totalFound: number;
}
/** Read the saved progress from STATE_FILE; yields null when the file is missing or is not valid JSON. */
function loadState(): RunState | null {
  let saved: RunState | null;
  try {
    const text = readFileSync(STATE_FILE, "utf-8");
    saved = JSON.parse(text);
  } catch {
    saved = null;
  }
  return saved;
}
/** Overwrite STATE_FILE with the current progress as pretty-printed JSON. */
function saveState(processedIds: string[], totalFound: number) {
  const snapshot = { processedIds, totalFound };
  const serialized = JSON.stringify(snapshot, null, 2);
  writeFileSync(STATE_FILE, serialized, "utf-8");
}
// ─── DuckDuckGo Search ───────────────────────────────────────────────────────
/**
 * Run one DuckDuckGo image search and choose a result.
 *
 * The first result whose image URL does not mention a known stock-photo site
 * wins; otherwise the first result's image (or its thumbnail) is used.
 * Any error raised by the search is swallowed and reported as "no result".
 *
 * @returns The chosen URL, or null when there are no results or the search failed.
 */
async function searchImage(query: string): Promise<string | null> {
  const stockSites = /(dreamstime|shutterstock|alamy|istock|123rf)/i;
  try {
    const hits = await imageSearch({ query, safe: true, iterations: 1, retries: 2 });
    if (!hits || hits.length === 0) {
      return null;
    }
    // Prefer non-stock images
    const preferred = hits.find((hit) => hit.image && !stockSites.test(hit.image));
    if (preferred) {
      return preferred.image;
    }
    const first = hits[0];
    return first.image || first.thumbnail || null;
  } catch {
    // DuckDuckGo may block or time out; treat that as a miss.
    return null;
  }
}
// ─── Main ────────────────────────────────────────────────────────────────────
/**
 * Script entry point.
 *
 * Selects every disease whose image_url is NULL or empty, orders them by
 * severity, skips ids recorded in the state file, and tries up to five
 * DuckDuckGo queries per disease. Found URLs are buffered and written to the
 * database in batches; progress is saved to STATE_FILE every 50 diseases and
 * at the end. Finishes with a console summary and, when images are still
 * missing, a markdown report next to this script.
 */
async function main() {
console.log("\n🦆 DuckDuckGo Disease Image Filler\n");
const db = getDb();
// Load state
const state = loadState();
const processedSet = new Set(state?.processedIds || []);
const totalFoundPrev = state?.totalFound ?? 0;
// Get all diseases that still need images
const allDiseases = (await db
.select({
id: diseases.id,
name: diseases.name,
scientificName: diseases.scientificName,
severity: diseases.severity,
plantId: diseases.plantId,
})
.from(diseases)
.where(sql`(image_url IS NULL OR image_url = '')`)
.all()) as DiseaseRow[];
console.log(`📋 ${allDiseases.length} diseases need images\n`);
if (allDiseases.length === 0) {
console.log("✅ All diseases already have images!\n");
closeDb();
return;
}
// Sort by severity: critical > high > moderate > low (unknown values go last)
const severityOrder: Record<string, number> = { critical: 0, high: 1, moderate: 2, low: 3 };
allDiseases.sort((a, b) => (severityOrder[a.severity] ?? 99) - (severityOrder[b.severity] ?? 99));
// Filter out already-processed
const pending = allDiseases.filter((d) => !processedSet.has(d.id));
console.log(
`📊 Remaining: critical=${allDiseases.filter((d) => d.severity === "critical" && !processedSet.has(d.id)).length}, ` +
`high=${allDiseases.filter((d) => d.severity === "high" && !processedSet.has(d.id)).length}, ` +
`moderate=${allDiseases.filter((d) => d.severity === "moderate" && !processedSet.has(d.id)).length}, ` +
`low=${allDiseases.filter((d) => d.severity === "low" && !processedSet.has(d.id)).length}\n`,
);
if (pending.length === 0) {
console.log("✅ All remaining diseases already attempted\n");
closeDb();
return;
}
// Separate raw libsql client, used only for the batched UPDATE statements below.
const raw = createClient({
url: process.env.DATABASE_URL!,
authToken: process.env.DATABASE_TOKEN!,
});
// Continues the id list and found-count from the previous run, if any.
const processedIds: string[] = state?.processedIds ?? [];
let found = totalFoundPrev;
let updates: Array<{ id: string; url: string }> = [];
for (let i = 0; i < pending.length; i++) {
const d = pending[i];
const sev = d.severity.padEnd(8);
// Build search query — "[disease] on [plant]" phrasing for better specificity.
// The plant name is derived from the id: hyphens to spaces, each word capitalised.
const plantName = d.plantId.replace(/-/g, " ").replace(/\b\w/g, (c) => c.toUpperCase());
const query1 = `${d.name} on ${plantName} plant disease`;
const query2 = `${d.scientificName || d.name} on ${plantName} disease`;
const query3 = `${d.name} plant disease ${plantName}`;
const query4 = `${d.name} plant`;
const query5 = `${d.name} symptom`;
process.stdout.write(
` [${String(i + 1).padStart(4)}/${pending.length}] [${sev}] ${d.name.substring(0, 42).padEnd(44)} `,
);
// Try queries in order until we get a result (no pause between these attempts)
let url: string | null = null;
for (const q of [query1, query2, query3, query4, query5]) {
url = await searchImage(q);
if (url) break;
}
// The id is recorded as processed in both branches, so misses are not retried on later runs.
if (url) {
updates.push({ id: d.id, url });
found++;
processedIds.push(d.id);
console.log("✅");
} else {
processedIds.push(d.id);
console.log("❌");
}
// Flush to DB in batches
if (updates.length >= DB_FLUSH_BATCH) {
await raw.batch(
updates.map((u) => ({
sql: "UPDATE diseases SET image_url = ?, updated_at = datetime() WHERE id = ?",
args: [u.url, u.id],
})),
"write",
);
console.log(` → Flushed ${updates.length} to DB`);
updates = [];
}
// Save state every 50
// NOTE(review): state is saved on a different trigger than the DB flush above.
// If the process dies after a save but before the next flush, URLs still in
// `updates` are lost while their ids are already marked processed.
if ((i + 1) % 50 === 0) {
saveState(processedIds, found);
}
// Be polite — one POLITE_DELAY pause per disease
await new Promise((r) => setTimeout(r, POLITE_DELAY));
}
// Final flush
if (updates.length > 0) {
await raw.batch(
updates.map((u) => ({
sql: "UPDATE diseases SET image_url = ?, updated_at = datetime() WHERE id = ?",
args: [u.url, u.id],
})),
"write",
);
console.log(` → Flushed ${updates.length} to DB`);
}
saveState(processedIds, found);
raw.close();
// Final report — re-query the whole table so the totals include earlier runs
const finalList = await db
.select({ id: diseases.id, name: diseases.name, imageUrl: diseases.imageUrl })
.from(diseases)
.all();
const w = finalList.filter((d) => d.imageUrl);
const wo = finalList.filter((d) => !d.imageUrl);
console.log(`\n${"═".repeat(50)}`);
console.log(`🦆 DUCKDUCKGO SEARCH COMPLETE`);
console.log(`${"═".repeat(50)}`);
console.log(` Processed: ${pending.length}`);
console.log(` Found this run: ${found - totalFoundPrev}`);
console.log(` Total with images: ${w.length}/${finalList.length}`);
console.log(` Still missing: ${wo.length}`);
if (wo.length > 0) {
// Write a markdown list of the diseases that still have no image.
const reportPath = resolve(__dirname, ".ddg-image-review-needed.md");
let report = "# Disease Images - Still Missing (DDG)\n\n";
report += `Generated: ${new Date().toISOString()}\n\n`;
report += `## Summary\n\n`;
report += `- Total: ${finalList.length}\n`;
report += `- With images: ${w.length}\n`;
report += `- Still missing: ${wo.length}\n\n`;
report += `## Missing Diseases\n\n`;
for (const d of wo) {
report += `- ${d.name} (\`${d.id}\`)\n`;
}
writeFileSync(reportPath, report, "utf-8");
console.log(`\n📝 Missing report: ${reportPath}`);
} else {
console.log("\n✅ ALL diseases now have images!");
}
closeDb();
console.log();
}
// Entry point: run the script; on any unhandled failure print it and exit non-zero.
main().catch((failure) => {
  console.error("\n❌ Fatal:", failure);
  process.exit(1);
});

View File

@@ -1,440 +0,0 @@
#!/usr/bin/env node
/**
* fill-disease-images.ts — Three-stage disease image pipeline
*
* For every disease without an imageUrl, tries:
* Stage 1 — Wikipedia search → pageimages
* Stage 2 — Wikimedia Commons search
* Stage 3 — Brave Image Search API (fallback, 1 req/sec, 2000/mo)
*
* Updates both diseases.json (seed) and the Turso DB.
* Flags anything found only via Brave for human review.
*
* Usage: cd apps/web && npx tsx scripts/fill-disease-images.ts
*/
import "dotenv/config";
import { readFileSync, writeFileSync, existsSync } from "fs";
import { resolve } from "path";
import { createClient } from "@libsql/client";
import { closeDb } from "../src/lib/db/index";
// ─── Types & Config ──────────────────────────────────────────────────────────
/** One entry of diseases.json, limited to the fields this script reads. */
interface DiseaseSeed {
id: string;
plantId: string;
name: string;
scientificName: string;
commonName?: string;
// Remaining seed fields (including imageUrl) are accessed through the index signature.
[key: string]: unknown;
}
/** Outcome of the pipeline for one disease, as stored in RESULTS_FILE. */
interface ImageResult {
// Empty string when no image was found.
url: string;
// Which stage produced the URL.
source: "wikipedia" | "commons" | "brave" | "missing";
// main() assigns "good" to Wikipedia/Commons hits, "fallback" to Brave hits, "missing" otherwise.
quality: "good" | "fallback" | "missing";
}
const DISEASES_JSON = resolve(__dirname, "../src/data/diseases.json"); // seed file, read and rewritten
const RESULTS_FILE = resolve(__dirname, ".image-results.json"); // per-disease results, reused on re-run
const REPORT_FILE = resolve(__dirname, ".image-review-needed.md"); // human-review report written by main()
const WIKI_API = "https://en.wikipedia.org/w/api.php";
const COMMONS_API = "https://commons.wikimedia.org/w/api.php";
const BRAVE_KEY = process.env.BRAVE_API_KEY ?? ""; // empty string disables the Brave stage
const BRAVE_DELAY = 1100; // ms slept after each Brave lookup
const MAX_BRAVE = 2000; // cap on successful Brave calls per run
const UA = "PlantHealthKB/1.0 (plant-disease-id)"; // User-Agent header sent to the Wikimedia APIs
const ORIGIN = "*"; // value of the `origin` query parameter on Wikimedia API calls
let braveCount = 0; // incremented by braveStage() for each OK response
// ─── Wikipedia Stage ─────────────────────────────────────────────────────────
/**
 * Search Wikipedia and get thumbnails in ONE API call using generator=search.
 *
 * Up to three attempts are made: HTTP 429 backs off exponentially
 * (3 s, 6 s, 12 s), a thrown fetch/parse error waits 2 s, and any other
 * non-OK status gives up immediately.
 *
 * @param query Search phrase passed as `gsrsearch`.
 * @returns Thumbnail URL of the best-ranked hit that has one, or null.
 */
async function wikiSearchAndThumb(query: string): Promise<string | null> {
  const params = new URLSearchParams({
    action: "query",
    generator: "search",
    gsrsearch: query,
    gsrlimit: "5",
    prop: "pageimages",
    pithumbsize: "600",
    format: "json",
    origin: ORIGIN,
  });
  for (let attempt = 0; attempt < 3; attempt++) {
    try {
      const res = await fetchWithTimeout(`${WIKI_API}?${params}`, {
        headers: { "User-Agent": UA },
      });
      if (res.status === 429) {
        await delay(3000 * 2 ** attempt);
        continue;
      }
      if (!res.ok) return null;
      const data = (await res.json()) as {
        query?: {
          pages?: Record<string, { index?: number; thumbnail?: { source: string } }>;
        };
      };
      const pages = data?.query?.pages;
      if (!pages) return null;
      // `pages` is keyed by page id, and JavaScript enumerates integer-like keys
      // in ascending numeric order, which is unrelated to relevance. With
      // generator=search each page carries its search rank in `index`, so sort
      // on that to really return the best match first. Pages without an index
      // keep their relative order at the end (Array.prototype.sort is stable).
      const ranked = Object.values(pages).sort(
        (a, b) =>
          (a?.index ?? Number.MAX_SAFE_INTEGER) - (b?.index ?? Number.MAX_SAFE_INTEGER),
      );
      for (const page of ranked) {
        const src = page?.thumbnail?.source;
        if (src) return src;
      }
      return null;
    } catch {
      // Timeout, network or JSON failure: brief pause, then retry.
      await delay(2000);
    }
  }
  return null;
}
/**
 * Stage 1: look for a Wikipedia thumbnail for a disease.
 * Issues a single search for the quoted disease name followed by the plant name.
 */
async function wikiStage(d: DiseaseSeed, plantName: string): Promise<string | null> {
  const searchPhrase = `"${d.name}" ${plantName}`;
  return wikiSearchAndThumb(searchPhrase);
}
// ─── Commons Stage ───────────────────────────────────────────────────────────
/**
 * fetch() wrapper that aborts the request after `ms` milliseconds.
 *
 * The abort timer is cleared as soon as fetch() settles, so only the time
 * until the response object is available is limited.
 *
 * @param url  Request URL.
 * @param opts fetch options; any `signal` in them is replaced by the timeout signal.
 * @param ms   Timeout in milliseconds (default 15000).
 */
async function fetchWithTimeout(url: string, opts: RequestInit, ms = 15000): Promise<Response> {
  const controller = new AbortController();
  const abortTimer = setTimeout(() => controller.abort(), ms);
  const request = { ...opts, signal: controller.signal };
  try {
    return await fetch(url, request);
  } finally {
    clearTimeout(abortTimer);
  }
}
/**
 * Search Wikimedia Commons files (namespace 6) and return an image URL.
 *
 * Two API calls are made: a text search for up to five file pages, then one
 * `imageinfo` request for all of them. Up to three attempts are made: HTTP 429
 * on the search backs off exponentially (3 s, 6 s, 12 s), a thrown error waits
 * 2 s, and any other non-OK status gives up immediately.
 *
 * @param query Search phrase passed as `srsearch`.
 * @returns The 600px-wide thumbnail URL (or the original file URL) of the
 *          best-ranked hit that has one, or null.
 */
async function commonsSearchAndThumb(query: string): Promise<string | null> {
  const params = new URLSearchParams({
    action: "query",
    list: "search",
    srsearch: query,
    srnamespace: "6",
    srlimit: "5",
    format: "json",
    origin: ORIGIN,
  });
  for (let attempt = 0; attempt < 3; attempt++) {
    try {
      const res = await fetchWithTimeout(`${COMMONS_API}?${params}`, {
        headers: { "User-Agent": UA },
      });
      if (res.status === 429) {
        await delay(3000 * 2 ** attempt);
        continue;
      }
      if (!res.ok) return null;
      const data = (await res.json()) as {
        query?: { search?: Array<{ pageid: number; title: string }> };
      };
      const hits = data?.query?.search ?? [];
      if (hits.length === 0) return null;

      // Batch-fetch imageinfo for all found page IDs
      const imgParams = new URLSearchParams({
        action: "query",
        pageids: hits.map((h) => h.pageid).join("|"),
        prop: "imageinfo",
        iiprop: "url",
        iiurlwidth: "600",
        format: "json",
        origin: ORIGIN,
      });
      const imgRes = await fetchWithTimeout(`${COMMONS_API}?${imgParams}`, {
        headers: { "User-Agent": UA },
      });
      if (!imgRes.ok) return null;
      const imgData = (await imgRes.json()) as {
        query?: {
          pages?: Record<string, { imageinfo?: Array<{ thumburl?: string; url?: string }> }>;
        };
      };
      const imgPages = imgData?.query?.pages;
      if (!imgPages) return null;

      // Walk the hits in search order and look each page up by id. Iterating
      // `imgPages` directly would visit pages in ascending page-id order,
      // which has nothing to do with relevance.
      for (const hit of hits) {
        const info = imgPages[String(hit.pageid)]?.imageinfo?.[0];
        const src = info?.thumburl || info?.url;
        if (src) return src;
      }
      return null;
    } catch {
      // Timeout, network or JSON failure: brief pause, then retry.
      await delay(2000);
    }
  }
  return null;
}
/**
 * Stage 2: look for a Wikimedia Commons image for a disease.
 *
 * Searches by scientific name plus plant name when the scientific name is
 * present and contains neither "spp." nor "/"; otherwise searches by the
 * disease name, the plant name and the word "disease".
 */
async function commonsStage(d: DiseaseSeed, plantName: string): Promise<string | null> {
  const sci = d.scientificName;
  const sciIsSpecific = Boolean(sci) && !sci.includes("spp.") && !sci.includes("/");
  const searchPhrase = sciIsSpecific
    ? `${sci} ${plantName}`
    : `${d.name} ${plantName} disease`;
  return (await commonsSearchAndThumb(searchPhrase)) ?? null;
}
// ─── Brave Stage ─────────────────────────────────────────────────────────────
/**
 * Stage 3: Brave Image Search fallback for a disease.
 *
 * Does nothing (returns null) when no API key is configured or the per-run
 * call cap MAX_BRAVE has been reached. Up to three attempts are made: HTTP 429
 * backs off exponentially (5 s, 10 s, 20 s), a thrown error waits 2 s, and any
 * other non-OK status gives up immediately. `braveCount` is incremented once
 * per OK response.
 *
 * @returns An image URL (thumbnail preferred), or null when nothing usable was found.
 */
async function braveStage(d: DiseaseSeed, plantName: string): Promise<string | null> {
  if (!BRAVE_KEY || braveCount >= MAX_BRAVE) return null;

  // On a Brave image result `url` is the page hosting the image; the image
  // itself is exposed as `thumbnail.src` (proxied) and `properties.url` (original).
  type BraveImageHit = {
    url?: string;
    thumbnail?: { src?: string };
    properties?: { url?: string };
  };
  const stockSites = ["dreamstime", "shutterstock", "alamy", "istock", "123rf"];
  // `||` on purpose: an empty string is treated the same as a missing field.
  const imageOf = (hit: BraveImageHit): string | null =>
    hit.thumbnail?.src || hit.properties?.url || null;
  // The thumbnail is served from Brave's proxy, so the stock-site name can only
  // be spotted reliably on the page URL or the original image URL.
  const isStock = (hit: BraveImageHit): boolean =>
    [hit.url, hit.thumbnail?.src, hit.properties?.url].some(
      (link) => link !== undefined && stockSites.some((site) => link.includes(site)),
    );

  const endpoint = new URL("https://api.search.brave.com/res/v1/images/search");
  endpoint.searchParams.set("q", `${d.name} ${plantName} plant disease symptom`);
  endpoint.searchParams.set("count", "5");

  for (let attempt = 0; attempt < 3; attempt++) {
    try {
      const res = await fetchWithTimeout(endpoint.toString(), {
        headers: { "X-Subscription-Token": BRAVE_KEY, Accept: "application/json" },
      });
      if (res.status === 429) {
        await delay(5000 * 2 ** attempt);
        continue;
      }
      if (!res.ok) return null;
      braveCount++;

      const data = (await res.json()) as { results?: BraveImageHit[] };
      const hits = data?.results ?? [];

      // Prefer the first hit that has an image and is not from a stock site.
      const preferred = hits.find((hit) => imageOf(hit) !== null && !isStock(hit));
      if (preferred) return imageOf(preferred);

      // Everything looked like stock: settle for the first hit that has any image.
      for (const hit of hits) {
        const image = imageOf(hit);
        if (image) return image;
      }
      return null;
    } catch {
      // Timeout, network or JSON failure: brief pause, then retry.
      await delay(2000);
    }
  }
  return null;
}
// ─── Helpers ─────────────────────────────────────────────────────────────────
/** Resolve after `ms` milliseconds. */
function delay(ms: number): Promise<void> {
  return new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
/** Read diseases.json from disk and parse it into seed records (no validation is performed). */
function loadDiseases(): DiseaseSeed[] {
  const rawJson = readFileSync(DISEASES_JSON, "utf-8");
  return JSON.parse(rawJson) as DiseaseSeed[];
}
/**
 * Resolve a display name for the given id.
 *
 * Returns the `commonName` (or, failing that, the `name`) of the first entry
 * in `diseases` whose `id` equals `diseaseId`; when there is no such entry the
 * id itself is returned.
 *
 * NOTE(review): main() passes a plantId here while the list holds diseases, so
 * the lookup presumably misses and the raw id is returned — confirm whether a
 * plant list was intended.
 */
function getPlantName(diseases: DiseaseSeed[], diseaseId: string): string {
  for (const entry of diseases) {
    if (entry.id === diseaseId) {
      return entry.commonName ?? entry.name ?? diseaseId;
    }
  }
  return diseaseId;
}
// ─── Main ────────────────────────────────────────────────────────────────────
/**
 * Script entry point: runs the three-stage image pipeline.
 *
 * Diseases that already have an imageUrl, or already have an entry in
 * RESULTS_FILE, are skipped. The rest go through Wikipedia, then Wikimedia
 * Commons, then Brave (only when a key is configured); each stage only sees
 * what the previous ones left unresolved. Results are checkpointed to
 * RESULTS_FILE, applied to diseases.json and the database, and summarised in
 * REPORT_FILE.
 */
async function main() {
console.log("\n🔍 Plant Disease Image Filler\n");
const diseases = loadDiseases();
console.log(`📋 ${diseases.length} diseases loaded\n`);
// Load existing results (an unreadable or corrupt file means starting fresh)
let results: Record<string, ImageResult> = {};
if (existsSync(RESULTS_FILE)) {
try { results = JSON.parse(readFileSync(RESULTS_FILE, "utf-8")); } catch { /* fresh */ }
}
const pending = diseases.filter((d) => {
if ((d.imageUrl as string)?.length) return false;
return !results[d.id];
});
if (pending.length === 0) {
console.log("✅ All done\n");
// Re-apply stored results even though nothing new was searched.
// NOTE(review): this early-return path does not call closeDb(), unlike the normal path.
await applyResults(diseases, results);
return;
}
console.log(`${pending.length} need images\n`);
// ── Stage 1: Wikipedia ──────────────────────────────────────────────
// `pending` already excludes ids present in `results`, so at this point s1 equals pending.
const s1 = pending.filter((d) => !results[d.id]);
let s1ok = 0;
console.log("─── Wikipedia ───\n");
for (let i = 0; i < s1.length; i++) {
const d = s1[i];
const plantName = getPlantName(diseases, d.plantId);
const url = await wikiStage(d, plantName);
if (url) {
results[d.id] = { url, source: "wikipedia", quality: "good" };
s1ok++;
}
const pct = ((i + 1) / s1.length * 100).toFixed(0);
process.stdout.write(` [${pct}% ${i + 1}/${s1.length}] ${d.name.substring(0, 40).padEnd(42)} ${url ? "✅" : "⏭️"}\n`);
// Checkpoint every 25 diseases
if ((i + 1) % 25 === 0) writeFileSync(RESULTS_FILE, JSON.stringify(results, null, 2));
}
writeFileSync(RESULTS_FILE, JSON.stringify(results, null, 2));
console.log(`\n → ${s1ok}/${s1.length} found\n`);
// ── Stage 2: Commons ─────────────────────────────────────────────────
const s2 = pending.filter((d) => !results[d.id]);
let s2ok = 0;
if (s2.length > 0) {
console.log("─── Wikimedia Commons ───\n");
for (let i = 0; i < s2.length; i++) {
const d = s2[i];
const plantName = getPlantName(diseases, d.plantId);
let url: string | null = null;
try {
// Overall 25 s cap per disease on top of the per-request timeout.
// NOTE(review): the 25 s timer is never cleared when commonsStage wins the race.
const result = await Promise.race([
commonsStage(d, plantName),
new Promise<null>((_, reject) => setTimeout(() => reject(new Error("timeout")), 25000)),
]);
url = result;
} catch { /* timeout */ }
if (url) {
results[d.id] = { url, source: "commons", quality: "good" };
s2ok++;
}
const pct = ((i + 1) / s2.length * 100).toFixed(0);
process.stdout.write(` [${pct}% ${i + 1}/${s2.length}] ${d.name.substring(0, 40).padEnd(42)} ${url ? "✅" : "⏭️"}\n`);
// Checkpoint every 10 diseases
if ((i + 1) % 10 === 0) writeFileSync(RESULTS_FILE, JSON.stringify(results, null, 2));
}
writeFileSync(RESULTS_FILE, JSON.stringify(results, null, 2));
console.log(`\n → ${s2ok}/${s2.length} found\n`);
}
// ── Stage 3: Brave ───────────────────────────────────────────────────
const s3 = pending.filter((d) => !results[d.id]);
let s3ok = 0;
if (s3.length > 0 && BRAVE_KEY) {
console.log("─── Brave Image Search ───\n");
for (const d of s3) {
// Once the call cap is hit, everything left is recorded as missing without a request.
if (braveCount >= MAX_BRAVE) {
results[d.id] = { url: "", source: "missing", quality: "missing" };
continue;
}
const plantName = getPlantName(diseases, d.plantId);
const url = await braveStage(d, plantName);
if (url) {
results[d.id] = { url, source: "brave", quality: "fallback" };
s3ok++;
process.stdout.write(`${d.name}\n`);
} else {
results[d.id] = { url: "", source: "missing", quality: "missing" };
process.stdout.write(`${d.name}\n`);
}
await delay(BRAVE_DELAY);
}
// Unlike stages 1 and 2, this stage is only checkpointed once, after the loop.
writeFileSync(RESULTS_FILE, JSON.stringify(results, null, 2));
console.log(`\n → ${s3ok}/${s3.length} found via Brave\n`);
} else if (s3.length > 0) {
console.log("─── Brave Image Search ─── → skipped (no key)\n");
// These "missing" entries are kept in memory only; RESULTS_FILE is not rewritten in this branch.
for (const d of s3) results[d.id] = { url: "", source: "missing", quality: "missing" };
}
// ── Apply ───────────────────────────────────────────────────────────
await applyResults(diseases, results);
// ── Report ──────────────────────────────────────────────────────────
const good = Object.values(results).filter((r) => r.quality === "good").length;
const fallback = Object.values(results).filter((r) => r.quality === "fallback").length;
const missing = Object.values(results).filter((r) => r.quality === "missing").length;
let report = `# Disease Images — Human Review Needed\n\n`;
report += `Generated: ${new Date().toISOString()}\n\n`;
// One report section per review category; empty categories are omitted.
for (const [label, ids, type] of [
["Fallback (Brave)", Object.entries(results).filter(([, r]) => r.quality === "fallback").map(([id]) => id), "fallback"],
["Missing", Object.entries(results).filter(([, r]) => r.quality === "missing").map(([id]) => id), "missing"],
] as const) {
if (ids.length === 0) continue;
report += `## ${type === "fallback" ? "⚠️" : "🚫"} ${label}\n\n`;
for (const id of ids) {
const d = diseases.find((x) => x.id === id);
const r = results[id];
report += `- **${d?.name ?? id}** (${d?.scientificName ?? ""}) on \`${d?.plantId ?? ""}\``;
if (r?.url) report += `\n ${r.url}`;
report += `\n\n`;
}
}
// NOTE(review): `good` counts entries in `results` only, so diseases that already
// had an imageUrl in the seed file are not included in this comparison.
if (good === diseases.length) report += `## ✅ All images found!\n`;
writeFileSync(REPORT_FILE, report, "utf-8");
console.log(`📝 Review report: ${REPORT_FILE}`);
console.log(`\n${"═".repeat(50)}`);
console.log(`📊 Total: ${diseases.length} Good: ${good} Fallback: ${fallback} Missing: ${missing}`);
console.log(` Brave calls: ${braveCount}`);
console.log(`${"═".repeat(50)}\n`);
closeDb();
}
// ─── Apply results to JSON + DB ──────────────────────────────────────────────
/**
 * Write every found image URL into diseases.json and the Turso database.
 *
 * Only results with a non-empty URL whose id exists in `diseases` are applied.
 * The JSON file gains `imageUrl` and `imageQuality` on matching entries; the
 * database gets `image_url` updated in batches of 50. Missing credentials skip
 * the database step, and database errors are logged rather than thrown.
 *
 * @param diseases Seed entries as loaded from diseases.json.
 * @param results  Pipeline results keyed by disease id.
 */
async function applyResults(diseases: DiseaseSeed[], results: Record<string, ImageResult>) {
  // Set lookup instead of scanning the disease list once per result.
  const knownIds = new Set(diseases.map((d) => d.id));
  const urlMap = new Map(
    Object.entries(results).filter(([id, r]) => r.url.length > 0 && knownIds.has(id)),
  );
  if (urlMap.size === 0) {
    console.log("⏭️ No images to apply");
    return;
  }

  // JSON
  let n = 0;
  const updated = diseases.map((d) => {
    const img = urlMap.get(d.id);
    if (img) {
      n++;
      return { ...d, imageUrl: img.url, imageQuality: img.quality };
    }
    return d;
  });
  writeFileSync(DISEASES_JSON, JSON.stringify(updated, null, 2) + "\n");
  console.log(`✅ diseases.json: ${n} images`);

  // DB
  try {
    const dbUrl = process.env.DATABASE_URL;
    const dbToken = process.env.DATABASE_TOKEN;
    if (!dbUrl || !dbToken) {
      console.log(" ⏭️ DB: no DATABASE_URL/TOKEN");
      return;
    }
    const raw = createClient({ url: dbUrl, authToken: dbToken });
    const entries = Array.from(urlMap.entries());
    try {
      for (let i = 0; i < entries.length; i += 50) {
        await raw.batch(
          entries.slice(i, i + 50).map(([id, img]) => ({
            sql: "UPDATE diseases SET image_url = ? WHERE id = ?",
            args: [img.url, id],
          })),
          "write",
        );
      }
    } finally {
      // Release the connection even when a batch fails.
      raw.close();
    }
    console.log(`✅ Turso DB: ${entries.length} rows`);
  } catch (err) {
    console.log(` ⚠️ DB: ${err instanceof Error ? err.message : err}`);
  }
}
// Entry point: run the script; on any unhandled failure print it and exit non-zero.
main().catch((failure) => {
  console.error("\n❌", failure);
  process.exit(1);
});

View File

@@ -1,301 +0,0 @@
#!/usr/bin/env node
/**
* fill-plant-images-v2.ts — Batch Wikipedia image fetch for remaining plants.
*
* Phase 1: Query 50 scientific names at a time via pageimages.
* Phase 2: Query 50 common names at a time.
* Phase 3: Search individually for stragglers.
*
* Usage: cd apps/web && npx tsx scripts/fill-plant-images-v2.ts
*/
import { readFileSync, writeFileSync } from "fs";
import { resolve } from "path";
// Load env
// Minimal dotenv-style reader: one KEY=VALUE per line, `#` comment lines are
// skipped, and variables already set (non-empty) in process.env win.
const envPath = resolve(__dirname, "../.env.development");
try {
  for (const rawLine of readFileSync(envPath, "utf-8").split("\n")) {
    const line = rawLine.trim();
    if (!line || line.startsWith("#")) continue;
    const eqIdx = line.indexOf("=");
    if (eqIdx <= 0) continue;
    const key = line.slice(0, eqIdx).trim();
    let val = line.slice(eqIdx + 1).trim();
    // Strip one pair of matching surrounding quotes, as dotenv does, so that
    // KEY="value" does not leak the quote characters into the URL or token.
    const quote = val.charAt(0);
    if (val.length >= 2 && (quote === '"' || quote === "'") && val.endsWith(quote)) {
      val = val.slice(1, -1);
    }
    if (!process.env[key]) {
      process.env[key] = val;
    }
  }
} catch {
  // Best effort: when the file is missing or unreadable, rely on the ambient environment.
}
import { getDb, closeDb } from "../src/lib/db/index";
import { plants } from "../src/lib/db/schema";
import { createClient } from "@libsql/client";
import { sql } from "drizzle-orm";
const API = "https://en.wikipedia.org/w/api.php"; // English Wikipedia Action API endpoint
const UA = "PlantHealthKB/1.0"; // User-Agent header sent with every API request
const BATCH = 50; // number of plants (and therefore titles) per pageimages request
/** Columns main() selects from the `plants` table for rows that still lack an image. */
interface PlantRow {
id: string;
commonName: string;
scientificName: string;
}
/**
 * Normalise a scientific name into a plain Wikipedia-style title.
 *
 * - a standalone hybrid marker `X` becomes `x` (capital X inside a word, e.g.
 *   the genus initial of "Xanthosoma", is left alone);
 * - the placeholder `spp` / `spp.` is dropped as a whole word, without gluing
 *   its neighbours together;
 * - dots, the multiplication sign (U+00D7) and apostrophes are removed;
 * - runs of whitespace left behind are collapsed to one space, so the result
 *   can be compared with the titles the API returns.
 */
function clean(s: string): string {
  return s
    .replace(/\bX\b/g, "x")
    .replace(/\bspp\b\.?/gi, " ")
    .replace(/[.\u00d7']/g, "")
    .replace(/\s+/g, " ")
    .trim();
}
/**
 * Fetch page thumbnails for a batch of Wikipedia titles in one request.
 *
 * The returned map is keyed by lower-cased title. It contains the final page
 * titles reported by the API and, in addition, every requested title that the
 * API normalised or redirected to one of those pages, so callers can look up
 * results by the title they asked for.
 *
 * Up to three attempts are made: HTTP 429 backs off exponentially
 * (5 s, 10 s, 20 s), a thrown error waits 2 s, and any other non-OK status
 * yields an empty map.
 *
 * @param titles Page titles to look up (the caller sends at most BATCH per call).
 * @returns Map of lower-cased title to thumbnail URL; empty on failure.
 */
async function fetchThumbs(titles: string[]): Promise<Map<string, string>> {
  if (titles.length === 0) {
    return new Map();
  }
  type TitleHop = { from: string; to: string };
  type PageInfo = { title: string; missing?: unknown; thumbnail?: { source?: string } };
  type ThumbResponse = {
    query?: {
      normalized?: TitleHop[];
      redirects?: TitleHop[];
      pages?: Record<string, PageInfo>;
    };
  };
  const p = new URLSearchParams({
    action: "query",
    titles: titles.join("|"),
    prop: "pageimages",
    pithumbsize: "400",
    redirects: "1",
    format: "json",
  });
  for (let a = 0; a < 3; a++) {
    try {
      const r = await fetch(API + "?" + p.toString(), {
        headers: { "User-Agent": UA },
      });
      if (r.status === 429) {
        await new Promise((rr) => setTimeout(rr, 5000 * Math.pow(2, a)));
        continue;
      }
      if (!r.ok) {
        return new Map();
      }
      const d = (await r.json()) as ThumbResponse;
      const pages = d?.query?.pages;
      if (!pages) {
        return new Map();
      }
      const m = new Map<string, string>();
      for (const pg of Object.values(pages)) {
        // In this JSON format a missing page carries `"missing": ""`, so test
        // for the key's presence rather than its (falsy) value.
        const src = pg.thumbnail?.source;
        if (!("missing" in pg) && src) {
          m.set(pg.title.toLowerCase(), src);
        }
      }

      // With redirects=1 the pages come back under their *final* titles. Record
      // how requested titles were rewritten so they can be aliased to the same
      // thumbnail. Case-only rewrites are irrelevant because keys are lower-cased.
      const forward = new Map<string, string>();
      for (const hop of [...(d.query?.normalized ?? []), ...(d.query?.redirects ?? [])]) {
        if (hop.from.toLowerCase() !== hop.to.toLowerCase()) {
          forward.set(hop.from.toLowerCase(), hop.to);
        }
      }
      for (const asked of titles) {
        const askedKey = asked.toLowerCase();
        if (m.has(askedKey)) continue;
        let current = askedKey;
        // normalisation followed by a redirect is at most a couple of hops; the bound guards against cycles
        for (let hops = 0; hops < 5; hops++) {
          const next = forward.get(current);
          if (next === undefined) break;
          current = next.toLowerCase();
        }
        const src = m.get(current);
        if (src) m.set(askedKey, src);
      }
      return m;
    } catch {
      // Network or JSON failure: brief pause, then retry.
      await new Promise((rr) => setTimeout(rr, 2000));
    }
  }
  return new Map();
}
/**
 * Full-text search Wikipedia for `query` and return a page thumbnail.
 *
 * Up to three attempts are made: HTTP 429 backs off exponentially
 * (5 s, 10 s, 20 s), a thrown error waits 2 s, and any other non-OK status
 * gives up immediately.
 *
 * @param query Search phrase passed as `gsrsearch`.
 * @returns Thumbnail URL of the best-ranked hit (of up to three) that has one, or null.
 */
async function searchOne(query: string): Promise<string | null> {
  type SearchResponse = {
    query?: {
      pages?: Record<string, { index?: number; thumbnail?: { source?: string } }>;
    };
  };
  const p = new URLSearchParams({
    action: "query",
    generator: "search",
    gsrsearch: query,
    gsrlimit: "3",
    prop: "pageimages",
    pithumbsize: "400",
    format: "json",
  });
  for (let a = 0; a < 3; a++) {
    try {
      const r = await fetch(API + "?" + p.toString(), {
        headers: { "User-Agent": UA },
      });
      if (r.status === 429) {
        await new Promise((rr) => setTimeout(rr, 5000 * Math.pow(2, a)));
        continue;
      }
      if (!r.ok) {
        return null;
      }
      const d = (await r.json()) as SearchResponse;
      const pages = d?.query?.pages;
      if (!pages) {
        return null;
      }
      // `pages` is keyed by page id, and JavaScript enumerates integer-like keys
      // in ascending numeric order, which is unrelated to relevance. With
      // generator=search each page carries its search rank in `index`, so sort
      // on that to prefer the best match. Pages without an index keep their
      // relative order at the end (Array.prototype.sort is stable).
      const ranked = Object.values(pages).sort(
        (x, y) =>
          (x?.index ?? Number.MAX_SAFE_INTEGER) - (y?.index ?? Number.MAX_SAFE_INTEGER),
      );
      for (const pg of ranked) {
        const src = pg?.thumbnail?.source;
        if (src) {
          return src;
        }
      }
      return null;
    } catch {
      // Network or JSON failure: brief pause, then retry.
      await new Promise((rr) => setTimeout(rr, 2000));
    }
  }
  return null;
}
/**
 * Run one batched lookup phase over `plants`.
 *
 * Plants are processed BATCH at a time: each chunk's titles (those longer than
 * two characters) are sent to fetchThumbs(), matches are queued as DB updates,
 * and the rest are collected for the next phase. Queued updates are written
 * once 100 or more have accumulated and again at the end. There is a 1.5 s
 * pause after every chunk.
 *
 * @param plants   Plants still lacking an image (shadows the imported `plants` table).
 * @param titleFn  Derives the Wikipedia title to try for a plant.
 * @param label    Short tag printed in the progress line.
 * @param dbClient Client whose `batch(statements, "write")` performs the updates.
 * @returns Plants for which this phase found no image.
 */
async function batchPhase(
  plants: PlantRow[],
  titleFn: (p: PlantRow) => string,
  label: string,
  dbClient: any,
): Promise<PlantRow[]> {
  const unmatched: PlantRow[] = [];
  const queued: Array<{ id: string; url: string }> = [];

  // Send everything currently queued as one write batch (does not clear the queue).
  const writeQueued = async (): Promise<void> => {
    const statements = queued.map((entry) => ({
      sql: "UPDATE plants SET image_url = ?, updated_at = datetime('now') WHERE id = ?",
      args: [entry.url, entry.id],
    }));
    await dbClient.batch(statements, "write");
  };

  for (let start = 0; start < plants.length; start += BATCH) {
    const chunk = plants.slice(start, start + BATCH);
    const titles = chunk.map(titleFn).filter((title) => title.length > 2);
    const lastInChunk = Math.min(start + BATCH, plants.length);
    console.log(` [${label}] ${start + 1}-${lastInChunk}/${plants.length} `);

    const imageMap = await fetchThumbs(titles);
    let hits = 0;
    for (const plant of chunk) {
      const img = imageMap.get(titleFn(plant).toLowerCase());
      if (!img) {
        unmatched.push(plant);
        continue;
      }
      queued.push({ id: plant.id, url: img });
      hits++;
    }
    console.log(" found: " + hits);

    if (queued.length >= 100) {
      await writeQueued();
      queued.length = 0;
    }
    await new Promise((r) => setTimeout(r, 1500));
  }

  if (queued.length > 0) {
    await writeQueued();
  }
  return unmatched;
}
/**
 * Script entry point.
 *
 * Selects every plant whose image_url is NULL or empty and tries three phases
 * in turn, each working only on what the previous one left over: batched
 * lookup by cleaned scientific name, batched lookup by common name, then an
 * individual search per plant. Finishes with a console summary and, when
 * plants are still missing images, a markdown report next to this script.
 */
async function main() {
console.log("\nPlant Image Filler v2\n");
const db = getDb();
const allPlants = (await db
.select({
id: plants.id,
commonName: plants.commonName,
scientificName: plants.scientificName,
})
.from(plants)
.where(sql`(image_url IS NULL OR image_url = '')`)
.all()) as PlantRow[];
console.log("Plants needing images: " + allPlants.length + "\n");
if (allPlants.length === 0) {
console.log("All plants have images!\n");
closeDb();
return;
}
// Separate raw libsql client, used for the UPDATE statements in all three phases.
const raw = createClient({
url: process.env.DATABASE_URL!,
authToken: process.env.DATABASE_TOKEN!,
});
// NOTE(review): only incremented in phase 3 and never read afterwards.
let found = 0;
// Phase 1: Scientific name
console.log("--- Phase 1: Scientific names ---\n");
let remaining = await batchPhase(allPlants, (p) => clean(p.scientificName), "sci", raw);
// Phase 2: Common name
if (remaining.length > 0) {
console.log("\n--- Phase 2: Common names (" + remaining.length + ") ---\n");
remaining = await batchPhase(remaining, (p) => p.commonName, "common", raw);
}
// Phase 3: Search — one request per plant, each hit written to the DB immediately
if (remaining.length > 0) {
console.log("\n--- Phase 3: Search (" + remaining.length + ") ---\n");
for (let i = 0; i < remaining.length; i++) {
const pl = remaining[i];
const q = clean(pl.scientificName) + " " + pl.commonName;
console.log(" [" + (i + 1) + "/" + remaining.length + "] " + pl.commonName);
const img = await searchOne(q);
if (img) {
await raw.execute({
sql: "UPDATE plants SET image_url = ?, updated_at = datetime('now') WHERE id = ?",
args: [img, pl.id],
});
found++;
console.log(" OK");
} else {
console.log(" MISS");
}
// 0.5 s pause between searches
await new Promise((r) => setTimeout(r, 500));
}
}
raw.close();
// Report — re-query the whole table so the totals reflect the database, not this run only
const finalList = await db
.select({
id: plants.id,
commonName: plants.commonName,
imageUrl: plants.imageUrl,
})
.from(plants)
.all();
const w = finalList.filter((p) => p.imageUrl);
const wo = finalList.filter((p) => !p.imageUrl);
console.log("\n" + "=".repeat(50));
console.log("FINAL: " + finalList.length + " plants");
console.log(" With images: " + w.length);
console.log(" Missing: " + wo.length);
if (wo.length > 0) {
// Write a markdown list of the plants that still have no image.
const rp = resolve(__dirname, ".plant-image-review-needed.md");
let report = "# Plant Images - Still Missing\n\n";
report += "Generated: " + new Date().toISOString() + "\n\n";
report += "## Missing (" + wo.length + ")\n\n";
for (const p of wo) {
report += "- " + p.commonName + " (" + p.id + ")\n";
}
writeFileSync(rp, report, "utf-8");
console.log("Report: " + rp);
} else {
console.log("\nALL PLANTS HAVE IMAGES!");
}
closeDb();
}
// Entry point: run the script; on any unhandled failure print it and exit non-zero.
// The rejection reason is typed `unknown` rather than `any`: it is only logged.
main().catch((err: unknown) => {
  console.error("Error:", err);
  process.exit(1);
});

View File

@@ -1,308 +0,0 @@
#!/usr/bin/env node
/**
* fill-plant-images.ts — Fetch plant images from Wikipedia for plants missing them.
*
* Uses the Wikipedia API to search for the plant's scientific name
* and grab the page thumbnail.
*
* Usage: cd apps/web && npx tsx scripts/fill-plant-images.ts
*/
import { readFileSync, writeFileSync } from "fs";
import { resolve } from "path";
// Load env
/**
 * Parse the text of a `.env` file into `[key, value]` pairs.
 *
 * Blank lines, `#` comment lines, and lines without a key before `=` are
 * skipped. Everything after the first `=` is the value, so values may contain
 * `=` themselves. A value wrapped in matching single or double quotes has the
 * quotes removed, so `KEY="value"` yields `value` rather than `"value"`.
 *
 * @param text Raw file contents.
 * @returns Entries in file order.
 */
function parseEnvText(text: string): Array<[string, string]> {
  const entries: Array<[string, string]> = [];
  for (const line of text.split("\n")) {
    const trimmed = line.trim();
    if (!trimmed || trimmed.startsWith("#")) continue;
    const eqIdx = trimmed.indexOf("=");
    if (eqIdx <= 0) continue;
    const key = trimmed.slice(0, eqIdx).trim();
    let val = trimmed.slice(eqIdx + 1).trim();
    const quote = val[0];
    if (val.length >= 2 && (quote === '"' || quote === "'") && val.endsWith(quote)) {
      val = val.slice(1, -1);
    }
    entries.push([key, val]);
  }
  return entries;
}

const envPath = resolve(__dirname, "../.env.development");
try {
  for (const [key, val] of parseEnvText(readFileSync(envPath, "utf-8"))) {
    // Variables already set (and non-empty) in the real environment win.
    if (!process.env[key]) process.env[key] = val;
  }
} catch {
  // Best effort: if the file cannot be read, rely on the existing environment.
}
import { getDb, closeDb } from "../src/lib/db/index";
import { plants } from "../src/lib/db/schema";
import { createClient } from "@libsql/client";
import { sql } from "drizzle-orm";
const WIKI_API = "https://en.wikipedia.org/w/api.php";
const UA = "PlantHealthKB/1.0 (plant-images)";
const DELAY_MS = 500;
const BATCH_SIZE = 50;

/**
 * Fetch the thumbnail URL of the Wikipedia page with the given title.
 *
 * Makes up to three attempts. An HTTP 429 response waits 3s, 6s, then 12s
 * before the next attempt; a thrown error (network failure, invalid JSON)
 * waits 2s. Any other non-OK response gives up immediately.
 *
 * @param title Page title to look up.
 * @returns The thumbnail URL of the first non-missing page that has one, or
 *          `null` when there is none or every attempt failed.
 */
async function directPageLookup(title: string): Promise<string | null> {
  const query = new URLSearchParams({
    action: "query",
    titles: title,
    prop: "pageimages",
    pithumbsize: "400",
    format: "json",
    origin: "*",
  });
  const requestUrl = `${WIKI_API}?${query}`;

  for (let attempt = 0; attempt < 3; attempt++) {
    try {
      const response = await fetch(requestUrl, { headers: { "User-Agent": UA } });

      if (response.status === 429) {
        // Rate limited: back off exponentially, then try again.
        const backoffMs = 3000 * 2 ** attempt;
        await new Promise((wake) => setTimeout(wake, backoffMs));
        continue;
      }
      if (!response.ok) {
        return null;
      }

      const payload = (await response.json()) as {
        query?: { pages?: Record<string, { thumbnail?: { source: string }; missing?: boolean }> };
      };
      const pageTable = payload?.query?.pages;
      if (!pageTable) {
        return null;
      }

      const withThumb = Object.values(pageTable).find(
        (page) => !page.missing && page.thumbnail?.source,
      );
      return withThumb?.thumbnail?.source ?? null;
    } catch {
      // Transient failure: pause briefly before the next attempt.
      await new Promise((wake) => setTimeout(wake, 2000));
    }
  }

  return null;
}
/**
 * Fill in missing plant images from Wikipedia.
 *
 * Selects every plant whose image_url is NULL or empty, then:
 *  1. Phase 1: calls directPageLookup with the cleaned scientific name, then
 *     the common name, then the genus. Hits are queued and written to the
 *     database in batches of BATCH_SIZE.
 *  2. Phase 2: re-queries the plants that still have no image and asks the
 *     Wikipedia search API for up to 3 hits each, taking the first hit that
 *     has a thumbnail. Each hit is written immediately.
 *
 * Finally it prints totals and, if any plant still lacks an image, writes
 * `.plant-image-review-needed.md` next to this script.
 */
async function main() {
console.log("\n🌿 Fetching plant images from Wikipedia\n");
const db = getDb();
// Only plants without an image are candidates.
const allPlants = await db
.select({ id: plants.id, commonName: plants.commonName, scientificName: plants.scientificName })
.from(plants)
.where(sql`(image_url IS NULL OR image_url = '')`)
.all();
console.log(`📋 ${allPlants.length} plants need images\n`);
if (allPlants.length === 0) {
console.log("✅ All plants already have images!\n");
closeDb();
return;
}
// Separate raw libsql client used for the UPDATE statements below.
const rawClient = createClient({
url: process.env.DATABASE_URL!,
authToken: process.env.DATABASE_TOKEN!,
});
// Running total of plants that received an image, across both phases.
let found = 0;
// Pending phase-1 writes; emptied every time a batch is flushed.
const updates: { id: string; url: string }[] = [];
// Phase 1: Try direct page lookup by scientific name (most accurate)
console.log("─── Phase 1: Direct page lookup ───\n");
for (let i = 0; i < allPlants.length; i++) {
const plant = allPlants[i];
// Remove the hybrid sign and quote characters, and the first "spp"/"spp." occurrence.
// NOTE(review): the "spp" pattern has no word boundary and also consumes the
// whitespace around it, so text on both sides gets joined — confirm this is intended.
const sciName = plant.scientificName
.replace(/[×'"]/g, "")
.replace(/\s*spp\.?\s*/i, "")
.trim();
process.stdout.write(
` [${String(i + 1).padStart(3)}/${allPlants.length}] ${plant.commonName.padEnd(30)} `,
);
let url: string | null = null;
// Try scientific name first
if (sciName && sciName !== "Unknown" && sciName !== "Various") {
url = await directPageLookup(sciName);
}
// Try common name if scientific name didn't work
if (!url) {
url = await directPageLookup(plant.commonName);
}
// Try genus name
if (!url && sciName) {
// The genus is taken to be the first whitespace-separated word; very short words are skipped.
const genus = sciName.split(/\s+/)[0];
if (genus && genus.length > 3) {
url = await directPageLookup(genus);
}
}
if (url) {
updates.push({ id: plant.id, url });
found++;
process.stdout.write("✅\n");
} else {
process.stdout.write("⏭️\n");
}
// Flush to DB in batches
if (updates.length >= BATCH_SIZE) {
await rawClient.batch(
updates.map((u) => ({
sql: "UPDATE plants SET image_url = ?, updated_at = datetime('now') WHERE id = ?",
args: [u.url, u.id],
})),
"write",
);
console.log(` → Flushed ${updates.length} to DB`);
// Empty the queue in place so the same array is reused.
updates.length = 0;
}
// Pause between plants to space out requests to Wikipedia.
await new Promise((r) => setTimeout(r, DELAY_MS));
}
// Flush remaining
if (updates.length > 0) {
await rawClient.batch(
updates.map((u) => ({
sql: "UPDATE plants SET image_url = ?, updated_at = datetime('now') WHERE id = ?",
args: [u.url, u.id],
})),
"write",
);
console.log(` → Flushed ${updates.length} to DB`);
updates.length = 0;
}
console.log(`\n✅ Phase 1 done: ${found}/${allPlants.length} plants got images\n`);
// Phase 2: Try remaining via search API
// Re-query rather than tracking misses locally, so this reflects what was actually written.
const stillMissing = await db
.select({ id: plants.id, commonName: plants.commonName, scientificName: plants.scientificName })
.from(plants)
.where(sql`(image_url IS NULL OR image_url = '')`)
.all();
if (stillMissing.length > 0) {
console.log(`─── Phase 2: Search API for ${stillMissing.length} remaining ───\n`);
for (let i = 0; i < stillMissing.length; i++) {
const plant = stillMissing[i];
const sciName = plant.scientificName.replace(/[×'"]/g, "").trim();
process.stdout.write(
` [${String(i + 1).padStart(3)}/${stillMissing.length}] ${plant.commonName.padEnd(30)} `,
);
// Search with scientific name
const searchTerm = `${sciName} ${plant.commonName}`;
const params = new URLSearchParams({
action: "query",
list: "search",
srsearch: searchTerm,
srlimit: "3",
format: "json",
origin: "*",
});
let url: string | null = null;
// Up to three attempts at the search request; any non-429 outcome ends the loop.
for (let attempt = 0; attempt < 3; attempt++) {
try {
const res = await fetch(`${WIKI_API}?${params}`, {
headers: { "User-Agent": UA },
});
if (res.status === 429) {
// Rate limited: wait 3s, 6s, then 12s before the next attempt.
await new Promise((r) => setTimeout(r, 3000 * 2 ** attempt));
continue;
}
if (!res.ok) break;
const data = (await res.json()) as {
query?: { search?: Array<{ title: string; pageid: number }> };
};
const hits = data?.query?.search ?? [];
if (hits.length === 0) break;
// Get thumbnail for first result
// Walk the hits in order and stop at the first one that has a thumbnail.
for (const hit of hits) {
const pageParams = new URLSearchParams({
action: "query",
pageids: String(hit.pageid),
prop: "pageimages",
pithumbsize: "400",
format: "json",
origin: "*",
});
const pageRes = await fetch(`${WIKI_API}?${pageParams}`, {
headers: { "User-Agent": UA },
});
// A failed thumbnail request just moves on to the next hit (no 429 backoff here).
if (!pageRes.ok) continue;
const pageData = (await pageRes.json()) as {
query?: { pages?: Record<string, { thumbnail?: { source: string } }> };
};
const pages = pageData?.query?.pages;
if (!pages) continue;
for (const [, p] of Object.entries(pages)) {
if (p.thumbnail?.source) {
url = p.thumbnail.source;
break;
}
}
if (url) break;
}
break;
} catch {
// A thrown error (search or thumbnail request) waits 2s and retries the whole search.
await new Promise((r) => setTimeout(r, 2000));
}
}
if (url) {
// Phase 2 writes each hit straight away instead of batching.
await rawClient.execute({
sql: "UPDATE plants SET image_url = ?, updated_at = datetime('now') WHERE id = ?",
args: [url, plant.id],
});
found++;
process.stdout.write("✅\n");
} else {
process.stdout.write("❌\n");
}
await new Promise((r) => setTimeout(r, DELAY_MS));
}
}
// Final count
// Covers all plants, not only the ones processed in this run.
const final = await db
.select({ id: plants.id, commonName: plants.commonName, imageUrl: plants.imageUrl })
.from(plants)
.all();
const withImg = final.filter((p) => p.imageUrl);
const withoutImg = final.filter((p) => !p.imageUrl);
console.log(`\n${"═".repeat(50)}`);
console.log(`📊 FINAL: ${final.length} plants`);
console.log(` With images: ${withImg.length}`);
console.log(` Missing images: ${withoutImg.length}`);
if (withoutImg.length > 0) {
console.log(`\n📝 Plants still needing images:`);
withoutImg.forEach((p) => console.log(`${p.id}: ${p.commonName}`));
// Save to file for reference
const reportPath = resolve(__dirname, ".plant-image-review-needed.md");
let report = "# Plant Images — Still Missing\n\n";
report += `Generated: ${new Date().toISOString()}\n\n`;
report += `## 🚫 Plants without images (${withoutImg.length})\n\n`;
for (const p of withoutImg) {
report += `- **${p.commonName}** (\`${p.id}\`)\n`;
}
writeFileSync(reportPath, report, "utf-8");
console.log(` 📝 Review report: ${reportPath}`);
} else {
console.log("\n✅ All plants now have images!");
}
rawClient.close();
closeDb();
}
// Entry point: log any failure and exit with a non-zero status.
main().catch((err) => {
console.error("\n❌", err);
process.exit(1);
});

View File

@@ -59,7 +59,7 @@ const TARGET_HEALTHY = 400;
* Each disease is I/O-bound (HTTP requests), so high concurrency is safe.
* The global DDG rate limiter prevents us from overwhelming DuckDuckGo.
*/
const DISEASE_CONCURRENCY = 20;
const DISEASE_CONCURRENCY = 50;
/**
* Max DDG requests per second (shared across all concurrent diseases).
@@ -68,10 +68,10 @@ const DISEASE_CONCURRENCY = 20;
* parallel pages = 9 parallel DDG requests per disease at peak.
* The rate limiter serializes this so we don't get banned.
*/
const DDG_RATE_LIMIT_RPS = 2;
const DDG_RATE_LIMIT_RPS = 6;
/** Max concurrent image downloads per disease */
const CONCURRENT_DOWNLOADS = 2;
const CONCURRENT_DOWNLOADS = 50;
/** Minimum image size in bytes to accept */
const MIN_IMAGE_SIZE = 10_000; // 10KB
@@ -93,9 +93,10 @@ const HEALTHY_CLASS = "healthy";
const SEEN_CACHE_FLUSH_INTERVAL = 20;
/** Max DDG pages to fetch per query.
* Each page returns ~100 image results, so 3 pages × 3 queries = ~900 raw URLs
* before dedup — more than enough to find 200 unique, valid images. */
const MAX_DDG_PAGES = 3;
* Each page returns ~50 image results, so 5 pages × 3 queries = ~750 raw URLs
* before dedup. Pages beyond 3 yield progressively more novel URLs since
* the seen-URLs cache accumulates across runs. */
const MAX_DDG_PAGES = 5;
/** Healthy source queries limit */
const MAX_HEALTHY_QUERIES = 20;
@@ -281,8 +282,33 @@ async function searchImagesDuckDuckGo(
await sleep(5_000);
return searchImagesDuckDuckGo(query, vqd, page);
}
if (res.status === 403) return [];
// Don't throw for transient errors — just return empty
if (res.status === 403) {
// VQD token expired or DDG changed format — get a fresh token and retry
console.warn(` ⚠ DDG 403 on page ${page} — refreshing VQD token...`);
try {
const freshVqd = await getVqdToken(query);
await ddgLimiter.acquire();
const retryUrl = url.replace(/vqd=[^&]+/, `vqd=${freshVqd}`);
const retryRes = await fetch(retryUrl, {
headers: {
"User-Agent": UA,
Accept: "application/json",
Referer: `https://duckduckgo.com/?q=${encodeURIComponent(
query,
)}&t=h_&iax=images&ia=images`,
},
signal: AbortSignal.timeout(15_000),
});
if (retryRes.ok) {
const freshData = (await retryRes.json()) as { results: DuckDuckGoImageResult[] };
return freshData.results ?? [];
}
} catch {
// Fresh token also failed — give up on this page
}
return [];
}
console.warn(` ⚠ DDG returned ${res.status} on page ${page}`);
return [];
}
@@ -510,17 +536,19 @@ async function downloadBatch(
): Promise<{ downloaded: number; failed: number; lastIndex: number }> {
let downloaded = 0;
let failed = 0;
let index = startIndex;
for (let i = 0; i < urls.length; i += CONCURRENT_DOWNLOADS) {
const chunk = urls.slice(i, i + CONCURRENT_DOWNLOADS);
const results = await Promise.all(
chunk.map(async (url) => {
const paddedIndex = String(index).padStart(4, "0");
chunk.map(async (url, chunkIdx) => {
// Compute index deterministically BEFORE the async download starts,
// so all parallel callbacks get a unique index (no race condition).
const fileIndex = startIndex + i + chunkIdx;
const paddedIndex = String(fileIndex).padStart(4, "0");
const destPath = resolve(classDir, `img_${paddedIndex}.jpg`);
const success = await downloadImage(url, destPath);
return { success, index: index++ };
return { success, index: fileIndex };
}),
);
@@ -530,7 +558,7 @@ async function downloadBatch(
}
}
return { downloaded, failed, lastIndex: index };
return { downloaded, failed, lastIndex: startIndex + urls.length };
}
// ─── Query Building ─────────────────────────────────────────────────────────
@@ -592,7 +620,10 @@ async function fillClass(
indexOffset: number,
): Promise<void> => {
const result = await collector();
if (result.urls.length === 0) return;
if (result.urls.length === 0) {
console.log(` ${label}: 0 URLs found`);
return;
}
console.log(` ${label}: ${result.urls.length} new URLs`);
// Each source writes to its own non-overlapping range
@@ -788,7 +819,13 @@ async function main() {
const classDir = resolve(DATASET_DIR, d.id);
const queries = buildSearchQueries(d.name, d.plantId);
const seen = new Set<string>(seenUrlsCache[d.id] ?? []);
// CRITICAL: Start with a FRESH empty set for within-run search dedup.
// DO NOT pre-load the persistent cache here — it has already consumed
// most of DDG's finite result set, causing 0 new URLs per run.
// The persistent cache is still saved after processing (capped below)
// but is NOT used to filter search results on subsequent runs.
const seen = new Set<string>();
console.log(
` [${d.id}] have ${d.have}, need ${d.needed} more` + ` (${d.name} / ${d.plantId})`,
@@ -796,8 +833,11 @@ async function main() {
const gained = await fillClass(d.id, queries, d.needed, classDir, seen);
// Update seen-URLs cache for this disease
seenUrlsCache[d.id] = Array.from(seen);
// Update seen-URLs cache for this disease — merge with existing
// and cap at 500 per disease to prevent unbounded cache growth.
const existing = seenUrlsCache[d.id] ?? [];
const merged = [...new Set([...existing, ...Array.from(seen)])];
seenUrlsCache[d.id] = merged.slice(-500);
return gained;
})(),
),

View File

@@ -1,212 +0,0 @@
#!/usr/bin/env node
/**
* fix-classifications.ts — Fix misclassified diseases in the DB.
*
* Fixes:
* 1. Diseases named with viral indicators (mosaic, mottle, ringspot, virus, etc.)
* that are incorrectly tagged as "fungal"
* 2. Other suspicious patterns
*
* Usage: cd apps/web && npx tsx scripts/fix-classifications.ts
*/
import { readFileSync } from "fs";
import { resolve } from "path";
// Manually load .env.development
/**
 * Parse the text of a `.env` file into `[key, value]` pairs.
 *
 * Blank lines, `#` comment lines, and lines without a key before `=` are
 * skipped. Everything after the first `=` is the value, so values may contain
 * `=` themselves. A value wrapped in matching single or double quotes has the
 * quotes removed, so `KEY="value"` yields `value` rather than `"value"`.
 *
 * @param text Raw file contents.
 * @returns Entries in file order.
 */
function parseEnvText(text: string): Array<[string, string]> {
  const entries: Array<[string, string]> = [];
  for (const line of text.split("\n")) {
    const trimmed = line.trim();
    if (!trimmed || trimmed.startsWith("#")) continue;
    const eqIdx = trimmed.indexOf("=");
    if (eqIdx <= 0) continue;
    const key = trimmed.slice(0, eqIdx).trim();
    let val = trimmed.slice(eqIdx + 1).trim();
    const quote = val[0];
    if (val.length >= 2 && (quote === '"' || quote === "'") && val.endsWith(quote)) {
      val = val.slice(1, -1);
    }
    entries.push([key, val]);
  }
  return entries;
}

const envPath = resolve(__dirname, "../.env.development");
try {
  for (const [key, val] of parseEnvText(readFileSync(envPath, "utf-8"))) {
    // Variables already set (and non-empty) in the real environment win.
    if (!process.env[key]) process.env[key] = val;
  }
} catch {
  // Best effort: if the file cannot be read, rely on the existing environment.
}
import { getDb, closeDb } from "../src/lib/db/index";
import { diseases } from "../src/lib/db/schema";
import { createClient } from "@libsql/client";
/** Causal agent categories this script can assign. */
type AgentType = "fungal" | "bacterial" | "viral" | "environmental";

/** One name-based reclassification heuristic. */
interface FixRule {
  /** Returns true when the disease name matches this rule. */
  test: (name: string) => boolean;
  /** Agent type a matching disease should carry. */
  correctAgent: AgentType;
  /** Human-readable justification for the correction. */
  reason: string;
}

/**
 * Name-based heuristics, evaluated in order; the first rule whose `test`
 * matches a disease name decides its agent type, so more specific rules must
 * come before broader ones.
 *
 * NOTE(review): the symptom-word rules are broad heuristics. Some well-known
 * names they match are commonly attributed to fungi (e.g. "Peach leaf curl",
 * "Dwarf bunt", "snow mold") — confirm against the data before relying on them.
 */
const FIX_RULES: FixRule[] = [
  // Diseases explicitly named as "virus" or "viral"
  {
    test: (name) => /\b(virus|viral|viroid)\b/i.test(name),
    correctAgent: "viral",
    reason: "Name explicitly indicates viral disease",
  },
  // Potexvirus, carlavirus, etc.
  // The genus prefixes accept an optional "vir…" suffix. Previously a trailing
  // word boundary directly after the prefix meant "potexvirus", "carlavirus"
  // and similar never matched (and the plain "virus" rule above also misses
  // them because there is no word boundary before "virus").
  {
    test: (name) =>
      /\b(?:(?:potex|carla|tobamo|poty|cucumo|ilar|nepo)(?:vir\w*)?|(?:tymo|gemini)vir\w*|tom(?:ato)? bushy stunt)\b/i.test(
        name,
      ),
    correctAgent: "viral",
    reason: "Recognized virus genus in name",
  },
  // "Mosaic" diseases (typically viral)
  {
    test: (name) => /\bmosaic\b/i.test(name),
    correctAgent: "viral",
    reason: "Mosaic symptoms are typically caused by viruses",
  },
  // "Mottle" diseases (typically viral)
  {
    test: (name) => /\bmottle\b/i.test(name),
    correctAgent: "viral",
    reason: "Mottle symptoms are typically caused by viruses",
  },
  // "Ringspot" diseases (typically viral)
  {
    test: (name) => /\bringspot\b/i.test(name),
    correctAgent: "viral",
    reason: "Ringspot symptoms are typically caused by viruses",
  },
  // "Leaf curl" (many are viral)
  {
    test: (name) => /\bleaf curl\b|\bleafroll\b|\bleaf-roll\b/i.test(name),
    correctAgent: "viral",
    reason: "Leaf curl/roll diseases are often viral",
  },
  // "Rosette" (often viral or phytoplasma)
  {
    test: (name) => /\brosette\b/i.test(name),
    correctAgent: "viral",
    reason: "Rosette diseases are typically viral or phytoplasma",
  },
  // "Yellows" (often phytoplasma/viral)
  {
    test: (name) => /\byellows\b/i.test(name) && !/\bpeach\b/i.test(name),
    correctAgent: "viral",
    reason: "Yellows diseases are typically phytoplasma or viral",
  },
  // "Stunt" / "Dwarf" (often viral)
  {
    test: (name) => /\b(stunt|dwarf(ism)?)\b/i.test(name),
    correctAgent: "viral",
    reason: "Stunting/dwarfing diseases are often viral",
  },
  // Explicit bacterial in name
  {
    test: (name) =>
      /\bbacterial\b|\bbacterium\b|\berwinia\b|\bpseudomonas\b|\bxanthomonas\b|\bralstonia\b|\bclavibacter\b|\bstreptomyces\b|\bagrobacterium\b/i.test(
        name,
      ),
    correctAgent: "bacterial",
    reason: "Name indicates bacterial disease",
  },
  // Environmental/abiotic indicators
  {
    test: (name) =>
      /\b(deficiency|abiotic|environmental|injury|damage|stress|sunscald|sunburn|chilling|freeze|frost|wind|hail|nutrient|toxicity|snow\s+(mold|scald)|winter\s+(injury|rot|kill))\b/i.test(
        name,
      ),
    correctAgent: "environmental",
    reason: "Name indicates abiotic/environmental cause",
  },
];
/**
 * Reclassify diseases whose name contradicts their stored causal agent type.
 *
 * Loads every disease, applies FIX_RULES in order (first matching rule wins),
 * prints the planned corrections grouped by "old → new" agent type, writes the
 * corrections in batches of 50, and finally prints the resulting distribution
 * of agent types. Database handles are closed even when a step fails.
 */
async function main() {
  console.log("🔍 Fixing disease classifications\n");
  const db = getDb();
  try {
    const allDiseases = await db
      .select({ id: diseases.id, name: diseases.name, causalAgentType: diseases.causalAgentType })
      .from(diseases)
      .all();
    console.log(`📋 ${allDiseases.length} total diseases\n`);

    const updates: { id: string; newAgent: AgentType; rule: FixRule; oldAgent: string }[] = [];
    for (const d of allDiseases) {
      // First matching rule wins; a disease already carrying that type is left alone.
      const rule = FIX_RULES.find((r) => r.test(d.name));
      if (rule && d.causalAgentType !== rule.correctAgent) {
        updates.push({
          id: d.id,
          newAgent: rule.correctAgent,
          rule,
          oldAgent: d.causalAgentType,
        });
      }
    }
    console.log(`Found ${updates.length} diseases needing reclassification:\n`);

    // Group by correction type. The separator keeps the key and the printed
    // heading unambiguous (previously the two type names were run together).
    const grouped: Record<string, { from: string; to: string; items: string[] }> = {};
    for (const u of updates) {
      const key = `${u.oldAgent}->${u.newAgent}`;
      if (!grouped[key]) grouped[key] = { from: u.oldAgent, to: u.newAgent, items: [] };
      grouped[key].items.push(` ${u.id}`);
    }
    for (const g of Object.values(grouped)) {
      console.log(`${g.from} → ${g.to} (${g.items.length} diseases):`);
      // Show at most ten ids per group to keep the log readable.
      g.items.slice(0, 10).forEach((l) => console.log(l));
      if (g.items.length > 10) console.log(` ... and ${g.items.length - 10} more`);
      console.log();
    }

    // Apply updates
    if (updates.length === 0) {
      console.log("✅ No corrections needed");
    } else {
      console.log(`Applying ${updates.length} corrections...\n`);
      const rawClient = createClient({
        url: process.env.DATABASE_URL!,
        authToken: process.env.DATABASE_TOKEN!,
      });
      try {
        // Batch update in groups of 50
        for (let i = 0; i < updates.length; i += 50) {
          const batch = updates.slice(i, i + 50);
          await rawClient.batch(
            batch.map((u) => ({
              sql: "UPDATE diseases SET causal_agent_type = ?, updated_at = datetime('now') WHERE id = ?",
              args: [u.newAgent, u.id],
            })),
            "write",
          );
          process.stdout.write(` ${Math.min(i + 50, updates.length)}/${updates.length}\n`);
        }
      } finally {
        rawClient.close();
      }
      console.log(`\n✅ ${updates.length} diseases reclassified`);
    }

    // Print summary stats
    const after = await db
      .select({ causalAgentType: diseases.causalAgentType })
      .from(diseases)
      .all();
    const counts: Record<string, number> = {};
    for (const d of after) {
      counts[d.causalAgentType] = (counts[d.causalAgentType] ?? 0) + 1;
    }
    console.log("\n📊 Updated distribution:");
    for (const [type, count] of Object.entries(counts).sort()) {
      console.log(` ${type}: ${count}`);
    }
  } finally {
    closeDb();
  }
}

// Entry point: log any failure and exit with a non-zero status.
main().catch((err) => {
  console.error("\n❌", err);
  process.exit(1);
});

View File

@@ -1,385 +0,0 @@
/**
* generate-flagged-report.ts
*
* Reads all flagged content from the database and generates a pretty
* markdown report organized by content type. The report includes:
* - Summary table with counts per content type
* - Plant images flagged for review
* - Disease images flagged for review
* - Disease descriptions flagged for review
* - Disease symptoms flagged for review
* - Disease causes flagged for review
* - Disease treatment steps flagged for review
* - Disease prevention tips flagged for review
*
* Usage:
* npx tsx scripts/generate-flagged-report.ts [--min-flags=N] [--output=path/to/report.md]
*
* Options:
* --min-flags Minimum flag count to include (default: 1)
* --output Output path (default: scripts/.flagged-content-review-needed.md)
*/
import dotenv from "dotenv";
import path from "node:path";
// Choose the env file from NODE_ENV (.env.production when it is "production",
// .env.development otherwise) and load it, resolved relative to this script.
const envFile =
  process.env.NODE_ENV !== "production" ? "../.env.development" : "../.env.production";
const envFilePath = path.resolve(__dirname, envFile);
dotenv.config({ path: envFilePath });
import { createClient } from "@libsql/client";
import fs from "node:fs";
// ─── Config ─────────────────────────────────────────────────────────────────
/**
 * Read the value of a command-line option.
 *
 * Accepts both `--name=value` and `--name value`. With the `=` form everything
 * after the first `=` is the value, so values may themselves contain `=`.
 *
 * @param name Option name including the leading dashes, e.g. `--output`.
 * @param argv Argument list to search; defaults to `process.argv`.
 * @returns The value of the first occurrence, or `undefined` when the option
 *          is absent or is given without a value.
 */
function readCliOption(name: string, argv: readonly string[] = process.argv): string | undefined {
  const prefix = `${name}=`;
  for (let i = 0; i < argv.length; i++) {
    const arg = argv[i];
    if (arg.startsWith(prefix)) return arg.slice(prefix.length);
    if (arg === name) {
      const next = argv[i + 1];
      // A following "--something" is another option, not this option's value.
      return next !== undefined && !next.startsWith("--") ? next : undefined;
    }
  }
  return undefined;
}

/**
 * Parse the `--min-flags` value, falling back to the default of 1 when it is
 * missing or not a number (so NaN never reaches the SQL query).
 */
function parseMinFlags(raw: string | undefined): number {
  const parsed = Number.parseInt(raw ?? "", 10);
  return Number.isNaN(parsed) ? 1 : parsed;
}

const MIN_FLAGS = parseMinFlags(readCliOption("--min-flags"));
// An empty --output value falls back to the default path as well.
const OUTPUT_PATH =
  readCliOption("--output") || path.join(__dirname, ".flagged-content-review-needed.md");
// ─── DB Connection ──────────────────────────────────────────────────────────
// Fail fast with a readable message when the connection URL is missing,
// instead of asserting it non-null and letting the client fail later.
const databaseUrl = process.env.DATABASE_URL;
if (!databaseUrl) {
  console.error("❌ DATABASE_URL is not set — check the env file or your environment.");
  process.exit(1);
}
// The auth token is passed through as-is and may be undefined.
const db = createClient({
  url: databaseUrl,
  authToken: process.env.DATABASE_TOKEN,
});
// ─── Types ──────────────────────────────────────────────────────────────────
/** One row of the `flagged_content` table, as returned by `SELECT *`. */
interface FlaggedRow {
id: string;
// Kind of content that was flagged, e.g. "plant_image" or "disease_image".
content_type: string;
// Id of the flagged plant (for "plant_image") or disease (all other types).
content_id: string;
field_name: string;
// Free-text notes; the column defaults to '' — NOTE(review): it is not declared NOT NULL, so this may be null.
notes: string;
// Number of times this item has been flagged.
flag_count: number;
// Timestamps stored as text; the column default is datetime('now').
created_at: string;
updated_at: string;
}
/** Columns selected from `plants` for labelling report entries. */
interface PlantRow {
id: string;
common_name: string;
scientific_name: string;
family: string;
image_url: string;
}
/** Columns selected from `diseases` for labelling report entries. */
interface DiseaseRow {
id: string;
name: string;
scientific_name: string;
// Id of the plant this disease is attached to; used to look up the plant's name.
plant_id: string;
image_url: string;
}
// ─── Helpers ────────────────────────────────────────────────────────────────
/**
 * Presentation metadata for each flagged content type: the emoji and title
 * used for its section heading and the description printed beneath it.
 * Keyed by the `content_type` value stored with each flag.
 */
const CONTENT_TYPE_LABELS: Record<string, { emoji: string; title: string; description: string }> =
  Object.fromEntries(
    (
      [
        [
          "plant_image",
          "🪴",
          "Plant Images Flagged for Review",
          "Plant images that users have flagged as potentially incorrect or low quality.",
        ],
        [
          "disease_image",
          "📸",
          "Disease Images Flagged for Review",
          "Disease symptom images that users have flagged as potentially incorrect or misleading.",
        ],
        [
          "disease_description",
          "📝",
          "Disease Descriptions Flagged for Review",
          "Disease descriptions that users have flagged as potentially inaccurate.",
        ],
        [
          "disease_symptoms",
          "⚠️",
          "Disease Symptoms Flagged for Review",
          "Symptom descriptions that users have flagged as potentially inaccurate.",
        ],
        [
          "disease_causes",
          "🔍",
          "Disease Causes Flagged for Review",
          "Causes and contributing factors that users have flagged as potentially incorrect.",
        ],
        [
          "disease_treatment",
          "💊",
          "Disease Treatment Steps Flagged for Review",
          "Treatment instructions that users have flagged as potentially incorrect or harmful.",
        ],
        [
          "disease_prevention",
          "🛡️",
          "Disease Prevention Tips Flagged for Review",
          "Prevention tips that users have flagged as potentially incorrect or misleading.",
        ],
      ] as ReadonlyArray<readonly [string, string, string, string]>
    ).map(
      ([contentType, emoji, title, description]) =>
        [contentType, { emoji, title, description }] as const,
    ),
  );
/**
 * Format a stored timestamp for display, e.g. "Jan 15, 2024, 12:30 PM".
 *
 * The flag timestamps default to SQLite's `datetime('now')`, which produces
 * "YYYY-MM-DD HH:MM:SS" in UTC with no zone marker. Passed to `Date` as-is,
 * such a string is treated as local time, shifting the instant by the machine's
 * UTC offset. Strings in that shape are therefore rewritten as ISO 8601 UTC
 * before parsing; any other input is parsed unchanged.
 *
 * @param iso      Timestamp string (SQLite `datetime` text or an ISO 8601 string).
 * @param timeZone Optional IANA time zone for the output; defaults to the
 *                 machine's local zone.
 * @returns The formatted date, or the input string unchanged when it cannot be
 *          parsed (rather than the literal text "Invalid Date").
 */
function formatDate(iso: string, timeZone?: string): string {
  const sqliteUtc = /^(\d{4}-\d{2}-\d{2}) (\d{2}:\d{2}:\d{2}(?:\.\d+)?)$/.exec(iso.trim());
  const d = new Date(sqliteUtc ? `${sqliteUtc[1]}T${sqliteUtc[2]}Z` : iso);
  if (Number.isNaN(d.getTime())) return iso;
  return d.toLocaleDateString("en-US", {
    year: "numeric",
    month: "short",
    day: "numeric",
    hour: "2-digit",
    minute: "2-digit",
    timeZone,
  });
}
// ─── Main ───────────────────────────────────────────────────────────────────
/**
 * Generate the flagged-content markdown report and write it to OUTPUT_PATH.
 *
 * Steps:
 *  1. Load all `flagged_content` rows with at least MIN_FLAGS flags.
 *  2. Load the plants and diseases those rows refer to (plus each disease's
 *     plant) so entries can be labelled with names instead of ids.
 *  3. Emit a summary table, one detail section per content type, and a footer
 *     explaining the workflow.
 *
 * Content types without an entry in the fixed display order are listed after
 * the known ones so that every counted row also appears in the report.
 * The database client is closed whether or not generation succeeds.
 */
async function main() {
  console.log(`📋 Generating flagged content report (min flags: ${MIN_FLAGS})...`);
  try {
    // Fetch flagged content
    const flaggedRs = await db.execute({
      sql: "SELECT * FROM flagged_content WHERE flag_count >= ? ORDER BY content_type, flag_count DESC, updated_at DESC",
      args: [MIN_FLAGS],
    });
    const flaggedRows = flaggedRs.rows as unknown as FlaggedRow[];

    if (flaggedRows.length === 0) {
      const report = [
        "# 🚩 Flagged Content Review — Nothing to Review",
        "",
        `Generated: ${new Date().toISOString()}`,
        "",
        "**No content has been flagged for review yet.**",
        "",
        "Flagged items will appear here once users flag content for manual review.",
        "",
        "---",
        "",
        `_Report generated with min-flags=${MIN_FLAGS}_`,
        "",
      ].join("\n");
      fs.writeFileSync(OUTPUT_PATH, report, "utf-8");
      console.log(`✅ Report written to ${OUTPUT_PATH} (no flagged items)`);
      return;
    }

    // Collect all unique plant and disease IDs. Only "plant_image" flags point
    // at a plant; every other content type points at a disease.
    const plantIds = new Set<string>();
    const diseaseIds = new Set<string>();
    for (const row of flaggedRows) {
      if (row.content_type === "plant_image") {
        plantIds.add(row.content_id);
      } else {
        diseaseIds.add(row.content_id);
      }
    }

    // Fetch plant names
    const plantMap = new Map<string, PlantRow>();
    const loadPlants = async (ids: string[]): Promise<void> => {
      if (ids.length === 0) return;
      const plantRs = await db.execute({
        sql: `SELECT id, common_name, scientific_name, family, image_url FROM plants WHERE id IN (${ids.map(() => "?").join(",")})`,
        args: ids,
      });
      for (const row of plantRs.rows as unknown as PlantRow[]) {
        plantMap.set(row.id, row);
      }
    };
    await loadPlants([...plantIds]);

    // Fetch disease names + their plant references
    const diseaseMap = new Map<string, DiseaseRow>();
    if (diseaseIds.size > 0) {
      const diseaseRs = await db.execute({
        sql: `SELECT id, name, scientific_name, plant_id, image_url FROM diseases WHERE id IN (${[...diseaseIds].map(() => "?").join(",")})`,
        args: [...diseaseIds],
      });
      const hostPlantIds = new Set<string>();
      for (const row of diseaseRs.rows as unknown as DiseaseRow[]) {
        diseaseMap.set(row.id, row);
        if (!plantMap.has(row.plant_id)) {
          hostPlantIds.add(row.plant_id);
        }
      }
      // Fetch any missing plant references for diseases
      await loadPlants([...hostPlantIds]);
    }

    // Group by content type
    const groups: Record<string, FlaggedRow[]> = {};
    for (const row of flaggedRows) {
      if (!groups[row.content_type]) groups[row.content_type] = [];
      groups[row.content_type].push(row);
    }

    // ─── Build Report ────────────────────────────────────────────────────────
    const lines: string[] = [];
    const totalFlags = flaggedRows.reduce((sum, r) => sum + r.flag_count, 0);
    lines.push("# 🚩 Flagged Content — Manual Review Needed");
    lines.push("");
    lines.push(`Generated: ${new Date().toISOString()}`);
    lines.push("");
    lines.push(
      flaggedRows.length === 1
        ? `**${flaggedRows.length} item** flagged for review (${totalFlags} total flags).`
        : `**${flaggedRows.length} items** flagged for review (${totalFlags} total flags).`,
    );
    lines.push("");
    lines.push("Most data in this knowledge base is not reviewed by humans. ");
    lines.push("Items listed below have been flagged by users for manual review. ");
    lines.push("Please review each item and take appropriate action.");
    lines.push("");

    // Summary table
    lines.push("## 📊 Summary");
    lines.push("");
    lines.push("| Content Type | Count | Total Flags |");
    lines.push("|---|---|---|");
    const knownTypes = [
      "plant_image",
      "disease_image",
      "disease_description",
      "disease_symptoms",
      "disease_causes",
      "disease_treatment",
      "disease_prevention",
    ];
    // Known types keep their fixed order; any other type present in the data is
    // appended so the per-type rows always add up to the printed total.
    const orderedTypes = [
      ...knownTypes,
      ...Object.keys(groups)
        .filter((t) => !knownTypes.includes(t))
        .sort(),
    ];
    for (const type of orderedTypes) {
      const items = groups[type];
      if (!items) continue;
      const label = CONTENT_TYPE_LABELS[type]?.title ?? type;
      const count = items.length;
      const sumFlags = items.reduce((s, r) => s + r.flag_count, 0);
      lines.push(`| ${label} | ${count} | ${sumFlags} |`);
    }
    lines.push(`| **Total** | **${flaggedRows.length}** | **${totalFlags}** |`);
    lines.push("");
    lines.push("---");
    lines.push("");

    // Detail sections per content type
    for (const type of orderedTypes) {
      const items = groups[type];
      if (!items) continue;
      const config = CONTENT_TYPE_LABELS[type];
      lines.push(`## ${config?.emoji ?? "📋"} ${config?.title ?? type}`);
      lines.push("");
      lines.push(config?.description ?? "");
      lines.push("");
      lines.push(`**${items.length} item${items.length === 1 ? "" : "s"} flagged**`);
      lines.push("");

      for (const item of items) {
        // Build label; falls back to the raw content id when the row was not found.
        let label = item.content_id;
        let plantLabel = "";
        const plant = type === "plant_image" ? plantMap.get(item.content_id) : undefined;
        const disease = type === "plant_image" ? undefined : diseaseMap.get(item.content_id);
        if (plant) {
          label = `${plant.common_name} (_${plant.scientific_name}_)`;
          plantLabel = `${plant.family} family`;
        } else if (disease) {
          const hostPlant = plantMap.get(disease.plant_id);
          const plantName = hostPlant?.common_name ?? disease.plant_id;
          label = `${disease.name} (_${disease.scientific_name}_) on **${plantName}**`;
          plantLabel = `Affects: ${plantName}`;
        }

        const flagWord = item.flag_count === 1 ? "flag" : "flags";
        const firstFlagged = formatDate(item.created_at);
        const lastFlagged = formatDate(item.updated_at);
        lines.push(`### ${label}`);
        lines.push("");
        lines.push(`- **Field:** \`${item.field_name}\``);
        lines.push(`- **Flags:** ${item.flag_count} ${flagWord}`);
        lines.push(`- **First flagged:** ${firstFlagged}`);
        lines.push(`- **Last flagged:** ${lastFlagged}`);
        if (plantLabel) {
          lines.push(`- **${plantLabel}**`);
        }
        // User notes are free text: collapse line breaks so a multi-line note
        // cannot break out of its list item.
        const note = item.notes ? item.notes.replace(/\s*[\r\n]+\s*/g, " ").trim() : "";
        if (note) {
          lines.push(`- **User notes:** ${note}`);
        }

        // Show the flagged image itself for image flags, when a URL is stored.
        if (plant?.image_url) {
          lines.push("");
          lines.push(` ![${plant.common_name}](${plant.image_url})`);
        } else if (type === "disease_image" && disease?.image_url) {
          lines.push("");
          lines.push(` ![${disease.name}](${disease.image_url})`);
        }
        lines.push("");
      }
      lines.push("---");
      lines.push("");
    }

    // Footer
    lines.push("## How This Works");
    lines.push("");
    lines.push("1. **Users** click the 🚩 Flag button on any content they believe needs review.");
    lines.push("2. **The system** stores the flag in the database with a counter.");
    lines.push(
      "3. **This report** is generated by querying the database and formatting the results.",
    );
    lines.push("4. **Reviewers** go through each item and take action (fix, update, or dismiss).");
    lines.push("");
    lines.push("### Taking Action");
    lines.push("");
    lines.push("After reviewing an item, you can clear its flags by running:");
    lines.push("");
    lines.push("```sql");
    lines.push("DELETE FROM flagged_content WHERE id = '<item-id>';");
    lines.push("```");
    lines.push("");
    lines.push("Or clear all flags for a specific item by running:");
    lines.push("");
    lines.push("```sql");
    lines.push(
      "UPDATE flagged_content SET flag_count = 0 WHERE content_id = '<id>' AND field_name = '<field>';",
    );
    lines.push("```");
    lines.push("");
    lines.push("---");
    lines.push("");
    lines.push(`_Report generated with min-flags=${MIN_FLAGS}_`);

    // Write report
    fs.writeFileSync(OUTPUT_PATH, lines.join("\n"), "utf-8");
    console.log(`✅ Report written to ${OUTPUT_PATH}`);
    console.log(` ${flaggedRows.length} items, ${totalFlags} total flags`);
  } finally {
    db.close();
  }
}

// Entry point: log any failure and exit with a non-zero status.
main().catch((err) => {
  console.error("❌ Failed to generate report:", err);
  process.exit(1);
});

View File

@@ -1,254 +0,0 @@
#!/usr/bin/env node
/**
* Full Knowledge Base Generator
*
* Combines the Wikipedia-scraped data with template-based generation
* to produce 9,300+ verified disease entries.
*
* Strategy:
* 1. Plants with Wikipedia data → use that data (already in DB)
* 2. Plants without Wikipedia data → generate from family + generic templates
* 3. All plants get generic cross-family diseases added
* 4. Target: ~30 diseases per plant → ~9,300 total
*
* Usage: cd apps/web && npx tsx scripts/generate-full-kb.ts
*/
import "dotenv/config";
import { sql } from "drizzle-orm";
import { getDb, closeDb } from "../src/lib/db/index";
import { diseases, plants } from "../src/lib/db/schema";
import PLANTS from "./plant-list";
import { GENERIC_TEMPLATES, getTemplatesForFamily, slugify } from "./disease-templates";
import type { CausalAgentType, Prevalence, Severity } from "../src/lib/types";
/**
 * Shape of a disease record queued for insertion by this generator.
 * NOTE(review): the code that fills these fields is outside this excerpt —
 * confirm the field meanings against the `diseases` table schema.
 */
interface DiseaseEntry {
id: string;
// Id of the plant this disease entry belongs to.
plantId: string;
name: string;
scientificName: string;
causalAgentType: CausalAgentType;
description: string;
symptoms: string[];
causes: string[];
treatment: string[];
prevention: string[];
// Presumably ids of diseases that look similar to this one — TODO confirm.
lookalikeIds: string[];
severity: Severity;
prevalence: Prevalence;
sourceUrl: string;
}
/**
 * Build the generic description paragraph for a generated disease entry.
 *
 * @param name  Disease name, used as the subject of the first sentence.
 * @param sci   Scientific name of the causal agent; when empty the text falls
 *              back to "a plant pathogen".
 * @param plant Name of the affected plant.
 * @param type  Causal agent type (e.g. "fungal", "environmental"). The
 *              indefinite article is chosen to match it, so the result reads
 *              "an environmental disease" rather than "a environmental disease".
 * @returns The description text.
 */
function makeDesc(name: string, sci: string, plant: string, type: string): string {
  const article = /^[aeiou]/i.test(type) ? "an" : "a";
  return `${name} is ${article} ${type} disease affecting ${plant}. Caused by ${sci || "a plant pathogen"}, this disease can cause significant damage under favorable environmental conditions. Early detection and integrated management are essential for controlling spread and minimizing crop losses.`;
}
/**
 * Generates template-based disease entries for every plant in PLANTS and
 * persists them, together with any plants missing from the database.
 *
 * Steps: load existing plants and disease ids, build one entry per
 * (plant, template) pair whose id is not already stored, link lookalikes,
 * insert missing plants, batch-insert the new diseases, then print totals.
 *
 * closeDb() and rawClient.close() run in `finally` blocks so they are still
 * called when a query fails part-way through.
 */
async function main() {
  console.log("🌱 Full Knowledge Base Generator\n");
  const db = getDb();
  try {
    // Step 1: Get existing plants and diseases in the database
    type DbPlant = { id: string; name: string; family: string; cat: string; care: string };
    const existingPlants = new Map<string, DbPlant>();
    const existingPlantRow = await db.select().from(plants);
    for (const p of existingPlantRow) {
      existingPlants.set(p.id, {
        id: p.id,
        name: p.commonName,
        family: p.family,
        cat: p.category,
        care: p.careSummary,
      });
    }
    console.log(`📊 Database has ${existingPlants.size} existing plants`);

    // Step 2: Get existing disease IDs to avoid duplicates
    const existingDiseaseIds = new Set<string>();
    const existingDiseaseRow = await db.select({ id: diseases.id }).from(diseases);
    for (const d of existingDiseaseRow) {
      existingDiseaseIds.add(d.id);
    }
    console.log(`📊 Database has ${existingDiseaseIds.size} existing diseases\n`);

    // Step 3: Generate diseases for ALL plants (both existing and new)
    const allPlants = new Map<string, (typeof PLANTS)[0]>();
    for (const p of PLANTS) allPlants.set(p.slug, p);

    // Minimum diseases per plant; only used for the two summary counters below.
    const targetMin = 15;
    const toInsert: DiseaseEntry[] = [];
    let plantsWithEnough = 0;
    let plantsNeedingFill = 0;

    for (const [slug, plant] of allPlants) {
      // Count existing diseases for this plant (only if the plant is in the DB).
      // This is an approximation: it counts stored ids that start with "<slug>-".
      let existingCount = 0;
      const existingId = existingPlants.get(slug)?.id;
      if (existingId) {
        const prefix = slug + "-";
        for (const did of existingDiseaseIds) {
          if (did.startsWith(prefix)) existingCount++;
        }
      }

      // All available templates for this plant: family-specific first, then generic
      const availableTemplates = [...getTemplatesForFamily(plant.fam), ...GENERIC_TEMPLATES];

      const plantDiseases: DiseaseEntry[] = [];
      for (const tmpl of availableTemplates) {
        const diseaseId = `${slug}-${slugify(tmpl.name)}`;
        // Skip ids that are already stored in the database
        if (existingDiseaseIds.has(diseaseId)) continue;
        plantDiseases.push({
          id: diseaseId,
          plantId: slug,
          name: tmpl.name,
          scientificName: tmpl.sciName,
          causalAgentType: tmpl.type,
          description: makeDesc(tmpl.name, tmpl.sciName, plant.name, tmpl.type),
          symptoms: tmpl.symptoms,
          causes: tmpl.causes,
          treatment: tmpl.treatment,
          prevention: tmpl.prevention,
          lookalikeIds: [],
          severity: tmpl.severity,
          prevalence: tmpl.severity === "critical" ? "uncommon" : "common",
          sourceUrl: "https://pddc.wisc.edu/ (UW-Madison PDDC extension factsheets)",
        });
      }

      // Every generated entry is queued regardless of the target; the target
      // only decides which summary counter this plant falls under.
      toInsert.push(...plantDiseases);
      if (existingCount + plantDiseases.length >= targetMin) {
        plantsWithEnough++;
      } else {
        plantsNeedingFill++;
      }
    }

    // Step 4: Link lookalikes (same plant, same type)
    console.log("🔗 Linking lookalike diseases...");
    const byPlant = new Map<string, DiseaseEntry[]>();
    for (const d of toInsert) {
      const list = byPlant.get(d.plantId) || [];
      list.push(d);
      byPlant.set(d.plantId, list);
    }
    for (const [, di] of byPlant) {
      for (const d of di) {
        // Low-severity entries are left without lookalikes
        if (d.severity === "low") continue;
        const sameType = di.filter((o) => o.causalAgentType === d.causalAgentType && o.id !== d.id);
        d.lookalikeIds = sameType.slice(0, 3).map((o) => o.id);
      }
    }
    console.log(`\n📊 Generated ${toInsert.length} new disease entries`);
    console.log(`📊 Plants with enough diseases: ${plantsWithEnough}`);
    console.log(`📊 Plants needing more sources: ${plantsNeedingFill}`);

    // Step 5: Insert plants that don't exist yet
    let newPlantsCount = 0;
    for (const [slug, p] of allPlants) {
      if (!existingPlants.has(slug)) {
        await db
          .insert(plants)
          .values({
            id: slug,
            commonName: p.name,
            scientificName: p.sci,
            family: p.fam,
            category: p.cat,
            careSummary: p.care,
            imageUrl: "",
          })
          .onConflictDoNothing();
        newPlantsCount++;
      }
    }
    console.log(`\n🌱 Added ${newPlantsCount} new plants`);

    // Step 6: Bulk insert using raw client
    if (toInsert.length > 0) {
      console.log(`\n💾 Inserting ${toInsert.length} diseases via batch...`);
      const { createClient } = await import("@libsql/client");
      const rawClient = createClient({
        url: process.env.DATABASE_URL!,
        authToken: process.env.DATABASE_TOKEN!,
      });
      try {
        const BATCH = 100;
        for (let i = 0; i < toInsert.length; i += BATCH) {
          const chunk = toInsert.slice(i, i + BATCH);
          const stmts = chunk.map((d) => ({
            sql: `INSERT OR IGNORE INTO diseases (id, plant_id, name, scientific_name, causal_agent_type, description, symptoms, causes, treatment, prevention, lookalike_ids, severity, prevalence, source_url) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
            args: [
              d.id,
              d.plantId,
              d.name,
              d.scientificName,
              d.causalAgentType,
              d.description,
              JSON.stringify(d.symptoms),
              JSON.stringify(d.causes),
              JSON.stringify(d.treatment),
              JSON.stringify(d.prevention),
              JSON.stringify(d.lookalikeIds),
              d.severity,
              d.prevalence ?? "uncommon",
              d.sourceUrl,
            ],
          }));
          await rawClient.batch(stmts, "write");
          process.stdout.write(` ${Math.min(i + BATCH, toInsert.length)}/${toInsert.length}\n`);
        }
      } finally {
        // Close the raw client even when a batch fails
        rawClient.close();
      }
    }

    // Step 7: Final stats
    const [pc] = await db.select({ c: sql<number>`COUNT(*)` }).from(plants);
    const [dc] = await db.select({ c: sql<number>`COUNT(*)` }).from(diseases);
    const byType = await db
      .select({
        type: diseases.causalAgentType,
        count: sql<number>`COUNT(*)`,
      })
      .from(diseases)
      .groupBy(diseases.causalAgentType);
    console.log(`\n✅ FINAL DATABASE STATE`);
    console.log(` ${pc.c} plants`);
    console.log(` ${dc.c} diseases`);
    for (const r of byType) {
      console.log(` ${String(r.type).padEnd(16)} ${r.count}`);
    }
  } finally {
    closeDb();
  }
}
// Script entry point: run the generator; a rejection is reported and the
// process ends with exit status 1.
main().catch((failure) => {
  console.error("❌ Fatal:", failure);
  process.exit(1);
});

File diff suppressed because it is too large Load Diff

View File

@@ -1,71 +0,0 @@
#!/usr/bin/env node
/**
* Retry Wikipedia pages that got rate-limited
*
* Uses longer delays (5s) for pages that previously got 429.
*/
import "dotenv/config";
import { closeDb } from "../src/lib/db/index";
import { readFileSync, writeFileSync, existsSync, mkdirSync } from "fs";
import { resolve, dirname } from "path";
import { fileURLToPath } from "url";
const __filedir = dirname(fileURLToPath(import.meta.url));
/**
 * Reads a cached value from the `.scraper-cache` directory next to this script.
 *
 * @param k Cache key; it is URI-encoded to form the file name.
 * @returns The file's UTF-8 contents, or `null` when no file exists for the key.
 */
function cacheGet(k: string): string | null {
  const entryPath = resolve(__filedir, ".scraper-cache", `${encodeURIComponent(k)}.json`);
  if (!existsSync(entryPath)) {
    return null;
  }
  return readFileSync(entryPath, "utf-8");
}
/**
 * Writes a value into the `.scraper-cache` directory next to this script,
 * creating the directory when needed.
 *
 * @param k Cache key; it is URI-encoded to form the file name.
 * @param v Text to store (written as UTF-8).
 */
function cacheSet(k: string, v: string) {
  const cacheDir = resolve(__filedir, ".scraper-cache");
  // With `recursive: true`, mkdirSync succeeds when the directory already
  // exists, so no separate existence check is needed.
  mkdirSync(cacheDir, { recursive: true });
  writeFileSync(resolve(cacheDir, `${encodeURIComponent(k)}.json`), v, "utf-8");
}
// Wikipedia page titles fetched by this retry pass (per the file header,
// pages that were rate-limited on an earlier run). main() processes them in
// this order, one at a time.
const PAGES_TO_RETRY = [
"List_of_cranberry_diseases",
"List_of_cucurbit_diseases",
"List_of_grape_diseases",
"List_of_hops_diseases",
"List_of_rice_diseases",
"List_of_rose_diseases",
"List_of_sorghum_diseases",
"List_of_soybean_diseases",
"List_of_spinach_diseases",
"List_of_strawberry_diseases",
"List_of_sugarcane_diseases",
"List_of_sunflower_diseases",
"List_of_sweet_potato_diseases",
];
/**
 * Returns the wikitext of a Wikipedia page, using the local cache when possible.
 *
 * @param page Wikipedia page title.
 * @returns The page's wikitext.
 * @throws Error `HTTP <status>` on a non-OK response, the API's own error
 *         message when the response carries an `error` object, or a
 *         descriptive error when the response contains no wikitext.
 */
async function fetchWT(page: string): Promise<string> {
  const key = `wt-${page}`;
  // An empty cached string is treated as a miss and fetched again.
  const cached = cacheGet(key);
  if (cached) return cached;

  const url = `https://en.wikipedia.org/w/api.php?action=parse&page=${encodeURIComponent(page)}&prop=wikitext&format=json&formatversion=2`;
  const r = await fetch(url, { headers: { "User-Agent": "PlantDiseaseKB/1.0 (research)" } });
  if (!r.ok) throw new Error(`HTTP ${r.status}`);

  // `parse` is absent on error responses, so it is typed as optional here.
  const d = (await r.json()) as { parse?: { wikitext?: string }; error?: { info: string } };
  if (d.error) throw new Error(d.error.info);

  const wikitext = d.parse?.wikitext;
  if (typeof wikitext !== "string") {
    // Fail with a clear message instead of a TypeError on `d.parse.wikitext`.
    throw new Error(`Unexpected API response for ${page}: no wikitext`);
  }
  cacheSet(key, wikitext);
  return wikitext;
}
/**
 * Fetches every page in PAGES_TO_RETRY one at a time, waiting 5–7 seconds
 * before each attempt, and prints a per-page result plus a final tally.
 * A failure on one page is printed and does not stop the remaining pages.
 */
async function main() {
  const sleep = (ms: number) => new Promise((done) => setTimeout(done, ms));
  let fetched = 0;

  for (const title of PAGES_TO_RETRY) {
    process.stdout.write(`📋 ${title}... `);
    try {
      // Randomized delay before every attempt
      await sleep(5000 + Math.random() * 2000);
      const wikitext = await fetchWT(title);
      console.log(`${wikitext.length} bytes`);
      fetched += 1;
    } catch (problem) {
      console.log(`${problem instanceof Error ? problem.message : problem}`);
    }
  }

  await sleep(2000);
  console.log(`\nDone. ${fetched}/${PAGES_TO_RETRY.length} pages fetched`);
  closeDb();
}
// Script entry point. A rejection that escapes main() is printed and the
// process exits with status 1, so callers can detect the failure.
main().catch((err) => {
  console.error(err);
  process.exit(1);
});

File diff suppressed because it is too large Load Diff

View File

@@ -1,91 +0,0 @@
#!/usr/bin/env node
/**
* Seed Existing JSON Data into Turso
*
* Reads the existing plants.json and diseases.json files and inserts them
* into the Turso database via Drizzle ORM.
*
* Usage:
* cd apps/web && npx tsx scripts/seed-existing.ts
*
* Environment: DATABASE_URL and DATABASE_TOKEN from .env.development
*/
import "dotenv/config";
import { readFileSync } from "fs";
import { dirname, resolve } from "path";
import { fileURLToPath } from "url";
import { sql } from "drizzle-orm";
import { getDb, closeDb } from "../src/lib/db/index";
import { plants, diseases } from "../src/lib/db/schema";
import type { Plant, Disease } from "../src/lib/types";
// ─── Load JSON data ──────────────────────────────────────────────────────────
// Directory containing this script. fileURLToPath() decodes percent-escapes
// (e.g. spaces in the path) and yields a valid path on Windows, which the raw
// `URL.pathname` does not.
const __dirname = dirname(fileURLToPath(import.meta.url));
const plantsPath = resolve(__dirname, "../src/data/plants.json");
const diseasesPath = resolve(__dirname, "../src/data/diseases.json");
// The JSON files are trusted seed data; their contents are not validated here.
const rawPlants = JSON.parse(readFileSync(plantsPath, "utf-8")) as Plant[];
const rawDiseases = JSON.parse(readFileSync(diseasesPath, "utf-8")) as Disease[];
// ─── Seed ────────────────────────────────────────────────────────────────────
/**
 * Inserts every plant and disease from the loaded JSON data, one row at a
 * time and skipping rows that conflict with existing ones, then prints the
 * resulting row counts of both tables.
 */
async function main() {
  const database = getDb();

  console.log(`Seeding ${rawPlants.length} plants...`);
  for (const plant of rawPlants) {
    const { id, commonName, scientificName, family, category, careSummary, imageUrl } = plant;
    await database
      .insert(plants)
      .values({ id, commonName, scientificName, family, category, careSummary, imageUrl })
      .onConflictDoNothing();
  }
  console.log(`${rawPlants.length} plants inserted`);

  console.log(`Seeding ${rawDiseases.length} diseases...`);
  for (const disease of rawDiseases) {
    const { id, plantId, name, scientificName, causalAgentType, description } = disease;
    const { symptoms, causes, treatment, prevention, severity } = disease;
    await database
      .insert(diseases)
      .values({
        id,
        plantId,
        name,
        scientificName,
        causalAgentType,
        description,
        symptoms,
        causes,
        treatment,
        prevention,
        // The JSON field is named differently from the table column
        lookalikeIds: disease.lookalikeDiseaseIds,
        severity,
        prevalence: disease.prevalence ?? "uncommon",
        sourceUrl: "",
      })
      .onConflictDoNothing();
  }
  console.log(`${rawDiseases.length} diseases inserted`);

  // Verify by counting rows in both tables
  const [plantTotal] = await database.select({ count: sql<number>`COUNT(*)` }).from(plants);
  const [diseaseTotal] = await database.select({ count: sql<number>`COUNT(*)` }).from(diseases);
  console.log(`\n📊 Database now has:`);
  console.log(` ${plantTotal.count} plants`);
  console.log(` ${diseaseTotal.count} diseases`);

  closeDb();
}
// Script entry point: run the seed; a rejection is reported and the process
// ends with exit status 1.
main().catch((failure) => {
  console.error("❌ Seed failed:", failure);
  process.exit(1);
});

View File

@@ -1,218 +0,0 @@
#!/usr/bin/env node
/**
* Smoke test script for the Plant Disease Knowledge Base API.
* Validates all seed data has no missing references and all API endpoints work.
*
* Usage:
* # With dev server running:
* node scripts/smoke-test.mjs
*
* # With custom base URL:
* BASE_URL=http://localhost:3001 node scripts/smoke-test.mjs
*/
import { validateKnowledgeBase, plants, diseases } from "../src/lib/api/diseases.ts";
// Base URL of the server under test; the BASE_URL environment variable
// overrides the localhost default.
const BASE_URL = process.env.BASE_URL || "http://localhost:3000";
// Running tally of check outcomes. pass() and fail() update the counters;
// fail() also appends a { test, message } record to `errors`.
const results = { passed: 0, failed: 0, errors: [] };
/** Records one passing check in `results` and prints its label. */
function pass(test) {
  results.passed += 1;
  console.log(`${test}`);
}
/**
 * Records one failing check in `results` (counter plus an error record) and
 * prints the label together with the failure message.
 */
function fail(test, message) {
  const record = { test, message };
  results.failed += 1;
  results.errors.push(record);
  console.log(`${test}: ${message}`);
}
/**
 * Requests `path` relative to BASE_URL and parses the body as JSON.
 * Returns the HTTP status, the parsed body, and the response headers as a
 * plain object. Rejects when the request fails or the body is not JSON.
 */
async function fetchJSON(path) {
  const response = await fetch(BASE_URL + path);
  const body = await response.json();
  const headerMap = Object.fromEntries(response.headers);
  return { status: response.status, data: body, headers: headerMap };
}
console.log("\n🌿 Plant Disease Knowledge Base — Smoke Tests\n");
// ── Phase 1: Data Validation ──────────────────────────────────────────────
// These checks run against the imported seed data directly; no HTTP requests
// are made in this phase.
console.log("Phase 1: Seed Data Validation");
// validateKnowledgeBase() returns a list of error strings; empty means valid.
const validationErrors = validateKnowledgeBase();
if (validationErrors.length === 0) {
pass("Knowledge base validation passed (no errors)");
} else {
fail("Knowledge base validation", validationErrors.join("; "));
}
// At least 20 plants are required.
if (plants.length >= 20) {
pass(`Plant count: ${plants.length} (≥20)`);
} else {
fail("Plant count", `Only ${plants.length} plants (need ≥20)`);
}
// At least 80 diseases are required.
if (diseases.length >= 80) {
pass(`Disease count: ${diseases.length} (≥80)`);
} else {
fail("Disease count", `Only ${diseases.length} diseases (need ≥80)`);
}
// Diseases must reference at least 20 distinct plant ids.
const uniquePlantIds = new Set(diseases.map((d) => d.plantId));
if (uniquePlantIds.size >= 20) {
pass(`Diseases span ${uniquePlantIds.size} plants (≥20)`);
} else {
fail("Disease plant coverage", `Only ${uniquePlantIds.size} plants have diseases`);
}
// Exactly four distinct causal agent types must appear across all diseases.
const causalTypes = new Set(diseases.map((d) => d.causalAgentType));
if (causalTypes.size === 4) {
pass(`All 4 causal agent types present: ${[...causalTypes].join(", ")}`);
} else {
fail("Causal agent types", `Only ${causalTypes.size}/4 types present`);
}
// ── Phase 2: API Endpoint Tests ───────────────────────────────────────────
// Each check is wrapped in its own try/catch so that a network or parse error
// (or a TypeError from an unexpected body shape) is recorded as a failure of
// that check and the remaining checks still run.
console.log("\nPhase 2: API Endpoint Tests");
// GET /api/plants — expects 200 and a `plants` array with at least 20 entries
try {
const { status, data } = await fetchJSON("/api/plants");
if (status === 200 && Array.isArray(data.plants) && data.plants.length >= 20) {
pass(`GET /api/plants returns 200 with ${data.plants.length} plants`);
} else {
fail("GET /api/plants", `Status ${status}, plants: ${data.plants?.length ?? "N/A"}`);
}
} catch (e) {
fail("GET /api/plants", e.message);
}
// GET /api/plants?search=tomato — expects 200 and at least one result
try {
const { status, data } = await fetchJSON("/api/plants?search=tomato");
if (status === 200 && data.plants.length > 0) {
pass(`GET /api/plants?search=tomato returns ${data.plants.length} results`);
} else {
fail("GET /api/plants?search=tomato", `Status ${status}`);
}
} catch (e) {
fail("GET /api/plants?search=tomato", e.message);
}
// GET /api/plants/tomato — expects 200, plant id "tomato", and at least 3 diseases
try {
const { status, data } = await fetchJSON("/api/plants/tomato");
if (status === 200 && data.plant?.id === "tomato" && data.diseases?.length >= 3) {
pass(`GET /api/plants/tomato returns 200 with ${data.diseases.length} diseases`);
} else {
fail("GET /api/plants/tomato", `Status ${status}, plant: ${data.plant?.id ?? "N/A"}`);
}
} catch (e) {
fail("GET /api/plants/tomato", e.message);
}
// GET /api/plants/unknown-id (should 404) — also expects `error` to be "Not Found"
try {
const { status, data } = await fetchJSON("/api/plants/unknown-id");
if (status === 404 && data.error === "Not Found") {
pass("GET /api/plants/unknown-id returns 404");
} else {
fail("GET /api/plants/unknown-id", `Expected 404, got ${status}`);
}
} catch (e) {
fail("GET /api/plants/unknown-id", e.message);
}
// GET /api/diseases — expects 200 and a `diseases` array with at least 80 entries
try {
const { status, data } = await fetchJSON("/api/diseases");
if (status === 200 && Array.isArray(data.diseases) && data.diseases.length >= 80) {
pass(`GET /api/diseases returns 200 with ${data.diseases.length} diseases`);
} else {
fail("GET /api/diseases", `Status ${status}, diseases: ${data.diseases?.length ?? "N/A"}`);
}
} catch (e) {
fail("GET /api/diseases", e.message);
}
// GET /api/diseases?plantId=tomato — expects 200, at least 3 results, all with plantId "tomato"
try {
const { status, data } = await fetchJSON("/api/diseases?plantId=tomato");
if (status === 200 && data.diseases.length >= 3 && data.diseases.every((d) => d.plantId === "tomato")) {
pass(`GET /api/diseases?plantId=tomato returns ${data.diseases.length} tomato diseases`);
} else {
fail("GET /api/diseases?plantId=tomato", `Status ${status}, count: ${data.diseases?.length ?? "N/A"}`);
}
} catch (e) {
fail("GET /api/diseases?plantId=tomato", e.message);
}
// GET /api/diseases?search=blight — expects 200 and at least 2 results
try {
const { status, data } = await fetchJSON("/api/diseases?search=blight");
if (status === 200 && data.diseases.length >= 2) {
pass(`GET /api/diseases?search=blight returns ${data.diseases.length} results (≥2)`);
} else {
fail("GET /api/diseases?search=blight", `Status ${status}, count: ${data.diseases?.length ?? "N/A"}`);
}
} catch (e) {
fail("GET /api/diseases?search=blight", e.message);
}
// GET /api/diseases/early-blight — expects 200, the disease itself, its plant
// ("tomato"), and a `lookalikes` array
try {
const { status, data } = await fetchJSON("/api/diseases/early-blight");
if (
status === 200 &&
data.disease?.id === "early-blight" &&
data.plant?.id === "tomato" &&
Array.isArray(data.lookalikes)
) {
pass(`GET /api/diseases/early-blight returns 200 with plant and lookalikes`);
} else {
fail("GET /api/diseases/early-blight", `Status ${status}`);
}
} catch (e) {
fail("GET /api/diseases/early-blight", e.message);
}
// GET /api/diseases/unknown-id (should 404) — also expects `error` to be "Not Found"
try {
const { status, data } = await fetchJSON("/api/diseases/unknown-id");
if (status === 404 && data.error === "Not Found") {
pass("GET /api/diseases/unknown-id returns 404");
} else {
fail("GET /api/diseases/unknown-id", `Expected 404, got ${status}`);
}
} catch (e) {
fail("GET /api/diseases/unknown-id", e.message);
}
// ── Phase 3: Response Headers ─────────────────────────────────────────────
// Checks that /api/plants is served with a Cache-Control header containing
// "max-age=3600". A request error is recorded as a failure of this check.
console.log("\nPhase 3: Response Headers");
try {
  const response = await fetchJSON("/api/plants");
  const cacheControl = response.headers["cache-control"] || "";
  const hasExpectedMaxAge = cacheControl.includes("max-age=3600");
  if (!hasExpectedMaxAge) {
    fail("Cache-Control header", `Expected max-age=3600, got: ${cacheControl}`);
  } else {
    pass(`Cache-Control header present: ${cacheControl}`);
  }
} catch (problem) {
  fail("Cache-Control header", problem.message);
}
// ── Summary ───────────────────────────────────────────────────────────────
// Prints the totals, then exits: status 0 when every check passed, otherwise
// status 1 after listing each recorded failure.
console.log("\n" + "─".repeat(50));
console.log(`Results: ${results.passed} passed, ${results.failed} failed`);
if (!(results.failed > 0)) {
  console.log("\n🎉 All smoke tests passed!\n");
  process.exit(0);
}
console.log("\nFailed tests:");
for (const failure of results.errors) {
  console.log(`${failure.test}: ${failure.message}`);
}
process.exit(1);

View File

@@ -1,67 +0,0 @@
/**
* Quick test of Wikipedia image API for disease search terms.
* Run: cd apps/web && npx tsx scripts/test-wiki-images.ts
*/
// English Wikipedia API endpoint used by both search() and getImg().
const API = "https://en.wikipedia.org/w/api.php";
/**
 * Runs a Wikipedia search for `term`, requesting a single result.
 *
 * @param term Free-text search term.
 * @returns The parsed JSON response; `query.search` may be missing or empty.
 */
async function search(term: string) {
  type SearchResponse = { query?: { search?: Array<{ title: string; pageid: number }> } };
  const queryString = [
    "action=query",
    "list=search",
    `srsearch=${encodeURIComponent(term)}`,
    "format=json",
    "srlimit=1",
    "origin=*",
  ].join("&");
  const response = await fetch(`${API}?${queryString}`, {
    headers: { "User-Agent": "PlantHealthKB/1.0" },
  });
  const payload = (await response.json()) as SearchResponse;
  return payload;
}
/**
 * Requests the page image (thumbnail size 400) for a Wikipedia page title.
 *
 * @param title Page title to look up.
 * @returns The parsed JSON response; `query.pages` maps keys to page objects
 *          that may carry a `thumbnail.source` URL.
 */
async function getImg(title: string) {
  type PageImagesResponse = {
    query?: { pages?: Record<string, { thumbnail?: { source: string } }> };
  };
  const queryString = [
    "action=query",
    `titles=${encodeURIComponent(title)}`,
    "prop=pageimages",
    "format=json",
    "pithumbsize=400",
    "origin=*",
  ].join("&");
  const response = await fetch(`${API}?${queryString}`, {
    headers: { "User-Agent": "PlantHealthKB/1.0" },
  });
  const payload = (await response.json()) as PageImagesResponse;
  return payload;
}
/**
 * Looks up the top search hit for `term`, then its page image, and prints one
 * result line. Waits 400 ms afterwards, except on the "NO PAGES" path, which
 * returns immediately.
 *
 * @param term Search term to test.
 */
async function testOne(term: string) {
  const pause = () => new Promise((done) => setTimeout(done, 400));

  const searchResult = await search(term);
  const hit = searchResult?.query?.search?.[0];
  if (!hit) {
    console.log(`${term.padEnd(40)} → NO PAGE`);
    await pause();
    return;
  }

  const imageResult = await getImg(hit.title);
  const pageMap = imageResult?.query?.pages;
  if (!pageMap) {
    console.log(term, "→ NO PAGES");
    return;
  }

  // Only the first page object in the response is inspected
  const firstPage = Object.values(pageMap)[0] as { thumbnail?: { source: string } };
  const thumbUrl = firstPage?.thumbnail?.source;
  console.log(`${term.padEnd(40)}${hit.title.padEnd(50)}${thumbUrl ?? "NO IMG"}`);
  await pause();
}
/**
 * Runs testOne() for a fixed list of disease search terms, strictly one
 * after another, printing one result line per term.
 */
async function main() {
  const searchTerms = [
    "Phytophthora infestans Late Blight",
    "Early Blight",
    "Septoria Leaf Spot",
    "Powdery Mildew",
    "Fusarium oxysporum",
    "Citrus Canker",
    "Root Rot Pythium",
    "Downy Mildew Peronospora",
    "Bacterial Leaf Spot Xanthomonas",
    "Apple Scab Venturia inaequalis",
    "Fire Blight Erwinia amylovora",
    "Blossom End Rot",
    "Tomato Mosaic Virus",
    "Rust Puccinia",
    "Black Spot Diplocarpon rosae",
    "Sooty Mold Capnodium",
    "Clubroot Plasmodiophora brassicae",
    "Anthracnose Colletotrichum",
  ];

  console.log("Searching Wikipedia for disease images...\n");
  // Sequential on purpose: each lookup finishes before the next one starts
  for (let index = 0; index < searchTerms.length; index += 1) {
    await testOne(searchTerms[index]);
  }
}
// Script entry point: run the lookups and print any rejection to stderr.
main().catch((failure) => console.error(failure));