/**
 * Expand DB with comprehensive plant disease list from Wikipedia.
 *
 * Reads /tmp/plant_diseases/plant_diseases_comprehensive.txt (or the path given
 * as the first CLI argument), compares against existing DB entries (by name,
 * case-insensitive), and inserts new entries with reasonable defaults.
 *
 * Usage:
 *   cd apps/web && export $(grep -v '^#' .env.development | xargs) && npx tsx scripts/expand-diseases.ts [list-file]
 */
import "dotenv/config";
import { readFileSync } from "fs";
import { eq, sql } from "drizzle-orm";
import { getDb, closeDb } from "../src/lib/db/index";
import { plants, diseases } from "../src/lib/db/schema";
import type { CausalAgentType, Severity } from "../src/lib/types";

// ─── Parse the comprehensive list ─────────────────────────────────────────────

/** One disease parsed from the list file; `sourceUrl` is "" when no URL line followed the name. */
interface DiseaseEntry {
  name: string;
  sourceUrl: string;
}

/** A numbered name line, e.g. "12. Apple scab". */
const NAME_LINE_RE = /^\d+\.\s+(.+)$/;
/** The line after a name counts as its source only if it is an http(s) URL. */
const URL_LINE_RE = /^https?:\/\//i;

/**
 * Parses the numbered disease list.
 *
 * Expected layout: a line `N. Disease name`, optionally followed by a line
 * holding the source URL. Lines that match neither pattern are ignored.
 *
 * @param filePath - Path of the UTF-8 text file to read.
 * @returns Entries in file order (duplicates are NOT removed here).
 * @throws Whatever `readFileSync` throws when the file cannot be read.
 */
function parseComprehensiveList(filePath: string): DiseaseEntry[] {
  // Split on CRLF as well as LF: a trailing "\r" would otherwise stop
  // NAME_LINE_RE from matching ("." and "$" do not accept a line terminator).
  const lines = readFileSync(filePath, "utf-8").split(/\r?\n/);
  const entries: DiseaseEntry[] = [];

  for (let i = 0; i < lines.length; i++) {
    const name = lines[i]?.match(NAME_LINE_RE)?.[1]?.trim();
    if (!name) continue; // not a name line, or the name is only whitespace

    const candidate = lines[i + 1]?.trim() ?? "";
    if (URL_LINE_RE.test(candidate)) {
      entries.push({ name, sourceUrl: candidate });
      i++; // the URL line has been consumed
    } else {
      entries.push({ name, sourceUrl: "" });
    }
  }

  return entries;
}

// ─── Keyword matching helpers ─────────────────────────────────────────────────

/**
 * Lower-cases `name` and pads it with one space on each side.
 *
 * The keyword lists below use a leading (and sometimes trailing) space as a
 * cheap word boundary. Without the padding, a keyword could never match the
 * first or last word of a name ("Mosaic of …", "San Jose scale").
 */
function toMatchable(name: string): string {
  return ` ${name.toLowerCase()} `;
}

/** True when `text` contains at least one of `keywords` as a substring. */
function includesAny(text: string, keywords: readonly string[]): boolean {
  return keywords.some((keyword) => text.includes(keyword));
}

// ─── Infer causal agent type from disease name ────────────────────────────────

const BACTERIAL_KEYWORDS: readonly string[] = [
  " bacterial ",
  " xanthomonas",
  " pseudomonas",
  " erwinia",
  " ralstonia",
  " clavibacter",
  " streptomyces",
  " agrobacterium",
  " corynebacterium",
  " pectobacterium",
  " dickeya",
];

const VIRAL_KEYWORDS: readonly string[] = [
  " mosaic",
  " yellows",
  " leaf roll",
  " leafroll",
  " ringspot",
  " ring spot",
  " enation",
  " phyllody",
  " witches",
  " crinkle",
  " rosette",
  " shoestring",
  " tristeza",
  " psorosis",
  " stubborn",
  " greening",
  " vein banding",
  " vein mottle",
  " vein clearing",
  " leaf pucker",
  " pucker leaf",
  " latent",
  " motley",
  " rugose",
  " virus",
  " viroid",
  " virosis",
];

// Unambiguous nematode names. Adjectives such as "cyst", "dagger" or "lesion"
// are only meaningful next to the word "nematode", which is matched here
// directly, so they do not need their own entries.
const NEMATODE_KEYWORDS: readonly string[] = ["nematode", " eelworm", " root knot", " root-knot"];

const ENVIRONMENTAL_KEYWORDS: readonly string[] = [
  " sunscald",
  " sunburn",
  " chilling",
  " blossom end rot",
  " edema",
  " deficiency",
  " toxicity",
  " ozone",
  " drought",
  " frost",
  " herbicide",
  " pesticide",
  " phytotoxicity",
  " catface",
  " fruit cracking",
  " russeting",
  " growth crack",
  " mealiness",
  " wind scar",
  " hail",
  " salt ",
  " nutritional",
  " mineral",
  " overwatering",
  " under watering",
  " waterlogging",
  " chemical injury",
  " spray injury",
  " fertilizer burn",
  " lightning",
  " bruising",
  " pressure bruise",
  " impact damage",
  " transit rot",
];

// Generic symptom words first, then genera, then multi-word names that are not
// already covered by a shorter entry (" rot" covers every " … rot", " canker"
// every " … canker", and so on).
const FUNGAL_KEYWORDS: readonly string[] = [
  " mildew",
  " rust",
  " smut",
  " blight",
  " canker",
  " rot",
  " scab",
  " mold",
  " anthracnose",
  " bunt",
  " ergot",
  " dieback",
  " scald",
  " blotch",
  " speckle",
  " sooty",
  " flyspeck",
  " fusarium",
  " alternaria",
  " botrytis",
  " rhizoctonia",
  " pythium",
  " phytophthora",
  " sclerotinia",
  " verticillium",
  " ascochyta",
  " cercospora",
  " septoria",
  " colletotrichum",
  " phomopsis",
  " diaporthe",
  " diplodia",
  " macrophomina",
  " cylindrocladium",
  " mycosphaerella",
  " helminthosporium",
  " curvularia",
  " bipolaris",
  " exserohilum",
  " dothiorella",
  " fusicoccum",
  " pestalotia",
  " glomerella",
  " nectria",
  " eutypa",
  " armillaria",
  " ganoderma",
  " phoma",
  " cladosporium",
  " penicillium",
  " aspergillus",
  " rhizopus",
  " mucor",
  " pink root",
  " leaf spot",
  " brown spot",
  " black spot",
  " black leg",
  " blackleg",
  " black foot",
  " tar spot",
  " target spot",
  " dollar spot",
  " fairy ring",
  " pink disease",
  " sclerotial",
  " sore shin",
  " wart",
  " scurf",
  " shot hole",
  " seepage",
  " coral spot",
  " collar crack",
  " fasciation",
  " exobasidium",
  " mycorrhiza",
  " lichen",
  " algal",
  " chlorosis",
  " leaf blister",
  " leaf curl",
];

const PEST_KEYWORDS: readonly string[] = [
  " mite",
  " beetle",
  " weevil",
  " aphid",
  " bollworm",
  " leaf miner",
  " mealybug",
  " thrips",
  " whitefly",
  " caterpillar",
  " sawfly",
  " scale ",
  " leafhopper",
  " psylla",
  " slug",
  " snail",
  " borer",
  " maggot",
  " grub",
  " earwig",
  " grasshopper",
];

/**
 * Guesses the causal agent category from keywords in the disease name.
 *
 * Checks run from most to least specific and the first hit wins:
 * bacterial → viral → nematode → physiological/environmental → fungal → pests.
 * The environmental list is consulted before the fungal one so that names such
 * as "Blossom end rot" or "Iron deficiency chlorosis" are not captured by the
 * generic fungal words " rot" / " chlorosis".
 *
 * Nematodes and insect/mite pests are reported as "environmental", as this
 * script has always done (presumably CausalAgentType has no dedicated member
 * for them — verify against ../src/lib/types).
 *
 * @param name - Disease name as listed in the source file.
 * @returns The inferred category; "fungal" when nothing matches.
 */
function inferCausalAgent(name: string): CausalAgentType {
  const text = toMatchable(name);

  if (includesAny(text, BACTERIAL_KEYWORDS)) return "bacterial";
  if (includesAny(text, VIRAL_KEYWORDS)) return "viral";
  if (includesAny(text, NEMATODE_KEYWORDS)) return "environmental";
  if (includesAny(text, ENVIRONMENTAL_KEYWORDS)) return "environmental";
  if (includesAny(text, FUNGAL_KEYWORDS)) return "fungal";
  if (includesAny(text, PEST_KEYWORDS)) return "environmental";

  // Default to fungal (most plant diseases are fungal). A separate lookup of
  // fungal genus names would return the same value, so none is performed.
  return "fungal";
}

// ─── Infer severity ───────────────────────────────────────────────────────────

const HIGH_SEVERITY_KEYWORDS: readonly string[] = [
  " lethal",
  " devastating",
  " destructive",
  " fatal",
  " severe",
  " blight",
  " wilt",
  " canker",
  " dieback",
  " decline",
  " rot",
  " gall",
  " gummosis",
  " necrosis",
  " erwinia",
];

const LOW_SEVERITY_KEYWORDS: readonly string[] = [
  " minor",
  " mild",
  " slight",
  " speckle",
  " fleck",
  " freckle",
  " chlorosis",
  " translucence",
  " superficial",
];

/**
 * Guesses a severity from keywords in the disease name.
 *
 * High-severity keywords take precedence over low-severity ones.
 *
 * @param name - Disease name as listed in the source file.
 * @returns "high", "low", or "moderate" when no keyword matches.
 */
function inferSeverity(name: string): Severity {
  const text = toMatchable(name);
  if (includesAny(text, HIGH_SEVERITY_KEYWORDS)) return "high";
  if (includesAny(text, LOW_SEVERITY_KEYWORDS)) return "low";
  return "moderate";
}

// ─── Generate a deterministic slug ────────────────────────────────────────────

/**
 * Builds the row id for a disease: "wiki-" followed by the lower-cased name
 * with every run of characters outside [a-z0-9] replaced by a single "-" and
 * one leading/trailing "-" stripped.
 *
 * Different names can map to the same slug ("Leaf-spot" / "Leaf spot");
 * callers must handle such collisions.
 */
function toSlug(name: string): string {
  const body = name
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, "-") // runs collapse to one "-", so no "--" can remain
    .replace(/^-|-$/g, "");
  return "wiki-" + body;
}

// ─── Main ─────────────────────────────────────────────────────────────────────

const DEFAULT_LIST_PATH = "/tmp/plant_diseases/plant_diseases_comprehensive.txt";
const BATCH_SIZE = 50;

/**
 * Loads the list file, filters out diseases already present in the DB (by
 * case-insensitive name or by id), inserts the rest in batches and prints a
 * summary. The DB handle is closed on every exit path.
 */
async function main(): Promise<void> {
  const listPath = process.argv[2] ?? DEFAULT_LIST_PATH;
  const db = getDb();

  try {
    // 1. Get existing disease names and ids from DB
    const existingRows = await db.select({ id: diseases.id, name: diseases.name }).from(diseases);
    const existingNames = new Set<string>(existingRows.map((row) => row.name.toLowerCase().trim()));
    const takenIds = new Set<string>(existingRows.map((row) => row.id));
    console.log(`Existing diseases in DB: ${existingNames.size}`);

    // 2. Parse the comprehensive list
    const entries = parseComprehensiveList(listPath);
    console.log(`Total entries in comprehensive file: ${entries.length}`);

    // 3. Find or create catch-all plants
    for (const plantId of ["general", "unknown"]) {
      const existing = await db.select().from(plants).where(eq(plants.id, plantId)).get();
      if (existing) continue;

      const isGeneral = plantId === "general";
      console.log(`Creating '${plantId}' plant for catch-all diseases...`);
      await db.insert(plants).values({
        id: plantId,
        commonName: isGeneral ? "General (Multiple Plants)" : "Unknown Plant",
        scientificName: "Various",
        family: "Various",
        category: "houseplant",
        careSummary: isGeneral
          ? "General plant diseases affecting multiple species."
          : "Plant disease with unknown host plant.",
        imageUrl: "",
      });
      console.log(`Created '${plantId}' plant.`);
    }

    // 4. Filter new entries (deduplicate within file + against DB)
    const newEntries: DiseaseEntry[] = [];
    const skipped: string[] = [];
    const idCollisions: string[] = [];
    const seen = new Set<string>();
    for (const entry of entries) {
      const key = entry.name.toLowerCase().trim();
      if (seen.has(key)) continue;
      seen.add(key);

      if (existingNames.has(key)) {
        skipped.push(entry.name);
        continue;
      }

      // Two differently spelled names can share a slug. Dropping the later one
      // here keeps the insert counters honest instead of letting
      // onConflictDoNothing discard it silently.
      const id = toSlug(entry.name);
      if (takenIds.has(id)) {
        idCollisions.push(entry.name);
        continue;
      }
      takenIds.add(id);
      newEntries.push(entry);
    }

    console.log(`\nNew entries to insert: ${newEntries.length}`);
    console.log(`Already existing (skipped): ${skipped.length}`);
    console.log(`Slug already in use (skipped): ${idCollisions.length}`);
    if (skipped.length > 0) {
      console.log(`\nFirst 10 skipped (of ${skipped.length}):`);
      skipped.slice(0, 10).forEach((s) => console.log(` - ${s}`));
    }

    // 5. Insert new entries in batches
    if (newEntries.length === 0) {
      console.log("\n✅ No new diseases to insert.");
      return;
    }

    let inserted = 0;
    let conflicts = 0;
    let errors = 0;

    for (let i = 0; i < newEntries.length; i += BATCH_SIZE) {
      const values = newEntries.slice(i, i + BATCH_SIZE).map((entry) => ({
        id: toSlug(entry.name),
        plantId: "general",
        name: entry.name,
        scientificName: "",
        causalAgentType: inferCausalAgent(entry.name),
        description: `A plant disease known as "${entry.name}". Source: Wikipedia.`,
        symptoms: [],
        causes: [],
        treatment: [],
        prevention: [],
        lookalikeIds: [],
        severity: inferSeverity(entry.name),
        sourceUrl: entry.sourceUrl,
        imageUrl: "",
      }));

      try {
        await db.insert(diseases).values(values).onConflictDoNothing();
        inserted += values.length;
      } catch (batchErr: unknown) {
        // Fall back to individual inserts so one bad row does not lose the batch
        const reason = batchErr instanceof Error ? batchErr.message : String(batchErr);
        console.log(` Batch failed (${reason}), trying individually...`);
        for (const val of values) {
          try {
            await db.insert(diseases).values(val).onConflictDoNothing();
            inserted++;
          } catch (rowErr: unknown) {
            const text = String(rowErr);
            if (text.includes("UNIQUE") || text.includes("duplicate")) {
              // The row was NOT written: count it as a conflict, not as inserted
              conflicts++;
            } else {
              console.error(` Error inserting "${val.name}":`, rowErr);
              errors++;
            }
          }
        }
      }

      if ((i + BATCH_SIZE) % 200 === 0 || i + BATCH_SIZE >= newEntries.length) {
        console.log(
          ` Progress: ${Math.min(i + BATCH_SIZE, newEntries.length)}/${newEntries.length} (${inserted} inserted, ${errors} errors)`,
        );
      }
    }

    // 6. Summary
    const totalDiseases = await db
      .select({ count: sql`COUNT(*)` })
      .from(diseases)
      .get();
    const totalPlants = await db
      .select({ count: sql`COUNT(*)` })
      .from(plants)
      .get();

    console.log(`\n📊 Results:`);
    console.log(` Inserted: ${inserted}`);
    console.log(` Errors: ${errors}`);
    console.log(` Skipped (already existed): ${skipped.length}`);
    console.log(` Skipped (slug or key conflict): ${idCollisions.length + conflicts}`);
    console.log(`\n📊 Database now has:`);
    console.log(` ${totalPlants?.count ?? 0} plants`);
    console.log(` ${totalDiseases?.count ?? 0} diseases`);
  } finally {
    // Runs on success, early return and failure alike
    closeDb();
  }
}

main().catch((err: unknown) => {
  console.error("❌ Failed:", err);
  process.exit(1);
});