Files
plant-disease-id/scripts/expand-diseases.ts
2026-06-08 16:42:04 -04:00

692 lines
20 KiB
TypeScript

/**
* Expand DB with comprehensive plant disease list from Wikipedia.
*
* Reads /tmp/plant_diseases/plant_diseases_comprehensive.txt,
* compares against existing DB entries (by name, case-insensitive),
* and inserts new entries with reasonable defaults.
*
* Usage:
* cd apps/web && export $(grep -v '^#' .env.development | xargs) && npx tsx scripts/expand-diseases.ts
*/
import "dotenv/config";
import { readFileSync } from "fs";
import { eq, sql } from "drizzle-orm";
import { getDb, closeDb } from "../src/lib/db/index";
import { plants, diseases } from "../src/lib/db/schema";
import type { CausalAgentType, Severity } from "../src/lib/types";
// ─── Parse the comprehensive list ─────────────────────────────────────────────
/** One disease parsed from the comprehensive list file. */
interface DiseaseEntry {
  /** Text following the "N." list marker, with surrounding whitespace removed. */
  name: string;
  /** URL found on the line right after the name, or "" when that line is not a URL. */
  sourceUrl: string;
}

/**
 * Parses the numbered disease list at `filePath`.
 *
 * The file is read as UTF-8. A line of the form `<digits>. <name>` starts an
 * entry; if the following line (trimmed) begins with `http://` or `https://`
 * it is consumed as that entry's source URL, otherwise the entry is still
 * returned with an empty `sourceUrl`. All other lines are ignored.
 *
 * @param filePath Path of the list file.
 * @returns Entries in file order (duplicates are not removed here).
 * @throws Whatever `readFileSync` throws when the file cannot be read.
 */
function parseComprehensiveList(filePath: string): DiseaseEntry[] {
  const content = readFileSync(filePath, "utf-8");
  // Strip a leading BOM and accept both LF and CRLF line endings. With a plain
  // "\n" split a trailing "\r" stays on every line, and because "." never
  // matches "\r" the name pattern below would reject every line of a CRLF file.
  const lines = content.replace(/^\uFEFF/, "").split(/\r?\n/);
  const nameRe = /^\d+\.\s+(.+)$/;
  const urlRe = /^https?:\/\//i;
  const entries: DiseaseEntry[] = [];

  for (let i = 0; i < lines.length; i++) {
    const nameMatch = nameRe.exec(lines[i] ?? "");
    if (!nameMatch) continue;

    const name = (nameMatch[1] ?? "").trim();
    // "12.   " matches the pattern but carries no usable name.
    if (name === "") continue;

    const urlLine = lines[i + 1]?.trim() ?? "";
    if (urlRe.test(urlLine)) {
      entries.push({ name, sourceUrl: urlLine });
      i++; // the URL line belongs to this entry; do not re-scan it
    } else {
      // No URL follows: keep the entry, just without a source link.
      entries.push({ name, sourceUrl: "" });
    }
  }
  return entries;
}
// ─── Infer causal agent type from disease name ────────────────────────────────
// Marker tables for inferCausalAgent. Every marker is matched as a substring
// of the normalized name, which is lower-cased, has punctuation turned into
// single spaces, and is wrapped in one leading and one trailing space. A
// leading space in a marker therefore means "word starts here" and a trailing
// space means "word ends here".

/** The word "bacterial" plus bacterial genus names. */
const BACTERIAL_MARKERS: readonly string[] = [
  " bacterial", " xanthomonas", " pseudomonas", " erwinia", " ralstonia",
  " clavibacter", " streptomyces", " agrobacterium", " corynebacterium",
  " pectobacterium", " dickeya",
];

/** Symptom words treated as viral signals, plus "virus"/"viroid"/"virosis". */
const VIRAL_MARKERS: readonly string[] = [
  " mosaic", " yellows", " leaf roll", " leafroll", " ringspot", " ring spot",
  " enation", " phyllody", " witches", " crinkle", " rosette", " shoestring",
  " tristeza", " psorosis", " stubborn", " greening", " vein banding",
  " vein mottle", " vein clearing", " leaf pucker", " pucker leaf", " latent",
  " motley", " rugose", " virus", " viroid", " virosis",
];

/**
 * Physiological disorders whose name contains a fungal keyword; they must be
 * tested before FUNGAL_MARKERS or " rot" would claim them.
 */
const PHYSIOLOGICAL_ROT_MARKERS: readonly string[] = [" blossom end rot"];

/**
 * Fungal symptom words and genus names. Longer phrases that merely extend a
 * listed marker (e.g. " pink rot" vs " rot") are omitted because the shorter
 * marker already matches them.
 */
const FUNGAL_MARKERS: readonly string[] = [
  " mildew", " rust", " smut", " blight", " canker", " rot", " scab", " mold",
  " anthracnose", " bunt", " ergot", " dieback", " scald", " blotch",
  " speckle", " sooty", " flyspeck",
  " fusarium", " alternaria", " botrytis", " rhizoctonia", " pythium",
  " phytophthora", " sclerotinia", " verticillium", " ascochyta",
  " cercospora", " septoria", " colletotrichum", " phomopsis", " diaporthe",
  " diplodia", " macrophomina", " cylindrocladium", " mycosphaerella",
  " helminthosporium", " curvularia", " bipolaris", " exserohilum",
  " dothiorella", " fusicoccum", " pestalotia", " glomerella", " nectria",
  " eutypa", " armillaria", " ganoderma", " phoma", " cladosporium",
  " penicillium", " aspergillus", " rhizopus", " mucor",
  " pink root", " leaf spot", " brown spot", " black spot", " black leg",
  " blackleg", " black foot", " tar spot", " target spot", " dollar spot",
  " fairy ring", " pink disease", " sclerotial", " sore shin", " wart",
  " scurf", " shot hole", " seepage", " coral spot", " collar crack",
  " fasciation", " exobasidium", " mycorrhiza", " lichen", " algal",
  " chlorosis", " leaf blister", " leaf curl",
];

/**
 * Abiotic / physiological causes. " transit rot" from the earlier list is not
 * carried over: it was always claimed by the fungal " rot" marker first, and
 * it still is.
 */
const ABIOTIC_MARKERS: readonly string[] = [
  " sunscald", " sunburn", " chilling", " edema", " deficiency", " toxicity",
  " ozone", " drought", " frost", " herbicide", " pesticide",
  " phytotoxicity", " catface", " fruit cracking", " russeting",
  " growth crack", " mealiness", " wind scar", " hail", " salt ",
  " nutritional", " mineral", " overwatering", " under watering",
  " waterlogging", " chemical injury", " spray injury", " fertilizer burn",
  " lightning", " bruising", " pressure bruise", " impact damage",
];

/** Insect, mite and mollusc pests. */
const PEST_MARKERS: readonly string[] = [
  " mite", " beetle", " weevil", " aphid", " bollworm", " leaf miner",
  " mealybug", " thrips", " whitefly", " caterpillar", " sawfly", " scale ",
  " leafhopper", " psylla", " slug", " snail", " borer", " maggot", " grub",
  " earwig", " grasshopper",
];

/** True when `padded` contains at least one of `markers` as a substring. */
function hasCausalMarker(padded: string, markers: readonly string[]): boolean {
  return markers.some((marker) => padded.includes(marker));
}

/**
 * Guesses the causal agent category of a disease from keywords in its name.
 *
 * Checks run in this order and the first hit wins: bacterial, viral,
 * nematode, physiological "rot" disorders, fungal, abiotic, pest. A name that
 * matches nothing is classified as "fungal".
 *
 * NOTE(review): nematodes and pests are reported as "environmental", exactly
 * as before; confirm that CausalAgentType has no more specific member.
 *
 * @param name Disease name in any letter case; punctuation is ignored.
 * @returns The inferred category string.
 */
function inferCausalAgent(name: string): CausalAgentType {
  // Normalize so that markers also match at the very start or end of the name
  // and across punctuation ("(bacterial)", "root-knot", "blossom-end rot").
  const words = name.toLowerCase().replace(/[^\p{L}\p{N}]+/gu, " ").trim();
  const padded = ` ${words} `;

  if (hasCausalMarker(padded, BACTERIAL_MARKERS)) return "bacterial";
  if (hasCausalMarker(padded, VIRAL_MARKERS)) return "viral";

  // Any mention of a nematode (or its common name "eelworm") decides the case;
  // descriptors such as "cyst" or "dagger" add nothing once that word is found.
  if (padded.includes("nematode") || padded.includes(" eelworm")) {
    return "environmental";
  }

  if (hasCausalMarker(padded, PHYSIOLOGICAL_ROT_MARKERS)) return "environmental";
  if (hasCausalMarker(padded, FUNGAL_MARKERS)) return "fungal";
  if (hasCausalMarker(padded, ABIOTIC_MARKERS)) return "environmental";
  if (hasCausalMarker(padded, PEST_MARKERS)) return "environmental";

  // Anything unrecognized is treated as fungal, so a separate lookup of
  // further fungal genus names could never change the result.
  return "fungal";
}
// ─── Infer severity ───────────────────────────────────────────────────────────
// Marker tables for inferSeverity. Markers are matched as substrings of the
// normalized name (lower-cased, punctuation turned into single spaces, wrapped
// in one leading and one trailing space), so a leading space means "word
// starts here" and a trailing space means "word ends here".

/** Words that mark a disease as high severity. */
const HIGH_SEVERITY_MARKERS: readonly string[] = [
  " lethal", " devastating", " destructive", " fatal", " severe", " blight",
  " wilt", " canker", " dieback", " decline", " rot", " gall", " gummosis",
  " necrosis", " erwinia",
];

/**
 * Words that mark a disease as low severity. "mild" is matched as a whole
 * word because the bare prefix also matches "mildew".
 */
const LOW_SEVERITY_MARKERS: readonly string[] = [
  " minor", " mild ", " slight", " speckle", " fleck", " freckle",
  " chlorosis", " translucence", " superficial",
];

/**
 * Guesses a severity level from keywords in the disease name.
 *
 * High-severity markers are checked first, then low-severity markers; a name
 * that matches neither list is "moderate".
 *
 * @param name Disease name in any letter case; punctuation is ignored.
 * @returns "high", "low" or "moderate".
 */
function inferSeverity(name: string): Severity {
  // Padding lets markers match the first and last word of the name too.
  const words = name.toLowerCase().replace(/[^\p{L}\p{N}]+/gu, " ").trim();
  const padded = ` ${words} `;

  if (HIGH_SEVERITY_MARKERS.some((marker) => padded.includes(marker))) {
    return "high";
  }
  if (LOW_SEVERITY_MARKERS.some((marker) => padded.includes(marker))) {
    return "low";
  }
  return "moderate";
}
// ─── Generate a deterministic slug ────────────────────────────────────────────
/**
 * Builds the deterministic row ID for a disease name.
 *
 * The name is lower-cased, every maximal run of ASCII letters/digits becomes
 * one segment, the segments are joined with "-", and the result is prefixed
 * with "wiki-". Any other character acts purely as a separator, so a name
 * with no ASCII letters or digits yields just "wiki-".
 */
function toSlug(name: string): string {
  const segments = name.toLowerCase().match(/[a-z0-9]+/g) ?? [];
  return `wiki-${segments.join("-")}`;
}
// ─── Main ─────────────────────────────────────────────────────────────────────
/**
 * Imports every disease from the comprehensive list that the DB does not
 * already contain (compared by lower-cased, trimmed name).
 *
 * Steps: load existing names, parse the list file, make sure the "general"
 * and "unknown" catch-all plants exist, drop duplicates, insert the rest in
 * batches under the "general" plant, then print a summary.
 *
 * The list path defaults to /tmp/plant_diseases/plant_diseases_comprehensive.txt
 * and can be overridden with the first command-line argument.
 *
 * The DB handle is released via closeDb() on every exit path, including
 * failures, and the "Inserted" figure is the real growth of the diseases
 * table rather than the number of rows submitted (conflicting rows are
 * dropped by onConflictDoNothing without raising an error).
 */
async function main(): Promise<void> {
  const listPath =
    process.argv[2] ?? "/tmp/plant_diseases/plant_diseases_comprehensive.txt";
  const db = getDb();
  try {
    // 1. Get existing disease names from DB
    const existingDiseases = await db.select({ name: diseases.name }).from(diseases);
    const rowsBefore = existingDiseases.length;
    const existingNames = new Set(existingDiseases.map((d) => d.name.toLowerCase().trim()));
    console.log(`Existing diseases in DB: ${existingNames.size}`);

    // 2. Parse the comprehensive list
    const entries = parseComprehensiveList(listPath);
    console.log(`Total entries in comprehensive file: ${entries.length}`);

    // 3. Find or create catch-all plants
    for (const plantId of ["general", "unknown"]) {
      const existing = await db.select().from(plants).where(eq(plants.id, plantId)).get();
      if (!existing) {
        console.log(`Creating '${plantId}' plant for catch-all diseases...`);
        await db.insert(plants).values({
          id: plantId,
          commonName: plantId === "general" ? "General (Multiple Plants)" : "Unknown Plant",
          scientificName: "Various",
          family: "Various",
          category: "houseplant",
          careSummary:
            plantId === "general"
              ? "General plant diseases affecting multiple species."
              : "Plant disease with unknown host plant.",
          imageUrl: "",
        });
        console.log(`Created '${plantId}' plant.`);
      }
    }

    // 4. Filter new entries (deduplicate within file + against DB)
    const newEntries: DiseaseEntry[] = [];
    const skipped: string[] = [];
    const seen = new Set<string>();
    for (const entry of entries) {
      const key = entry.name.toLowerCase().trim();
      if (seen.has(key)) continue;
      seen.add(key);
      if (existingNames.has(key)) {
        skipped.push(entry.name);
      } else {
        newEntries.push(entry);
      }
    }
    console.log(`\nNew entries to insert: ${newEntries.length}`);
    console.log(`Already existing (skipped): ${skipped.length}`);
    if (skipped.length > 0) {
      console.log(`\nFirst 10 skipped (of ${skipped.length}):`);
      skipped.slice(0, 10).forEach((s) => console.log(` - ${s}`));
    }

    // 5. Insert new entries in batches
    if (newEntries.length === 0) {
      console.log("\n✅ No new diseases to insert.");
      return;
    }
    const BATCH_SIZE = 50;
    let submitted = 0;
    let errors = 0;
    for (let i = 0; i < newEntries.length; i += BATCH_SIZE) {
      const batch = newEntries.slice(i, i + BATCH_SIZE);
      const values = batch.map((entry) => ({
        id: toSlug(entry.name),
        plantId: "general",
        name: entry.name,
        scientificName: "",
        causalAgentType: inferCausalAgent(entry.name),
        description: `A plant disease known as "${entry.name}". Source: Wikipedia.`,
        symptoms: [],
        causes: [],
        treatment: [],
        prevention: [],
        lookalikeIds: [],
        severity: inferSeverity(entry.name),
        sourceUrl: entry.sourceUrl,
        imageUrl: "",
      }));
      try {
        await db.insert(diseases).values(values).onConflictDoNothing();
      } catch (err) {
        // One bad row fails the whole statement; retry row by row so the
        // remaining rows of the batch still get in.
        console.log(` Batch failed (${String(err)}), trying individually...`);
        for (const val of values) {
          try {
            await db.insert(diseases).values(val).onConflictDoNothing();
          } catch (e2) {
            const text = String(e2);
            // A uniqueness violation means the row was not inserted, but it is
            // not a failure either; it shows up in the conflict count below.
            if (!text.includes("UNIQUE") && !text.includes("duplicate")) {
              console.error(` Error inserting "${val.name}":`, e2);
              errors++;
            }
          }
        }
      }
      submitted += values.length;
      if ((i + BATCH_SIZE) % 200 === 0 || i + BATCH_SIZE >= newEntries.length) {
        console.log(
          ` Progress: ${submitted}/${newEntries.length} submitted (${errors} errors)`,
        );
      }
    }

    // 6. Summary
    const totalDiseases = await db
      .select({ count: sql<number>`COUNT(*)` })
      .from(diseases)
      .get();
    const totalPlants = await db
      .select({ count: sql<number>`COUNT(*)` })
      .from(plants)
      .get();
    // Derive the insert count from the table itself: rows dropped by
    // onConflictDoNothing (e.g. two names that map to the same slug) raise no
    // error, so counting submitted rows would overstate the result.
    // Assumes nothing else writes to the table during the run.
    const rowsAfter = Number(totalDiseases?.count ?? rowsBefore);
    const inserted = Math.max(0, rowsAfter - rowsBefore);
    const conflicts = Math.max(0, newEntries.length - inserted - errors);
    console.log(`\n📊 Results:`);
    console.log(` Inserted: ${inserted}`);
    console.log(` Errors: ${errors}`);
    console.log(` Skipped (already existed): ${skipped.length}`);
    console.log(` Skipped (ID conflict): ${conflicts}`);
    console.log(`\n📊 Database now has:`);
    console.log(` ${totalPlants?.count ?? 0} plants`);
    console.log(` ${totalDiseases?.count ?? 0} diseases`);
  } finally {
    // Runs on success, on the early return above, and when any step throws.
    closeDb();
  }
}
// Script entry point: run the import, and if it rejects, print the reason and
// exit with status 1.
main().catch((reason) => {
  console.error("❌ Failed:", reason);
  process.exit(1);
});