#!/usr/bin/env node
/**
 * fix-classifications.ts — Fix misclassified diseases in the DB.
 *
 * Fixes:
 *   1. Diseases named with viral indicators (mosaic, mottle, ringspot, virus, etc.)
 *      that are incorrectly tagged as "fungal"
 *   2. Other suspicious patterns
 *
 * Usage: cd apps/web && npx tsx scripts/fix-classifications.ts [--dry-run]
 *
 *   --dry-run   Print the planned corrections without writing to the database.
 */

import { readFileSync } from "fs";
import { resolve } from "path";

/**
 * Parse a single `KEY=value` line of an env file.
 *
 * Blank lines, `#` comments, and lines without a key before `=` yield
 * `undefined`. One pair of matching surrounding quotes is stripped from the
 * value so `KEY="value"` and `KEY=value` load identically.
 */
function parseEnvLine(line: string): [key: string, value: string] | undefined {
  const trimmed = line.trim();
  if (!trimmed || trimmed.startsWith("#")) return undefined;

  const eqIdx = trimmed.indexOf("=");
  if (eqIdx <= 0) return undefined;

  const key = trimmed.slice(0, eqIdx).trim();
  let value = trimmed.slice(eqIdx + 1).trim();

  const quote = value[0];
  if ((quote === '"' || quote === "'") && value.length >= 2 && value.endsWith(quote)) {
    value = value.slice(1, -1);
  }
  return [key, value];
}

/**
 * Best-effort load of an env file into `process.env`.
 *
 * Variables that are already set (to a non-empty value) are never overwritten.
 * A missing file is silently ignored; any other read failure is reported as a
 * warning but does not abort the script.
 */
function loadEnvFile(path: string): void {
  let contents: string;
  try {
    contents = readFileSync(path, "utf-8");
  } catch (err: unknown) {
    const code = err instanceof Error ? (err as NodeJS.ErrnoException).code : undefined;
    if (code !== "ENOENT") {
      console.warn(`Could not read ${path}:`, err);
    }
    return;
  }

  for (const line of contents.split("\n")) {
    const entry = parseEnvLine(line);
    if (!entry) continue;
    const [key, value] = entry;
    if (!process.env[key]) process.env[key] = value;
  }
}

// Manually load .env.development.
// Kept ahead of the db imports, mirroring the original statement order.
loadEnvFile(resolve(__dirname, "../.env.development"));

import { getDb, closeDb } from "../src/lib/db/index";
import { diseases } from "../src/lib/db/schema";
import { createClient } from "@libsql/client";

type AgentType = "fungal" | "bacterial" | "viral" | "environmental";

/** A name-based heuristic that maps a disease name to its expected causal agent. */
interface FixRule {
  /** Returns true when the rule applies to the given disease name. */
  test: (name: string) => boolean;
  /** Agent type the disease should carry when the rule applies. */
  correctAgent: AgentType;
  /** Human-readable justification for the correction. */
  reason: string;
}

/** The columns of a disease row that this script reads. */
interface DiseaseRow {
  id: string;
  name: string;
  causalAgentType: string;
}

/** A single planned reclassification. */
interface PlannedUpdate {
  id: string;
  newAgent: AgentType;
  rule: FixRule;
  oldAgent: string;
}

/** Number of UPDATE statements sent per batch. */
const BATCH_SIZE = 50;
/** Maximum number of ids listed per correction group in the preview. */
const PREVIEW_LIMIT = 10;

/**
 * Rules are evaluated in order and the FIRST match wins, so rules that name
 * the causal agent explicitly come before symptom-based heuristics (otherwise
 * e.g. "Bacterial mosaic" would be reclassified as viral by the mosaic rule).
 */
const FIX_RULES: FixRule[] = [
  // Diseases explicitly named as "virus" or "viral"
  {
    test: (name) => /\b(virus|viral|viroid)\b/i.test(name),
    correctAgent: "viral",
    reason: "Name explicitly indicates viral disease",
  },
  // Potexvirus, carlavirus, etc.
  // The genus prefix may stand alone or be followed by "virus"/"viruses"/"viridae".
  // A trailing \b directly after the prefix would reject "Potexvirus" because
  // there is no word boundary between "potex" and "virus".
  {
    test: (name) =>
      /\b(?:potex|carla|tobamo|poty|cucumo|ilar|nepo)(?:vir\w*)?\b|\b(?:tymo|gemini|tombus)vir\w*\b|\btom(?:ato)? bushy stunt\b/i.test(
        name,
      ),
    correctAgent: "viral",
    reason: "Recognized virus genus in name",
  },
  // Explicit bacterial in name
  {
    test: (name) =>
      /\bbacterial\b|\bbacterium\b|\berwinia\b|\bpseudomonas\b|\bxanthomonas\b|\bralstonia\b|\bclavibacter\b|\bstreptomyces\b|\bagrobacterium\b/i.test(
        name,
      ),
    correctAgent: "bacterial",
    reason: "Name indicates bacterial disease",
  },
  // "Mosaic" diseases (typically viral)
  {
    test: (name) => /\bmosaic\b/i.test(name),
    correctAgent: "viral",
    reason: "Mosaic symptoms are typically caused by viruses",
  },
  // "Mottle" diseases (typically viral)
  {
    test: (name) => /\bmottle\b/i.test(name),
    correctAgent: "viral",
    reason: "Mottle symptoms are typically caused by viruses",
  },
  // "Ringspot" diseases (typically viral)
  {
    test: (name) => /\bringspot\b/i.test(name),
    correctAgent: "viral",
    reason: "Ringspot symptoms are typically caused by viruses",
  },
  // "Leaf curl" (many are viral).
  // Peach leaf curl is excluded: it is caused by the fungus Taphrina deformans.
  {
    test: (name) =>
      /\bleafroll\b|\bleaf-roll\b/i.test(name) ||
      (/\bleaf curl\b/i.test(name) && !/\b(?:peach|taphrina)\b/i.test(name)),
    correctAgent: "viral",
    reason: "Leaf curl/roll diseases are often viral",
  },
  // "Rosette" (often viral or phytoplasma)
  {
    test: (name) => /\brosette\b/i.test(name),
    correctAgent: "viral",
    reason: "Rosette diseases are typically viral or phytoplasma",
  },
  // "Yellows" (often phytoplasma/viral)
  {
    test: (name) => /\byellows\b/i.test(name) && !/\bpeach\b/i.test(name),
    correctAgent: "viral",
    reason: "Yellows diseases are typically phytoplasma or viral",
  },
  // "Stunt" / "Dwarf" (often viral)
  {
    test: (name) => /\b(stunt|dwarf(ism)?)\b/i.test(name),
    correctAgent: "viral",
    reason: "Stunting/dwarfing diseases are often viral",
  },
  // Environmental/abiotic indicators
  // NOTE(review): "snow mold" and "snow scald" are generally fungal diseases;
  // confirm that classifying them as environmental is intended.
  {
    test: (name) =>
      /\b(deficiency|abiotic|environmental|injury|damage|stress|sunscald|sunburn|chilling|freeze|frost|wind|hail|nutrient|toxicity|snow\s+(mold|scald)|winter\s+(injury|rot|kill))\b/i.test(
        name,
      ),
    correctAgent: "environmental",
    reason: "Name indicates abiotic/environmental cause",
  },
];

/**
 * Build the list of corrections: for each disease, the first matching rule
 * decides the expected agent; an update is planned only when it differs from
 * the stored value.
 */
function planUpdates(rows: readonly DiseaseRow[]): PlannedUpdate[] {
  const planned: PlannedUpdate[] = [];
  for (const row of rows) {
    const rule = FIX_RULES.find((r) => r.test(row.name)); // first matching rule wins
    if (rule && row.causalAgentType !== rule.correctAgent) {
      planned.push({
        id: row.id,
        newAgent: rule.correctAgent,
        rule,
        oldAgent: row.causalAgentType,
      });
    }
  }
  return planned;
}

/** Print the planned corrections grouped by "old → new" agent type. */
function printPlan(updates: readonly PlannedUpdate[]): void {
  const grouped = new Map<string, { from: string; to: AgentType; items: string[] }>();
  for (const u of updates) {
    const key = `${u.oldAgent}→${u.newAgent}`;
    let group = grouped.get(key);
    if (!group) {
      group = { from: u.oldAgent, to: u.newAgent, items: [] };
      grouped.set(key, group);
    }
    group.items.push(`  ${u.id}`);
  }

  for (const g of grouped.values()) {
    console.log(`${g.from} → ${g.to} (${g.items.length} diseases):`);
    for (const item of g.items.slice(0, PREVIEW_LIMIT)) console.log(item);
    if (g.items.length > PREVIEW_LIMIT) {
      console.log(`  ... and ${g.items.length - PREVIEW_LIMIT} more`);
    }
    console.log();
  }
}

/**
 * Entry point: scan all diseases, report the name-based corrections, apply
 * them in batches (unless `--dry-run` is given), and print the resulting
 * distribution of causal agent types.
 *
 * Database handles are always released, including when a query fails.
 */
async function main(): Promise<void> {
  const dryRun = process.argv.slice(2).includes("--dry-run");

  console.log("🔍 Fixing disease classifications\n");

  const db = getDb();
  let rawClient: ReturnType<typeof createClient> | undefined;

  try {
    const allDiseases = await db
      .select({ id: diseases.id, name: diseases.name, causalAgentType: diseases.causalAgentType })
      .from(diseases)
      .all();

    console.log(`📋 ${allDiseases.length} total diseases\n`);

    const updates = planUpdates(allDiseases);

    console.log(`Found ${updates.length} diseases needing reclassification:\n`);
    printPlan(updates);

    // Apply updates
    if (updates.length === 0) {
      console.log("✅ No corrections needed");
    } else if (dryRun) {
      console.log(`Dry run: ${updates.length} corrections NOT applied`);
    } else {
      const url = process.env.DATABASE_URL;
      if (!url) {
        throw new Error("DATABASE_URL is not set (checked environment and .env.development)");
      }
      // The auth token is optional (e.g. local file databases need none).
      rawClient = createClient({ url, authToken: process.env.DATABASE_TOKEN });

      console.log(`Applying ${updates.length} corrections...\n`);

      // Batch update in groups of BATCH_SIZE
      for (let i = 0; i < updates.length; i += BATCH_SIZE) {
        const batch = updates.slice(i, i + BATCH_SIZE);
        await rawClient.batch(
          batch.map((u) => ({
            sql: "UPDATE diseases SET causal_agent_type = ?, updated_at = datetime('now') WHERE id = ?",
            args: [u.newAgent, u.id],
          })),
          "write",
        );
        process.stdout.write(`  ${Math.min(i + BATCH_SIZE, updates.length)}/${updates.length}\n`);
      }

      console.log(`\n✅ ${updates.length} diseases reclassified`);
    }

    // Print summary stats
    const after = await db
      .select({ causalAgentType: diseases.causalAgentType })
      .from(diseases)
      .all();

    const counts = new Map<string, number>();
    for (const d of after) {
      counts.set(d.causalAgentType, (counts.get(d.causalAgentType) ?? 0) + 1);
    }

    console.log(dryRun ? "\n📊 Current distribution (dry run):" : "\n📊 Updated distribution:");
    const sorted = [...counts.entries()].sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0));
    for (const [type, count] of sorted) {
      console.log(`  ${type}: ${count}`);
    }
  } finally {
    rawClient?.close();
    closeDb();
  }
}

main().catch((err: unknown) => {
  console.error("\n❌", err);
  process.exit(1);
});