Files
plant-disease-id/apps/web/scripts/fix-classifications.ts
2026-06-06 15:09:46 -04:00

213 lines
6.5 KiB
JavaScript

#!/usr/bin/env node
/**
* fix-classifications.ts — Fix misclassified diseases in the DB.
*
* Fixes:
* 1. Diseases named with viral indicators (mosaic, mottle, ringspot, virus, etc.)
* that are incorrectly tagged as "fungal"
* 2. Other suspicious patterns
*
* Usage: cd apps/web && npx tsx scripts/fix-classifications.ts
*/
import { readFileSync } from "fs";
import { resolve } from "path";
// Manually load .env.development
const envPath = resolve(__dirname, "../.env.development");
try {
const env = readFileSync(envPath, "utf-8");
for (const line of env.split("\n")) {
const trimmed = line.trim();
if (trimmed && !trimmed.startsWith("#")) {
const eqIdx = trimmed.indexOf("=");
if (eqIdx > 0) {
const key = trimmed.slice(0, eqIdx).trim();
const val = trimmed.slice(eqIdx + 1).trim();
if (!process.env[key]) process.env[key] = val;
}
}
}
} catch {}
import { getDb, closeDb } from "../src/lib/db/index";
import { diseases } from "../src/lib/db/schema";
import { createClient } from "@libsql/client";
type AgentType = "fungal" | "bacterial" | "viral" | "environmental";
interface FixRule {
test: (name: string) => boolean;
correctAgent: AgentType;
reason: string;
}
const FIX_RULES: FixRule[] = [
// Diseases explicitly named as "virus" or "viral"
{
test: (name) => /\b(virus|viral|viroid)\b/i.test(name),
correctAgent: "viral",
reason: "Name explicitly indicates viral disease",
},
// Potexvirus, carlavirus, etc.
{
test: (name) =>
/\b(virus\b|potex|carla|tobamo|poty|cucumo|ilar|nepo|tymovirus|geminivir|tom bushy stunt)\b/i.test(
name,
),
correctAgent: "viral",
reason: "Recognized virus genus in name",
},
// "Mosaic" diseases (typically viral)
{
test: (name) => /\bmosaic\b/i.test(name),
correctAgent: "viral",
reason: "Mosaic symptoms are typically caused by viruses",
},
// "Mottle" diseases (typically viral)
{
test: (name) => /\bmottle\b/i.test(name),
correctAgent: "viral",
reason: "Mottle symptoms are typically caused by viruses",
},
// "Ringspot" diseases (typically viral)
{
test: (name) => /\bringspot\b/i.test(name),
correctAgent: "viral",
reason: "Ringspot symptoms are typically caused by viruses",
},
// "Leaf curl" (many are viral)
{
test: (name) => /\bleaf curl\b|\bleafroll\b|\bleaf-roll\b/i.test(name),
correctAgent: "viral",
reason: "Leaf curl/roll diseases are often viral",
},
// "Rosette" (often viral or phytoplasma)
{
test: (name) => /\brosette\b/i.test(name),
correctAgent: "viral",
reason: "Rosette diseases are typically viral or phytoplasma",
},
// "Yellows" (often phytoplasma/viral)
{
test: (name) => /\byellows\b/i.test(name) && !/\bpeach\b/i.test(name),
correctAgent: "viral",
reason: "Yellows diseases are typically phytoplasma or viral",
},
// "Stunt" / "Dwarf" (often viral)
{
test: (name) => /\b(stunt|dwarf(ism)?)\b/i.test(name),
correctAgent: "viral",
reason: "Stunting/dwarfing diseases are often viral",
},
// Explicit bacterial in name
{
test: (name) =>
/\bbacterial\b|\bbacterium\b|\berwinia\b|\bpseudomonas\b|\bxanthomonas\b|\bralstonia\b|\bclavibacter\b|\bstreptomyces\b|\bagrobacterium\b/i.test(
name,
),
correctAgent: "bacterial",
reason: "Name indicates bacterial disease",
},
// Environmental/abiotic indicators
{
test: (name) =>
/\b(deficiency|abiotic|environmental|injury|damage|stress|sunscald|sunburn|chilling|freeze|frost|wind|hail|nutrient|toxicity|snow\s+(mold|scald)|winter\s+(injury|rot|kill))\b/i.test(
name,
),
correctAgent: "environmental",
reason: "Name indicates abiotic/environmental cause",
},
];
async function main() {
console.log("🔍 Fixing disease classifications\n");
const db = getDb();
const allDiseases = await db
.select({ id: diseases.id, name: diseases.name, causalAgentType: diseases.causalAgentType })
.from(diseases)
.all();
console.log(`📋 ${allDiseases.length} total diseases\n`);
const rawClient = createClient({
url: process.env.DATABASE_URL!,
authToken: process.env.DATABASE_TOKEN!,
});
const updates: { id: string; newAgent: AgentType; rule: FixRule; oldAgent: string }[] = [];
for (const d of allDiseases) {
for (const rule of FIX_RULES) {
if (rule.test(d.name)) {
if (d.causalAgentType !== rule.correctAgent) {
updates.push({
id: d.id,
newAgent: rule.correctAgent,
rule,
oldAgent: d.causalAgentType,
});
}
break; // First matching rule wins
}
}
}
console.log(`Found ${updates.length} diseases needing reclassification:\n`);
// Group by correction type
const grouped: Record<string, { from: string; to: string; items: string[] }> = {};
for (const u of updates) {
const key = `${u.oldAgent}${u.newAgent}`;
if (!grouped[key]) grouped[key] = { from: u.oldAgent, to: u.newAgent, items: [] };
grouped[key].items.push(` ${u.id}`);
}
for (const [, g] of Object.entries(grouped)) {
console.log(`${g.from}${g.to} (${g.items.length} diseases):`);
g.items.slice(0, 10).forEach((l) => console.log(l));
if (g.items.length > 10) console.log(` ... and ${g.items.length - 10} more`);
console.log();
}
// Apply updates
if (updates.length === 0) {
console.log("✅ No corrections needed");
} else {
console.log(`Applying ${updates.length} corrections...\n`);
// Batch update in groups of 50
for (let i = 0; i < updates.length; i += 50) {
const batch = updates.slice(i, i + 50);
await rawClient.batch(
batch.map((u) => ({
sql: "UPDATE diseases SET causal_agent_type = ?, updated_at = datetime('now') WHERE id = ?",
args: [u.newAgent, u.id],
})),
"write",
);
process.stdout.write(` ${Math.min(i + 50, updates.length)}/${updates.length}\n`);
}
console.log(`\n${updates.length} diseases reclassified`);
}
// Print summary stats
const after = await db.select({ causalAgentType: diseases.causalAgentType }).from(diseases).all();
const counts: Record<string, number> = {};
after.forEach((d) => {
counts[d.causalAgentType] = (counts[d.causalAgentType] || 0) + 1;
});
console.log("\n📊 Updated distribution:");
for (const [type, count] of Object.entries(counts).sort()) {
console.log(` ${type}: ${count}`);
}
rawClient.close();
closeDb();
}
main().catch((err) => {
console.error("\n❌", err);
process.exit(1);
});