#!/usr/bin/env node
/**
 * fix-classifications.ts — Fix misclassified diseases in the DB.
 *
 * Fixes:
 * 1. Diseases whose names carry viral indicators (virus, mosaic, mottle,
 *    ringspot, leaf curl, rosette, yellows, stunt, etc.) but that are tagged
 *    with a different causal agent type (typically "fungal")
 * 2. Diseases whose names indicate a bacterial or an abiotic/environmental
 *    cause but that are tagged with a different causal agent type
 *
 * Usage: cd apps/web && npx tsx scripts/fix-classifications.ts
 */
|
|
|
|
import { readFileSync } from "fs";
|
|
import { resolve } from "path";
|
|
|
|
// Manually load .env.development into process.env (no dotenv dependency).
//
// Every non-empty, non-comment line of the form KEY=VALUE is applied unless the
// variable already has a non-empty value, so variables set in the real
// environment take precedence over the file.
//
// NOTE(review): ES `import` declarations are hoisted, so depending on how this
// script is compiled the modules imported further down may be evaluated before
// this block runs. That is only safe if they read process.env lazily (e.g.
// inside getDb()) — confirm.
const envPath = resolve(__dirname, "../.env.development");
try {
  const env = readFileSync(envPath, "utf-8");
  for (const line of env.split("\n")) {
    const trimmed = line.trim();
    if (!trimmed || trimmed.startsWith("#")) continue;

    const eqIdx = trimmed.indexOf("=");
    if (eqIdx <= 0) continue; // no "=" or an empty key: not an assignment

    const key = trimmed.slice(0, eqIdx).trim();
    let val = trimmed.slice(eqIdx + 1).trim();

    // Strip one pair of matching surrounding quotes so KEY="value" yields
    // `value` rather than `"value"` (which would corrupt URLs and tokens).
    const quote = val[0];
    if (val.length >= 2 && (quote === '"' || quote === "'") && val.endsWith(quote)) {
      val = val.slice(1, -1);
    }

    if (!process.env[key]) process.env[key] = val;
  }
} catch (err: unknown) {
  // Loading the file is best-effort: a missing file is expected (e.g. when the
  // variables come from the real environment). Any other failure is reported
  // instead of being silently swallowed, but still does not abort the script.
  const code = err instanceof Error ? (err as Error & { code?: unknown }).code : undefined;
  if (code !== "ENOENT") {
    console.warn(`⚠️ Could not read ${envPath}:`, err);
  }
}
|
|
|
|
import { getDb, closeDb } from "../src/lib/db/index";
|
|
import { diseases } from "../src/lib/db/schema";
|
|
import { createClient } from "@libsql/client";
|
|
|
|
/** Causal agent categories this script can assign to a disease. */
type AgentType = "fungal" | "bacterial" | "viral" | "environmental";

/** A name-based heuristic describing which agent type a disease should carry. */
interface FixRule {
  /** Returns true when the disease name matches this rule. */
  test: (name: string) => boolean;
  /** Agent type a matching disease should be tagged with. */
  correctAgent: AgentType;
  /** Human-readable justification for the correction. */
  reason: string;
}

/**
 * Ordered reclassification rules. Callers use the FIRST rule whose `test`
 * matches, so order matters: rules keyed on an explicit cause in the name
 * (virus / virus genus / bacterium) come before the symptom-based heuristics
 * (mosaic, mottle, ...), which would otherwise tag e.g. "Bacterial mosaic" as
 * viral. The broad environmental keyword rule stays last.
 */
const FIX_RULES: FixRule[] = [
  // Diseases explicitly named as "virus", "viral" or "viroid" (plurals included)
  {
    test: (name) => /\b(?:virus(?:es)?|viral|viroids?)\b/i.test(name),
    correctAgent: "viral",
    reason: "Name explicitly indicates viral disease",
  },
  // Virus genus/family names: potexvirus, carlavirus, geminivirus, etc.
  // The "-virus"/"-viruses"/"-viridae"/"-viral" suffix is matched explicitly;
  // a bare `\bpotex\b` never matches "potexvirus" because there is no word
  // boundary inside the word. Bare prefixes are still accepted as whole words.
  {
    test: (name) =>
      /\b(?:virus|(?:potex|carla|tobamo|poty|cucumo|ilar|nepo)(?:vir(?:us(?:es)?|idae|al))?|(?:tymo|gemini)vir(?:us(?:es)?|idae|al)?|tom(?:ato)? bushy stunt)\b/i.test(
        name,
      ),
    correctAgent: "viral",
    reason: "Recognized virus genus in name",
  },
  // Explicit bacterial wording or a bacterial genus in the name. Placed ahead
  // of the symptom heuristics because an explicit cause outranks a symptom.
  {
    test: (name) =>
      /\bbacterial\b|\bbacterium\b|\berwinia\b|\bpseudomonas\b|\bxanthomonas\b|\bralstonia\b|\bclavibacter\b|\bstreptomyces\b|\bagrobacterium\b/i.test(
        name,
      ),
    correctAgent: "bacterial",
    reason: "Name indicates bacterial disease",
  },
  // "Mosaic" diseases (typically viral)
  {
    test: (name) => /\bmosaic\b/i.test(name),
    correctAgent: "viral",
    reason: "Mosaic symptoms are typically caused by viruses",
  },
  // "Mottle" diseases (typically viral)
  {
    test: (name) => /\bmottle\b/i.test(name),
    correctAgent: "viral",
    reason: "Mottle symptoms are typically caused by viruses",
  },
  // "Ringspot" diseases (typically viral)
  {
    test: (name) => /\bringspot\b/i.test(name),
    correctAgent: "viral",
    reason: "Ringspot symptoms are typically caused by viruses",
  },
  // "Leaf curl" / "leafroll" (many are viral)
  {
    test: (name) => /\bleaf curl\b|\bleafroll\b|\bleaf-roll\b/i.test(name),
    correctAgent: "viral",
    reason: "Leaf curl/roll diseases are often viral",
  },
  // "Rosette" (often viral or phytoplasma)
  {
    test: (name) => /\brosette\b/i.test(name),
    correctAgent: "viral",
    reason: "Rosette diseases are typically viral or phytoplasma",
  },
  // "Yellows" (often phytoplasma/viral); names mentioning "peach" are excluded
  {
    test: (name) => /\byellows\b/i.test(name) && !/\bpeach\b/i.test(name),
    correctAgent: "viral",
    reason: "Yellows diseases are typically phytoplasma or viral",
  },
  // "Stunt" / "Dwarf" (often viral)
  {
    test: (name) => /\b(stunt|dwarf(ism)?)\b/i.test(name),
    correctAgent: "viral",
    reason: "Stunting/dwarfing diseases are often viral",
  },
  // Environmental/abiotic indicators.
  // NOTE(review): "snow mold" / "snow scald" are commonly attributed to fungi;
  // confirm that tagging them as environmental is intended.
  {
    test: (name) =>
      /\b(deficiency|abiotic|environmental|injury|damage|stress|sunscald|sunburn|chilling|freeze|frost|wind|hail|nutrient|toxicity|snow\s+(mold|scald)|winter\s+(injury|rot|kill))\b/i.test(
        name,
      ),
    correctAgent: "environmental",
    reason: "Name indicates abiotic/environmental cause",
  },
];
|
|
|
|
/**
 * Reclassifies diseases whose name matches a FIX_RULES entry but whose stored
 * causal agent type disagrees with that rule.
 *
 * Steps: load every disease, pick the first matching rule per name, print the
 * planned changes grouped by "old → new" transition, apply them through the
 * raw libSQL client in batches of 50, then print the resulting distribution of
 * agent types.
 *
 * @throws Error when DATABASE_URL is not set; database errors propagate.
 */
async function main(): Promise<void> {
  console.log("🔍 Fixing disease classifications\n");
  const db = getDb();
  try {
    const allDiseases = await db
      .select({ id: diseases.id, name: diseases.name, causalAgentType: diseases.causalAgentType })
      .from(diseases)
      .all();
    console.log(`📋 ${allDiseases.length} total diseases\n`);

    // Fail with a clear message instead of handing `undefined` to the client.
    const databaseUrl = process.env.DATABASE_URL;
    if (!databaseUrl) {
      throw new Error("DATABASE_URL is not set; cannot open a write connection");
    }
    const rawClient = createClient({
      url: databaseUrl,
      authToken: process.env.DATABASE_TOKEN,
    });

    try {
      const updates: { id: string; newAgent: AgentType; oldAgent: string }[] = [];

      for (const d of allDiseases) {
        // First matching rule wins; later rules are not consulted.
        const rule = FIX_RULES.find((candidate) => candidate.test(d.name));
        if (rule && d.causalAgentType !== rule.correctAgent) {
          updates.push({
            id: d.id,
            newAgent: rule.correctAgent,
            oldAgent: d.causalAgentType,
          });
        }
      }

      console.log(`Found ${updates.length} diseases needing reclassification:\n`);

      // Group by correction type ("old→new"), preserving first-seen order.
      const grouped = new Map<string, { from: string; to: string; items: string[] }>();
      for (const u of updates) {
        const key = `${u.oldAgent}→${u.newAgent}`;
        let group = grouped.get(key);
        if (!group) {
          group = { from: u.oldAgent, to: u.newAgent, items: [] };
          grouped.set(key, group);
        }
        group.items.push(` ${u.id}`);
      }

      for (const g of grouped.values()) {
        console.log(`${g.from} → ${g.to} (${g.items.length} diseases):`);
        // Only the first 10 entries of each group are listed.
        g.items.slice(0, 10).forEach((l) => console.log(l));
        if (g.items.length > 10) console.log(` ... and ${g.items.length - 10} more`);
        console.log();
      }

      // Apply updates
      if (updates.length === 0) {
        console.log("✅ No corrections needed");
      } else {
        console.log(`Applying ${updates.length} corrections...\n`);

        // Batch update in groups of 50
        const batchSize = 50;
        for (let i = 0; i < updates.length; i += batchSize) {
          const batch = updates.slice(i, i + batchSize);
          await rawClient.batch(
            batch.map((u) => ({
              sql: "UPDATE diseases SET causal_agent_type = ?, updated_at = datetime('now') WHERE id = ?",
              args: [u.newAgent, u.id],
            })),
            "write",
          );
          process.stdout.write(` ${Math.min(i + batchSize, updates.length)}/${updates.length}\n`);
        }

        console.log(`\n✅ ${updates.length} diseases reclassified`);
      }

      // Print summary stats
      const after = await db.select({ causalAgentType: diseases.causalAgentType }).from(diseases).all();
      const counts: Record<string, number> = {};
      for (const d of after) {
        counts[d.causalAgentType] = (counts[d.causalAgentType] ?? 0) + 1;
      }
      console.log("\n📊 Updated distribution:");
      // Sort explicitly by type name rather than by stringified [type, count] pairs.
      const byType = Object.entries(counts).sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0));
      for (const [type, count] of byType) {
        console.log(` ${type}: ${count}`);
      }
    } finally {
      // Release the write connection even when a batch or query fails.
      rawClient.close();
    }
  } finally {
    closeDb();
  }
}
|
|
|
|
// Entry point: run the script; on any rejection print the error and exit
// with status 1 so callers (shell, CI) see the failure.
const reportFatal = (failure: unknown): void => {
  console.error("\n❌", failure);
  process.exit(1);
};

main().catch(reportFatal);
|