beepboop
This commit is contained in:
212
apps/web/scripts/fix-classifications.ts
Normal file
212
apps/web/scripts/fix-classifications.ts
Normal file
@@ -0,0 +1,212 @@
|
||||
#!/usr/bin/env node
|
||||
/**
 * fix-classifications.ts — Fix misclassified diseases in the DB.
 *
 * Fixes:
 *  1. Diseases whose names carry viral indicators (mosaic, mottle, ringspot,
 *     virus, etc.) but whose causal agent type is something else (typically "fungal")
 *  2. Diseases whose names indicate a bacterial or an abiotic/environmental
 *     cause but whose causal agent type says otherwise
 *
 * Classification is based on the disease name only; the first matching rule wins.
 *
 * Usage: cd apps/web && npx tsx scripts/fix-classifications.ts
 */
|
||||
|
||||
import { readFileSync } from "fs";
|
||||
import { resolve } from "path";
|
||||
|
||||
// Manually load .env.development
|
||||
const envPath = resolve(__dirname, "../.env.development");
|
||||
try {
|
||||
const env = readFileSync(envPath, "utf-8");
|
||||
for (const line of env.split("\n")) {
|
||||
const trimmed = line.trim();
|
||||
if (trimmed && !trimmed.startsWith("#")) {
|
||||
const eqIdx = trimmed.indexOf("=");
|
||||
if (eqIdx > 0) {
|
||||
const key = trimmed.slice(0, eqIdx).trim();
|
||||
const val = trimmed.slice(eqIdx + 1).trim();
|
||||
if (!process.env[key]) process.env[key] = val;
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch {}
|
||||
|
||||
import { getDb, closeDb } from "../src/lib/db/index";
|
||||
import { diseases } from "../src/lib/db/schema";
|
||||
import { createClient } from "@libsql/client";
|
||||
|
||||
type AgentType = "fungal" | "bacterial" | "viral" | "environmental";
|
||||
|
||||
interface FixRule {
|
||||
test: (name: string) => boolean;
|
||||
correctAgent: AgentType;
|
||||
reason: string;
|
||||
}
|
||||
|
||||
const FIX_RULES: FixRule[] = [
|
||||
// Diseases explicitly named as "virus" or "viral"
|
||||
{
|
||||
test: (name) => /\b(virus|viral|viroid)\b/i.test(name),
|
||||
correctAgent: "viral",
|
||||
reason: "Name explicitly indicates viral disease",
|
||||
},
|
||||
// Potexvirus, carlavirus, etc.
|
||||
{
|
||||
test: (name) =>
|
||||
/\b(virus\b|potex|carla|tobamo|poty|cucumo|ilar|nepo|tymovirus|geminivir|tom bushy stunt)\b/i.test(
|
||||
name,
|
||||
),
|
||||
correctAgent: "viral",
|
||||
reason: "Recognized virus genus in name",
|
||||
},
|
||||
// "Mosaic" diseases (typically viral)
|
||||
{
|
||||
test: (name) => /\bmosaic\b/i.test(name),
|
||||
correctAgent: "viral",
|
||||
reason: "Mosaic symptoms are typically caused by viruses",
|
||||
},
|
||||
// "Mottle" diseases (typically viral)
|
||||
{
|
||||
test: (name) => /\bmottle\b/i.test(name),
|
||||
correctAgent: "viral",
|
||||
reason: "Mottle symptoms are typically caused by viruses",
|
||||
},
|
||||
// "Ringspot" diseases (typically viral)
|
||||
{
|
||||
test: (name) => /\bringspot\b/i.test(name),
|
||||
correctAgent: "viral",
|
||||
reason: "Ringspot symptoms are typically caused by viruses",
|
||||
},
|
||||
// "Leaf curl" (many are viral)
|
||||
{
|
||||
test: (name) => /\bleaf curl\b|\bleafroll\b|\bleaf-roll\b/i.test(name),
|
||||
correctAgent: "viral",
|
||||
reason: "Leaf curl/roll diseases are often viral",
|
||||
},
|
||||
// "Rosette" (often viral or phytoplasma)
|
||||
{
|
||||
test: (name) => /\brosette\b/i.test(name),
|
||||
correctAgent: "viral",
|
||||
reason: "Rosette diseases are typically viral or phytoplasma",
|
||||
},
|
||||
// "Yellows" (often phytoplasma/viral)
|
||||
{
|
||||
test: (name) => /\byellows\b/i.test(name) && !/\bpeach\b/i.test(name),
|
||||
correctAgent: "viral",
|
||||
reason: "Yellows diseases are typically phytoplasma or viral",
|
||||
},
|
||||
// "Stunt" / "Dwarf" (often viral)
|
||||
{
|
||||
test: (name) => /\b(stunt|dwarf(ism)?)\b/i.test(name),
|
||||
correctAgent: "viral",
|
||||
reason: "Stunting/dwarfing diseases are often viral",
|
||||
},
|
||||
// Explicit bacterial in name
|
||||
{
|
||||
test: (name) =>
|
||||
/\bbacterial\b|\bbacterium\b|\berwinia\b|\bpseudomonas\b|\bxanthomonas\b|\bralstonia\b|\bclavibacter\b|\bstreptomyces\b|\bagrobacterium\b/i.test(
|
||||
name,
|
||||
),
|
||||
correctAgent: "bacterial",
|
||||
reason: "Name indicates bacterial disease",
|
||||
},
|
||||
// Environmental/abiotic indicators
|
||||
{
|
||||
test: (name) =>
|
||||
/\b(deficiency|abiotic|environmental|injury|damage|stress|sunscald|sunburn|chilling|freeze|frost|wind|hail|nutrient|toxicity|snow\s+(mold|scald)|winter\s+(injury|rot|kill))\b/i.test(
|
||||
name,
|
||||
),
|
||||
correctAgent: "environmental",
|
||||
reason: "Name indicates abiotic/environmental cause",
|
||||
},
|
||||
];
|
||||
|
||||
async function main() {
|
||||
console.log("🔍 Fixing disease classifications\n");
|
||||
const db = getDb();
|
||||
const allDiseases = await db
|
||||
.select({ id: diseases.id, name: diseases.name, causalAgentType: diseases.causalAgentType })
|
||||
.from(diseases)
|
||||
.all();
|
||||
console.log(`📋 ${allDiseases.length} total diseases\n`);
|
||||
|
||||
const rawClient = createClient({
|
||||
url: process.env.DATABASE_URL!,
|
||||
authToken: process.env.DATABASE_TOKEN!,
|
||||
});
|
||||
|
||||
const updates: { id: string; newAgent: AgentType; rule: FixRule; oldAgent: string }[] = [];
|
||||
|
||||
for (const d of allDiseases) {
|
||||
for (const rule of FIX_RULES) {
|
||||
if (rule.test(d.name)) {
|
||||
if (d.causalAgentType !== rule.correctAgent) {
|
||||
updates.push({
|
||||
id: d.id,
|
||||
newAgent: rule.correctAgent,
|
||||
rule,
|
||||
oldAgent: d.causalAgentType,
|
||||
});
|
||||
}
|
||||
break; // First matching rule wins
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
console.log(`Found ${updates.length} diseases needing reclassification:\n`);
|
||||
|
||||
// Group by correction type
|
||||
const grouped: Record<string, { from: string; to: string; items: string[] }> = {};
|
||||
for (const u of updates) {
|
||||
const key = `${u.oldAgent}→${u.newAgent}`;
|
||||
if (!grouped[key]) grouped[key] = { from: u.oldAgent, to: u.newAgent, items: [] };
|
||||
grouped[key].items.push(` ${u.id}`);
|
||||
}
|
||||
|
||||
for (const [, g] of Object.entries(grouped)) {
|
||||
console.log(`${g.from} → ${g.to} (${g.items.length} diseases):`);
|
||||
g.items.slice(0, 10).forEach((l) => console.log(l));
|
||||
if (g.items.length > 10) console.log(` ... and ${g.items.length - 10} more`);
|
||||
console.log();
|
||||
}
|
||||
|
||||
// Apply updates
|
||||
if (updates.length === 0) {
|
||||
console.log("✅ No corrections needed");
|
||||
} else {
|
||||
console.log(`Applying ${updates.length} corrections...\n`);
|
||||
|
||||
// Batch update in groups of 50
|
||||
for (let i = 0; i < updates.length; i += 50) {
|
||||
const batch = updates.slice(i, i + 50);
|
||||
await rawClient.batch(
|
||||
batch.map((u) => ({
|
||||
sql: "UPDATE diseases SET causal_agent_type = ?, updated_at = datetime('now') WHERE id = ?",
|
||||
args: [u.newAgent, u.id],
|
||||
})),
|
||||
"write",
|
||||
);
|
||||
process.stdout.write(` ${Math.min(i + 50, updates.length)}/${updates.length}\n`);
|
||||
}
|
||||
|
||||
console.log(`\n✅ ${updates.length} diseases reclassified`);
|
||||
}
|
||||
|
||||
// Print summary stats
|
||||
const after = await db.select({ causalAgentType: diseases.causalAgentType }).from(diseases).all();
|
||||
const counts: Record<string, number> = {};
|
||||
after.forEach((d) => {
|
||||
counts[d.causalAgentType] = (counts[d.causalAgentType] || 0) + 1;
|
||||
});
|
||||
console.log("\n📊 Updated distribution:");
|
||||
for (const [type, count] of Object.entries(counts).sort()) {
|
||||
console.log(` ${type}: ${count}`);
|
||||
}
|
||||
|
||||
rawClient.close();
|
||||
closeDb();
|
||||
}
|
||||
|
||||
main().catch((err) => {
|
||||
console.error("\n❌", err);
|
||||
process.exit(1);
|
||||
});
|
||||
Reference in New Issue
Block a user