Compare commits
3 Commits
06295c83ca
...
96de91e86c
| Author | SHA1 | Date | |
|---|---|---|---|
| 96de91e86c | |||
| db4c656730 | |||
| 47609e5e42 |
1
apps/web/data/.gitignore
vendored
Normal file
1
apps/web/data/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
dataset
|
||||
14
apps/web/drizzle/0004_add-flagged-content.sql
Normal file
14
apps/web/drizzle/0004_add-flagged-content.sql
Normal file
@@ -0,0 +1,14 @@
|
||||
CREATE TABLE `flagged_content` (
|
||||
`id` text PRIMARY KEY NOT NULL,
|
||||
`content_type` text NOT NULL,
|
||||
`content_id` text NOT NULL,
|
||||
`field_name` text NOT NULL,
|
||||
`notes` text DEFAULT '',
|
||||
`flag_count` integer DEFAULT 1 NOT NULL,
|
||||
`created_at` text DEFAULT (datetime('now')) NOT NULL,
|
||||
`updated_at` text DEFAULT (datetime('now')) NOT NULL
|
||||
);
|
||||
--> statement-breakpoint
|
||||
CREATE INDEX `idx_flagged_content_type` ON `flagged_content` (`content_type`);
|
||||
--> statement-breakpoint
|
||||
CREATE INDEX `idx_flagged_content_id` ON `flagged_content` (`content_id`);
|
||||
469
apps/web/drizzle/meta/0004_snapshot.json
Normal file
469
apps/web/drizzle/meta/0004_snapshot.json
Normal file
@@ -0,0 +1,469 @@
|
||||
{
|
||||
"version": "6",
|
||||
"dialect": "sqlite",
|
||||
"id": "04ff83bd-e207-44d3-b8b7-8f82157bbeb9",
|
||||
"prevId": "04ff83bd-e207-44d3-b8b7-8f82157bbeb8",
|
||||
"tables": {
|
||||
"diseases": {
|
||||
"name": "diseases",
|
||||
"columns": {
|
||||
"id": {
|
||||
"name": "id",
|
||||
"type": "text",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"plant_id": {
|
||||
"name": "plant_id",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"name": {
|
||||
"name": "name",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"scientific_name": {
|
||||
"name": "scientific_name",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false,
|
||||
"default": "''"
|
||||
},
|
||||
"causal_agent_type": {
|
||||
"name": "causal_agent_type",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"description": {
|
||||
"name": "description",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false,
|
||||
"default": "''"
|
||||
},
|
||||
"symptoms": {
|
||||
"name": "symptoms",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false,
|
||||
"default": "'[]'"
|
||||
},
|
||||
"causes": {
|
||||
"name": "causes",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false,
|
||||
"default": "'[]'"
|
||||
},
|
||||
"treatment": {
|
||||
"name": "treatment",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false,
|
||||
"default": "'[]'"
|
||||
},
|
||||
"prevention": {
|
||||
"name": "prevention",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false,
|
||||
"default": "'[]'"
|
||||
},
|
||||
"lookalike_ids": {
|
||||
"name": "lookalike_ids",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false,
|
||||
"default": "'[]'"
|
||||
},
|
||||
"prevalence": {
|
||||
"name": "prevalence",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false,
|
||||
"default": "'uncommon'"
|
||||
},
|
||||
"severity": {
|
||||
"name": "severity",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"image_url": {
|
||||
"name": "image_url",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false,
|
||||
"default": "''"
|
||||
},
|
||||
"source_url": {
|
||||
"name": "source_url",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false,
|
||||
"default": "''"
|
||||
},
|
||||
"created_at": {
|
||||
"name": "created_at",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false,
|
||||
"default": "(datetime('now'))"
|
||||
},
|
||||
"updated_at": {
|
||||
"name": "updated_at",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false,
|
||||
"default": "(datetime('now'))"
|
||||
}
|
||||
},
|
||||
"indexes": {
|
||||
"idx_diseases_plant_id": {
|
||||
"name": "idx_diseases_plant_id",
|
||||
"columns": ["plant_id"],
|
||||
"isUnique": false
|
||||
},
|
||||
"idx_diseases_causal_agent": {
|
||||
"name": "idx_diseases_causal_agent",
|
||||
"columns": ["causal_agent_type"],
|
||||
"isUnique": false
|
||||
},
|
||||
"idx_diseases_severity": {
|
||||
"name": "idx_diseases_severity",
|
||||
"columns": ["severity"],
|
||||
"isUnique": false
|
||||
},
|
||||
"idx_diseases_prevalence": {
|
||||
"name": "idx_diseases_prevalence",
|
||||
"columns": ["prevalence"],
|
||||
"isUnique": false
|
||||
}
|
||||
},
|
||||
"foreignKeys": {
|
||||
"diseases_plant_id_plants_id_fk": {
|
||||
"name": "diseases_plant_id_plants_id_fk",
|
||||
"tableFrom": "diseases",
|
||||
"tableTo": "plants",
|
||||
"columnsFrom": ["plant_id"],
|
||||
"columnsTo": ["id"],
|
||||
"onDelete": "no action",
|
||||
"onUpdate": "no action"
|
||||
}
|
||||
},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {},
|
||||
"checkConstraints": {}
|
||||
},
|
||||
"flagged_content": {
|
||||
"name": "flagged_content",
|
||||
"columns": {
|
||||
"id": {
|
||||
"name": "id",
|
||||
"type": "text",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"content_type": {
|
||||
"name": "content_type",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"content_id": {
|
||||
"name": "content_id",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"field_name": {
|
||||
"name": "field_name",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"notes": {
|
||||
"name": "notes",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": "''"
|
||||
},
|
||||
"flag_count": {
|
||||
"name": "flag_count",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false,
|
||||
"default": 1
|
||||
},
|
||||
"created_at": {
|
||||
"name": "created_at",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false,
|
||||
"default": "(datetime('now'))"
|
||||
},
|
||||
"updated_at": {
|
||||
"name": "updated_at",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false,
|
||||
"default": "(datetime('now'))"
|
||||
}
|
||||
},
|
||||
"indexes": {
|
||||
"idx_flagged_content_type": {
|
||||
"name": "idx_flagged_content_type",
|
||||
"columns": ["content_type"],
|
||||
"isUnique": false
|
||||
},
|
||||
"idx_flagged_content_id": {
|
||||
"name": "idx_flagged_content_id",
|
||||
"columns": ["content_id"],
|
||||
"isUnique": false
|
||||
}
|
||||
},
|
||||
"foreignKeys": {},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {},
|
||||
"checkConstraints": {}
|
||||
},
|
||||
"plant_views": {
|
||||
"name": "plant_views",
|
||||
"columns": {
|
||||
"plant_id": {
|
||||
"name": "plant_id",
|
||||
"type": "text",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"view_count": {
|
||||
"name": "view_count",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false,
|
||||
"default": 0
|
||||
}
|
||||
},
|
||||
"indexes": {
|
||||
"idx_plant_views_count": {
|
||||
"name": "idx_plant_views_count",
|
||||
"columns": ["view_count"],
|
||||
"isUnique": false
|
||||
}
|
||||
},
|
||||
"foreignKeys": {
|
||||
"plant_views_plant_id_plants_id_fk": {
|
||||
"name": "plant_views_plant_id_plants_id_fk",
|
||||
"tableFrom": "plant_views",
|
||||
"tableTo": "plants",
|
||||
"columnsFrom": ["plant_id"],
|
||||
"columnsTo": ["id"],
|
||||
"onDelete": "no action",
|
||||
"onUpdate": "no action"
|
||||
}
|
||||
},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {},
|
||||
"checkConstraints": {}
|
||||
},
|
||||
"plants": {
|
||||
"name": "plants",
|
||||
"columns": {
|
||||
"id": {
|
||||
"name": "id",
|
||||
"type": "text",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"common_name": {
|
||||
"name": "common_name",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"scientific_name": {
|
||||
"name": "scientific_name",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"family": {
|
||||
"name": "family",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"category": {
|
||||
"name": "category",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"care_summary": {
|
||||
"name": "care_summary",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false,
|
||||
"default": "''"
|
||||
},
|
||||
"image_url": {
|
||||
"name": "image_url",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false,
|
||||
"default": "''"
|
||||
},
|
||||
"created_at": {
|
||||
"name": "created_at",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false,
|
||||
"default": "(datetime('now'))"
|
||||
},
|
||||
"updated_at": {
|
||||
"name": "updated_at",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false,
|
||||
"default": "(datetime('now'))"
|
||||
}
|
||||
},
|
||||
"indexes": {
|
||||
"idx_plants_category": {
|
||||
"name": "idx_plants_category",
|
||||
"columns": ["category"],
|
||||
"isUnique": false
|
||||
},
|
||||
"idx_plants_common_name": {
|
||||
"name": "idx_plants_common_name",
|
||||
"columns": ["common_name"],
|
||||
"isUnique": false
|
||||
}
|
||||
},
|
||||
"foreignKeys": {},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {},
|
||||
"checkConstraints": {}
|
||||
},
|
||||
"scrape_sources": {
|
||||
"name": "scrape_sources",
|
||||
"columns": {
|
||||
"id": {
|
||||
"name": "id",
|
||||
"type": "text",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"source_type": {
|
||||
"name": "source_type",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"source_url": {
|
||||
"name": "source_url",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"last_scraped_at": {
|
||||
"name": "last_scraped_at",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"entries_count": {
|
||||
"name": "entries_count",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": 0
|
||||
},
|
||||
"status": {
|
||||
"name": "status",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false,
|
||||
"default": "'pending'"
|
||||
},
|
||||
"error_message": {
|
||||
"name": "error_message",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"created_at": {
|
||||
"name": "created_at",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false,
|
||||
"default": "(datetime('now'))"
|
||||
}
|
||||
},
|
||||
"indexes": {},
|
||||
"foreignKeys": {},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {},
|
||||
"checkConstraints": {}
|
||||
}
|
||||
},
|
||||
"views": {},
|
||||
"enums": {},
|
||||
"_meta": {
|
||||
"schemas": {},
|
||||
"tables": {},
|
||||
"columns": {}
|
||||
},
|
||||
"internal": {
|
||||
"indexes": {}
|
||||
}
|
||||
}
|
||||
@@ -29,6 +29,13 @@
|
||||
"when": 1749268800000,
|
||||
"tag": "0003_giant_toad",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 4,
|
||||
"version": "6",
|
||||
"when": 1751846400000,
|
||||
"tag": "0004_add-flagged-content",
|
||||
"breakpoints": true
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
@@ -8,7 +8,10 @@
|
||||
"start": "next start",
|
||||
"lint": "eslint",
|
||||
"test": "vitest run",
|
||||
"test:watch": "vitest"
|
||||
"test:watch": "vitest",
|
||||
"flagged-report": "npx tsx scripts/generate-flagged-report.ts",
|
||||
"flagged-report:all": "npx tsx scripts/generate-flagged-report.ts --min-flags=1",
|
||||
"migrate:flag-system": "npx tsx scripts/apply-flag-migration.ts"
|
||||
},
|
||||
"dependencies": {
|
||||
"@libsql/client": "^0.17.3",
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
11
apps/web/scripts/.flagged-content-review-needed.md
Normal file
11
apps/web/scripts/.flagged-content-review-needed.md
Normal file
@@ -0,0 +1,11 @@
|
||||
# 🚩 Flagged Content Review — Nothing to Review
|
||||
|
||||
Generated: 2026-06-06T21:02:03.301Z
|
||||
|
||||
**No content has been flagged for review yet.**
|
||||
|
||||
Flagged items will appear here once users flag content for manual review.
|
||||
|
||||
---
|
||||
|
||||
_Report generated with min-flags=1_
|
||||
53
apps/web/scripts/apply-flag-migration.ts
Normal file
53
apps/web/scripts/apply-flag-migration.ts
Normal file
@@ -0,0 +1,53 @@
|
||||
/**
|
||||
* apply-flag-migration.ts
|
||||
*
|
||||
* Applies the flagged_content table migration to Turso.
|
||||
* Run with: npx tsx scripts/apply-flag-migration.ts
|
||||
*/
|
||||
|
||||
import dotenv from "dotenv";
|
||||
import path from "node:path";
|
||||
|
||||
const envFile =
|
||||
process.env.NODE_ENV === "production" ? "../.env.production" : "../.env.development";
|
||||
dotenv.config({ path: path.resolve(__dirname, envFile) });
|
||||
|
||||
import { createClient } from "@libsql/client";
|
||||
|
||||
/**
 * Creates the `flagged_content` table and its two lookup indexes if they do
 * not exist yet. Every statement uses `IF NOT EXISTS`, so re-running the
 * script against an already-migrated database is a no-op.
 *
 * Connection settings are read from the `DATABASE_URL` and `DATABASE_TOKEN`
 * environment variables.
 *
 * @throws Error when `DATABASE_URL` is unset, or when any statement fails.
 */
async function main(): Promise<void> {
  const url = process.env.DATABASE_URL;
  if (!url) {
    // Fail with a readable message instead of handing `undefined` to the client.
    throw new Error(
      "DATABASE_URL is not set; cannot apply the flagged_content migration.",
    );
  }

  const db = createClient({
    url,
    // Left optional on purpose — presumably not required for every kind of
    // database URL; TODO confirm against the deployment setup.
    authToken: process.env.DATABASE_TOKEN,
  });

  try {
    console.log("Applying migration: create flagged_content table...");

    await db.execute(`
      CREATE TABLE IF NOT EXISTS flagged_content (
        id text PRIMARY KEY NOT NULL,
        content_type text NOT NULL,
        content_id text NOT NULL,
        field_name text NOT NULL,
        notes text DEFAULT '',
        flag_count integer DEFAULT 1 NOT NULL,
        created_at text DEFAULT (datetime('now')) NOT NULL,
        updated_at text DEFAULT (datetime('now')) NOT NULL
      )
    `);

    await db.execute(`
      CREATE INDEX IF NOT EXISTS idx_flagged_content_type ON flagged_content (content_type)
    `);

    await db.execute(`
      CREATE INDEX IF NOT EXISTS idx_flagged_content_id ON flagged_content (content_id)
    `);

    console.log("Migration applied successfully.");
  } finally {
    // Release the connection even when one of the statements above throws.
    db.close();
  }
}

main().catch((err: unknown) => {
  console.error("Migration failed:", err);
  process.exit(1);
});
|
||||
19
apps/web/scripts/check-progress.mjs
Normal file
19
apps/web/scripts/check-progress.mjs
Normal file
@@ -0,0 +1,19 @@
|
||||
import { createClient } from "@libsql/client";
|
||||
// Prints how many rows of `diseases` have a non-empty image_url: one overall
// line, then one "<severity>: <with image>/<total>" line per severity.
const { DATABASE_URL, DATABASE_TOKEN } = process.env;
if (!DATABASE_URL) {
  // Fail early with a readable message instead of passing `undefined` on.
  throw new Error("DATABASE_URL is not set; cannot query the diseases table.");
}

const c = createClient({
  url: DATABASE_URL,
  authToken: DATABASE_TOKEN,
});

try {
  const r = await c.execute("SELECT COUNT(*) as cnt FROM diseases");
  const r2 = await c.execute(
    `SELECT SUM(CASE WHEN image_url IS NOT NULL AND image_url != '' THEN 1 ELSE 0 END) as has, SUM(CASE WHEN image_url IS NULL OR image_url = '' THEN 1 ELSE 0 END) as miss FROM diseases`,
  );
  const r3 = await c.execute(
    `SELECT severity, COUNT(*) as total, SUM(CASE WHEN image_url IS NOT NULL AND image_url != '' THEN 1 ELSE 0 END) as has FROM diseases GROUP BY severity ORDER BY severity`,
  );

  console.log(
    `Total: ${r.rows[0].cnt} | With images: ${r2.rows[0].has} | Missing: ${r2.rows[0].miss}`,
  );
  for (const row of r3.rows) {
    console.log(` ${row.severity?.padEnd(10)}: ${row.has}/${row.total}`);
  }
} finally {
  // Close the client even if one of the queries rejects.
  c.close();
}
|
||||
537
apps/web/scripts/fine-tune-model.py
Normal file
537
apps/web/scripts/fine-tune-model.py
Normal file
@@ -0,0 +1,537 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
fine-tune-model.py
|
||||
|
||||
Fine-tunes the PlantVillage MobileNetV2 model on a custom 95-class dataset
|
||||
(93 diseases + healthy + unknown).
|
||||
|
||||
Pipeline:
|
||||
1. Load `best_mnv2_pv_original.keras` (MobileNetV2 backbone + 38-class head)
|
||||
2. Replace the 38-class head with 95 classes (order matches diseases.json + healthy + unknown)
|
||||
3. Freeze backbone, train only the new classification head
|
||||
4. Unfreeze the last ~25 layers of the backbone, fine-tune at a lower learning rate
|
||||
5. Export to TF.js Layers-model format (via tensorflowjs save_keras_model)
|
||||
6. Export to .keras for future retraining
|
||||
|
||||
Usage: .tfjs-venv/bin/python scripts/fine-tune-model.py
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
|
||||
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2" # Suppress TF info/warnings
|
||||
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
import keras
|
||||
from keras import layers, optimizers, regularizers
|
||||
|
||||
# ─── Constants ───────────────────────────────────────────────────────────────
|
||||
|
||||
PROJECT_ROOT = Path(__file__).resolve().parent.parent
|
||||
MODEL_PATH = (
|
||||
PROJECT_ROOT
|
||||
/ "public"
|
||||
/ "models"
|
||||
/ "plant-disease-classifier"
|
||||
/ "best_mnv2_pv_original.keras"
|
||||
)
|
||||
DISEASES_JSON = PROJECT_ROOT / "src" / "data" / "diseases.json"
|
||||
DATASET_DIR = PROJECT_ROOT / "data" / "dataset"
|
||||
OUTPUT_DIR = PROJECT_ROOT / "public" / "models" / "plant-disease-classifier"
|
||||
TFJS_OUTPUT = OUTPUT_DIR / "tfjs_finetuned"
|
||||
|
||||
IMG_SIZE = 160 # Model input size
|
||||
BATCH_SIZE = 32
|
||||
EPOCHS_HEAD = 15 # Train just the new head
|
||||
EPOCHS_FINETUNE = 10 # Unfreeze and fine-tune
|
||||
LEARNING_RATE_HEAD = 1e-3
|
||||
LEARNING_RATE_FINETUNE = 1e-5
|
||||
VALIDATION_SPLIT = 0.15
|
||||
|
||||
NUM_CLASSES = 95 # healthy(0) + 93 diseases + unknown(94)
|
||||
|
||||
# ─── Class Mapping ───────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def build_class_mapping():
    """
    Build the class-name <-> class-index tables used for training and export.

    Layout (must match the ordering in labels.ts / diseases.json):
        index 0                  = "healthy"
        index 1 .. N             = disease ids, in diseases.json order
        index NUM_CLASSES - 1    = "unknown" (reserved; no training images)

    Returns:
        (mapping, index_to_class) where ``mapping`` maps class id -> index and
        ``index_to_class`` is the reverse lookup.

    Raises:
        ValueError: if diseases.json does not contain exactly
            ``NUM_CLASSES - 2`` entries, or if an id is duplicated or clashes
            with the reserved names "healthy" / "unknown". Either situation
            would otherwise silently map two classes onto one index.
    """
    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(DISEASES_JSON, encoding="utf-8") as f:
        diseases = json.load(f)

    expected_diseases = NUM_CLASSES - 2  # minus "healthy" and "unknown"
    if len(diseases) != expected_diseases:
        raise ValueError(
            f"{DISEASES_JSON} has {len(diseases)} diseases but NUM_CLASSES="
            f"{NUM_CLASSES} expects {expected_diseases}"
        )

    mapping = {"healthy": 0}
    for index, disease in enumerate(diseases, start=1):
        disease_id = disease["id"]
        if disease_id in mapping or disease_id == "unknown":
            raise ValueError(
                f"Duplicate or reserved class id in {DISEASES_JSON}: {disease_id!r}"
            )
        mapping[disease_id] = index
    mapping["unknown"] = NUM_CLASSES - 1  # Not trained, but reserved

    # Reverse mapping for predictions
    index_to_class = {index: class_id for class_id, index in mapping.items()}

    return mapping, index_to_class
|
||||
|
||||
|
||||
def verify_dataset(mapping):
    """
    Count the images available on disk for every class in ``mapping``.

    A class is looked up as the directory ``DATASET_DIR / <class id>``; only
    entries whose suffix is .jpg, .jpeg, .png or .webp (case-insensitive) are
    counted. Classes without a directory, or without any matching file, are
    left out of the result.

    Returns:
        (available, total): ``available`` maps class id ->
        ``{"index": <class index>, "count": <image count>}`` and ``total`` is
        the sum of all counts.
    """
    image_suffixes = (".jpg", ".jpeg", ".png", ".webp")
    found = {}

    for name, idx in mapping.items():
        folder = DATASET_DIR / name
        if not folder.exists():
            continue

        n_images = sum(
            1 for entry in folder.glob("*") if entry.suffix.lower() in image_suffixes
        )
        if n_images:
            found[name] = {"index": idx, "count": n_images}

    grand_total = sum(entry["count"] for entry in found.values())
    return found, grand_total
|
||||
|
||||
|
||||
def print_dataset_summary(available, total):
    """
    Print a human-readable overview of the dataset found on disk.

    Args:
        available: dict of class id -> {"index": int, "count": int}, as
            produced by ``verify_dataset``.
        total: total number of images across all classes.
    """
    rule = "─" * 60

    # Resolve the expected class count once: build_class_mapping() re-reads
    # diseases.json on every call.
    expected_classes = len(build_class_mapping()[0])

    print(f"\n{rule}")
    print("DATASET SUMMARY")
    print(f"{rule}")
    print(f" Total images: {total}")
    print(f" Classes found: {len(available)} / {expected_classes}")
    print(
        f" Missing classes with no images: {expected_classes - len(available)}"
    )

    # One row per class, listed alphabetically by class id.
    counts = [(v["index"], k, v["count"]) for k, v in available.items()]
    counts.sort(key=lambda row: row[1])

    print("\n Images per class:")
    for idx, class_id, count in counts:
        label = f" {idx:3d}. {class_id:<35s} {count:>4d} images"
        if class_id == "healthy":
            label += " ← 2× target"
        print(label)

    # Min / max / mean are only defined when at least one class has images.
    class_counts = [v["count"] for v in available.values()]
    if class_counts:
        print(
            f"\n Min: {min(class_counts)} Max: {max(class_counts)} Avg: {sum(class_counts) / len(class_counts):.0f}"
        )
    print(f"{rule}\n")
|
||||
|
||||
|
||||
# ─── Data Loading ────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def load_dataset(mapping, available):
    """
    Build the training and validation ``tf.data`` pipelines.

    Images are collected from ``DATASET_DIR / <class id>`` for every class in
    ``available``, shuffled with a fixed seed, and split according to
    ``VALIDATION_SPLIT``. Both splits are decoded, resized to
    ``IMG_SIZE x IMG_SIZE``, scaled to [0, 1] and finally normalised with the
    ImageNet mean/std. The training split is additionally augmented.

    Augmentation runs on the [0, 1] RGB image *before* normalisation: the
    hue/saturation ops convert through HSV and are only meaningful on
    non-negative RGB values, so applying them to mean/std-normalised data
    distorts colours.

    Args:
        mapping: class id -> index table (not used here; kept for interface
            compatibility with callers).
        available: class id -> {"index": int, "count": int}, as produced by
            ``verify_dataset``.

    Returns:
        (train_ds, val_ds): batched, prefetched datasets yielding
        ``(image, label)`` pairs.

    Raises:
        ValueError: if no image files are found for any class.
    """
    image_suffixes = (".jpg", ".jpeg", ".png", ".webp")

    # Build file paths and labels
    file_paths = []
    labels = []
    for class_id, info in available.items():
        class_dir = DATASET_DIR / class_id
        for img_path in sorted(class_dir.glob("*")):
            if img_path.suffix.lower() in image_suffixes:
                file_paths.append(str(img_path))
                labels.append(info["index"])

    if not file_paths:
        raise ValueError(f"No training images found under {DATASET_DIR}")

    file_paths = np.array(file_paths)
    labels = np.array(labels)

    # Shuffle with a fixed seed so the train/validation split is reproducible.
    indices = np.random.RandomState(42).permutation(len(file_paths))
    file_paths = file_paths[indices]
    labels = labels[indices]

    # Split train/validation
    split = int(len(file_paths) * (1 - VALIDATION_SPLIT))
    train_paths, val_paths = file_paths[:split], file_paths[split:]
    train_labels, val_labels = labels[:split], labels[split:]

    print(f" Train: {len(train_paths)} images")
    print(f" Val: {len(val_paths)} images")

    def decode_image(image_path, label):
        """Read, decode and resize one image; output is float32 RGB in [0, 1]."""
        img = tf.io.read_file(image_path)
        img = tf.image.decode_image(img, channels=3, expand_animations=False)
        img = tf.image.resize(img, [IMG_SIZE, IMG_SIZE])
        img = tf.cast(img, tf.float32) / 255.0
        return img, label

    def normalize(image, label):
        """ImageNet mean/std normalisation (matching training-time preprocessing)."""
        mean = tf.constant([0.485, 0.456, 0.406])
        std = tf.constant([0.229, 0.224, 0.225])
        return (image - mean) / std, label

    def augment(image, label):
        """Data augmentation for the training set; expects RGB in [0, 1]."""
        # Random horizontal and vertical flips
        image = tf.image.random_flip_left_right(image)
        image = tf.image.random_flip_up_down(image)
        # Random brightness / contrast / saturation / hue
        image = tf.image.random_brightness(image, 0.15)
        image = tf.image.random_contrast(image, 0.8, 1.2)
        image = tf.image.random_saturation(image, 0.8, 1.2)
        image = tf.image.random_hue(image, 0.05)
        # Colour jitter can leave [0, 1]; clip before the geometric step.
        image = tf.clip_by_value(image, 0.0, 1.0)
        # Random crop: scale up slightly, then cut a random IMG_SIZE window.
        image = tf.image.resize(image, [IMG_SIZE + 12, IMG_SIZE + 12])
        image = tf.image.random_crop(image, [IMG_SIZE, IMG_SIZE, 3])
        return image, label

    # Create tf.data datasets
    train_ds = tf.data.Dataset.from_tensor_slices((train_paths, train_labels))
    train_ds = train_ds.map(decode_image, num_parallel_calls=tf.data.AUTOTUNE)
    train_ds = train_ds.map(augment, num_parallel_calls=tf.data.AUTOTUNE)
    train_ds = train_ds.map(normalize, num_parallel_calls=tf.data.AUTOTUNE)
    train_ds = train_ds.shuffle(1000).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

    # Validation goes through the same decode + normalise steps, unaugmented.
    val_ds = tf.data.Dataset.from_tensor_slices((val_paths, val_labels))
    val_ds = val_ds.map(decode_image, num_parallel_calls=tf.data.AUTOTUNE)
    val_ds = val_ds.map(normalize, num_parallel_calls=tf.data.AUTOTUNE)
    val_ds = val_ds.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

    return train_ds, val_ds
|
||||
|
||||
|
||||
# ─── Model Building ──────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def build_model():
    """
    Load the PlantVillage model and replace its classification head with a
    ``NUM_CLASSES``-way softmax.

    The layers of the loaded model are frozen, and a new ``dropout_new`` ->
    ``dense_new`` head is attached to the output of the
    ``global_average_pooling2d`` layer.

    Returns:
        keras.Model named "plant-disease-classifier-v2" sharing the loaded
        model's input.

    Exits the process with status 1 if ``MODEL_PATH`` does not exist.
    """

    def count_params(weights):
        # Works whether a weight's shape is a plain tuple or a TensorShape.
        return sum(int(np.prod(tuple(w.shape))) for w in weights)

    print(f"\nLoading base model from: {MODEL_PATH}")
    if not MODEL_PATH.exists():
        print(f"ERROR: Model not found at {MODEL_PATH}")
        sys.exit(1)

    base_model = keras.models.load_model(str(MODEL_PATH))
    print(f" Base model loaded: {type(base_model).__name__}")
    print(f" Input shape: {base_model.input_shape}")
    print(f" Output shape: {base_model.output_shape}")

    # Extract backbone — everything up to the GlobalAveragePooling2D
    # The model structure is:
    # input_layer_2 → mobilenetv2_1.00_160 → global_average_pooling2d → dropout → dense(38)
    backbone_output = base_model.get_layer("global_average_pooling2d").output
    print(" Using backbone output: global_average_pooling2d")

    # Freeze all backbone layers initially
    # (they are unfrozen later for fine-tuning)
    for layer in base_model.layers:
        if layer.name != "dense":  # The old head is replaced anyway
            layer.trainable = False

    # Build new classification head
    x = backbone_output
    x = layers.Dropout(0.3, name="dropout_new")(x)
    x = layers.Dense(
        NUM_CLASSES,
        activation="softmax",
        name="dense_new",
        kernel_regularizer=regularizers.l2(1e-4),
    )(x)

    # Create new model
    model = keras.Model(
        inputs=base_model.input, outputs=x, name="plant-disease-classifier-v2"
    )

    print(f" New model input: {model.input_shape}")
    print(f" New model output: {model.output_shape} ({NUM_CLASSES} classes)")

    # Report parameter counts. The backbone was frozen above, so its
    # trainable_weights are empty; count all of its weights to report how
    # many parameters are being held fixed.
    backbone_frozen = sum(
        count_params(layer.weights)
        for layer in base_model.layers
        if layer.name != "dense"
    )
    head_trainable = count_params(model.get_layer("dense_new").trainable_weights)

    print(f" Backbone frozen: {backbone_frozen:,} params (not training)")
    print(f" New head: {head_trainable:,} params (training)")

    return model
|
||||
|
||||
|
||||
# ─── Training ────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def train_head(model, train_ds, val_ds):
    """
    Stage 1: train only the new classification head (``dense_new``).

    Every other layer of ``model`` is frozen, the model is compiled with Adam
    at ``LEARNING_RATE_HEAD`` and fitted for up to ``EPOCHS_HEAD`` epochs with
    early stopping (on val_accuracy) and LR reduction (on val_loss).

    Args:
        model: model returned by ``build_model``.
        train_ds: training dataset of (image, label) batches.
        val_ds: validation dataset of (image, label) batches.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """

    def count_params(weights):
        # Works whether a weight's shape is a plain tuple or a TensorShape.
        return sum(int(np.prod(tuple(w.shape))) for w in weights)

    print(f"\n{'=' * 60}")
    print("STAGE 1: Training classification head")
    print(f"{'=' * 60}")
    print(f" Epochs: {EPOCHS_HEAD}")
    print(f" Learning rate: {LEARNING_RATE_HEAD}")
    print(f" Batch size: {BATCH_SIZE}")

    # Only the new dense head learns in this stage.
    for layer in model.layers:
        layer.trainable = layer.name == "dense_new"

    # Verify
    trainable = count_params(model.trainable_weights)
    total = count_params(model.weights)
    print(f" Trainable params: {trainable:,} / {total:,} total")

    model.compile(
        optimizer=optimizers.Adam(learning_rate=LEARNING_RATE_HEAD),
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy", "sparse_top_k_categorical_accuracy"],
    )

    history = model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=EPOCHS_HEAD,
        verbose=1,
        callbacks=[
            keras.callbacks.EarlyStopping(
                monitor="val_accuracy",
                patience=3,
                restore_best_weights=True,
            ),
            keras.callbacks.ReduceLROnPlateau(
                monitor="val_loss",
                factor=0.5,
                patience=2,
                min_lr=1e-6,
            ),
        ],
    )

    # EarlyStopping may have restored the best epoch's weights, so the last
    # epoch's score alone can understate the model that is carried forward.
    val_acc = history.history["val_accuracy"]
    print(
        f"\n Stage 1 complete! Val accuracy: {val_acc[-1]:.4f} (best epoch: {max(val_acc):.4f})"
    )
    return history
|
||||
|
||||
|
||||
def train_finetune(model, train_ds, val_ds):
    """
    Stage 2: unfreeze the last ~25 layers of the backbone and fine-tune.

    The nested ``mobilenetv2_1.00_160`` model has its last 25 layers made
    trainable (all earlier ones frozen), the new head stays trainable, and the
    model is recompiled with Adam at ``LEARNING_RATE_FINETUNE`` and fitted for
    up to ``EPOCHS_FINETUNE`` epochs.

    Args:
        model: model previously trained by ``train_head``.
        train_ds: training dataset of (image, label) batches.
        val_ds: validation dataset of (image, label) batches.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """

    def count_params(weights):
        # Works whether a weight's shape is a plain tuple or a TensorShape.
        return sum(int(np.prod(tuple(w.shape))) for w in weights)

    print(f"\n{'=' * 60}")
    print("STAGE 2: Fine-tuning backbone (last ~25 layers)")
    print(f"{'=' * 60}")
    print(f" Epochs: {EPOCHS_FINETUNE}")
    print(f" Learning rate: {LEARNING_RATE_FINETUNE}")

    # Find the MobileNetV2 functional module
    # The backbone is a Functional model inside the base model
    mobilenet_layer = model.get_layer("mobilenetv2_1.00_160")

    # Unfreeze the last ~25 layers of the backbone
    total_backbone_layers = len(mobilenet_layer.layers)
    unfreeze_from = max(0, total_backbone_layers - 25)
    print(
        f" Backbone has {total_backbone_layers} layers, unfreezing from layer {unfreeze_from}"
    )

    # Stage 1 froze the container itself. Re-enable it first so that the
    # per-layer flags set below are not masked on Keras versions where a
    # non-trainable container reports no trainable weights.
    mobilenet_layer.trainable = True
    for i, layer in enumerate(mobilenet_layer.layers):
        layer.trainable = i >= unfreeze_from
    # NOTE(review): this also unfreezes any BatchNormalization layers in the
    # unfrozen tail — confirm that is intended for fine-tuning.

    # Also unfreeze the new head
    model.get_layer("dense_new").trainable = True
    model.get_layer("dropout_new").trainable = True

    trainable = count_params(model.trainable_weights)
    total = count_params(model.weights)
    print(f" Trainable params: {trainable:,} / {total:,} total")

    # Recompile so the changed trainable flags and the lower LR take effect.
    model.compile(
        optimizer=optimizers.Adam(learning_rate=LEARNING_RATE_FINETUNE),
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy", "sparse_top_k_categorical_accuracy"],
    )

    history = model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=EPOCHS_FINETUNE,
        verbose=1,
        callbacks=[
            keras.callbacks.EarlyStopping(
                monitor="val_accuracy",
                patience=3,
                restore_best_weights=True,
            ),
            keras.callbacks.ReduceLROnPlateau(
                monitor="val_loss",
                factor=0.5,
                patience=2,
                min_lr=1e-7,
            ),
        ],
    )

    # EarlyStopping may have restored the best epoch's weights, so the last
    # epoch's score alone can understate the model that is exported.
    val_acc = history.history["val_accuracy"]
    print(
        f"\n Stage 2 complete! Val accuracy: {val_acc[-1]:.4f} (best epoch: {max(val_acc):.4f})"
    )
    return history
|
||||
|
||||
|
||||
# ─── Export ──────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def export_models(model, class_mapping, index_to_class):
    """Persist the trained model in every format the project consumes.

    Writes, in order:
      1. ``model-finetuned.keras`` under ``OUTPUT_DIR`` (for future retraining),
      2. ``class_mapping.json`` under ``OUTPUT_DIR``,
      3. a TF.js export under ``TFJS_OUTPUT`` (any previous export directory is
         removed first).

    A failing TF.js export is reported on stdout together with the manual
    converter command; it does not raise.

    Args:
        model: Trained model exposing ``save(path)``.
        class_mapping: Stored in the JSON file under ``class_to_index``.
        index_to_class: Stored in the JSON file under ``index_to_class``.
    """
    rule = "=" * 60
    print(f"\n{rule}")
    print("EXPORTING")
    print(f"{rule}")

    # Step 1: native Keras file.
    keras_path = OUTPUT_DIR / "model-finetuned.keras"
    model.save(str(keras_path))
    print(f" ✓ Saved .keras: {keras_path}")

    # Step 2: class mapping next to the model.
    mapping_path = OUTPUT_DIR / "class_mapping.json"
    mapping_payload = {
        "index_to_class": index_to_class,
        "class_to_index": class_mapping,
        "num_classes": NUM_CLASSES,
        "input_size": IMG_SIZE,
    }
    with open(mapping_path, "w") as mapping_file:
        mapping_file.write(json.dumps(mapping_payload, indent=2))
    print(f" ✓ Saved class mapping: {mapping_path}")

    # Step 3: TF.js export into a clean directory.
    tfjs_path = str(TFJS_OUTPUT)
    if TFJS_OUTPUT.exists():
        shutil.rmtree(tfjs_path)

    try:
        # Imported lazily so a missing tensorflowjs package only disables this step.
        import tensorflowjs as tfjs

        tfjs.converters.save_keras_model(model, tfjs_path)
        print(f" ✓ Saved TF.js: {tfjs_path}/")
        for artifact in sorted(TFJS_OUTPUT.iterdir()):
            n_bytes = artifact.stat().st_size
            print(f" {artifact.name:<30s} {n_bytes:>10,} bytes")
    except Exception as exc:
        print(f" ⚠ TF.js export failed: {exc}")
        print(
            f" Run later: tensorflowjs_converter --input_format=keras {keras_path} {tfjs_path}"
        )
|
||||
|
||||
|
||||
# ─── Cleanup Old Model Files ────────────────────────────────────────────────
|
||||
|
||||
|
||||
def cleanup_old_model():
    """Delete a previous TF.js export (model.json and weight shards) from OUTPUT_DIR."""
    # model.json is handled first, then the group1-shard* weight files.
    for pattern in ("model.json", "group1-shard*"):
        for stale in OUTPUT_DIR.glob(pattern):
            print(f" Removing old: {stale.name}")
            stale.unlink()
|
||||
|
||||
|
||||
# ─── Main ────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def main():
    """Run the fine-tuning pipeline end to end.

    Steps: build the class mapping, verify the dataset on disk, load it,
    build the model, train (head stage, then fine-tune stage), and export.

    Exits with status 1 when the dataset directory is missing or when
    verification finds no images; in the latter case nothing is loaded,
    built, or exported.
    """
    print("=" * 60)
    print("PLANT DISEASE MODEL FINE-TUNER")
    print("=" * 60)

    # 1. Build class mapping
    print("\n[1/5] Building class mapping...")
    class_mapping, index_to_class = build_class_mapping()
    print(
        f" {len(class_mapping)} classes defined (0=healthy, 1-93=diseases, 94=unknown)"
    )

    # 2. Verify dataset
    print("\n[2/5] Verifying dataset...")
    if not DATASET_DIR.exists():
        print(f" ERROR: Dataset not found at {DATASET_DIR}")
        print(" Run the scraper first: npx tsx scripts/scrape-training-dataset.ts")
        sys.exit(1)

    available, total = verify_dataset(class_mapping)
    print_dataset_summary(available, total)

    # Bail out before the expensive dataset load / model build when there is
    # nothing to train on.
    if total <= 0:
        print("\n Skipping training — no dataset available.")
        sys.exit(1)

    if total < 100:
        print(f" WARNING: Only {total} images. Consider scraping more data.")
        # No input is read here, so do not phrase this as a question.
        print(" Continuing anyway — press Ctrl+C to abort.")

    # 3. Load dataset
    print("\n[3/5] Loading and augmenting dataset...")
    train_ds, val_ds = load_dataset(class_mapping, available)

    # 4. Build and train model
    print("\n[4/5] Building model...")
    model = build_model()
    model.summary()

    train_head(model, train_ds, val_ds)
    train_finetune(model, train_ds, val_ds)

    # 5. Export
    print("\n[5/5] Exporting models...")
    cleanup_old_model()
    export_models(model, class_mapping, index_to_class)

    # ── Final Summary ────────────────────────────────────────────────────────

    print(f"\n{'=' * 60}")
    print("DONE! Model fine-tuned and exported.")
    print(f"{'=' * 60}")
    print("\nFiles created:")
    print(f" {OUTPUT_DIR / 'model-finetuned.keras'}")
    print(f" {OUTPUT_DIR / 'class_mapping.json'}")
    print(f" {TFJS_OUTPUT / 'model.json'}")
    print("\nTo update your app:")
    print(" 1. Replace model files:")
    print(f" cp {TFJS_OUTPUT / 'model.json'} {OUTPUT_DIR / 'model.json'}")
    # Joining a path with '/' yields the filesystem root, so the destination
    # directory is formatted explicitly instead.
    print(f" cp {TFJS_OUTPUT / 'group1-shard*'} {OUTPUT_DIR}/")
    print(" 2. Restart the dev server")
    print(" 3. Test with: POST /api/identify")
    print("\nNote: Update labels.ts if the class order changed.")
|
||||
|
||||
|
||||
# Run the pipeline only when this file is executed as a script, not on import.
if "__main__" == __name__:
    main()
|
||||
385
apps/web/scripts/generate-flagged-report.ts
Normal file
385
apps/web/scripts/generate-flagged-report.ts
Normal file
@@ -0,0 +1,385 @@
|
||||
/**
|
||||
* generate-flagged-report.ts
|
||||
*
|
||||
* Reads all flagged content from the database and generates a pretty
|
||||
* markdown report organized by content type. The report includes:
|
||||
* - Summary table with counts per content type
|
||||
* - Plant images flagged for review
|
||||
* - Disease images flagged for review
* - Disease descriptions flagged for review
|
||||
* - Disease symptoms flagged for review
|
||||
* - Disease causes flagged for review
|
||||
* - Disease treatment steps flagged for review
|
||||
* - Disease prevention tips flagged for review
|
||||
*
|
||||
* Usage:
|
||||
* npx tsx scripts/generate-flagged-report.ts [--min-flags N] [--output path/to/report.md]
|
||||
*
|
||||
* Options:
|
||||
* --min-flags Minimum flag count to include (default: 1)
|
||||
* --output Output path (default: scripts/.flagged-content-review-needed.md)
|
||||
*/
|
||||
|
||||
import dotenv from "dotenv";
|
||||
import path from "node:path";
|
||||
|
||||
// Load DB config from .env.development (or .env.production if NODE_ENV=production)
|
||||
const envFile =
|
||||
process.env.NODE_ENV === "production" ? "../.env.production" : "../.env.development";
|
||||
dotenv.config({ path: path.resolve(__dirname, envFile) });
|
||||
import { createClient } from "@libsql/client";
|
||||
import fs from "node:fs";
|
||||
|
||||
// ─── Config ─────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Look up a command-line option by name.
 *
 * Accepts both `--name=value` and `--name value`. Everything after the first
 * `=` is kept, so values that themselves contain `=` survive intact.
 *
 * @param argv Arguments to scan (without the node binary and script path).
 * @param name Option name without the leading dashes.
 * @returns The raw option value, or `undefined` when the option is absent.
 */
function readCliOption(argv: readonly string[], name: string): string | undefined {
  const flag = `--${name}`;
  const prefix = `${flag}=`;
  for (let i = 0; i < argv.length; i++) {
    const arg = argv[i];
    if (arg === undefined) continue;
    if (arg.startsWith(prefix)) return arg.slice(prefix.length);
    if (arg === flag) {
      const next = argv[i + 1];
      // A following "--something" is another option, not this option's value.
      if (next !== undefined && !next.startsWith("--")) return next;
    }
  }
  return undefined;
}

/**
 * Convert the raw `--min-flags` value to a number.
 *
 * @param raw Raw option text, or `undefined` when the option was not given.
 * @returns The parsed threshold; `1` when the option is absent.
 * @throws Error when the value is not a non-negative integer.
 */
function parseMinFlags(raw: string | undefined): number {
  if (raw === undefined) return 1;
  const parsed = Number.parseInt(raw, 10);
  if (!Number.isInteger(parsed) || parsed < 0) {
    throw new Error(`--min-flags expects a non-negative integer, got "${raw}"`);
  }
  return parsed;
}

const cliArgs = process.argv.slice(2);

/** Minimum flag count a row needs to be included in the report. */
const MIN_FLAGS = parseMinFlags(readCliOption(cliArgs, "min-flags"));

/** Destination of the generated markdown report. */
const OUTPUT_PATH =
  readCliOption(cliArgs, "output") || path.join(__dirname, ".flagged-content-review-needed.md");
|
||||
|
||||
// ─── DB Connection ──────────────────────────────────────────────────────────
|
||||
|
||||
// Fail early with a readable message instead of handing `undefined` to the client.
const databaseUrl = process.env.DATABASE_URL;
if (!databaseUrl) {
  throw new Error(
    "DATABASE_URL is not set. Check the .env.development / .env.production file next to the scripts directory.",
  );
}

/** libSQL client used for every query in this script. */
const db = createClient({
  url: databaseUrl,
  // Passed through as-is; left undefined when DATABASE_TOKEN is not configured.
  authToken: process.env.DATABASE_TOKEN,
});
|
||||
|
||||
// ─── Types ──────────────────────────────────────────────────────────────────
|
||||
|
||||
/** One row of the `flagged_content` table, as returned by `SELECT *`. */
interface FlaggedRow {
  id: string;
  /** Kind of content that was flagged, e.g. `plant_image` or `disease_symptoms`. */
  content_type: string;
  /** Id of the flagged plant (for `plant_image`) or disease (all other types). */
  content_id: string;
  field_name: string;
  /** Free-form reviewer notes; the column has a default of '' but is nullable. */
  notes: string | null;
  flag_count: number;
  created_at: string;
  updated_at: string;
}
|
||||
|
||||
/** Columns this script selects from the `plants` table. */
interface PlantRow {
  /** Primary key; matched against `content_id` of plant flags and `plant_id` of diseases. */
  id: string;
  /** Name shown in report headings. */
  common_name: string;
  scientific_name: string;
  family: string;
  image_url: string;
}
|
||||
|
||||
/** Columns this script selects from the `diseases` table. */
interface DiseaseRow {
  /** Primary key; matched against `content_id` of disease-related flags. */
  id: string;
  name: string;
  scientific_name: string;
  /** Id of the plant this disease belongs to; used to look up the plant's name. */
  plant_id: string;
  image_url: string;
}
|
||||
|
||||
// ─── Helpers ────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Presentation metadata for every known `content_type`, keyed by the type name.
 * Built from compact [key, emoji, title, description] tuples.
 */
const CONTENT_TYPE_LABELS: Record<string, { emoji: string; title: string; description: string }> =
  Object.fromEntries(
    (
      [
        [
          "plant_image",
          "🪴",
          "Plant Images Flagged for Review",
          "Plant images that users have flagged as potentially incorrect or low quality.",
        ],
        [
          "disease_image",
          "📸",
          "Disease Images Flagged for Review",
          "Disease symptom images that users have flagged as potentially incorrect or misleading.",
        ],
        [
          "disease_description",
          "📝",
          "Disease Descriptions Flagged for Review",
          "Disease descriptions that users have flagged as potentially inaccurate.",
        ],
        [
          "disease_symptoms",
          "⚠️",
          "Disease Symptoms Flagged for Review",
          "Symptom descriptions that users have flagged as potentially inaccurate.",
        ],
        [
          "disease_causes",
          "🔍",
          "Disease Causes Flagged for Review",
          "Causes and contributing factors that users have flagged as potentially incorrect.",
        ],
        [
          "disease_treatment",
          "💊",
          "Disease Treatment Steps Flagged for Review",
          "Treatment instructions that users have flagged as potentially incorrect or harmful.",
        ],
        [
          "disease_prevention",
          "🛡️",
          "Disease Prevention Tips Flagged for Review",
          "Prevention tips that users have flagged as potentially incorrect or misleading.",
        ],
      ] as const
    ).map(([key, emoji, title, description]) => [key, { emoji, title, description }]),
  );
|
||||
|
||||
/**
 * Format a database timestamp for the report, e.g. "Mar 5, 2024, 02:07 PM".
 *
 * The `flagged_content` timestamps default to SQLite's `datetime('now')`, which
 * yields UTC text such as "2024-03-05 14:07:00" with no zone marker. Passing
 * that straight to `new Date()` would treat it as local time, so zone-less
 * values are explicitly parsed as UTC. Values that already carry a zone
 * (e.g. a trailing "Z") are parsed unchanged.
 *
 * @param iso Timestamp text from the database.
 * @param timeZone Optional IANA zone for rendering; defaults to the system zone.
 * @returns The formatted date, or the input unchanged when it cannot be parsed.
 */
function formatDate(iso: string, timeZone?: string): string {
  const zoneless = /^(\d{4}-\d{2}-\d{2})[ T](\d{2}:\d{2}(?::\d{2}(?:\.\d+)?)?)$/.exec(iso.trim());
  const d = new Date(zoneless ? `${zoneless[1]}T${zoneless[2]}Z` : iso);

  // Show the raw value rather than the literal text "Invalid Date".
  if (Number.isNaN(d.getTime())) return iso;

  return d.toLocaleDateString("en-US", {
    year: "numeric",
    month: "short",
    day: "numeric",
    hour: "2-digit",
    minute: "2-digit",
    timeZone,
  });
}
|
||||
|
||||
// ─── Main ───────────────────────────────────────────────────────────────────
|
||||
|
||||
/** Comma-separated `?` placeholders for a SQL `IN (...)` list of `count` values. */
function sqlPlaceholders(count: number): string {
  return Array.from({ length: count }, () => "?").join(",");
}

/** Fetch the plants in `ids` that are not yet in `plantMap` and add them to it. */
async function loadPlantsInto(plantMap: Map<string, PlantRow>, ids: Iterable<string>): Promise<void> {
  const missing = [...new Set(ids)].filter((id) => !plantMap.has(id));
  if (missing.length === 0) return;

  const rs = await db.execute({
    sql: `SELECT id, common_name, scientific_name, family, image_url FROM plants WHERE id IN (${sqlPlaceholders(missing.length)})`,
    args: missing,
  });
  for (const row of rs.rows as unknown as PlantRow[]) {
    plantMap.set(row.id, row);
  }
}

/** Fetch the diseases with the given ids, keyed by disease id. */
async function loadDiseases(ids: readonly string[]): Promise<Map<string, DiseaseRow>> {
  const diseaseMap = new Map<string, DiseaseRow>();
  if (ids.length === 0) return diseaseMap;

  const rs = await db.execute({
    sql: `SELECT id, name, scientific_name, plant_id, image_url FROM diseases WHERE id IN (${sqlPlaceholders(ids.length)})`,
    args: [...ids],
  });
  for (const row of rs.rows as unknown as DiseaseRow[]) {
    diseaseMap.set(row.id, row);
  }
  return diseaseMap;
}

/** Render the markdown lines (heading, bullet list, optional image) for one flagged row. */
function renderFlaggedItem(
  type: string,
  item: FlaggedRow,
  plantMap: ReadonlyMap<string, PlantRow>,
  diseaseMap: ReadonlyMap<string, DiseaseRow>,
): string[] {
  // Fall back to the raw content id when the referenced row no longer exists.
  let label = item.content_id;
  let plantLabel = "";
  let imageMarkdown = "";

  if (type === "plant_image") {
    const plant = plantMap.get(item.content_id);
    if (plant) {
      label = `${plant.common_name} (_${plant.scientific_name}_)`;
      plantLabel = `${plant.family} family`;
      if (plant.image_url) imageMarkdown = `![${plant.common_name}](${plant.image_url})`;
    }
  } else {
    // Every content type other than plant_image refers to a disease row.
    const disease = diseaseMap.get(item.content_id);
    if (disease) {
      const plantName = plantMap.get(disease.plant_id)?.common_name ?? disease.plant_id;
      label = `${disease.name} (_${disease.scientific_name}_) on **${plantName}**`;
      plantLabel = `Affects: ${plantName}`;
      if (type === "disease_image" && disease.image_url) {
        imageMarkdown = `![${disease.name}](${disease.image_url})`;
      }
    }
  }

  const flagWord = item.flag_count === 1 ? "flag" : "flags";
  const out: string[] = [
    `### ${label}`,
    "",
    // The id is what the "Taking Action" footer asks reviewers to delete by.
    `- **ID:** \`${item.id}\``,
    `- **Field:** \`${item.field_name}\``,
    `- **Flags:** ${item.flag_count} ${flagWord}`,
    `- **First flagged:** ${formatDate(item.created_at)}`,
    `- **Last flagged:** ${formatDate(item.updated_at)}`,
  ];
  if (plantLabel) {
    out.push(`- **${plantLabel}**`);
  }
  if (item.notes) {
    // Notes are free text; collapse line breaks so they cannot break the list item.
    out.push(`- **User notes:** ${item.notes.replace(/\s+/g, " ").trim()}`);
  }
  if (imageMarkdown) {
    out.push("");
    out.push(imageMarkdown);
  }
  out.push("");
  return out;
}

/**
 * Generate the flagged-content markdown report and write it to `OUTPUT_PATH`.
 *
 * Reads every `flagged_content` row with at least `MIN_FLAGS` flags, resolves
 * the referenced plants and diseases, and renders a summary table followed by
 * one section per content type. Known content types come first in a fixed
 * order; any other type found in the data is appended after them so no counted
 * row is left out. The database client is closed whether or not the run
 * succeeds.
 */
async function main() {
  console.log(`📋 Generating flagged content report (min flags: ${MIN_FLAGS})...`);

  try {
    // Fetch flagged content
    const flaggedRs = await db.execute({
      sql: "SELECT * FROM flagged_content WHERE flag_count >= ? ORDER BY content_type, flag_count DESC, updated_at DESC",
      args: [MIN_FLAGS],
    });
    const flaggedRows = flaggedRs.rows as unknown as FlaggedRow[];

    if (flaggedRows.length === 0) {
      const report = [
        "# 🚩 Flagged Content Review — Nothing to Review",
        "",
        `Generated: ${new Date().toISOString()}`,
        "",
        "**No content has been flagged for review yet.**",
        "",
        "Flagged items will appear here once users flag content for manual review.",
        "",
        "---",
        "",
        `_Report generated with min-flags=${MIN_FLAGS}_`,
        "",
      ].join("\n");

      fs.writeFileSync(OUTPUT_PATH, report, "utf-8");
      console.log(`✅ Report written to ${OUTPUT_PATH} (no flagged items)`);
      return;
    }

    // Collect the referenced ids and group the rows by content type in one pass.
    const plantIds = new Set<string>();
    const diseaseIds = new Set<string>();
    const groups = new Map<string, FlaggedRow[]>();
    for (const row of flaggedRows) {
      (row.content_type === "plant_image" ? plantIds : diseaseIds).add(row.content_id);
      const bucket = groups.get(row.content_type);
      if (bucket) bucket.push(row);
      else groups.set(row.content_type, [row]);
    }

    // Resolve names: flagged plants, flagged diseases, then the diseases' host plants.
    const plantMap = new Map<string, PlantRow>();
    await loadPlantsInto(plantMap, plantIds);
    const diseaseMap = await loadDiseases([...diseaseIds]);
    await loadPlantsInto(
      plantMap,
      [...diseaseMap.values()].map((disease) => disease.plant_id),
    );

    // ─── Build Report ──────────────────────────────────────────────────────

    const lines: string[] = [];
    const totalFlags = flaggedRows.reduce((sum, r) => sum + r.flag_count, 0);

    lines.push("# 🚩 Flagged Content — Manual Review Needed");
    lines.push("");
    lines.push(`Generated: ${new Date().toISOString()}`);
    lines.push("");
    lines.push(
      flaggedRows.length === 1
        ? `**${flaggedRows.length} item** flagged for review (${totalFlags} total flags).`
        : `**${flaggedRows.length} items** flagged for review (${totalFlags} total flags).`,
    );
    lines.push("");
    lines.push("Most data in this knowledge base is not reviewed by humans. ");
    lines.push("Items listed below have been flagged by users for manual review. ");
    lines.push("Please review each item and take appropriate action.");
    lines.push("");

    const orderedTypes = [
      "plant_image",
      "disease_image",
      "disease_description",
      "disease_symptoms",
      "disease_causes",
      "disease_treatment",
      "disease_prevention",
    ];
    // Known types first, then any type present in the data but not in the list.
    const reportTypes = [
      ...orderedTypes.filter((type) => groups.has(type)),
      ...[...groups.keys()].filter((type) => !orderedTypes.includes(type)),
    ];

    // Summary table
    lines.push("## 📊 Summary");
    lines.push("");
    lines.push("| Content Type | Count | Total Flags |");
    lines.push("|---|---|---|");
    for (const type of reportTypes) {
      const items = groups.get(type) ?? [];
      const label = CONTENT_TYPE_LABELS[type]?.title ?? type;
      const sumFlags = items.reduce((s, r) => s + r.flag_count, 0);
      lines.push(`| ${label} | ${items.length} | ${sumFlags} |`);
    }
    lines.push(`| **Total** | **${flaggedRows.length}** | **${totalFlags}** |`);
    lines.push("");
    lines.push("---");
    lines.push("");

    // Detail sections per content type
    for (const type of reportTypes) {
      const items = groups.get(type) ?? [];
      const config = CONTENT_TYPE_LABELS[type];

      lines.push(`## ${config?.emoji ?? "📋"} ${config?.title ?? type}`);
      lines.push("");
      lines.push(config?.description ?? "");
      lines.push("");
      lines.push(`**${items.length} item${items.length === 1 ? "" : "s"} flagged**`);
      lines.push("");

      for (const item of items) {
        lines.push(...renderFlaggedItem(type, item, plantMap, diseaseMap));
      }

      lines.push("---");
      lines.push("");
    }

    // Footer
    lines.push("## ℹ️ How This Works");
    lines.push("");
    lines.push("1. **Users** click the 🚩 Flag button on any content they believe needs review.");
    lines.push("2. **The system** stores the flag in the database with a counter.");
    lines.push(
      "3. **This report** is generated by querying the database and formatting the results.",
    );
    lines.push("4. **Reviewers** go through each item and take action (fix, update, or dismiss).");
    lines.push("");
    lines.push("### Taking Action");
    lines.push("");
    lines.push("After reviewing an item, you can clear its flags by running:");
    lines.push("");
    lines.push("```sql");
    lines.push("DELETE FROM flagged_content WHERE id = '<item-id>';");
    lines.push("```");
    lines.push("");
    lines.push("Or clear all flags for a specific item by running:");
    lines.push("");
    lines.push("```sql");
    lines.push(
      "UPDATE flagged_content SET flag_count = 0 WHERE content_id = '<id>' AND field_name = '<field>';",
    );
    lines.push("```");
    lines.push("");
    lines.push("---");
    lines.push("");
    lines.push(`_Report generated with min-flags=${MIN_FLAGS}_`);

    // Write report
    fs.writeFileSync(OUTPUT_PATH, lines.join("\n"), "utf-8");
    console.log(`✅ Report written to ${OUTPUT_PATH}`);
    console.log(` ${flaggedRows.length} items, ${totalFlags} total flags`);
  } finally {
    // Release the connection on success, early return, and failure alike.
    db.close();
  }
}
|
||||
|
||||
// Entry point: report any failure and signal it through a non-zero exit code.
main().then(undefined, (failure) => {
  console.error("❌ Failed to generate report:", failure);
  process.exit(1);
});
|
||||
965
apps/web/scripts/scrape-training-dataset.ts
Normal file
965
apps/web/scripts/scrape-training-dataset.ts
Normal file
@@ -0,0 +1,965 @@
|
||||
#!/usr/bin/env node
|
||||
/**
|
||||
* scrape-training-dataset.ts
|
||||
*
|
||||
* Collects a training dataset from DuckDuckGo, iNaturalist, and Wikimedia Commons.
|
||||
*
|
||||
* Targets (tiered by plant type):
|
||||
* - Core plants (houseplants + common garden): 100 images per disease
|
||||
* - Full set (all 11,498 DB diseases): 10 images per disease
|
||||
* - Healthy: 400 images
|
||||
*
|
||||
* Sources (all free, no API keys):
|
||||
* 1. DB image_url — existing images already found
|
||||
* 2. DuckDuckGo — general web image search
|
||||
* 3. iNaturalist — real-world plant observation photos
|
||||
* 4. Wikimedia Commons — curated scientific/educational images
|
||||
*
|
||||
* Usage: cd apps/web && npx tsx scripts/scrape-training-dataset.ts
|
||||
* Progress: data/dataset/.progress.json — interrupt and resume safely.
|
||||
*/
|
||||
|
||||
import "dotenv/config";
|
||||
import { readFileSync, writeFileSync, existsSync, mkdirSync, readdirSync } from "fs";
|
||||
import { resolve, extname } from "path";
|
||||
|
||||
// Load .env.development for DB creds
|
||||
/**
 * Parse one line of a dotenv-style file.
 *
 * @param line Raw line text.
 * @returns `[key, value]`, or `undefined` for blank lines, `#` comments, and
 *   lines without a key before the `=`. A value wrapped in matching single or
 *   double quotes is returned without the quotes.
 */
function parseEnvLine(line: string): [key: string, value: string] | undefined {
  const trimmed = line.trim();
  if (!trimmed || trimmed.startsWith("#")) return undefined;

  const eqIdx = trimmed.indexOf("=");
  if (eqIdx <= 0) return undefined;

  const key = trimmed.slice(0, eqIdx).trim();
  let val = trimmed.slice(eqIdx + 1).trim();

  // KEY="value" / KEY='value' — keep the content, drop the wrapping quotes.
  const quote = val.charAt(0);
  if (val.length >= 2 && (quote === '"' || quote === "'") && val.endsWith(quote)) {
    val = val.slice(1, -1);
  }
  return [key, val];
}

const envPath = resolve(__dirname, "../.env.development");
try {
  for (const line of readFileSync(envPath, "utf-8").split("\n")) {
    const entry = parseEnvLine(line);
    if (!entry) continue;
    const [key, val] = entry;
    // Variables that are already set (and non-empty) win over the file.
    if (!process.env[key]) process.env[key] = val;
  }
} catch (err: unknown) {
  // A missing file is fine (best-effort load); anything else is worth a warning.
  const code = err instanceof Error && "code" in err ? (err as { code?: unknown }).code : undefined;
  if (code !== "ENOENT") {
    console.warn(
      `⚠ Could not read ${envPath}: ${err instanceof Error ? err.message : String(err)}`,
    );
  }
}
|
||||
|
||||
import { getDb, closeDb } from "@/lib/db/index";
|
||||
import { diseases } from "@/lib/db/schema";
|
||||
|
||||
// ─── Config ─────────────────────────────────────────────────────────────────
|
||||
|
||||
/** Root directory of the downloaded dataset. */
const DATASET_DIR = resolve(__dirname, "../data/dataset");

/** Checkpoint file inside the dataset directory. */
const PROGRESS_FILE = resolve(DATASET_DIR, ".progress.json");

/** Images wanted per disease when the plant is a core plant. */
const TARGET_CORE = 100;

/** Images wanted per disease for every other plant. */
const TARGET_FULL = 10;

/** Images wanted for the "healthy" class. */
const TARGET_HEALTHY = 400;

/** Plant ids that receive the higher (core) image target. */
const CORE_PLANTS = new Set<string>([
  // Houseplants
  ...[
    "monstera",
    "pothos",
    "snake-plant",
    "peace-lily",
    "orchid",
    "succulent",
    "fiddle-leaf-fig",
    "aloe-vera",
    "cactus",
    "fern",
  ],
  // Garden plants
  ...[
    "tomato",
    "basil",
    "rose",
    "pepper",
    "strawberry",
    "cucumber",
    "squash",
    "lettuce",
    "spinach",
    "cabbage",
    "lavender",
    "mint",
    "jasmine",
    "sunflower",
    "daisy",
    "zucchini",
    "bean",
    "eggplant",
    "chili",
  ],
  // General disease patterns
  "general",
]);

/** Pause between DuckDuckGo search API calls, in milliseconds. */
const SEARCH_DELAY = 1500;

/** Pause after each image download, in milliseconds. */
const DOWNLOAD_DELAY = 100;

/** Number of downloads started together. */
const CONCURRENT_DOWNLOADS = 10;

/** Smallest accepted image, in bytes (10KB). */
const MIN_IMAGE_SIZE = 10_000;

/** Largest accepted image, in bytes (10MB). */
const MAX_IMAGE_SIZE = 10 * 1024 * 1024;

/** File extensions an image may be saved with. */
const ALLOWED_EXTENSIONS = [".jpg", ".jpeg", ".png", ".webp"];

/** User-Agent header sent with every request. */
const UA =
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36";

/** Class id used for healthy plants. */
const HEALTHY_CLASS = "healthy";
|
||||
|
||||
// ─── Types ──────────────────────────────────────────────────────────────────
|
||||
|
||||
/** Subset of a `diseases` row that the scraper reads. */
interface DbDisease {
  id: string;
  /** Id of the plant the disease belongs to. */
  plantId: string;
  name: string;
  /** Image URL already stored for the disease, or null when there is none. */
  imageUrl: string | null;
}
|
||||
|
||||
/** One entry of the `results` array returned by the DuckDuckGo image endpoint. */
interface DuckDuckGoImageResult {
  /** Preferred image URL; the collector falls back to `url` when this is empty. */
  image: string;
  title: string;
  url: string;
  thumbnail: string;
  height: number;
  width: number;
}
|
||||
|
||||
/** Resume state kept for a single image source. */
interface SourceState {
  /** Set once the source is considered to have nothing more to offer. */
  exhausted: boolean;
}
|
||||
|
||||
/** Progress bookkeeping for one dataset class, persisted in the progress file. */
interface ClassProgress {
  count: number;
  downloaded: number;
  failed: number;
  /** URLs already handled for this class. */
  seenUrls: string[];
  exhausted: boolean;
  /** Exhaustion flag per source, so a resumed run does not re-scrape exhausted sources. */
  sources: {
    db: SourceState;
    duckduckgo: SourceState;
    inaturalist: SourceState;
    wikimedia: SourceState;
  };
}
|
||||
|
||||
/** Shape of the progress checkpoint file. */
interface Progress {
  lastUpdated: string;
  /** Per-class progress, keyed by class id. */
  classes: Record<string, ClassProgress>;
  /** Phase to resume from: 0=core, 1=full, 2=healthy. */
  phase: number;
  /** Position inside the current phase's disease array to resume from. */
  phaseIndex: number;
}
|
||||
|
||||
// ─── DB Loading ──────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Read every disease (id, plant id, name, stored image URL) from the database,
 * ordered by disease id.
 */
async function loadDiseasesFromDb(): Promise<DbDisease[]> {
  const columns = {
    id: diseases.id,
    plantId: diseases.plantId,
    name: diseases.name,
    imageUrl: diseases.imageUrl,
  };
  return getDb().select(columns).from(diseases).orderBy(diseases.id);
}
|
||||
|
||||
// ─── DuckDuckGo API ─────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Fetch the DuckDuckGo search page for `query` and extract the `vqd` token
 * embedded in its HTML; the token is passed to the image endpoint afterwards.
 *
 * @throws Error when the page request fails or no token is found in the HTML.
 */
async function getVqdToken(query: string): Promise<string> {
  const searchPage = `https://duckduckgo.com/?q=${encodeURIComponent(query)}&t=h_&iax=images&ia=images`;

  const response = await fetch(searchPage, {
    headers: { "User-Agent": UA, Accept: "text/html" },
    signal: AbortSignal.timeout(15_000),
  });
  if (!response.ok) {
    throw new Error(`Failed to get vqd token: ${response.status}`);
  }

  const body = await response.text();
  const token = /vqd['"]?\s*[:=]\s*['"]([a-f0-9-]+)['"]/.exec(body)?.[1];
  if (!token) {
    throw new Error(`Could not extract vqd token for "${query}"`);
  }
  return token;
}
|
||||
|
||||
/**
 * Request one page of DuckDuckGo image results.
 *
 * @param query Search text.
 * @param vqd Token obtained from the search page for the same query.
 * @param page Page number forwarded as the `p` query parameter.
 * @param maxRateLimitRetries How many times an HTTP 429 is retried (after a
 *   10 second wait each) before giving up. Bounded so that persistent rate
 *   limiting cannot retry forever.
 * @returns The `results` array of the response; empty on HTTP 403 or when the
 *   response carries no results array.
 * @throws Error on any other non-OK status, or when 429 persists past the retry budget.
 */
async function searchImagesDuckDuckGo(
  query: string,
  vqd: string,
  page: number,
  maxRateLimitRetries = 3,
): Promise<DuckDuckGoImageResult[]> {
  const encodedQuery = encodeURIComponent(query);
  const url = `https://duckduckgo.com/i.js?q=${encodedQuery}&vqd=${vqd}&o=json&p=${page}&f=,,,`;

  for (let attempt = 0; ; attempt++) {
    const res = await fetch(url, {
      headers: {
        "User-Agent": UA,
        Accept: "application/json",
        Referer: `https://duckduckgo.com/?q=${encodedQuery}&t=h_&iax=images&ia=images`,
      },
      signal: AbortSignal.timeout(15_000),
    });

    if (res.ok) {
      const data = (await res.json()) as { results?: DuckDuckGoImageResult[] } | null;
      return Array.isArray(data?.results) ? data.results : [];
    }

    if (res.status === 403) return [];
    if (res.status !== 429 || attempt >= maxRateLimitRetries) {
      throw new Error(`DuckDuckGo search failed: ${res.status}`);
    }

    console.warn(" ⚠ Rate limited (429). Waiting 10s...");
    await sleep(10_000);
  }
}
|
||||
|
||||
/**
 * Gather up to `target` new image URLs for `query` from DuckDuckGo.
 *
 * Walks at most five result pages, pausing `SEARCH_DELAY` ms before each
 * request. Every accepted URL is also added to `seenUrls`. Paging stops early
 * on a search error, after three consecutive empty pages (reported as
 * exhausted), or after two consecutive pages where fewer than 5% of the
 * results were new.
 *
 * @returns The collected URLs and whether the source is considered exhausted.
 *   A failure to obtain the search token is reported as exhausted with no URLs.
 */
async function collectImagesDuckDuckGo(
  query: string,
  target: number,
  seenUrls: Set<string>,
): Promise<{ urls: string[]; exhausted: boolean }> {
  const MAX_PAGES = 5;
  const collected: string[] = [];

  const describe = (err: unknown): string => (err instanceof Error ? err.message : "unknown");
  const isParsableUrl = (candidate: string): boolean => {
    try {
      new URL(candidate);
      return true;
    } catch {
      return false;
    }
  };

  let vqd: string;
  try {
    vqd = await getVqdToken(query);
  } catch (err) {
    console.warn(` ⚠ DDG token failed: ${describe(err)}`);
    return { urls: [], exhausted: true };
  }

  let exhausted = false;
  let emptyStreak = 0;
  let staleStreak = 0;

  for (let page = 1; collected.length < target && page <= MAX_PAGES; page++) {
    await sleep(SEARCH_DELAY);

    let hits: DuckDuckGoImageResult[];
    try {
      hits = await searchImagesDuckDuckGo(query, vqd, page);
    } catch (err) {
      console.warn(` ⚠ DDG error: ${describe(err)}`);
      break;
    }

    if (!hits || hits.length === 0) {
      emptyStreak += 1;
      if (emptyStreak >= 3) {
        exhausted = true;
        break;
      }
      continue;
    }
    emptyStreak = 0;

    let fresh = 0;
    for (const hit of hits) {
      if (collected.length >= target) break;

      const candidate = hit.image || hit.url;
      const usable =
        Boolean(candidate) &&
        typeof candidate === "string" &&
        !seenUrls.has(candidate) &&
        isParsableUrl(candidate);
      if (!usable) continue;

      seenUrls.add(candidate);
      collected.push(candidate);
      fresh += 1;
    }

    // Give up once two pages in a row yield almost nothing new.
    if (fresh / hits.length < 0.05) {
      staleStreak += 1;
      if (staleStreak >= 2) break;
    } else {
      staleStreak = 0;
    }
  }

  return { urls: collected.slice(0, target), exhausted };
}
|
||||
|
||||
// ─── iNaturalist API ─────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Rewrite an iNaturalist photo URL that points at a sized variant
 * (square / thumb / small / medium / large) to the `original` variant.
 * URLs that do not end in such a variant file name are returned unchanged.
 */
function toOriginalPhotoUrl(url: string): string {
  return url.replace(
    /\/(?:square|thumb|small|medium|large)\.(\w+(?:\?.*)?)$/i,
    "/original.$1",
  );
}

/**
 * Collect up to `target` new photo URLs from iNaturalist research-grade
 * observations matching `query` (a single request of at most 200 observations).
 *
 * Every returned URL is also added to `seenUrls`. Duplicates are detected on
 * the rewritten (original-size) URL, which is the form stored in `seenUrls`.
 *
 * @returns The URLs and `exhausted: true` when fewer than `target` were found.
 *   Request failures return what was gathered with `exhausted: false`.
 */
async function searchImagesInaturalist(
  query: string,
  target: number,
  seenUrls: Set<string>,
): Promise<{ urls: string[]; exhausted: boolean }> {
  const results: string[] = [];
  // Nothing requested: skip the request instead of asking for zero results.
  if (target <= 0) return { urls: results, exhausted: false };

  const perPage = Math.min(target, 200);
  const apiUrl =
    `https://api.inaturalist.org/v1/observations` +
    `?q=${encodeURIComponent(query)}` +
    `&photos_only=true` +
    `&quality_grade=research` +
    `&per_page=${perPage}` +
    `&order_by=observed_on&order=desc`;

  try {
    const res = await fetch(apiUrl, {
      headers: { "User-Agent": UA, Accept: "application/json" },
      signal: AbortSignal.timeout(15_000),
    });
    if (!res.ok) return { urls: [], exhausted: false };

    const data = (await res.json()) as {
      results?: Array<{ photos?: Array<{ url?: string }> }>;
    };

    for (const obs of data.results ?? []) {
      if (results.length >= target) break;
      for (const photo of obs.photos ?? []) {
        if (results.length >= target) break;
        if (!photo.url) continue;

        const fullUrl = toOriginalPhotoUrl(photo.url);
        if (seenUrls.has(fullUrl)) continue;
        seenUrls.add(fullUrl);
        results.push(fullUrl);
      }
    }

    return { urls: results, exhausted: results.length < target };
  } catch {
    // Network or parse failure: keep what was gathered and allow a later retry.
    return { urls: results, exhausted: false };
  }
}
|
||||
|
||||
// ─── Wikimedia Commons API ──────────────────────────────────────────────────
|
||||
|
||||
/** Build the `Special:FilePath` download URL for a Commons search hit title. */
function commonsFileUrl(title: string): string {
  const filename = title.replace(/^File:/, "");
  return `https://commons.wikimedia.org/wiki/Special:FilePath/${encodeURIComponent(filename)}`;
}

/**
 * Collect up to `target` new file URLs from Wikimedia Commons search results
 * (namespace 6, 50 hits per request).
 *
 * Every returned URL is also added to `seenUrls`. Paging stops when the target
 * is reached, a request fails, a page comes back empty, or the response carries
 * no continuation offset.
 *
 * @returns The URLs and `exhausted: true` when fewer than `target` were found.
 */
async function searchImagesCommons(
  query: string,
  target: number,
  seenUrls: Set<string>,
): Promise<{ urls: string[]; exhausted: boolean }> {
  const results: string[] = [];
  let sroffset = 0;

  while (results.length < target) {
    const params = new URLSearchParams({
      action: "query",
      list: "search",
      srsearch: query,
      srnamespace: "6",
      srlimit: "50",
      sroffset: String(sroffset),
      format: "json",
    });
    const url = `https://commons.wikimedia.org/w/api.php?${params}`;

    try {
      const res = await fetch(url, {
        headers: { "User-Agent": UA },
        signal: AbortSignal.timeout(10_000),
      });
      if (!res.ok) break;

      const data = (await res.json()) as {
        query?: { search?: Array<{ title: string }> };
        continue?: { sroffset?: number };
      };

      const hits = data.query?.search ?? [];
      if (hits.length === 0) break;

      for (const hit of hits) {
        if (results.length >= target) break;
        const imgUrl = commonsFileUrl(hit.title);
        if (seenUrls.has(imgUrl)) continue;
        seenUrls.add(imgUrl);
        results.push(imgUrl);
      }

      // No continuation offset means this was the last page; asking again
      // would only cost an extra request that returns nothing.
      const nextOffset = data.continue?.sroffset;
      if (nextOffset === undefined) break;
      sroffset = nextOffset;
    } catch {
      break;
    }
  }

  return { urls: results, exhausted: results.length < target };
}
|
||||
|
||||
// ─── Image Download ─────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Choose the file extension for a downloaded image.
 *
 * The extension from the URL path wins when it is in `allowed`; otherwise the
 * content type decides, falling back to ".jpg".
 *
 * @param url Absolute URL the image was fetched from.
 * @param contentType Lower-cased `Content-Type` header value ("" when absent).
 * @param allowed Accepted extensions, including the leading dot.
 */
function pickImageExtension(
  url: string,
  contentType: string,
  allowed: readonly string[] = ALLOWED_EXTENSIONS,
): string {
  const fromUrl = extname(new URL(url).pathname).toLowerCase();
  if (allowed.includes(fromUrl)) return fromUrl;
  if (contentType.includes("jpeg") || contentType.includes("jpg")) return ".jpg";
  if (contentType.includes("png")) return ".png";
  if (contentType.includes("webp")) return ".webp";
  return ".jpg";
}

/**
 * Download one image and write it next to `destPath`, replacing the extension
 * of `destPath` with the one chosen for the image.
 *
 * @returns `true` when a file was written. `false` when the request failed,
 *   the response was HTML, the size was outside
 *   `MIN_IMAGE_SIZE`..`MAX_IMAGE_SIZE`, or any error occurred.
 */
async function downloadImage(url: string, destPath: string): Promise<boolean> {
  try {
    const res = await fetch(url, {
      headers: { "User-Agent": UA, Accept: "image/webp,image/png,image/jpeg,*/*" },
      signal: AbortSignal.timeout(15_000),
    });
    if (!res.ok) return false;

    // Header values are compared case-insensitively.
    const contentType = (res.headers.get("content-type") || "").toLowerCase();
    if (contentType.includes("text/html")) return false;

    // When the server declares the size, reject out-of-range bodies without
    // downloading them. The buffer length check below still covers servers
    // that omit or misreport Content-Length.
    const declaredSize = Number(res.headers.get("content-length"));
    if (declaredSize > 0 && (declaredSize < MIN_IMAGE_SIZE || declaredSize > MAX_IMAGE_SIZE)) {
      await res.body?.cancel();
      return false;
    }

    const buffer = Buffer.from(await res.arrayBuffer());
    if (buffer.length < MIN_IMAGE_SIZE) return false;
    if (buffer.length > MAX_IMAGE_SIZE) return false;

    const ext = pickImageExtension(url, contentType);
    const filePath = destPath.replace(/\.\w+$/, ext);
    writeFileSync(filePath, buffer);
    return true;
  } catch {
    // Best-effort: any failure simply counts as a failed download.
    return false;
  }
}
|
||||
|
||||
/**
 * Download `urls` into `classDir` in chunks of `CONCURRENT_DOWNLOADS`,
 * naming files `img_<index>` with a 4-digit zero-padded index.
 *
 * Each URL is assigned its own index (`startIndex + position in urls`) before
 * any await, so downloads running concurrently in the same chunk can never
 * target the same filename. A failed download still consumes its index.
 *
 * @param urls       Image URLs to fetch, in order.
 * @param classDir   Directory the files are written into.
 * @param startIndex Index used for the first URL.
 * @returns Counts of successful and failed downloads, plus `lastIndex`
 *          (`startIndex + urls.length`), the next unused index.
 */
async function downloadBatch(
  urls: string[],
  classDir: string,
  startIndex: number,
): Promise<{ downloaded: number; failed: number; lastIndex: number }> {
  let downloaded = 0;
  let failed = 0;
  // Guard against a non-positive step, which would never advance the loop.
  const chunkSize = Math.max(1, CONCURRENT_DOWNLOADS);

  for (let offset = 0; offset < urls.length; offset += chunkSize) {
    const chunk = urls.slice(offset, offset + chunkSize);

    const results = await Promise.all(
      chunk.map(async (url, position) => {
        // Computed synchronously per URL: reading a shared counter here and
        // incrementing it after the awaits made the whole chunk share one name.
        const index = startIndex + offset + position;
        const paddedIndex = String(index).padStart(4, "0");
        const destPath = resolve(classDir, `img_${paddedIndex}.jpg`);
        const success = await downloadImage(url, destPath);
        await sleep(DOWNLOAD_DELAY);
        return { success, index, url: url.substring(0, 50) };
      }),
    );

    for (const r of results) {
      if (r.success) downloaded++;
      else {
        failed++;
        // Sample the failure log (1st, 21st, 41st, ...) to keep output readable.
        if (failed % 20 === 1) console.log(` ⚠ Failed: ${r.url}...`);
      }
    }

    const total = downloaded + failed;
    if (total % 30 === 0 || total === urls.length) {
      console.log(` Progress: ${downloaded}/${urls.length} (${failed} failed)`);
    }
  }

  return { downloaded, failed, lastIndex: startIndex + urls.length };
}
|
||||
|
||||
// ─── Progress Tracking ──────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Load the persisted progress from `PROGRESS_FILE`.
 *
 * A missing, unparsable or non-object file yields a fresh, empty progress
 * record. Files written by older versions are upgraded in place: missing
 * top-level fields, per-class counters, `seenUrls`, and any individual
 * missing entry under `sources` are filled with defaults.
 *
 * @returns The loaded (and normalised) progress, or a fresh one.
 */
function loadProgress(): Progress {
  const freshProgress = (): Progress => ({
    lastUpdated: new Date().toISOString(),
    classes: {},
    phase: 0,
    phaseIndex: 0,
  });

  if (!existsSync(PROGRESS_FILE)) {
    return freshProgress();
  }

  try {
    const parsed: unknown = JSON.parse(readFileSync(PROGRESS_FILE, "utf-8"));
    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
      throw new Error("progress file does not contain a JSON object");
    }
    const raw = parsed as Partial<Progress>;

    // Backward compat: ensure new fields exist
    raw.lastUpdated ??= new Date().toISOString();
    raw.phase ??= 0;
    raw.phaseIndex ??= 0;
    raw.classes ??= {};

    const sourceNames = ["db", "duckduckgo", "inaturalist", "wikimedia"] as const;
    for (const key of Object.keys(raw.classes)) {
      const cp = raw.classes[key] as Partial<ClassProgress>;

      // Counters are added to with `+=` elsewhere; undefined would become NaN.
      cp.count ??= 0;
      cp.downloaded ??= 0;
      cp.failed ??= 0;
      cp.exhausted ??= false;
      cp.seenUrls ??= [];

      // Fill each source individually so a file that predates one source
      // (but already has a `sources` object) does not crash on `.exhausted`.
      const sources: Partial<ClassProgress["sources"]> = cp.sources ?? {};
      for (const name of sourceNames) {
        sources[name] ??= { exhausted: false };
      }
      cp.sources = sources as ClassProgress["sources"];
    }
    return raw as Progress;
  } catch {
    console.warn(" ⚠ Corrupt progress file, starting fresh");
    return freshProgress();
  }
}
|
||||
|
||||
/**
 * Persist `progress` to `PROGRESS_FILE` as pretty-printed JSON.
 *
 * Mutates `progress.lastUpdated` to the current time before writing.
 */
function saveProgress(progress: Progress): void {
  const stamp = new Date().toISOString();
  progress.lastUpdated = stamp;

  const serialized = JSON.stringify(progress, null, 2);
  writeFileSync(PROGRESS_FILE, serialized);
}
|
||||
|
||||
/**
 * Return the progress entry for `classId`, creating and registering a
 * zeroed entry (no seen URLs, no source exhausted) when none exists yet.
 *
 * The returned object is the one stored in `progress.classes`, so callers
 * mutate the shared record.
 */
function getClassProgress(progress: Progress, classId: string): ClassProgress {
  const existing = progress.classes[classId];
  if (existing) return existing;

  const created: ClassProgress = {
    count: 0,
    downloaded: 0,
    failed: 0,
    seenUrls: [],
    exhausted: false,
    sources: {
      db: { exhausted: false },
      duckduckgo: { exhausted: false },
      inaturalist: { exhausted: false },
      wikimedia: { exhausted: false },
    },
  };
  progress.classes[classId] = created;
  return created;
}
|
||||
|
||||
// ─── Query Building ─────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Build the image-search queries for a disease.
 *
 * Falls back to the de-hyphenated id when the disease has no name. Every
 * query contains the disease name so results stay on-label.
 */
function buildSearchQueries(disease: DbDisease): string[] {
  const unslug = (slug: string): string => slug.replace(/-/g, " ");

  const diseaseName = disease.name || unslug(disease.id);
  const plantName = unslug(disease.plantId);

  return [
    `${diseaseName} ${plantName} leaf disease`,
    `${plantName} ${diseaseName} symptoms`,
    `${diseaseName} ${plantName}`,
  ];
}
|
||||
|
||||
/**
 * Build the search queries used to find images of a healthy plant.
 *
 * @param plant Plant id; hyphens are turned into spaces.
 */
function buildHealthyQueries(plant: string): string[] {
  const plantName = plant.split("-").join(" ");
  const queries: string[] = [];

  queries.push(`healthy ${plantName} leaf`);
  queries.push(`${plantName} leaf closeup`);
  queries.push(`healthy ${plantName} plant`);
  queries.push(`${plantName} foliage`);

  return queries;
}
|
||||
|
||||
// ─── File Reconciliation ───────────────────────────────────────────────────
|
||||
|
||||
/**
 * Count the entries in `classDir` whose name starts with `img_`.
 *
 * @returns The number of matching entries, or 0 when the directory does not
 *          exist or cannot be read.
 */
function countImagesInDir(classDir: string): number {
  if (!existsSync(classDir)) return 0;

  try {
    let total = 0;
    for (const entry of readdirSync(classDir)) {
      if (entry.startsWith("img_")) total += 1;
    }
    return total;
  } catch {
    return 0;
  }
}
|
||||
|
||||
/**
 * Reconcile a class's recorded image count with the files actually on disk.
 *
 * When fewer `img_*` files exist than the progress record claims (for
 * example because files were deleted after the last save), the lower on-disk
 * count is returned so the missing images get downloaded again. The count is
 * never adjusted upward.
 *
 * @returns The on-disk count when it is lower, otherwise `progressCount`.
 */
function reconcileClassCount(classDir: string, progressCount: number): number {
  const onDisk = countImagesInDir(classDir);
  const filesMissing = onDisk < progressCount;

  if (filesMissing) {
    console.log(` ↻ File count (${onDisk}) < progress count (${progressCount}) — reconciling`);
  }
  return filesMissing ? onDisk : progressCount;
}
|
||||
|
||||
// ─── Dataset Collection ─────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Collect up to `target` images for one class, resuming from saved progress.
 *
 * Steps, in order:
 *  1. Reconcile the recorded count with the files on disk.
 *  2. Return early when the target is already met or every source is exhausted.
 *  3. Gather candidate URLs from the DB-provided URLs, then DuckDuckGo
 *     (skipped in `fastMode`), iNaturalist and Wikimedia Commons, stopping as
 *     soon as enough candidates are queued.
 *  4. Persist the seen-URL set, download the candidates, and re-count the
 *     files on disk to update the class record.
 *
 * Progress is saved to disk at each significant step.
 *
 * @param classId      Key of the class in `progress.classes`.
 * @param queries      Search queries; DuckDuckGo tries each one, iNaturalist
 *                     and Commons use only the first.
 * @param target       Desired number of images for the class.
 * @param progress     Shared progress record (mutated).
 * @param classDir     Directory the images are written into.
 * @param existingUrls URLs already known for this class (tried first).
 * @param fastMode     When true, DuckDuckGo is skipped.
 */
async function collectClassImages(
  classId: string,
  queries: string[],
  target: number,
  progress: Progress,
  classDir: string,
  existingUrls: string[] = [],
  fastMode = false, // Skip slow DuckDuckGo, use iNat + Commons only
): Promise<void> {
  const cp = getClassProgress(progress, classId);

  // ── Reconcile with actual files on disk ─────────────────────────────────
  const actualCount = reconcileClassCount(classDir, cp.count);
  if (actualCount !== cp.count) {
    cp.count = actualCount;
    saveProgress(progress);
  }

  const seenUrls = new Set(cp.seenUrls);
  const sources = cp.sources;
  const allSourcesExhausted = (): boolean =>
    sources.db.exhausted &&
    sources.duckduckgo.exhausted &&
    sources.inaturalist.exhausted &&
    sources.wikimedia.exhausted;

  if (cp.count >= target) {
    console.log(` ✓ Already have ${cp.count}/${target}`);
    return;
  }

  if (allSourcesExhausted()) {
    cp.exhausted = true;
    saveProgress(progress);
    console.log(` ✓ Exhausted (${cp.count}/${target})`);
    return;
  }

  mkdirSync(classDir, { recursive: true });

  const allUrls: string[] = [];
  const needed = target - cp.count;

  // ── Source 0: Existing DB URLs ──────────────────────────────────────────
  if (!sources.db.exhausted) {
    const freshDbUrls = existingUrls.filter((u) => !seenUrls.has(u));
    if (freshDbUrls.length > 0) {
      console.log(` DB: ${freshDbUrls.length} existing URLs`);
      for (const url of freshDbUrls) {
        if (allUrls.length >= needed) break;
        seenUrls.add(url);
        allUrls.push(url);
      }
    }
    // DB source is always "exhausted" after processing its initial URLs
    sources.db.exhausted = true;
  }

  // ── Source 1: DuckDuckGo ──────────────────────────────────────────────
  if (!fastMode && !sources.duckduckgo.exhausted && allUrls.length < needed) {
    for (const query of queries) {
      if (allUrls.length >= needed) break;
      process.stdout.write(` DDG: "${query.substring(0, 40)}"... `);
      const result = await collectImagesDuckDuckGo(query, needed - allUrls.length, seenUrls);
      allUrls.push(...result.urls);
      // NOTE(review): one exhausted query marks the whole source exhausted for
      // future runs, although the remaining queries are still tried in this run.
      if (result.exhausted) {
        sources.duckduckgo.exhausted = true;
      }
      console.log(`${result.urls.length} new`);
    }
  }

  // ── Source 2: iNaturalist ──────────────────────────────────────────────
  if (!sources.inaturalist.exhausted && allUrls.length < needed) {
    const primaryQuery = queries[0];
    console.log(` iNat: Searching...`);
    const result = await searchImagesInaturalist(primaryQuery, needed - allUrls.length, seenUrls);
    allUrls.push(...result.urls);
    if (result.exhausted) sources.inaturalist.exhausted = true;
    console.log(` iNat: ${result.urls.length} images`);
  }

  // ── Source 3: Wikimedia Commons ────────────────────────────────────────
  if (!sources.wikimedia.exhausted && allUrls.length < needed) {
    const primaryQuery = queries[0];
    console.log(` Commons: Searching...`);
    const result = await searchImagesCommons(primaryQuery, needed - allUrls.length, seenUrls);
    allUrls.push(...result.urls);
    if (result.exhausted) sources.wikimedia.exhausted = true;
    console.log(` Commons: ${result.urls.length} images`);
  }

  if (allUrls.length === 0) {
    cp.exhausted = true;
    saveProgress(progress);
    console.log(` ✗ No images found — exhausted`);
    return;
  }

  // Save progress with seen URLs BEFORE downloading, so an interrupted run
  // does not queue the same URLs again.
  cp.seenUrls = Array.from(seenUrls);
  saveProgress(progress);

  console.log(` Downloading ${allUrls.length} images...`);

  // Start numbering at the current on-disk file count.
  const startIndex = countImagesInDir(classDir);
  const { downloaded, failed } = await downloadBatch(allUrls, classDir, startIndex);

  // Re-count actual files on disk after download (more reliable than tracking)
  cp.count = countImagesInDir(classDir);
  cp.downloaded += downloaded;
  cp.failed += failed;

  // The class is finished when nothing more can be fetched, or the target is met.
  if (allSourcesExhausted() || cp.count >= target) {
    cp.exhausted = true;
  }

  saveProgress(progress);

  const pct = Math.round((cp.count / target) * 100);
  console.log(
    ` ${downloaded > 0 ? "✓" : "✗"} Got ${downloaded}/${allUrls.length} (${failed} failed). Total: ${cp.count}/${target} (${pct}%)`,
  );
}
|
||||
|
||||
// ─── Main ───────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Entry point: collect the image dataset in three phases.
 *
 *  - Phase 1: diseases of `CORE_PLANTS`, `TARGET_CORE` images each.
 *  - Phase 2: all other diseases, `TARGET_FULL` images each (fast mode).
 *  - Phase 3: a single healthy-plant class, `TARGET_HEALTHY` images.
 *
 * A checkpoint (`progress.phase`, `progress.phaseIndex`) is saved after every
 * class. The checkpoint found on disk is snapshotted before any loop runs:
 * the Phase-1 loop used to overwrite it, so a run interrupted during Phase 2
 * always restarted Phase 2 from the first disease.
 */
async function main() {
  console.log("=".repeat(60));
  console.log("PLANT DISEASE DATASET COLLECTOR — FULL DB");
  console.log("=".repeat(60));

  // Load diseases from DB
  console.log("\nLoading diseases from database...");
  const dbDiseases = await loadDiseasesFromDb();
  console.log(` ${dbDiseases.length} diseases loaded`);

  const coreDiseases = dbDiseases.filter((d) => CORE_PLANTS.has(d.plantId));
  const fullDiseases = dbDiseases.filter((d) => !CORE_PLANTS.has(d.plantId));
  console.log(` Core plants: ${coreDiseases.length} diseases (target: ${TARGET_CORE})`);
  console.log(` Full set: ${fullDiseases.length} diseases (target: ${TARGET_FULL})`);

  // Load progress
  mkdirSync(DATASET_DIR, { recursive: true });
  const progress = loadProgress();

  // Snapshot the saved checkpoint; the loops below mutate `progress`.
  const resumePhase = progress.phase;
  const resumeIndex = progress.phaseIndex;

  const startTime = Date.now();

  // ── Phase 1: Core set ──────────────────────────────────────────────────

  console.log("\n" + "─".repeat(60));
  console.log("PHASE 1: Core Diseases (100 images each)");
  console.log("─".repeat(60));

  const coreStart = resumePhase === 0 ? resumeIndex : 0;
  if (coreStart > 0) {
    console.log(` Resuming from disease #${coreStart + 1} (${((coreStart / coreDiseases.length) * 100).toFixed(0)}% done)`);
  }

  for (let i = coreStart; i < coreDiseases.length; i++) {
    const d = coreDiseases[i];
    const classDir = resolve(DATASET_DIR, d.id);
    const queries = buildSearchQueries(d);
    const existingUrls = d.imageUrl ? [d.imageUrl] : [];

    const pct = Math.round((i / coreDiseases.length) * 100);
    console.log(`\n[${i + 1}/${coreDiseases.length}] (${pct}%) ${d.name || d.id} (${d.plantId})`);

    await collectClassImages(d.id, queries, TARGET_CORE, progress, classDir, existingUrls);

    // Save checkpoint: phase 0, next index i + 1. When the previous run had
    // already reached Phase 2, this loop is only a re-walk and must not move
    // the checkpoint backwards.
    if (resumePhase !== 1) {
      progress.phase = 0;
      progress.phaseIndex = i + 1;
    }
    saveProgress(progress);
  }

  // ── Phase 2: Full set ──────────────────────────────────────────────────

  console.log("\n" + "─".repeat(60));
  console.log("PHASE 2: Full Disease Set (10 images each)");
  console.log("─".repeat(60));

  const fullStart = resumePhase === 1 ? resumeIndex : 0;
  if (fullStart > 0) {
    console.log(` Resuming from disease #${fullStart + 1} (${((fullStart / fullDiseases.length) * 100).toFixed(0)}% done)`);
  }

  for (let i = fullStart; i < fullDiseases.length; i++) {
    const d = fullDiseases[i];
    const classDir = resolve(DATASET_DIR, d.id);
    const queries = buildSearchQueries(d);
    const existingUrls = d.imageUrl ? [d.imageUrl] : [];

    const pct = Math.round((i / fullDiseases.length) * 100);
    console.log(`\n[${i + 1}/${fullDiseases.length}] (${pct}%) ${d.id} (${d.plantId})`);

    await collectClassImages(d.id, queries, TARGET_FULL, progress, classDir, existingUrls, true);

    // Save checkpoint: phase 1, next index i + 1
    progress.phase = 1;
    progress.phaseIndex = i + 1;
    saveProgress(progress);
  }

  // ── Phase 3: Healthy class ──────────────────────────────────────────────

  console.log("\n" + "─".repeat(60));
  console.log("PHASE 3: Healthy Plant Images");
  console.log("─".repeat(60));

  const healthyDir = resolve(DATASET_DIR, HEALTHY_CLASS);
  const healthyCp = getClassProgress(progress, HEALTHY_CLASS);

  // Reconcile healthy class with files on disk
  const healthyActualCount = reconcileClassCount(healthyDir, healthyCp.count);
  if (healthyActualCount !== healthyCp.count) {
    healthyCp.count = healthyActualCount;
    saveProgress(progress);
  }

  const healthySeen = new Set(healthyCp.seenUrls);

  if (healthyCp.count >= TARGET_HEALTHY) {
    console.log(`\n ✓ Already have ${healthyCp.count}/${TARGET_HEALTHY}`);
  } else {
    // Collect all unique plants
    const allPlants = [...new Set(dbDiseases.map((d) => d.plantId))];
    const allHealthyQueries: string[] = [];
    for (const plant of allPlants) {
      allHealthyQueries.push(...buildHealthyQueries(plant));
    }

    const healthySources = [
      { name: "DDG", collector: collectImagesDuckDuckGo },
      { name: "iNat", collector: searchImagesInaturalist },
      { name: "Commons", collector: searchImagesCommons },
    ] as const;

    const totalHealthyUrls: string[] = [];
    let anyRemaining = false;

    for (const source of healthySources) {
      if (totalHealthyUrls.length >= TARGET_HEALTHY) break;
      console.log(`\n Source: ${source.name}`);

      // Only the first 20 generated queries are tried per source.
      for (const query of allHealthyQueries.slice(0, 20)) {
        if (totalHealthyUrls.length >= TARGET_HEALTHY) break;

        process.stdout.write(` "${query}"... `);
        const result = await source.collector(
          query,
          TARGET_HEALTHY - totalHealthyUrls.length,
          healthySeen,
        );
        totalHealthyUrls.push(...result.urls);
        if (!result.exhausted) anyRemaining = true;
        console.log(`${result.urls.length} new`);
      }
    }

    healthyCp.seenUrls = Array.from(healthySeen);

    if (totalHealthyUrls.length > 0) {
      healthyCp.exhausted = !anyRemaining;
      saveProgress(progress);

      console.log(`\n Downloading ${totalHealthyUrls.length} healthy images...`);
      const healthyStartIndex = countImagesInDir(healthyDir);
      const { downloaded, failed } = await downloadBatch(
        totalHealthyUrls,
        healthyDir,
        healthyStartIndex,
      );

      // Re-count actual files on disk
      const newHealthyTotal = countImagesInDir(healthyDir);
      healthyCp.count = newHealthyTotal;
      healthyCp.downloaded += downloaded;
      healthyCp.failed += failed;

      if (healthyCp.count >= TARGET_HEALTHY) {
        healthyCp.exhausted = true;
      }

      const pct = Math.round((healthyCp.count / TARGET_HEALTHY) * 100);
      console.log(
        ` Got ${downloaded} images. Total: ${healthyCp.count}/${TARGET_HEALTHY} (${pct}%)`,
      );
    } else {
      console.log(` ✗ No healthy images found`);
    }

    saveProgress(progress);
  }

  // ── Summary ────────────────────────────────────────────────────────────────

  // Mark all phases complete
  progress.phase = 3;
  progress.phaseIndex = 0;
  saveProgress(progress);

  const elapsed = Math.round((Date.now() - startTime) / 1000);
  const mins = Math.floor(elapsed / 60);
  const hrs = Math.floor(mins / 60);

  let totalDownloaded = 0;
  let totalFailed = 0;
  for (const cp of Object.values(progress.classes)) {
    totalDownloaded += cp.downloaded || 0;
    totalFailed += cp.failed || 0;
  }

  console.log("\n" + "=".repeat(60));
  console.log(" ✅ ALL PHASES COMPLETE");
  console.log("=".repeat(60));
  console.log(` Time: ${hrs}h ${mins % 60}m`);
  console.log(` Downloaded: ${totalDownloaded} images`);
  console.log(` Failed: ${totalFailed} images`);
  console.log(` Dataset: ${DATASET_DIR}/`);

  await closeDb();
  console.log("=".repeat(60));
}
|
||||
|
||||
/** Resolve after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
||||
|
||||
// Run the collector; any unhandled failure is logged and exits with status 1.
main().catch((error) => {
  console.error("Fatal error:", error);
  process.exit(1);
});
|
||||
143
apps/web/src/app/api/flag/report/route.ts
Normal file
143
apps/web/src/app/api/flag/report/route.ts
Normal file
@@ -0,0 +1,143 @@
|
||||
import { NextResponse } from "next/server";
|
||||
import { getDb } from "@/lib/db/index";
|
||||
import { flaggedContent, plants, diseases } from "@/lib/db/schema";
|
||||
import { inArray, sql } from "drizzle-orm";
|
||||
|
||||
/**
 * GET /api/flag/report
 *
 * Returns all flagged content grouped by content type, with resolved
 * plant/disease names for readability. Used by the generate-flagged-report script.
 *
 * Query params:
 *   minFlags - Optional minimum flag count to include (default: 1).
 *              Missing, non-numeric or values below 1 fall back to 1.
 *
 * Response body:
 *   total  - Number of matching flagged rows
 *   groups - Rows keyed by content type, each with a human-readable `label`
 *   items  - The same rows as a flat list (without `label`)
 *
 * Any failure is logged and answered with a 500 JSON error.
 */
export async function GET(request: Request) {
  try {
    const { searchParams } = new URL(request.url);

    // parseInt yields NaN for input such as "abc"; never bind that into SQL.
    const requested = Number.parseInt(searchParams.get("minFlags") ?? "1", 10);
    const minFlags = Number.isFinite(requested) && requested >= 1 ? requested : 1;

    const db = getDb();

    // Get all flagged entries. The column is interpolated from the schema so
    // the query keeps working if the column is renamed or the table aliased.
    const rows = await db
      .select()
      .from(flaggedContent)
      .where(sql`${flaggedContent.flagCount} >= ${minFlags}`)
      .orderBy(flaggedContent.contentType, flaggedContent.flagCount);

    if (rows.length === 0) {
      return NextResponse.json({
        total: 0,
        groups: {},
        items: [],
      });
    }

    // Resolve plant/disease names: "plant_image" rows reference a plant,
    // every other content type references a disease.
    const plantIds = new Set<string>();
    const diseaseIds = new Set<string>();
    for (const row of rows) {
      if (row.contentType === "plant_image") {
        plantIds.add(row.contentId);
      } else {
        diseaseIds.add(row.contentId);
      }
    }

    // Fetch disease names + their plant references first, so the plants they
    // point at can be resolved in the same single plant query below.
    const diseaseMap = new Map<string, { name: string; plantId: string }>();
    if (diseaseIds.size > 0) {
      const diseaseRows = await db
        .select({
          id: diseases.id,
          name: diseases.name,
          plantId: diseases.plantId,
        })
        .from(diseases)
        .where(inArray(diseases.id, [...diseaseIds]));
      for (const d of diseaseRows) {
        diseaseMap.set(d.id, { name: d.name, plantId: d.plantId });
        plantIds.add(d.plantId);
      }
    }

    // Fetch plant names (flagged plants and plants of flagged diseases)
    const plantMap = new Map<string, string>();
    if (plantIds.size > 0) {
      const plantRows = await db
        .select({ id: plants.id, name: plants.commonName })
        .from(plants)
        .where(inArray(plants.id, [...plantIds]));
      for (const p of plantRows) {
        plantMap.set(p.id, p.name);
      }
    }

    // Group by content type
    const groups: Record<string, Array<Record<string, unknown>>> = {};
    for (const row of rows) {
      const type = row.contentType;
      if (!groups[type]) groups[type] = [];

      // Fall back to the raw id when the referenced record no longer resolves.
      let label = row.contentId;
      if (type === "plant_image") {
        label = plantMap.get(row.contentId) ?? row.contentId;
      } else {
        const disease = diseaseMap.get(row.contentId);
        if (disease) {
          const plantName = plantMap.get(disease.plantId) ?? disease.plantId;
          label = `${disease.name} (on ${plantName})`;
        }
      }

      groups[type].push({
        id: row.id,
        contentType: row.contentType,
        contentId: row.contentId,
        fieldName: row.fieldName,
        label,
        notes: row.notes,
        flagCount: row.flagCount,
        createdAt: row.createdAt,
        updatedAt: row.updatedAt,
      });
    }

    return NextResponse.json({
      total: rows.length,
      groups,
      items: rows.map((row) => ({
        id: row.id,
        contentType: row.contentType,
        contentId: row.contentId,
        fieldName: row.fieldName,
        notes: row.notes,
        flagCount: row.flagCount,
        createdAt: row.createdAt,
        updatedAt: row.updatedAt,
      })),
    });
  } catch (err) {
    console.error("[Flag Report] Error fetching flagged content:", err);
    return NextResponse.json(
      { error: "Internal Server Error", message: "Failed to fetch flagged content", status: 500 },
      { status: 500 },
    );
  }
}
|
||||
148
apps/web/src/app/api/flag/route.ts
Normal file
148
apps/web/src/app/api/flag/route.ts
Normal file
@@ -0,0 +1,148 @@
|
||||
import { NextRequest, NextResponse } from "next/server";
|
||||
import { eq, and } from "drizzle-orm";
|
||||
import { getDb } from "@/lib/db/index";
|
||||
import { flaggedContent } from "@/lib/db/schema";
|
||||
import { v4 as uuidv4 } from "uuid";
|
||||
|
||||
/**
 * The closed set of content kinds a reviewer can flag.
 * `FlagContentType` is derived from this list, so the two cannot drift apart.
 */
const VALID_CONTENT_TYPES = [
  "plant_image",
  "disease_image",
  "disease_description",
  "disease_symptoms",
  "disease_causes",
  "disease_treatment",
  "disease_prevention",
] as const;

/** Union of the string literals listed in `VALID_CONTENT_TYPES`. */
type FlagContentType = (typeof VALID_CONTENT_TYPES)[number];

/** JSON body accepted by the flag endpoint. */
interface FlagRequestBody {
  /** Kind of content being flagged. */
  contentType: FlagContentType;
  /** ID of the plant or disease the content belongs to. */
  contentId: string;
  /** Specific field being flagged (e.g. "image", "symptoms"). */
  fieldName: string;
  /** Optional free-text reason for the flag. */
  notes?: string;
}
|
||||
|
||||
/**
 * POST /api/flag
 *
 * Flag content for manual review. If the same content_type + content_id + field_name
 * combination already exists, increments the flag_count. Otherwise creates a new entry.
 *
 * Body:
 *   contentType - Type of content being flagged (one of VALID_CONTENT_TYPES)
 *   contentId   - The ID of the plant or disease
 *   fieldName   - The specific field name (e.g., "image", "symptoms")
 *   notes       - Optional notes/reason for flagging
 *
 * Responses:
 *   201 - a new flag entry was created
 *   200 - an existing entry's flag count was incremented
 *   400 - the body is not valid JSON, not an object, or fails validation
 *   500 - unexpected failure (logged)
 */
export async function POST(request: NextRequest) {
  const badRequest = (message: string) =>
    NextResponse.json({ error: "Bad Request", message, status: 400 }, { status: 400 });

  try {
    // ── Parse the body: client mistakes must surface as 400, not 500 ──

    let payload: unknown;
    try {
      payload = await request.json();
    } catch {
      return badRequest("Request body must be valid JSON");
    }
    if (typeof payload !== "object" || payload === null || Array.isArray(payload)) {
      return badRequest("Request body must be a JSON object");
    }
    const { contentType, contentId, fieldName, notes } = payload as Record<string, unknown>;

    // ── Validate required fields ──

    const isFlagContentType = (value: unknown): value is FlagContentType =>
      typeof value === "string" && (VALID_CONTENT_TYPES as readonly string[]).includes(value);

    if (!isFlagContentType(contentType)) {
      return badRequest(`Invalid contentType. Must be one of: ${VALID_CONTENT_TYPES.join(", ")}`);
    }

    if (typeof contentId !== "string" || contentId.trim().length === 0) {
      return badRequest("contentId is required");
    }

    if (typeof fieldName !== "string" || fieldName.trim().length === 0) {
      return badRequest("fieldName is required");
    }

    // notes is optional (null is treated as absent), but when given it must be text.
    if (notes !== undefined && notes !== null && typeof notes !== "string") {
      return badRequest("notes must be a string");
    }
    const noteText = typeof notes === "string" ? notes : "";

    const db = getDb();

    // ── Check if this item was already flagged ──

    const existing = await db
      .select()
      .from(flaggedContent)
      .where(
        and(
          eq(flaggedContent.contentType, contentType),
          eq(flaggedContent.contentId, contentId),
          eq(flaggedContent.fieldName, fieldName),
        ),
      )
      .limit(1);

    if (existing.length > 0) {
      // Increment flag count and update timestamp.
      // NOTE(review): this read-then-write is not atomic, so two concurrent
      // flags for the same item can both write the same count; an in-database
      // increment would avoid the lost update.
      const current = existing[0];
      const nextCount = (current.flagCount ?? 0) + 1;
      await db
        .update(flaggedContent)
        .set({
          flagCount: nextCount,
          updatedAt: new Date().toISOString(),
          // Empty notes never overwrite notes stored by an earlier flag.
          ...(noteText ? { notes: noteText } : {}),
        })
        .where(eq(flaggedContent.id, current.id));

      return NextResponse.json({
        success: true,
        action: "incremented",
        flagCount: nextCount,
        message: "Flag count incremented. Thank you for your review input.",
      });
    }

    // ── Create new flag entry ──

    const id = uuidv4();
    await db.insert(flaggedContent).values({
      id,
      contentType,
      contentId,
      fieldName,
      notes: noteText,
      flagCount: 1,
    });

    console.log(`[Flag] New flag: type=${contentType} id=${contentId} field=${fieldName}`);

    return NextResponse.json(
      {
        success: true,
        action: "created",
        flagCount: 1,
        message: "Content flagged for manual review. Thank you!",
      },
      { status: 201 },
    );
  } catch (err) {
    console.error("[Flag] Error flagging content:", err);
    return NextResponse.json(
      { error: "Internal Server Error", message: "Failed to flag content", status: 500 },
      { status: 500 },
    );
  }
}
|
||||
@@ -3,6 +3,7 @@
|
||||
import { useState, useCallback, useMemo } from "react";
|
||||
import type { Disease, CausalAgentType, Prevalence, Severity } from "@/lib/types";
|
||||
import ImageLightbox from "@/components/ImageLightbox";
|
||||
import FlagButton from "@/components/FlagButton";
|
||||
|
||||
// ─── Severity badge ───
|
||||
|
||||
@@ -86,7 +87,7 @@ function DiseaseCard({
|
||||
</div>
|
||||
|
||||
{/* Disease image or placeholder */}
|
||||
<div className="mb-4 rounded-lg overflow-hidden border border-zinc-200 dark:border-zinc-700">
|
||||
<div className="mb-2 rounded-lg overflow-hidden border border-zinc-200 dark:border-zinc-700 relative">
|
||||
{disease.imageUrl ? (
|
||||
<button
|
||||
type="button"
|
||||
@@ -128,18 +129,47 @@ function DiseaseCard({
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
{/* Flag button for disease image */}
|
||||
<div className="flex justify-end mb-2">
|
||||
<FlagButton
|
||||
contentType="disease_image"
|
||||
contentId={disease.id}
|
||||
fieldName="image"
|
||||
label="disease image"
|
||||
small
|
||||
/>
|
||||
</div>
|
||||
|
||||
<p className="text-sm text-zinc-600 dark:text-zinc-300 leading-relaxed mb-4">
|
||||
<div className="flex items-start justify-between gap-4 mb-4">
|
||||
<p className="text-sm text-zinc-600 dark:text-zinc-300 leading-relaxed">
|
||||
{disease.description}
|
||||
</p>
|
||||
<FlagButton
|
||||
contentType="disease_description"
|
||||
contentId={disease.id}
|
||||
fieldName="description"
|
||||
label="description"
|
||||
small
|
||||
className="shrink-0 mt-0.5"
|
||||
/>
|
||||
</div>
|
||||
|
||||
{/* Details grid */}
|
||||
<div className="grid grid-cols-1 md:grid-cols-2 gap-4">
|
||||
{/* Symptoms */}
|
||||
<div>
|
||||
<h4 className="text-xs font-semibold uppercase tracking-wider text-red-600 dark:text-red-400 mb-2 flex items-center gap-1">
|
||||
<div className="flex items-center justify-between mb-2">
|
||||
<h4 className="text-xs font-semibold uppercase tracking-wider text-red-600 dark:text-red-400 flex items-center gap-1">
|
||||
<span aria-hidden="true">⚠️</span> Symptoms
|
||||
</h4>
|
||||
<FlagButton
|
||||
contentType="disease_symptoms"
|
||||
contentId={disease.id}
|
||||
fieldName="symptoms"
|
||||
label="symptoms"
|
||||
small
|
||||
/>
|
||||
</div>
|
||||
<ul className="space-y-1.5">
|
||||
{disease.symptoms.map((symptom, i) => (
|
||||
<li
|
||||
@@ -155,9 +185,18 @@ function DiseaseCard({
|
||||
|
||||
{/* Causes */}
|
||||
<div>
|
||||
<h4 className="text-xs font-semibold uppercase tracking-wider text-orange-600 dark:text-orange-400 mb-2 flex items-center gap-1">
|
||||
<div className="flex items-center justify-between mb-2">
|
||||
<h4 className="text-xs font-semibold uppercase tracking-wider text-orange-600 dark:text-orange-400 flex items-center gap-1">
|
||||
<span aria-hidden="true">🔍</span> Causes
|
||||
</h4>
|
||||
<FlagButton
|
||||
contentType="disease_causes"
|
||||
contentId={disease.id}
|
||||
fieldName="causes"
|
||||
label="causes"
|
||||
small
|
||||
/>
|
||||
</div>
|
||||
<ul className="space-y-1.5">
|
||||
{disease.causes.map((cause, i) => (
|
||||
<li
|
||||
@@ -173,9 +212,18 @@ function DiseaseCard({
|
||||
|
||||
{/* Treatment Steps */}
|
||||
<div>
|
||||
<h4 className="text-xs font-semibold uppercase tracking-wider text-leaf-green-600 dark:text-leaf-green-400 mb-2 flex items-center gap-1">
|
||||
<div className="flex items-center justify-between mb-2">
|
||||
<h4 className="text-xs font-semibold uppercase tracking-wider text-leaf-green-600 dark:text-leaf-green-400 flex items-center gap-1">
|
||||
<span aria-hidden="true">💊</span> Treatment Steps
|
||||
</h4>
|
||||
<FlagButton
|
||||
contentType="disease_treatment"
|
||||
contentId={disease.id}
|
||||
fieldName="treatment"
|
||||
label="treatment"
|
||||
small
|
||||
/>
|
||||
</div>
|
||||
<ol className="space-y-1.5 list-decimal list-inside">
|
||||
{disease.treatment.map((step, i) => (
|
||||
<li key={i} className="text-sm text-zinc-600 dark:text-zinc-300">
|
||||
@@ -187,9 +235,18 @@ function DiseaseCard({
|
||||
|
||||
{/* Prevention Tips */}
|
||||
<div>
|
||||
<h4 className="text-xs font-semibold uppercase tracking-wider text-leaf-green-600 dark:text-leaf-green-400 mb-2 flex items-center gap-1">
|
||||
<div className="flex items-center justify-between mb-2">
|
||||
<h4 className="text-xs font-semibold uppercase tracking-wider text-leaf-green-600 dark:text-leaf-green-400 flex items-center gap-1">
|
||||
<span aria-hidden="true">🛡️</span> Prevention Tips
|
||||
</h4>
|
||||
<FlagButton
|
||||
contentType="disease_prevention"
|
||||
contentId={disease.id}
|
||||
fieldName="prevention"
|
||||
label="prevention tips"
|
||||
small
|
||||
/>
|
||||
</div>
|
||||
<ul className="space-y-1.5">
|
||||
{disease.prevention.map((tip, i) => (
|
||||
<li
|
||||
|
||||
@@ -4,8 +4,10 @@ import { notFound } from "next/navigation";
|
||||
import type { Metadata } from "next";
|
||||
import { getPlantWithDiseases } from "@/lib/api/diseases-db";
|
||||
import { getPlantDescription } from "@/lib/display-helpers";
|
||||
import BetaNotice from "@/components/BetaNotice";
|
||||
import DiseaseCards from "./DiseaseCards";
|
||||
import PlantViewTracker from "@/components/PlantViewTracker";
|
||||
import FlagPlantImage from "@/components/FlagPlantImage";
|
||||
|
||||
interface Props {
|
||||
params: Promise<{ plantId: string }>;
|
||||
@@ -82,6 +84,8 @@ export default async function PlantDetailPage({ params }: Props) {
|
||||
</ol>
|
||||
</nav>
|
||||
|
||||
<BetaNotice variant="card" className="mb-6" />
|
||||
|
||||
{/* Plant hero */}
|
||||
<div className="flex flex-col sm:flex-row sm:items-start gap-6 mb-10">
|
||||
{/* Plant image */}
|
||||
@@ -114,6 +118,7 @@ export default async function PlantDetailPage({ params }: Props) {
|
||||
</svg>
|
||||
</div>
|
||||
)}
|
||||
<FlagPlantImage plantId={plantId} />
|
||||
</div>
|
||||
|
||||
<div className="flex-1 min-w-0">
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
import React, { Suspense } from "react";
|
||||
import { Suspense } from "react";
|
||||
import { getBrowsePlants } from "@/lib/api/browse";
|
||||
import BrowseContent from "./BrowseContent";
|
||||
import { PlantCardSkeleton } from "@/components/LoadingSkeleton";
|
||||
import BetaNotice from "@/components/BetaNotice";
|
||||
|
||||
/**
|
||||
* Browse page — fetches plants with disease counts from the database
|
||||
@@ -12,6 +13,8 @@ export default async function BrowsePage() {
|
||||
const allPlants = await getBrowsePlants();
|
||||
|
||||
return (
|
||||
<>
|
||||
<BetaNotice variant="full-width" />
|
||||
<Suspense
|
||||
fallback={
|
||||
<div className="mx-auto max-w-7xl px-4 sm:px-6 lg:px-8 py-8 sm:py-12">
|
||||
@@ -34,5 +37,6 @@ export default async function BrowsePage() {
|
||||
>
|
||||
<BrowseContent allPlants={allPlants} />
|
||||
</Suspense>
|
||||
</>
|
||||
);
|
||||
}
|
||||
|
||||
44
apps/web/src/components/BetaNotice.tsx
Normal file
44
apps/web/src/components/BetaNotice.tsx
Normal file
@@ -0,0 +1,44 @@
|
||||
/**
 * BetaNotice — banner telling visitors that the site is in beta, is
 * community-driven, and that most of its data has not been reviewed by a
 * human yet. It points people at the Flag button for reporting problems.
 *
 * Layout variants:
 * - "full-width" (default): edge-to-edge strip with a bottom border and an
 *   inner max-width wrapper.
 * - "card": rounded, fully bordered box meant to sit inside a content column.
 *
 * `className` is appended to the outer container's classes.
 */
export default function BetaNotice({
  variant = "full-width",
  className = "",
}: {
  variant?: "full-width" | "card";
  className?: string;
}) {
  let outerClasses: string;
  let innerClasses: string;

  if (variant === "card") {
    outerClasses = `rounded-xl bg-warning-amber-50 dark:bg-warning-amber-950/60 border border-warning-amber-200 dark:border-warning-amber-800 ${className}`;
    innerClasses = "px-4 sm:px-6 py-3";
  } else {
    outerClasses = `bg-warning-amber-50 dark:bg-warning-amber-950/60 border-b border-warning-amber-200 dark:border-warning-amber-800 ${className}`;
    innerClasses = "mx-auto max-w-7xl px-4 sm:px-6 lg:px-8 py-3";
  }

  return (
    <div className={outerClasses}>
      <div className={innerClasses}>
        <p className="text-xs sm:text-sm text-warning-amber-800 dark:text-warning-amber-200 text-center leading-relaxed">
          <span className="font-semibold">🚧 Beta — Community Driven.</span> Most data here is not
          reviewed by humans. Spot something wrong or it could be better? Use the{" "}
          {/* Inline replica of the flag icon + label so users recognise the real button */}
          <span className="inline-flex items-center gap-1 font-medium whitespace-nowrap">
            <svg className="h-3.5 w-3.5" viewBox="0 0 20 20" fill="currentColor" aria-hidden="true">
              <path d="M3.5 2.75a.75.75 0 00-1.5 0v14.5a.75.75 0 001.5 0v-4.392l1.657-.348a6.453 6.453 0 014.271.572 7.948 7.948 0 005.965.524l2.078-.64A.75.75 0 0018 12.25v-8.5a.75.75 0 00-.904-.734l-2.38.501a7.25 7.25 0 01-4.186-.363l-.502-.2a8.75 8.75 0 00-5.053-.439l-1.475.31V2.75z" />
            </svg>
            Flag
          </span>{" "}
          button on any image or description to flag it for review.
        </p>
      </div>
    </div>
  );
}
|
||||
@@ -6,6 +6,7 @@ import ConfidenceBadge, { getConfidenceColors } from "@/components/ConfidenceBad
|
||||
import SymptomChecker from "@/components/SymptomChecker";
|
||||
import TreatmentTimeline, { treatmentStepsWithUrgency } from "@/components/TreatmentTimeline";
|
||||
import LookalikeWarning from "@/components/LookalikeWarning";
|
||||
import FlagButton from "@/components/FlagButton";
|
||||
import { getLookalikeDiseases } from "@/lib/api/diseases";
|
||||
|
||||
/**
|
||||
@@ -45,7 +46,8 @@ export default function DiseaseCard({
|
||||
<article
|
||||
className={`
|
||||
group/card relative rounded-xl border-2 overflow-hidden transition-all duration-200
|
||||
${isPrimary
|
||||
${
|
||||
isPrimary
|
||||
? `${colors.border} ${colors.bg} shadow-md`
|
||||
: "border-zinc-200 dark:border-zinc-700 bg-white dark:bg-zinc-900 shadow-sm hover:shadow-md"
|
||||
}
|
||||
@@ -53,7 +55,9 @@ export default function DiseaseCard({
|
||||
>
|
||||
{/* Primary diagnosis ribbon */}
|
||||
{isPrimary && (
|
||||
<div className={`${colors.accent} text-white text-xs font-bold uppercase tracking-wider px-4 py-1.5 flex items-center gap-2`}>
|
||||
<div
|
||||
className={`${colors.accent} text-white text-xs font-bold uppercase tracking-wider px-4 py-1.5 flex items-center gap-2`}
|
||||
>
|
||||
<svg className="h-3.5 w-3.5" viewBox="0 0 20 20" fill="currentColor" aria-hidden="true">
|
||||
<path d="M9.049 2.927c.3-.921 1.603-.921 1.902 0l1.07 3.292a1 1 0 00.95.69h3.462c.969 0 1.371 1.24.588 1.81l-2.8 2.034a1 1 0 00-.364 1.118l1.07 3.292c.3.921-.755 1.688-1.54 1.118l-2.8-2.034a1 1 0 00-1.175 0l-2.8 2.034c-.784.57-1.838-.197-1.539-1.118l1.07-3.292a1 1 0 00-.364-1.118L2.98 8.72c-.783-.57-.38-1.81.588-1.81h3.461a1 1 0 00.951-.69l1.07-3.292z" />
|
||||
</svg>
|
||||
@@ -71,13 +75,16 @@ export default function DiseaseCard({
|
||||
>
|
||||
<div className="flex items-start gap-3">
|
||||
{/* Rank / causal agent icon */}
|
||||
<div className={`
|
||||
<div
|
||||
className={`
|
||||
flex h-9 w-9 shrink-0 items-center justify-center rounded-lg text-sm font-bold
|
||||
${isPrimary
|
||||
${
|
||||
isPrimary
|
||||
? `${colors.accent} text-white`
|
||||
: "bg-zinc-100 dark:bg-zinc-800 text-zinc-600 dark:text-zinc-400"
|
||||
}
|
||||
`}>
|
||||
`}
|
||||
>
|
||||
{rank}
|
||||
</div>
|
||||
|
||||
@@ -93,9 +100,7 @@ export default function DiseaseCard({
|
||||
<p className="mt-0.5 text-xs italic text-zinc-500 dark:text-zinc-400">
|
||||
{disease.scientificName}
|
||||
</p>
|
||||
<p className="mt-1 text-sm text-zinc-600 dark:text-zinc-400 line-clamp-2">
|
||||
{summary}
|
||||
</p>
|
||||
<p className="mt-1 text-sm text-zinc-600 dark:text-zinc-400 line-clamp-2">{summary}</p>
|
||||
</div>
|
||||
|
||||
{/* Expand/collapse chevron */}
|
||||
@@ -105,7 +110,11 @@ export default function DiseaseCard({
|
||||
fill="currentColor"
|
||||
aria-hidden="true"
|
||||
>
|
||||
<path fillRule="evenodd" d="M5.22 7.22a.75.75 0 011.06 0L10 10.94l3.72-3.72a.75.75 0 111.06 1.06l-4.25 4.25a.75.75 0 01-1.06 0L5.22 8.28a.75.75 0 010-1.06z" clipRule="evenodd" />
|
||||
<path
|
||||
fillRule="evenodd"
|
||||
d="M5.22 7.22a.75.75 0 011.06 0L10 10.94l3.72-3.72a.75.75 0 111.06 1.06l-4.25 4.25a.75.75 0 01-1.06 0L5.22 8.28a.75.75 0 010-1.06z"
|
||||
clipRule="evenodd"
|
||||
/>
|
||||
</svg>
|
||||
</div>
|
||||
</button>
|
||||
@@ -123,9 +132,18 @@ export default function DiseaseCard({
|
||||
|
||||
{/* Full description */}
|
||||
<div>
|
||||
<h4 className="text-sm font-semibold text-zinc-900 dark:text-zinc-100 mb-1">
|
||||
<div className="flex items-center justify-between mb-1">
|
||||
<h4 className="text-sm font-semibold text-zinc-900 dark:text-zinc-100">
|
||||
Description
|
||||
</h4>
|
||||
<FlagButton
|
||||
contentType="disease_description"
|
||||
contentId={disease.id}
|
||||
fieldName="description"
|
||||
label="description"
|
||||
small
|
||||
/>
|
||||
</div>
|
||||
<p className="text-sm leading-relaxed text-zinc-600 dark:text-zinc-400">
|
||||
{disease.description}
|
||||
</p>
|
||||
@@ -133,17 +151,47 @@ export default function DiseaseCard({
|
||||
|
||||
{/* Symptom checker */}
|
||||
<div>
|
||||
<div className="flex items-center justify-between mb-1">
|
||||
<h4 className="text-sm font-semibold text-zinc-900 dark:text-zinc-100">
|
||||
Symptom Checker
|
||||
</h4>
|
||||
<FlagButton
|
||||
contentType="disease_symptoms"
|
||||
contentId={disease.id}
|
||||
fieldName="symptoms"
|
||||
label="symptoms"
|
||||
small
|
||||
/>
|
||||
</div>
|
||||
<SymptomChecker symptoms={disease.symptoms} />
|
||||
</div>
|
||||
|
||||
{/* Causes */}
|
||||
<div>
|
||||
<h4 className="text-sm font-semibold text-zinc-900 dark:text-zinc-100 mb-2 flex items-center gap-2">
|
||||
<svg className="h-4 w-4 text-zinc-400" viewBox="0 0 20 20" fill="currentColor" aria-hidden="true">
|
||||
<path fillRule="evenodd" d="M18 10a8 8 0 11-16 0 8 8 0 0116 0zm-8-3a1 1 0 00-.867.5 1 1 0 11-1.731-1A3 3 0 0113 8a3.001 3.001 0 01-2 2.83V11a1 1 0 11-2 0v-1a1 1 0 011-1 1 1 0 100-2zm0 8a1 1 0 100-2 1 1 0 000 2z" clipRule="evenodd" />
|
||||
<div className="flex items-center justify-between mb-2">
|
||||
<h4 className="text-sm font-semibold text-zinc-900 dark:text-zinc-100 flex items-center gap-2">
|
||||
<svg
|
||||
className="h-4 w-4 text-zinc-400"
|
||||
viewBox="0 0 20 20"
|
||||
fill="currentColor"
|
||||
aria-hidden="true"
|
||||
>
|
||||
<path
|
||||
fillRule="evenodd"
|
||||
d="M18 10a8 8 0 11-16 0 8 8 0 0116 0zm-8-3a1 1 0 00-.867.5 1 1 0 11-1.731-1A3 3 0 0113 8a3.001 3.001 0 01-2 2.83V11a1 1 0 11-2 0v-1a1 1 0 011-1 1 1 0 100-2zm0 8a1 1 0 100-2 1 1 0 000 2z"
|
||||
clipRule="evenodd"
|
||||
/>
|
||||
</svg>
|
||||
Causes & Contributing Factors
|
||||
</h4>
|
||||
<FlagButton
|
||||
contentType="disease_causes"
|
||||
contentId={disease.id}
|
||||
fieldName="causes"
|
||||
label="causes"
|
||||
small
|
||||
/>
|
||||
</div>
|
||||
<ul className="space-y-1.5" role="list">
|
||||
{disease.causes.map((cause, i) => (
|
||||
<li key={i} className="flex items-start gap-2">
|
||||
@@ -156,12 +204,30 @@ export default function DiseaseCard({
|
||||
|
||||
{/* Treatment timeline */}
|
||||
<div>
|
||||
<h4 className="text-sm font-semibold text-zinc-900 dark:text-zinc-100 mb-2 flex items-center gap-2">
|
||||
<svg className="h-4 w-4 text-leaf-green-600 dark:text-leaf-green-400" viewBox="0 0 20 20" fill="currentColor" aria-hidden="true">
|
||||
<path fillRule="evenodd" d="M10 18a8 8 0 100-16 8 8 0 000 16zm3.857-9.809a.75.75 0 00-1.214-.882l-3.483 4.79-1.88-1.88a.75.75 0 10-1.06 1.061l2.5 2.5a.75.75 0 001.137-.089l4-5.5z" clipRule="evenodd" />
|
||||
<div className="flex items-center justify-between mb-2">
|
||||
<h4 className="text-sm font-semibold text-zinc-900 dark:text-zinc-100 flex items-center gap-2">
|
||||
<svg
|
||||
className="h-4 w-4 text-leaf-green-600 dark:text-leaf-green-400"
|
||||
viewBox="0 0 20 20"
|
||||
fill="currentColor"
|
||||
aria-hidden="true"
|
||||
>
|
||||
<path
|
||||
fillRule="evenodd"
|
||||
d="M10 18a8 8 0 100-16 8 8 0 000 16zm3.857-9.809a.75.75 0 00-1.214-.882l-3.483 4.79-1.88-1.88a.75.75 0 10-1.06 1.061l2.5 2.5a.75.75 0 001.137-.089l4-5.5z"
|
||||
clipRule="evenodd"
|
||||
/>
|
||||
</svg>
|
||||
Treatment Plan
|
||||
</h4>
|
||||
<FlagButton
|
||||
contentType="disease_treatment"
|
||||
contentId={disease.id}
|
||||
fieldName="treatment"
|
||||
label="treatment"
|
||||
small
|
||||
/>
|
||||
</div>
|
||||
<TreatmentTimeline
|
||||
steps={treatmentStepsWithUrgency(disease.treatment)}
|
||||
severity={disease.severity}
|
||||
@@ -170,12 +236,30 @@ export default function DiseaseCard({
|
||||
|
||||
{/* Prevention tips */}
|
||||
<div>
|
||||
<h4 className="text-sm font-semibold text-zinc-900 dark:text-zinc-100 mb-2 flex items-center gap-2">
|
||||
<svg className="h-4 w-4 text-leaf-green-600 dark:text-leaf-green-400" viewBox="0 0 20 20" fill="currentColor" aria-hidden="true">
|
||||
<path fillRule="evenodd" d="M6.32 2.577a49.255 49.255 0 0111.36 0c1.497.174 2.57 1.46 2.57 2.93V21a.75.75 0 01-1.085.67L10 18.089l-9.165 3.583A.75.75 0 010 21V5.507c0-1.47 1.073-2.756 2.57-2.93a49.254 49.254 0 0111.36 0zM12 9a2 2 0 11-4 0 2 2 0 014 0zm-2 3a1 1 0 00-1 1v1a1 1 0 001 1h0a1 1 0 001-1v-1a1 1 0 00-1-1z" clipRule="evenodd" />
|
||||
<div className="flex items-center justify-between mb-2">
|
||||
<h4 className="text-sm font-semibold text-zinc-900 dark:text-zinc-100 flex items-center gap-2">
|
||||
<svg
|
||||
className="h-4 w-4 text-leaf-green-600 dark:text-leaf-green-400"
|
||||
viewBox="0 0 20 20"
|
||||
fill="currentColor"
|
||||
aria-hidden="true"
|
||||
>
|
||||
<path
|
||||
fillRule="evenodd"
|
||||
d="M6.32 2.577a49.255 49.255 0 0111.36 0c1.497.174 2.57 1.46 2.57 2.93V21a.75.75 0 01-1.085.67L10 18.089l-9.165 3.583A.75.75 0 010 21V5.507c0-1.47 1.073-2.756 2.57-2.93a49.254 49.254 0 0111.36 0zM12 9a2 2 0 11-4 0 2 2 0 014 0zm-2 3a1 1 0 00-1 1v1a1 1 0 001 1h0a1 1 0 001-1v-1a1 1 0 00-1-1z"
|
||||
clipRule="evenodd"
|
||||
/>
|
||||
</svg>
|
||||
Prevention Tips
|
||||
</h4>
|
||||
<FlagButton
|
||||
contentType="disease_prevention"
|
||||
contentId={disease.id}
|
||||
fieldName="prevention"
|
||||
label="prevention tips"
|
||||
small
|
||||
/>
|
||||
</div>
|
||||
<ul className="space-y-1.5" role="list">
|
||||
{disease.prevention.map((tip, i) => (
|
||||
<li key={i} className="flex items-start gap-2">
|
||||
@@ -187,9 +271,7 @@ export default function DiseaseCard({
|
||||
</div>
|
||||
|
||||
{/* Lookalike warnings */}
|
||||
{lookalikes.length > 0 && (
|
||||
<LookalikeWarning disease={disease} lookalikes={lookalikes} />
|
||||
)}
|
||||
{lookalikes.length > 0 && <LookalikeWarning disease={disease} lookalikes={lookalikes} />}
|
||||
|
||||
{/* Feedback buttons */}
|
||||
<div className="pt-2 border-t border-zinc-200 dark:border-zinc-700">
|
||||
@@ -203,7 +285,8 @@ export default function DiseaseCard({
|
||||
className={`
|
||||
inline-flex items-center gap-1.5 rounded-lg px-3 py-1.5 text-sm font-medium
|
||||
transition-colors
|
||||
${feedback === "yes"
|
||||
${
|
||||
feedback === "yes"
|
||||
? "bg-leaf-green-100 dark:bg-leaf-green-900/50 text-leaf-green-700 dark:text-leaf-green-300 ring-1 ring-leaf-green-300 dark:ring-leaf-green-700"
|
||||
: "bg-zinc-100 dark:bg-zinc-800 text-zinc-600 dark:text-zinc-400 hover:bg-zinc-200 dark:hover:bg-zinc-700"
|
||||
}
|
||||
@@ -221,7 +304,8 @@ export default function DiseaseCard({
|
||||
className={`
|
||||
inline-flex items-center gap-1.5 rounded-lg px-3 py-1.5 text-sm font-medium
|
||||
transition-colors
|
||||
${feedback === "no"
|
||||
${
|
||||
feedback === "no"
|
||||
? "bg-red-100 dark:bg-red-900/50 text-red-700 dark:text-red-300 ring-1 ring-red-300 dark:ring-red-700"
|
||||
: "bg-zinc-100 dark:bg-zinc-800 text-zinc-600 dark:text-zinc-400 hover:bg-zinc-200 dark:hover:bg-zinc-700"
|
||||
}
|
||||
|
||||
179
apps/web/src/components/FlagButton.tsx
Normal file
179
apps/web/src/components/FlagButton.tsx
Normal file
@@ -0,0 +1,179 @@
|
||||
"use client";
|
||||
|
||||
import { useCallback, useEffect, useRef, useState } from "react";
|
||||
|
||||
/**
 * Every kind of content a user can flag for manual review:
 * plant or disease images, plus the individual text sections of a disease.
 */
export type FlagContentType =
  // Images
  | "plant_image"
  | "disease_image"
  // Disease text sections
  | "disease_description"
  | "disease_symptoms"
  | "disease_causes"
  | "disease_treatment"
  | "disease_prevention";
|
||||
|
||||
/** Props accepted by the FlagButton component. */
interface FlagButtonProps {
  /** Which kind of content is being flagged. */
  contentType: FlagContentType;

  /** ID of the plant or disease the content belongs to. */
  contentId: string;

  /** Field within that record, e.g. "image", "symptoms", "causes", "treatment", "prevention". */
  fieldName: string;

  /** Human-readable name of the content, used in the tooltip and aria-label (e.g. "plant image"). */
  label?: string;

  /** Optional pre-filled notes / reason for the flag. */
  notes?: string;

  /** Render the compact variant intended for inline use. */
  small?: boolean;

  /** Extra class names appended to the button's own classes. */
  className?: string;
}
|
||||
|
||||
/**
 * FlagButton — a small button that lets users flag content for manual review.
 *
 * Clicking it POSTs `{ contentType, contentId, fieldName, notes? }` as JSON to
 * `/api/flag`. On success the button shows a "Flagged" state (with the count
 * returned by the server, defaulting to 1) and on failure an "Error" state;
 * either state reverts to idle after 3 seconds so the user can try again.
 *
 * Clicks are ignored while a request is in flight or while the "Flagged"
 * state is showing.
 *
 * The pending revert timer is tracked in a ref so that it is cancelled when a
 * new request starts (otherwise a timer left over from an earlier error would
 * cut a later "Flagged" state short) and when the component unmounts. State
 * is not touched if the component unmounted while the request was in flight.
 *
 * @param contentType - Kind of content being flagged.
 * @param contentId   - ID of the plant or disease.
 * @param fieldName   - Specific field being flagged.
 * @param label       - Optional human-readable name used in title/aria-label.
 * @param notes       - Optional notes sent with the flag; omitted from the
 *                      request body when undefined.
 *                      NOTE(review): confirm the /api/flag handler reads `notes`.
 * @param small       - Compact styling for inline use.
 * @param className   - Extra classes appended to the button.
 */
export default function FlagButton({
  contentType,
  contentId,
  fieldName,
  label,
  notes,
  small = false,
  className = "",
}: FlagButtonProps) {
  const [state, setState] = useState<"idle" | "loading" | "flagged" | "error">("idle");
  const [flagCount, setFlagCount] = useState(0);
  const [errorMsg, setErrorMsg] = useState("");

  // Handle of the pending "revert to idle" timer, if any.
  const resetTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
  // False once the component has unmounted; guards state updates after awaits.
  const mountedRef = useRef(true);

  const clearResetTimer = useCallback(() => {
    if (resetTimerRef.current !== null) {
      clearTimeout(resetTimerRef.current);
      resetTimerRef.current = null;
    }
  }, []);

  useEffect(() => {
    // Set on every mount so a Strict Mode remount re-arms the flag.
    mountedRef.current = true;
    return () => {
      mountedRef.current = false;
      clearResetTimer();
    };
  }, [clearResetTimer]);

  const handleFlag = useCallback(async () => {
    if (state === "loading" || state === "flagged") return;

    // A click during the "error" state may still have a revert timer pending;
    // cancel it so it cannot reset the state produced by this new request.
    clearResetTimer();
    setState("loading");
    setErrorMsg("");

    // Replaces any pending revert with a fresh 3-second one.
    const scheduleReset = (onReset: () => void) => {
      clearResetTimer();
      resetTimerRef.current = setTimeout(() => {
        resetTimerRef.current = null;
        onReset();
      }, 3000);
    };

    try {
      const res = await fetch("/api/flag", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        // JSON.stringify drops `notes` when it is undefined, so callers that
        // do not pass notes send exactly the same payload as before.
        body: JSON.stringify({ contentType, contentId, fieldName, notes }),
      });

      if (!res.ok) {
        // The error body may not be JSON; fall back to a generic message.
        const data = await res.json().catch(() => ({ message: "Failed to flag content" }));
        throw new Error(data.message || "Failed to flag content");
      }

      const data = await res.json();
      if (!mountedRef.current) return;

      setFlagCount(data.flagCount ?? 1);
      setState("flagged");

      // Reset back to idle after a moment so user can flag again if needed
      scheduleReset(() => setState("idle"));
    } catch (err) {
      if (!mountedRef.current) return;

      setErrorMsg(err instanceof Error ? err.message : "Failed to flag");
      setState("error");

      scheduleReset(() => {
        setState("idle");
        setErrorMsg("");
      });
    }
  }, [contentType, contentId, fieldName, notes, state, clearResetTimer]);

  // ─── Button state styles ────────────────────────────────────────────────────

  const baseClasses = small
    ? "inline-flex items-center gap-1 rounded px-1.5 py-0.5 text-xs font-medium transition-all"
    : "inline-flex items-center gap-1.5 rounded-lg px-2.5 py-1.5 text-xs font-medium transition-all";

  const idleClasses =
    "text-zinc-400 dark:text-zinc-500 hover:text-amber-600 dark:hover:text-amber-400 hover:bg-amber-50 dark:hover:bg-amber-950/30 border border-transparent hover:border-amber-200 dark:hover:border-amber-800";

  const loadingClasses = "text-zinc-300 dark:text-zinc-600 cursor-wait";

  const flaggedClasses =
    "text-amber-700 dark:text-amber-300 bg-amber-50 dark:bg-amber-950/40 border border-amber-200 dark:border-amber-700";

  const errorClasses =
    "text-red-600 dark:text-red-400 bg-red-50 dark:bg-red-950/40 border border-red-200 dark:border-red-800";

  const stateClasses =
    state === "loading"
      ? loadingClasses
      : state === "flagged"
        ? flaggedClasses
        : state === "error"
          ? errorClasses
          : idleClasses;

  const iconSize = small ? "h-3 w-3" : "h-3.5 w-3.5";

  return (
    <button
      type="button"
      onClick={handleFlag}
      disabled={state === "loading"}
      className={`${baseClasses} ${stateClasses} ${className}`}
      title={
        state === "flagged"
          ? `Flagged (${flagCount}×)`
          : state === "error"
            ? errorMsg
            : `Flag for manual review${label ? ` — ${label}` : ""}`
      }
      aria-label={
        state === "flagged"
          ? `Flagged for review (${flagCount} times)`
          : `Flag this ${label || "content"} for manual review`
      }
    >
      {/* Icon: spinner while the request is in flight, flag otherwise */}
      {state === "loading" ? (
        <svg
          className={`${iconSize} animate-spin`}
          viewBox="0 0 24 24"
          fill="none"
          aria-hidden="true"
        >
          <circle
            className="opacity-25"
            cx="12"
            cy="12"
            r="10"
            stroke="currentColor"
            strokeWidth="4"
          />
          <path
            className="opacity-75"
            fill="currentColor"
            d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z"
          />
        </svg>
      ) : (
        <svg className={iconSize} viewBox="0 0 20 20" fill="currentColor" aria-hidden="true">
          <path d="M3.5 2.75a.75.75 0 00-1.5 0v14.5a.75.75 0 001.5 0v-4.392l1.657-.348a6.453 6.453 0 014.271.572 7.948 7.948 0 005.965.524l2.078-.64A.75.75 0 0018 12.25v-8.5a.75.75 0 00-.904-.734l-2.38.501a7.25 7.25 0 01-4.186-.363l-.502-.2a8.75 8.75 0 00-5.053-.439l-1.475.31V2.75z" />
        </svg>
      )}

      {/* Text */}
      {state === "flagged" ? (
        <span>Flagged{flagCount > 1 ? ` (${flagCount}×)` : ""}</span>
      ) : state === "error" ? (
        <span>Error</span>
      ) : (
        <span>Flag</span>
      )}
    </button>
  );
}
|
||||
26
apps/web/src/components/FlagPlantImage.tsx
Normal file
26
apps/web/src/components/FlagPlantImage.tsx
Normal file
@@ -0,0 +1,26 @@
|
||||
"use client";
|
||||
|
||||
import { useCallback } from "react";
|
||||
import FlagButton from "@/components/FlagButton";
|
||||
|
||||
/**
 * Client-side wrapper that overlays a small flag button on the plant image
 * of the detail page, which is itself a server component.
 *
 * The wrapper is absolutely positioned at the bottom-right corner and stops
 * click events from propagating to ancestor elements.
 */
export default function FlagPlantImage({ plantId }: { plantId: string }) {
  // Stable handler: keeps clicks on the overlay from bubbling further up.
  const swallowClick = useCallback((event: React.MouseEvent) => {
    event.stopPropagation();
  }, []);

  const flagButton = (
    <FlagButton
      contentType="plant_image"
      contentId={plantId}
      fieldName="image"
      label="plant image"
      small
    />
  );

  return (
    <div className="absolute bottom-1 right-1 z-10" onClick={swallowClick}>
      {flagButton}
    </div>
  );
}
|
||||
@@ -1,5 +1,9 @@
|
||||
"use client";
|
||||
|
||||
import Image from "next/image";
|
||||
import Link from "next/link";
|
||||
import FlagButton from "@/components/FlagButton";
|
||||
import { useCallback } from "react";
|
||||
|
||||
export interface PlantCardData {
|
||||
id: string;
|
||||
@@ -23,6 +27,11 @@ interface PlantCardProps {
|
||||
* Used on the homepage featured section and browse grid.
|
||||
*/
|
||||
export default function PlantCard({ plant, showDiseaseCount = true }: PlantCardProps) {
|
||||
const handleFlagClick = useCallback((e: React.MouseEvent) => {
|
||||
e.preventDefault();
|
||||
e.stopPropagation();
|
||||
}, []);
|
||||
|
||||
return (
|
||||
<Link
|
||||
href={`/browse/${plant.id}`}
|
||||
@@ -58,6 +67,16 @@ export default function PlantCard({ plant, showDiseaseCount = true }: PlantCardP
|
||||
</svg>
|
||||
</div>
|
||||
)}
|
||||
{/* Flag button overlay at bottom-right of image */}
|
||||
<div className="absolute bottom-1.5 right-1.5 z-10" onClick={handleFlagClick}>
|
||||
<FlagButton
|
||||
contentType="plant_image"
|
||||
contentId={plant.id}
|
||||
fieldName="image"
|
||||
label="plant image"
|
||||
small
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className="p-4">
|
||||
|
||||
@@ -10,7 +10,14 @@ import { drizzle, type LibSQLDatabase } from "drizzle-orm/libsql";
|
||||
import { createClient } from "@libsql/client";
|
||||
import * as schema from "./schema";
|
||||
|
||||
export type { PlantRow, PlantInsert, DiseaseRow, DiseaseInsert } from "./schema";
|
||||
export type {
|
||||
PlantRow,
|
||||
PlantInsert,
|
||||
DiseaseRow,
|
||||
DiseaseInsert,
|
||||
FlaggedContentRow,
|
||||
FlaggedContentInsert,
|
||||
} from "./schema";
|
||||
|
||||
export { schema };
|
||||
|
||||
|
||||
@@ -113,6 +113,52 @@ export const plantViews = sqliteTable(
|
||||
}),
|
||||
);
|
||||
|
||||
// ─── Flagged Content Table ─────────────────────────────────────────────────
|
||||
|
||||
/** Allowed values of `flagged_content.content_type`. */
const FLAGGED_CONTENT_TYPES = [
  "plant_image",
  "disease_image",
  "disease_description",
  "disease_symptoms",
  "disease_causes",
  "disease_treatment",
  "disease_prevention",
] as const;

/**
 * User-flagged content awaiting manual review.
 *
 * Columns:
 *   content_type — what kind of content was flagged (one of FLAGGED_CONTENT_TYPES)
 *   content_id   — ID of the plant or disease
 *   field_name   — specific field flagged (e.g., "image", "symptoms", "causes", "treatment", "prevention")
 *   notes        — free-text notes, defaults to an empty string
 *   flag_count   — number of times this item has been flagged, defaults to 1
 *   created_at / updated_at — text timestamps defaulting to SQLite's datetime('now')
 *
 * Indexed on content_type and on content_id.
 */
export const flaggedContent = sqliteTable(
  "flagged_content",
  {
    id: text("id").primaryKey(),
    contentType: text("content_type", { enum: FLAGGED_CONTENT_TYPES }).notNull(),
    contentId: text("content_id").notNull(),
    fieldName: text("field_name").notNull(),
    notes: text("notes").default(""),
    flagCount: integer("flag_count").notNull().default(1),
    createdAt: text("created_at")
      .notNull()
      .default(sql`(datetime('now'))`),
    updatedAt: text("updated_at")
      .notNull()
      .default(sql`(datetime('now'))`),
  },
  (table) => {
    return {
      contentTypeIdx: index("idx_flagged_content_type").on(table.contentType),
      contentIdIdx: index("idx_flagged_content_id").on(table.contentId),
    };
  },
);

// ─── Type helpers ────────────────────────────────────────────────────────────

/** Shape of a row read from `flagged_content`. */
export type FlaggedContentRow = typeof flaggedContent.$inferSelect;
/** Shape of the values accepted when inserting into `flagged_content`. */
export type FlaggedContentInsert = typeof flaggedContent.$inferInsert;
|
||||
|
||||
// ─── Relation Inference ──────────────────────────────────────────────────────
|
||||
|
||||
export const plantsRelations = {};
|
||||
|
||||
@@ -173,14 +173,14 @@ describe("imageToTensor", () => {
|
||||
|
||||
describe("tensorToBase64 / base64ToTensor", () => {
|
||||
it("round-trips tensor data correctly", () => {
|
||||
const imageData = createMockImageData(224, 224, 100, 150, 200);
|
||||
const imageData = createMockImageData(160, 160, 100, 150, 200);
|
||||
const original = imageToTensor(imageData);
|
||||
|
||||
const base64 = tensorToBase64(original);
|
||||
const decoded = base64ToTensor(base64);
|
||||
|
||||
expect(decoded.tensor.length).toBe(original.length);
|
||||
expect(decoded.shape).toEqual([3, 224, 224]);
|
||||
expect(decoded.shape).toEqual([3, 160, 160]);
|
||||
|
||||
// Check a few values match
|
||||
for (let i = 0; i < 10; i++) {
|
||||
@@ -197,9 +197,9 @@ describe("tensorToBase64 / base64ToTensor", () => {
|
||||
});
|
||||
|
||||
describe("getTensorShape", () => {
|
||||
it("returns [1, 3, 224, 224] by default", () => {
|
||||
it("returns [1, 3, 160, 160] by default", () => {
|
||||
const shape = getTensorShape();
|
||||
expect(shape).toEqual([1, 3, 224, 224]);
|
||||
expect(shape).toEqual([1, 3, 160, 160]);
|
||||
});
|
||||
|
||||
it("returns NCHW layout", () => {
|
||||
@@ -207,8 +207,8 @@ describe("getTensorShape", () => {
|
||||
expect(shape.length).toBe(4);
|
||||
expect(shape[0]).toBe(1); // batch
|
||||
expect(shape[1]).toBe(3); // channels
|
||||
expect(shape[2]).toBe(224); // height
|
||||
expect(shape[3]).toBe(224); // width
|
||||
expect(shape[2]).toBe(160); // height (model input size)
|
||||
expect(shape[3]).toBe(160); // width (model input size)
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@@ -48,12 +48,7 @@ export const MAX_FILE_SIZE = 10 * 1024 * 1024;
|
||||
export const MIN_DIMENSION = 150;
|
||||
|
||||
/** Allowed MIME types */
|
||||
export const ALLOWED_MIME_TYPES = [
|
||||
"image/png",
|
||||
"image/jpeg",
|
||||
"image/jpg",
|
||||
"image/webp",
|
||||
] as const;
|
||||
export const ALLOWED_MIME_TYPES = ["image/png", "image/jpeg", "image/jpg", "image/webp"] as const;
|
||||
|
||||
export type AllowedMimeType = (typeof ALLOWED_MIME_TYPES)[number];
|
||||
|
||||
@@ -66,9 +61,7 @@ export const MAX_UPLOADS = 100;
|
||||
* Validate that a file is an acceptable image for upload.
|
||||
* Returns `{ ok: true }` or `{ ok: false, error: string }`.
|
||||
*/
|
||||
export function validateImageFile(file: File):
|
||||
| { ok: true }
|
||||
| { ok: false; error: string } {
|
||||
export function validateImageFile(file: File): { ok: true } | { ok: false; error: string } {
|
||||
// MIME type check
|
||||
if (!ALLOWED_MIME_TYPES.includes(file.type as AllowedMimeType)) {
|
||||
return {
|
||||
@@ -127,10 +120,7 @@ export function validateImageDimensions(
|
||||
* @param size - Target dimension (square). Defaults to IMAGE_MODEL_SIZE env or 224.
|
||||
* @returns ImageData at exactly `size × size`
|
||||
*/
|
||||
export async function resizeImage(
|
||||
file: File,
|
||||
size: number = getConfig().size,
|
||||
): Promise<ImageData> {
|
||||
export async function resizeImage(file: File, size: number = getConfig().size): Promise<ImageData> {
|
||||
return new Promise((resolve, reject) => {
|
||||
const img = new Image();
|
||||
img.onload = () => {
|
||||
@@ -193,8 +183,7 @@ export function imageToTensor(imageData: ImageData): Float32Array {
|
||||
|
||||
// Normalize with ImageNet mean/std
|
||||
for (let c = 0; c < 3; c++) {
|
||||
const channel =
|
||||
c === 0 ? rChannel : c === 1 ? gChannel : bChannel;
|
||||
const channel = c === 0 ? rChannel : c === 1 ? gChannel : bChannel;
|
||||
const m = mean[c];
|
||||
const s = std[c];
|
||||
for (let i = 0; i < totalPixels; i++) {
|
||||
@@ -253,5 +242,3 @@ export function base64ToTensor(base64: string): {
|
||||
shape: envelope.shape as [number, number, number],
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -97,7 +97,7 @@ describe("createZeroTensor", () => {
|
||||
|
||||
it("all values are zero", () => {
|
||||
const tensor = createZeroTensor();
|
||||
expect(tensor.every(v => v === 0)).toBe(true);
|
||||
expect(tensor.every((v) => v === 0)).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -114,12 +114,12 @@ describe("createRandomTensor", () => {
|
||||
|
||||
it("all values are finite", () => {
|
||||
const tensor = createRandomTensor();
|
||||
expect(tensor.every(v => Number.isFinite(v))).toBe(true);
|
||||
expect(tensor.every((v) => Number.isFinite(v))).toBe(true);
|
||||
});
|
||||
|
||||
it("produces varied values", () => {
|
||||
const tensor = createRandomTensor();
|
||||
const uniqueValues = new Set(tensor.map(v => v.toFixed(4)));
|
||||
const uniqueValues = new Set(tensor.map((v) => v.toFixed(4)));
|
||||
expect(uniqueValues.size).toBeGreaterThan(100);
|
||||
});
|
||||
|
||||
@@ -172,7 +172,7 @@ describe("runInference", () => {
|
||||
const result = await runInference(tensor);
|
||||
for (let i = 0; i < result.predictions.length - 1; i++) {
|
||||
expect(result.predictions[i].probability).toBeGreaterThanOrEqual(
|
||||
result.predictions[i + 1].probability
|
||||
result.predictions[i + 1].probability,
|
||||
);
|
||||
}
|
||||
}, 10000);
|
||||
|
||||
@@ -69,9 +69,7 @@ export async function runInference(
|
||||
*/
|
||||
export function validateInput(tensor: Float32Array): void {
|
||||
if (!(tensor instanceof Float32Array)) {
|
||||
throw new Error(
|
||||
`Expected Float32Array input, got ${typeof tensor}`,
|
||||
);
|
||||
throw new Error(`Expected Float32Array input, got ${typeof tensor}`);
|
||||
}
|
||||
|
||||
if (tensor.length !== INPUT_SIZE) {
|
||||
@@ -84,9 +82,7 @@ export function validateInput(tensor: Float32Array): void {
|
||||
// Check for NaN/Infinity values
|
||||
for (let i = 0; i < tensor.length; i++) {
|
||||
if (!Number.isFinite(tensor[i])) {
|
||||
throw new Error(
|
||||
`Tensor contains non-finite value at index ${i}: ${tensor[i]}`,
|
||||
);
|
||||
throw new Error(`Tensor contains non-finite value at index ${i}: ${tensor[i]}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,17 +1,21 @@
|
||||
/**
|
||||
* Unit tests for lib/ml/labels.ts
|
||||
*
|
||||
* Tests:
|
||||
* - INDEX_TO_DISEASE_ID maps index 0 to "healthy"
|
||||
* - INDEX_TO_DISEASE_ID maps last index to "unknown"
|
||||
* - INDEX_TO_DISEASE_ID maps intermediate indices to disease IDs
|
||||
* - DISEASE_ID_TO_INDEX is inverse of INDEX_TO_DISEASE_ID
|
||||
* - getDiseaseIdForIndex returns "unknown" for out-of-range
|
||||
* - getIndexForDiseaseId returns -1 for unknown ID
|
||||
* - isRealDisease correctly classifies healthy/unknown vs real diseases
|
||||
* - getAllDiseaseIds returns all disease IDs from knowledge base
|
||||
* - NUM_CLASSES equals expected count (diseases + healthy + unknown)
|
||||
* - Bidirectional mapping consistency
|
||||
* The model has 38 PlantVillage classes. Some map to the app's
|
||||
* knowledge base disease IDs, others map to "unknown".
|
||||
*
|
||||
* Known mappings:
|
||||
* - indices 3, 4, 6, 10, 14, 17, 19, 22, 23, 24, 27, 37 → "healthy"
|
||||
* - index 20 (Potato___Early_blight) → "early-blight"
|
||||
* - index 21 (Potato___Late_blight) → "late-blight"
|
||||
* - index 25 (Squash___Powdery_mildew) → "squash-powdery-mildew"
|
||||
* - index 26 (Strawberry___Leaf_scorch) → "strawberry-leaf-scorch"
|
||||
* - index 28 (Tomato___Bacterial_spot) → "bacterial-leaf-spot-tomato"
|
||||
* - index 29 (Tomato___Early_blight) → "early-blight" (duplicate)
|
||||
* - index 30 (Tomato___Late_blight) → "late-blight" (duplicate)
|
||||
* - index 32 (Tomato___Septoria_leaf_spot) → "septoria-leaf-spot"
|
||||
* - index 37 (Tomato___healthy) → "healthy"
|
||||
* - all others → "unknown"
|
||||
*/
|
||||
|
||||
import { describe, it, expect } from "vitest";
|
||||
@@ -23,143 +27,105 @@ import {
|
||||
isRealDisease,
|
||||
getAllDiseaseIds,
|
||||
NUM_CLASSES,
|
||||
HEALTHY_INDEX,
|
||||
FIRST_DISEASE_INDEX,
|
||||
UNKNOWN_INDEX,
|
||||
getPlantVillageClassName,
|
||||
} from "@/lib/ml/labels";
|
||||
import rawDiseases from "@/data/diseases.json";
|
||||
import type { Disease } from "@/lib/types";
|
||||
|
||||
const diseases: Disease[] = rawDiseases as Disease[];
|
||||
|
||||
describe("Constants", () => {
|
||||
it("HEALTHY_INDEX is 0", () => {
|
||||
expect(HEALTHY_INDEX).toBe(0);
|
||||
it("NUM_CLASSES is 38 (PlantVillage)", () => {
|
||||
expect(NUM_CLASSES).toBe(38);
|
||||
});
|
||||
|
||||
it("FIRST_DISEASE_INDEX is 1", () => {
|
||||
expect(FIRST_DISEASE_INDEX).toBe(1);
|
||||
});
|
||||
|
||||
it("UNKNOWN_INDEX is 1 + number of diseases", () => {
|
||||
expect(UNKNOWN_INDEX).toBe(1 + diseases.length);
|
||||
});
|
||||
|
||||
it("NUM_CLASSES is UNKNOWN_INDEX + 1", () => {
|
||||
expect(NUM_CLASSES).toBe(UNKNOWN_INDEX + 1);
|
||||
});
|
||||
|
||||
it("NUM_CLASSES equals diseases.length + 2 (healthy + unknown)", () => {
|
||||
expect(NUM_CLASSES).toBe(diseases.length + 2);
|
||||
it("all 38 indices are mapped", () => {
|
||||
const keys = Object.keys(INDEX_TO_DISEASE_ID).map(Number);
|
||||
expect(keys.length).toBe(38);
|
||||
for (let i = 0; i < 38; i++) {
|
||||
expect(keys).toContain(i);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
describe("INDEX_TO_DISEASE_ID", () => {
|
||||
it("maps index 0 to 'healthy'", () => {
|
||||
expect(INDEX_TO_DISEASE_ID[0]).toBe("healthy");
|
||||
});
|
||||
describe("INDEX_TO_DISEASE_ID — healthy indices", () => {
|
||||
const healthyIndices = [3, 4, 6, 10, 14, 17, 19, 22, 23, 24, 27, 37];
|
||||
|
||||
it("maps last index to 'unknown'", () => {
|
||||
expect(INDEX_TO_DISEASE_ID[NUM_CLASSES - 1]).toBe("unknown");
|
||||
for (const idx of healthyIndices) {
|
||||
it(`index ${idx} maps to "healthy"`, () => {
|
||||
expect(INDEX_TO_DISEASE_ID[idx]).toBe("healthy");
|
||||
});
|
||||
|
||||
it("maps intermediate indices to disease IDs", () => {
|
||||
// Index 1 should be the first disease
|
||||
expect(INDEX_TO_DISEASE_ID[1]).toBe(diseases[0].id);
|
||||
// Index 2 should be the second disease
|
||||
expect(INDEX_TO_DISEASE_ID[2]).toBe(diseases[1].id);
|
||||
// Last disease index
|
||||
expect(INDEX_TO_DISEASE_ID[diseases.length]).toBe(diseases[diseases.length - 1].id);
|
||||
});
|
||||
|
||||
it("has exactly NUM_CLASSES entries", () => {
|
||||
const keys = Object.keys(INDEX_TO_DISEASE_ID);
|
||||
expect(keys.length).toBe(NUM_CLASSES);
|
||||
});
|
||||
|
||||
it("all mapped IDs are valid strings", () => {
|
||||
for (const id of Object.values(INDEX_TO_DISEASE_ID)) {
|
||||
expect(typeof id).toBe("string");
|
||||
expect(id.length).toBeGreaterThan(0);
|
||||
}
|
||||
});
|
||||
|
||||
describe("INDEX_TO_DISEASE_ID — known disease mappings", () => {
|
||||
const cases: Array<{ index: number; expected: string; name: string }> = [
|
||||
{ index: 20, expected: "early-blight", name: "Potato___Early_blight" },
|
||||
{ index: 21, expected: "late-blight", name: "Potato___Late_blight" },
|
||||
{ index: 25, expected: "squash-powdery-mildew", name: "Squash___Powdery_mildew" },
|
||||
{ index: 26, expected: "strawberry-leaf-scorch", name: "Strawberry___Leaf_scorch" },
|
||||
{ index: 28, expected: "bacterial-leaf-spot-tomato", name: "Tomato___Bacterial_spot" },
|
||||
{ index: 29, expected: "early-blight", name: "Tomato___Early_blight" },
|
||||
{ index: 30, expected: "late-blight", name: "Tomato___Late_blight" },
|
||||
{ index: 32, expected: "septoria-leaf-spot", name: "Tomato___Septoria_leaf_spot" },
|
||||
];
|
||||
|
||||
for (const { index, expected, name } of cases) {
|
||||
it(`index ${index} (${name}) maps to "${expected}"`, () => {
|
||||
expect(INDEX_TO_DISEASE_ID[index]).toBe(expected);
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
describe("INDEX_TO_DISEASE_ID — unknown (unmapped) indices", () => {
|
||||
const unknownIndices = [0, 1, 2, 5, 7, 8, 9, 11, 12, 13, 15, 16, 18, 31, 33, 34, 35, 36];
|
||||
|
||||
for (const idx of unknownIndices) {
|
||||
it(`index ${idx} maps to "unknown"`, () => {
|
||||
expect(INDEX_TO_DISEASE_ID[idx]).toBe("unknown");
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
describe("DISEASE_ID_TO_INDEX", () => {
|
||||
it("maps 'healthy' to index 0", () => {
|
||||
expect(DISEASE_ID_TO_INDEX["healthy"]).toBe(0);
|
||||
it("maps 'early-blight' to first occurrence (index 20)", () => {
|
||||
expect(DISEASE_ID_TO_INDEX["early-blight"]).toBe(20);
|
||||
});
|
||||
|
||||
it("maps 'unknown' to last index", () => {
|
||||
expect(DISEASE_ID_TO_INDEX["unknown"]).toBe(NUM_CLASSES - 1);
|
||||
it("maps 'late-blight' to first occurrence (index 21)", () => {
|
||||
expect(DISEASE_ID_TO_INDEX["late-blight"]).toBe(21);
|
||||
});
|
||||
|
||||
it("maps disease IDs to correct indices", () => {
|
||||
for (let i = 0; i < diseases.length; i++) {
|
||||
expect(DISEASE_ID_TO_INDEX[diseases[i].id]).toBe(FIRST_DISEASE_INDEX + i);
|
||||
}
|
||||
it("maps 'septoria-leaf-spot' to index 32", () => {
|
||||
expect(DISEASE_ID_TO_INDEX["septoria-leaf-spot"]).toBe(32);
|
||||
});
|
||||
|
||||
it("has exactly NUM_CLASSES entries", () => {
|
||||
const keys = Object.keys(DISEASE_ID_TO_INDEX);
|
||||
expect(keys.length).toBe(NUM_CLASSES);
|
||||
it("maps 'healthy' to index 3 (first healthy index)", () => {
|
||||
expect(DISEASE_ID_TO_INDEX["healthy"]).toBe(3);
|
||||
});
|
||||
});
|
||||
|
||||
describe("Bidirectional mapping", () => {
|
||||
it("INDEX_TO_DISEASE_ID and DISEASE_ID_TO_INDEX are inverses", () => {
|
||||
for (const [idxStr, id] of Object.entries(INDEX_TO_DISEASE_ID)) {
|
||||
const idx = parseInt(idxStr);
|
||||
expect(DISEASE_ID_TO_INDEX[id]).toBe(idx);
|
||||
}
|
||||
});
|
||||
|
||||
it("round-trips for all disease IDs", () => {
|
||||
for (const [id, idx] of Object.entries(DISEASE_ID_TO_INDEX)) {
|
||||
expect(INDEX_TO_DISEASE_ID[idx]).toBe(id);
|
||||
}
|
||||
});
|
||||
|
||||
it("round-trips for all indices", () => {
|
||||
it("every index round-trips correctly", () => {
|
||||
for (let i = 0; i < NUM_CLASSES; i++) {
|
||||
const id = INDEX_TO_DISEASE_ID[i];
|
||||
expect(DISEASE_ID_TO_INDEX[id]).toBe(i);
|
||||
const idx = DISEASE_ID_TO_INDEX[id];
|
||||
expect(INDEX_TO_DISEASE_ID[idx]).toBe(id);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
describe("getDiseaseIdForIndex", () => {
|
||||
it("returns 'healthy' for index 0", () => {
|
||||
expect(getDiseaseIdForIndex(0)).toBe("healthy");
|
||||
});
|
||||
|
||||
it("returns disease ID for valid disease index", () => {
|
||||
expect(getDiseaseIdForIndex(1)).toBe(diseases[0].id);
|
||||
});
|
||||
|
||||
it("returns 'unknown' for out-of-range positive index", () => {
|
||||
expect(getDiseaseIdForIndex(1000)).toBe("unknown");
|
||||
expect(getDiseaseIdForIndex(100)).toBe("unknown");
|
||||
});
|
||||
|
||||
it("returns 'unknown' for negative index", () => {
|
||||
expect(getDiseaseIdForIndex(-1)).toBe("unknown");
|
||||
});
|
||||
|
||||
it("returns 'unknown' for index past NUM_CLASSES", () => {
|
||||
expect(getDiseaseIdForIndex(NUM_CLASSES + 10)).toBe("unknown");
|
||||
it("returns correct ID for valid index", () => {
|
||||
expect(getDiseaseIdForIndex(20)).toBe("early-blight");
|
||||
});
|
||||
});
|
||||
|
||||
describe("getIndexForDiseaseId", () => {
|
||||
it("returns 0 for 'healthy'", () => {
|
||||
expect(getIndexForDiseaseId("healthy")).toBe(0);
|
||||
});
|
||||
|
||||
it("returns correct index for known disease", () => {
|
||||
const idx = getIndexForDiseaseId(diseases[0].id);
|
||||
expect(idx).toBe(1);
|
||||
});
|
||||
|
||||
it("returns -1 for unknown disease ID", () => {
|
||||
expect(getIndexForDiseaseId("nonexistent-disease")).toBe(-1);
|
||||
});
|
||||
@@ -169,9 +135,7 @@ describe("getIndexForDiseaseId", () => {
|
||||
});
|
||||
|
||||
it("is case-insensitive", () => {
|
||||
const lowerIdx = getIndexForDiseaseId(diseases[0].id);
|
||||
const upperIdx = getIndexForDiseaseId(diseases[0].id.toUpperCase());
|
||||
expect(upperIdx).toBe(lowerIdx);
|
||||
expect(getIndexForDiseaseId("EARLY-BLIGHT")).toBe(20);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -184,10 +148,9 @@ describe("isRealDisease", () => {
|
||||
expect(isRealDisease("unknown")).toBe(false);
|
||||
});
|
||||
|
||||
it("returns true for actual disease IDs", () => {
|
||||
for (const disease of diseases) {
|
||||
expect(isRealDisease(disease.id)).toBe(true);
|
||||
}
|
||||
it("returns true for known disease IDs", () => {
|
||||
expect(isRealDisease("early-blight")).toBe(true);
|
||||
expect(isRealDisease("septoria-leaf-spot")).toBe(true);
|
||||
});
|
||||
|
||||
it("returns true for arbitrary non-special strings", () => {
|
||||
@@ -195,27 +158,37 @@ describe("isRealDisease", () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe("getPlantVillageClassName", () => {
|
||||
it("returns correct class name for tomato healthy", () => {
|
||||
expect(getPlantVillageClassName(37)).toBe("Tomato___healthy");
|
||||
});
|
||||
|
||||
it("returns correct class name for potato early blight", () => {
|
||||
expect(getPlantVillageClassName(20)).toBe("Potato___Early_blight");
|
||||
});
|
||||
|
||||
it("returns 'unknown' for out-of-range index", () => {
|
||||
expect(getPlantVillageClassName(100)).toBe("unknown");
|
||||
});
|
||||
});
|
||||
|
||||
describe("getAllDiseaseIds", () => {
|
||||
it("returns array of all disease IDs", () => {
|
||||
it("returns only mapped disease IDs", () => {
|
||||
const ids = getAllDiseaseIds();
|
||||
expect(ids.length).toBe(diseases.length);
|
||||
expect(ids).toContain("early-blight");
|
||||
expect(ids).toContain("late-blight");
|
||||
expect(ids).toContain("squash-powdery-mildew");
|
||||
expect(ids).toContain("strawberry-leaf-scorch");
|
||||
expect(ids).toContain("bacterial-leaf-spot-tomato");
|
||||
expect(ids).toContain("septoria-leaf-spot");
|
||||
});
|
||||
|
||||
it("excludes 'healthy'", () => {
|
||||
const ids = getAllDiseaseIds();
|
||||
expect(ids).not.toContain("healthy");
|
||||
expect(getAllDiseaseIds()).not.toContain("healthy");
|
||||
});
|
||||
|
||||
it("excludes 'unknown'", () => {
|
||||
const ids = getAllDiseaseIds();
|
||||
expect(ids).not.toContain("unknown");
|
||||
});
|
||||
|
||||
it("includes all disease IDs from knowledge base", () => {
|
||||
const ids = getAllDiseaseIds();
|
||||
for (const disease of diseases) {
|
||||
expect(ids).toContain(disease.id);
|
||||
}
|
||||
expect(getAllDiseaseIds()).not.toContain("unknown");
|
||||
});
|
||||
|
||||
it("has no duplicates", () => {
|
||||
|
||||
@@ -1,74 +1,197 @@
|
||||
/**
|
||||
* Class label mapping for the plant disease classifier model.
|
||||
*
|
||||
* Maps model output index → disease ID string.
|
||||
* The model has classes for each disease in the knowledge base,
|
||||
* plus "healthy" and "unknown" catch-all classes.
|
||||
* This model is a MobileNetV2 trained on the PlantVillage dataset
|
||||
* with 38 classes (14 crops × diseases/healthy).
|
||||
*
|
||||
* Model output shape: [1, NUM_CLASSES] where NUM_CLASSES = 95
|
||||
* (93 diseases + "healthy" + "unknown")
|
||||
* Model output shape: [1, NUM_CLASSES] where NUM_CLASSES = 38
|
||||
*
|
||||
* Index layout:
|
||||
* 0 → "healthy"
|
||||
* 1–93 → disease IDs (order matches diseases.json)
|
||||
* 94 → "unknown"
|
||||
* Index layout (from labels_pv_original.json):
|
||||
* 0 → Apple___Apple_scab
|
||||
* 1 → Apple___Black_rot
|
||||
* 2 → Apple___Cedar_apple_rust
|
||||
* 3 → Apple___healthy
|
||||
* 4 → Blueberry___healthy
|
||||
* 5 → Cherry_(including_sour)___Powdery_mildew
|
||||
* 6 → Cherry_(including_sour)___healthy
|
||||
* 7 → Corn_(maize)___Cercospora_leaf_spot Gray_leaf_spot
|
||||
* 8 → Corn_(maize)___Common_rust_
|
||||
* 9 → Corn_(maize)___Northern_Leaf_Blight
|
||||
* 10 → Corn_(maize)___healthy
|
||||
* 11 → Grape___Black_rot
|
||||
* 12 → Grape___Esca_(Black_Measles)
|
||||
* 13 → Grape___Leaf_blight_(Isariopsis_Leaf_Spot)
|
||||
* 14 → Grape___healthy
|
||||
* 15 → Orange___Haunglongbing_(Citrus_greening)
|
||||
* 16 → Peach___Bacterial_spot
|
||||
* 17 → Peach___healthy
|
||||
* 18 → Pepper,_bell___Bacterial_spot
|
||||
* 19 → Pepper,_bell___healthy
|
||||
* 20 → Potato___Early_blight
|
||||
* 21 → Potato___Late_blight
|
||||
* 22 → Potato___healthy
|
||||
* 23 → Raspberry___healthy
|
||||
* 24 → Soybean___healthy
|
||||
* 25 → Squash___Powdery_mildew
|
||||
* 26 → Strawberry___Leaf_scorch
|
||||
* 27 → Strawberry___healthy
|
||||
* 28 → Tomato___Bacterial_spot
|
||||
* 29 → Tomato___Early_blight
|
||||
* 30 → Tomato___Late_blight
|
||||
* 31 → Tomato___Leaf_Mold
|
||||
* 32 → Tomato___Septoria_leaf_spot
|
||||
* 33 → Tomato___Spider_mites Two-spotted_spider_mite
|
||||
* 34 → Tomato___Target_Spot
|
||||
* 35 → Tomato___Tomato_Yellow_Leaf_Curl_Virus
|
||||
* 36 → Tomato___Tomato_mosaic_virus
|
||||
* 37 → Tomato___healthy
|
||||
*
|
||||
* Some PlantVillage classes overlap with this app's knowledge base.
|
||||
* Exact class name → disease ID mappings:
|
||||
* Potato___Early_blight → "early-blight"
|
||||
* Potato___Late_blight → "late-blight"
|
||||
* Squash___Powdery_mildew → "squash-powdery-mildew"
|
||||
* Strawberry___Leaf_scorch → "strawberry-leaf-scorch"
|
||||
* Tomato___Bacterial_spot → "bacterial-leaf-spot-tomato"
|
||||
* Tomato___Early_blight → "early-blight"
|
||||
* Tomato___Late_blight → "late-blight"
|
||||
* Tomato___Septoria_leaf_spot → "septoria-leaf-spot"
|
||||
* All other classes map to "unknown" and are filtered out during enrichment.
|
||||
*
|
||||
* After fine-tuning to the app's 93 disease classes, this file will be
|
||||
* rewritten to match the new model's output layer.
|
||||
*/
|
||||
|
||||
import rawDiseases from "@/data/diseases.json";
|
||||
import type { Disease } from "@/lib/types";
|
||||
// ─── PlantVillage class names (in model output order) ────────────────────
|
||||
|
||||
const diseases: Disease[] = rawDiseases as Disease[];
|
||||
/**
 * PlantVillage class names, listed in model output order: the string at
 * position `i` names output index `i`.
 *
 * Typed as `readonly string[]`. The earlier `string[]` annotation combined
 * with an `as const` initializer does not type-check, because a readonly
 * tuple cannot be assigned to a mutable array type.
 */
const PLANTVILLAGE_CLASSES: readonly string[] = [
  "Apple___Apple_scab",
  "Apple___Black_rot",
  "Apple___Cedar_apple_rust",
  "Apple___healthy",
  "Blueberry___healthy",
  "Cherry_(including_sour)___Powdery_mildew",
  "Cherry_(including_sour)___healthy",
  "Corn_(maize)___Cercospora_leaf_spot Gray_leaf_spot",
  "Corn_(maize)___Common_rust_",
  "Corn_(maize)___Northern_Leaf_Blight",
  "Corn_(maize)___healthy",
  "Grape___Black_rot",
  "Grape___Esca_(Black_Measles)",
  "Grape___Leaf_blight_(Isariopsis_Leaf_Spot)",
  "Grape___healthy",
  "Orange___Haunglongbing_(Citrus_greening)",
  "Peach___Bacterial_spot",
  "Peach___healthy",
  "Pepper,_bell___Bacterial_spot",
  "Pepper,_bell___healthy",
  "Potato___Early_blight",
  "Potato___Late_blight",
  "Potato___healthy",
  "Raspberry___healthy",
  "Soybean___healthy",
  "Squash___Powdery_mildew",
  "Strawberry___Leaf_scorch",
  "Strawberry___healthy",
  "Tomato___Bacterial_spot",
  "Tomato___Early_blight",
  "Tomato___Late_blight",
  "Tomato___Leaf_Mold",
  "Tomato___Septoria_leaf_spot",
  "Tomato___Spider_mites Two-spotted_spider_mite",
  "Tomato___Target_Spot",
  "Tomato___Tomato_Yellow_Leaf_Curl_Virus",
  "Tomato___Tomato_mosaic_virus",
  "Tomato___healthy",
];
|
||||
|
||||
// ─── Constants ───────────────────────────────────────────────────────────────
|
||||
|
||||
/** Index for the "healthy" class */
|
||||
export const HEALTHY_INDEX = 0;
|
||||
|
||||
/** First index for actual disease classes */
|
||||
export const FIRST_DISEASE_INDEX = 1;
|
||||
|
||||
/** Index for the "unknown" catch-all class */
|
||||
export const UNKNOWN_INDEX = 1 + diseases.length;
|
||||
|
||||
/** Total number of output classes */
|
||||
export const NUM_CLASSES = UNKNOWN_INDEX + 1;
|
||||
|
||||
// ─── Index → Disease ID mapping ──────────────────────────────────────────────
|
||||
// ─── PlantVillage → App disease ID mapping ──────────────────────────────
|
||||
|
||||
/**
|
||||
* Map from model output index to disease ID string.
|
||||
* Index 0 = "healthy", indices 1..N = disease IDs, last = "unknown".
|
||||
* Maps PlantVillage class names (in the form "Plant___Disease") to
|
||||
* this app's disease IDs. Unmapped classes resolve to "unknown".
|
||||
*/
|
||||
function plantVillageNameToDiseaseId(pvName: string): string {
  // A usable class name has exactly one "___" separator: "<Plant>___<Condition>".
  const segments = pvName.split("___");
  if (segments.length !== 2) return "unknown";

  // Every crop's "healthy" class collapses to the single "healthy" ID.
  if (segments[1] === "healthy") return "healthy";

  // Only classes with a confident counterpart in the app are translated;
  // potato and tomato blights intentionally share one ID each.
  switch (pvName) {
    case "Squash___Powdery_mildew":
      return "squash-powdery-mildew";
    case "Strawberry___Leaf_scorch":
      return "strawberry-leaf-scorch";
    case "Potato___Early_blight":
    case "Tomato___Early_blight":
      return "early-blight";
    case "Potato___Late_blight":
    case "Tomato___Late_blight":
      return "late-blight";
    case "Tomato___Bacterial_spot":
      return "bacterial-leaf-spot-tomato";
    case "Tomato___Septoria_leaf_spot":
      return "septoria-leaf-spot";
    default:
      return "unknown";
  }
}
|
||||
|
||||
// ─── Constants ──────────────────────────────────────────────────────────
|
||||
|
||||
/** Total number of model output classes */
|
||||
export const NUM_CLASSES = PLANTVILLAGE_CLASSES.length; // 38
|
||||
|
||||
/** Index for the "healthy" class — multiple PV indices map to this */
|
||||
export const HEALTHY_INDEX = 0; // NOTE(review): PLANTVILLAGE_CLASSES[0] is "Apple___Apple_scab", not a healthy class; the first "___healthy" entry is index 3 — confirm the intended value
|
||||
|
||||
/** First disease index (unused in PV mapping, kept for compatibility) */
|
||||
export const FIRST_DISEASE_INDEX = 0;
|
||||
|
||||
/** Index for the "unknown" catch-all — PV classes we can't map */
|
||||
export const UNKNOWN_INDEX = NUM_CLASSES - 1; // 37 — NOTE(review): index 37 is "Tomato___healthy", which maps to "healthy", so no "unknown" class lives at this index — confirm callers
|
||||
|
||||
// ─── Index → Disease ID mapping ─────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Map from model output index to app disease ID string.
|
||||
* Built dynamically from PlantVillage class names.
|
||||
*/
|
||||
export const INDEX_TO_DISEASE_ID: Record<number, string> = Object.freeze(
|
||||
(() => {
|
||||
const map: Record<number, string> = {};
|
||||
map[HEALTHY_INDEX] = "healthy";
|
||||
for (let i = 0; i < diseases.length; i++) {
|
||||
map[FIRST_DISEASE_INDEX + i] = diseases[i].id;
|
||||
for (let i = 0; i < NUM_CLASSES; i++) {
|
||||
map[i] = plantVillageNameToDiseaseId(PLANTVILLAGE_CLASSES[i]);
|
||||
}
|
||||
map[UNKNOWN_INDEX] = "unknown";
|
||||
return map;
|
||||
})(),
|
||||
);
|
||||
|
||||
// ─── Disease ID → Index mapping ──────────────────────────────────────────────
|
||||
// ─── Disease ID → Index mapping ─────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Map from disease ID string to model output index.
|
||||
* For duplicates (e.g., both potato and tomato "Early_blight" → "early-blight"),
|
||||
* returns the first matching index.
|
||||
*/
|
||||
export const DISEASE_ID_TO_INDEX: Record<string, number> = Object.freeze(
|
||||
(() => {
|
||||
const map: Record<string, number> = {};
|
||||
map["healthy"] = HEALTHY_INDEX;
|
||||
for (let i = 0; i < diseases.length; i++) {
|
||||
map[diseases[i].id] = FIRST_DISEASE_INDEX + i;
|
||||
for (let i = 0; i < NUM_CLASSES; i++) {
|
||||
const id = INDEX_TO_DISEASE_ID[i];
|
||||
// First occurrence wins (potato before tomato for early/late blight)
|
||||
if (map[id] === undefined) {
|
||||
map[id] = i;
|
||||
}
|
||||
}
|
||||
map["unknown"] = UNKNOWN_INDEX;
|
||||
return map;
|
||||
})(),
|
||||
);
|
||||
|
||||
// ─── Lookup helpers ──────────────────────────────────────────────────────────
|
||||
// ─── Lookup helpers ─────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Get the disease ID for a given model output index.
|
||||
@@ -93,9 +216,22 @@ export function isRealDisease(diseaseId: string): boolean {
|
||||
return diseaseId !== "healthy" && diseaseId !== "unknown";
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the PlantVillage display name for a given model output index.
|
||||
*/
|
||||
export function getPlantVillageClassName(index: number): string {
  // An index with no entry in the class list reads back as undefined.
  const className = PLANTVILLAGE_CLASSES[index];
  return className ?? "unknown";
}
|
||||
|
||||
/**
|
||||
* Get all known disease IDs (excluding "healthy" and "unknown").
|
||||
*/
|
||||
export function getAllDiseaseIds(): string[] {
  // Several model indices can share one disease ID, so collect into a Set
  // to return each ID once.
  const ids = new Set<string>();
  for (const id of Object.values(INDEX_TO_DISEASE_ID)) {
    // "healthy" and "unknown" are placeholder IDs, not diseases.
    if (id !== "healthy" && id !== "unknown") {
      ids.add(id);
    }
  }
  return Array.from(ids);
}
|
||||
|
||||
@@ -93,7 +93,10 @@ export async function getModel(): Promise<PlantDiseaseModel> {
|
||||
const model = await Promise.race([
|
||||
loadingPromise,
|
||||
new Promise<never>((_, reject) =>
|
||||
setTimeout(() => reject(new Error(`Model load timed out after ${MODEL_LOAD_TIMEOUT}ms`)), MODEL_LOAD_TIMEOUT),
|
||||
setTimeout(
|
||||
() => reject(new Error(`Model load timed out after ${MODEL_LOAD_TIMEOUT}ms`)),
|
||||
MODEL_LOAD_TIMEOUT,
|
||||
),
|
||||
),
|
||||
]);
|
||||
|
||||
@@ -172,6 +175,18 @@ async function tryLoadTFJS(): Promise<PlantDiseaseModel | null> {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
let tf: any;
|
||||
|
||||
// Monkey-patch: add util.isNullOrUndefined for Node.js 26 compatibility.
|
||||
// @tensorflow/tfjs-node references this function, which was deprecated in Node.js 4 and removed in Node.js 23.
|
||||
// eslint-disable-next-line @typescript-eslint/no-require-imports
|
||||
const nodeUtil = require("util");
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
if (typeof (nodeUtil as any).isNullOrUndefined !== "function") {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
(nodeUtil as any).isNullOrUndefined = function (x: unknown): boolean {
|
||||
return x === null || x === undefined;
|
||||
};
|
||||
}
|
||||
|
||||
// Try tfjs-node first (server-side, uses native bindings).
|
||||
// Use dynamic strings so bundlers (Turbopack/webpack) don't trace these
|
||||
// as required dependencies — they are truly optional.
|
||||
@@ -197,7 +212,9 @@ async function tryLoadTFJS(): Promise<PlantDiseaseModel | null> {
|
||||
const startTime = performance.now();
|
||||
|
||||
// Reshape to [1, 3, 160, 160] NCHW → [1, 160, 160, 3] NHWC for TF.js
|
||||
const inputTensor = tf.tensor4d(Array.from(tensor), [3, 160, 160])
|
||||
// Reshape NCHW flat array [3*160*160] → [3, 160, 160] → NHWC [1, 160, 160, 3]
|
||||
const inputTensor = tf
|
||||
.tensor3d(Array.from(tensor), [3, 160, 160])
|
||||
.transpose([1, 2, 0])
|
||||
.expandDims(0);
|
||||
|
||||
@@ -352,7 +369,7 @@ function generateMockLogits(tensor: Float32Array): Float32Array {
|
||||
logits[topIndex] = 3.5;
|
||||
|
||||
// Second highest
|
||||
const secondIndex = (topIndex + Math.abs(hash % 10) + 1) % (numClasses - 1) + 1;
|
||||
const secondIndex = ((topIndex + Math.abs(hash % 10) + 1) % (numClasses - 1)) + 1;
|
||||
logits[secondIndex] = 2.5;
|
||||
|
||||
logits[numClasses - 1] = -2; // "unknown" gets low score
|
||||
|
||||
Reference in New Issue
Block a user