Checkpoint made before enabling torch.compile

2026-06-16 10:40:38 -05:00
parent 34855eff55
commit 6650d3c5ea
19 changed files with 2519 additions and 0 deletions
--- a/checkpoints/species_warmup/species_warmup_epoch=04.pt
+++ b/checkpoints/species_warmup/species_warmup_epoch=04.pt
--- a/data/.gitignore
+++ b/data/.gitignore
@@ -1 +1,2 @@
 dataset
+organized
--- a/launch-training.sh
+++ b/launch-training.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+# Launch hierarchical training — Stage A (species classifier).
+# NOTE(review): this header previously said "Batch 1536 to utilize the 78GB
+# free memory", but the command below passes --batch-size 512. Confirm which
+# value is intended; the comment now describes what is actually run.
+
+# Abort rather than launch from the wrong directory or without the venv.
+cd /home/mike/Plant-Health-ID || exit 1
+source .venv/bin/activate || exit 1
+mkdir -p logs checkpoints
+
+# Run detached from the terminal; stdout and stderr both go to the log file.
+nohup python3 scripts/train_hierarchical.py \
+	--stage species \
+	--batch-size 512 \
+	--no-wandb \
+	>logs/species_training.log 2>&1 &
+
+echo "Training launched (PID: $!)"
+echo "Monitor with: tail -f logs/species_training.log"
--- a/logs/species_training.log
+++ b/logs/species_training.log
--- a/logs/training_log.json
+++ b/logs/training_log.json
@@ -0,0 +1,37 @@
+[
+  {
+    "train_loss": 5.172005830989476,
+    "val_loss": 4.916834101469621,
+    "val_species_acc": 0.2015638389945044,
+    "stage": "species_warmup",
+    "epoch": 0
+  },
+  {
+    "train_loss": 5.054380055033749,
+    "val_loss": 4.880044144132863,
+    "val_species_acc": 0.2087307238084681,
+    "stage": "species_warmup",
+    "epoch": 1
+  },
+  {
+    "train_loss": 5.039230073741956,
+    "val_loss": 4.862851754478786,
+    "val_species_acc": 0.21087951021313062,
+    "stage": "species_warmup",
+    "epoch": 2
+  },
+  {
+    "train_loss": 5.033712423401,
+    "val_loss": 4.860296101155488,
+    "val_species_acc": 0.20983496126641968,
+    "stage": "species_warmup",
+    "epoch": 3
+  },
+  {
+    "train_loss": 5.031291038826409,
+    "val_loss": 4.855738523732061,
+    "val_species_acc": 0.2122480825065764,
+    "stage": "species_warmup",
+    "epoch": 4
+  }
+]
--- a/package.json
+++ b/package.json
@@ -44,5 +44,14 @@
    "tailwindcss": "^4",
    "typescript": "^5",
    "vitest": "^4.1.8"
+  },
+  "allowScripts": {
+    "esbuild@0.18.20": true,
+    "esbuild@0.25.12": true,
+    "esbuild@0.28.0": true,
+    "@tensorflow/tfjs-node@4.22.0": true,
+    "sharp@0.34.5": true,
+    "core-js@3.29.1": true,
+    "unrs-resolver@1.12.2": true
  }
 }
--- a/scripts/pycache/organize-dataset.cpython-314.pyc
+++ b/scripts/pycache/organize-dataset.cpython-314.pyc
--- a/scripts/audit-dataset.py
+++ b/scripts/audit-dataset.py
@@ -0,0 +1,178 @@
+#!/usr/bin/env python3
+"""Audit the dataset: parse dir names, cross-ref KB, gather stats."""
+
+import json, os, sys
+from collections import Counter
+
+BASE = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+# Load KB
+with open(os.path.join(BASE, 'src', 'data', 'plants.json')) as f:
+    plants = json.load(f)
+with open(os.path.join(BASE, 'src', 'data', 'diseases.json')) as f:
+    diseases = json.load(f)
+
+plant_ids = {p['id'] for p in plants}
+disease_ids = {d['id'] for d in diseases}
+sorted_disease_ids = sorted(disease_ids, key=len, reverse=True)
+
+# Scan dataset dirs (skip hidden files)
+dataset_dir = os.path.join(BASE, 'data', 'dataset')
+dirs = sorted([d for d in os.listdir(dataset_dir)
+               if os.path.isdir(os.path.join(dataset_dir, d)) and not d.startswith('.')])
+
+print(f"Total dataset directories: {len(dirs)}")
+
+# Strategy 1: Match against known plant IDs (longest first) to split plant/disease
+sorted_plant_ids = sorted(plant_ids, key=len, reverse=True)
+
+parsed = []
+unmatched_names = []
+for d in dirs:
+    # Try matching plant prefix (longest plant IDs first)
+    found = False
+    for pid in sorted_plant_ids:
+        prefix = pid + '-'
+        if d.startswith(prefix):
+            disease_part = d[len(prefix):]
+            parsed.append((pid, disease_part))
+            found = True
+            break
+    if not found:
+        # Try matching disease suffix from KB
+        for did in sorted_disease_ids:
+            suffix = '-' + did
+            if d.endswith(suffix):
+                plant_part = d[:-len(suffix)]
+                parsed.append((plant_part, did))
+                found = True
+                break
+    if not found:
+        unmatched_names.append(d)
+
+print(f"Parsed: {len(parsed)}")
+print(f"Unmatched: {len(unmatched_names)}")
+if unmatched_names:
+    print(f"First 20 unmatched: {unmatched_names[:20]}")
+
+# For unmatched, try heuristic: last N words (separated by -) are the disease
+# We need to figure out how many words constitute the disease
+# Let's analyze the unmatched ones
+print(f"\n=== Analyzing unmatched dir names ===")
+# Get all unique "last word" suffixes from unmatched
+suffix_counts = Counter()
+for d in unmatched_names:
+    parts = d.split('-')
+    # Try last 1, 2, 3, 4 words as disease
+    for n in range(1, min(7, len(parts))):
+        suffix = '-'.join(parts[-n:])
+        suffix_counts[suffix] += 1
+
+print(f"Most common last-1-word suffixes (potential single-word diseases):")
+for suf, cnt in suffix_counts.most_common(30):
+    if len(suf.split('-')) == 1:
+        print(f"  {suf}: {cnt}")
+
+print(f"\nMost common last-2-word suffixes:")
+for suf, cnt in suffix_counts.most_common(20):
+    if len(suf.split('-')) == 2:
+        print(f"  {suf}: {cnt}")
+
+# Build a heuristic disease dictionary from the dataset itself
+# For dirs that matched via plant prefix, extract all unique disease parts
+dataset_diseases = Counter()
+dataset_plants = Counter()
+for plant, disease in parsed:
+    dataset_diseases[disease] += 1
+    dataset_plants[plant] += 1
+
+print(f"\n=== Dataset disease labels (from plant-prefix matches) ===")
+for disease, cnt in sorted(dataset_diseases.items(), key=lambda x: -x[1]):
+    in_kb = "✓" if disease in disease_ids else ""
+    print(f"  {disease}: {cnt}x {in_kb}")
+
+# Now re-parse ALL dirs using our accumulated knowledge
+# Build a comprehensive disease ID list from both KB and dataset
+known_diseases = set(disease_ids) | set(dataset_diseases.keys())
+known_diseases.add('healthy')
+sorted_known_diseases = sorted(known_diseases, key=len, reverse=True)
+
+print(f"\n=== Comprehensive parsing ===")
+reparsed = []
+still_unmatched = []
+for d in dirs:
+    found = False
+    # Match disease suffix first
+    for did in sorted_known_diseases:
+        suffix = '-' + did
+        if d.endswith(suffix):
+            plant_part = d[:-len(suffix)]
+            # Plant part should end with a known plant or be a reasonable plant name
+            reparsed.append((plant_part, did))
+            found = True
+            break
+    if not found:
+        still_unmatched.append(d)
+
+print(f"Re-parsed: {len(reparsed)}")
+print(f"Still unmatched: {len(still_unmatched)}")
+if still_unmatched:
+    for u in still_unmatched[:20]:
+        print(f"  {u}")
+
+# Final analysis: unique plants and diseases
+unique_plants = sorted(set(p[0] for p in reparsed))
+unique_diseases = sorted(set(p[1] for p in reparsed))
+print(f"\nUnique plants: {len(unique_plants)}")
+print(f"Unique diseases: {len(unique_diseases)}")
+
+# Plant class counts
+plant_class_counts = Counter(p[0] for p in reparsed)
+print(f"\nTop 25 plants by class count:")
+for plant, cnt in plant_class_counts.most_common(25):
+    print(f"  {plant}: {cnt}")
+
+# Now count actual image files
+print(f"\n=== Image counts (full scan) ===")
+total_images = 0
+class_sizes = Counter()
+for d in dirs:
+    full_path = os.path.join(dataset_dir, d)
+    file_count = len([f for f in os.listdir(full_path)
+                      if os.path.isfile(os.path.join(full_path, f))])
+    class_sizes[d] = file_count
+    total_images += file_count
+
+size_vals = list(class_sizes.values())
+print(f"Total images: {total_images:,}")
+print(f"Classes: {len(class_sizes)}")
+print(f"Min/class: {min(size_vals)}, Max/class: {max(size_vals)}")
+print(f"Mean/class: {sum(size_vals)/len(size_vals):.0f}")
+print(f"Median/class: {sorted(size_vals)[len(size_vals)//2]}")
+
+# Images per plant (using the parsed data)
+plant_image_counts = Counter()
+for d, size in class_sizes.items():
+    # Find the plant for this dir
+    for plant, disease in reparsed:
+        # Simple matching: does the dir start with plant?
+        pass  # Let's do it properly below
+
+# Better: build a dir_name -> (plant, disease) lookup
+dir_to_class = {}
+for plant, disease in reparsed:
+    # Approximate the original dir name
+    key = f"{plant}-{disease}"
+    dir_to_class[key] = (plant, disease)
+
+plant_image_totals = Counter()
+for d, size in class_sizes.items():
+    # Find matching entry
+    # The dir name might not exactly match the key
+    if d in dir_to_class:
+        plant, disease = dir_to_class[d]
+        plant_image_totals[plant] += size
+
+print(f"\nTop 15 plants by total images:")
+for plant, cnt in plant_image_totals.most_common(15):
+    print(f"  {plant}: {cnt:,}")
--- a/scripts/organize-dataset.py
+++ b/scripts/organize-dataset.py
@@ -0,0 +1,471 @@
+#!/usr/bin/env python3
+"""
+Phase 1 — Dataset Reorganization for Hierarchical Model Training.
+
+Reorganizes flat data/dataset/plant-disease-name/ directories into:
+  data/organized/
+    train/{species}/{disease}/
+    val/{species}/{disease}/
+    species_index.json
+    class_hierarchy.json
+    dataset_stats.json
+
+Usage: python3 scripts/organize-dataset.py
+"""
+
+import json
+import os
+import random
+from collections import Counter, defaultdict
+from pathlib import Path
+
+from PIL import Image
+from joblib import Parallel, delayed
+from tqdm import tqdm
+
+# ─── Config ───────────────────────────────────────────────────────────────────
+
+BASE_DIR = Path(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+DATASET_DIR = BASE_DIR / "data" / "dataset"
+ORGANIZED_DIR = BASE_DIR / "data" / "organized"
+TRAIN_DIR = ORGANIZED_DIR / "train"
+VAL_DIR = ORGANIZED_DIR / "val"
+
+RANDOM_SEED = 42
+TRAIN_RATIO = 0.85
+VAL_RATIO = 1.0 - TRAIN_RATIO
+
+MAX_DIM = 512
+JPEG_QUALITY = 90
+N_JOBS = 16
+
+random.seed(RANDOM_SEED)
+
+# Known disease-prefix words — words that start disease names but should NOT
+# be part of a plant name. If a plant part ends with one of these, we know
+# the split point is wrong.
+DISEASE_PREFIX_WORDS = {
+    "bacterial", "fungal", "viral", "downy", "powdery",
+    "alternaria", "phytophthora", "phoma", "phymatotrichum",
+    "pythium", "rhizoctonia", "sclerotinia", "fusarium",
+    "verticillium", "cercospora", "septoria", "anthracnose",
+    "black", "white", "gray", "brown", "green", "pink", "blue",
+    "soft", "hard", "sour", "bitter",
+    "southern", "northern", "common", "false", "true",
+    "european", "american", "aspen", "bacterial-blight",
+    "cercospora-leaf", "septoria-leaf", "alternaria-leaf",
+}
+
+# Valid multi-word plant suffixes (these CAN follow a hyphen in plant names)
+VALID_MULTI_WORD_PLANTS = {
+    "squash", "bean", "berry", "apple", "fern", "tree", "vine",
+    "cactus", "grass", "weed", "mint", "root", "seed", "leaf",
+    "flower", "fruit", "bark", "wood", "nut", "pea", "lily",
+    "rose", "moss", "palm", "fern", "orchid", "fig", "cress",
+    "plant", "sage", "thyme", "leaf-fig", "nest-fern", "tongue",
+    "tail", "ear", "eye", "nut-tree", "bean-tree",
+}
+
+IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".webp", ".bmp", ".tiff", ".tif"}
+
+# ─── Load KB Data ─────────────────────────────────────────────────────────────
+
+def load_kb():
+    with open(BASE_DIR / "src" / "data" / "plants.json") as f:
+        plants = json.load(f)
+    with open(BASE_DIR / "src" / "data" / "diseases.json") as f:
+        diseases = json.load(f)
+    return plants, diseases
+
+PLANTS, DISEASES = load_kb()
+KB_PLANT_IDS = {p["id"] for p in PLANTS}
+
+def get_dataset_dirs():
+    """Get all non-hidden subdirectories in the dataset folder."""
+    dirs = sorted([
+        d for d in os.listdir(DATASET_DIR)
+        if os.path.isdir(DATASET_DIR / d) and not d.startswith(".")
+    ])
+    return dirs
+
+def count_images(path):
+    """Count image files in a directory."""
+    if not path.exists():
+        return 0
+    return len([
+        f for f in os.listdir(path)
+        if os.path.isfile(path / f) and os.path.splitext(f)[1].lower() in IMAGE_EXTS
+    ])
+
+# ─── Phase 1: Parse directory names ────────────────────────────────────────────
+
+def build_plant_and_disease_dictionaries(dirs):
+    """
+    Split each dataset directory name into a (species, disease) pair.
+
+    The split runs in five ordered phases; each phase only sees what the
+    previous ones left unresolved:
+      1. Collect candidate plant prefixes and promote those seen with at
+         least three distinct remainders, alongside all KB plant IDs.
+      2. Match directories by verified plant prefix (longest first), with a
+         "-healthy" suffix fallback.
+      3. Match what is left by disease suffixes learned in phase 2.
+      4. Handle trailing-hyphen names and the special "healthy" parent dir.
+      5. Move a trailing disease-prefix word from the species to the disease.
+
+    Args:
+        dirs: iterable of directory names (not paths) found in DATASET_DIR.
+
+    Returns:
+        (plant_matched, final_unmatched) where plant_matched maps a directory
+        key to (species, disease) and final_unmatched lists names no phase
+        could split. Keys are directory names, except that sub-directories of
+        "healthy" are keyed as "healthy/<sub_dir>".
+    """
+    # Phase 1: Verify plant names from prefixes that appear with >=3 diseases
+    plant_candidates = defaultdict(set)
+    for d in dirs:
+        parts = d.split("-")
+        if len(parts) < 2:
+            continue
+        # Try every split point after the first 1..5 words; the remainder is
+        # the candidate disease and must be longer than two characters.
+        for split in range(1, min(len(parts), 6)):
+            plant = "-".join(parts[:split])
+            disease = "-".join(parts[split:])
+            if plant and disease and len(disease) > 2:
+                plant_candidates[plant].add(disease)
+
+    # KB plant IDs are always trusted; dataset-derived prefixes need >=3
+    # distinct remainders to be promoted.
+    verified_plants = set(KB_PLANT_IDS)
+    for plant, diseases in plant_candidates.items():
+        if len(diseases) >= 3 and plant not in verified_plants:
+            verified_plants.add(plant)
+
+    print(f"  Verified plants: {len(verified_plants)} ({len(verified_plants & KB_PLANT_IDS)} from KB)")
+
+    # Phase 2: Match dirs by plant prefix (longest plant first)
+    sorted_plants = sorted(verified_plants, key=len, reverse=True)
+    plant_matched = {}
+    not_matched = []
+
+    for d in dirs:
+        matched = False
+        for plant in sorted_plants:
+            prefix = plant + "-"
+            if d.startswith(prefix):
+                disease = d[len(prefix):]
+                # An empty remainder (name ends right after the hyphen) is not
+                # a match; keep trying shorter plants.
+                if disease:
+                    plant_matched[d] = (plant, disease)
+                    matched = True
+                    break
+        if not matched:
+            # Fallback: "<anything>-healthy" is accepted even when the plant
+            # part is not a verified plant.
+            if d.endswith("-healthy"):
+                plant = d[:-len("-healthy")]
+                plant_matched[d] = (plant, "healthy")
+            else:
+                not_matched.append(d)
+
+    # Collect disease suffixes from Phase 2 matches
+    disease_suffixes = set(p[1] for p in plant_matched.values())
+    print(f"  Plant-matched dirs: {len(plant_matched)}, disease suffixes: {len(disease_suffixes)}")
+
+    # Phase 3: Match remaining dirs by disease suffix (longest suffix first)
+    sorted_disease_suffixes = sorted(disease_suffixes, key=len, reverse=True)
+    still_not_matched = []
+
+    for d in not_matched:
+        matched = False
+        for suffix in sorted_disease_suffixes:
+            if d.endswith("-" + suffix):
+                plant_part = d[:-len("-" + suffix)]
+                # Reject an empty plant part or one that itself ends with a
+                # hyphen (i.e. the name had a doubled hyphen before the suffix).
+                if plant_part and not plant_part.endswith("-"):
+                    plant_matched[d] = (plant_part, suffix)
+                    matched = True
+                    break
+        if not matched:
+            still_not_matched.append(d)
+
+    print(f"  Phase 3 matched: {len(not_matched) - len(still_not_matched)}")
+    print(f"  Phase 3 remaining: {len(still_not_matched)}")
+
+    # Phase 4: Handle trailing-hyphen dirs and healthy parent dir
+    final_unmatched = []
+    for d in still_not_matched:
+        if d.endswith("-"):
+            # "<plant>-" with no disease part is kept under the "unlabeled" label.
+            # NOTE(review): a directory named exactly "-" yields an empty plant
+            # and is dropped here without being reported as unmatched.
+            plant = d[:-1]
+            if plant:
+                plant_matched[d] = (plant, "unlabeled")
+        elif d == "healthy":
+            # A top-level "healthy" directory: each non-hidden sub-directory is
+            # treated as one plant's healthy class.
+            healthy_dir = DATASET_DIR / "healthy"
+            if healthy_dir.exists():
+                plant_subdirs = [
+                    s for s in os.listdir(healthy_dir)
+                    if os.path.isdir(healthy_dir / s) and not s.startswith(".")
+                ]
+                for sub_plant in plant_subdirs:
+                    # Use healthy/{sub_plant} as key so we know where to find the images
+                    plant_matched[f"healthy/{sub_plant}"] = (sub_plant, "healthy")
+                print(f"  Healthy dir: {len(plant_subdirs)} per-plant healthy classes")
+        else:
+            final_unmatched.append(d)
+
+    # NOTE(review): "handled" counts every name that did not end up in
+    # final_unmatched, including any that were silently dropped above.
+    print(f"  Phase 4 handled {len(still_not_matched) - len(final_unmatched)} edge cases")
+    print(f"  Final unmatched: {len(final_unmatched)}")
+    if final_unmatched:
+        print(f"    E.g.: {final_unmatched[:10]}")
+
+    # Phase 5: Post-processing — fix species names that ate disease-prefix words
+    fix_count = 0
+    # Iterate over a snapshot of the keys; values are reassigned in the loop.
+    for d in list(plant_matched.keys()):
+        if d.startswith("healthy/"):
+            continue  # Skip healthy subdirs — these are correct
+        species, disease = plant_matched[d]
+        parts = species.split("-")
+        # Only the final word is checked, and only one word is moved per entry.
+        if len(parts) >= 2 and parts[-1] in DISEASE_PREFIX_WORDS:
+            # Move the last word from species to disease
+            new_species = "-".join(parts[:-1])
+            new_disease = parts[-1] + "-" + disease
+            plant_matched[d] = (new_species, new_disease)
+            fix_count += 1
+
+    print(f"  Post-process fixes (species ending with disease-prefix): {fix_count}")
+
+    return plant_matched, final_unmatched
+
+# ─── Image Processing ────────────────────────────────────────────────────────
+
+def process_image(args):
+    """Resize and convert a single image to 512px max JPEG q90."""
+    src_path, dst_path = args
+    try:
+        img = Image.open(src_path)
+        if img.mode != "RGB":
+            img = img.convert("RGB")
+        w, h = img.size
+        if max(w, h) > MAX_DIM:
+            ratio = MAX_DIM / max(w, h)
+            img = img.resize((int(w * ratio), int(h * ratio)), Image.LANCZOS)
+        os.makedirs(os.path.dirname(dst_path), exist_ok=True)
+        img.save(dst_path, "JPEG", quality=JPEG_QUALITY, optimize=True)
+        return (src_path, True, None)
+    except Exception as e:
+        return (src_path, False, str(e))
+
+def copy_and_split_class(src_dir, dst_train_dir, dst_val_dir, train_ratio=TRAIN_RATIO):
+    """
+    Copy images from src_dir to train/val dirs, splitting at the IMAGE level.
+    Returns (train_processed, train_failed, val_processed, val_failed).
+    """
+    # Check both possible source paths (regular dir or healthy subdir)
+    if not src_dir.exists():
+        return (0, 0, 0, 0)
+
+    src_files = sorted([
+        f for f in os.listdir(src_dir)
+        if os.path.isfile(src_dir / f) and os.path.splitext(f)[1].lower() in IMAGE_EXTS
+    ])
+    if not src_files:
+        return (0, 0, 0, 0)
+
+    # Split files at IMAGE level
+    random.shuffle(src_files)
+    split_idx = max(1, int(len(src_files) * train_ratio))
+    train_files = src_files[:split_idx]
+    val_files = src_files[split_idx:]
+
+    # Process train images
+    train_pairs = [
+        (str(src_dir / f), str(dst_train_dir / f"img_{i:04d}.jpg"))
+        for i, f in enumerate(train_files)
+    ]
+    val_pairs = [
+        (str(src_dir / f), str(dst_val_dir / f"img_{i:04d}.jpg"))
+        for i, f in enumerate(val_files)
+    ]
+
+    results = Parallel(n_jobs=N_JOBS, prefer="threads")(
+        delayed(process_image)(pair) for pair in train_pairs + val_pairs
+    )
+
+    train_ok = sum(1 for i, (_, ok, _) in enumerate(results) if ok and i < len(train_pairs))
+    train_fail = sum(1 for i, (_, ok, _) in enumerate(results) if not ok and i < len(train_pairs))
+    val_ok = sum(1 for i, (_, ok, _) in enumerate(results) if ok and i >= len(train_pairs))
+    val_fail = sum(1 for i, (_, ok, _) in enumerate(results) if not ok and i >= len(train_pairs))
+
+    return (train_ok, train_fail, val_ok, val_fail)
+
+# ─── Build Metadata ──────────────────────────────────────────────────────────
+
+def build_metadata(parsed, train_counts, val_counts, unmatched):
+    """Build species_index.json, class_hierarchy.json, dataset_stats.json."""
+    species_disease_map = defaultdict(set)
+    for species, disease in parsed.values():
+        species_disease_map[species].add(disease)
+    species_index = {k: sorted(v) for k, v in sorted(species_disease_map.items())}
+
+    class_hierarchy = {
+        "version": "1.0",
+        "description": "Hierarchical plant disease classification dataset",
+        "num_species": len(species_index),
+        "num_classes": len(parsed),
+        "species": {species: sorted(diseases) for species, diseases in species_index.items()}
+    }
+
+    # Aggregate counts
+    total_train = sum(cnt for sp, di, cnt in train_counts)
+    total_val = sum(cnt for sp, di, cnt in val_counts)
+    total_all = total_train + total_val
+
+    all_counts = [cnt for _, _, cnt in (train_counts + val_counts)]
+
+    species_disease_counts = defaultdict(lambda: defaultdict(int))
+    for sp, di, cnt in train_counts + val_counts:
+        species_disease_counts[sp][di] += cnt
+
+    # Also count classes from the parsed dict (unique species/disease combos)
+    parsed_classes = set((sp, di) for sp, di in parsed.values())
+
+    stats = {
+        "total_images": total_all,
+        "total_species": len(species_index),
+        "total_classes": len(parsed_classes),
+        "train_images": total_train,
+        "val_images": total_val,
+        "images_per_class": {
+            "min": min(all_counts) if all_counts else 0,
+            "max": max(all_counts) if all_counts else 0,
+            "mean": round(sum(all_counts) / len(all_counts)) if all_counts else 0,
+            "median": sorted(all_counts)[len(all_counts) // 2] if all_counts else 0,
+        },
+        "train_pct": round(total_train / total_all * 100, 1) if total_all else 0,
+        "val_pct": round(total_val / total_all * 100, 1) if total_all else 0,
+        "unmatched_dirs": len(unmatched),
+        "unmatched_dir_names": unmatched[:100] if unmatched else [],
+        "species_disease_counts": {
+            species: dict(diseases) for species, diseases in species_disease_counts.items()
+        }
+    }
+
+    return species_index, class_hierarchy, stats
+
+# ─── Main Pipeline ───────────────────────────────────────────────────────────
+
+def main():
+    print("=" * 60)
+    print("Phase 1 — Dataset Reorganization")
+    print("=" * 60)
+    print(f"Dataset: {DATASET_DIR}")
+    print(f"Output: {ORGANIZED_DIR}")
+    print()
+
+    # Step 1: Scan
+    print("─" * 40)
+    print("Step 1: Scanning dataset directories...")
+    print("─" * 40)
+    dirs = get_dataset_dirs()
+    print(f"  Found {len(dirs)} class directories")
+
+    # Step 2: Parse directory names into (species, disease) pairs
+    print()
+    print("─" * 40)
+    print("Step 2: Parsing directory names...")
+    print("─" * 40)
+    parsed, unmatched = build_plant_and_disease_dictionaries(dirs)
+
+    species_set = set(s for s, _ in parsed.values())
+    disease_set = set(d for _, d in parsed.values())
+    raw_classes = len(parsed)
+    unique_classes = len(set((s, d) for s, d in parsed.values()))
+    print(f"\n  Parsed: {raw_classes} entries")
+    print(f"  Unique species: {len(species_set)}")
+    print(f"  Unique disease labels: {len(disease_set)}")
+    print(f"  Unique (species, disease) pairs: {unique_classes}")
+
+    # Step 3: Process images with image-level train/val split
+    print()
+    print("─" * 40)
+    print("Step 3: Processing images (resize + train/val split)...")
+    print(f"  Max dimension: {MAX_DIM}px, JPEG q{JPEG_QUALITY}")
+    print(f"  Workers: {N_JOBS}")
+    print(f"  Split: {TRAIN_RATIO*100:.0f}/{VAL_RATIO*100:.0f} (image-level)")
+    print("─" * 40)
+
+    train_counts = []  # (species, disease, count)
+    val_counts = []
+    total_skipped = 0
+
+    # Process regular dirs
+    regular_items = [(d, sp, di) for d, (sp, di) in parsed.items()
+                     if not d.startswith("healthy/") and d in dirs]
+    healthy_items = [(d, sp, di) for d, (sp, di) in parsed.items()
+                     if d.startswith("healthy/")]
+
+    # Organize healthy items by plant
+    healthy_by_plant = {}
+    for d, sp, di in healthy_items:
+        healthy_by_plant[sp] = d  # d is like "healthy/tomato"
+
+    print(f"\n  Processing {len(regular_items)} disease + {len(healthy_items)} healthy classes...")
+
+    for d, species, disease in tqdm(regular_items, desc="  Disease classes"):
+        src_dir = DATASET_DIR / d
+        dst_train = TRAIN_DIR / species / disease
+        dst_val = VAL_DIR / species / disease
+
+        # Skip if already done (check a few files)
+        if dst_train.exists() and dst_val.exists() and \
+           len(os.listdir(dst_train)) + len(os.listdir(dst_val)) >= count_images(src_dir):
+            total_skipped += count_images(src_dir)
+            continue
+
+        tr_ok, tr_fail, va_ok, va_fail = copy_and_split_class(src_dir, dst_train, dst_val)
+        train_counts.append((species, disease, tr_ok))
+        val_counts.append((species, disease, va_ok))
+
+    # Process healthy subdirs
+    for sp, hkey in tqdm(healthy_by_plant.items(), desc="  Healthy classes"):
+        src_dir = DATASET_DIR / hkey  # e.g. data/dataset/healthy/tomato
+        dst_train = TRAIN_DIR / sp / "healthy"
+        dst_val = VAL_DIR / sp / "healthy"
+
+        if dst_train.exists() and dst_val.exists() and \
+           len(os.listdir(dst_train)) + len(os.listdir(dst_val)) >= count_images(src_dir):
+            total_skipped += count_images(src_dir)
+            continue
+
+        tr_ok, tr_fail, va_ok, va_fail = copy_and_split_class(src_dir, dst_train, dst_val)
+        train_counts.append((sp, "healthy", tr_ok))
+        val_counts.append((sp, "healthy", va_ok))
+
+    total_train = sum(c for _, _, c in train_counts)
+    total_val = sum(c for _, _, c in val_counts)
+    print(f"\n  Train images: {total_train:,}")
+    print(f"  Val images: {total_val:,}")
+    print(f"  Skipped previously processed: {total_skipped:,}")
+
+    # Step 4: Build metadata
+    print()
+    print("─" * 40)
+    print("Step 4: Building metadata files...")
+    print("─" * 40)
+    ORGANIZED_DIR.mkdir(parents=True, exist_ok=True)
+
+    species_index, class_hierarchy, stats = build_metadata(
+        parsed, train_counts, val_counts, unmatched
+    )
+
+    with open(ORGANIZED_DIR / "species_index.json", "w") as f:
+        json.dump(species_index, f, indent=2)
+    print(f"  ✓ species_index.json ({len(species_index)} species)")
+
+    with open(ORGANIZED_DIR / "class_hierarchy.json", "w") as f:
+        json.dump(class_hierarchy, f, indent=2)
+    print(f"  ✓ class_hierarchy.json")
+
+    with open(ORGANIZED_DIR / "dataset_stats.json", "w") as f:
+        json.dump(stats, f, indent=2)
+    print(f"  ✓ dataset_stats.json")
+
+    # Summary
+    print()
+    print("=" * 60)
+    print("Done!")
+    print("=" * 60)
+    print(f"  Total images: {stats['total_images']:,}")
+    print(f"  Species: {stats['total_species']}")
+    print(f"  Classes: {stats['total_classes']}")
+    print(f"  Train: {stats['train_images']:,} ({stats['train_pct']}%)")
+    print(f"  Val: {stats['val_images']:,} ({stats['val_pct']}%)")
+    print(f"  Unmatched dirs: {stats['unmatched_dirs']}")
+    print(f"  Train dir: {TRAIN_DIR}")
+    print(f"  Val dir: {VAL_DIR}")
+
+    if stats['unmatched_dirs'] > 0:
+        print(f"\n  ⚠  Manual review needed for {stats['unmatched_dirs']} dirs:")
+        for u in stats['unmatched_dir_names'][:20]:
+            print(f"     {u}")
+
+    return stats
+
+if __name__ == "__main__":
+    main()
--- a/scripts/train_hierarchical.py
+++ b/scripts/train_hierarchical.py
--- a/tasks/multi-image-user-feedback/01-api-types-and-schema.md
+++ b/tasks/multi-image-user-feedback/01-api-types-and-schema.md
@@ -0,0 +1,68 @@
+# 01. Extend Types and Add Feedback DB Schema
+
+meta:
+id: multi-image-user-feedback-01
+feature: multi-image-user-feedback
+priority: P0
+depends_on: []
+tags: [types, schema, database]
+
+objective:
+
+- Update shared TypeScript types to support multi-image requests, species-constrained inference, top-5 combo predictions, and post-evaluation feedback.
+- Add a new database table for storing feedback entries.
+
+deliverables:
+
+- `src/lib/types.ts` — updated with new interfaces
+- `src/lib/db/schema.ts` — updated with `diagnosisFeedback` table
+
+steps:
+
+1. Add these new types to `src/lib/types.ts`:
+   - `IdentifyOptions` — optional fields sent in the identify request: `secondImageId?: string`, `userSpecies?: string`, `useForTraining?: boolean`
+   - `IdentifyRequest` — extend to include `options?: IdentifyOptions`
+   - `TopPrediction` — a prediction with both species and disease info: `{ speciesName: string, diseaseName: string, diseaseId: string, confidence: ConfidenceResult, rank: number }`
+   - `IdentifyResponse` — extend to include `topSpeciesDisease?: TopPrediction[]`, `speciesConfidence?: ConfidenceResult`, `infoProvided: string[]` (which optional inputs the user gave)
+   - `AccuracyRating` — `"correct" | "incorrect" | "unsure"`
+   - `DiagnosisFeedback` — full feedback shape: `{ sessionId: string, imageIds: string[], userSpecies?: string, predictedDiseaseId: string, accuracyRating: AccuracyRating, consentToStoreImages: boolean, userCorrectedSpecies?: string, notes?: string, createdAt: string }`
+   - `FeedbackRequest` — POST body for the feedback endpoint
+   - `FeedbackResponse` — confirmation response
+
+2. Add a `diagnosisFeedback` table to `src/lib/db/schema.ts`:
+   - `id` — text primary key (UUID v4)
+   - `sessionId` — text, session identifier for grouping
+   - `imageIds` — JSON text array of stored image IDs
+   - `userSpecies` — text, nullable
+   - `predictedDiseaseId` — text, the top model prediction
+   - `accuracyRating` — text enum: `"correct" | "incorrect" | "unsure"`
+   - `consentToStoreImages` — integer (boolean)
+   - `userCorrectedSpecies` — text, nullable (only when accuracy=incorrect or unsure)
+   - `notes` — text, nullable
+   - `modelVersion` — text, the model version used
+   - `createdAt` — text, auto timestamp
+   - Add indexes on `sessionId`, `accuracyRating`, `createdAt`
+
+3. Export `DiagnosisFeedbackRow` and `DiagnosisFeedbackInsert` type helpers.
+
+tests:
+
+- Unit: verify new types compile correctly
+- Unit: verify schema migration produces correct table DDL
+- Unit: verify INSERT and SELECT on feedback table through Drizzle
+
+acceptance_criteria:
+
+- All new types are exported from `src/lib/types.ts`
+- `diagnosisFeedback` table exists in schema with all required columns
+- `DiagnosisFeedbackRow` and `DiagnosisFeedbackInsert` are exported
+
+validation:
+
+- `npx tsc --noEmit` passes
+- Drizzle Kit (`npx drizzle-kit generate`) produces valid migration SQL
+
+notes:
+
+- The `sessionId` ties together the upload, identify, and feedback flow
+- Image storage consent is a boolean to comply with data privacy requirements
--- a/tasks/multi-image-user-feedback/02-multi-image-inference-pipeline.md
+++ b/tasks/multi-image-user-feedback/02-multi-image-inference-pipeline.md
@@ -0,0 +1,65 @@
+# 02. Multi-Image Ensemble & Species-Constrained Inference
+
+meta:
+id: multi-image-user-feedback-02
+feature: multi-image-user-feedback
+priority: P1
+depends_on: [multi-image-user-feedback-01]
+tags: [inference, ml]
+
+objective:
+
+- Extend the inference pipeline to support multi-image ensemble inference (averaging features or logits from 2+ images).
+- Add species-constrained softmax that renormalizes probabilities over only the disease classes belonging to a known species.
+
+deliverables:
+
+- `src/lib/ml/inference.ts` — updated with ensemble and constrained inference functions
+- `src/lib/ml/confidence.ts` — updated with species-aware confidence calibration
+
+steps:
+
+1. In `src/lib/ml/inference.ts`, add:
+   - `runEnsembleInference(tensors: Float32Array[], topK?: number): Promise<InferenceResult>` — runs multiple images through the model, averages their logits, and returns top-K predictions. Averaging logits (before softmax) is preferred over averaging probabilities since it preserves confidence structure.
+   - `speciesConstrainedSoftmax(logits: Float32Array, speciesClasses: number[]): Float32Array` — given the full 11,818-class logits and a list of class indices belonging to the user-specified species, compute softmax over only those indices and return a renormalized probability vector (zero everywhere else). The model output dimension (11,818) should be a configurable constant.
+   - `runSpeciesConstrainedInference(tensor: Float32Array, speciesClassIndices: number[], topK?: number): Promise<InferenceResult>` — run inference then apply species-constrained softmax before extracting top-K.
+   - `runEnsembleSpeciesConstrained(tensors: Float32Array[], speciesClassIndices: number[], topK?: number): Promise<InferenceResult>` — ensemble then constrain.
+
+2. Export the `CLASSIFIER_NUM_CLASSES` constant (11,818) from a new constants file or from labels.ts, and export `SPECIES_CLASS_RANGES` (a map from species name → [startIndex, endIndex] in the model output) from `src/lib/ml/species-class-ranges.ts` (see step 4).
+
+3. In `src/lib/ml/confidence.ts`, add:
+   - `calibrateSpeciesConfidence(rawProb: number, numDiseaseClasses: number): ConfidenceResult` — adjusts calibration factor based on how many disease classes the species has (fewer classes = higher effective confidence).
+   - `getEnsembleConfidence(predictions: RawPrediction[][]): ConfidenceResult` — aggregate confidence from multiple images.
+
+4. Create `src/lib/ml/species-class-ranges.ts` containing the mapping from species name → [class start index, class end index] in the 11,818-class model output. This is derived from the training dataset's `species_index.json` or `class_hierarchy.json`.
+
+5. Handle edge cases:
+   - If tensors array is empty → throw
+   - If tensor length doesn't match expected model input → throw validation error
+   - If species name not found in `SPECIES_CLASS_RANGES` → fall back to full softmax
+
+tests:
+
+- Unit: test logit averaging with 2 identical tensors → results should be identical to single inference
+- Unit: test logit averaging with 2 different tensors → verify averaged output
+- Unit: test species-constrained softmax — verify probabilities are zero outside the constrained indices
+- Unit: test constrained softmax sums to ~1.0 within the species class range
+- Unit: test ensemble + constrained combined pipeline
+
+acceptance_criteria:
+
+- `runEnsembleInference` accepts multiple tensors and returns averaged top-K predictions
+- `speciesConstrainedSoftmax` zeros out all classes outside the species range
+- `runSpeciesConstrainedInference` and `runEnsembleSpeciesConstrained` produce constrained results
+- Confidence calibration accounts for number of disease classes in the species
+
+validation:
+
+- `npx tsc --noEmit` passes
+- Unit tests pass with `npx vitest run src/lib/ml/ --reporter=verbose`
+
+notes:
+
+- The current mock model outputs 38 classes. These new functions target the 11,818-class model.
+- Until the real model loads, ensemble/constrained functions should still work with mock data (just with fewer classes).
+- The species-ranges file should be auto-generated from `data/organized/class_hierarchy.json` and checked into version control.
--- a/tasks/multi-image-user-feedback/03-hierarchical-model-loader.md
+++ b/tasks/multi-image-user-feedback/03-hierarchical-model-loader.md
@@ -0,0 +1,73 @@
+# 03. Load Trained Swin-Tiny Model with Species/Disease Routing
+
+meta:
+id: multi-image-user-feedback-03
+feature: multi-image-user-feedback
+priority: P2
+depends_on: []
+tags: [ml, model-loader, inference]
+
+objective:
+
+- Create a new model loader backend that loads the trained Swin-Tiny checkpoint (`species_final_final.pt`) and routes through the species head and disease heads to produce 11,818-class logits.
+- This task requires the PyTorch model to finish training on the Strix Halo machine; the resulting checkpoint must then be exported to the correct format before the loader can be implemented.
+
+deliverables:
+
+- `src/lib/ml/hierarchical-model.ts` — new PlantDiseaseModel implementation for the Swin-Tiny model
+- `scripts/export-model.js` — script to export the PyTorch checkpoint to ONNX format (TF.js as a fallback)
+- `public/models/plant-disease-classifier-v2/` — exported model directory (TF.js or ONNX)
+
+steps:
+
+1. Create `scripts/export-model.js`:
+   - Load the PyTorch checkpoint from `checkpoints/species_final/species_final_final.pt`
+   - Export to ONNX format with NCHW input shape [1, 3, 224, 224]
+   - Also export `species_index.json` and `class_hierarchy.json` alongside the model
+   - Output to `public/models/plant-disease-classifier-v2/`
+
+2. Create `src/lib/ml/hierarchical-model.ts`:
+   - Implement the `PlantDiseaseModel` interface
+   - Load the ONNX model via `onnxruntime-node`
+   - Load species/disease index files
+   - Implement `predict()`:
+     - Preprocess to 224×224 (Swin-Tiny input size, not 160)
+     - Run forward pass → get [1, 768] features → species logits → disease routing
+     - The exported model performs this routing in a single forward pass and already produces 11,818 logits from the combined species + disease heads
+     - Return the full 11,818-dimension logits array
+   - Implement `getStatus()` returning model metadata with `numClasses: 11818`
+
+3. Update `src/lib/ml/model-loader.ts`:
+   - Add detection for v2 model directory (`model-v2.json` or similar)
+   - Try loading v2 model first (if available), fall back to v1 then mock
+   - Export `MODEL_NUM_CLASSES` constant for use by other modules
+   - Export `getModelVersion()` to distinguish v1 (38-class) from v2 (11,818-class)
+
+4. Handle edge cases:
+   - No model checkpoint available → fall back through v1 → mock
+   - CUDA/ROCm not available for ONNX → use CPU backend
+   - Model version mismatch → clear error message
+
+tests:
+
+- Integration: export model from checkpoint and verify output shape is [1, 11818]
+- Integration: load exported model and run inference on a test image
+- Unit: model loader graceful fallback chain (v2 → v1 → mock)
+
+acceptance_criteria:
+
+- Exported model produces 11,818 logits from a 224×224 image
+- Model loader loads v2 model when available, falls back gracefully when not
+- All existing v1 model consumers continue to work unmodified (via version detection)
+
+validation:
+
+- `node scripts/export-model.js` produces model files
+- `npx tsc --noEmit` passes
+- POST to `/api/identify` returns predictions (may be limited if species→disease label mapping not yet complete)
+
+notes:
+
+- This task is **blocked on model training completion**. The task file is the implementation spec; actual work begins after `species_final_final.pt` exists.
+- The ONNX export path is preferred for server-side inference (no Python runtime needed once exported).
+- If ONNX export quality degrades the output, export to TF.js SavedModel format instead.
--- a/tasks/multi-image-user-feedback/04-enhanced-identify-api-route.md
+++ b/tasks/multi-image-user-feedback/04-enhanced-identify-api-route.md
@@ -0,0 +1,80 @@
+# 04. Enhanced API Route for Multi-Image, Species-Aware Identification
+
+meta:
+id: multi-image-user-feedback-04
+feature: multi-image-user-feedback
+priority: P1
+depends_on: [multi-image-user-feedback-01, multi-image-user-feedback-02, multi-image-user-feedback-03]
+tags: [api, backend]
+
+objective:
+
+- Update the `/api/identify` route to accept optional `options` (secondImageId, userSpecies), run ensemble inference when multiple images provided, apply species-constrained softmax when species is known, and return top-5 species+disease combo predictions with confidence metadata.
+
+deliverables:
+
+- `src/app/api/identify/route.ts` — updated route handler
+
+steps:
+
+1. Update the request parsing to accept `options?: IdentifyOptions` alongside `imageId`:
+
+   ```typescript
+   const { imageId, options } = body;
+   const secondImageId = options?.secondImageId;
+   const userSpecies = options?.userSpecies;
+   ```
+
+2. Update image loading to support optional second image:
+   - Load first image tensor (existing logic)
+   - If `secondImageId` provided, load and preprocess that image too
+   - Validate both images exist before inference
+
+3. Update inference logic to use the new pipeline:
+   - If 2 images provided → call `runEnsembleInference(tensors, topK=10)`
+   - If `userSpecies` provided → get species class range, call `runSpeciesConstrainedInference` or `runEnsembleSpeciesConstrained`
+   - If 2 images + species → use full ensemble+constrained
+   - If 1 image + no species → use existing single-inference path (backward compatible)
+   - Pass the `infoProvided` list to the response (which optional inputs were used)
+
+4. Generate top-5 species+disease combo predictions:
+   - After enrichment, construct `TopPrediction[]` from the top enriched predictions
+   - Each entry: `{ speciesName, diseaseName, diseaseId, confidence, rank }`
+   - Include species confidence when `userSpecies` was provided
+
+5. Add `speciesConfidence` to response:
+   - When user provides species, compute how much the constraint improves confidence vs unconstrained
+   - Return both constrained and unconstrained confidence for comparison
+
+6. Handle demo/mock mode:
+   - When no real model loaded, return mock top-5 combos with appropriate demo_mode flag
+   - Mock combos should be realistic (use knowledge base to generate plausible species/disease pairs)
+
+tests:
+
+- Integration: single image + no options → existing behavior unchanged
+- Integration: single image + species → constrained results, all predictions belong to that species
+- Integration: 2 images → ensemble results, confidence should differ from single image
+- Integration: 2 images + species → fully constrained ensemble
+- Integration: a `secondImageId` that does not reference an existing uploaded image returns a 400 error
+- Integration: demo mode returns mock data
+
+acceptance_criteria:
+
+- Existing single-image identify flow works unchanged when `options` omitted
+- When secondImageId is provided, inference runs on both images
+- When userSpecies is provided, only diseases of that species are returned
+- Top-5 species/disease combos are included in response
+- Confidence reflects whether 1 or 2 images were used
+- 400 error for invalid/missing second image
+
+validation:
+
+- `npx tsc --noEmit` passes
+- Existing identify API tests pass: `npx vitest run src/app/api/identify/`
+- Manual test with curl sending multi-image request
+
+notes:
+
+- The `infoProvided` array helps the UI show what data was used for the diagnosis
+- When userSpecies is given but no model can restrict to that species (e.g., mock mode), fall back to filtering results client-side by the plant name
--- a/tasks/multi-image-user-feedback/05-upload-page-second-image-species.md
+++ b/tasks/multi-image-user-feedback/05-upload-page-second-image-species.md
@@ -0,0 +1,99 @@
+# 05. Upload Page with Optional Second Image and Species Selector
+
+meta:
+id: multi-image-user-feedback-05
+feature: multi-image-user-feedback
+priority: P1
+depends_on: [multi-image-user-feedback-01]
+tags: [ui, frontend, upload]
+
+objective:
+
+- Enhance the upload page to support an optional second image upload and a species search/select field.
+- Show live confidence indicators that update as the user provides more information.
+
+deliverables:
+
+- `src/app/upload/page.tsx` — updated upload flow
+- `src/components/ImageUpload.tsx` — updated to support multiple uploads
+- `src/components/SpeciesSelector.tsx` — new species search/dropdown component
+- `src/components/ConfidencePreview.tsx` — new inline confidence indicator
+
+steps:
+
+1. Create `src/components/SpeciesSelector.tsx`:
+   - Searchable dropdown with all 531 plant species from the knowledge base
+   - Uses `fuse.js` for fuzzy matching (lightweight, fast client-side search)
+   - Props: `value: string | null`, `onChange: (species: string | null) => void`, `disabled?: boolean`
+   - States: empty, searching, selected, clearable
+   - Shows common name and scientific name in results
+   - Keyboard-navigable: up/down arrows, enter to select, escape to close
+   - Mobile-friendly with full-screen overlay on small screens
+
+2. Create `src/components/ConfidencePreview.tsx`:
+   - Small inline bar showing estimated confidence based on info provided
+   - Props: `numImages: number`, `speciesProvided: boolean`, `className?: string`
+   - Dynamics:
+     - 1 image, no species → "Low confidence — add another photo or identify the plant"
+     - 1 image + species → "Medium confidence"
+     - 2 images, no species → "Medium confidence — getting clearer"
+     - 2 images + species → "High confidence — good data for diagnosis"
+   - Animated transitions between states
+   - Uses the same color scheme as ConfidenceBadge (green/amber/red)
+
+3. Update `src/components/ImageUpload.tsx`:
+   - Change from single-image to multi-image upload flow
+   - After the first successful upload, show an "Add another photo (optional)" button
+   - Second upload uses the same ImageUpload internals but is secondary in visual weight
+   - Store both `imageId` responses for the identify request
+   - Add `uploadedImages: UploadResponse[]` tracking
+   - Expose method to clear all images
+
+4. Update `src/app/upload/page.tsx`:
+   - Add state for: firstImageId, secondImageId, selectedSpecies
+   - Add `SpeciesSelector` component below the upload zone(s)
+   - Add `ConfidencePreview` component showing live confidence estimate
+   - Add a "Continue to Diagnosis" button that becomes more prominent as more info is provided
+   - On submit:
+     - Build `IdentifyOptions` with secondImageId and/or userSpecies if provided
+     - Pass `options` in the identify API call (or query params to results page)
+     - Navigate to `/results/{firstImageId}?options={encodedOptions}`
+
+5. Handle edge cases:
+   - User adds second image, then removes it → gracefully falls back to single-image
+   - User selects species, then wants to change it → searchable select supports re-selection
+   - No species match found → user can type free-form (stored as-is)
+   - Upload of second image fails → component shows inline error but allows retry without blocking the first image
+
+6. Optimistic UI guidance:
+   - Add a small info panel below the confidence preview explaining _why_ more info helps
+   - Text: "Adding a second photo from a different angle helps our AI make a more accurate diagnosis. Identifying the plant species narrows down the possible diseases."
+
+tests:
+
+- Unit: SpeciesSelector renders, searches, selects, clears
+- Unit: SpeciesSelector keyboard navigation works
+- Unit: ConfidencePreview renders correct messages for each combination
+- Unit: ImageUpload supports 2-image flow
+- Integration: Full upload flow with 2 images + species → verify all data in request
+- A11y: verify aria-labels, roles, keyboard navigation
+
+acceptance_criteria:
+
+- User can upload a second image (optional, after first succeeds)
+- User can search and select a plant species from a dropdown
+- Confidence preview bar updates dynamically as info is added
+- "Continue to Diagnosis" button is prominent once at least 1 image is uploaded
+- Navigate to results page with all options encoded
+
+validation:
+
+- `npx tsc --noEmit` passes
+- Manual test: upload flow with 0/1/2 images and with/without species
+- Responsive test: works on mobile viewport (375px width)
+
+notes:
+
+- The species list comes from the knowledge base (`/api/plants` endpoint)
+- `fuse.js` is already lightweight (~15KB gzipped) and can be client-imported
+- The options are passed as URL query params to the results page since we navigate before the identify API call
--- a/tasks/multi-image-user-feedback/06-dynamic-results-dashboard.md
+++ b/tasks/multi-image-user-feedback/06-dynamic-results-dashboard.md
@@ -0,0 +1,94 @@
+# 06. Results Dashboard with Dynamic Confidence and Top-5 Display
+
+meta:
+id: multi-image-user-feedback-06
+feature: multi-image-user-feedback
+priority: P1
+depends_on: [multi-image-user-feedback-01, multi-image-user-feedback-04, multi-image-user-feedback-05]
+tags: [ui, frontend, results]
+
+objective:
+
+- Enhance the results dashboard to display an info panel showing what data the user provided (1/2 images, species) and how it affected confidence.
+- Show top-5 species/disease combination predictions as a compact card stack.
+- Animate confidence transitions when the user lands on results.
+
+deliverables:
+
+- `src/components/ResultsDashboard.tsx` — updated dashboard
+- `src/components/InfoProvidedBanner.tsx` — new component showing what info was used
+- `src/components/TopCombinationsCard.tsx` — new component for top-5 species/disease combo list
+
+steps:
+
+1. Create `src/components/InfoProvidedBanner.tsx`:
+   - Display a banner/panel at the top of results showing:
+     - Number of images analyzed (1 or 2)
+     - Whether user identified the plant species (yes/no, with species name if yes)
+     - Icons/checkmarks for each piece of info
+   - Show a compact breakdown: "You provided: 📸 2 images · 🌿 Species: Tomato"
+   - Props: `{ numImages: number, userSpecies?: string | null }`
+   - Style: subtle background, small text, positioned between page header and results
+   - Animate in with a fade-slide effect
+
+2. Create `src/components/TopCombinationsCard.tsx`:
+   - Display the top-5 species/disease combination predictions from the API response
+   - Each row: rank badge, disease name, plant name, confidence bar
+   - Clicking a row expands it to show full disease info (reuses DiseaseCard internals)
+   - Props: `{ predictions: TopPrediction[], onSelect: (diseaseId: string) => void }`
+   - States:
+     - Loading: skeleton rows
+     - Empty: no combinations available
+     - Error: graceful message
+     - Populated: ranked list with horizontal confidence bars
+   - Confidence bar: colored (green/amber/red) horizontal bar with percentage label
+   - The top-5 is filterable: user can toggle between "All diseases" and "Constrained to your species" (when species was provided)
+
+3. Update `src/components/ResultsDashboard.tsx`:
+   - Accept new props: `numImages: number`, `infoProvided: string[]`, `userSpecies?: string`, `topCombinations?: TopPrediction[]`
+   - Add `InfoProvidedBanner` at the top of the results area
+   - Add `TopCombinationsCard` in the right sidebar (below image preview on desktop)
+   - When `infoProvided` includes species, show a tag/badge: "Species identified: Tomato" with a lock icon (implying the results are constrained to that species)
+   - When the response contains a species confidence, show a "How confidence changes with more info" mini-accordion:
+     - "With 1 image: 65% confidence"
+     - "With 2 images: 72% confidence"
+     - "With 2 images + species: 88% confidence"
+     - This educates the user on the value of providing more info
+
+4. Animate confidence transitions:
+   - When results load, confidence badges count up from 0 to their final percentage
+   - Use CSS `@keyframes` for the count-up animation
+   - Duration: ~600ms with ease-out curve
+   - Only animate on initial load, not on re-renders
+
+5. Handle edge cases:
+   - No top combinations (no species match) → show message: "No common patterns found"
+   - Single image, no species → hide the "how confidence changes" section (nothing to compare)
+   - Single image with species → show comparison vs without species (estimate)
+   - Demo mode → show realistic mock combos
+
+tests:
+
+- Unit: InfoProvidedBanner renders correct icons for 1/2 images and species presence
+- Unit: TopCombinationsCard renders ranked list and toggles between constrained/all
+- Unit: confidence count-up animation triggers on mount
+- Integration: full results page with all new sections renders correctly
+
+acceptance_criteria:
+
+- InfoProvidedBanner shows how many images and whether species was identified
+- TopCombinationsCard shows top-5 predictions with confidence bars
+- Confidence values count up on page load
+- When species info is available, a "how confidence changes" section is visible
+- All existing results functionality (DiseaseCard, SymptomChecker, etc.) still works
+
+validation:
+
+- `npx tsc --noEmit` passes
+- Manual test: navigate to results with 2 images + species → verify all UI sections
+- Manual test: navigate with 1 image no species → verify simplified UI
+
+notes:
+
+- The top-5 combos come from the identify API response's `topSpeciesDisease` field
+- Confidence comparison values are estimated when the model hasn't been run with/without the constraint — the API provides both constrained and unconstrained confidence
--- a/tasks/multi-image-user-feedback/07-post-evaluation-feedback-component.md
+++ b/tasks/multi-image-user-feedback/07-post-evaluation-feedback-component.md
@@ -0,0 +1,111 @@
+# 07. Post-Diagnosis Feedback Component (Accuracy / Unsure / Store Consent)
+
+meta:
+id: multi-image-user-feedback-07
+feature: multi-image-user-feedback
+priority: P1
+depends_on: [multi-image-user-feedback-01, multi-image-user-feedback-06]
+tags: [ui, frontend, feedback]
+
+objective:
+
+- Create a feedback panel that appears after the diagnosis results, asking the user to rate accuracy (✓ / ✗ / ?) and optionally consent to storing their images for model retraining.
+
+deliverables:
+
+- `src/components/PostDiagnosisFeedback.tsx` — new feedback component
+- `src/components/ResultsDashboard.tsx` — updated to include feedback panel
+
+steps:
+
+1. Create `src/components/PostDiagnosisFeedback.tsx`:
+
+   Component structure (vertically stacked in a card):
+
+   ```
+   ┌─────────────────────────────────────────────┐
+   │  💬 How accurate was this diagnosis?         │
+   │                                              │
+   │  [ ✅ Correct ] [ ❌ Incorrect ] [ ❓ Unsure ] │
+   │                                              │
+   │  ── (if Incorrect or Unsure selected) ──     │
+   │  What did you expect? (optional)              │
+   │  [_____________________________] text input  │
+   │                                              │
+   │  ──────────────────────────────────────────── │
+   │                                              │
+   │  ☐ Allow us to store these images to         │
+   │    improve future diagnoses?                  │
+   │  (Your privacy matters — images stored         │
+   │   securely and never shared)                  │
+   │                                              │
+   │  [ Submit Feedback ] → sent to /api/feedback  │
+   │                                              │
+   │  ───── (after submission) ─────              │
+   │  ✓ Thank you! Your feedback helps us improve. │
+   └─────────────────────────────────────────────┘
+   ```
+
+   Props: `{ sessionId: string, imageIds: string[], predictedDiseaseId: string, userSpecies?: string, modelVersion: string, onSubmit?: () => void }`
+
+   States:
+   - **Pending**: not yet rated, three large buttons (✓/✗/?)
+   - **Rated**: accuracy selected, showing optional text input + consent checkbox
+   - **Submitting**: loading spinner on submit button
+   - **Submitted**: success message with thank-you text
+   - **Error**: submission failed, retry button
+
+   Implementation details:
+   - Accuracy buttons are large and touch-friendly (min 48px tap target)
+   - Selected button fills with its color: green (✓), red (✗), amber (?)
+   - Text input is an optional free-text field for user comments
+   - Consent checkbox has a brief privacy notice below it
+   - Submit button disabled until accuracy is rated
+   - On submit, POST to `/api/feedback` with a `FeedbackRequest` body (the server assigns `id`, `modelVersion`, and `createdAt`)
+   - Animated transitions between states
+
+2. Update `src/components/ResultsDashboard.tsx`:
+   - Import and render `PostDiagnosisFeedback` at the bottom of the results area
+   - Pass sessionId (generated from first imageId), imageIds, predictedDiseaseId, userSpecies, modelVersion
+   - Show feedback panel after all prediction cards
+   - If no predictions at all, still show feedback (they may want to tell us the model was wrong)
+
+3. Handle edge cases:
+   - Feedback submission fails → show inline error with retry
+   - User refreshes page → already-submitted state persists if submission completed (could use sessionStorage)
+   - Consent unchecked → still submit feedback (just with consent=false)
+   - No predictions returned → show feedback anyway with "No disease identified" context
+
+tests:
+
+- Unit: all five states render correctly (pending/rated/submitting/submitted/error)
+- Unit: accuracy selection toggles correctly (only one selected at a time)
+- Unit: submit button disabled until accuracy is rated
+- Unit: consent checkbox unchecked by default
+- Unit: text input only shown when accuracy is "incorrect" or "unsure"
+- Unit: submission calls /api/feedback with correct payload shape
+- Integration: feedback flow from rating to submission to success
+
+acceptance_criteria:
+
+- Three accuracy rating buttons are always visible after results
+- Rating is required before submission
+- Optional text input appears for "Incorrect" or "Unsure" ratings
+- Consent checkbox allows opting in to image storage
+- Submit sends correct payload to /api/feedback
+- Success message shown after submission
+- Error state with retry if submission fails
+
+validation:
+
+- `npx tsc --noEmit` passes
+- Manual test: rate accuracy, type notes, toggle consent, submit
+- Manual test: verify API receives correct data
+- A11y: verify all interactive elements have accessible labels
+
+notes:
+
+- The `sessionId` ties together upload → identify → feedback for the same session
+- Privacy notice text should be reviewed for legal compliance
+- Consider adding a "Share with the community" option in a future iteration
+- Debounce the submit button to prevent double-submission
--- a/tasks/multi-image-user-feedback/08-feedback-api-endpoint.md
+++ b/tasks/multi-image-user-feedback/08-feedback-api-endpoint.md
@@ -0,0 +1,107 @@
+# 08. Feedback API Endpoint for Accuracy Ratings and Storage Consent
+
+meta:
+id: multi-image-user-feedback-08
+feature: multi-image-user-feedback
+priority: P1
+depends_on: [multi-image-user-feedback-01]
+tags: [api, backend, database]
+
+objective:
+
+- Create a POST endpoint at `/api/feedback` that accepts diagnosis feedback submissions (accuracy rating, notes, image storage consent) and persists them to the database.
+
+deliverables:
+
+- `src/app/api/feedback/route.ts` — new API route
+- `src/app/api/feedback/feedback.test.ts` — test file
+
+steps:
+
+1. Create `src/app/api/feedback/route.ts`:
+
+   Route: `POST /api/feedback`
+
+   Accepts JSON body matching `FeedbackRequest`:
+
+   ```typescript
+   {
+     sessionId: string;
+     imageIds: string[];
+     userSpecies?: string;
+     predictedDiseaseId: string;
+     accuracyRating: "correct" | "incorrect" | "unsure";
+     consentToStoreImages: boolean;
+     userCorrectedSpecies?: string;
+     notes?: string;
+   }
+   ```
+
+   Handler logic:
+   - Parse and validate request body
+   - Generate UUID for `id`
+   - Get `modelVersion` from model loader's `getStatus()`
+   - Set `createdAt` to current timestamp
+   - Insert into `diagnosisFeedback` table via Drizzle
+   - Return `FeedbackResponse`: `{ success: true, id: string }`
+   - Handle validation errors with 400 status
+   - Handle DB errors with 500 status
+
+   Validation rules:
+   - `sessionId` — required, non-empty string
+   - `imageIds` — required, array of non-empty strings, min length 1
+   - `accuracyRating` — required, must be one of "correct", "incorrect", "unsure"
+   - `consentToStoreImages` — required, boolean
+   - `userSpecies` — optional string
+   - `userCorrectedSpecies` — optional string, only meaningful when accuracy is not "correct"
+   - `notes` — optional string, max 500 characters (with error message if exceeded)
+
+2. Create `src/lib/api/feedback.ts` — client-side helper:
+   - `submitFeedback(data: FeedbackRequest): Promise<FeedbackResponse>`
+   - POST to `/api/feedback` with JSON body
+   - 15-second timeout
+   - Handle network errors gracefully
+
+3. Handle edge cases:
+   - Invalid JSON body → 400 with descriptive error
+   - Missing required fields → 400 listing missing fields
+   - Invalid accuracyRating value → 400 with allowed values
+   - Database unreachable → 500 with error message
+   - Duplicate sessionId → allowed (user can submit multiple times for different predictions)
+
+4. Caching and authentication:
+   - Add `Cache-Control: no-store` header
+   - No authentication required (public endpoint for feedback)
+
+tests:
+
+- Unit: valid feedback submission returns 200 with success
+- Unit: missing required fields return 400
+- Unit: invalid accuracyRating returns 400
+- Unit: notes over 500 chars returns 400
+- Unit: empty imageIds array returns 400
+- Unit: client helper `submitFeedback()` makes correct fetch call
+- Unit: client helper handles network error gracefully
+- Integration: submit feedback and verify it exists in database
+
+acceptance_criteria:
+
+- POST /api/feedback accepts valid feedback and stores it
+- Invalid requests return appropriate 400 errors with descriptive messages
+- Database stores all fields correctly
+- Client helper function is usable from any feedback component
+- Endpoint returns `{ success: true, id }` on success
+
+validation:
+
+- `npx tsc --noEmit` passes
+- Unit tests pass: `npx vitest run src/app/api/feedback/`
+- Manual test: `curl -X POST http://localhost:3000/api/feedback -H 'Content-Type: application/json' -d '{"sessionId":"test","imageIds":["img1"],"predictedDiseaseId":"early-blight","accuracyRating":"correct","consentToStoreImages":false}'`
+- Verify stored data with direct DB query
+
+notes:
+
+- No auth needed for MVP — feedback is public and anonymous
+- imageIds reference images in the uploads directory; no automatic cleanup
+- A future task could add a review/admin dashboard for browsing feedback entries
+- Rate limiting could be added later if needed (by sessionId or IP)
--- a/tasks/multi-image-user-feedback/README.md
+++ b/tasks/multi-image-user-feedback/README.md
@@ -0,0 +1,40 @@
+# Multi-Image Upload & User Feedback
+
+Objective: Allow users to upload an optional second image, manually identify the plant species, see dynamic confidence updates and top-5 predictions, and provide post-diagnosis feedback (accuracy rating + storage consent).
+
+Status legend: [ ] todo, [~] in-progress, [x] done
+
+## Tasks
+
+- [ ] 01 — Extend types and add feedback DB schema → `01-api-types-and-schema.md`
+- [ ] 02 — Multi-image ensemble & species-constrained inference → `02-multi-image-inference-pipeline.md`
+- [ ] 03 — Load trained Swin-Tiny model with species/disease routing → `03-hierarchical-model-loader.md`
+- [ ] 04 — Enhanced API route for multi-image, species-aware identification → `04-enhanced-identify-api-route.md`
+- [ ] 05 — Upload page with optional second image and species selector → `05-upload-page-second-image-species.md`
+- [ ] 06 — Results dashboard with dynamic confidence and top-5 display → `06-dynamic-results-dashboard.md`
+- [ ] 07 — Post-diagnosis feedback component (accuracy / unsure / store consent) → `07-post-evaluation-feedback-component.md`
+- [ ] 08 — Feedback API endpoint for accuracy ratings and storage consent → `08-feedback-api-endpoint.md`
+
+## Dependencies
+
+```
+01 ← 02  (types needed by inference)
+01 ← 04  (types needed by API route)
+01 ← 05  (types needed by upload page)
+01 ← 06  (types needed by results dashboard)
+01 ← 07  (types needed by feedback component)
+01 ← 08  (schema needed by feedback API)
+02 ← 04  (inference pipeline needed by API route)
+03 ← 04  (model loader needed by API route)
+05 ← 06  (upload outcomes fed into results)
+06 ← 07  (results shown before post-diagnosis feedback)
+04 ← 06  (API response needed by results dashboard)
+```
+
+## Exit Criteria
+
+- User can optionally upload a second image and/or enter a species name
+- Confidence scores dynamically reflect the amount of information provided
+- Top-5 species/disease combination predictions are displayed
+- After diagnosis, user can rate accuracy (✓/✗/?) and opt in to image storage for training
+- Feedback is persisted to the database