beepboop

2026-06-06 15:09:46 -04:00
parent 78220d3568
commit 06295c83ca
56 changed files with 12018 additions and 440 deletions
--- a/apps/web/tasks/production-ml-pipeline/07-end-to-end-testing.md
+++ b/apps/web/tasks/production-ml-pipeline/07-end-to-end-testing.md
@@ -0,0 +1,292 @@
+# 07. End-to-End Integration Testing
+
+meta:
+id: production-ml-pipeline-07
+feature: production-ml-pipeline
+priority: P1
+depends_on: [production-ml-pipeline-05]
+tags: [testing, integration, e2e]
+
+objective:
+
+- Create comprehensive end-to-end tests that validate the full pipeline from image upload to disease diagnosis
+- Verify real model inference produces valid, calibrated predictions
+- Test all code paths: normal flow, healthy result, error cases, plant context
+- Ensure all components work together correctly in a realistic scenario
+
+deliverables:
+
+- `tests/e2e/pipeline.test.ts` — full pipeline E2E tests
+- `tests/e2e/fixtures/` — test images and expected results
+- `tests/e2e/utils.ts` — test utilities (upload helper, identify helper)
+- Updated `vitest.config.ts` — E2E test configuration
+
+steps:
+
+1. **Create test fixtures** `tests/e2e/fixtures/`:
+   - `tomato-early-blight.jpg` — known tomato early blight image (from PlantVillage test set)
+   - `tomato-healthy.jpg` — known healthy tomato image
+   - `unknown-plant.jpg` — unrelated image (should produce low confidence)
+   - `invalid-image.txt` — non-image file (should fail validation)
+   - `expected-results.json` — expected disease IDs and confidence ranges for each test image
+
+2. **Create E2E test utilities** `tests/e2e/utils.ts`:
+
+   ```typescript
+   import fs from "fs/promises";
+   import path from "path";
+
+   /**
+    * Uploads a fixture file from the `fixtures` directory next to this module
+    * to the upload endpoint as multipart form data (field name `image`).
+    *
+    * @param filename - Fixture file name, resolved relative to `fixtures/`.
+    * @returns The parsed JSON body of a successful upload response.
+    * @throws Error if the fixture cannot be read or the response status is not OK.
+    */
+   export async function uploadTestImage(
+     filename: string,
+   ): Promise<{ imageId: string; previewUrl: string }> {
+     // Declare the MIME type that matches the fixture's extension, so a
+     // non-image fixture (e.g. invalid-image.txt) is not mislabelled as JPEG.
+     const mimeTypes: Record<string, string> = {
+       ".jpg": "image/jpeg",
+       ".jpeg": "image/jpeg",
+       ".png": "image/png",
+       ".webp": "image/webp",
+       ".txt": "text/plain",
+     };
+     const contentType =
+       mimeTypes[path.extname(filename).toLowerCase()] ?? "application/octet-stream";
+
+     const imagePath = path.join(__dirname, "fixtures", filename);
+     const imageBuffer = await fs.readFile(imagePath);
+
+     const formData = new FormData();
+     formData.append("image", new Blob([imageBuffer], { type: contentType }), filename);
+
+     const response = await fetch("http://localhost:3000/api/upload", {
+       method: "POST",
+       body: formData,
+     });
+
+     if (!response.ok) {
+       throw new Error(`Upload failed: ${response.status}`);
+     }
+
+     return response.json();
+   }
+
+   /**
+    * Posts an identify request for a previously uploaded image.
+    *
+    * @param imageId - Identifier of the uploaded image.
+    * @param plantId - Optional plant identifier sent along with the request.
+    * @returns The parsed JSON response body (untyped).
+    * @throws Error if the response status is not OK.
+    */
+   export async function identifyImage(imageId: string, plantId?: string): Promise<any> {
+     const payload = JSON.stringify({ imageId, plantId });
+
+     const response = await fetch("http://localhost:3000/api/identify", {
+       method: "POST",
+       headers: { "Content-Type": "application/json" },
+       body: payload,
+     });
+
+     if (response.ok) {
+       return response.json();
+     }
+
+     throw new Error(`Identify failed: ${response.status}`);
+   }
+   ```
+
+3. **Write full pipeline E2E test** `tests/e2e/pipeline.test.ts`:
+
+   ```typescript
+   import { describe, it, expect, beforeAll } from "vitest";
+   import { uploadTestImage, identifyImage } from "./utils";
+   import expectedResults from "./fixtures/expected-results.json";
+
+   describe("End-to-End Pipeline", () => {
+     describe("Normal flow: disease detection", () => {
+       it("uploads a tomato early blight image and returns correct diagnosis", async () => {
+         // Expected values come from the shared fixture so the test and
+         // expected-results.json cannot drift apart.
+         const expected = expectedResults["tomato-early-blight.jpg"];
+
+         // 1. Upload
+         const { imageId } = await uploadTestImage("tomato-early-blight.jpg");
+         expect(imageId).toBeDefined();
+
+         // 2. Identify
+         const result = await identifyImage(imageId);
+
+         // 3. Verify response structure
+         expect(result.predictions).toBeDefined();
+         expect(result.predictions.length).toBeGreaterThan(0);
+         expect(result.metadata).toBeDefined();
+         expect(result.metadata.model).toBe("plant-classifier-v1");
+         expect(result.metadata.inferenceTimeMs).toBeGreaterThan(0);
+         expect(result.demo_mode).toBeFalsy();
+
+         // 4. Verify top prediction matches the expected disease and plant
+         const topPrediction = result.predictions[0];
+         expect(topPrediction.diseaseId).toBe(expected.expectedDiseaseId);
+         expect(topPrediction.disease.name).toContain("Early Blight");
+         expect(topPrediction.plant.id).toBe(expected.expectedPlantId);
+
+         // 5. Verify confidence is calibrated
+         expect(topPrediction.confidence.adjusted).toBeGreaterThanOrEqual(expected.minConfidence);
+         expect(topPrediction.confidence.label).toBe(expected.expectedConfidenceLabel);
+
+         // 6. Verify disease data is enriched
+         expect(topPrediction.disease.symptoms.length).toBeGreaterThanOrEqual(3);
+         expect(topPrediction.disease.treatment.length).toBeGreaterThanOrEqual(3);
+         expect(topPrediction.disease.prevention.length).toBeGreaterThanOrEqual(2);
+       });
+     });
+
+     describe("Healthy result", () => {
+       it("returns healthy result for healthy plant image", async () => {
+         const expected = expectedResults["tomato-healthy.jpg"];
+         const { imageId } = await uploadTestImage("tomato-healthy.jpg");
+         const result = await identifyImage(imageId);
+
+         if (result.healthy) {
+           // Healthy response: flag, plant and confidence must match the fixture.
+           expect(result.healthy).toBe(true);
+           expect(result.plantId).toBe(expected.expectedPlantId);
+           expect(result.confidence.adjusted).toBeGreaterThanOrEqual(expected.minConfidence);
+         } else {
+           // Not flagged healthy: there must be a prediction to inspect, and it
+           // must not be a confident disease diagnosis. Checking the list first
+           // turns a missing/empty list into an assertion failure, not a TypeError.
+           expect(result.predictions).toBeDefined();
+           expect(result.predictions.length).toBeGreaterThan(0);
+
+           const topPrediction = result.predictions[0];
+           expect(topPrediction.confidence.adjusted).toBeLessThan(0.5);
+         }
+       });
+     });
+
+     describe("Unknown image", () => {
+       it("returns low confidence for unrelated image", async () => {
+         const expected = expectedResults["unknown-plant.jpg"];
+         const { imageId } = await uploadTestImage("unknown-plant.jpg");
+         const result = await identifyImage(imageId);
+
+         // Assert unconditionally: guarding the checks behind
+         // `if (result.predictions)` would let the test pass without
+         // verifying anything when the list is missing.
+         expect(result.predictions).toBeDefined();
+         expect(result.predictions.length).toBeGreaterThan(0);
+
+         // Predictions are present but must carry low confidence
+         const topPrediction = result.predictions[0];
+         expect(topPrediction.confidence.adjusted).toBeLessThan(expected.maxConfidence);
+         expect(topPrediction.confidence.label).toBe(expected.expectedConfidenceLabel);
+       });
+     });
+
+     describe("Plant context", () => {
+       it("boosts predictions when plantId is provided", async () => {
+         const { imageId } = await uploadTestImage("tomato-early-blight.jpg");
+
+         // Without plant context
+         const resultNoContext = await identifyImage(imageId);
+         const confidenceNoContext = resultNoContext.predictions[0].confidence.adjusted;
+
+         // With plant context
+         const resultWithContext = await identifyImage(imageId, "tomato");
+         const confidenceWithContext = resultWithContext.predictions[0].confidence.adjusted;
+
+         // Context should boost confidence (or at least not reduce it)
+         expect(confidenceWithContext).toBeGreaterThanOrEqual(confidenceNoContext);
+
+         // Boosted prediction should have contextBoosted flag.
+         // The parameter is annotated because the response is `any`, so the
+         // callback gets no contextual type and would be an implicit `any`
+         // under `strict`.
+         const boosted = resultWithContext.predictions.find(
+           (p: { contextBoosted?: boolean }) => p.contextBoosted,
+         );
+         expect(boosted).toBeDefined();
+       });
+     });
+
+     describe("Error cases", () => {
+       it("returns 404 for non-existent imageId", async () => {
+         // Call the endpoint directly: the identifyImage helper throws on
+         // non-OK responses, and this test needs the raw status code.
+         const payload = JSON.stringify({ imageId: "non-existent-id" });
+         const { status } = await fetch("http://localhost:3000/api/identify", {
+           method: "POST",
+           headers: { "Content-Type": "application/json" },
+           body: payload,
+         });
+
+         expect(status).toBe(404);
+       });
+
+       it("returns 400 for invalid image upload", async () => {
+         // A plain-text blob stands in for a non-image upload.
+         const bogusFile = new Blob(["not an image"], { type: "text/plain" });
+         const formData = new FormData();
+         formData.append("image", bogusFile, "test.txt");
+
+         const { status } = await fetch("http://localhost:3000/api/upload", {
+           method: "POST",
+           body: formData,
+         });
+
+         expect(status).toBe(400);
+       });
+     });
+
+     describe("Performance", () => {
+       it("completes inference in under 500ms", async () => {
+         const { imageId } = await uploadTestImage("tomato-early-blight.jpg");
+
+         // Warm-up request so one-time startup cost (e.g. a first model load)
+         // is not counted when this test runs first or in isolation.
+         await identifyImage(imageId);
+
+         // Assert on the server-reported inference time rather than wall-clock
+         // time around the HTTP call, which would also include network and
+         // serialization overhead.
+         const result = await identifyImage(imageId);
+         expect(result.metadata.inferenceTimeMs).toBeLessThan(500);
+       });
+     });
+   });
+   ```
+
+4. **Create expected results fixture** `tests/e2e/fixtures/expected-results.json`:
+
+   ```json
+   {
+     "tomato-early-blight.jpg": {
+       "expectedDiseaseId": "early-blight",
+       "expectedPlantId": "tomato",
+       "minConfidence": 0.6,
+       "expectedConfidenceLabel": "high"
+     },
+     "tomato-healthy.jpg": {
+       "expectedHealthy": true,
+       "expectedPlantId": "tomato",
+       "minConfidence": 0.5
+     },
+     "unknown-plant.jpg": {
+       "maxConfidence": 0.5,
+       "expectedConfidenceLabel": "low"
+     }
+   }
+   ```
+
+5. **Update vitest config** to support E2E tests:
+
+   ```typescript
+   // vitest.config.ts — extends `include` with a tests/ glob so specs outside src/ are collected
+   export default defineConfig({
+     test: {
+       // ... existing config ...
+       include: ["src/**/*.test.ts", "src/**/*.test.tsx", "tests/**/*.test.ts"], // NOTE(review): the tests/ glob also matches tests/e2e, so a plain `vitest` run would pick up the server-dependent E2E specs — confirm this is intended
+     },
+   });
+   ```
+
+6. **Add E2E test script** to `package.json`:
+
+   ```json
+   {
+     "scripts": {
+       "test:e2e": "vitest run tests/e2e"
+     }
+   }
+   ```
+
+7. **Document E2E test setup** in `tests/e2e/README.md`:
+   - Requires dev server running (`npm run dev`)
+   - Requires model files present (`public/models/plant-disease-classifier/`)
+   - Requires test fixtures (download PlantVillage test images)
+   - Run with `npm run test:e2e`
+
+8. **Download test images** from PlantVillage dataset:
+   - Use images from the PlantVillage test split (not training)
+   - Place in `tests/e2e/fixtures/`
+   - Document source and license
+
+tests:
+
+- E2E: full pipeline test (upload → identify → verify results)
+- E2E: healthy result detection
+- E2E: unknown image produces low confidence
+- E2E: plant context boosts predictions
+- E2E: error cases (404, 400)
+- E2E: performance (< 500ms inference)
+
+acceptance_criteria:
+
+- All E2E tests pass with real model inference
+- Test fixtures are documented and licensed appropriately
+- E2E tests can be run with `npm run test:e2e`
+- Tests cover: normal flow, healthy result, unknown image, plant context, errors, performance
+- Test results are deterministic (no flaky tests)
+
+validation:
+
+- `npm run test:e2e` — all tests pass
+- Manual: run tests against dev server and verify output
+- Check test coverage: all major code paths are exercised
+
+notes:
+
+- E2E tests require the dev server to be running (`npm run dev`)
+- Test images should be from PlantVillage test split (not training) to avoid overfitting concerns
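+- If test images are not available, use synthetic test data (random tensors) for CI — such runs can only check response structure and error handling, not the expected diagnoses or confidence ranges in `expected-results.json`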
+- Performance test threshold (500ms) is generous — actual inference should be < 200ms with tfjs-node
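+- E2E tests are separate from unit tests — run them in CI after deployment to staging (the helpers and tests above hard-code `http://localhost:3000`, so the base URL must be made configurable before they can target staging)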