beepboop
This commit is contained in:
292
apps/web/tasks/production-ml-pipeline/07-end-to-end-testing.md
Normal file
292
apps/web/tasks/production-ml-pipeline/07-end-to-end-testing.md
Normal file
@@ -0,0 +1,292 @@
|
||||
# 07. End-to-End Integration Testing
|
||||
|
||||
meta:
|
||||
id: production-ml-pipeline-07
|
||||
feature: production-ml-pipeline
|
||||
priority: P1
|
||||
depends_on: [production-ml-pipeline-05]
|
||||
tags: [testing, integration, e2e]
|
||||
|
||||
objective:
|
||||
|
||||
- Create comprehensive end-to-end tests that validate the full pipeline from image upload to disease diagnosis
|
||||
- Verify real model inference produces valid, calibrated predictions
|
||||
- Test all major code paths: normal flow, healthy result, unknown image, plant context, error cases, and performance
|
||||
- Ensure all components work together correctly in a realistic scenario
|
||||
|
||||
deliverables:
|
||||
|
||||
- `tests/e2e/pipeline.test.ts` — full pipeline E2E tests
|
||||
- `tests/e2e/fixtures/` — test images and expected results
|
||||
- `tests/e2e/utils.ts` — test utilities (upload helper, identify helper)
|
||||
- Updated `vitest.config.ts` — E2E test configuration
|
||||
|
||||
steps:
|
||||
|
||||
1. **Create test fixtures** `tests/e2e/fixtures/`:
|
||||
- `tomato-early-blight.jpg` — known tomato early blight image (from PlantVillage test set)
|
||||
- `tomato-healthy.jpg` — known healthy tomato image
|
||||
- `unknown-plant.jpg` — unrelated image (should produce low confidence)
|
||||
- `invalid-image.txt` — non-image file (should fail validation)
|
||||
- `expected-results.json` — expected disease IDs and confidence ranges for each test image
|
||||
|
||||
2. **Create E2E test utilities** `tests/e2e/utils.ts`:
|
||||
|
||||
```typescript
|
||||
import fs from "fs/promises";
|
||||
import path from "path";
|
||||
|
||||
/**
 * Uploads a fixture file to the `/api/upload` endpoint and returns the parsed
 * JSON response.
 *
 * The target server defaults to `http://localhost:3000`; set the
 * `E2E_BASE_URL` environment variable to run against another deployment
 * (for example a staging environment in CI).
 *
 * @param filename - Name of a file inside the `fixtures` directory that sits
 *   next to this module.
 * @returns The JSON body of a successful upload response.
 * @throws Error if the fixture cannot be read or the server responds with a
 *   non-2xx status.
 */
export async function uploadTestImage(
  filename: string,
): Promise<{ imageId: string; previewUrl: string }> {
  // Default keeps the original local-dev behaviour; the override lets the same
  // suite target a deployed server.
  const baseUrl = process.env.E2E_BASE_URL ?? "http://localhost:3000";

  // Derive the MIME type from the extension so that a non-image fixture is not
  // mislabelled as a JPEG. Unrecognised extensions fall back to a generic
  // binary type.
  const mimeTypesByExtension: Record<string, string> = {
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".png": "image/png",
    ".webp": "image/webp",
    ".txt": "text/plain",
  };
  const mimeType =
    mimeTypesByExtension[path.extname(filename).toLowerCase()] ?? "application/octet-stream";

  const imagePath = path.join(__dirname, "fixtures", filename);
  const imageBuffer = await fs.readFile(imagePath);

  const formData = new FormData();
  formData.append("image", new Blob([imageBuffer], { type: mimeType }), filename);

  const response = await fetch(`${baseUrl}/api/upload`, {
    method: "POST",
    body: formData,
  });

  if (!response.ok) {
    throw new Error(`Upload failed: ${response.status}`);
  }

  return response.json();
}
|
||||
|
||||
/**
 * Calls the `/api/identify` endpoint for a previously uploaded image and
 * returns the parsed JSON response.
 *
 * The target server defaults to `http://localhost:3000`; set the
 * `E2E_BASE_URL` environment variable to run against another deployment
 * (for example a staging environment in CI).
 *
 * @param imageId - Identifier returned by the upload endpoint.
 * @param plantId - Optional plant identifier sent as context; omitted from the
 *   JSON body when `undefined`.
 * @returns The JSON body of a successful identify response. It is left
 *   untyped because the tests inspect several response shapes.
 * @throws Error if the server responds with a non-2xx status.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- response shape varies; tests assert on it directly
export async function identifyImage(imageId: string, plantId?: string): Promise<any> {
  // Default keeps the original local-dev behaviour; the override lets the same
  // suite target a deployed server.
  const baseUrl = process.env.E2E_BASE_URL ?? "http://localhost:3000";

  const response = await fetch(`${baseUrl}/api/identify`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ imageId, plantId }),
  });

  if (!response.ok) {
    throw new Error(`Identify failed: ${response.status}`);
  }

  return response.json();
}
|
||||
```
|
||||
|
||||
3. **Write full pipeline E2E test** `tests/e2e/pipeline.test.ts`:
|
||||
|
||||
```typescript
|
||||
import { describe, it, expect, beforeAll } from "vitest";
|
||||
import { uploadTestImage, identifyImage } from "./utils";
|
||||
import expectedResults from "./fixtures/expected-results.json";
|
||||
|
||||
// Full-pipeline E2E suite: upload a fixture, run identification, and check the
// response against the expectations stored in `fixtures/expected-results.json`.
describe("End-to-End Pipeline", () => {
  // Server under test for the requests issued directly from this file.
  // Defaults to the local dev server; override with E2E_BASE_URL.
  const baseUrl = process.env.E2E_BASE_URL ?? "http://localhost:3000";

  describe("Normal flow: disease detection", () => {
    it("uploads a tomato early blight image and returns correct diagnosis", async () => {
      // Expectations come from the fixture file so thresholds live in one place.
      const expected = expectedResults["tomato-early-blight.jpg"];

      // 1. Upload
      const { imageId } = await uploadTestImage("tomato-early-blight.jpg");
      expect(imageId).toBeDefined();

      // 2. Identify
      const result = await identifyImage(imageId);

      // 3. Verify response structure
      expect(result.predictions).toBeDefined();
      expect(result.predictions.length).toBeGreaterThan(0);
      expect(result.metadata).toBeDefined();
      expect(result.metadata.model).toBe("plant-classifier-v1");
      expect(result.metadata.inferenceTimeMs).toBeGreaterThan(0);
      expect(result.demo_mode).toBeFalsy();

      // 4. Verify top prediction is early blight
      const topPrediction = result.predictions[0];
      expect(topPrediction.diseaseId).toBe(expected.expectedDiseaseId);
      expect(topPrediction.disease.name).toContain("Early Blight");
      expect(topPrediction.plant.id).toBe(expected.expectedPlantId);

      // 5. Verify confidence is calibrated
      expect(topPrediction.confidence.adjusted).toBeGreaterThanOrEqual(expected.minConfidence);
      expect(topPrediction.confidence.label).toBe(expected.expectedConfidenceLabel);

      // 6. Verify disease data is enriched
      expect(topPrediction.disease.symptoms.length).toBeGreaterThanOrEqual(3);
      expect(topPrediction.disease.treatment.length).toBeGreaterThanOrEqual(3);
      expect(topPrediction.disease.prevention.length).toBeGreaterThanOrEqual(2);
    });
  });

  describe("Healthy result", () => {
    it("returns healthy result for healthy plant image", async () => {
      const expected = expectedResults["tomato-healthy.jpg"];

      const { imageId } = await uploadTestImage("tomato-healthy.jpg");
      const result = await identifyImage(imageId);

      // Should return healthy: true or top prediction is a healthy class
      if (result.healthy) {
        expect(result.healthy).toBe(expected.expectedHealthy);
        expect(result.plantId).toBe(expected.expectedPlantId);
        expect(result.confidence.adjusted).toBeGreaterThanOrEqual(expected.minConfidence);
      } else {
        // If not healthy result, confidence should be low
        const topPrediction = result.predictions[0];
        expect(topPrediction.confidence.adjusted).toBeLessThan(0.5);
      }
    });
  });

  describe("Unknown image", () => {
    it("returns low confidence for unrelated image", async () => {
      const expected = expectedResults["unknown-plant.jpg"];

      const { imageId } = await uploadTestImage("unknown-plant.jpg");
      const result = await identifyImage(imageId);

      // Should have predictions but with low confidence. Assert that they
      // exist so the test cannot pass vacuously on a response without them.
      expect(result.predictions).toBeDefined();
      expect(result.predictions.length).toBeGreaterThan(0);

      const topPrediction = result.predictions[0];
      expect(topPrediction.confidence.adjusted).toBeLessThan(expected.maxConfidence);
      expect(topPrediction.confidence.label).toBe(expected.expectedConfidenceLabel);
    });
  });

  describe("Plant context", () => {
    it("boosts predictions when plantId is provided", async () => {
      const { imageId } = await uploadTestImage("tomato-early-blight.jpg");

      // Without plant context
      const resultNoContext = await identifyImage(imageId);
      const confidenceNoContext = resultNoContext.predictions[0].confidence.adjusted;

      // With plant context
      const resultWithContext = await identifyImage(imageId, "tomato");
      const confidenceWithContext = resultWithContext.predictions[0].confidence.adjusted;

      // Context should boost confidence (or at least not reduce it)
      expect(confidenceWithContext).toBeGreaterThanOrEqual(confidenceNoContext);

      // Boosted prediction should have contextBoosted flag. The parameter is
      // annotated because the receiver is untyped, so nothing infers it.
      const boosted = resultWithContext.predictions.find(
        (p: { contextBoosted?: boolean }) => p.contextBoosted,
      );
      expect(boosted).toBeDefined();
    });
  });

  describe("Error cases", () => {
    it("returns 404 for non-existent imageId", async () => {
      // Uses fetch directly because identifyImage throws on non-2xx responses
      // and would hide the status code.
      const response = await fetch(`${baseUrl}/api/identify`, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ imageId: "non-existent-id" }),
      });

      expect(response.status).toBe(404);
    });

    it("returns 400 for invalid image upload", async () => {
      const formData = new FormData();
      formData.append("image", new Blob(["not an image"], { type: "text/plain" }), "test.txt");

      const response = await fetch(`${baseUrl}/api/upload`, {
        method: "POST",
        body: formData,
      });

      expect(response.status).toBe(400);
    });
  });

  describe("Performance", () => {
    it("completes inference in under 500ms", async () => {
      const { imageId } = await uploadTestImage("tomato-early-blight.jpg");

      // Wall-clock time around the identify call only; the upload is excluded,
      // but the HTTP round trip of the identify request is included.
      const start = Date.now();
      await identifyImage(imageId);
      const elapsed = Date.now() - start;

      expect(elapsed).toBeLessThan(500);
    });
  });
});
|
||||
```
|
||||
|
||||
4. **Create expected results fixture** `tests/e2e/fixtures/expected-results.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"tomato-early-blight.jpg": {
|
||||
"expectedDiseaseId": "early-blight",
|
||||
"expectedPlantId": "tomato",
|
||||
"minConfidence": 0.6,
|
||||
"expectedConfidenceLabel": "high"
|
||||
},
|
||||
"tomato-healthy.jpg": {
|
||||
"expectedHealthy": true,
|
||||
"expectedPlantId": "tomato",
|
||||
"minConfidence": 0.5
|
||||
},
|
||||
"unknown-plant.jpg": {
|
||||
"maxConfidence": 0.5,
|
||||
"expectedConfidenceLabel": "low"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
5. **Update vitest config** to support E2E tests:
|
||||
|
||||
```typescript
|
||||
// vitest.config.ts
|
||||
export default defineConfig({
|
||||
test: {
|
||||
// ... existing config ...
|
||||
include: ["src/**/*.test.ts", "src/**/*.test.tsx", "tests/**/*.test.ts"],
|
||||
},
|
||||
});
|
||||
```
|
||||
|
||||
6. **Add E2E test script** to `package.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"scripts": {
|
||||
"test:e2e": "vitest run tests/e2e"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
7. **Document E2E test setup** in `tests/e2e/README.md`:
|
||||
- Requires dev server running (`npm run dev`)
|
||||
- Requires the model files to be present (`public/models/plant-disease-classifier/`)
|
||||
- Requires test fixtures (download PlantVillage test images)
|
||||
- Run with `npm run test:e2e`
|
||||
|
||||
8. **Download test images** from PlantVillage dataset:
|
||||
- Use images from the PlantVillage test split (not training)
|
||||
- Place in `tests/e2e/fixtures/`
|
||||
- Document source and license
|
||||
|
||||
tests:
|
||||
|
||||
- E2E: full pipeline test (upload → identify → verify results)
|
||||
- E2E: healthy result detection
|
||||
- E2E: unknown image produces low confidence
|
||||
- E2E: plant context boosts predictions
|
||||
- E2E: error cases (404, 400)
|
||||
- E2E: performance (< 500ms inference)
|
||||
|
||||
acceptance_criteria:
|
||||
|
||||
- All E2E tests pass with real model inference
|
||||
- Test fixtures are documented and licensed appropriately
|
||||
- E2E tests can be run with `npm run test:e2e`
|
||||
- Tests cover: normal flow, healthy result, unknown image, plant context, errors, performance
|
||||
- Test results are deterministic (no flaky tests)
|
||||
|
||||
validation:
|
||||
|
||||
- `npm run test:e2e` — all tests pass
|
||||
- Manual: run tests against dev server and verify output
|
||||
- Check test coverage: all major code paths are exercised
|
||||
|
||||
notes:
|
||||
|
||||
- E2E tests require the dev server to be running (`npm run dev`)
|
||||
- Test images should be from PlantVillage test split (not training) to avoid overfitting concerns
|
||||
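- If test images are not available, use synthetic test data (random tensors generated from a fixed seed, so runs stay deterministic) for CI; synthetic inputs can verify the response structure but not the expected diagnoses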
|
||||
- Performance test threshold (500ms) is generous — actual inference should be < 200ms with tfjs-node
|
||||
- E2E tests are separate from unit tests — run them in CI after deployment to staging
|
||||
Reference in New Issue
Block a user