onnx: fix deployment issue

2026-06-03 13:35:37 -04:00
parent 1408d0cd1d
commit 61d48d3648
5 changed files with 511 additions and 285 deletions
--- a/tasks/ios-production/README.md
+++ b/tasks/ios-production/README.md
@@ -21,7 +21,7 @@ Status legend: [ ] todo, [~] in-progress, [x] done
 ### Performance Optimization
 - [x] 09 — Image Caching & Lazy Loading → `09-image-caching.md`
 - [x] 10 — Memory Management & Leak Audit → `10-memory-leak-audit.md`
- [~] 11 — Background Fetch & Sync Optimization → `11-background-fetch.md`
+- [x] 11 — Background Fetch & Sync Optimization → `11-background-fetch.md`
 - [x] 12 — App Launch Time Optimization → `12-launch-time.md`

 ### Native Features
--- a/web/.gitignore
+++ b/web/.gitignore
@@ -5,6 +5,7 @@ dist
 .netlify
 .vinxi
 app.config.timestamp_*.js
+.pi-lens

 # Environment
 .env*
--- a/web/.vercelignore
+++ b/web/.vercelignore
@@ -0,0 +1,51 @@
+# ── ML Model (255MB ONNX model — too large for Vercel, downloaded at runtime) ──
+src/server/models/spam-classifier/
+
+# ── Build Artifacts ──
+.output/
+.nitro/
+dist/
+
+# ── Test Files (not needed in production) ──
+e2e/
+test/
+**/*.test.ts
+**/*.test.tsx
+**/*.spec.ts
+**/*.spec.tsx
+
+# ── Development / Config ──
+.dockerignore
+Dockerfile
+docker-compose.yml
+docker-compose.yaml
+vitest.config.ts
+vitest.node.config.ts
+playwright.config.ts
+drizzle.config.ts
+drizzle/
+
+# ── Version Control ──
+.git/
+.gitignore
+.github/
+.husky/
+
+# ── Environment (already in .gitignore, being explicit) ──
+.env
+.env.development
+.env.production
+.env.local
+
+# ── Editors / OS ──
+.idea/
+.vscode/
+*.swp
+*.swo
+*~
+.DS_Store
+Thumbs.db
+
+# ── Pi agent / dev tooling ──
+.pi-lens/
+.agents/
--- a/web/src/server/services/spamshield/onnx.inference.ts
+++ b/web/src/server/services/spamshield/onnx.inference.ts
@@ -90,7 +90,7 @@ function cacheKey(text: string): string {
 	let hash = 0;
 	for (let i = 0; i < normalized.length; i++) {
 		const char = normalized.charCodeAt(i);
-    hash = ((hash << 5) - hash) + char;
+		hash = (hash << 5) - hash + char;
 		hash |= 0; // Convert to 32bit integer
 	}
 	return String(hash);
@@ -155,7 +155,9 @@ class BertTokenizer {
 		let doLowercase = true;
 		let modelMaxLength = 512;
 		try {
-      const configData = JSON.parse(fs.readFileSync(tokenizerConfigPath, "utf-8"));
+			const configData = JSON.parse(
+				fs.readFileSync(tokenizerConfigPath, "utf-8"),
+			);
 			doLowercase = configData.do_lower_case ?? true;
 			modelMaxLength = configData.model_max_length ?? 512;
 		} catch {
@@ -180,13 +182,20 @@ class BertTokenizer {
 		return text.split(/\s+/).filter((t) => t.length > 0);
 	}

-  private wordpiece_tokenize(token: string, maxOutputTokens: number = 20): string[] {
+	private wordpiece_tokenize(
+		token: string,
+		maxOutputTokens: number = 20,
+	): string[] {
 		const outputTokens: string[] = [];
 		let isBad = false;
 		let start = 0;
 		let subToken: string | null = null;

-    while (start < token.length && !isBad && outputTokens.length < maxOutputTokens) {
+		while (
+			start < token.length &&
+			!isBad &&
+			outputTokens.length < maxOutputTokens
+		) {
 			let found = false;

 			for (let end = token.length; end > start; end--) {
@@ -230,7 +239,10 @@ class BertTokenizer {
 		return tokens;
 	}

-  encode(text: string, maxLen: number = 128): { inputIds: number[]; attentionMask: number[] } {
+	encode(
+		text: string,
+		maxLen: number = 128,
+	): { inputIds: number[]; attentionMask: number[] } {
 		const tokens = this.tokenize(text);

 		// Add [CLS] and [SEP]
@@ -252,12 +264,153 @@ class BertTokenizer {
 // ── Model Loading ──────────────────────────────────────────────────────────

 const MODEL_DIR_ENV = "SPAM_MODEL_DIR";
-const DEFAULT_MODEL_DIR = path.join(__dirname, "..", "..", "models", "spam-classifier");
+const DEFAULT_MODEL_DIR = path.join(
+	__dirname,
+	"..",
+	"..",
+	"models",
+	"spam-classifier",
+);

 function getModelDir(): string {
 	return process.env[MODEL_DIR_ENV] || DEFAULT_MODEL_DIR;
 }

+// ── Remote Model Download ────────────────────────────────────────────────────
+
+/**
+ * Name of the environment variable that holds the base URL from which the
+ * model files are downloaded when they are not present locally.
+ */
+const MODEL_DOWNLOAD_URL_ENV = "SPAM_MODEL_URL_BASE";
+
+/**
+ * Model files that need to be available in the model directory.
+ * Each name is used both as the file name inside the model directory and as
+ * the path segment appended to the download base URL.
+ */
+const MODEL_FILES = [
+	"model.onnx",
+	"model.onnx.data",
+	"tokenizer.json",
+	"vocab.txt",
+	"tokenizer_config.json",
+	"special_tokens_map.json",
+	"model_metadata.json",
+] as const;
+
+/**
+ * Check if all required model files exist and are non-empty in the given
+ * directory.
+ *
+ * A zero-byte file counts as missing: the download step treats empty files
+ * as absent, so reporting them as present here would skip the download and
+ * leave an unusable model in place.
+ *
+ * @param dir - Directory expected to contain every entry of MODEL_FILES.
+ * @returns true only when every required file exists with size > 0; false
+ *   otherwise, including when the filesystem check itself throws.
+ */
+function modelFilesExist(dir: string): boolean {
+	try {
+		return MODEL_FILES.every((f) => {
+			const filePath = path.join(dir, f);
+			return fs.existsSync(filePath) && fs.statSync(filePath).size > 0;
+		});
+	} catch {
+		// statSync can throw (e.g. file removed between the two calls, or
+		// permission denied); treat that the same as "not available".
+		return false;
+	}
+}
+
+/**
+ * Download a single model file from a remote URL to a local path.
+ *
+ * The response body is streamed chunk by chunk, honouring write-stream
+ * backpressure, so large files (e.g., model.onnx.data at 255MB) are not
+ * buffered in memory. Data is written to `<destPath>.partial` and renamed to
+ * `destPath` only after the write stream has finished, so an interrupted
+ * download never leaves a truncated file at `destPath`.
+ *
+ * @param url - Remote URL to fetch.
+ * @param destPath - Local path the completed file is moved to.
+ * @throws Error if the response status is not OK, the response has no body
+ *   stream, or reading/writing fails. On failure the partial file is removed
+ *   (best effort) before the error is rethrown.
+ */
+async function downloadModelFile(url: string, destPath: string): Promise<void> {
+	const response = await fetch(url);
+	if (!response.ok) {
+		throw new Error(
+			`Failed to download ${url}: ${response.status} ${response.statusText}`,
+		);
+	}
+
+	const reader = response.body?.getReader();
+	if (!reader) {
+		throw new Error(`No response body stream for ${url}`);
+	}
+
+	// Ensure parent directory exists
+	const dir = path.dirname(destPath);
+	fs.mkdirSync(dir, { recursive: true });
+
+	// Stream into a temporary sibling file; it is renamed into place on success.
+	const tempPath = `${destPath}.partial`;
+	const writer = fs.createWriteStream(tempPath);
+
+	// Settles when the stream has flushed everything ("finish") or failed
+	// ("error"). Without an "error" listener a write failure would be an
+	// unhandled event, and waiting on "finish" alone would hang forever.
+	const finished = new Promise<void>((resolve, reject) => {
+		writer.once("finish", resolve);
+		writer.on("error", reject);
+	});
+	// The rejection is observed via the awaits below; this only prevents an
+	// unhandled-rejection report if the stream fails while we are reading.
+	finished.catch(() => undefined);
+
+	try {
+		let totalBytes = 0;
+		let lastLog = 0;
+		while (true) {
+			const { done, value } = await reader.read();
+			if (done) break;
+
+			// write() returning false means the internal buffer is full: wait
+			// for "drain" (or a stream error) before reading more data.
+			if (!writer.write(value)) {
+				await Promise.race([
+					new Promise<void>((resolve) => writer.once("drain", resolve)),
+					finished,
+				]);
+			}
+			totalBytes += value.length;
+
+			// Log progress every ~10MB
+			if (totalBytes - lastLog > 10 * 1024 * 1024) {
+				lastLog = totalBytes;
+				const mb = (totalBytes / (1024 * 1024)).toFixed(1);
+				console.log(
+					`[spamshield] Downloaded ${path.basename(destPath)}: ${mb}MB`,
+				);
+			}
+		}
+
+		writer.end();
+		await finished;
+		fs.renameSync(tempPath, destPath);
+	} catch (err) {
+		// Release the file handle and the network stream, then drop the
+		// incomplete file so a later run does not mistake it for a cached copy.
+		writer.destroy();
+		await reader.cancel().catch(() => undefined);
+		try {
+			fs.rmSync(tempPath, { force: true });
+		} catch {
+			// Best-effort cleanup; the original error is the one worth reporting.
+		}
+		throw err;
+	}
+
+	const totalMB = (fs.statSync(destPath).size / (1024 * 1024)).toFixed(1);
+	console.log(
+		`[spamshield] Downloaded ${path.basename(destPath)} (${totalMB}MB)`,
+	);
+}
+
+/**
+ * Make sure the model directory holds every required model file, fetching
+ * whatever is absent from the base URL configured via SPAM_MODEL_URL_BASE.
+ *
+ * @param modelDir - Directory the model files are expected in / saved to.
+ * @returns true when all required files are available afterwards; false when
+ *   no download URL is configured or some file is still missing.
+ * @throws Rethrows the download error when "model.onnx" or "model.onnx.data"
+ *   cannot be fetched; failures of the other files are only logged.
+ */
+async function downloadModelIfMissing(modelDir: string): Promise<boolean> {
+	// Fast path: everything is already on disk.
+	if (modelFilesExist(modelDir)) return true;
+
+	const configuredBase = process.env[MODEL_DOWNLOAD_URL_ENV];
+	if (!configuredBase) {
+		console.log(
+			"[spamshield] Model files not found locally and SPAM_MODEL_URL_BASE not set — " +
+				"will use rule-engine fallback",
+		);
+		return false;
+	}
+
+	// Guarantee exactly one "/" between the base and the file name.
+	const urlPrefix = configuredBase.endsWith("/")
+		? configuredBase
+		: `${configuredBase}/`;
+	console.log(`[spamshield] Downloading model from: ${urlPrefix}`);
+
+	fs.mkdirSync(modelDir, { recursive: true });
+
+	// Snapshot of the files that are already present and non-empty, taken
+	// before any download starts.
+	const alreadyPresent = new Set<string>(
+		MODEL_FILES.filter((name) => {
+			const candidate = path.join(modelDir, name);
+			return fs.existsSync(candidate) && fs.statSync(candidate).size > 0;
+		}),
+	);
+
+	// Fetch the remaining files one after another, in MODEL_FILES order.
+	for (const name of MODEL_FILES) {
+		if (alreadyPresent.has(name)) {
+			console.log(`[spamshield] Already have ${name}, skipping download`);
+			continue;
+		}
+
+		const source = `${urlPrefix}${name}`;
+		const target = path.join(modelDir, name);
+		console.log(`[spamshield] Downloading ${name}...`);
+		try {
+			await downloadModelFile(source, target);
+		} catch (err) {
+			console.error(`[spamshield] Failed to download ${name}:`, err);
+			// Without the main model files the model cannot be used at all.
+			const isMainModelFile =
+				name === "model.onnx" || name === "model.onnx.data";
+			if (isMainModelFile) {
+				throw err;
+			}
+		}
+	}
+
+	return modelFilesExist(modelDir);
+}
+
 async function loadModel(): Promise<void> {
 	if (modelState.loaded) return;

@@ -265,6 +418,9 @@ async function loadModel(): Promise<void> {
 		const modelDir = getModelDir();
 		console.log(`[spamshield] Loading ONNX model from: ${modelDir}`);

+		// Download model files if missing (production/Vercel path)
+		await downloadModelIfMissing(modelDir);
+
 		// Load metadata
 		const metadataPath = path.join(modelDir, "model_metadata.json");
 		if (fs.existsSync(metadataPath)) {
@@ -288,14 +444,21 @@ async function loadModel(): Promise<void> {

 		modelState.session = await ort.InferenceSession.create(modelPath);
 		console.log("[spamshield] ONNX session created");
-    console.log(`[spamshield] Inputs: ${modelState.session.inputNames.join(", ")}`);
-    console.log(`[spamshield] Outputs: ${modelState.session.outputNames.join(", ")}`);
+		console.log(
+			`[spamshield] Inputs: ${modelState.session.inputNames.join(", ")}`,
+		);
+		console.log(
+			`[spamshield] Outputs: ${modelState.session.outputNames.join(", ")}`,
+		);

 		modelState.loaded = true;
 		console.log("[spamshield] Model loaded successfully");
 	} catch (err) {
 		modelState.loadError = err instanceof Error ? err : new Error(String(err));
-    console.error("[spamshield] Failed to load ONNX model:", modelState.loadError);
+		console.error(
+			"[spamshield] Failed to load ONNX model:",
+			modelState.loadError,
+		);
 		console.log("[spamshield] Falling back to rule engine for classification");
 	}
 }
@@ -351,7 +514,10 @@ async function runInference(
 	}

 	const inputIdsTensor = new ort.Tensor("int64", inputIdsBigInt, [1, maxLen]);
-  const attentionMaskTensor = new ort.Tensor("int64", attentionMaskBigInt, [1, maxLen]);
+	const attentionMaskTensor = new ort.Tensor("int64", attentionMaskBigInt, [
+		1,
+		maxLen,
+	]);

 	// Run inference
 	const feeds: Record<string, Tensor> = {
--- a/web/vercel.json
+++ b/web/vercel.json
@@ -0,0 +1,8 @@
+{
+  "$schema": "https://openapi.vercel.sh/vercel.json",
+  "framework": "solidstart",
+  "buildCommand": "npm run build",
+  "installCommand": "npm install",
+  "outputDirectory": ".output/public",
+  "regions": ["iad1"]
+}