import { spawn } from "child_process";
import { v4 as uuidv4 } from "uuid";
import { logger } from "../logger";

/** Number of components every embedding vector produced by this module has. */
const EMBEDDING_DIM = 192;
const MODEL_VERSION = "ecapa-tdnn-0.1.0-mock";

/** Score reported by ML classification when the service gives no usable answer. */
const NEUTRAL_SCORE = 0.5;

/**
 * Python program used for the health probe. The URL arrives in `sys.argv[1]`.
 *
 * `sys.exit(0)` is deliberately placed outside the `try` and the handler
 * catches `Exception` only: `sys.exit` raises `SystemExit`, which a bare
 * `except:` would intercept and turn into a failure exit code.
 */
const HEALTH_CHECK_SCRIPT = [
  "import sys, urllib.request",
  "try:",
  "    urllib.request.urlopen(sys.argv[1], timeout=2)",
  "except Exception:",
  "    sys.exit(1)",
  "sys.exit(0)",
].join("\n");

/**
 * Python program that POSTs the bytes read from stdin (a JSON document) to
 * the URL in `sys.argv[1]` with the timeout (seconds) in `sys.argv[2]`, and
 * prints `{"ok": true, "data": <decoded reply>}` or
 * `{"ok": false, "error": <message>}` on stdout.
 *
 * All variable data is passed through argv/stdin; nothing is interpolated
 * into the program text.
 */
const HTTP_POST_SCRIPT = [
  "import json, sys, urllib.request",
  "try:",
  "    req = urllib.request.Request(",
  "        sys.argv[1],",
  "        data=sys.stdin.buffer.read(),",
  '        headers={"Content-Type": "application/json"},',
  "    )",
  "    with urllib.request.urlopen(req, timeout=float(sys.argv[2])) as resp:",
  "        body = json.loads(resp.read())",
  '    sys.stdout.write(json.dumps({"ok": True, "data": body}))',
  "except Exception as e:",
  '    sys.stdout.write(json.dumps({"ok": False, "error": str(e)}))',
].join("\n");

/** Outcome of one helper-process run. `exitCode` is `null` when the process could not be started or was killed by a signal. */
interface PythonResult {
  exitCode: number | null;
  stdout: string;
}

/** Narrows an unknown value to a plain (non-array, non-null) object. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/** Narrows an unknown value to an array consisting solely of finite numbers. */
function isNumberVector(value: unknown): value is number[] {
  return (
    Array.isArray(value) &&
    value.every((item) => typeof item === "number" && Number.isFinite(item))
  );
}

/**
 * Produces voice embeddings and synthetic-speech scores.
 *
 * Each public call first probes the ML service at `VOICEPRINT_ML_URL`
 * (default `http://localhost:8001`). When the probe succeeds the request is
 * forwarded to the service through a `python3` helper process; otherwise a
 * deterministic local mock is used.
 */
export class EmbeddingService {
  private mlServiceUrl: string;
  // Not referenced by any method in this class at present.
  private readonly maxRetries = 3;
  private readonly retryDelay = 1000;

  constructor() {
    // `||` (not `??`) so that an empty environment variable also falls back to the default.
    this.mlServiceUrl = process.env.VOICEPRINT_ML_URL || "http://localhost:8001";
  }

  /**
   * Computes an embedding for the given audio.
   *
   * @param audioBuffer Raw audio bytes.
   * @returns A vector of `EMBEDDING_DIM` numbers; a mock embedding derived
   *   from the buffer when the ML service is unavailable or returns an
   *   unusable reply.
   */
  async extract(audioBuffer: Buffer): Promise<EmbeddingOutput> {
    const mlAvailable = await this.checkMLService();
    if (mlAvailable) {
      logger.info("Using ML service for embedding extraction", { mlUrl: this.mlServiceUrl });
      return this.extractViaML(audioBuffer);
    }
    logger.info("Using mock embedding generation", { audioBufferLength: audioBuffer.length });
    return this.generateMockFromBuffer(audioBuffer);
  }

  /**
   * Scores an embedding.
   *
   * @param embedding Embedding vector to score.
   * @returns The ML service's `synthetic_score` when available, otherwise the
   *   average of three heuristic indicators computed locally.
   */
  async classify(embedding: number[]): Promise<number> {
    const mlAvailable = await this.checkMLService();
    if (mlAvailable) {
      logger.info("Using ML service for classification", { embeddingLength: embedding.length });
      return this.classifyViaML(embedding);
    }
    logger.info("Using mock classification", { embeddingLength: embedding.length });

    const mean = embedding.reduce((s, v) => s + v, 0) / embedding.length;
    const variance = embedding.reduce((s, v) => s + (v - mean) ** 2, 0) / embedding.length;
    const stdDev = Math.sqrt(variance);

    const syntheticIndicators = [
      stdDev < 0.1 ? 0.8 : 0.2,
      Math.abs(mean) > 0.5 ? 0.7 : 0.3,
      this.hasArtifacts(embedding) ? 0.9 : 0.1,
    ];
    return syntheticIndicators.reduce((s, v) => s + v, 0) / syntheticIndicators.length;
  }

  /** Returns the model version string reported by this service. */
  getModelVersion(): string {
    return MODEL_VERSION;
  }

  /**
   * Requests an embedding from the ML service's `/embedding` endpoint.
   * The complete audio is sent base64-encoded in the `audio` field.
   * Falls back to the mock embedding when the reply does not contain an
   * `embedding` array of exactly `EMBEDDING_DIM` finite numbers.
   */
  private async extractViaML(audioBuffer: Buffer): Promise<EmbeddingOutput> {
    const data = await this.postJson("/embedding", { audio: audioBuffer.toString("base64") }, 60);
    const vector = data === undefined ? undefined : data.embedding;
    if (isNumberVector(vector) && vector.length === EMBEDDING_DIM) {
      return { vector, dimension: EMBEDDING_DIM };
    }
    logger.info("ML embedding unavailable or malformed, falling back to mock embedding", {
      audioBufferLength: audioBuffer.length,
    });
    return this.generateMockFromBuffer(audioBuffer);
  }

  /**
   * Requests a score from the ML service's `/classify` endpoint.
   * Returns `NEUTRAL_SCORE` when the reply carries no finite numeric
   * `synthetic_score`; a legitimate score of `0` is returned as `0`.
   */
  private async classifyViaML(embedding: number[]): Promise<number> {
    const data = await this.postJson("/classify", { embedding }, 30);
    const score = data === undefined ? undefined : data.synthetic_score;
    return typeof score === "number" && Number.isFinite(score) ? score : NEUTRAL_SCORE;
  }

  /**
   * Reports whether more than a third of the 16-element windows of the
   * embedding have a local variance below 0.001.
   */
  private hasArtifacts(embedding: number[]): boolean {
    const window = 16;
    let artifactCount = 0;
    // `<=` so the final full window is inspected when the length is an exact multiple of `window`.
    for (let i = 0; i + window <= embedding.length; i += window) {
      const slice = embedding.slice(i, i + window);
      const localMean = slice.reduce((s, v) => s + v, 0) / slice.length;
      const localVar = slice.reduce((s, v) => s + (v - localMean) ** 2, 0) / slice.length;
      if (localVar < 0.001) artifactCount++;
    }
    return artifactCount > embedding.length / window / 3;
  }

  /**
   * Builds a deterministic unit-length pseudo-embedding from the audio bytes.
   * The first (at most) 1024 bytes are hashed into a seed for a linear
   * congruential generator whose output feeds a Box-Muller transform.
   */
  private generateMockFromBuffer(audioBuffer: Buffer): EmbeddingOutput {
    let hash = 0;
    const sampleSize = Math.min(audioBuffer.length, 1024);
    // Only complete 4-byte words are read; `readInt32LE` throws on a partial trailing word.
    for (let i = 0; i + 4 <= sampleSize; i += 4) {
      hash = ((hash << 5) - hash + audioBuffer.readInt32LE(i)) | 0;
    }

    const seed = Math.abs(hash);
    const rng = this.createRNG(seed);
    const vector: number[] = [];
    for (let i = 0; i < EMBEDDING_DIM; i++) {
      const draw = rng();
      // The generator can yield exactly 0, and log(0) would make the component non-finite.
      const u1 = draw > 0 ? draw : Number.EPSILON;
      const u2 = rng();
      const gauss = Math.sqrt(-2 * Math.log(u1)) * Math.cos(2 * Math.PI * u2);
      vector.push(parseFloat(gauss.toFixed(6)));
    }

    const norm = Math.sqrt(vector.reduce((s, v) => s + v * v, 0));
    const normalized = vector.map((v) => parseFloat((v / norm).toFixed(6)));
    return { vector: normalized, dimension: EMBEDDING_DIM };
  }

  /**
   * Probes `<mlServiceUrl>/health` with a 2 second timeout.
   * @returns `true` only when the helper process exits with code 0.
   */
  private async checkMLService(): Promise<boolean> {
    logger.info("Checking ML service availability", { mlUrl: this.mlServiceUrl });
    const { exitCode } = await this.runPython(
      HEALTH_CHECK_SCRIPT,
      [`${this.mlServiceUrl}/health`],
      "",
    );
    return exitCode === 0;
  }

  /**
   * POSTs `payload` as JSON to `<mlServiceUrl><path>` via the Python helper.
   *
   * @returns The decoded reply when it is a JSON object, or `undefined` when
   *   the request failed or the reply could not be interpreted.
   */
  private async postJson(
    path: string,
    payload: unknown,
    timeoutSeconds: number,
  ): Promise<Record<string, unknown> | undefined> {
    const { stdout } = await this.runPython(
      HTTP_POST_SCRIPT,
      [`${this.mlServiceUrl}${path}`, String(timeoutSeconds)],
      JSON.stringify(payload),
    );

    let reply: unknown;
    try {
      reply = JSON.parse(stdout);
    } catch {
      logger.info("ML service helper produced no parseable output", { path });
      return undefined;
    }

    if (!isRecord(reply) || reply.ok !== true || !isRecord(reply.data)) {
      logger.info("ML service request failed", {
        path,
        error: isRecord(reply) ? reply.error : undefined,
      });
      return undefined;
    }
    return reply.data;
  }

  /**
   * Runs `python3 -c <script> <args...>`, feeds `stdin` to it and collects stdout.
   * Never rejects: a process that cannot be started resolves with `exitCode: null`.
   */
  private runPython(script: string, args: readonly string[], stdin: string): Promise<PythonResult> {
    return new Promise((resolve) => {
      const proc = spawn("python3", ["-c", script, ...args]);
      let stdout = "";
      let settled = false;
      const finish = (exitCode: number | null): void => {
        if (settled) return;
        settled = true;
        resolve({ exitCode, stdout });
      };

      proc.stdout.setEncoding("utf8");
      proc.stdout.on("data", (chunk: string) => {
        stdout += chunk;
      });
      // Without an 'error' listener a failed spawn would surface as an uncaught exception.
      proc.on("error", () => finish(null));
      proc.on("close", (code) => finish(code));
      // The child may exit before consuming its input; the resulting pipe error is not actionable.
      proc.stdin.on("error", () => undefined);
      proc.stdin.end(stdin);
    });
  }

  /** Creates a linear congruential generator returning values in [0, 1]. */
  private createRNG(seed: number): () => number {
    return () => {
      seed = (seed * 1664525 + 1013904223) & 0xffffffff;
      return (seed >>> 0) / 0xffffffff;
    };
  }
}

/** An embedding vector together with its dimensionality. */
export interface EmbeddingOutput {
  vector: number[];
  dimension: number;
}