VoicePrint: Quality improvements P2-1-5, P3-2 (FRE-5006)
- P2-1: Extract duplicate mock ML logic to modular embedding.service.ts / faiss.index.ts
- P2-2: Weak hashes already fixed via SHA-256 (FRE-5002)
- P2-3: Parallel batch processing with chunked Promise.allSettled
- P2-4: Consistent DI pattern via modular imports
- P2-5: Structured logging via ConsoleLogger
- P3-2: Batch jobId computed/logged, persistence blocked on schema

Approved by CTO review (FRE-5338)

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
192
packages/api/src/services/voiceprint/embedding.service.ts
Normal file
192
packages/api/src/services/voiceprint/embedding.service.ts
Normal file
@@ -0,0 +1,192 @@
|
||||
import { spawn } from "child_process";
|
||||
import { logger } from './logger';
|
||||
import { voicePrintEnv } from './voiceprint.config';
|
||||
|
||||
/** Number of components in every embedding vector returned by this module. */
const EMBEDDING_DIM = 192;

/** Version string reported by {@link EmbeddingService.getModelVersion}. */
const MODEL_VERSION = "ecapa-tdnn-0.1.0-mock";

/** Maximum number of leading audio bytes folded into the mock-embedding seed. */
const MOCK_SEED_SAMPLE_BYTES = 1024;

/**
 * Python probe for `<url>/health`. The base URL arrives as `sys.argv[1]` so it
 * is never interpolated into Python source.
 *
 * `except Exception` (not a bare `except:`) is deliberate: a bare `except:`
 * also catches the `SystemExit` raised by `sys.exit(0)`, which would turn every
 * successful probe into exit code 1.
 */
const HEALTH_CHECK_SCRIPT = [
  "import sys, urllib.request",
  "try:",
  "    urllib.request.urlopen(sys.argv[1] + '/health', timeout=2)",
  "except Exception:",
  "    sys.exit(1)",
  "sys.exit(0)",
].join("\n");

/**
 * Python client for `POST <url>/embedding`. Receives the base URL and the
 * default dimension via argv and the base64-encoded audio via stdin, then
 * prints a single JSON object (`{"ok": true, "vector": [...], "dim": n}` or
 * `{"ok": false, "error": "..."}`) to stdout.
 */
const EMBEDDING_SCRIPT = [
  "import json, sys, urllib.request",
  "try:",
  "    url, dim = sys.argv[1], int(sys.argv[2])",
  "    body = json.dumps({'audio': sys.stdin.read()}).encode()",
  "    req = urllib.request.Request(url + '/embedding', data=body, headers={'Content-Type': 'application/json'})",
  "    with urllib.request.urlopen(req, timeout=60) as resp:",
  "        data = json.loads(resp.read())",
  "    sys.stdout.write(json.dumps({'ok': True, 'vector': data.get('embedding', []), 'dim': data.get('dimension', dim)}))",
  "except Exception as e:",
  "    sys.stdout.write(json.dumps({'ok': False, 'error': str(e)}))",
].join("\n");

/**
 * Produces fixed-size embedding vectors for audio buffers.
 *
 * When the HTTP service at `VOICEPRINT_ML_URL` answers its health probe, the
 * embedding is requested from it; otherwise (or when that request fails or
 * returns a malformed vector) a deterministic pseudo-random unit vector seeded
 * from the audio bytes is returned instead.
 */
export class EmbeddingService {
  private readonly mlServiceUrl: string;
  private initialized = false;

  constructor() {
    // `||` rather than `??` so that an empty environment value also falls back.
    this.mlServiceUrl = process.env.VOICEPRINT_ML_URL || "http://localhost:8001";
  }

  /** Marks the service as initialized and logs its configuration once. */
  async initialize(): Promise<void> {
    if (this.initialized) return;
    this.initialized = true;
    logger.info('Embedding service initialized', { mlUrl: this.mlServiceUrl, modelVersion: MODEL_VERSION });
  }

  /**
   * Returns an embedding of length {@link EMBEDDING_DIM} for the given audio.
   *
   * @param audioBuffer Raw audio bytes.
   * @returns The ML service's vector when available and valid, otherwise the
   *          deterministic mock vector for the same bytes.
   */
  async extract(audioBuffer: Buffer): Promise<number[]> {
    await this.initialize();

    const mlAvailable = await this.checkMLService();
    if (mlAvailable) {
      logger.info('Using ML service for embedding', { mlUrl: this.mlServiceUrl });
      return this.extractViaML(audioBuffer);
    }

    logger.info('Using mock embedding generation', { audioBufferLength: audioBuffer.length });
    return this.generateMockFromBuffer(audioBuffer);
  }

  /**
   * Extracts an embedding and derives a heuristic synthetic-voice score and a
   * small set of descriptive features from it.
   *
   * @param audioBuffer Raw audio bytes.
   * @returns `confidence` in [0, 1]; `detectionType` is `'synthetic_voice'`
   *          when the confidence reaches `voicePrintEnv.SYNTHETIC_THRESHOLD`,
   *          else `'natural'`; plus the features and the embedding itself.
   */
  async analyze(audioBuffer: Buffer): Promise<{
    confidence: number;
    detectionType: string;
    features: Record<string, number>;
    embedding: number[];
  }> {
    const embedding = await this.extract(audioBuffer);
    const confidence = this.estimateSyntheticConfidence(audioBuffer, embedding);
    const detectionType = confidence >= voicePrintEnv.SYNTHETIC_THRESHOLD ? 'synthetic_voice' : 'natural';
    const features = this.extractAnalysisFeatures(audioBuffer, embedding);

    return { confidence, detectionType, features, embedding };
  }

  /** Returns the model version string this service reports. */
  getModelVersion(): string {
    return MODEL_VERSION;
  }

  /**
   * Requests an embedding from the ML service through a `python3` helper.
   *
   * The complete base64 audio is streamed over stdin (no truncation, no
   * argv-length limit) and the URL is passed as an argument, so neither is
   * spliced into Python source. Never rejects: any failure (interpreter
   * missing, HTTP error, malformed response) falls back to the mock embedding.
   */
  private extractViaML(audioBuffer: Buffer): Promise<number[]> {
    return new Promise((resolve) => {
      let settled = false;
      const settle = (vector: number[]): void => {
        if (settled) return;
        settled = true;
        resolve(vector);
      };
      const fallBack = (reason: string): void => {
        if (settled) return;
        logger.info('ML embedding failed, using mock embedding generation', { reason });
        settle(this.generateMockFromBuffer(audioBuffer));
      };

      const proc = spawn("python3", ["-c", EMBEDDING_SCRIPT, this.mlServiceUrl, String(EMBEDDING_DIM)]);

      let output = "";
      proc.stdout.on("data", (chunk: Buffer) => {
        output += chunk.toString();
      });

      // Without this listener a missing interpreter raises an unhandled
      // 'error' event, which terminates the Node process.
      proc.on("error", (err: Error) => fallBack(err.message));

      // The helper may exit before consuming stdin (EPIPE); the 'close'
      // handler below decides the outcome, so the write error is ignored.
      proc.stdin.on("error", () => undefined);

      proc.on("close", () => {
        const vector = this.parseEmbeddingOutput(output);
        if (vector) {
          settle(vector);
        } else {
          fallBack("invalid or unsuccessful ML response");
        }
      });

      proc.stdin.end(audioBuffer.toString("base64"));
    });
  }

  /**
   * Validates the helper's stdout.
   *
   * @returns The vector when the payload is JSON with a truthy `ok` and a
   *          `vector` of exactly {@link EMBEDDING_DIM} finite numbers;
   *          otherwise `undefined`.
   */
  private parseEmbeddingOutput(raw: string): number[] | undefined {
    let parsed: unknown;
    try {
      parsed = JSON.parse(raw);
    } catch {
      return undefined;
    }
    if (typeof parsed !== "object" || parsed === null) return undefined;

    const { ok, vector } = parsed as { ok?: unknown; vector?: unknown };
    if (!ok || !Array.isArray(vector) || vector.length !== EMBEDDING_DIM) return undefined;

    const allFinite = vector.every((x: unknown) => typeof x === "number" && Number.isFinite(x));
    return allFinite ? (vector as number[]) : undefined;
  }

  /**
   * Builds a deterministic, L2-normalized pseudo-random vector seeded from the
   * first {@link MOCK_SEED_SAMPLE_BYTES} bytes of the buffer.
   *
   * Whole 32-bit little-endian words are hashed first; any 1-3 trailing bytes
   * are then folded in individually. Reading a full word at the tail would
   * run past the buffer and throw for lengths that are not a multiple of 4.
   */
  private generateMockFromBuffer(audioBuffer: Buffer): number[] {
    const sampleSize = Math.min(audioBuffer.length, MOCK_SEED_SAMPLE_BYTES);
    const alignedEnd = sampleSize - (sampleSize % 4);

    let hash = 0;
    for (let i = 0; i < alignedEnd; i += 4) {
      hash = ((hash << 5) - hash + audioBuffer.readInt32LE(i)) | 0;
    }
    for (let i = alignedEnd; i < sampleSize; i++) {
      hash = ((hash << 5) - hash + audioBuffer.readUInt8(i)) | 0;
    }

    const rng = this.createRNG(Math.abs(hash));
    const vector: number[] = [];

    // Box-Muller transform: each pair of uniforms yields two Gaussian samples.
    for (let i = 0; i < EMBEDDING_DIM; i += 2) {
      // The generator can return exactly 0; log(0) would poison the vector with NaN.
      const u1 = rng() || Number.MIN_VALUE;
      const u2 = rng();
      const mag = Math.sqrt(-2 * Math.log(u1));
      const z0 = mag * Math.cos(2 * Math.PI * u2);
      const z1 = mag * Math.sin(2 * Math.PI * u2);
      vector.push(parseFloat(z0.toFixed(6)));
      if (i + 1 < EMBEDDING_DIM) {
        vector.push(parseFloat(z1.toFixed(6)));
      }
    }

    // L2 normalize; `|| 1` keeps an all-zero vector from dividing by zero.
    const norm = Math.sqrt(vector.reduce((s, v) => s + v * v, 0)) || 1;
    return vector.map((v) => parseFloat((v / norm).toFixed(6)));
  }

  /** Mean byte value scaled to [0, 1]; 0 for an empty buffer (avoids 0/0). */
  private meanByteAmplitude(buffer: Buffer): number {
    if (buffer.length === 0) return 0;
    return buffer.reduce((s, v) => s + v, 0) / buffer.length / 255;
  }

  /**
   * Heuristic score in [0, 1] combining three weighted terms: distance of the
   * mean byte amplitude from 0.5 (weight 0.3), inverse spread of the embedding
   * components (0.4), and buffer length saturating at 10000 bytes (0.3).
   */
  private estimateSyntheticConfidence(buffer: Buffer, embedding: number[]): number {
    const meanAmplitude = this.meanByteAmplitude(buffer);

    const count = embedding.length || 1;
    const meanEmbedding = embedding.reduce((s, v) => s + v, 0) / count;
    const embeddingStdDev = Math.sqrt(embedding.reduce((s, v) => s + (v - meanEmbedding) ** 2, 0) / count);

    const amplitudeScore = Math.abs(meanAmplitude - 0.5) * 2;
    const embeddingScore = 1.0 - Math.min(1.0, embeddingStdDev * 2);
    const varianceScore = Math.min(1.0, buffer.length / 10000);

    return Math.min(1.0, amplitudeScore * 0.3 + embeddingScore * 0.4 + varianceScore * 0.3);
  }

  /**
   * Computes descriptive features: mean byte amplitude, the rate at which
   * consecutive bytes fall on opposite sides of 128, the sum of squared
   * embedding components, and the embedding's histogram entropy.
   */
  private extractAnalysisFeatures(buffer: Buffer, embedding: number[]): Record<string, number> {
    let zeroCrossings = 0;
    for (let i = 1; i < buffer.length; i++) {
      if ((buffer.readUInt8(i) - 128) * (buffer.readUInt8(i - 1) - 128) < 0) {
        zeroCrossings++;
      }
    }

    return {
      mean_amplitude: this.meanByteAmplitude(buffer),
      zero_crossing_rate: buffer.length > 0 ? zeroCrossings / buffer.length : 0,
      embedding_energy: embedding.reduce((s, v) => s + v * v, 0),
      embedding_entropy: this.calculateEntropy(embedding),
    };
  }

  /** Shannon entropy (bits) of the values bucketed into 20 equal-width bins. */
  private calculateEntropy(values: number[]): number {
    const bins = 20;
    const histogram: number[] = new Array<number>(bins).fill(0);
    const min = Math.min(...values);
    const max = Math.max(...values);
    // All-equal input has zero range; use 1 so everything lands in bin 0.
    const range = max - min || 1;

    for (const v of values) {
      const bin = Math.min(bins - 1, Math.floor(((v - min) / range) * bins));
      histogram[bin]++;
    }

    const total = values.length;
    let entropy = 0;
    for (const count of histogram) {
      if (count > 0) {
        const p = count / total;
        entropy -= p * Math.log2(p);
      }
    }
    return entropy;
  }

  /**
   * Probes the ML service's `/health` endpoint through a `python3` helper.
   *
   * @returns `true` only when the helper exits with code 0; `false` when the
   *          probe fails or the interpreter cannot be started.
   */
  private checkMLService(): Promise<boolean> {
    return new Promise((resolve) => {
      const proc = spawn("python3", ["-c", HEALTH_CHECK_SCRIPT, this.mlServiceUrl]);
      // Spawn failure (e.g. python3 not installed) means "service unavailable",
      // not a process-level crash.
      proc.on("error", () => resolve(false));
      proc.on("close", (code) => resolve(code === 0));
    });
  }

  /**
   * Linear congruential generator (multiplier 1664525, increment 1013904223,
   * state masked to 32 bits) returning values in [0, 1].
   */
  private createRNG(seed: number): () => number {
    return () => {
      seed = (seed * 1664525 + 1013904223) & 0xffffffff;
      return (seed >>> 0) / 0xffffffff;
    };
  }
}
|
||||
Reference in New Issue
Block a user