From bc72a5b1cbe9a4a679740781c3b75b931252bd68 Mon Sep 17 00:00:00 2001 From: Michael Freno Date: Sun, 10 May 2026 11:17:23 -0400 Subject: [PATCH] Fix VoicePrint service-layer correctness bugs P1-1, P1-7, P2-2 (FRE-5002) P1-1: Replace non-deterministic Math.random() with buffer-variance score P1-7: Fix findSimilar result ordering by using Map instead of index zip P2-2: Replace weak hashes with SHA-256 for both embedding and audio Co-Authored-By: Paperclip --- .../services/voiceprint/voiceprint.service.ts | 40 ++++++++++--------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/packages/api/src/services/voiceprint/voiceprint.service.ts b/packages/api/src/services/voiceprint/voiceprint.service.ts index 9eb55b8..5a8f4c8 100644 --- a/packages/api/src/services/voiceprint/voiceprint.service.ts +++ b/packages/api/src/services/voiceprint/voiceprint.service.ts @@ -1,9 +1,9 @@ +import { createHash } from 'crypto'; import { prisma, VoiceEnrollment, VoiceAnalysis } from '@shieldai/db'; import { voicePrintEnv, AnalysisJobStatus, DetectionType, - ConfidenceLevel, audioPreprocessingConfig, voicePrintFeatureFlags, } from './voiceprint.config'; @@ -189,20 +189,19 @@ export class VoiceEnrollmentService { const enrollments = await prisma.voiceEnrollment.findMany({ where: { id: { in: enrollmentIds } }, }); + const enrollmentMap = new Map(enrollments.map((e) => [e.id, e])); - return results.map((r, i) => ({ - enrollment: enrollments[i], - similarity: r.similarity, - })); + return results + .map((r) => ({ + enrollment: enrollmentMap.get(r.id), + similarity: r.similarity, + })) + .filter((r): r is { enrollment: VoiceEnrollment; similarity: number } => r.enrollment !== undefined); } private computeEmbeddingHash(embedding: number[]): string { - let hash = 0; - for (let i = 0; i < embedding.length; i++) { - hash = ((hash << 5) - hash) + embedding[i]; - hash |= 0; - } - return `vp_${Math.abs(hash).toString(16)}_${embedding.length}`; + const content = embedding.map((v) => v.toFixed(6)).join(','); + return `vp_${createHash('sha256').update(content).digest('hex').slice(0, 16)}_${embedding.length}`; } } @@ -287,13 +286,7 @@ export class AnalysisService { } private computeAudioHash(buffer: Buffer): string { - let hash = 0; - const sampleSize = Math.min(buffer.length, 1024); - for (let i = 0; i < sampleSize; i += 8) { - hash = ((hash << 5) - hash) + buffer.readUInt8(i); - hash |= 0; - } - return `audio_${Math.abs(hash).toString(16)}`; + return `audio_${createHash('sha256').update(buffer).digest('hex').slice(0, 16)}`; } } @@ -462,13 +455,22 @@ export class EmbeddingService { embedding.length ) || 0; + // Deterministic buffer variance as alternative to Math.random() + const mean = meanAmplitude * 255; + let variance = 0; + for (let i = 0; i < buffer.length; i++) { + variance += (buffer[i] - mean) ** 2; + } + variance /= buffer.length; + const varianceScore = Math.min(1.0, variance / 16384); + // Combine features into confidence score const amplitudeScore = Math.abs(meanAmplitude - 0.5) * 2; const embeddingScore = 1.0 - Math.min(1.0, embeddingStdDev * 2); return Math.min( 1.0, - amplitudeScore * 0.3 + embeddingScore * 0.4 + Math.random() * 0.3 + amplitudeScore * 0.3 + embeddingScore * 0.4 + varianceScore * 0.3 ); }