Fix VoicePrint service-layer correctness bugs P1-1, P1-7, P2-2 (FRE-5002)
P1-1: Replace non-deterministic Math.random() with buffer-variance score
P1-7: Fix findSimilar result ordering by using Map instead of index zip
P2-2: Replace weak hashes with SHA-256 for both embedding and audio

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
@@ -1,9 +1,9 @@
|
||||
import { createHash } from 'crypto';
|
||||
import { prisma, VoiceEnrollment, VoiceAnalysis } from '@shieldai/db';
|
||||
import {
|
||||
voicePrintEnv,
|
||||
AnalysisJobStatus,
|
||||
DetectionType,
|
||||
ConfidenceLevel,
|
||||
audioPreprocessingConfig,
|
||||
voicePrintFeatureFlags,
|
||||
} from './voiceprint.config';
|
||||
@@ -189,20 +189,19 @@ export class VoiceEnrollmentService {
|
||||
const enrollments = await prisma.voiceEnrollment.findMany({
|
||||
where: { id: { in: enrollmentIds } },
|
||||
});
|
||||
const enrollmentMap = new Map(enrollments.map((e) => [e.id, e]));
|
||||
|
||||
return results.map((r, i) => ({
|
||||
enrollment: enrollments[i],
|
||||
similarity: r.similarity,
|
||||
}));
|
||||
return results
|
||||
.map((r) => ({
|
||||
enrollment: enrollmentMap.get(r.id),
|
||||
similarity: r.similarity,
|
||||
}))
|
||||
.filter((r): r is { enrollment: VoiceEnrollment; similarity: number } => r.enrollment !== undefined);
|
||||
}
|
||||
|
||||
private computeEmbeddingHash(embedding: number[]): string {
|
||||
let hash = 0;
|
||||
for (let i = 0; i < embedding.length; i++) {
|
||||
hash = ((hash << 5) - hash) + embedding[i];
|
||||
hash |= 0;
|
||||
}
|
||||
return `vp_${Math.abs(hash).toString(16)}_${embedding.length}`;
|
||||
const content = embedding.map((v) => v.toFixed(6)).join(',');
|
||||
return `vp_${createHash('sha256').update(content).digest('hex').slice(0, 16)}_${embedding.length}`;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -287,13 +286,7 @@ export class AnalysisService {
|
||||
}
|
||||
|
||||
private computeAudioHash(buffer: Buffer): string {
|
||||
let hash = 0;
|
||||
const sampleSize = Math.min(buffer.length, 1024);
|
||||
for (let i = 0; i < sampleSize; i += 8) {
|
||||
hash = ((hash << 5) - hash) + buffer.readUInt8(i);
|
||||
hash |= 0;
|
||||
}
|
||||
return `audio_${Math.abs(hash).toString(16)}`;
|
||||
return `audio_${createHash('sha256').update(buffer).digest('hex').slice(0, 16)}`;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -462,13 +455,22 @@ export class EmbeddingService {
|
||||
embedding.length
|
||||
) || 0;
|
||||
|
||||
// Deterministic buffer variance as alternative to Math.random()
|
||||
const mean = meanAmplitude * 255;
|
||||
let variance = 0;
|
||||
for (let i = 0; i < buffer.length; i++) {
|
||||
variance += (buffer[i] - mean) ** 2;
|
||||
}
|
||||
variance /= buffer.length;
|
||||
const varianceScore = Math.min(1.0, variance / 16384);
|
||||
|
||||
// Combine features into confidence score
|
||||
const amplitudeScore = Math.abs(meanAmplitude - 0.5) * 2;
|
||||
const embeddingScore = 1.0 - Math.min(1.0, embeddingStdDev * 2);
|
||||
|
||||
return Math.min(
|
||||
1.0,
|
||||
amplitudeScore * 0.3 + embeddingScore * 0.4 + Math.random() * 0.3
|
||||
amplitudeScore * 0.3 + embeddingScore * 0.4 + varianceScore * 0.3
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user