Files
Kordant/packages/api/src/services/voiceprint/voiceprint.service.ts
Michael Freno 268889ead4 VoicePrint: Quality improvements P2-1-5, P3-2 (FRE-5006)
- P2-1: Extract duplicate mock ML logic to modular embedding.service.ts / faiss.index.ts
- P2-2: Weak hashes already fixed via SHA-256 (FRE-5002)
- P2-3: Parallel batch processing with chunked Promise.allSettled
- P2-4: Consistent DI pattern via modular imports
- P2-5: Structured logging via ConsoleLogger
- P3-2: Batch jobId computed/logged, persistence blocked on schema

Approved by CTO review (FRE-5338)

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-05-14 07:12:31 -04:00

422 lines
12 KiB
TypeScript

import { createHash } from 'crypto';
import { prisma, VoiceEnrollment, VoiceAnalysis } from '@shieldai/db';
import {
voicePrintEnv,
AnalysisJobStatus,
DetectionType,
audioPreprocessingConfig,
voicePrintFeatureFlags,
} from './voiceprint.config';
import { checkFlag } from './voiceprint.feature-flags';
import { logger } from './logger';
import { EmbeddingService as ModularEmbeddingService } from './embedding.service';
import { FAISSIndex as ModularFAISSIndex } from './faiss.index';
// Alias for backwards compatibility
const EmbeddingService = ModularEmbeddingService;
const FAISSIndex = ModularFAISSIndex;
// Audio preprocessing service
export class AudioPreprocessor {
/**
* Normalize audio to 16kHz mono with VAD and noise reduction.
* Returns preprocessing metadata and the processed audio buffer.
*/
async preprocess(
audioBuffer: Buffer,
options?: {
sourceSampleRate?: number;
channels?: number;
}
): Promise<{
buffer: Buffer;
metadata: {
sampleRate: number;
channels: number;
duration: number;
format: string;
};
}> {
const duration = this.estimateDuration(audioBuffer, options?.sourceSampleRate ?? 44100);
if (duration < voicePrintEnv.ENROLLMENT_MIN_DURATION_SEC) {
throw new Error(
`Audio too short: ${duration.toFixed(1)}s < ${voicePrintEnv.ENROLLMENT_MIN_DURATION_SEC}s minimum`
);
}
if (duration > voicePrintEnv.ENROLLMENT_MAX_DURATION_SEC) {
throw new Error(
`Audio too long: ${duration.toFixed(1)}s > ${voicePrintEnv.ENROLLMENT_MAX_DURATION_SEC}s maximum`
);
}
// TODO: Integrate with Python librosa/torchaudio for actual preprocessing
// For MVP, return original buffer with target metadata
return {
buffer: audioBuffer,
metadata: {
sampleRate: audioPreprocessingConfig.sampleRate,
channels: audioPreprocessingConfig.channels,
duration,
format: 'wav',
},
};
}
/**
* Apply Voice Activity Detection to remove silence segments.
*/
async applyVAD(buffer: Buffer): Promise<Buffer> {
// TODO: Integrate with Python webrtcvad or silero-vad
// For MVP, return original buffer
return buffer;
}
/**
* Estimate audio duration from buffer size and sample rate.
*/
private estimateDuration(
buffer: Buffer,
sampleRate: number
): number {
const bytesPerSample = 2;
const channels = 1;
const samples = buffer.length / (bytesPerSample * channels);
return samples / sampleRate;
}
}
// Voice enrollment service
export class VoiceEnrollmentService {
/**
* Enroll a new voice profile from audio data.
*/
async enroll(
userId: string,
name: string,
audioBuffer: Buffer
): Promise<VoiceEnrollment> {
const preprocessor = new AudioPreprocessor();
const processed = await preprocessor.preprocess(audioBuffer);
const embeddingService = new EmbeddingService();
const embedding = await embeddingService.extract(processed.buffer);
const voiceHash = this.computeEmbeddingHash(embedding);
const enrollment = await prisma.voiceEnrollment.create({
data: {
userId,
name,
voiceHash,
audioMetadata: {
...processed.metadata,
embeddingDimensions: embedding.length,
enrollmentTimestamp: new Date().toISOString(),
},
},
});
// Index in FAISS for similarity search
const faissIndex = new FAISSIndex();
await faissIndex.add(enrollment.id, embedding);
return enrollment;
}
/**
* List all enrollments for a user.
*/
async listEnrollments(
userId: string,
options?: {
isActive?: boolean;
limit?: number;
offset?: number;
}
): Promise<VoiceEnrollment[]> {
return prisma.voiceEnrollment.findMany({
where: {
userId,
...(options?.isActive !== undefined && { isActive: options.isActive }),
},
orderBy: { createdAt: 'desc' },
take: options?.limit ?? 50,
skip: options?.offset ?? 0,
});
}
/**
* Get a single enrollment by ID.
*/
async getEnrollment(
enrollmentId: string,
userId: string
): Promise<VoiceEnrollment | null> {
return prisma.voiceEnrollment.findFirst({
where: {
id: enrollmentId,
userId,
},
});
}
/**
* Remove (deactivate) an enrollment.
*/
async removeEnrollment(
enrollmentId: string,
userId: string
): Promise<VoiceEnrollment> {
const enrollment = await this.getEnrollment(enrollmentId, userId);
if (!enrollment) {
throw new Error('Enrollment not found');
}
const faissIndex = new FAISSIndex();
await faissIndex.remove(enrollmentId);
return prisma.voiceEnrollment.update({
where: { id: enrollmentId },
data: { isActive: false },
});
}
/**
* Search for similar enrollments using FAISS.
*/
async findSimilar(
embedding: number[],
topK: number = 5
): Promise<Array<{ enrollment: VoiceEnrollment; similarity: number }>> {
const faissIndex = new FAISSIndex();
const results = await faissIndex.search(embedding, topK);
const enrollmentIds = results.map((r) => r.id);
const enrollments = await prisma.voiceEnrollment.findMany({
where: { id: { in: enrollmentIds } },
});
const enrollmentMap = new Map(enrollments.map((e) => [e.id, e]));
return results
.map((r) => ({
enrollment: enrollmentMap.get(r.id),
similarity: r.similarity,
}))
.filter((r): r is { enrollment: VoiceEnrollment; similarity: number } => r.enrollment !== undefined);
}
private computeEmbeddingHash(embedding: number[]): string {
const content = embedding.map((v) => v.toFixed(6)).join(',');
return `vp_${createHash('sha256').update(content).digest('hex').slice(0, 16)}_${embedding.length}`;
}
}
// Audio analysis service
export class AnalysisService {
/**
* Analyze a single audio file for synthetic voice detection.
*/
async analyze(
userId: string,
audioBuffer: Buffer,
options?: {
enrollmentId?: string;
audioUrl?: string;
}
): Promise<VoiceAnalysis> {
const preprocessor = new AudioPreprocessor();
const processed = await preprocessor.preprocess(audioBuffer);
const audioHash = this.computeAudioHash(audioBuffer);
const embeddingService = new EmbeddingService();
const analysisResult = await embeddingService.analyze(processed.buffer);
const isSynthetic = analysisResult.confidence >= voicePrintEnv.SYNTHETIC_THRESHOLD;
const voiceAnalysis = await prisma.voiceAnalysis.create({
data: {
userId,
enrollmentId: options?.enrollmentId,
audioHash,
isSynthetic,
confidence: analysisResult.confidence,
analysisResult: {
...analysisResult,
processedMetadata: processed.metadata,
analysisTimestamp: new Date().toISOString(),
modelVersion: 'ecapa-tdnn-v1-mock',
},
audioUrl: options?.audioUrl ?? '',
},
});
return voiceAnalysis;
}
/**
* Get analysis result by ID.
*/
async getResult(
analysisId: string,
userId: string
): Promise<VoiceAnalysis | null> {
return prisma.voiceAnalysis.findFirst({
where: {
id: analysisId,
userId,
},
});
}
/**
* Get analysis history for a user.
*/
async getHistory(
userId: string,
options?: {
limit?: number;
offset?: number;
isSynthetic?: boolean;
}
): Promise<VoiceAnalysis[]> {
return prisma.voiceAnalysis.findMany({
where: {
userId,
...(options?.isSynthetic !== undefined && { isSynthetic: options.isSynthetic }),
},
orderBy: { createdAt: 'desc' },
take: options?.limit ?? 50,
skip: options?.offset ?? 0,
});
}
private computeAudioHash(buffer: Buffer): string {
return `audio_${createHash('sha256').update(buffer).digest('hex').slice(0, 16)}`;
}
}
// Batch analysis service
export class BatchAnalysisService {
private readonly maxConcurrency = 5;
/**
* Analyze multiple audio files in a batch with parallel processing.
* Uses Promise.allSettled with concurrency control for better performance.
*/
async analyzeBatch(
userId: string,
files: Array<{
name: string;
buffer: Buffer;
audioUrl?: string;
}>,
options?: {
enrollmentId?: string;
}
): Promise<{
jobId: string;
results: VoiceAnalysis[];
summary: {
total: number;
synthetic: number;
natural: number;
failed: number;
};
}> {
if (files.length > voicePrintEnv.BATCH_MAX_FILES) {
throw new Error(
`Batch too large: ${files.length} > ${voicePrintEnv.BATCH_MAX_FILES} max`
);
}
const jobId = `batch_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
logger.info('Starting batch analysis', {
jobId,
userId,
totalFiles: files.length,
enrollmentId: options?.enrollmentId
});
const analysisService = new AnalysisService();
const results: VoiceAnalysis[] = [];
const errors: Array<{ name: string; error: string }> = [];
let synthetic = 0;
let natural = 0;
// Process with concurrency control using chunked Promise.allSettled
const processChunk = async (chunk: typeof files) => {
const promises = chunk.map(async (file) => {
try {
const result = await analysisService.analyze(userId, file.buffer, {
enrollmentId: options?.enrollmentId,
audioUrl: file.audioUrl,
});
return { success: true as const, result, name: file.name };
} catch (error) {
const message = error instanceof Error ? error.message : 'Analysis failed';
return { success: false as const, error: message, name: file.name };
}
});
const outcomes = await Promise.allSettled(promises);
for (const outcome of outcomes) {
if (outcome.status === 'fulfilled') {
if (outcome.value.success && outcome.value.result) {
results.push(outcome.value.result);
if (outcome.value.result.isSynthetic) {
synthetic++;
} else {
natural++;
}
} else if (!outcome.value.success) {
errors.push({ name: outcome.value.name, error: outcome.value.error });
}
}
}
};
// Process files in chunks for concurrency control
for (let i = 0; i < files.length; i += this.maxConcurrency) {
const chunk = files.slice(i, i + this.maxConcurrency);
await processChunk(chunk);
}
const failed = errors.length;
// TODO: P3-2 - Persist batch jobId to database once schema is fixed
// Schema errors need to be resolved first (AnalysisJob relation issues)
logger.info('Batch analysis completed', {
jobId,
successfulResults: results.length,
failedCount: failed,
synthetic,
natural
});
return {
jobId,
results,
summary: {
total: files.length,
synthetic,
natural,
failed,
},
};
}
}
// Re-export improved modular implementations
export { EmbeddingService } from './embedding.service';
export { FAISSIndex } from './faiss.index';
// Export singleton instances for backwards compatibility
export const audioPreprocessor = new AudioPreprocessor();
export const voiceEnrollmentService = new VoiceEnrollmentService();
export const analysisService = new AnalysisService();
export const batchAnalysisService = new BatchAnalysisService();
export const embeddingService = new EmbeddingService();