import { createHash } from 'crypto';
import { prisma, VoiceEnrollment, VoiceAnalysis } from '@shieldai/db';
import {
  voicePrintEnv,
  AnalysisJobStatus,
  DetectionType,
  audioPreprocessingConfig,
  voicePrintFeatureFlags,
} from './voiceprint.config';
import { checkFlag } from './voiceprint.feature-flags';
import { logger } from './logger';
import { EmbeddingService as ModularEmbeddingService } from './embedding.service';
import { FAISSIndex as ModularFAISSIndex } from './faiss.index';

// Alias for backwards compatibility
const EmbeddingService = ModularEmbeddingService;
const FAISSIndex = ModularFAISSIndex;

// Audio preprocessing service
export class AudioPreprocessor {
  /**
   * Validate the length of an audio clip and return it with target metadata.
   *
   * The duration is estimated from the raw buffer size (see
   * {@link AudioPreprocessor.estimateDuration}) and checked against
   * `voicePrintEnv.ENROLLMENT_MIN_DURATION_SEC` /
   * `voicePrintEnv.ENROLLMENT_MAX_DURATION_SEC`.
   *
   * No resampling, VAD or noise reduction is performed yet: the returned
   * buffer is the input buffer, while the returned metadata reports the
   * *target* sample rate and channel count from `audioPreprocessingConfig`.
   *
   * @param audioBuffer - Raw audio bytes.
   * @param options - Description of the source audio. `sourceSampleRate`
   *   defaults to 44100 and `channels` to 1 when omitted.
   * @returns The (currently unmodified) buffer plus preprocessing metadata.
   * @throws Error if the source format is invalid, or the estimated duration
   *   is below the minimum or above the maximum.
   */
  async preprocess(
    audioBuffer: Buffer,
    options?: {
      sourceSampleRate?: number;
      channels?: number;
    }
  ): Promise<{
    buffer: Buffer;
    metadata: {
      sampleRate: number;
      channels: number;
      duration: number;
      format: string;
    };
  }> {
    const sourceSampleRate = options?.sourceSampleRate ?? 44100;
    // Previously the caller-supplied channel count was ignored and mono was
    // always assumed, which overestimated the duration of multi-channel audio.
    const sourceChannels = options?.channels ?? 1;
    const duration = this.estimateDuration(audioBuffer, sourceSampleRate, sourceChannels);

    if (duration < voicePrintEnv.ENROLLMENT_MIN_DURATION_SEC) {
      throw new Error(
        `Audio too short: ${duration.toFixed(1)}s < ${voicePrintEnv.ENROLLMENT_MIN_DURATION_SEC}s minimum`
      );
    }

    if (duration > voicePrintEnv.ENROLLMENT_MAX_DURATION_SEC) {
      throw new Error(
        `Audio too long: ${duration.toFixed(1)}s > ${voicePrintEnv.ENROLLMENT_MAX_DURATION_SEC}s maximum`
      );
    }

    // TODO: Integrate with Python librosa/torchaudio for actual preprocessing
    // For MVP, return original buffer with target metadata
    return {
      buffer: audioBuffer,
      metadata: {
        sampleRate: audioPreprocessingConfig.sampleRate,
        channels: audioPreprocessingConfig.channels,
        duration,
        format: 'wav',
      },
    };
  }

  /**
   * Apply Voice Activity Detection to remove silence segments.
   *
   * Currently a pass-through: the input buffer is returned unchanged.
   *
   * @param buffer - Audio bytes to filter.
   * @returns The same buffer instance that was passed in.
   */
  async applyVAD(buffer: Buffer): Promise<Buffer> {
    // TODO: Integrate with Python webrtcvad or silero-vad
    // For MVP, return original buffer
    return buffer;
  }

  /**
   * Estimate audio duration in seconds from buffer size and source format.
   *
   * Treats every byte of the buffer as sample data at 2 bytes per sample
   * (i.e. assumes headerless 16-bit PCM — TODO confirm against callers).
   *
   * @param buffer - Raw audio bytes.
   * @param sampleRate - Source sample rate in Hz; must be finite and > 0.
   * @param channels - Number of interleaved source channels; must be finite
   *   and > 0. Defaults to 1 (mono).
   * @returns Estimated duration in seconds.
   * @throws Error if `sampleRate` or `channels` is not a finite positive
   *   number (these would otherwise yield NaN/Infinity durations that slip
   *   past or confuse the min/max checks).
   */
  private estimateDuration(
    buffer: Buffer,
    sampleRate: number,
    channels: number = 1
  ): number {
    if (!Number.isFinite(sampleRate) || sampleRate <= 0) {
      throw new Error(`Invalid audio format: sample rate must be a positive number, got ${sampleRate}`);
    }
    if (!Number.isFinite(channels) || channels <= 0) {
      throw new Error(`Invalid audio format: channel count must be a positive number, got ${channels}`);
    }

    const bytesPerSample = 2;
    const framesInBuffer = buffer.length / (bytesPerSample * channels);
    return framesInBuffer / sampleRate;
  }
}

// Voice enrollment service
export class VoiceEnrollmentService {
  /**
   * Enroll a new voice profile from audio data.
   *
   * Steps: validate/preprocess the audio, extract an embedding, persist a
   * `voiceEnrollment` row (with a hash of the embedding and audio metadata),
   * then add the embedding to the FAISS index under the new row's id.
   *
   * NOTE(review): the database insert and the index insert are not atomic;
   * if `faissIndex.add` throws, the enrollment row remains without an index
   * entry. Confirm whether a compensating delete is wanted.
   *
   * @param userId - Owner of the enrollment.
   * @param name - Display name stored on the enrollment.
   * @param audioBuffer - Raw audio bytes to enroll.
   * @returns The created enrollment record.
   * @throws Error if preprocessing rejects the audio; errors from the
   *   embedding service, Prisma or the index propagate unchanged.
   */
  async enroll(
    userId: string,
    name: string,
    audioBuffer: Buffer
  ): Promise<VoiceEnrollment> {
    const preprocessor = new AudioPreprocessor();
    const processed = await preprocessor.preprocess(audioBuffer);

    const embeddingService = new EmbeddingService();
    const embedding = await embeddingService.extract(processed.buffer);

    const voiceHash = this.computeEmbeddingHash(embedding);

    const enrollment = await prisma.voiceEnrollment.create({
      data: {
        userId,
        name,
        voiceHash,
        audioMetadata: {
          ...processed.metadata,
          embeddingDimensions: embedding.length,
          enrollmentTimestamp: new Date().toISOString(),
        },
      },
    });

    // Index in FAISS for similarity search.
    // NOTE(review): a fresh FAISSIndex is constructed on every call; this
    // presumably relies on the index sharing state across instances — confirm.
    const faissIndex = new FAISSIndex();
    await faissIndex.add(enrollment.id, embedding);

    return enrollment;
  }

  /**
   * List enrollments for a user, newest first.
   *
   * @param userId - Owner whose enrollments are listed.
   * @param options - Optional `isActive` filter and pagination. `limit`
   *   defaults to 50 and `offset` to 0; both are passed to Prisma as
   *   `take`/`skip` without further validation.
   * @returns Matching enrollments ordered by `createdAt` descending.
   */
  async listEnrollments(
    userId: string,
    options?: {
      isActive?: boolean;
      limit?: number;
      offset?: number;
    }
  ): Promise<VoiceEnrollment[]> {
    return prisma.voiceEnrollment.findMany({
      where: {
        userId,
        // Only add the filter when the caller specified it explicitly.
        ...(options?.isActive !== undefined && { isActive: options.isActive }),
      },
      orderBy: { createdAt: 'desc' },
      take: options?.limit ?? 50,
      skip: options?.offset ?? 0,
    });
  }

  /**
   * Get a single enrollment by ID, scoped to its owner.
   *
   * @param enrollmentId - Enrollment primary key.
   * @param userId - Owner; rows belonging to other users never match.
   * @returns The enrollment, or `null` when no row matches both ids.
   */
  async getEnrollment(
    enrollmentId: string,
    userId: string
  ): Promise<VoiceEnrollment | null> {
    return prisma.voiceEnrollment.findFirst({
      where: {
        id: enrollmentId,
        userId,
      },
    });
  }

  /**
   * Remove (deactivate) an enrollment.
   *
   * Verifies ownership, removes the entry from the FAISS index, then marks
   * the row `isActive: false` (the row itself is kept).
   *
   * @param enrollmentId - Enrollment primary key.
   * @param userId - Owner of the enrollment.
   * @returns The updated (deactivated) enrollment record.
   * @throws Error `'Enrollment not found'` when no enrollment matches both ids.
   */
  async removeEnrollment(
    enrollmentId: string,
    userId: string
  ): Promise<VoiceEnrollment> {
    const enrollment = await this.getEnrollment(enrollmentId, userId);
    if (!enrollment) {
      throw new Error('Enrollment not found');
    }

    const faissIndex = new FAISSIndex();
    await faissIndex.remove(enrollmentId);

    return prisma.voiceEnrollment.update({
      where: { id: enrollmentId },
      data: { isActive: false },
    });
  }

  /**
   * Search for similar enrollments using FAISS.
   *
   * Index hits whose enrollment row no longer exists are dropped; the order
   * of the remaining hits is the order returned by the index.
   *
   * @param embedding - Query embedding vector.
   * @param topK - Maximum number of index hits to request (default 5).
   * @returns Enrollment/similarity pairs for hits that resolve to a row.
   */
  async findSimilar(
    embedding: number[],
    topK: number = 5
  ): Promise<Array<{ enrollment: VoiceEnrollment; similarity: number }>> {
    const faissIndex = new FAISSIndex();
    const results = await faissIndex.search(embedding, topK);

    // Nothing to resolve: skip the database round-trip.
    if (results.length === 0) {
      return [];
    }

    const enrollmentIds = results.map((r) => r.id);
    const enrollments = await prisma.voiceEnrollment.findMany({
      where: { id: { in: enrollmentIds } },
    });

    const enrollmentMap = new Map(enrollments.map((e) => [e.id, e]));

    return results
      .map((r) => ({
        enrollment: enrollmentMap.get(r.id),
        similarity: r.similarity,
      }))
      .filter(
        (r): r is { enrollment: VoiceEnrollment; similarity: number } =>
          r.enrollment !== undefined
      );
  }

  /**
   * Build a short identifier for an embedding.
   *
   * Format: `vp_<first 16 hex chars of SHA-256>_<dimension count>`, where the
   * digest is taken over the components rendered with 6 decimal places and
   * joined by commas.
   */
  private computeEmbeddingHash(embedding: number[]): string {
    const content = embedding.map((v) => v.toFixed(6)).join(',');
    return `vp_${createHash('sha256').update(content).digest('hex').slice(0, 16)}_${embedding.length}`;
  }
}

// Audio analysis service
export class AnalysisService {
  /**
   * Analyze a single audio file for synthetic voice detection.
   *
   * The audio is preprocessed, scored by the embedding service, and the
   * outcome is persisted as a `voiceAnalysis` row. A clip is flagged
   * synthetic when the reported confidence is at or above
   * `voicePrintEnv.SYNTHETIC_THRESHOLD`.
   *
   * @param userId - Owner of the analysis.
   * @param audioBuffer - Raw audio bytes; the stored hash is computed over
   *   these original bytes, not the preprocessed buffer.
   * @param options - Optional enrollment to associate and source URL
   *   (stored as an empty string when omitted).
   * @returns The persisted analysis record.
   * @throws Error if preprocessing rejects the audio; errors from the
   *   embedding service or Prisma propagate unchanged.
   */
  async analyze(
    userId: string,
    audioBuffer: Buffer,
    options?: {
      enrollmentId?: string;
      audioUrl?: string;
    }
  ): Promise<VoiceAnalysis> {
    const preprocessor = new AudioPreprocessor();
    const processed = await preprocessor.preprocess(audioBuffer);

    const audioHash = this.computeAudioHash(audioBuffer);

    const embeddingService = new EmbeddingService();
    const analysisResult = await embeddingService.analyze(processed.buffer);

    const isSynthetic = analysisResult.confidence >= voicePrintEnv.SYNTHETIC_THRESHOLD;

    const voiceAnalysis = await prisma.voiceAnalysis.create({
      data: {
        userId,
        enrollmentId: options?.enrollmentId,
        audioHash,
        isSynthetic,
        confidence: analysisResult.confidence,
        analysisResult: {
          ...analysisResult,
          processedMetadata: processed.metadata,
          analysisTimestamp: new Date().toISOString(),
          modelVersion: 'ecapa-tdnn-v1-mock',
        },
        audioUrl: options?.audioUrl ?? '',
      },
    });

    return voiceAnalysis;
  }

  /**
   * Get analysis result by ID, scoped to its owner.
   *
   * @param analysisId - Analysis primary key.
   * @param userId - Owner; rows belonging to other users never match.
   * @returns The analysis, or `null` when no row matches both ids.
   */
  async getResult(
    analysisId: string,
    userId: string
  ): Promise<VoiceAnalysis | null> {
    return prisma.voiceAnalysis.findFirst({
      where: {
        id: analysisId,
        userId,
      },
    });
  }

  /**
   * Get analysis history for a user, newest first.
   *
   * @param userId - Owner whose analyses are listed.
   * @param options - Optional `isSynthetic` filter and pagination. `limit`
   *   defaults to 50 and `offset` to 0; both are passed to Prisma as
   *   `take`/`skip` without further validation.
   * @returns Matching analyses ordered by `createdAt` descending.
   */
  async getHistory(
    userId: string,
    options?: {
      limit?: number;
      offset?: number;
      isSynthetic?: boolean;
    }
  ): Promise<VoiceAnalysis[]> {
    return prisma.voiceAnalysis.findMany({
      where: {
        userId,
        // Only add the filter when the caller specified it explicitly.
        ...(options?.isSynthetic !== undefined && { isSynthetic: options.isSynthetic }),
      },
      orderBy: { createdAt: 'desc' },
      take: options?.limit ?? 50,
      skip: options?.offset ?? 0,
    });
  }

  /**
   * Build a short identifier for raw audio bytes.
   *
   * Format: `audio_<first 16 hex chars of SHA-256 over the buffer>`.
   */
  private computeAudioHash(buffer: Buffer): string {
    return `audio_${createHash('sha256').update(buffer).digest('hex').slice(0, 16)}`;
  }
}

// Batch analysis service
export class BatchAnalysisService {
  /** Number of files analyzed concurrently within one chunk. */
  private readonly maxConcurrency = 5;

  /**
   * Analyze multiple audio files in a batch with bounded parallelism.
   *
   * Files are processed in consecutive chunks of `maxConcurrency`; each chunk
   * is awaited with `Promise.allSettled`, so one failing file never aborts
   * the batch. Failures are counted in `summary.failed` and their messages
   * are included in the completion log entry.
   *
   * @param userId - Owner of every analysis in the batch.
   * @param files - Files to analyze; `name` is used only for error reporting.
   * @param options - Optional enrollment to associate with every analysis.
   * @returns A generated job id, the successful analyses (in chunk order),
   *   and per-outcome counts where
   *   `total === synthetic + natural + failed`.
   * @throws Error if `files.length` exceeds `voicePrintEnv.BATCH_MAX_FILES`.
   */
  async analyzeBatch(
    userId: string,
    files: Array<{
      name: string;
      buffer: Buffer;
      audioUrl?: string;
    }>,
    options?: {
      enrollmentId?: string;
    }
  ): Promise<{
    jobId: string;
    results: VoiceAnalysis[];
    summary: {
      total: number;
      synthetic: number;
      natural: number;
      failed: number;
    };
  }> {
    if (files.length > voicePrintEnv.BATCH_MAX_FILES) {
      throw new Error(
        `Batch too large: ${files.length} > ${voicePrintEnv.BATCH_MAX_FILES} max`
      );
    }

    const jobId = `batch_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;

    logger.info('Starting batch analysis', {
      jobId,
      userId,
      totalFiles: files.length,
      enrollmentId: options?.enrollmentId,
    });

    const analysisService = new AnalysisService();
    const results: VoiceAnalysis[] = [];
    const errors: Array<{ name: string; error: string }> = [];
    let synthetic = 0;
    let natural = 0;

    // Run one chunk fully in parallel and fold its outcomes into the tallies.
    const processChunk = async (chunk: typeof files): Promise<void> => {
      const outcomes = await Promise.allSettled(
        chunk.map((file) =>
          analysisService.analyze(userId, file.buffer, {
            enrollmentId: options?.enrollmentId,
            audioUrl: file.audioUrl,
          })
        )
      );

      // allSettled preserves input order, so index i maps back to chunk[i].
      for (const [index, outcome] of outcomes.entries()) {
        if (outcome.status === 'fulfilled') {
          results.push(outcome.value);
          if (outcome.value.isSynthetic) {
            synthetic++;
          } else {
            natural++;
          }
        } else {
          const reason: unknown = outcome.reason;
          errors.push({
            name: chunk[index]?.name ?? 'unknown',
            error: reason instanceof Error ? reason.message : 'Analysis failed',
          });
        }
      }
    };

    // Process files in chunks for concurrency control
    for (let i = 0; i < files.length; i += this.maxConcurrency) {
      const chunk = files.slice(i, i + this.maxConcurrency);
      await processChunk(chunk);
    }

    const failed = errors.length;

    // TODO: P3-2 - Persist batch jobId to database once schema is fixed
    // Schema errors need to be resolved first (AnalysisJob relation issues)

    logger.info('Batch analysis completed', {
      jobId,
      successfulResults: results.length,
      failedCount: failed,
      synthetic,
      natural,
      // Surface per-file failure reasons; they are not part of the return value.
      errors,
    });

    return {
      jobId,
      results,
      summary: {
        total: files.length,
        synthetic,
        natural,
        failed,
      },
    };
  }
}

// Re-export improved modular implementations
export { EmbeddingService } from './embedding.service';
export { FAISSIndex } from './faiss.index';

// Export singleton instances for backwards compatibility
export const audioPreprocessor = new AudioPreprocessor();
export const voiceEnrollmentService = new VoiceEnrollmentService();
export const analysisService = new AnalysisService();
export const batchAnalysisService = new BatchAnalysisService();
export const embeddingService = new EmbeddingService();