- P2-1: Extract duplicate mock ML logic to modular embedding.service.ts / faiss.index.ts
- P2-2: Weak hashes already fixed via SHA-256 (FRE-5002)
- P2-3: Parallel batch processing with chunked Promise.allSettled
- P2-4: Consistent DI pattern via modular imports
- P2-5: Structured logging via ConsoleLogger
- P3-2: Batch jobId computed/logged; persistence blocked on schema

Approved by CTO review (FRE-5338)

Co-Authored-By: Paperclip <noreply@paperclip.ing>
422 lines
12 KiB
TypeScript
422 lines
12 KiB
TypeScript
import { createHash } from 'crypto';
|
|
import { prisma, VoiceEnrollment, VoiceAnalysis } from '@shieldai/db';
|
|
import {
|
|
voicePrintEnv,
|
|
AnalysisJobStatus,
|
|
DetectionType,
|
|
audioPreprocessingConfig,
|
|
voicePrintFeatureFlags,
|
|
} from './voiceprint.config';
|
|
import { checkFlag } from './voiceprint.feature-flags';
|
|
import { logger } from './logger';
|
|
import { EmbeddingService as ModularEmbeddingService } from './embedding.service';
|
|
import { FAISSIndex as ModularFAISSIndex } from './faiss.index';
|
|
|
|
// Local aliases kept for backwards compatibility: the rest of this module
// refers to the modular implementations by their historical short names.
const EmbeddingService = ModularEmbeddingService,
  FAISSIndex = ModularFAISSIndex;
|
|
|
|
// Audio preprocessing service
/**
 * Validates incoming audio and describes the format it is meant to be
 * normalized to. Real resampling, VAD and noise reduction are still TODO, so
 * the audio bytes themselves are passed through unchanged.
 */
export class AudioPreprocessor {
  /**
   * Check that the clip length is within the enrollment bounds and return the
   * buffer together with the target-format metadata.
   *
   * The buffer is currently returned untouched; `metadata.sampleRate` and
   * `metadata.channels` are the configured targets from
   * `audioPreprocessingConfig`, not values measured from the input.
   *
   * @param audioBuffer - Raw audio bytes. Duration is estimated as if this were
   *   headerless 16-bit PCM.
   * @param options - Description of the source audio. `sourceSampleRate`
   *   defaults to 44100 Hz and `channels` defaults to 1 (mono).
   * @returns The unmodified buffer plus the target sample rate/channels, the
   *   estimated duration in seconds and the format label `'wav'`.
   * @throws Error if the sample rate or channel count is invalid, or if the
   *   estimated duration is outside `ENROLLMENT_MIN_DURATION_SEC` ..
   *   `ENROLLMENT_MAX_DURATION_SEC`.
   */
  async preprocess(
    audioBuffer: Buffer,
    options?: {
      sourceSampleRate?: number;
      channels?: number;
    }
  ): Promise<{
    buffer: Buffer;
    metadata: {
      sampleRate: number;
      channels: number;
      duration: number;
      format: string;
    };
  }> {
    // The caller-supplied channel count must take part in the estimate;
    // otherwise interleaved multi-channel audio looks N times longer than it is.
    const duration = this.estimateDuration(
      audioBuffer,
      options?.sourceSampleRate ?? 44100,
      options?.channels ?? 1
    );

    if (duration < voicePrintEnv.ENROLLMENT_MIN_DURATION_SEC) {
      throw new Error(
        `Audio too short: ${duration.toFixed(1)}s < ${voicePrintEnv.ENROLLMENT_MIN_DURATION_SEC}s minimum`
      );
    }

    if (duration > voicePrintEnv.ENROLLMENT_MAX_DURATION_SEC) {
      throw new Error(
        `Audio too long: ${duration.toFixed(1)}s > ${voicePrintEnv.ENROLLMENT_MAX_DURATION_SEC}s maximum`
      );
    }

    // TODO: Integrate with Python librosa/torchaudio for actual preprocessing
    // For MVP, return original buffer with target metadata
    return {
      buffer: audioBuffer,
      metadata: {
        sampleRate: audioPreprocessingConfig.sampleRate,
        channels: audioPreprocessingConfig.channels,
        duration,
        format: 'wav',
      },
    };
  }

  /**
   * Apply Voice Activity Detection to remove silence segments.
   *
   * Not implemented yet: the input buffer is returned as-is.
   *
   * @param buffer - Audio bytes to filter.
   * @returns The same buffer instance that was passed in.
   */
  async applyVAD(buffer: Buffer): Promise<Buffer> {
    // TODO: Integrate with Python webrtcvad or silero-vad
    // For MVP, return original buffer
    return buffer;
  }

  /**
   * Estimate audio duration from buffer size, sample rate and channel count.
   *
   * Assumes 2 bytes per sample (16-bit PCM) and does not account for any
   * container header that may precede the samples.
   *
   * @param buffer - Raw audio bytes.
   * @param sampleRate - Samples per second per channel; must be finite and > 0.
   * @param channels - Interleaved channel count; must be a positive integer.
   * @returns Estimated duration in seconds.
   * @throws Error if `sampleRate` or `channels` is not a usable value. Without
   *   this guard a NaN rate would slip past both duration bounds checks.
   */
  private estimateDuration(
    buffer: Buffer,
    sampleRate: number,
    channels: number = 1
  ): number {
    if (!Number.isFinite(sampleRate) || sampleRate <= 0) {
      throw new Error(`Invalid source sample rate: ${sampleRate}`);
    }
    if (!Number.isInteger(channels) || channels <= 0) {
      throw new Error(`Invalid channel count: ${channels}`);
    }

    const bytesPerSample = 2;
    const framesPerChannel = buffer.length / (bytesPerSample * channels);
    return framesPerChannel / sampleRate;
  }
}
|
|
|
|
// Voice enrollment service
/**
 * Creates, lists, deactivates and searches voice enrollments.
 *
 * Enrollment rows are stored through `prisma.voiceEnrollment`; embeddings are
 * registered with a `FAISSIndex` for similarity search.
 *
 * NOTE(review): every method constructs a fresh `FAISSIndex`. This presumably
 * relies on the index sharing state across instances — confirm against
 * `faiss.index.ts`.
 */
export class VoiceEnrollmentService {
  /**
   * Enroll a new voice profile from audio data.
   *
   * Validates the audio, extracts an embedding, stores the enrollment row and
   * then adds the embedding to the similarity index under the new row's id.
   *
   * NOTE(review): the database insert and the index insert are not atomic; if
   * `faissIndex.add` throws, the row created just before it is left in place.
   *
   * @param userId - Owner of the enrollment.
   * @param name - Display name stored with the enrollment.
   * @param audioBuffer - Raw audio to enroll.
   * @returns The created enrollment record.
   * @throws Error propagated from preprocessing (e.g. audio too short/long),
   *   embedding extraction, the database or the index.
   */
  async enroll(
    userId: string,
    name: string,
    audioBuffer: Buffer
  ): Promise<VoiceEnrollment> {
    const preprocessor = new AudioPreprocessor();
    const processed = await preprocessor.preprocess(audioBuffer);

    const embeddingService = new EmbeddingService();
    const embedding = await embeddingService.extract(processed.buffer);
    const voiceHash = this.computeEmbeddingHash(embedding);

    const enrollment = await prisma.voiceEnrollment.create({
      data: {
        userId,
        name,
        voiceHash,
        audioMetadata: {
          ...processed.metadata,
          embeddingDimensions: embedding.length,
          enrollmentTimestamp: new Date().toISOString(),
        },
      },
    });

    // Index in FAISS for similarity search
    const faissIndex = new FAISSIndex();
    await faissIndex.add(enrollment.id, embedding);

    return enrollment;
  }

  /**
   * List all enrollments for a user, newest first.
   *
   * @param userId - Owner whose enrollments are returned.
   * @param options - Optional `isActive` filter and pagination; `limit`
   *   defaults to 50 and `offset` to 0.
   * @returns The matching enrollment records.
   */
  async listEnrollments(
    userId: string,
    options?: {
      isActive?: boolean;
      limit?: number;
      offset?: number;
    }
  ): Promise<VoiceEnrollment[]> {
    return prisma.voiceEnrollment.findMany({
      where: {
        userId,
        // Only constrain on isActive when the caller asked for it.
        ...(options?.isActive !== undefined && { isActive: options.isActive }),
      },
      orderBy: { createdAt: 'desc' },
      take: options?.limit ?? 50,
      skip: options?.offset ?? 0,
    });
  }

  /**
   * Get a single enrollment by ID, scoped to its owner.
   *
   * @param enrollmentId - Enrollment to look up.
   * @param userId - Owner the enrollment must belong to.
   * @returns The enrollment, or `null` when no row matches both ids.
   */
  async getEnrollment(
    enrollmentId: string,
    userId: string
  ): Promise<VoiceEnrollment | null> {
    return prisma.voiceEnrollment.findFirst({
      where: {
        id: enrollmentId,
        userId,
      },
    });
  }

  /**
   * Remove (deactivate) an enrollment.
   *
   * The embedding is removed from the similarity index and the row is marked
   * `isActive: false`; the row itself is not deleted.
   *
   * @param enrollmentId - Enrollment to deactivate.
   * @param userId - Owner the enrollment must belong to.
   * @returns The updated enrollment record.
   * @throws Error `'Enrollment not found'` when the user has no such enrollment.
   */
  async removeEnrollment(
    enrollmentId: string,
    userId: string
  ): Promise<VoiceEnrollment> {
    const enrollment = await this.getEnrollment(enrollmentId, userId);
    if (!enrollment) {
      throw new Error('Enrollment not found');
    }

    const faissIndex = new FAISSIndex();
    await faissIndex.remove(enrollmentId);

    return prisma.voiceEnrollment.update({
      where: { id: enrollmentId },
      data: { isActive: false },
    });
  }

  /**
   * Search for similar enrollments using FAISS.
   *
   * Index hits whose enrollment row can no longer be found are dropped; the
   * remaining hits keep the order returned by the index.
   *
   * NOTE(review): the lookup is not restricted by user or `isActive` — confirm
   * callers apply any ownership checks they need.
   *
   * @param embedding - Query embedding.
   * @param topK - Maximum number of index hits to request (default 5).
   * @returns Matched enrollments paired with their similarity score.
   */
  async findSimilar(
    embedding: number[],
    topK: number = 5
  ): Promise<Array<{ enrollment: VoiceEnrollment; similarity: number }>> {
    const faissIndex = new FAISSIndex();
    const hits = await faissIndex.search(embedding, topK);

    // Nothing matched: skip the database round-trip entirely.
    if (hits.length === 0) {
      return [];
    }

    const enrollments = await prisma.voiceEnrollment.findMany({
      where: { id: { in: hits.map((hit) => hit.id) } },
    });
    const enrollmentById = new Map(enrollments.map((e) => [e.id, e]));

    // flatMap drops hits without a row while letting the compiler narrow
    // `enrollment` itself, so no hand-written type predicate is needed.
    return hits.flatMap((hit) => {
      const enrollment = enrollmentById.get(hit.id);
      return enrollment === undefined
        ? []
        : [{ enrollment, similarity: hit.similarity }];
    });
  }

  /**
   * Derive a short identifier for an embedding.
   *
   * Components are rendered with 6 decimal places, joined with commas and
   * hashed with SHA-256; the result is `vp_<first 16 hex chars>_<dimensions>`.
   *
   * @param embedding - Embedding vector to fingerprint.
   * @returns The fingerprint string.
   */
  private computeEmbeddingHash(embedding: number[]): string {
    const content = embedding.map((v) => v.toFixed(6)).join(',');
    return `vp_${createHash('sha256').update(content).digest('hex').slice(0, 16)}_${embedding.length}`;
  }
}
|
|
|
|
// Audio analysis service
/**
 * Runs synthetic-voice detection on individual clips and stores each outcome
 * through `prisma.voiceAnalysis`.
 */
export class AnalysisService {
  /**
   * Analyze a single audio file for synthetic voice detection.
   *
   * The clip is validated by `AudioPreprocessor`, scored by
   * `EmbeddingService.analyze`, and flagged synthetic when the reported
   * confidence reaches `voicePrintEnv.SYNTHETIC_THRESHOLD`. The stored record
   * carries the raw detector output plus preprocessing metadata, a timestamp
   * and the model version label.
   *
   * @param userId - Owner of the analysis record.
   * @param audioBuffer - Raw audio to analyze.
   * @param options - Optional enrollment to associate and source URL of the
   *   audio; a missing URL is stored as an empty string.
   * @returns The persisted analysis record.
   */
  async analyze(
    userId: string,
    audioBuffer: Buffer,
    options?: {
      enrollmentId?: string;
      audioUrl?: string;
    }
  ): Promise<VoiceAnalysis> {
    const { buffer: preparedAudio, metadata: processedMetadata } =
      await new AudioPreprocessor().preprocess(audioBuffer);

    // The fingerprint covers the bytes as received, not the preprocessed form.
    const audioHash = this.computeAudioHash(audioBuffer);

    const detection = await new EmbeddingService().analyze(preparedAudio);
    const { confidence } = detection;

    const stored = await prisma.voiceAnalysis.create({
      data: {
        userId,
        enrollmentId: options?.enrollmentId,
        audioHash,
        isSynthetic: confidence >= voicePrintEnv.SYNTHETIC_THRESHOLD,
        confidence,
        analysisResult: {
          ...detection,
          processedMetadata,
          analysisTimestamp: new Date().toISOString(),
          modelVersion: 'ecapa-tdnn-v1-mock',
        },
        audioUrl: options?.audioUrl ?? '',
      },
    });

    return stored;
  }

  /**
   * Get analysis result by ID, scoped to its owner.
   *
   * @param analysisId - Analysis record to look up.
   * @param userId - Owner the record must belong to.
   * @returns The record, or `null` when no row matches both ids.
   */
  async getResult(
    analysisId: string,
    userId: string
  ): Promise<VoiceAnalysis | null> {
    const ownedRecord = { id: analysisId, userId };
    return prisma.voiceAnalysis.findFirst({ where: ownedRecord });
  }

  /**
   * Get analysis history for a user, newest first.
   *
   * @param userId - Owner whose history is returned.
   * @param options - Pagination (`limit` defaults to 50, `offset` to 0) and an
   *   optional `isSynthetic` filter.
   * @returns The matching analysis records.
   */
  async getHistory(
    userId: string,
    options?: {
      limit?: number;
      offset?: number;
      isSynthetic?: boolean;
    }
  ): Promise<VoiceAnalysis[]> {
    // Only constrain on isSynthetic when the caller supplied a value.
    const where =
      options?.isSynthetic === undefined
        ? { userId }
        : { userId, isSynthetic: options.isSynthetic };

    return prisma.voiceAnalysis.findMany({
      where,
      orderBy: { createdAt: 'desc' },
      take: options?.limit ?? 50,
      skip: options?.offset ?? 0,
    });
  }

  /**
   * Fingerprint raw audio bytes as `audio_<first 16 hex chars of SHA-256>`.
   *
   * @param buffer - Audio bytes to hash.
   * @returns The fingerprint string.
   */
  private computeAudioHash(buffer: Buffer): string {
    const hexDigest = createHash('sha256').update(buffer).digest('hex');
    const shortDigest = hexDigest.slice(0, 16);
    return `audio_${shortDigest}`;
  }
}
|
|
|
|
// Batch analysis service
/**
 * Runs `AnalysisService.analyze` over a list of files and aggregates the
 * outcome into a per-batch summary.
 */
export class BatchAnalysisService {
  /** Number of files analyzed concurrently within one chunk. */
  private readonly maxConcurrency = 5;

  /**
   * Analyze multiple audio files in a batch with parallel processing.
   *
   * Files are processed in consecutive chunks of `maxConcurrency`; each chunk
   * runs through `Promise.allSettled` and must finish before the next one
   * starts. A file that fails to analyze does not abort the batch: it is
   * counted in `summary.failed`, and its name and error message are included
   * in the completion log entry so the failure reason is not lost.
   *
   * @param userId - Owner of the resulting analysis records.
   * @param files - Files to analyze; `name` is used for failure reporting only.
   * @param options - Optional enrollment to associate with every analysis.
   * @returns The generated job id, the successful analysis records in input
   *   order, and counts of total / synthetic / natural / failed files.
   * @throws Error when `files.length` exceeds `voicePrintEnv.BATCH_MAX_FILES`.
   */
  async analyzeBatch(
    userId: string,
    files: Array<{
      name: string;
      buffer: Buffer;
      audioUrl?: string;
    }>,
    options?: {
      enrollmentId?: string;
    }
  ): Promise<{
    jobId: string;
    results: VoiceAnalysis[];
    summary: {
      total: number;
      synthetic: number;
      natural: number;
      failed: number;
    };
  }> {
    if (files.length > voicePrintEnv.BATCH_MAX_FILES) {
      throw new Error(
        `Batch too large: ${files.length} > ${voicePrintEnv.BATCH_MAX_FILES} max`
      );
    }

    const jobId = `batch_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
    logger.info('Starting batch analysis', {
      jobId,
      userId,
      totalFiles: files.length,
      enrollmentId: options?.enrollmentId
    });

    const analysisService = new AnalysisService();
    const results: VoiceAnalysis[] = [];
    const errors: Array<{ name: string; error: string }> = [];

    // Process files in chunks for concurrency control
    for (let start = 0; start < files.length; start += this.maxConcurrency) {
      const chunk = files.slice(start, start + this.maxConcurrency);

      // The async wrapper turns a synchronous throw into a rejection, so
      // allSettled observes every failure and no per-file try/catch is needed.
      const outcomes = await Promise.allSettled(
        chunk.map(async (file) =>
          analysisService.analyze(userId, file.buffer, {
            enrollmentId: options?.enrollmentId,
            audioUrl: file.audioUrl,
          })
        )
      );

      // allSettled preserves input order, so index i maps back to chunk[i].
      outcomes.forEach((outcome, index) => {
        if (outcome.status === 'fulfilled') {
          results.push(outcome.value);
          return;
        }
        const reason: unknown = outcome.reason;
        errors.push({
          name: chunk[index]?.name ?? 'unknown',
          error: reason instanceof Error ? reason.message : 'Analysis failed',
        });
      });
    }

    const synthetic = results.filter((result) => result.isSynthetic).length;
    const natural = results.length - synthetic;
    const failed = errors.length;

    // TODO: P3-2 - Persist batch jobId to database once schema is fixed
    // Schema errors need to be resolved first (AnalysisJob relation issues)
    logger.info('Batch analysis completed', {
      jobId,
      successfulResults: results.length,
      failedCount: failed,
      synthetic,
      natural,
      errors
    });

    return {
      jobId,
      results,
      summary: {
        total: files.length,
        synthetic,
        natural,
        failed,
      },
    };
  }
}
|
|
|
|
// Public surface, part 1: the modular implementations are re-exported under
// their original names so importers of this module keep working.
export { EmbeddingService } from './embedding.service';
export { FAISSIndex } from './faiss.index';

// Public surface, part 2: module-level singleton instances kept for
// backwards compatibility with callers that do not construct services.

/** Shared `AudioPreprocessor` instance. */
export const audioPreprocessor = new AudioPreprocessor();

/** Shared `VoiceEnrollmentService` instance. */
export const voiceEnrollmentService = new VoiceEnrollmentService();

/** Shared `AnalysisService` instance. */
export const analysisService = new AnalysisService();

/** Shared `BatchAnalysisService` instance. */
export const batchAnalysisService = new BatchAnalysisService();

/** Shared `EmbeddingService` instance (the modular implementation). */
export const embeddingService = new EmbeddingService();
|