// Change notes:
// - P2-1: Consolidated duplicate mock ML logic
// - P2-4: Standardized exports with deprecation warnings
// - P2-5: Replaced console.log with structured logger
// - P3-2: Persist batch jobId to database
// Migration: use ./analysis/AnalysisService and ./embedding/EmbeddingService
// (Source view: 650 lines, 19 KiB, TypeScript)
/**
 * VoicePrint Service - Legacy Module
 *
 * @deprecated This file contains legacy service implementations.
 * Migrate to the new modular structure:
 * - Use `import { AnalysisService } from './analysis/AnalysisService'` for analysis
 * - Use `import { BatchAnalysisService } from './analysis/BatchAnalysisService'` for batch operations
 * - Use `import { EmbeddingService } from './embedding/EmbeddingService'` for embeddings
 * - Use `import { VoiceEnrollmentService } from './enrollment/VoiceEnrollmentService'` for enrollment
 */
|
|
|
|
import { prisma, VoiceEnrollment, VoiceAnalysis } from '@shieldai/db';
|
|
import {
|
|
voicePrintEnv,
|
|
AnalysisJobStatus,
|
|
DetectionType,
|
|
ConfidenceLevel,
|
|
audioPreprocessingConfig,
|
|
voicePrintFeatureFlags,
|
|
} from './voiceprint.config';
|
|
import { checkFlag } from './voiceprint.feature-flags';
|
|
import { createHash } from 'crypto';
|
|
import { logger } from './logger';
|
|
|
|
// Audio preprocessing service
|
|
export class AudioPreprocessor {
  /**
   * Validate a clip's length and describe the normalized output format.
   *
   * The duration is estimated from the byte length, treating the buffer as
   * interleaved 16-bit samples at `sourceSampleRate` with `channels` channels.
   * No resampling, VAD or noise reduction happens yet: the input buffer is
   * returned unchanged, while the metadata reports the target sample rate and
   * channel count taken from `audioPreprocessingConfig`.
   *
   * @param audioBuffer - Raw audio bytes.
   * @param options - Source format hints; `sourceSampleRate` defaults to
   *   44100 and `channels` defaults to 1.
   * @returns The buffer plus metadata (sample rate, channels, estimated
   *   duration in seconds, format).
   * @throws Error when a format hint is not positive, or when the estimated
   *   duration is outside the `ENROLLMENT_MIN_DURATION_SEC` /
   *   `ENROLLMENT_MAX_DURATION_SEC` bounds of `voicePrintEnv`.
   */
  async preprocess(
    audioBuffer: Buffer,
    options?: {
      sourceSampleRate?: number;
      channels?: number;
    }
  ): Promise<{
    buffer: Buffer;
    metadata: {
      sampleRate: number;
      channels: number;
      duration: number;
      format: string;
    };
  }> {
    const sourceSampleRate = options?.sourceSampleRate ?? 44100;
    const sourceChannels = options?.channels ?? 1;

    // Reject hints that would make the duration estimate meaningless
    // (division by zero previously surfaced as a misleading "too long" error).
    if (!Number.isFinite(sourceSampleRate) || sourceSampleRate <= 0) {
      throw new Error(`Invalid source sample rate: ${sourceSampleRate}`);
    }
    if (!Number.isInteger(sourceChannels) || sourceChannels <= 0) {
      throw new Error(`Invalid channel count: ${sourceChannels}`);
    }

    // The caller-supplied channel count is now part of the estimate; it used
    // to be ignored, which over-estimated multi-channel clips.
    const duration = this.estimateDuration(audioBuffer, sourceSampleRate, sourceChannels);

    if (duration < voicePrintEnv.ENROLLMENT_MIN_DURATION_SEC) {
      throw new Error(
        `Audio too short: ${duration.toFixed(1)}s < ${voicePrintEnv.ENROLLMENT_MIN_DURATION_SEC}s minimum`
      );
    }

    if (duration > voicePrintEnv.ENROLLMENT_MAX_DURATION_SEC) {
      throw new Error(
        `Audio too long: ${duration.toFixed(1)}s > ${voicePrintEnv.ENROLLMENT_MAX_DURATION_SEC}s maximum`
      );
    }

    // TODO: Integrate with Python librosa/torchaudio for actual preprocessing
    // For MVP, return original buffer with target metadata
    return {
      buffer: audioBuffer,
      metadata: {
        sampleRate: audioPreprocessingConfig.sampleRate,
        channels: audioPreprocessingConfig.channels,
        duration,
        format: 'wav',
      },
    };
  }

  /**
   * Apply Voice Activity Detection to remove silence segments.
   *
   * Currently a pass-through: the same buffer instance is returned.
   *
   * @param buffer - Audio bytes to filter.
   * @returns The input buffer, unmodified.
   */
  async applyVAD(buffer: Buffer): Promise<Buffer> {
    // TODO: Integrate with Python webrtcvad or silero-vad
    // For MVP, return original buffer
    return buffer;
  }

  /**
   * Estimate audio duration from buffer size, sample rate and channel count.
   *
   * @param buffer - Audio bytes, counted as 2 bytes per sample per channel.
   * @param sampleRate - Samples per second per channel.
   * @param channels - Number of interleaved channels (defaults to 1).
   * @returns Estimated duration in seconds.
   */
  private estimateDuration(
    buffer: Buffer,
    sampleRate: number,
    channels: number = 1
  ): number {
    const bytesPerSample = 2;
    const frames = buffer.length / (bytesPerSample * channels);
    return frames / sampleRate;
  }
}
|
|
|
|
// Voice enrollment service
|
|
export class VoiceEnrollmentService {
  /**
   * Enroll a new voice profile from audio data.
   *
   * Runs the audio through `AudioPreprocessor`, extracts an embedding with
   * `EmbeddingService`, stores a `voiceEnrollment` row and then adds the
   * embedding to a `FAISSIndex` under the new row's id.
   *
   * @param userId - Owner of the enrollment.
   * @param name - Display name stored with the enrollment.
   * @param audioBuffer - Raw enrollment audio.
   * @returns The created enrollment record.
   * @throws Error propagated from preprocessing (e.g. audio too short/long),
   *   embedding extraction, the database or the index.
   */
  async enroll(
    userId: string,
    name: string,
    audioBuffer: Buffer
  ): Promise<VoiceEnrollment> {
    const preprocessor = new AudioPreprocessor();
    const processed = await preprocessor.preprocess(audioBuffer);

    const embeddingService = new EmbeddingService();
    const embedding = await embeddingService.extract(processed.buffer);
    const voiceHash = this.computeEmbeddingHash(embedding);

    const enrollment = await prisma.voiceEnrollment.create({
      data: {
        userId,
        name,
        voiceHash,
        audioMetadata: {
          ...processed.metadata,
          embeddingDimensions: embedding.length,
          enrollmentTimestamp: new Date().toISOString(),
        },
      },
    });

    // Index in FAISS for similarity search
    const faissIndex = new FAISSIndex();
    await faissIndex.add(enrollment.id, embedding);

    return enrollment;
  }

  /**
   * List enrollments for a user, newest first.
   *
   * @param userId - Owner whose enrollments are listed.
   * @param options - Optional `isActive` filter plus paging; `limit`
   *   defaults to 50 and `offset` to 0.
   * @returns The matching enrollment records.
   */
  async listEnrollments(
    userId: string,
    options?: {
      isActive?: boolean;
      limit?: number;
      offset?: number;
    }
  ): Promise<VoiceEnrollment[]> {
    return prisma.voiceEnrollment.findMany({
      where: {
        userId,
        // Only constrain on isActive when the caller asked for it.
        ...(options?.isActive !== undefined && { isActive: options.isActive }),
      },
      orderBy: { createdAt: 'desc' },
      take: options?.limit ?? 50,
      skip: options?.offset ?? 0,
    });
  }

  /**
   * Get a single enrollment by ID, scoped to its owner.
   *
   * @param enrollmentId - Enrollment to look up.
   * @param userId - Owner the enrollment must belong to.
   * @returns The enrollment, or `null` when no row matches both ids.
   */
  async getEnrollment(
    enrollmentId: string,
    userId: string
  ): Promise<VoiceEnrollment | null> {
    return prisma.voiceEnrollment.findFirst({
      where: {
        id: enrollmentId,
        userId,
      },
    });
  }

  /**
   * Remove (deactivate) an enrollment.
   *
   * The enrollment is removed from the `FAISSIndex` and its row is updated
   * with `isActive: false`; the row itself is not deleted.
   *
   * @param enrollmentId - Enrollment to deactivate.
   * @param userId - Owner the enrollment must belong to.
   * @returns The updated enrollment record.
   * @throws Error `'Enrollment not found'` when no enrollment matches.
   */
  async removeEnrollment(
    enrollmentId: string,
    userId: string
  ): Promise<VoiceEnrollment> {
    const enrollment = await this.getEnrollment(enrollmentId, userId);
    if (!enrollment) {
      throw new Error('Enrollment not found');
    }

    const faissIndex = new FAISSIndex();
    await faissIndex.remove(enrollmentId);

    return prisma.voiceEnrollment.update({
      where: { id: enrollmentId },
      data: { isActive: false },
    });
  }

  /**
   * Search for similar enrollments using FAISS.
   *
   * Index hits are joined to their database rows by id, so each similarity
   * stays attached to the right enrollment regardless of the order in which
   * the database returns rows. Hits whose enrollment row no longer exists
   * are omitted. The result keeps the order of the index hits.
   *
   * @param embedding - Query embedding.
   * @param topK - Maximum number of index hits to request (default 5).
   * @returns Enrollment/similarity pairs in index-hit order.
   */
  async findSimilar(
    embedding: number[],
    topK: number = 5
  ): Promise<Array<{ enrollment: VoiceEnrollment; similarity: number }>> {
    const faissIndex = new FAISSIndex();
    const hits = await faissIndex.search(embedding, topK);
    if (hits.length === 0) {
      // Nothing to join; skip the database round trip.
      return [];
    }

    const enrollments = await prisma.voiceEnrollment.findMany({
      where: { id: { in: hits.map((hit) => hit.id) } },
    });

    // An `in` filter does not promise rows in the order of the id list, nor
    // one row per id, so look rows up by id instead of by position.
    const enrollmentsById = new Map<string, VoiceEnrollment>(
      enrollments.map((row): [string, VoiceEnrollment] => [row.id, row])
    );

    const matches: Array<{ enrollment: VoiceEnrollment; similarity: number }> = [];
    for (const hit of hits) {
      const enrollment = enrollmentsById.get(hit.id);
      if (enrollment !== undefined) {
        matches.push({ enrollment, similarity: hit.similarity });
      }
    }
    return matches;
  }

  /**
   * Build the stored voice hash for an embedding.
   *
   * @param embedding - Embedding vector to fingerprint.
   * @returns `vp_<first 16 hex chars of SHA-256 over the JSON-serialized
   *   vector>_<vector length>`.
   */
  private computeEmbeddingHash(embedding: number[]): string {
    const hash = createHash('sha256')
      .update(JSON.stringify(embedding))
      .digest('hex');
    return `vp_${hash.substring(0, 16)}_${embedding.length}`;
  }
}
|
|
|
|
// Audio analysis service
|
|
export class AnalysisService {
  /**
   * Run synthetic-voice detection on one audio clip and persist the outcome.
   *
   * The clip is preprocessed, hashed (over the original bytes), scored by
   * `EmbeddingService.analyze`, and stored as a `voiceAnalysis` row. A clip
   * counts as synthetic when its confidence reaches
   * `voicePrintEnv.SYNTHETIC_THRESHOLD`.
   *
   * @param userId - Owner of the analysis.
   * @param audioBuffer - Raw audio to analyze.
   * @param options - Optional enrollment link and source URL; a missing URL
   *   is stored as an empty string.
   * @returns The stored analysis record.
   */
  async analyze(
    userId: string,
    audioBuffer: Buffer,
    options?: {
      enrollmentId?: string;
      audioUrl?: string;
    }
  ): Promise<VoiceAnalysis> {
    const { buffer: preparedAudio, metadata: processedMetadata } =
      await new AudioPreprocessor().preprocess(audioBuffer);

    // The fingerprint covers the bytes as received, not the processed buffer.
    const audioHash = this.computeAudioHash(audioBuffer);

    const detection = await new EmbeddingService().analyze(preparedAudio);
    const { confidence } = detection;

    return prisma.voiceAnalysis.create({
      data: {
        userId,
        enrollmentId: options?.enrollmentId,
        audioHash,
        isSynthetic: confidence >= voicePrintEnv.SYNTHETIC_THRESHOLD,
        confidence,
        analysisResult: {
          ...detection,
          processedMetadata,
          analysisTimestamp: new Date().toISOString(),
          modelVersion: 'ecapa-tdnn-v1-mock',
        },
        audioUrl: options?.audioUrl ?? '',
      },
    });
  }

  /**
   * Fetch one analysis, restricted to the given owner.
   *
   * @param analysisId - Analysis to look up.
   * @param userId - Owner the analysis must belong to.
   * @returns The analysis, or `null` when no row matches both ids.
   */
  async getResult(
    analysisId: string,
    userId: string
  ): Promise<VoiceAnalysis | null> {
    const where = { id: analysisId, userId };
    return prisma.voiceAnalysis.findFirst({ where });
  }

  /**
   * Page through a user's analyses, newest first.
   *
   * @param userId - Owner whose analyses are listed.
   * @param options - Paging (`limit` defaults to 50, `offset` to 0) and an
   *   optional `isSynthetic` filter.
   * @returns The matching analysis records.
   */
  async getHistory(
    userId: string,
    options?: {
      limit?: number;
      offset?: number;
      isSynthetic?: boolean;
    }
  ): Promise<VoiceAnalysis[]> {
    const where: { userId: string; isSynthetic?: boolean } = { userId };
    if (options?.isSynthetic !== undefined) {
      where.isSynthetic = options.isSynthetic;
    }

    return prisma.voiceAnalysis.findMany({
      where,
      orderBy: { createdAt: 'desc' },
      take: options?.limit ?? 50,
      skip: options?.offset ?? 0,
    });
  }

  /**
   * Fingerprint raw audio bytes.
   *
   * @param buffer - Bytes to hash.
   * @returns `audio_` followed by the first 16 hex characters of the
   *   SHA-256 digest.
   */
  private computeAudioHash(buffer: Buffer): string {
    const digest = createHash('sha256').update(buffer).digest('hex');
    return `audio_${digest.slice(0, 16)}`;
  }
}
|
|
|
|
// Batch analysis service
|
|
export class BatchAnalysisService {
  /**
   * Analyze multiple audio files in a batch.
   *
   * Files are processed in consecutive chunks of five; each chunk runs
   * concurrently through `AnalysisService.analyze`. A file whose analysis
   * throws is logged and counted as failed without aborting the batch, so
   * `synthetic + natural + failed` always equals `total`.
   *
   * After processing, a `batch_jobs` row is inserted and every successful
   * analysis is tagged with the job id. That persistence step is best
   * effort: a failure is logged as a warning and the results are still
   * returned. The job row is written with status `'completed'` even when
   * some files failed.
   *
   * @param userId - Owner of the batch.
   * @param files - Files to analyze; `name` is used for logging only.
   * @param options - Optional enrollment id applied to every file.
   * @returns The generated job id, the successful analyses and a summary.
   * @throws Error when `files.length` exceeds `voicePrintEnv.BATCH_MAX_FILES`.
   */
  async analyzeBatch(
    userId: string,
    files: Array<{
      name: string;
      buffer: Buffer;
      audioUrl?: string;
    }>,
    options?: {
      enrollmentId?: string;
    }
  ): Promise<{
    jobId: string;
    results: VoiceAnalysis[];
    summary: {
      total: number;
      synthetic: number;
      natural: number;
      failed: number;
    };
  }> {
    if (files.length > voicePrintEnv.BATCH_MAX_FILES) {
      throw new Error(
        `Batch too large: ${files.length} > ${voicePrintEnv.BATCH_MAX_FILES} max`
      );
    }

    const jobId = `batch_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
    logger.info('Starting batch analysis', { jobId, userId, fileCount: files.length });

    const analysisService = new AnalysisService();
    const results: VoiceAnalysis[] = [];
    let synthetic = 0;
    let natural = 0;
    let failed = 0;

    // Process with concurrency control
    const concurrencyLimit = 5;
    for (let i = 0; i < files.length; i += concurrencyLimit) {
      const chunk = files.slice(i, i + concurrencyLimit);
      const outcomes = await Promise.allSettled(
        chunk.map(async (file) => {
          try {
            const result = await analysisService.analyze(userId, file.buffer, {
              enrollmentId: options?.enrollmentId,
              audioUrl: file.audioUrl,
            });
            return { success: true as const, result, name: file.name };
          } catch (error: unknown) {
            logger.error('Batch analysis failed for file', { fileName: file.name, jobId, error });
            return {
              success: false as const,
              error: error instanceof Error ? error.message : 'Unknown error',
              name: file.name,
            };
          }
        })
      );

      for (const outcome of outcomes) {
        // A rejection can only come from the error-reporting path itself;
        // count it as a failure rather than dropping the file from the
        // summary.
        if (outcome.status === 'rejected') {
          failed++;
          continue;
        }

        const settled = outcome.value;
        if (!settled.success) {
          failed++;
          continue;
        }

        results.push(settled.result);
        if (settled.result.isSynthetic) {
          synthetic++;
        } else {
          natural++;
        }
      }
    }

    // Persist batch jobId to database (best effort; failures are logged only).
    // The raw statements are static strings with positional parameters, so no
    // caller-supplied value is interpolated into the SQL text.
    try {
      await prisma.$transaction([
        prisma.$executeRawUnsafe('INSERT INTO batch_jobs (id, user_id, total_files, status, created_at) VALUES ($1, $2, $3, $4, NOW()) ON CONFLICT (id) DO NOTHING', jobId, userId, files.length, 'completed'),
        ...results.map((result) =>
          prisma.$executeRawUnsafe('UPDATE voice_analysis SET batch_job_id = $1 WHERE id = $2', jobId, result.id)
        ),
      ]);
    } catch (err: unknown) {
      logger.warn('Failed to persist batch jobId', { jobId, error: err instanceof Error ? err.message : String(err) });
    }

    logger.info('Batch analysis completed', {
      jobId,
      total: files.length,
      synthetic,
      natural,
      failed,
    });

    return {
      jobId,
      results,
      summary: {
        total: files.length,
        synthetic,
        natural,
        failed,
      },
    };
  }
}
|
|
|
|
// Deprecated: Use embedding/EmbeddingService.ts instead
|
|
// This class is kept for backward compatibility but delegates to the canonical service
|
|
/**
 * Backward-compatible facade over the in-file `CanonicalEmbeddingService`.
 *
 * @deprecated Use `import { EmbeddingService } from './embedding/EmbeddingService'` instead
 */
export class EmbeddingService {
  private initialized = false;

  /**
   * Mark this wrapper as initialized, logging a deprecation warning the
   * first time it is called on an instance.
   *
   * @deprecated Use the canonical EmbeddingService from embedding/EmbeddingService.ts
   */
  async initialize(): Promise<void> {
    if (!this.initialized) {
      this.initialized = true;
      logger.warn('Deprecated EmbeddingService initialized - migrate to embedding/EmbeddingService.ts');
    }
  }

  /**
   * Produce the embedding vector for an audio buffer.
   *
   * Initializes this wrapper, then forwards to a fresh
   * `CanonicalEmbeddingService` and unwraps its `vector`.
   *
   * @param audioBuffer - Audio to embed.
   * @returns The embedding vector.
   * @deprecated Use the canonical EmbeddingService from embedding/EmbeddingService.ts
   */
  async extract(audioBuffer: Buffer): Promise<number[]> {
    await this.initialize();
    // Hand off to the canonical implementation and keep only the vector.
    const { vector } = await new CanonicalEmbeddingService().extract(audioBuffer);
    return vector;
  }

  /**
   * Embedding plus synthetic-voice scoring for an audio buffer.
   *
   * Forwards to a fresh `CanonicalEmbeddingService` and renames its
   * `vector` field to `embedding`. Unlike `extract`, this method does not
   * call `initialize`.
   *
   * @param audioBuffer - Audio to score.
   * @returns Confidence, detection type, feature map and embedding.
   * @deprecated Use AnalysisService from analysis/AnalysisService.ts instead
   */
  async analyze(audioBuffer: Buffer): Promise<{
    confidence: number;
    detectionType: DetectionType;
    features: Record<string, number>;
    embedding: number[];
  }> {
    const delegate = new CanonicalEmbeddingService();
    const { confidence, detectionType, features, vector } = await delegate.analyze(audioBuffer);
    return { confidence, detectionType, features, embedding: vector };
  }
}
|
|
|
|
// Canonical embedding service - single source of truth for embedding logic
|
|
/**
 * Mock embedding and synthetic-voice scoring.
 *
 * No model is loaded: embeddings are pseudo-random vectors seeded from the
 * audio bytes, and the detection score is a heuristic that includes a
 * `Math.random()` term.
 */
class CanonicalEmbeddingService {
  private initialized = false;

  /** Mark the service as initialized and log once per instance. */
  async initialize(): Promise<void> {
    if (this.initialized) return;
    this.initialized = true;
    logger.info('Canonical EmbeddingService initialized', { modelVersion: 'ecapa-tdnn-v1-mock' });
  }

  /**
   * Derive a deterministic unit-length mock embedding from audio bytes.
   *
   * The seed is a rolling hash over the complete 32-bit little-endian words
   * found in the first 1024 bytes; trailing bytes that do not fill a word
   * are ignored. Buffers shorter than four bytes therefore share seed 0.
   *
   * @param audioBuffer - Audio bytes used to seed the generator.
   * @returns The normalized vector and its dimension
   *   (`voicePrintEnv.EMBEDDING_DIMENSIONS`).
   */
  async extract(audioBuffer: Buffer): Promise<{ vector: number[]; dimension: number }> {
    await this.initialize();
    // Intended to match the mock generation in embedding/EmbeddingService.ts
    // (that file is not visible here - keep the two in sync when editing).
    const dims = voicePrintEnv.EMBEDDING_DIMENSIONS;
    let hash = 0;
    const sampleSize = Math.min(audioBuffer.length, 1024);
    // Stop at the last complete word: readInt32LE throws a RangeError when
    // fewer than four bytes remain, which used to happen for short buffers
    // whose length is not a multiple of four.
    for (let i = 0; i + 4 <= sampleSize; i += 4) {
      hash = ((hash << 5) - hash + audioBuffer.readInt32LE(i)) | 0;
    }
    const seed = Math.abs(hash);
    const rng = this.createRNG(seed);

    const vector: number[] = [];
    for (let i = 0; i < dims; i++) {
      // Box-Muller transform. The generator can return exactly 0, and
      // log(0) would make the sample infinite, so clamp the first uniform.
      const u1 = Math.max(rng(), Number.EPSILON);
      const u2 = rng();
      const gauss = Math.sqrt(-2 * Math.log(u1)) * Math.cos(2 * Math.PI * u2);
      vector.push(parseFloat(gauss.toFixed(6)));
    }

    const norm = Math.sqrt(vector.reduce((s, v) => s + v * v, 0));
    // Guard the degenerate all-zero vector so normalization cannot yield NaN.
    const scale = norm > 0 ? norm : 1;
    const normalized = vector.map((v) => parseFloat((v / scale).toFixed(6)));
    return { vector: normalized, dimension: dims };
  }

  /**
   * Score audio for synthetic-voice likelihood using a mock heuristic.
   *
   * The confidence combines the mean byte amplitude, the spread of the
   * embedding and a `Math.random()` term, so repeated calls on the same
   * audio can return different confidences. An empty buffer is scored with
   * a mean amplitude and zero-crossing rate of 0 instead of NaN.
   *
   * @param audioBuffer - Audio bytes, read as unsigned 8-bit values.
   * @returns Confidence (at most 1), the detection type chosen against
   *   `voicePrintEnv.SYNTHETIC_THRESHOLD`, a feature map and the embedding.
   */
  async analyze(audioBuffer: Buffer): Promise<{
    confidence: number;
    detectionType: DetectionType;
    features: Record<string, number>;
    vector: number[];
  }> {
    const { vector } = await this.extract(audioBuffer);
    const byteCount = audioBuffer.length;

    // Heuristic for synthetic detection
    const meanAmplitude =
      byteCount > 0 ? audioBuffer.reduce((s, v) => s + v, 0) / byteCount / 255 : 0;

    // Compute the mean once instead of once per element.
    const embeddingMean =
      vector.length > 0 ? vector.reduce((a, b) => a + b, 0) / vector.length : 0;
    const embeddingStdDev =
      Math.sqrt(
        vector.reduce((s, v) => s + (v - embeddingMean) ** 2, 0) / vector.length
      ) || 0;

    const amplitudeScore = Math.abs(meanAmplitude - 0.5) * 2;
    const embeddingScore = 1.0 - Math.min(1.0, embeddingStdDev * 2);
    const confidence = Math.min(1.0, amplitudeScore * 0.3 + embeddingScore * 0.4 + Math.random() * 0.3);

    const detectionType = confidence >= voicePrintEnv.SYNTHETIC_THRESHOLD
      ? DetectionType.SYNTHETIC_VOICE
      : DetectionType.NATURAL;

    // Count sign changes around the unsigned 8-bit midpoint (128).
    const zeroCrossings = audioBuffer.reduce((count, v, i, arr) => {
      return i > 0 && ((v - 128) * (arr[i - 1] - 128) < 0) ? count + 1 : count;
    }, 0);

    const features = {
      mean_amplitude: meanAmplitude,
      zero_crossing_rate: byteCount > 0 ? zeroCrossings / byteCount : 0,
      embedding_energy: vector.reduce((s, v) => s + v * v, 0),
      embedding_entropy: this.calculateEntropy(vector),
    };

    return { confidence, detectionType, features, vector };
  }

  /**
   * Create a seeded linear congruential generator.
   *
   * @param seed - Initial state.
   * @returns A function yielding successive values in the closed range
   *   [0, 1]; the state is kept to 32 bits between calls.
   */
  private createRNG(seed: number): () => number {
    return () => {
      seed = (seed * 1664525 + 1013904223) & 0xffffffff;
      return (seed >>> 0) / 0xffffffff;
    };
  }

  /**
   * Shannon entropy (in bits) of a 20-bin histogram over the values.
   *
   * @param values - Samples to bin between their minimum and maximum.
   * @returns The entropy; 0 when all values are equal or the list is empty.
   */
  private calculateEntropy(values: number[]): number {
    const bins = 20;
    const histogram = new Array(bins).fill(0);
    const min = Math.min(...values);
    const max = Math.max(...values);
    // A zero range (all values equal) falls back to 1 to avoid dividing by 0.
    const range = max - min || 1;

    for (const v of values) {
      // The maximum value would land in bin `bins`; clamp it to the last bin.
      const bin = Math.min(bins - 1, Math.floor(((v - min) / range) * bins));
      histogram[bin]++;
    }

    let entropy = 0;
    const total = values.length;
    for (const count of histogram) {
      if (count > 0) {
        const p = count / total;
        entropy -= p * Math.log2(p);
      }
    }
    return entropy;
  }
}
|
|
|
|
// FAISS index wrapper for voice fingerprint matching
|
|
/**
 * Placeholder wrapper around a FAISS similarity index.
 *
 * No index is loaded or written yet: every operation only marks the
 * instance as initialized and logs what it would do.
 */
export class FAISSIndex {
  private indexPath: string;
  private initialized = false;

  /**
   * @param path - Index location; falls back to
   *   `voicePrintEnv.FAISS_INDEX_PATH` when omitted.
   */
  constructor(path?: string) {
    this.indexPath = path ?? voicePrintEnv.FAISS_INDEX_PATH;
  }

  /**
   * Prepare the index for use. Only the first call on an instance has any
   * effect (it logs the configured path).
   */
  async initialize(): Promise<void> {
    if (!this.initialized) {
      // TODO: Load FAISS index from disk
      // const faiss = require('faiss-node');
      // this.index = faiss.readIndex(this.indexPath);

      this.initialized = true;
      logger.info('FAISS index initialized', { indexPath: this.indexPath });
    }
  }

  /**
   * Register an enrollment's embedding (currently logged only).
   *
   * @param enrollmentId - Id the embedding belongs to.
   * @param embedding - Vector to index.
   */
  async add(enrollmentId: string, embedding: number[]): Promise<void> {
    await this.initialize();

    // TODO: Add to FAISS index
    // this.index.add([embedding]);
    // Store mapping: enrollmentId -> index position
    const embeddingDimensions = embedding.length;
    logger.info('Added enrollment to FAISS index', { enrollmentId, embeddingDimensions });
  }

  /**
   * Drop an enrollment from the index (currently logged only).
   *
   * @param enrollmentId - Id to remove.
   */
  async remove(enrollmentId: string): Promise<void> {
    await this.initialize();

    // TODO: Remove from FAISS index
    logger.info('Removed enrollment from FAISS index', { enrollmentId });
  }

  /**
   * Look up the nearest enrollments for an embedding.
   *
   * @param embedding - Query vector (unused by the mock).
   * @param topK - Maximum number of hits (unused by the mock; default 5).
   * @returns Always an empty list until the index is wired in.
   */
  async search(
    embedding: number[],
    topK: number = 5
  ): Promise<Array<{ id: string; similarity: number }>> {
    await this.initialize();

    // TODO: Query FAISS index
    // const [distances, indices] = this.index.search([embedding], topK);
    // Map indices back to enrollment IDs

    // Mock: no hits
    const noHits: Array<{ id: string; similarity: number }> = [];
    return noHits;
  }

  /**
   * Persist the index (currently logged only).
   */
  async save(): Promise<void> {
    await this.initialize();
    // TODO: Write FAISS index to disk
    logger.info('FAISS index saved', { indexPath: this.indexPath });
  }
}
|
|
|
|
// New code should construct the exported classes itself (or receive them via
// dependency injection). The module-level instances below are created when
// this module is loaded and exist only for backward compatibility.

/** @deprecated Use `new AudioPreprocessor()` instead */
export const audioPreprocessor = new AudioPreprocessor();

/** @deprecated Use `new VoiceEnrollmentService()` instead */
export const voiceEnrollmentService = new VoiceEnrollmentService();

/** @deprecated Use `new AnalysisService()` instead */
export const analysisService = new AnalysisService();

/** @deprecated Use `new BatchAnalysisService()` instead */
export const batchAnalysisService = new BatchAnalysisService();

/** @deprecated Use `new EmbeddingService()` instead */
export const embeddingService = new EmbeddingService();
|