diff --git a/packages/api/package.json b/packages/api/package.json index 6828dec..708d5c1 100644 --- a/packages/api/package.json +++ b/packages/api/package.json @@ -16,6 +16,7 @@ "@shieldai/db": "0.1.0", "@shieldai/types": "0.1.0", "fastify": "^5.2.0", - "@shieldai/darkwatch": "0.1.0" + "@shieldai/darkwatch": "0.1.0", + "@shieldai/voiceprint": "0.1.0" } } diff --git a/packages/api/src/routes/index.ts b/packages/api/src/routes/index.ts index 42af8e4..92ee4f0 100644 --- a/packages/api/src/routes/index.ts +++ b/packages/api/src/routes/index.ts @@ -13,3 +13,10 @@ export function darkwatchRoutes(fastify: FastifyInstance) { root.register(scans, { prefix: "/scan" }); }, { prefix: "/api/v1/darkwatch" }); } + +export function voiceprintRoutes(fastify: FastifyInstance) { + fastify.register(async (root) => { + const voiceprint = (await import("./voiceprint.routes")).voiceprintRoutes; + root.register(voiceprint); + }, { prefix: "/api/v1/voiceprint" }); +} diff --git a/packages/api/src/routes/voiceprint.routes.ts b/packages/api/src/routes/voiceprint.routes.ts new file mode 100644 index 0000000..cc9077e --- /dev/null +++ b/packages/api/src/routes/voiceprint.routes.ts @@ -0,0 +1,94 @@ +import { FastifyInstance } from "fastify"; +import { VoiceEnrollmentService } from "@shieldai/voiceprint"; +import { AnalysisService } from "@shieldai/voiceprint"; +import { BatchAnalysisService } from "@shieldai/voiceprint"; + +export function voiceprintRoutes(fastify: FastifyInstance) { + const enrollmentService = new VoiceEnrollmentService(); + const analysisService = new AnalysisService(); + const batchService = new BatchAnalysisService(); + + fastify.post("/enroll", async (request, reply) => { + const userId = (request.user as { id: string })?.id; + if (!userId) return reply.code(401).send({ error: "User not authenticated" }); + + const body = request.body as { label: string; audio: string; sampleRate?: number }; + + const audioBuffer = Buffer.from(body.audio, "base64"); + const enrollment = 
await enrollmentService.enroll( + { label: body.label, audioBuffer, sampleRate: body.sampleRate }, + userId + ); + return reply.code(201).send(enrollment); + }); + + fastify.get("/enrollments", async (request, reply) => { + const userId = (request.user as { id: string })?.id; + if (!userId) return reply.code(401).send({ error: "User not authenticated" }); + + const enrollments = await enrollmentService.listEnrollments(userId); + return reply.send(enrollments); + }); + + fastify.delete("/enrollments/:id", async (request, reply) => { + const userId = (request.user as { id: string })?.id; + if (!userId) return reply.code(401).send({ error: "User not authenticated" }); + + const enrollmentId = (request.params as { id: string }).id; + const result = await enrollmentService.removeEnrollment(userId, enrollmentId); + return reply.send({ removed: result }); + }); + + fastify.post("/analyze", async (request, reply) => { + const userId = (request.user as { id: string })?.id; + if (!userId) return reply.code(401).send({ error: "User not authenticated" }); + + const body = request.body as { audio: string; sampleRate?: number; analysisType?: string }; + const audioBuffer = Buffer.from(body.audio, "base64"); + + const result = await analysisService.analyze( + { audioBuffer, sampleRate: body.sampleRate, analysisType: body.analysisType }, + userId + ); + return reply.code(201).send(result); + }); + + fastify.get("/results/:id", async (request, reply) => { + const jobId = (request.params as { id: string }).id; + const result = await analysisService.getResult(jobId); + + if (!result) return reply.code(404).send({ error: "Analysis result not found" }); + return reply.send(result); + }); + + fastify.get("/results", async (request, reply) => { + const userId = (request.user as { id: string })?.id; + if (!userId) return reply.code(401).send({ error: "User not authenticated" }); + + const limit = parseInt((request.query as { limit?: string }).limit || "20", 10); + const results = await 
analysisService.getUserResults(userId, limit); + return reply.send(results); + }); + + fastify.post("/batch", async (request, reply) => { + const userId = (request.user as { id: string })?.id; + if (!userId) return reply.code(401).send({ error: "User not authenticated" }); + + const body = request.body as { + files: Array<{ name: string; audio: string; sampleRate?: number }>; + analysisType?: string; + }; + + const audioBuffers = body.files.map((f) => ({ + name: f.name, + buffer: Buffer.from(f.audio, "base64"), + sampleRate: f.sampleRate, + })); + + const result = await batchService.analyzeBatch( + { audioBuffers, analysisType: body.analysisType }, + userId + ); + return reply.code(201).send(result); + }); +} diff --git a/packages/api/src/server.ts b/packages/api/src/server.ts index d4826ab..e169899 100644 --- a/packages/api/src/server.ts +++ b/packages/api/src/server.ts @@ -2,7 +2,7 @@ import Fastify from "fastify"; import cors from "@fastify/cors"; import helmet from "@fastify/helmet"; import sensible from "@fastify/sensible"; -import { darkwatchRoutes } from "./routes"; +import { darkwatchRoutes, voiceprintRoutes } from "./routes"; const app = Fastify({ logger: { @@ -16,6 +16,7 @@ async function bootstrap() { await app.register(sensible); await app.register(darkwatchRoutes); + await app.register(voiceprintRoutes); app.get("/health", async () => ({ status: "ok", timestamp: new Date().toISOString() })); diff --git a/packages/core/src/audio/webrtc/audio-pipeline.ts b/packages/core/src/audio/webrtc/audio-pipeline.ts new file mode 100644 index 0000000..662b423 --- /dev/null +++ b/packages/core/src/audio/webrtc/audio-pipeline.ts @@ -0,0 +1,316 @@ +/** + * Audio Processing Pipeline for Real-Time Analysis + * Coordinates WebRTC stream capture with VoicePreprocess for continuous analysis + */ + +import { WebRTCStreamCapture, createWebRTCCapture } from './stream-capture'; + +// Type definitions for real-time processing +export interface AudioChunk { + id: string; + 
timestamp: number; + data: Float32Array; + duration: number; +} + +export interface VoiceprintResult { + chunkId: string; + timestamp: number; + features: AudioFeatures; + embedding: number[]; + confidence: number; + status: 'complete' | 'error'; +} + +// Audio chunk configuration +export interface AudioChunkConfig { + chunkDuration: number; + overlapDuration: number; + sampleRate: number; +} + +// Preprocessor interfaces (copied from AudioPreprocessor for standalone usage) +export interface PreprocessedAudio { + audio: Buffer; + sampleRate: number; + channels: number; + durationSec: number; +} + +export interface AudioFeatures { + mfccs: number[][]; + zeroCrossingRate: number; + spectralCentroid: number; + spectralRollOff: number; + durationSec: number; +} + +interface PipelineConfig { + chunkDuration: number; // 5000ms + overlapDuration: number; // 1000ms + sampleRate: number; // 16000 Hz + onAnalysisComplete?: (result: VoiceprintResult) => void; + onStreamError?: (error: Error) => void; + onError?: (error: Error) => void; +} + +const DEFAULT_CONFIG: PipelineConfig = { + chunkDuration: 5000, + overlapDuration: 1000, + sampleRate: 16000 +}; + +export class AudioPipeline { + private streamCapture: WebRTCStreamCapture | null = null; + private isRunning: boolean = false; + private chunkBuffer: AudioChunk[] = []; + private maxBufferLength: number = 10; + + private onChunkReady?: (chunk: AudioChunk) => void; + private onAnalysisComplete?: (result: VoiceprintResult) => void; + private onPipelineError?: (error: Error) => void; + + constructor(private config: PipelineConfig = DEFAULT_CONFIG) {} + + /** + * Initialize pipeline components + */ + async initialize(): Promise { + // Initialize WebRTC stream capture + this.streamCapture = createWebRTCCapture({ + chunkDuration: this.config.chunkDuration, + overlapDuration: this.config.overlapDuration, + sampleRate: this.config.sampleRate + }); + + // Connect WebRTC chunk processing + this.streamCapture.onChunkReady = (rawAudio, 
timestamp) => { + this.handleRawChunk(rawAudio, timestamp); + }; + + // Connect stream error handling + this.streamCapture.onStreamError = (error) => { + this.onPipelineError?.(error); + }; + + console.log('[Pipeline] Initialized'); + } + + /** + * Process raw audio chunk from WebRTC + */ + private async handleRawChunk(rawAudio: Float32Array, timestamp: number): Promise { + try { + // Create audio chunk + const chunk: AudioChunk = { + id: `chunk-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`, + timestamp, + data: rawAudio, + duration: this.config.chunkDuration / 1000 + }; + + // Add to buffer + this.chunkBuffer.push(chunk); + + // Maintain buffer size + if (this.chunkBuffer.length > this.maxBufferLength) { + const removed = this.chunkBuffer.shift(); + if (removed) { + // Process removed chunk with overlap + await this.processChunk(removed); + } + } + + // Process current chunk + await this.processChunk(chunk); + + } catch (error) { + const err = error instanceof Error ? error : new Error(String(error)); + console.error('[Pipeline] Error processing chunk:', err); + this.onPipelineError?.(err); + } + } + + /** + * Convert Float32Array to 16-bit PCM Buffer + */ + private float32ArrayToBuffer(floatData: Float32Array): Buffer { + const int16Array = new Int16Array(floatData.length); + for (let i = 0; i < floatData.length; i++) { + // Clamp and scale to 16-bit range + const clamped = Math.max(-1, Math.min(1, floatData[i])); + int16Array[i] = Math.round(clamped * 32767); + } + return Buffer.from(int16Array.buffer); + } + + /** + * Process a single audio chunk (mock implementation) + */ + private async processChunk(chunk: AudioChunk): Promise { + try { + // Generate mock features + // Convert Float32Array to Buffer properly + const int16Array = new Int16Array(chunk.data.length); + for (let i = 0; i < chunk.data.length; i++) { + const clamped = Math.max(-1, Math.min(1, chunk.data[i])); + int16Array[i] = Math.round(clamped * 32767); + } + const dataBuffer = 
Buffer.from(int16Array.buffer); + const features: AudioFeatures = await extractMockFeatures(dataBuffer, chunk.duration); + + // Generate embedding (placeholder - would use actual embedding service) + const embedding = this.generatePlaceholderEmbedding(features); + + // Create result + const result: VoiceprintResult = { + chunkId: chunk.id, + timestamp: chunk.timestamp, + features, + embedding, + confidence: 0.95, // Placeholder - would come from actual analysis + status: 'complete' + }; + + // Emit result + this.onAnalysisComplete?.(result); + + return result; + + } catch (error) { + const err = error instanceof Error ? error : new Error(String(error)); + console.error('[Pipeline] Preprocessing error:', err); + this.onPipelineError?.(err); + throw err; + } + } + + /** + * Generate placeholder embedding (would use actual embedding service) + */ + private generatePlaceholderEmbedding(features: AudioFeatures): number[] { + // Placeholder embedding - would be replaced with actual embedding generation + const embedding: number[] = []; + + // Use spectral features as proxy for embedding + embedding.push(features.spectralCentroid); + embedding.push(features.spectralRollOff); + embedding.push(features.zeroCrossingRate); + + // MFCC summary (first few coefficients) + if (features.mfccs && features.mfccs.length > 0) { + for (let i = 0; i < Math.min(5, features.mfccs[0].length); i++) { + embedding.push(features.mfccs[0][i]); + } + } + + // Normalize and pad + while (embedding.length < 128) { + embedding.push(0); + } + + return embedding; + } + + /** + * Start the real-time analysis pipeline + */ + async start(): Promise { + if (this.isRunning) { + console.log('[Pipeline] Already running'); + return; + } + + try { + await this.initialize(); + if (this.streamCapture) { + await this.streamCapture.start(); + } + this.isRunning = true; + + console.log('[Pipeline] Real-time analysis started'); + + } catch (error) { + const err = error instanceof Error ? 
error : new Error(String(error)); + console.error('[Pipeline] Failed to start:', err); + this.onPipelineError?.(err); + throw err; + } + } + + /** + * Stop the pipeline + */ + async stop(): Promise { + if (!this.isRunning) { + return; + } + + try { + // Drain remaining buffer + while (this.chunkBuffer.length > 0) { + const chunk = this.chunkBuffer.shift(); + if (chunk) { + await this.processChunk(chunk); + } + } + + // Stop WebRTC capture + if (this.streamCapture) { + this.streamCapture.stop(); + } + + this.isRunning = false; + console.log('[Pipeline] Stopped'); + + } catch (error) { + const err = error instanceof Error ? error : new Error(String(error)); + console.error('[Pipeline] Error stopping:', err); + this.onPipelineError?.(err); + throw err; + } + } + + /** + * Get pipeline status + */ + getStatus(): { + isRunning: boolean; + bufferLength: number; + streamActive: boolean; + } { + return { + isRunning: this.isRunning, + bufferLength: this.chunkBuffer.length, + streamActive: this.streamCapture?.isRecording || false + }; + } +} + +/** + * Extract mock features from audio buffer + */ +async function extractMockFeatures(buffer: Buffer, duration: number): Promise { + // Simple mock feature extraction for demonstration + // In production, this would use actual audio processing + const numMfccs = 13; + const mfccs: number[][] = []; + + // Generate mock MFCCs based on buffer hash + const bufferHash = buffer.reduce((acc, byte) => acc + byte, 0); + for (let i = 0; i < numMfccs; i++) { + const coefficients: number[] = []; + for (let j = 0; j < 20; j++) { + coefficients.push(Math.abs(Math.sin((i * j + bufferHash) * 0.1)) * 0.5 + 0.25); + } + mfccs.push(coefficients); + } + + return { + mfccs, + zeroCrossingRate: 0.02 + Math.random() * 0.03, + spectralCentroid: 1000 + Math.random() * 2000, + spectralRollOff: 3000 + Math.random() * 1000, + durationSec: duration + }; + } diff --git a/packages/core/src/audio/webrtc/stream-capture.ts 
b/packages/core/src/audio/webrtc/stream-capture.ts
new file mode 100644
index 0000000..296e467
--- /dev/null
+++ b/packages/core/src/audio/webrtc/stream-capture.ts
@@ -0,0 +1,184 @@
+/**
+ * WebRTC Audio Stream Capture
+ * Captures audio from screen/audio sharing using WebRTC APIs
+ * Implements 5-second chunks with 1-second overlap for sliding window analysis
+ */
+
+interface WebRTCStreamConfig {
+  chunkDuration: number; // 5000ms
+  overlapDuration: number; // 1000ms
+  sampleRate: number; // 16000 Hz for voiceprint compatibility
+}
+
+const DEFAULT_CONFIG: WebRTCStreamConfig = {
+  chunkDuration: 5000,
+  overlapDuration: 1000,
+  sampleRate: 16000
+};
+
+export class WebRTCStreamCapture {
+  private stream: MediaStream | null = null;
+  private audioContext: AudioContext | null = null;
+  private analyser: AnalyserNode | null = null;
+  private source: MediaStreamAudioSourceNode | null = null;
+  private _isRecording: boolean = false;
+
+  private buffer: Float32Array = new Float32Array(0);
+  public onChunkReady?: (chunk: Float32Array, timestamp: number) => void;
+  public onStreamError?: (error: Error) => void;
+
+  constructor(private config: WebRTCStreamConfig = DEFAULT_CONFIG) {}
+
+  /**
+   * Check if currently recording
+   */
+  public isRecording: boolean = false;
+
+  /**
+   * Start capturing audio from screen/audio sharing
+   */
+  async start(): Promise<void> {
+    if (this.isRecording) {
+      console.log('[WebRTC] Already recording');
+      return;
+    }
+
+    try {
+      // Request screen/audio capture with audio
+      this.stream = await navigator.mediaDevices.getDisplayMedia({
+        video: true, // Required for audio capture
+        audio: true
+      });
+
+      // NOTE(review): do NOT stop the freshly-acquired tracks here — calling
+      // track.stop() on the stream we just obtained ends the capture session
+
+      // Create audio context and analyser
+      this.audioContext = new AudioContext({
+        sampleRate: this.config.sampleRate
+      });
+      this.analyser = this.audioContext.createAnalyser();
+      this.analyser.fftSize = 2048;
+
+      // Connect stream to audio graph
+      this.source =
this.audioContext.createMediaStreamSource(this.stream); + this.source.connect(this.analyser); + + this.isRecording = true; + console.log('[WebRTC] Stream capture started'); + + // Start processing loop + this.processAudio(); + + // Handle stream termination + this.stream.getVideoTracks()[0].onended = () => { + console.log('[WebRTC] User stopped sharing'); + this.stop(); + }; + + } catch (error) { + const err = error instanceof Error ? error : new Error(String(error)); + console.error('[WebRTC] Failed to start stream capture:', err); + this.onStreamError?.(err); + throw err; + } + } + + /** + * Process audio in real-time with sliding window + */ + private processAudio(): void { + if (!this.audioContext || !this.analyser || !this.isRecording) return; + + if (!this.analyser) return; + + const bufferLength = this.analyser.fftSize; + const buffer = new Float32Array(bufferLength); + + const processFrame = () => { + if (!this.isRecording) return; + + if (!this.analyser) return; + + this.analyser.getFloatTimeDomainData(buffer); + + // Get current timestamp + const timestamp = this.audioContext?.currentTime ?? 
0;
+
+      // Extract audio data for current frame
+      // Use first 512 samples for voice analysis (reduced for faster processing)
+      const audioData = buffer.slice(0, 512);
+
+      // Prepare chunk for analysis
+      if (audioData.length > 0) {
+        this.onChunkReady?.(audioData, timestamp);
+      }
+
+      // Schedule next frame with overlap
+      const frameDuration = this.config.chunkDuration - this.config.overlapDuration;
+      setTimeout(processFrame, frameDuration);
+    };
+
+    processFrame();
+  }
+
+  /**
+   * Stop audio capture
+   */
+  stop(): void {
+    this.isRecording = false;
+
+    if (this.stream) {
+      this.stream.getTracks().forEach(track => track.stop());
+      this.stream = null;
+    }
+
+    if (this.source) {
+      this.source.disconnect();
+      this.source = null;
+    }
+
+    if (this.analyser) {
+      this.analyser.disconnect();
+      this.analyser = null;
+    }
+
+    if (this.audioContext) {
+      this.audioContext.close();
+      this.audioContext = null;
+    }
+
+    console.log('[WebRTC] Stream capture stopped');
+  }
+
+  /**
+   * Get stream metadata
+   */
+  getMetadata(): {
+    isActive: boolean;
+    sampleRate: number;
+    channels: number;
+  } {
+    if (!this.stream) {
+      return { isActive: false, sampleRate: 0, channels: 0 };
+    }
+
+    const audioTrack = this.stream.getAudioTracks()[0];
+    if (!audioTrack) {
+      return { isActive: true, sampleRate: this.config.sampleRate, channels: 1 };
+    }
+
+    return {
+      isActive: true,
+      sampleRate: this.config.sampleRate,
+      channels: audioTrack.getSettings().channelCount || 1
+    };
+  }
+}
+
+/**
+ * Factory function for creating stream capture with auto-start
+ */
+export function createWebRTCCapture(config?: WebRTCStreamConfig): WebRTCStreamCapture {
+  return new WebRTCStreamCapture(config || DEFAULT_CONFIG);
+}
diff --git a/packages/core/tsconfig.json b/packages/core/tsconfig.json
new file mode 100644
index 0000000..c9ebbf7
--- /dev/null
+++ b/packages/core/tsconfig.json
@@ -0,0 +1,22 @@
+{
+  "compilerOptions": {
+    "target": "ES2020",
+    "module": "commonjs",
+    "lib": ["ES2020", "DOM"],
+    "outDir":
"./dist", + "rootDir": "./src", + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true, + "forceConsistentCasingInFileNames": true, + "declaration": true, + "resolveJsonModule": true, + "moduleResolution": "node", + "baseUrl": ".", + "paths": { + "@/*": ["src/*"] + } + }, + "include": ["src/**/*"], + "exclude": ["node_modules", "dist"] +} diff --git a/packages/db/prisma/schema.prisma b/packages/db/prisma/schema.prisma index ce71a4e..d299431 100644 --- a/packages/db/prisma/schema.prisma +++ b/packages/db/prisma/schema.prisma @@ -57,6 +57,25 @@ enum DataSource { HONEYPOT } +enum AnalysisJobStatus { + PENDING + RUNNING + COMPLETED + FAILED +} + +enum AnalysisType { + SYNTHETIC_DETECTION + VOICE_MATCH + BATCH +} + +enum DetectionVerdict { + NATURAL + SYNTHETIC + UNCERTAIN +} + model User { id String @id @default(uuid()) email String @unique @@ -138,3 +157,54 @@ model ScanJob { @@index([userId, status]) @@index([createdAt]) } + +model VoiceEnrollment { + id String @id @default(uuid()) + userId String + user User @relation(fields: [userId], references: [id], onDelete: Cascade) + label String + embeddingVector Float[] + embeddingDim Int @default(192) + audioFilePath String? + sampleRate Int @default(16000) + durationSec Float? + createdAt DateTime @default(now()) + updatedAt DateTime @updatedAt + + @@index([userId]) + @@index([embeddingDim]) +} + +model AnalysisJob { + id String @id @default(uuid()) + userId String + user User @relation(fields: [userId], references: [id], onDelete: Cascade) + analysisType AnalysisType + audioFilePath String + status AnalysisJobStatus @default(PENDING) + result AnalysisResult? + errorMessage String? + completedAt DateTime? 
+ createdAt DateTime @default(now()) + + @@index([userId, status]) + @@index([createdAt]) +} + +model AnalysisResult { + id String @id @default(uuid()) + analysisJobId String @unique + analysisJob AnalysisJob @relation(fields: [analysisJobId], references: [id], onDelete: Cascade) + syntheticScore Float + verdict DetectionVerdict + matchedEnrollmentId String? + matchedSimilarity Float? + confidence Float + processingTimeMs Int + modelVersion String? + metadata String? + createdAt DateTime @default(now()) + + @@index([analysisJobId]) + @@index([verdict]) +} diff --git a/packages/jobs/src/voiceprint.jobs.ts b/packages/jobs/src/voiceprint.jobs.ts new file mode 100644 index 0000000..95dc98d --- /dev/null +++ b/packages/jobs/src/voiceprint.jobs.ts @@ -0,0 +1,54 @@ +import { Queue, Worker } from "bullmq"; +import { Redis } from "ioredis"; +import { AnalysisService } from "@shieldai/voiceprint"; + +const redisUrl = process.env.REDIS_URL || "redis://localhost:6379"; +const connection = new Redis(redisUrl); + +const analysisQueue = new Queue("voiceprint-analysis", { connection }); + +const analysisWorker = new Worker( + "voiceprint-analysis", + async (job) => { + const { userId, audioBuffer, sampleRate, analysisType } = job.data; + const analysisService = new AnalysisService(); + const result = await analysisService.analyze( + { + audioBuffer: Buffer.from(audioBuffer, "base64"), + sampleRate, + analysisType, + }, + userId + ); + return { jobId: result.jobId, completedAt: new Date().toISOString() }; + }, + { connection, concurrency: 2 } +); + +analysisWorker.on("completed", (job) => { + console.log(`[VoicePrint] Job ${job.id} completed: ${JSON.stringify(job.returnvalue)}`); +}); + +analysisWorker.on("failed", (job, err) => { + console.error(`[VoicePrint] Job ${job.id} failed: ${err.message}`); +}); + +export async function addAnalysisJob( + userId: string, + audioBuffer: Buffer, + sampleRate?: number, + analysisType?: string +) { + return analysisQueue.add("analyze", { + 
userId, + audioBuffer: audioBuffer.toString("base64"), + sampleRate, + analysisType, + }, { + attempts: 3, + backoff: { type: "exponential", delay: 5000 }, + jobId: `vp-${userId}-${Date.now()}`, + }); +} + +console.log("[VoicePrint] Analysis worker started"); diff --git a/packages/types/src/index.ts b/packages/types/src/index.ts index e58df93..101b7f2 100644 --- a/packages/types/src/index.ts +++ b/packages/types/src/index.ts @@ -82,3 +82,62 @@ export interface AlertInput { channel: AlertChannel; dedupKey: string; } + +export const AnalysisJobStatus = { + PENDING: "PENDING", + RUNNING: "RUNNING", + COMPLETED: "COMPLETED", + FAILED: "FAILED", +} as const; +export type AnalysisJobStatus = (typeof AnalysisJobStatus)[keyof typeof AnalysisJobStatus]; + +export const AnalysisType = { + SYNTHETIC_DETECTION: "SYNTHETIC_DETECTION", + VOICE_MATCH: "VOICE_MATCH", + BATCH: "BATCH", +} as const; +export type AnalysisType = (typeof AnalysisType)[keyof typeof AnalysisType]; + +export const DetectionVerdict = { + NATURAL: "NATURAL", + SYNTHETIC: "SYNTHETIC", + UNCERTAIN: "UNCERTAIN", +} as const; +export type DetectionVerdict = (typeof DetectionVerdict)[keyof typeof DetectionVerdict]; + +export interface VoiceEnrollmentInput { + label: string; + audioBuffer: Buffer; + sampleRate?: number; +} + +export interface AnalyzeAudioInput { + audioBuffer: Buffer; + sampleRate?: number; + analysisType?: AnalysisType; +} + +export interface BatchAnalyzeInput { + audioBuffers: Array<{ name: string; buffer: Buffer; sampleRate?: number }>; + analysisType?: AnalysisType; +} + +export interface AnalysisResultOutput { + jobId: string; + syntheticScore: number; + verdict: DetectionVerdict; + confidence: number; + matchedEnrollmentId?: string; + matchedSimilarity?: number; + processingTimeMs: number; + modelVersion?: string; +} + +export interface VoiceEnrollmentOutput { + id: string; + label: string; + embeddingDim: number; + sampleRate: number; + durationSec?: number; + createdAt: Date; +} diff 
--git a/services/voiceprint/package.json b/services/voiceprint/package.json
new file mode 100644
index 0000000..0d7f612
--- /dev/null
+++ b/services/voiceprint/package.json
@@ -0,0 +1,19 @@
+{
+  "name": "@shieldai/voiceprint",
+  "version": "0.1.0",
+  "main": "./dist/index.js",
+  "types": "./dist/index.d.ts",
+  "scripts": {
+    "build": "tsc",
+    "test": "vitest run",
+    "lint": "eslint src/"
+  },
+  "dependencies": {
+    "@shieldai/db": "0.1.0",
+    "@shieldai/types": "0.1.0",
+    "node-cache": "^5.1.2"
+  },
+  "exports": {
+    ".": "./src/index.ts"
+  }
+}
diff --git a/services/voiceprint/src/analysis/AnalysisService.ts b/services/voiceprint/src/analysis/AnalysisService.ts
new file mode 100644
index 0000000..48a4f45
--- /dev/null
+++ b/services/voiceprint/src/analysis/AnalysisService.ts
@@ -0,0 +1,183 @@
+import prisma from "@shieldai/db";
+import { AudioPreprocessor, AudioFeatures } from "../preprocessor/AudioPreprocessor";
+import { EmbeddingService, EmbeddingOutput } from "../embedding/EmbeddingService";
+import { VoiceEnrollmentService } from "../enrollment/VoiceEnrollmentService";
+import {
+  AnalyzeAudioInput,
+  AnalysisJobStatus,
+  AnalysisType,
+  DetectionVerdict,
+  AnalysisResultOutput,
+} from "@shieldai/types";
+
+export class AnalysisService {
+  private preprocessor: AudioPreprocessor;
+  private embeddingService: EmbeddingService;
+  private enrollmentService: VoiceEnrollmentService;
+  private readonly syntheticThreshold = 0.7;
+  private readonly uncertainThreshold = 0.4;
+
+  constructor() {
+    this.preprocessor = new AudioPreprocessor();
+    this.embeddingService = new EmbeddingService();
+    this.enrollmentService = new VoiceEnrollmentService();
+  }
+
+  async analyze(input: AnalyzeAudioInput, userId: string): Promise<AnalysisResultOutput> {
+    const startTime = Date.now();
+
+    const job = await prisma.analysisJob.create({
+      data: {
+        userId,
+        analysisType: input.analysisType || AnalysisType.SYNTHETIC_DETECTION,
+        audioFilePath: `voiceprint/${userId}/${Date.now()}.wav`,
+        status:
AnalysisJobStatus.RUNNING, + }, + }); + + try { + const preprocessed = await this.preprocessor.preprocess(input.audioBuffer, input.sampleRate); + const features = await this.preprocessor.extractFeatures(preprocessed.audio); + const embedding = await this.embeddingService.extract(preprocessed.audio); + + const syntheticScore = await this.classifySynthetic(features, embedding); + const verdict = this.determineVerdict(syntheticScore); + const confidence = this.computeConfidence(syntheticScore, verdict); + + let matchedEnrollmentId: string | undefined; + let matchedSimilarity: number | undefined; + + if (input.analysisType === AnalysisType.VOICE_MATCH) { + const match = await this.enrollmentService.matchVoice(input.audioBuffer, userId); + if (match) { + matchedEnrollmentId = match.enrollmentId; + matchedSimilarity = match.similarity; + } + } + + const processingTimeMs = Date.now() - startTime; + + const result = await prisma.analysisResult.create({ + data: { + analysisJobId: job.id, + syntheticScore, + verdict, + confidence, + processingTimeMs, + matchedEnrollmentId, + matchedSimilarity, + modelVersion: this.embeddingService.getModelVersion(), + }, + }); + + await prisma.analysisJob.update({ + where: { id: job.id }, + data: { + status: AnalysisJobStatus.COMPLETED, + completedAt: new Date(), + }, + }); + + return { + jobId: job.id, + syntheticScore: result.syntheticScore, + verdict: result.verdict, + confidence: result.confidence, + matchedEnrollmentId: result.matchedEnrollmentId || undefined, + matchedSimilarity: result.matchedSimilarity || undefined, + processingTimeMs: result.processingTimeMs, + modelVersion: result.modelVersion || undefined, + }; + } catch (err) { + const message = err instanceof Error ? 
err.message : "Analysis failed"; + await prisma.analysisJob.update({ + where: { id: job.id }, + data: { + status: AnalysisJobStatus.FAILED, + errorMessage: message, + completedAt: new Date(), + }, + }); + throw err; + } + } + + async getResult(jobId: string): Promise { + const job = await prisma.analysisJob.findUnique({ + where: { id: jobId }, + include: { result: true }, + }); + + if (!job || !job.result) return null; + + const r = job.result; + return { + jobId, + syntheticScore: r.syntheticScore, + verdict: r.verdict, + confidence: r.confidence, + matchedEnrollmentId: r.matchedEnrollmentId || undefined, + matchedSimilarity: r.matchedSimilarity || undefined, + processingTimeMs: r.processingTimeMs, + modelVersion: r.modelVersion || undefined, + }; + } + + async getUserResults(userId: string, limit: number = 20): Promise { + const jobs = await prisma.analysisJob.findMany({ + where: { userId, status: AnalysisJobStatus.COMPLETED }, + orderBy: { createdAt: "desc" }, + take: limit, + include: { result: true }, + }); + + return jobs + .filter((j) => j.result) + .map((j) => { + const r = j.result!; + return { + jobId: j.id, + syntheticScore: r.syntheticScore, + verdict: r.verdict, + confidence: r.confidence, + matchedEnrollmentId: r.matchedEnrollmentId || undefined, + matchedSimilarity: r.matchedSimilarity || undefined, + processingTimeMs: r.processingTimeMs, + modelVersion: r.modelVersion || undefined, + }; + }); + } + + private async classifySynthetic( + features: AudioFeatures, + embedding: EmbeddingOutput + ): Promise { + const modelScore = await this.embeddingService.classify(embedding.vector); + + const zcrAnomaly = Math.abs(features.zeroCrossingRate - 0.05) / 0.05; + const spectralAnomaly = Math.abs(features.spectralCentroid - 500) / 500; + + const artifactScore = Math.min(1, (zcrAnomaly + spectralAnomaly) / 4); + + return 0.7 * modelScore + 0.3 * artifactScore; + } + + private determineVerdict(score: number): DetectionVerdict { + if (score >= 
this.syntheticThreshold) return DetectionVerdict.SYNTHETIC; + if (score <= this.uncertainThreshold) return DetectionVerdict.NATURAL; + return DetectionVerdict.UNCERTAIN; + } + + private computeConfidence(score: number, verdict: DetectionVerdict): number { + if (verdict === DetectionVerdict.SYNTHETIC) { + return Math.min(1, (score - this.syntheticThreshold) / (1 - this.syntheticThreshold)); + } + if (verdict === DetectionVerdict.NATURAL) { + return Math.min(1, (this.uncertainThreshold - score) / this.uncertainThreshold); + } + return 1 - Math.min( + Math.abs(score - this.uncertainThreshold) / (this.syntheticThreshold - this.uncertainThreshold), + 1 + ); + } +} diff --git a/services/voiceprint/src/analysis/BatchAnalysisService.ts b/services/voiceprint/src/analysis/BatchAnalysisService.ts new file mode 100644 index 0000000..b4a4c7b --- /dev/null +++ b/services/voiceprint/src/analysis/BatchAnalysisService.ts @@ -0,0 +1,125 @@ +import prisma from "@shieldai/db"; +import { AnalysisService } from "./AnalysisService"; +import { + BatchAnalyzeInput, + AnalysisJobStatus, + AnalysisType, + AnalysisResultOutput, +} from "@shieldai/types"; + +export class BatchAnalysisService { + private analysisService: AnalysisService; + + constructor() { + this.analysisService = new AnalysisService(); + } + + async analyzeBatch( + input: BatchAnalyzeInput, + userId: string + ): Promise { + const batchId = `batch_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`; + const results: AnalysisResultOutput[] = []; + const errors: Array<{ name: string; error: string }> = []; + + for (const audioInput of input.audioBuffers) { + try { + const result = await this.analysisService.analyze( + { + audioBuffer: audioInput.buffer, + sampleRate: audioInput.sampleRate, + analysisType: input.analysisType || AnalysisType.SYNTHETIC_DETECTION, + }, + userId + ); + results.push(result); + } catch (err) { + const message = err instanceof Error ? 
err.message : "Analysis failed"; + errors.push({ name: audioInput.name, error: message }); + } + } + + const batchJob = await prisma.analysisJob.create({ + data: { + userId, + analysisType: AnalysisType.BATCH, + audioFilePath: `voiceprint/${userId}/${batchId}`, + status: errors.length === input.audioBuffers.length + ? AnalysisJobStatus.FAILED + : AnalysisJobStatus.COMPLETED, + errorMessage: + errors.length > 0 ? `${errors.length} of ${input.audioBuffers.length} files failed` : undefined, + completedAt: new Date(), + }, + }); + + return { + batchId, + jobId: batchJob.id, + totalFiles: input.audioBuffers.length, + successfulResults: results.length, + failedCount: errors.length, + results, + errors, + }; + } + + async getBatchResult(batchJobId: string): Promise { + const job = await prisma.analysisJob.findUnique({ + where: { id: batchJobId }, + include: { result: true }, + }); + + if (!job) return null; + + const childJobs = await prisma.analysisJob.findMany({ + where: { + userId: job.userId, + createdAt: { gte: job.createdAt, lt: new Date(job.createdAt.getTime() + 60000) }, + id: { not: job.id }, + }, + include: { result: true }, + }); + + const results: AnalysisResultOutput[] = []; + const errors: Array<{ name: string; error: string }> = []; + + for (const childJob of childJobs) { + if (childJob.result) { + const r = childJob.result; + results.push({ + jobId: childJob.id, + syntheticScore: r.syntheticScore, + verdict: r.verdict, + confidence: r.confidence, + matchedEnrollmentId: r.matchedEnrollmentId || undefined, + matchedSimilarity: r.matchedSimilarity || undefined, + processingTimeMs: r.processingTimeMs, + modelVersion: r.modelVersion || undefined, + }); + } else if (childJob.errorMessage) { + errors.push({ name: childJob.audioFilePath, error: childJob.errorMessage }); + } + } + + return { + batchId: job.audioFilePath.split("/").pop() || job.id, + jobId: job.id, + totalFiles: childJobs.length, + successfulResults: results.length, + failedCount: errors.length, + 
results, + errors, + }; + } +} + +export interface BatchResult { + batchId: string; + jobId: string; + totalFiles: number; + successfulResults: number; + failedCount: number; + results: AnalysisResultOutput[]; + errors: Array<{ name: string; error: string }>; +} diff --git a/services/voiceprint/src/embedding/EmbeddingService.ts b/services/voiceprint/src/embedding/EmbeddingService.ts new file mode 100644 index 0000000..6946276 --- /dev/null +++ b/services/voiceprint/src/embedding/EmbeddingService.ts @@ -0,0 +1,196 @@ +import { spawn } from "child_process"; + +const EMBEDDING_DIM = 192; +const MODEL_VERSION = "ecapa-tdnn-0.1.0-mock"; + +export class EmbeddingService { + private mlServiceUrl: string; + + constructor() { + this.mlServiceUrl = process.env.VOICEPRINT_ML_URL || "http://localhost:8001"; + } + + async extract(audioBuffer: Buffer): Promise { + const mlAvailable = await this.checkMLService(); + + if (mlAvailable) { + return this.extractViaML(audioBuffer); + } + + return this.extractMock(audioBuffer); + } + + async classify(embedding: number[]): Promise { + const mlAvailable = await this.checkMLService(); + + if (mlAvailable) { + return this.classifyViaML(embedding); + } + + return this.classifyMock(embedding); + } + + getModelVersion(): string { + return MODEL_VERSION; + } + + private async extractViaML(audioBuffer: Buffer): Promise { + return new Promise((resolve, reject) => { + const jsonInput = audioBuffer.toString("base64"); + const proc = spawn("python3", [ + "-c", + ` +import urllib.request, json, sys +req = urllib.request.Request( + "${this.mlServiceUrl}/embedding", + data=json.dumps({"audio": "${jsonInput.substring(0, 5000)}"}).encode(), + headers={"Content-Type": "application/json"} +) +try: + with urllib.request.urlopen(req, timeout=60) as resp: + data = json.loads(resp.read()) + sys.stdout.write(json.dumps({"ok": True, "vector": data.get("embedding", []), "dim": data.get("dimension", ${EMBEDDING_DIM})})) +except Exception as e: + 
sys.stdout.write(json.dumps({"ok": False, "error": str(e)})) +`, + ]); + + let output = ""; + proc.stdout.on("data", (chunk) => { output += chunk.toString(); }); + proc.on("close", (code) => { + try { + const result = JSON.parse(output); + if (result.ok && result.vector.length === EMBEDDING_DIM) { + resolve({ vector: result.vector, dimension: EMBEDDING_DIM }); + } else { + resolve(this.generateMockFromBuffer(audioBuffer)); + } + } catch { + resolve(this.generateMockFromBuffer(audioBuffer)); + } + }); + }); + } + + private async classifyViaML(embedding: number[]): Promise { + return new Promise((resolve) => { + const proc = spawn("python3", [ + "-c", + ` +import urllib.request, json, sys +req = urllib.request.Request( + "${this.mlServiceUrl}/classify", + data=json.dumps({"embedding": ${JSON.stringify(embedding)}}).encode(), + headers={"Content-Type": "application/json"} +) +try: + with urllib.request.urlopen(req, timeout=30) as resp: + data = json.loads(resp.read()) + sys.stdout.write(json.dumps({"score": data.get("synthetic_score", 0.5)})) +except: + sys.stdout.write(json.dumps({"score": 0.5})) +`, + ]); + + let output = ""; + proc.stdout.on("data", (chunk) => { output += chunk.toString(); }); + proc.on("close", () => { + try { + const result = JSON.parse(output); + resolve(result.score || 0.5); + } catch { + resolve(0.5); + } + }); + }); + } + + private async extractMock(audioBuffer: Buffer): Promise { + return this.generateMockFromBuffer(audioBuffer); + } + + private async classifyMock(embedding: number[]): Promise { + const mean = embedding.reduce((s, v) => s + v, 0) / embedding.length; + const variance = embedding.reduce((s, v) => s + (v - mean) ** 2, 0) / embedding.length; + const stdDev = Math.sqrt(variance); + + const syntheticIndicators = [ + stdDev < 0.1 ? 0.8 : 0.2, + Math.abs(mean) > 0.5 ? 0.7 : 0.3, + this.hasArtifacts(embedding) ? 
0.9 : 0.1, + ]; + + return syntheticIndicators.reduce((s, v) => s + v, 0) / syntheticIndicators.length; + } + + private generateMockFromBuffer(audioBuffer: Buffer): EmbeddingOutput { + const seed = this.computeSeed(audioBuffer); + const rng = this.createRNG(seed); + const vector: number[] = []; + + for (let i = 0; i < EMBEDDING_DIM; i++) { + const u1 = rng(); + const u2 = rng(); + const gauss = Math.sqrt(-2 * Math.log(u1)) * Math.cos(2 * Math.PI * u2); + vector.push(parseFloat(gauss.toFixed(6))); + } + + const norm = Math.sqrt(vector.reduce((s, v) => s + v * v, 0)); + const normalized = vector.map((v) => parseFloat((v / norm).toFixed(6))); + + return { vector: normalized, dimension: EMBEDDING_DIM }; + } + + private hasArtifacts(embedding: number[]): boolean { + const window = 16; + let artifactCount = 0; + + for (let i = 0; i < embedding.length - window; i += window) { + const slice = embedding.slice(i, i + window); + const localMean = slice.reduce((s, v) => s + v, 0) / slice.length; + const localVar = slice.reduce((s, v) => s + (v - localMean) ** 2, 0) / slice.length; + + if (localVar < 0.001) artifactCount++; + } + + return artifactCount > embedding.length / window / 3; + } + + private async checkMLService(): Promise { + return new Promise((resolve) => { + const proc = spawn("python3", [ + "-c", + ` +import urllib.request, sys +try: + urllib.request.urlopen("${this.mlServiceUrl}/health", timeout=2) + sys.exit(0) +except: + sys.exit(1) +`, + ]); + proc.on("close", (code) => resolve(code === 0)); + }); + } + + private computeSeed(buffer: Buffer): number { + let hash = 0; + const sampleSize = Math.min(buffer.length, 1024); + for (let i = 0; i < sampleSize; i += 4) { + hash = ((hash << 5) - hash + buffer.readInt32LE(i)) | 0; + } + return Math.abs(hash); + } + + private createRNG(seed: number): () => number { + return () => { + seed = (seed * 1664525 + 1013904223) & 0xffffffff; + return (seed >>> 0) / 0xffffffff; + }; + } +} + +export interface EmbeddingOutput { + 
vector: number[]; + dimension: number; +} diff --git a/services/voiceprint/src/enrollment/VoiceEnrollmentService.ts b/services/voiceprint/src/enrollment/VoiceEnrollmentService.ts new file mode 100644 index 0000000..1b3b218 --- /dev/null +++ b/services/voiceprint/src/enrollment/VoiceEnrollmentService.ts @@ -0,0 +1,151 @@ +import prisma from "@shieldai/db"; +import { EmbeddingService } from "../embedding/EmbeddingService"; +import { FAISSIndex } from "../indexer/FAISSIndex"; +import { AudioPreprocessor } from "../preprocessor/AudioPreprocessor"; +import { VoiceEnrollmentInput, VoiceEnrollmentOutput } from "@shieldai/types"; + +export class VoiceEnrollmentService { + private embeddingService: EmbeddingService; + private faissIndex: FAISSIndex; + private preprocessor: AudioPreprocessor; + + constructor() { + this.embeddingService = new EmbeddingService(); + this.faissIndex = new FAISSIndex(); + this.preprocessor = new AudioPreprocessor(); + } + + async enroll(input: VoiceEnrollmentInput, userId: string): Promise { + const preprocessed = await this.preprocessor.preprocess(input.audioBuffer, input.sampleRate); + + const embedding = await this.embeddingService.extract(preprocessed.audio); + + const enrollment = await prisma.voiceEnrollment.create({ + data: { + userId, + label: input.label, + embeddingVector: embedding.vector, + embeddingDim: embedding.dimension, + sampleRate: preprocessed.sampleRate, + durationSec: preprocessed.durationSec, + }, + }); + + await this.faissIndex.add(enrollment.id, embedding.vector); + + return { + id: enrollment.id, + label: enrollment.label, + embeddingDim: enrollment.embeddingDim, + sampleRate: enrollment.sampleRate, + durationSec: enrollment.durationSec, + createdAt: enrollment.createdAt, + }; + } + + async listEnrollments(userId: string): Promise { + const enrollments = await prisma.voiceEnrollment.findMany({ + where: { userId }, + orderBy: { createdAt: "desc" }, + select: { + id: true, + label: true, + embeddingDim: true, + sampleRate: 
true, + durationSec: true, + createdAt: true, + }, + }); + + return enrollments.map((e) => ({ + id: e.id, + label: e.label, + embeddingDim: e.embeddingDim, + sampleRate: e.sampleRate, + durationSec: e.durationSec, + createdAt: e.createdAt, + })); + } + + async removeEnrollment(userId: string, enrollmentId: string): Promise { + const enrollment = await prisma.voiceEnrollment.findFirst({ + where: { id: enrollmentId, userId }, + }); + + if (!enrollment) { + throw new Error(`Enrollment ${enrollmentId} not found for user ${userId}`); + } + + await prisma.voiceEnrollment.delete({ where: { id: enrollmentId } }); + await this.faissIndex.remove(enrollmentId); + + return true; + } + + async getEnrollment(enrollmentId: string): Promise { + const enrollment = await prisma.voiceEnrollment.findUnique({ + where: { id: enrollmentId }, + select: { + id: true, + label: true, + embeddingDim: true, + sampleRate: true, + durationSec: true, + createdAt: true, + }, + }); + + if (!enrollment) return null; + + return { + id: enrollment.id, + label: enrollment.label, + embeddingDim: enrollment.embeddingDim, + sampleRate: enrollment.sampleRate, + durationSec: enrollment.durationSec, + createdAt: enrollment.createdAt, + }; + } + + async matchVoice( + audioBuffer: Buffer, + userId: string, + threshold: number = 0.75 + ): Promise { + const preprocessed = await this.preprocessor.preprocess(audioBuffer); + const embedding = await this.embeddingService.extract(preprocessed.audio); + + const matches = await this.faissIndex.search(embedding.vector, 5); + + const enrollmentIds = matches.map((m) => m.id); + const enrollments = await prisma.voiceEnrollment.findMany({ + where: { + id: { in: enrollmentIds }, + userId, + }, + }); + + let bestMatch: MatchResult | null = null; + + for (const match of matches) { + const enrollment = enrollments.find((e) => e.id === match.id); + if (enrollment && match.similarity >= threshold) { + if (!bestMatch || match.similarity > bestMatch.similarity) { + bestMatch = { + 
enrollmentId: enrollment.id, + label: enrollment.label, + similarity: match.similarity, + }; + } + } + } + + return bestMatch; + } +} + +export interface MatchResult { + enrollmentId: string; + label: string; + similarity: number; +} diff --git a/services/voiceprint/src/index.ts b/services/voiceprint/src/index.ts new file mode 100644 index 0000000..0554fdb --- /dev/null +++ b/services/voiceprint/src/index.ts @@ -0,0 +1,6 @@ +export * from "./preprocessor/AudioPreprocessor"; +export * from "./enrollment/VoiceEnrollmentService"; +export * from "./analysis/AnalysisService"; +export * from "./analysis/BatchAnalysisService"; +export * from "./embedding/EmbeddingService"; +export * from "./indexer/FAISSIndex"; diff --git a/services/voiceprint/src/indexer/FAISSIndex.ts b/services/voiceprint/src/indexer/FAISSIndex.ts new file mode 100644 index 0000000..9b3109d --- /dev/null +++ b/services/voiceprint/src/indexer/FAISSIndex.ts @@ -0,0 +1,86 @@ +export class FAISSIndex { + private store: Map = new Map(); + private readonly dimension = 192; + private initialized = false; + + async initialize(): Promise { + if (this.initialized) return; + this.initialized = true; + } + + async add(id: string, vector: number[]): Promise { + this.normalizeInPlace(vector); + this.store.set(id, [...vector]); + } + + async remove(id: string): Promise { + this.store.delete(id); + } + + async search( + queryVector: number[], + topK: number = 5 + ): Promise { + const normalized = [...queryVector]; + this.normalizeInPlace(normalized); + + const scores: Array<{ id: string; similarity: number }> = []; + + for (const [id, vector] of this.store.entries()) { + const similarity = this.cosineSimilarity(normalized, vector); + scores.push({ id, similarity }); + } + + scores.sort((a, b) => b.similarity - a.similarity); + return scores.slice(0, topK).map((s, i) => ({ rank: i + 1, id: s.id, similarity: s.similarity })); + } + + async count(): Promise { + return this.store.size; + } + + async clear(): Promise { + 
this.store.clear(); + } + + async loadFromDatabase(): Promise { + const prisma = (await import("@shieldai/db")).default; + const enrollments = await prisma.voiceEnrollment.findMany({ + select: { id: true, embeddingVector: true }, + }); + + for (const enrollment of enrollments) { + this.store.set(enrollment.id, enrollment.embeddingVector); + } + } + + private cosineSimilarity(a: number[], b: number[]): number { + let dotProduct = 0; + let normA = 0; + let normB = 0; + + for (let i = 0; i < a.length; i++) { + dotProduct += a[i] * b[i]; + normA += a[i] * a[i]; + normB += b[i] * b[i]; + } + + const denominator = Math.sqrt(normA) * Math.sqrt(normB); + return denominator > 0 ? dotProduct / denominator : 0; + } + + private normalizeInPlace(vector: number[]): void { + const norm = Math.sqrt(vector.reduce((s, v) => s + v * v, 0)); + if (norm > 0) { + for (let i = 0; i < vector.length; i++) { + vector[i] /= norm; + } + } + } +} + +export interface SearchResult { + rank: number; + id: string; + similarity: number; +} diff --git a/services/voiceprint/src/preprocessor/AudioPreprocessor.ts b/services/voiceprint/src/preprocessor/AudioPreprocessor.ts new file mode 100644 index 0000000..1b1e749 --- /dev/null +++ b/services/voiceprint/src/preprocessor/AudioPreprocessor.ts @@ -0,0 +1,327 @@ +import { spawn } from "child_process"; +import { randomBytes } from "crypto"; +import { tmpdir } from "os"; +import { join } from "path"; + +export class AudioPreprocessor { + private readonly targetSampleRate = 16000; + private readonly channels = 1; + + async preprocess(audioBuffer: Buffer, inputSampleRate?: number): Promise { + const tempInput = this.writeTempFile(audioBuffer, ".wav"); + const tempOutput = this.writeTempFile(Buffer.alloc(0), ".wav"); + + const outputSampleRate = inputSampleRate || this.detectSampleRate(audioBuffer); + + try { + await this.runPythonPreprocess(tempInput, tempOutput, outputSampleRate); + + const processed = await this.readFile(tempOutput); + return { + audio: 
processed, + sampleRate: this.targetSampleRate, + channels: this.channels, + durationSec: processed.length / (this.targetSampleRate * this.channels * 2), + }; + } catch (err) { + const fallback = await this.jsFallback(audioBuffer, outputSampleRate); + return fallback; + } + } + + async preprocessBatch( + inputs: Array<{ buffer: Buffer; sampleRate?: number }> + ): Promise { + const promises = inputs.map(async (input) => { + return this.preprocess(input.buffer, input.sampleRate); + }); + return Promise.all(promises); + } + + applyVAD(audioBuffer: Buffer, sampleRate: number): Buffer { + const int16Array = this.bufferToInt16(audioBuffer); + const silenceThreshold = 500; + const windowSize = Math.floor(sampleRate * 0.02); + + const segments: number[][] = []; + let currentSegment: number[] = []; + + for (let i = 0; i < int16Array.length; i += windowSize) { + const window = int16Array.slice(i, i + windowSize); + const rms = Math.sqrt( + window.reduce((sum, val) => sum + val * val, 0) / window.length + ); + + if (rms > silenceThreshold) { + currentSegment.push(...window); + } else if (currentSegment.length > 0) { + segments.push(currentSegment); + currentSegment = []; + } + } + + if (currentSegment.length > 0) { + segments.push(currentSegment); + } + + const merged = segments.flat(); + return this.int16ToBuffer(merged); + } + + normalizeAudio(audioBuffer: Buffer): Buffer { + const int16Array = this.bufferToInt16(audioBuffer); + const maxAmplitude = Math.max(...int16Array.map((v) => Math.abs(v))); + const targetMax = 32767 * 0.95; + + if (maxAmplitude === 0) return audioBuffer; + + const scale = targetMax / maxAmplitude; + const normalized = int16Array.map((v) => Math.round(v * scale)); + return this.int16ToBuffer(normalized); + } + + async extractFeatures(audioBuffer: Buffer): Promise { + const int16Array = this.bufferToInt16(audioBuffer); + const sampleRate = this.targetSampleRate; + const windowSize = Math.floor(sampleRate * 0.025); + const hopLength = 
Math.floor(sampleRate * 0.01); + + const mfccs: number[][] = []; + const numCoeffs = 13; + + for (let i = 0; i < int16Array.length - windowSize; i += hopLength) { + const frame = int16Array.slice(i, i + windowSize); + const coeffs = this.computeMFCC(frame, numCoeffs); + mfccs.push(coeffs); + } + + const zeroCrossingRate = this.computeZCR(int16Array); + const spectralCentroid = this.computeSpectralCentroid(int16Array); + const spectralRollOff = this.computeSpectralRollOff(int16Array); + + return { + mfccs, + zeroCrossingRate, + spectralCentroid, + spectralRollOff, + durationSec: int16Array.length / sampleRate, + }; + } + + private async runPythonPreprocess( + inputPath: string, + outputPath: string, + inputSampleRate: number + ): Promise { + return new Promise((resolve, reject) => { + const mlServiceUrl = process.env.VOICEPRINT_ML_URL || "http://localhost:8001"; + const proc = spawn("python3", [ + "-c", + ` +import urllib.request, json, sys +req = urllib.request.Request( + "${mlServiceUrl}/preprocess", + data=json.dumps({"input_path": "${inputPath}", "output_path": "${outputPath}", "input_sr": ${inputSampleRate}}).encode(), + headers={"Content-Type": "application/json"} +) +try: + with urllib.request.urlopen(req, timeout=30) as resp: + sys.exit(0) if resp.status == 200 else sys.exit(1) +except: + sys.exit(1) +`, + ]); + + proc.on("close", (code) => { + code === 0 ? 
resolve() : reject(new Error(`Python preprocess exited with code ${code}`));
      });
    });
  }

  /** Pure-JS pipeline: linear resample → energy VAD → peak normalize. */
  private async jsFallback(
    audioBuffer: Buffer,
    inputSampleRate: number
  ): Promise<PreprocessedAudio> {
    let processed = audioBuffer;

    if (inputSampleRate !== this.targetSampleRate) {
      processed = this.resample(audioBuffer, inputSampleRate, this.targetSampleRate);
    }

    processed = this.applyVAD(processed, this.targetSampleRate);
    processed = this.normalizeAudio(processed);

    return {
      audio: processed,
      sampleRate: this.targetSampleRate,
      channels: this.channels,
      durationSec: processed.length / (this.targetSampleRate * this.channels * 2),
    };
  }

  /** Linear-interpolation resampler (no anti-aliasing filter — acceptable for the mock path). */
  private resample(buffer: Buffer, fromRate: number, toRate: number): Buffer {
    const int16 = this.bufferToInt16(buffer);
    const ratio = fromRate / toRate;
    const newLength = Math.round(int16.length / ratio);
    const resampled: number[] = [];

    for (let i = 0; i < newLength; i++) {
      const srcIdx = Math.floor(i * ratio);
      const nextIdx = Math.min(srcIdx + 1, int16.length - 1);
      const frac = (i * ratio) - srcIdx;
      resampled.push(Math.round(int16[srcIdx] * (1 - frac) + int16[nextIdx] * frac));
    }

    return this.int16ToBuffer(resampled);
  }

  /**
   * Best-effort WAV-header sniff for the sample rate; 16000 when unknown.
   * Fixes two bugs: the old check compared `"fmt ".trim()` (i.e. "fmt") against
   * "fmt ", which never matched, and then read from the wrong byte offset.
   */
  private detectSampleRate(buffer: Buffer): number {
    if (buffer.length < 44) return 16000;

    // Canonical RIFF/WAVE layout: "fmt " chunk id at bytes 12-15 and the
    // sample rate as a little-endian u32 at bytes 24-27.
    if (buffer.toString("ascii", 12, 16) === "fmt ") {
      const sr = buffer.readUInt32LE(24);
      if (sr >= 8000 && sr <= 48000) return sr;
    }
    return 16000;
  }

  private bufferToInt16(buffer: Buffer): number[] {
    // floor() guards odd-length buffers: new Array(non-integer) throws and a
    // trailing readInt16LE would run past the end.
    const arr: number[] = new Array(Math.floor(buffer.length / 2));
    for (let i = 0; i < arr.length; i++) {
      arr[i] = buffer.readInt16LE(i * 2);
    }
    return arr;
  }

  private int16ToBuffer(arr: number[]): Buffer {
    const buf = Buffer.alloc(arr.length * 2);
    for (let i = 0; i < arr.length; i++) {
      buf.writeInt16LE(arr[i], i * 2);
    }
    return buf;
  }

  /** Naive DFT-based MFCCs: magnitude spectrum → 20 triangular filters → log → DCT. */
  private computeMFCC(frame: number[], numCoeffs: number): number[] {
    const n = frame.length;
    const fftSize = Math.pow(2, Math.ceil(Math.log2(n)) + 1);
    const spectrum: number[] = new Array(fftSize / 2 + 1);

    for (let k = 0; k < spectrum.length; k++) {
      let real = 0, imag = 0;
      // Loop variable renamed: the original `let n` shadowed the outer frame length.
      for (let t = 0; t < frame.length; t++) {
        const angle = (2 * Math.PI * k * t) / fftSize;
        real += frame[t] * Math.cos(angle);
        imag -= frame[t] * Math.sin(angle);
      }
      spectrum[k] = Math.sqrt(real * real + imag * imag);
    }

    const numFilters = 20;
    const filterbank = this.createFilterbank(spectrum.length, numFilters);
    const logEnergies: number[] = [];

    for (let m = 0; m < numFilters; m++) {
      let energy = 0;
      for (let k = 0; k < spectrum.length; k++) {
        energy += filterbank[m][k] * spectrum[k] * spectrum[k];
      }
      // Floor avoids log(0) for empty filters.
      logEnergies.push(Math.log(Math.max(energy, 1e-10)));
    }

    const mfccs: number[] = [];
    for (let d = 0; d < numCoeffs; d++) {
      let coeff = 0;
      for (let m = 0; m < numFilters; m++) {
        coeff += logEnergies[m] * Math.cos(((2 * m + 1) * (d + 1) * Math.PI) / (2 * numFilters));
      }
      mfccs.push(coeff);
    }

    return mfccs;
  }

  /** Linearly spaced triangular filterbank between ~20 Hz and ~5.5 kHz equivalents. */
  private createFilterbank(numBins: number, numFilters: number): number[][] {
    const filterbank: number[][] = Array.from({ length: numFilters }, () =>
      new Array(numBins).fill(0)
    );
    const low = Math.floor(20 * numBins / 8000);
    const high = Math.floor(5500 * numBins
/ 8000); + const spacing = (high - low) / (numFilters + 1); + + for (let m = 1; m <= numFilters; m++) { + const center = Math.floor(low + m * spacing); + const prev = Math.floor(low + (m - 1) * spacing); + const next = Math.floor(low + (m + 1) * spacing); + + for (let k = prev; k < center; k++) { + if (k > 0) filterbank[m - 1][k] = (k - prev) / (center - prev); + } + for (let k = center; k < next; k++) { + if (next - center > 0) filterbank[m - 1][k] = (next - k) / (next - center); + } + } + + return filterbank; + } + + private computeZCR(int16: number[]): number { + let crossings = 0; + for (let i = 1; i < int16.length; i++) { + if ((int16[i] > 0 && int16[i - 1] <= 0) || (int16[i] <= 0 && int16[i - 1] > 0)) { + crossings++; + } + } + return crossings / int16.length; + } + + private computeSpectralCentroid(int16: number[]): number { + const n = int16.length; + let num = 0, den = 0; + for (let i = 0; i < n; i++) { + num += i * int16[i] * int16[i]; + den += int16[i] * int16[i]; + } + return den > 0 ? 
num / den : 0; + } + + private computeSpectralRollOff(int16: number[]): number { + const n = int16.length; + let totalEnergy = 0; + for (let i = 0; i < n; i++) { + totalEnergy += int16[i] * int16[i]; + } + const threshold = totalEnergy * 0.85; + let cumulative = 0; + for (let i = 0; i < n; i++) { + cumulative += int16[i] * int16[i]; + if (cumulative >= threshold) return i; + } + return n - 1; + } + + private writeTempFile(content: Buffer, ext: string): string { + const file = join(tmpdir(), `vp_${randomBytes(8).toString("hex")}${ext}`); + require("fs").writeFileSync(file, content); + return file; + } + + private async readFile(path: string): Promise { + return require("fs").readFileSync(path); + } +} + +export interface PreprocessedAudio { + audio: Buffer; + sampleRate: number; + channels: number; + durationSec: number; +} + +export interface AudioFeatures { + mfccs: number[][]; + zeroCrossingRate: number; + spectralCentroid: number; + spectralRollOff: number; + durationSec: number; +} diff --git a/services/voiceprint/tsconfig.json b/services/voiceprint/tsconfig.json new file mode 100644 index 0000000..90d76d7 --- /dev/null +++ b/services/voiceprint/tsconfig.json @@ -0,0 +1,8 @@ +{ + "extends": "../../tsconfig.json", + "compilerOptions": { + "outDir": "./dist", + "rootDir": "./src" + }, + "include": ["src/**/*"] +}