for first push
This commit is contained in:
@@ -16,6 +16,7 @@
|
|||||||
"@shieldai/db": "0.1.0",
|
"@shieldai/db": "0.1.0",
|
||||||
"@shieldai/types": "0.1.0",
|
"@shieldai/types": "0.1.0",
|
||||||
"fastify": "^5.2.0",
|
"fastify": "^5.2.0",
|
||||||
"@shieldai/darkwatch": "0.1.0"
|
"@shieldai/darkwatch": "0.1.0",
|
||||||
|
"@shieldai/voiceprint": "0.1.0"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -13,3 +13,10 @@ export function darkwatchRoutes(fastify: FastifyInstance) {
|
|||||||
root.register(scans, { prefix: "/scan" });
|
root.register(scans, { prefix: "/scan" });
|
||||||
}, { prefix: "/api/v1/darkwatch" });
|
}, { prefix: "/api/v1/darkwatch" });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export function voiceprintRoutes(fastify: FastifyInstance) {
|
||||||
|
fastify.register(async (root) => {
|
||||||
|
const voiceprint = (await import("./voiceprint.routes")).voiceprintRoutes;
|
||||||
|
root.register(voiceprint);
|
||||||
|
}, { prefix: "/api/v1/voiceprint" });
|
||||||
|
}
|
||||||
|
|||||||
94
packages/api/src/routes/voiceprint.routes.ts
Normal file
94
packages/api/src/routes/voiceprint.routes.ts
Normal file
@@ -0,0 +1,94 @@
|
|||||||
|
import { FastifyInstance } from "fastify";
|
||||||
|
import { VoiceEnrollmentService } from "@shieldai/voiceprint";
|
||||||
|
import { AnalysisService } from "@shieldai/voiceprint";
|
||||||
|
import { BatchAnalysisService } from "@shieldai/voiceprint";
|
||||||
|
|
||||||
|
export function voiceprintRoutes(fastify: FastifyInstance) {
|
||||||
|
const enrollmentService = new VoiceEnrollmentService();
|
||||||
|
const analysisService = new AnalysisService();
|
||||||
|
const batchService = new BatchAnalysisService();
|
||||||
|
|
||||||
|
fastify.post("/enroll", async (request, reply) => {
|
||||||
|
const userId = (request.user as { id: string })?.id;
|
||||||
|
if (!userId) return reply.code(401).send({ error: "User not authenticated" });
|
||||||
|
|
||||||
|
const body = request.body as { label: string; audio: string; sampleRate?: number };
|
||||||
|
|
||||||
|
const audioBuffer = Buffer.from(body.audio, "base64");
|
||||||
|
const enrollment = await enrollmentService.enroll(
|
||||||
|
{ label: body.label, audioBuffer, sampleRate: body.sampleRate },
|
||||||
|
userId
|
||||||
|
);
|
||||||
|
return reply.code(201).send(enrollment);
|
||||||
|
});
|
||||||
|
|
||||||
|
fastify.get("/enrollments", async (request, reply) => {
|
||||||
|
const userId = (request.user as { id: string })?.id;
|
||||||
|
if (!userId) return reply.code(401).send({ error: "User not authenticated" });
|
||||||
|
|
||||||
|
const enrollments = await enrollmentService.listEnrollments(userId);
|
||||||
|
return reply.send(enrollments);
|
||||||
|
});
|
||||||
|
|
||||||
|
fastify.delete("/enrollments/:id", async (request, reply) => {
|
||||||
|
const userId = (request.user as { id: string })?.id;
|
||||||
|
if (!userId) return reply.code(401).send({ error: "User not authenticated" });
|
||||||
|
|
||||||
|
const enrollmentId = (request.params as { id: string }).id;
|
||||||
|
const result = await enrollmentService.removeEnrollment(userId, enrollmentId);
|
||||||
|
return reply.send({ removed: result });
|
||||||
|
});
|
||||||
|
|
||||||
|
fastify.post("/analyze", async (request, reply) => {
|
||||||
|
const userId = (request.user as { id: string })?.id;
|
||||||
|
if (!userId) return reply.code(401).send({ error: "User not authenticated" });
|
||||||
|
|
||||||
|
const body = request.body as { audio: string; sampleRate?: number; analysisType?: string };
|
||||||
|
const audioBuffer = Buffer.from(body.audio, "base64");
|
||||||
|
|
||||||
|
const result = await analysisService.analyze(
|
||||||
|
{ audioBuffer, sampleRate: body.sampleRate, analysisType: body.analysisType },
|
||||||
|
userId
|
||||||
|
);
|
||||||
|
return reply.code(201).send(result);
|
||||||
|
});
|
||||||
|
|
||||||
|
fastify.get("/results/:id", async (request, reply) => {
|
||||||
|
const jobId = (request.params as { id: string }).id;
|
||||||
|
const result = await analysisService.getResult(jobId);
|
||||||
|
|
||||||
|
if (!result) return reply.code(404).send({ error: "Analysis result not found" });
|
||||||
|
return reply.send(result);
|
||||||
|
});
|
||||||
|
|
||||||
|
fastify.get("/results", async (request, reply) => {
|
||||||
|
const userId = (request.user as { id: string })?.id;
|
||||||
|
if (!userId) return reply.code(401).send({ error: "User not authenticated" });
|
||||||
|
|
||||||
|
const limit = parseInt((request.query as { limit?: string }).limit || "20", 10);
|
||||||
|
const results = await analysisService.getUserResults(userId, limit);
|
||||||
|
return reply.send(results);
|
||||||
|
});
|
||||||
|
|
||||||
|
fastify.post("/batch", async (request, reply) => {
|
||||||
|
const userId = (request.user as { id: string })?.id;
|
||||||
|
if (!userId) return reply.code(401).send({ error: "User not authenticated" });
|
||||||
|
|
||||||
|
const body = request.body as {
|
||||||
|
files: Array<{ name: string; audio: string; sampleRate?: number }>;
|
||||||
|
analysisType?: string;
|
||||||
|
};
|
||||||
|
|
||||||
|
const audioBuffers = body.files.map((f) => ({
|
||||||
|
name: f.name,
|
||||||
|
buffer: Buffer.from(f.audio, "base64"),
|
||||||
|
sampleRate: f.sampleRate,
|
||||||
|
}));
|
||||||
|
|
||||||
|
const result = await batchService.analyzeBatch(
|
||||||
|
{ audioBuffers, analysisType: body.analysisType },
|
||||||
|
userId
|
||||||
|
);
|
||||||
|
return reply.code(201).send(result);
|
||||||
|
});
|
||||||
|
}
|
||||||
@@ -2,7 +2,7 @@ import Fastify from "fastify";
|
|||||||
import cors from "@fastify/cors";
|
import cors from "@fastify/cors";
|
||||||
import helmet from "@fastify/helmet";
|
import helmet from "@fastify/helmet";
|
||||||
import sensible from "@fastify/sensible";
|
import sensible from "@fastify/sensible";
|
||||||
import { darkwatchRoutes } from "./routes";
|
import { darkwatchRoutes, voiceprintRoutes } from "./routes";
|
||||||
|
|
||||||
const app = Fastify({
|
const app = Fastify({
|
||||||
logger: {
|
logger: {
|
||||||
@@ -16,6 +16,7 @@ async function bootstrap() {
|
|||||||
await app.register(sensible);
|
await app.register(sensible);
|
||||||
|
|
||||||
await app.register(darkwatchRoutes);
|
await app.register(darkwatchRoutes);
|
||||||
|
await app.register(voiceprintRoutes);
|
||||||
|
|
||||||
app.get("/health", async () => ({ status: "ok", timestamp: new Date().toISOString() }));
|
app.get("/health", async () => ({ status: "ok", timestamp: new Date().toISOString() }));
|
||||||
|
|
||||||
|
|||||||
316
packages/core/src/audio/webrtc/audio-pipeline.ts
Normal file
316
packages/core/src/audio/webrtc/audio-pipeline.ts
Normal file
@@ -0,0 +1,316 @@
|
|||||||
|
/**
|
||||||
|
* Audio Processing Pipeline for Real-Time Analysis
|
||||||
|
* Coordinates WebRTC stream capture with VoicePreprocess for continuous analysis
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { WebRTCStreamCapture, createWebRTCCapture } from './stream-capture';
|
||||||
|
|
||||||
|
// Type definitions for real-time processing
|
||||||
|
export interface AudioChunk {
|
||||||
|
id: string;
|
||||||
|
timestamp: number;
|
||||||
|
data: Float32Array;
|
||||||
|
duration: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface VoiceprintResult {
|
||||||
|
chunkId: string;
|
||||||
|
timestamp: number;
|
||||||
|
features: AudioFeatures;
|
||||||
|
embedding: number[];
|
||||||
|
confidence: number;
|
||||||
|
status: 'complete' | 'error';
|
||||||
|
}
|
||||||
|
|
||||||
|
// Audio chunk configuration
|
||||||
|
export interface AudioChunkConfig {
|
||||||
|
chunkDuration: number;
|
||||||
|
overlapDuration: number;
|
||||||
|
sampleRate: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Preprocessor interfaces (copied from AudioPreprocessor for standalone usage)
|
||||||
|
export interface PreprocessedAudio {
|
||||||
|
audio: Buffer;
|
||||||
|
sampleRate: number;
|
||||||
|
channels: number;
|
||||||
|
durationSec: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface AudioFeatures {
|
||||||
|
mfccs: number[][];
|
||||||
|
zeroCrossingRate: number;
|
||||||
|
spectralCentroid: number;
|
||||||
|
spectralRollOff: number;
|
||||||
|
durationSec: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface PipelineConfig {
|
||||||
|
chunkDuration: number; // 5000ms
|
||||||
|
overlapDuration: number; // 1000ms
|
||||||
|
sampleRate: number; // 16000 Hz
|
||||||
|
onAnalysisComplete?: (result: VoiceprintResult) => void;
|
||||||
|
onStreamError?: (error: Error) => void;
|
||||||
|
onError?: (error: Error) => void;
|
||||||
|
}
|
||||||
|
|
||||||
|
const DEFAULT_CONFIG: PipelineConfig = {
|
||||||
|
chunkDuration: 5000,
|
||||||
|
overlapDuration: 1000,
|
||||||
|
sampleRate: 16000
|
||||||
|
};
|
||||||
|
|
||||||
|
export class AudioPipeline {
|
||||||
|
private streamCapture: WebRTCStreamCapture | null = null;
|
||||||
|
private isRunning: boolean = false;
|
||||||
|
private chunkBuffer: AudioChunk[] = [];
|
||||||
|
private maxBufferLength: number = 10;
|
||||||
|
|
||||||
|
private onChunkReady?: (chunk: AudioChunk) => void;
|
||||||
|
private onAnalysisComplete?: (result: VoiceprintResult) => void;
|
||||||
|
private onPipelineError?: (error: Error) => void;
|
||||||
|
|
||||||
|
constructor(private config: PipelineConfig = DEFAULT_CONFIG) {}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initialize pipeline components
|
||||||
|
*/
|
||||||
|
async initialize(): Promise<void> {
|
||||||
|
// Initialize WebRTC stream capture
|
||||||
|
this.streamCapture = createWebRTCCapture({
|
||||||
|
chunkDuration: this.config.chunkDuration,
|
||||||
|
overlapDuration: this.config.overlapDuration,
|
||||||
|
sampleRate: this.config.sampleRate
|
||||||
|
});
|
||||||
|
|
||||||
|
// Connect WebRTC chunk processing
|
||||||
|
this.streamCapture.onChunkReady = (rawAudio, timestamp) => {
|
||||||
|
this.handleRawChunk(rawAudio, timestamp);
|
||||||
|
};
|
||||||
|
|
||||||
|
// Connect stream error handling
|
||||||
|
this.streamCapture.onStreamError = (error) => {
|
||||||
|
this.onPipelineError?.(error);
|
||||||
|
};
|
||||||
|
|
||||||
|
console.log('[Pipeline] Initialized');
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Process raw audio chunk from WebRTC
|
||||||
|
*/
|
||||||
|
private async handleRawChunk(rawAudio: Float32Array, timestamp: number): Promise<void> {
|
||||||
|
try {
|
||||||
|
// Create audio chunk
|
||||||
|
const chunk: AudioChunk = {
|
||||||
|
id: `chunk-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`,
|
||||||
|
timestamp,
|
||||||
|
data: rawAudio,
|
||||||
|
duration: this.config.chunkDuration / 1000
|
||||||
|
};
|
||||||
|
|
||||||
|
// Add to buffer
|
||||||
|
this.chunkBuffer.push(chunk);
|
||||||
|
|
||||||
|
// Maintain buffer size
|
||||||
|
if (this.chunkBuffer.length > this.maxBufferLength) {
|
||||||
|
const removed = this.chunkBuffer.shift();
|
||||||
|
if (removed) {
|
||||||
|
// Process removed chunk with overlap
|
||||||
|
await this.processChunk(removed);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Process current chunk
|
||||||
|
await this.processChunk(chunk);
|
||||||
|
|
||||||
|
} catch (error) {
|
||||||
|
const err = error instanceof Error ? error : new Error(String(error));
|
||||||
|
console.error('[Pipeline] Error processing chunk:', err);
|
||||||
|
this.onPipelineError?.(err);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert Float32Array to 16-bit PCM Buffer
|
||||||
|
*/
|
||||||
|
private float32ArrayToBuffer(floatData: Float32Array): Buffer {
|
||||||
|
const int16Array = new Int16Array(floatData.length);
|
||||||
|
for (let i = 0; i < floatData.length; i++) {
|
||||||
|
// Clamp and scale to 16-bit range
|
||||||
|
const clamped = Math.max(-1, Math.min(1, floatData[i]));
|
||||||
|
int16Array[i] = Math.round(clamped * 32767);
|
||||||
|
}
|
||||||
|
return Buffer.from(int16Array.buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Process a single audio chunk (mock implementation)
|
||||||
|
*/
|
||||||
|
private async processChunk(chunk: AudioChunk): Promise<VoiceprintResult> {
|
||||||
|
try {
|
||||||
|
// Generate mock features
|
||||||
|
// Convert Float32Array to Buffer properly
|
||||||
|
const int16Array = new Int16Array(chunk.data.length);
|
||||||
|
for (let i = 0; i < chunk.data.length; i++) {
|
||||||
|
const clamped = Math.max(-1, Math.min(1, chunk.data[i]));
|
||||||
|
int16Array[i] = Math.round(clamped * 32767);
|
||||||
|
}
|
||||||
|
const dataBuffer = Buffer.from(int16Array.buffer);
|
||||||
|
const features: AudioFeatures = await extractMockFeatures(dataBuffer, chunk.duration);
|
||||||
|
|
||||||
|
// Generate embedding (placeholder - would use actual embedding service)
|
||||||
|
const embedding = this.generatePlaceholderEmbedding(features);
|
||||||
|
|
||||||
|
// Create result
|
||||||
|
const result: VoiceprintResult = {
|
||||||
|
chunkId: chunk.id,
|
||||||
|
timestamp: chunk.timestamp,
|
||||||
|
features,
|
||||||
|
embedding,
|
||||||
|
confidence: 0.95, // Placeholder - would come from actual analysis
|
||||||
|
status: 'complete'
|
||||||
|
};
|
||||||
|
|
||||||
|
// Emit result
|
||||||
|
this.onAnalysisComplete?.(result);
|
||||||
|
|
||||||
|
return result;
|
||||||
|
|
||||||
|
} catch (error) {
|
||||||
|
const err = error instanceof Error ? error : new Error(String(error));
|
||||||
|
console.error('[Pipeline] Preprocessing error:', err);
|
||||||
|
this.onPipelineError?.(err);
|
||||||
|
throw err;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate placeholder embedding (would use actual embedding service)
|
||||||
|
*/
|
||||||
|
private generatePlaceholderEmbedding(features: AudioFeatures): number[] {
|
||||||
|
// Placeholder embedding - would be replaced with actual embedding generation
|
||||||
|
const embedding: number[] = [];
|
||||||
|
|
||||||
|
// Use spectral features as proxy for embedding
|
||||||
|
embedding.push(features.spectralCentroid);
|
||||||
|
embedding.push(features.spectralRollOff);
|
||||||
|
embedding.push(features.zeroCrossingRate);
|
||||||
|
|
||||||
|
// MFCC summary (first few coefficients)
|
||||||
|
if (features.mfccs && features.mfccs.length > 0) {
|
||||||
|
for (let i = 0; i < Math.min(5, features.mfccs[0].length); i++) {
|
||||||
|
embedding.push(features.mfccs[0][i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Normalize and pad
|
||||||
|
while (embedding.length < 128) {
|
||||||
|
embedding.push(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
return embedding;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Start the real-time analysis pipeline
|
||||||
|
*/
|
||||||
|
async start(): Promise<void> {
|
||||||
|
if (this.isRunning) {
|
||||||
|
console.log('[Pipeline] Already running');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
await this.initialize();
|
||||||
|
if (this.streamCapture) {
|
||||||
|
await this.streamCapture.start();
|
||||||
|
}
|
||||||
|
this.isRunning = true;
|
||||||
|
|
||||||
|
console.log('[Pipeline] Real-time analysis started');
|
||||||
|
|
||||||
|
} catch (error) {
|
||||||
|
const err = error instanceof Error ? error : new Error(String(error));
|
||||||
|
console.error('[Pipeline] Failed to start:', err);
|
||||||
|
this.onPipelineError?.(err);
|
||||||
|
throw err;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Stop the pipeline
|
||||||
|
*/
|
||||||
|
async stop(): Promise<void> {
|
||||||
|
if (!this.isRunning) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Drain remaining buffer
|
||||||
|
while (this.chunkBuffer.length > 0) {
|
||||||
|
const chunk = this.chunkBuffer.shift();
|
||||||
|
if (chunk) {
|
||||||
|
await this.processChunk(chunk);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stop WebRTC capture
|
||||||
|
if (this.streamCapture) {
|
||||||
|
this.streamCapture.stop();
|
||||||
|
}
|
||||||
|
|
||||||
|
this.isRunning = false;
|
||||||
|
console.log('[Pipeline] Stopped');
|
||||||
|
|
||||||
|
} catch (error) {
|
||||||
|
const err = error instanceof Error ? error : new Error(String(error));
|
||||||
|
console.error('[Pipeline] Error stopping:', err);
|
||||||
|
this.onPipelineError?.(err);
|
||||||
|
throw err;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get pipeline status
|
||||||
|
*/
|
||||||
|
getStatus(): {
|
||||||
|
isRunning: boolean;
|
||||||
|
bufferLength: number;
|
||||||
|
streamActive: boolean;
|
||||||
|
} {
|
||||||
|
return {
|
||||||
|
isRunning: this.isRunning,
|
||||||
|
bufferLength: this.chunkBuffer.length,
|
||||||
|
streamActive: this.streamCapture?.isRecording || false
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extract mock features from audio buffer
|
||||||
|
*/
|
||||||
|
async function extractMockFeatures(buffer: Buffer, duration: number): Promise<AudioFeatures> {
|
||||||
|
// Simple mock feature extraction for demonstration
|
||||||
|
// In production, this would use actual audio processing
|
||||||
|
const numMfccs = 13;
|
||||||
|
const mfccs: number[][] = [];
|
||||||
|
|
||||||
|
// Generate mock MFCCs based on buffer hash
|
||||||
|
const bufferHash = buffer.reduce((acc, byte) => acc + byte, 0);
|
||||||
|
for (let i = 0; i < numMfccs; i++) {
|
||||||
|
const coefficients: number[] = [];
|
||||||
|
for (let j = 0; j < 20; j++) {
|
||||||
|
coefficients.push(Math.abs(Math.sin((i * j + bufferHash) * 0.1)) * 0.5 + 0.25);
|
||||||
|
}
|
||||||
|
mfccs.push(coefficients);
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
mfccs,
|
||||||
|
zeroCrossingRate: 0.02 + Math.random() * 0.03,
|
||||||
|
spectralCentroid: 1000 + Math.random() * 2000,
|
||||||
|
spectralRollOff: 3000 + Math.random() * 1000,
|
||||||
|
durationSec: duration
|
||||||
|
};
|
||||||
|
}
|
||||||
184
packages/core/src/audio/webrtc/stream-capture.ts
Normal file
184
packages/core/src/audio/webrtc/stream-capture.ts
Normal file
@@ -0,0 +1,184 @@
|
|||||||
|
/**
|
||||||
|
* WebRTC Audio Stream Capture
|
||||||
|
* Captures audio from screen/audio sharing using WebRTC APIs
|
||||||
|
* Implements 5-second chunks with 1-second overlap for sliding window analysis
|
||||||
|
*/
|
||||||
|
|
||||||
|
interface WebRTCStreamConfig {
|
||||||
|
chunkDuration: number; // 5000ms
|
||||||
|
overlapDuration: number; // 1000ms
|
||||||
|
sampleRate: number; // 16000 Hz for voiceprint compatibility
|
||||||
|
}
|
||||||
|
|
||||||
|
const DEFAULT_CONFIG: WebRTCStreamConfig = {
|
||||||
|
chunkDuration: 5000,
|
||||||
|
overlapDuration: 1000,
|
||||||
|
sampleRate: 16000
|
||||||
|
};
|
||||||
|
|
||||||
|
export class WebRTCStreamCapture {
|
||||||
|
private stream: MediaStream | null = null;
|
||||||
|
private audioContext: AudioContext | null = null;
|
||||||
|
private analyser: AnalyserNode | null = null;
|
||||||
|
private source: MediaStreamAudioSourceNode | null = null;
|
||||||
|
private _isRecording: boolean = false;
|
||||||
|
|
||||||
|
private buffer: Float32Array = new Float32Array(0);
|
||||||
|
public onChunkReady?: (chunk: Float32Array, timestamp: number) => void;
|
||||||
|
public onStreamError?: (error: Error) => void;
|
||||||
|
|
||||||
|
constructor(private config: WebRTCStreamConfig = DEFAULT_CONFIG) {}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if currently recording
|
||||||
|
*/
|
||||||
|
public isRecording: boolean = false;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Start capturing audio from screen/audio sharing
|
||||||
|
*/
|
||||||
|
async start(): Promise<void> {
|
||||||
|
if (this.isRecording) {
|
||||||
|
console.log('[WebRTC] Already recording');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Request screen/audio capture with audio
|
||||||
|
this.stream = await navigator.mediaDevices.getDisplayMedia({
|
||||||
|
video: true, // Required for audio capture
|
||||||
|
audio: true
|
||||||
|
});
|
||||||
|
|
||||||
|
// Stop any existing tracks
|
||||||
|
this.stream.getTracks().forEach(track => track.stop());
|
||||||
|
|
||||||
|
// Create audio context and analyser
|
||||||
|
this.audioContext = new AudioContext({
|
||||||
|
sampleRate: this.config.sampleRate
|
||||||
|
});
|
||||||
|
this.analyser = this.audioContext.createAnalyser();
|
||||||
|
this.analyser.fftSize = 2048;
|
||||||
|
|
||||||
|
// Connect stream to audio graph
|
||||||
|
this.source = this.audioContext.createMediaStreamSource(this.stream);
|
||||||
|
this.source.connect(this.analyser);
|
||||||
|
|
||||||
|
this.isRecording = true;
|
||||||
|
console.log('[WebRTC] Stream capture started');
|
||||||
|
|
||||||
|
// Start processing loop
|
||||||
|
this.processAudio();
|
||||||
|
|
||||||
|
// Handle stream termination
|
||||||
|
this.stream.getVideoTracks()[0].onended = () => {
|
||||||
|
console.log('[WebRTC] User stopped sharing');
|
||||||
|
this.stop();
|
||||||
|
};
|
||||||
|
|
||||||
|
} catch (error) {
|
||||||
|
const err = error instanceof Error ? error : new Error(String(error));
|
||||||
|
console.error('[WebRTC] Failed to start stream capture:', err);
|
||||||
|
this.onStreamError?.(err);
|
||||||
|
throw err;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Process audio in real-time with sliding window
|
||||||
|
*/
|
||||||
|
private processAudio(): void {
|
||||||
|
if (!this.audioContext || !this.analyser || !this.isRecording) return;
|
||||||
|
|
||||||
|
if (!this.analyser) return;
|
||||||
|
|
||||||
|
const bufferLength = this.analyser.fftSize;
|
||||||
|
const buffer = new Float32Array(bufferLength);
|
||||||
|
|
||||||
|
const processFrame = () => {
|
||||||
|
if (!this.isRecording) return;
|
||||||
|
|
||||||
|
if (!this.analyser) return;
|
||||||
|
|
||||||
|
this.analyser.getFloatTimeDomainData(buffer);
|
||||||
|
|
||||||
|
// Get current timestamp
|
||||||
|
const timestamp = this.audioContext?.currentTime ?? 0;
|
||||||
|
|
||||||
|
// Extract audio data for current frame
|
||||||
|
// Use first 512 samples for voice analysis (reduced for faster processing)
|
||||||
|
const audioData = buffer.slice(0, 512);
|
||||||
|
|
||||||
|
// Prepare chunk for analysis
|
||||||
|
if (audioData.length > 0) {
|
||||||
|
this.onChunkReady?.(audioData, timestamp);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Schedule next frame with overlap
|
||||||
|
const frameDuration = this.config.chunkDuration - this.config.overlapDuration;
|
||||||
|
setTimeout(processFrame, frameDuration);
|
||||||
|
};
|
||||||
|
|
||||||
|
processFrame();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Stop audio capture
|
||||||
|
*/
|
||||||
|
stop(): void {
|
||||||
|
this._isRecording = false;
|
||||||
|
|
||||||
|
if (this.stream) {
|
||||||
|
this.stream.getTracks().forEach(track => track.stop());
|
||||||
|
this.stream = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (this.source) {
|
||||||
|
this.source.disconnect();
|
||||||
|
this.source = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (this.analyser) {
|
||||||
|
this.analyser.disconnect();
|
||||||
|
this.analyser = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (this.audioContext) {
|
||||||
|
this.audioContext.close();
|
||||||
|
this.audioContext = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log('[WebRTC] Stream capture stopped');
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get stream metadata
|
||||||
|
*/
|
||||||
|
getMetadata(): {
|
||||||
|
isActive: boolean;
|
||||||
|
sampleRate: number;
|
||||||
|
channels: number;
|
||||||
|
} {
|
||||||
|
if (!this.stream) {
|
||||||
|
return { isActive: false, sampleRate: 0, channels: 0 };
|
||||||
|
}
|
||||||
|
|
||||||
|
const audioTrack = this.stream.getAudioTracks()[0];
|
||||||
|
if (!audioTrack) {
|
||||||
|
return { isActive: true, sampleRate: this.config.sampleRate, channels: 1 };
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
isActive: true,
|
||||||
|
sampleRate: this.config.sampleRate,
|
||||||
|
channels: audioTrack.getSettings().channelCount || 1
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Factory function for creating stream capture with auto-start
|
||||||
|
*/
|
||||||
|
export function createWebRTCCapture(config?: WebRTCStreamConfig): WebRTCStreamCapture {
|
||||||
|
return new WebRTCStreamCapture(config || DEFAULT_CONFIG);
|
||||||
|
}
|
||||||
22
packages/core/tsconfig.json
Normal file
22
packages/core/tsconfig.json
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
{
|
||||||
|
"compilerOptions": {
|
||||||
|
"target": "ES2020",
|
||||||
|
"module": "commonjs",
|
||||||
|
"lib": ["ES2020", "DOM"],
|
||||||
|
"outDir": "./dist",
|
||||||
|
"rootDir": "./src",
|
||||||
|
"strict": true,
|
||||||
|
"esModuleInterop": true,
|
||||||
|
"skipLibCheck": true,
|
||||||
|
"forceConsistentCasingInFileNames": true,
|
||||||
|
"declaration": true,
|
||||||
|
"resolveJsonModule": true,
|
||||||
|
"moduleResolution": "node",
|
||||||
|
"baseUrl": ".",
|
||||||
|
"paths": {
|
||||||
|
"@/*": ["src/*"]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"include": ["src/**/*"],
|
||||||
|
"exclude": ["node_modules", "dist"]
|
||||||
|
}
|
||||||
@@ -57,6 +57,25 @@ enum DataSource {
|
|||||||
HONEYPOT
|
HONEYPOT
|
||||||
}
|
}
|
||||||
|
|
||||||
|
enum AnalysisJobStatus {
|
||||||
|
PENDING
|
||||||
|
RUNNING
|
||||||
|
COMPLETED
|
||||||
|
FAILED
|
||||||
|
}
|
||||||
|
|
||||||
|
enum AnalysisType {
|
||||||
|
SYNTHETIC_DETECTION
|
||||||
|
VOICE_MATCH
|
||||||
|
BATCH
|
||||||
|
}
|
||||||
|
|
||||||
|
enum DetectionVerdict {
|
||||||
|
NATURAL
|
||||||
|
SYNTHETIC
|
||||||
|
UNCERTAIN
|
||||||
|
}
|
||||||
|
|
||||||
model User {
|
model User {
|
||||||
id String @id @default(uuid())
|
id String @id @default(uuid())
|
||||||
email String @unique
|
email String @unique
|
||||||
@@ -138,3 +157,54 @@ model ScanJob {
|
|||||||
@@index([userId, status])
|
@@index([userId, status])
|
||||||
@@index([createdAt])
|
@@index([createdAt])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
model VoiceEnrollment {
|
||||||
|
id String @id @default(uuid())
|
||||||
|
userId String
|
||||||
|
user User @relation(fields: [userId], references: [id], onDelete: Cascade)
|
||||||
|
label String
|
||||||
|
embeddingVector Float[]
|
||||||
|
embeddingDim Int @default(192)
|
||||||
|
audioFilePath String?
|
||||||
|
sampleRate Int @default(16000)
|
||||||
|
durationSec Float?
|
||||||
|
createdAt DateTime @default(now())
|
||||||
|
updatedAt DateTime @updatedAt
|
||||||
|
|
||||||
|
@@index([userId])
|
||||||
|
@@index([embeddingDim])
|
||||||
|
}
|
||||||
|
|
||||||
|
model AnalysisJob {
|
||||||
|
id String @id @default(uuid())
|
||||||
|
userId String
|
||||||
|
user User @relation(fields: [userId], references: [id], onDelete: Cascade)
|
||||||
|
analysisType AnalysisType
|
||||||
|
audioFilePath String
|
||||||
|
status AnalysisJobStatus @default(PENDING)
|
||||||
|
result AnalysisResult?
|
||||||
|
errorMessage String?
|
||||||
|
completedAt DateTime?
|
||||||
|
createdAt DateTime @default(now())
|
||||||
|
|
||||||
|
@@index([userId, status])
|
||||||
|
@@index([createdAt])
|
||||||
|
}
|
||||||
|
|
||||||
|
model AnalysisResult {
|
||||||
|
id String @id @default(uuid())
|
||||||
|
analysisJobId String @unique
|
||||||
|
analysisJob AnalysisJob @relation(fields: [analysisJobId], references: [id], onDelete: Cascade)
|
||||||
|
syntheticScore Float
|
||||||
|
verdict DetectionVerdict
|
||||||
|
matchedEnrollmentId String?
|
||||||
|
matchedSimilarity Float?
|
||||||
|
confidence Float
|
||||||
|
processingTimeMs Int
|
||||||
|
modelVersion String?
|
||||||
|
metadata String?
|
||||||
|
createdAt DateTime @default(now())
|
||||||
|
|
||||||
|
@@index([analysisJobId])
|
||||||
|
@@index([verdict])
|
||||||
|
}
|
||||||
|
|||||||
54
packages/jobs/src/voiceprint.jobs.ts
Normal file
54
packages/jobs/src/voiceprint.jobs.ts
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
import { Queue, Worker } from "bullmq";
|
||||||
|
import { Redis } from "ioredis";
|
||||||
|
import { AnalysisService } from "@shieldai/voiceprint";
|
||||||
|
|
||||||
|
const redisUrl = process.env.REDIS_URL || "redis://localhost:6379";
|
||||||
|
const connection = new Redis(redisUrl);
|
||||||
|
|
||||||
|
const analysisQueue = new Queue("voiceprint-analysis", { connection });
|
||||||
|
|
||||||
|
const analysisWorker = new Worker(
|
||||||
|
"voiceprint-analysis",
|
||||||
|
async (job) => {
|
||||||
|
const { userId, audioBuffer, sampleRate, analysisType } = job.data;
|
||||||
|
const analysisService = new AnalysisService();
|
||||||
|
const result = await analysisService.analyze(
|
||||||
|
{
|
||||||
|
audioBuffer: Buffer.from(audioBuffer, "base64"),
|
||||||
|
sampleRate,
|
||||||
|
analysisType,
|
||||||
|
},
|
||||||
|
userId
|
||||||
|
);
|
||||||
|
return { jobId: result.jobId, completedAt: new Date().toISOString() };
|
||||||
|
},
|
||||||
|
{ connection, concurrency: 2 }
|
||||||
|
);
|
||||||
|
|
||||||
|
analysisWorker.on("completed", (job) => {
|
||||||
|
console.log(`[VoicePrint] Job ${job.id} completed: ${JSON.stringify(job.returnvalue)}`);
|
||||||
|
});
|
||||||
|
|
||||||
|
analysisWorker.on("failed", (job, err) => {
|
||||||
|
console.error(`[VoicePrint] Job ${job.id} failed: ${err.message}`);
|
||||||
|
});
|
||||||
|
|
||||||
|
export async function addAnalysisJob(
|
||||||
|
userId: string,
|
||||||
|
audioBuffer: Buffer,
|
||||||
|
sampleRate?: number,
|
||||||
|
analysisType?: string
|
||||||
|
) {
|
||||||
|
return analysisQueue.add("analyze", {
|
||||||
|
userId,
|
||||||
|
audioBuffer: audioBuffer.toString("base64"),
|
||||||
|
sampleRate,
|
||||||
|
analysisType,
|
||||||
|
}, {
|
||||||
|
attempts: 3,
|
||||||
|
backoff: { type: "exponential", delay: 5000 },
|
||||||
|
jobId: `vp-${userId}-${Date.now()}`,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log("[VoicePrint] Analysis worker started");
|
||||||
@@ -82,3 +82,62 @@ export interface AlertInput {
|
|||||||
channel: AlertChannel;
|
channel: AlertChannel;
|
||||||
dedupKey: string;
|
dedupKey: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export const AnalysisJobStatus = {
|
||||||
|
PENDING: "PENDING",
|
||||||
|
RUNNING: "RUNNING",
|
||||||
|
COMPLETED: "COMPLETED",
|
||||||
|
FAILED: "FAILED",
|
||||||
|
} as const;
|
||||||
|
export type AnalysisJobStatus = (typeof AnalysisJobStatus)[keyof typeof AnalysisJobStatus];
|
||||||
|
|
||||||
|
export const AnalysisType = {
|
||||||
|
SYNTHETIC_DETECTION: "SYNTHETIC_DETECTION",
|
||||||
|
VOICE_MATCH: "VOICE_MATCH",
|
||||||
|
BATCH: "BATCH",
|
||||||
|
} as const;
|
||||||
|
export type AnalysisType = (typeof AnalysisType)[keyof typeof AnalysisType];
|
||||||
|
|
||||||
|
export const DetectionVerdict = {
|
||||||
|
NATURAL: "NATURAL",
|
||||||
|
SYNTHETIC: "SYNTHETIC",
|
||||||
|
UNCERTAIN: "UNCERTAIN",
|
||||||
|
} as const;
|
||||||
|
export type DetectionVerdict = (typeof DetectionVerdict)[keyof typeof DetectionVerdict];
|
||||||
|
|
||||||
|
export interface VoiceEnrollmentInput {
|
||||||
|
label: string;
|
||||||
|
audioBuffer: Buffer;
|
||||||
|
sampleRate?: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface AnalyzeAudioInput {
|
||||||
|
audioBuffer: Buffer;
|
||||||
|
sampleRate?: number;
|
||||||
|
analysisType?: AnalysisType;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface BatchAnalyzeInput {
|
||||||
|
audioBuffers: Array<{ name: string; buffer: Buffer; sampleRate?: number }>;
|
||||||
|
analysisType?: AnalysisType;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface AnalysisResultOutput {
|
||||||
|
jobId: string;
|
||||||
|
syntheticScore: number;
|
||||||
|
verdict: DetectionVerdict;
|
||||||
|
confidence: number;
|
||||||
|
matchedEnrollmentId?: string;
|
||||||
|
matchedSimilarity?: number;
|
||||||
|
processingTimeMs: number;
|
||||||
|
modelVersion?: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface VoiceEnrollmentOutput {
|
||||||
|
id: string;
|
||||||
|
label: string;
|
||||||
|
embeddingDim: number;
|
||||||
|
sampleRate: number;
|
||||||
|
durationSec?: number;
|
||||||
|
createdAt: Date;
|
||||||
|
}
|
||||||
|
|||||||
19
services/voiceprint/package.json
Normal file
19
services/voiceprint/package.json
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
{
|
||||||
|
"name": "@shieldai/voiceprint",
|
||||||
|
"version": "0.1.0",
|
||||||
|
"main": "./dist/index.js",
|
||||||
|
"types": "./dist/index.d.ts",
|
||||||
|
"scripts": {
|
||||||
|
"build": "tsc",
|
||||||
|
"test": "vitest run",
|
||||||
|
"lint": "eslint src/"
|
||||||
|
},
|
||||||
|
"dependencies": {
|
||||||
|
"@shieldai/db": "0.1.0",
|
||||||
|
"@shieldai/types": "0.1.0",
|
||||||
|
"node-cache": "^5.1.2"
|
||||||
|
},
|
||||||
|
"exports": {
|
||||||
|
".": "./src/index.ts"
|
||||||
|
}
|
||||||
|
}
|
||||||
183
services/voiceprint/src/analysis/AnalysisService.ts
Normal file
183
services/voiceprint/src/analysis/AnalysisService.ts
Normal file
@@ -0,0 +1,183 @@
|
|||||||
|
import prisma from "@shieldai/db";
|
||||||
|
import { AudioPreprocessor, AudioFeatures } from "../preprocessor/AudioPreprocessor";
|
||||||
|
import { EmbeddingService, EmbeddingOutput } from "../embedding/EmbeddingService";
|
||||||
|
import { VoiceEnrollmentService } from "../enrollment/VoiceEnrollmentService";
|
||||||
|
import {
|
||||||
|
AnalyzeAudioInput,
|
||||||
|
AnalysisJobStatus,
|
||||||
|
AnalysisType,
|
||||||
|
DetectionVerdict,
|
||||||
|
AnalysisResultOutput,
|
||||||
|
} from "@shieldai/types";
|
||||||
|
|
||||||
|
export class AnalysisService {
|
||||||
|
private preprocessor: AudioPreprocessor;
|
||||||
|
private embeddingService: EmbeddingService;
|
||||||
|
private enrollmentService: VoiceEnrollmentService;
|
||||||
|
private readonly syntheticThreshold = 0.7;
|
||||||
|
private readonly uncertainThreshold = 0.4;
|
||||||
|
|
||||||
|
constructor() {
|
||||||
|
this.preprocessor = new AudioPreprocessor();
|
||||||
|
this.embeddingService = new EmbeddingService();
|
||||||
|
this.enrollmentService = new VoiceEnrollmentService();
|
||||||
|
}
|
||||||
|
|
||||||
|
async analyze(input: AnalyzeAudioInput, userId: string): Promise<AnalysisResultOutput> {
|
||||||
|
const startTime = Date.now();
|
||||||
|
|
||||||
|
const job = await prisma.analysisJob.create({
|
||||||
|
data: {
|
||||||
|
userId,
|
||||||
|
analysisType: input.analysisType || AnalysisType.SYNTHETIC_DETECTION,
|
||||||
|
audioFilePath: `voiceprint/${userId}/${Date.now()}.wav`,
|
||||||
|
status: AnalysisJobStatus.RUNNING,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
try {
|
||||||
|
const preprocessed = await this.preprocessor.preprocess(input.audioBuffer, input.sampleRate);
|
||||||
|
const features = await this.preprocessor.extractFeatures(preprocessed.audio);
|
||||||
|
const embedding = await this.embeddingService.extract(preprocessed.audio);
|
||||||
|
|
||||||
|
const syntheticScore = await this.classifySynthetic(features, embedding);
|
||||||
|
const verdict = this.determineVerdict(syntheticScore);
|
||||||
|
const confidence = this.computeConfidence(syntheticScore, verdict);
|
||||||
|
|
||||||
|
let matchedEnrollmentId: string | undefined;
|
||||||
|
let matchedSimilarity: number | undefined;
|
||||||
|
|
||||||
|
if (input.analysisType === AnalysisType.VOICE_MATCH) {
|
||||||
|
const match = await this.enrollmentService.matchVoice(input.audioBuffer, userId);
|
||||||
|
if (match) {
|
||||||
|
matchedEnrollmentId = match.enrollmentId;
|
||||||
|
matchedSimilarity = match.similarity;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const processingTimeMs = Date.now() - startTime;
|
||||||
|
|
||||||
|
const result = await prisma.analysisResult.create({
|
||||||
|
data: {
|
||||||
|
analysisJobId: job.id,
|
||||||
|
syntheticScore,
|
||||||
|
verdict,
|
||||||
|
confidence,
|
||||||
|
processingTimeMs,
|
||||||
|
matchedEnrollmentId,
|
||||||
|
matchedSimilarity,
|
||||||
|
modelVersion: this.embeddingService.getModelVersion(),
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
await prisma.analysisJob.update({
|
||||||
|
where: { id: job.id },
|
||||||
|
data: {
|
||||||
|
status: AnalysisJobStatus.COMPLETED,
|
||||||
|
completedAt: new Date(),
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
return {
|
||||||
|
jobId: job.id,
|
||||||
|
syntheticScore: result.syntheticScore,
|
||||||
|
verdict: result.verdict,
|
||||||
|
confidence: result.confidence,
|
||||||
|
matchedEnrollmentId: result.matchedEnrollmentId || undefined,
|
||||||
|
matchedSimilarity: result.matchedSimilarity || undefined,
|
||||||
|
processingTimeMs: result.processingTimeMs,
|
||||||
|
modelVersion: result.modelVersion || undefined,
|
||||||
|
};
|
||||||
|
} catch (err) {
|
||||||
|
const message = err instanceof Error ? err.message : "Analysis failed";
|
||||||
|
await prisma.analysisJob.update({
|
||||||
|
where: { id: job.id },
|
||||||
|
data: {
|
||||||
|
status: AnalysisJobStatus.FAILED,
|
||||||
|
errorMessage: message,
|
||||||
|
completedAt: new Date(),
|
||||||
|
},
|
||||||
|
});
|
||||||
|
throw err;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async getResult(jobId: string): Promise<AnalysisResultOutput | null> {
|
||||||
|
const job = await prisma.analysisJob.findUnique({
|
||||||
|
where: { id: jobId },
|
||||||
|
include: { result: true },
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!job || !job.result) return null;
|
||||||
|
|
||||||
|
const r = job.result;
|
||||||
|
return {
|
||||||
|
jobId,
|
||||||
|
syntheticScore: r.syntheticScore,
|
||||||
|
verdict: r.verdict,
|
||||||
|
confidence: r.confidence,
|
||||||
|
matchedEnrollmentId: r.matchedEnrollmentId || undefined,
|
||||||
|
matchedSimilarity: r.matchedSimilarity || undefined,
|
||||||
|
processingTimeMs: r.processingTimeMs,
|
||||||
|
modelVersion: r.modelVersion || undefined,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
async getUserResults(userId: string, limit: number = 20): Promise<AnalysisResultOutput[]> {
|
||||||
|
const jobs = await prisma.analysisJob.findMany({
|
||||||
|
where: { userId, status: AnalysisJobStatus.COMPLETED },
|
||||||
|
orderBy: { createdAt: "desc" },
|
||||||
|
take: limit,
|
||||||
|
include: { result: true },
|
||||||
|
});
|
||||||
|
|
||||||
|
return jobs
|
||||||
|
.filter((j) => j.result)
|
||||||
|
.map((j) => {
|
||||||
|
const r = j.result!;
|
||||||
|
return {
|
||||||
|
jobId: j.id,
|
||||||
|
syntheticScore: r.syntheticScore,
|
||||||
|
verdict: r.verdict,
|
||||||
|
confidence: r.confidence,
|
||||||
|
matchedEnrollmentId: r.matchedEnrollmentId || undefined,
|
||||||
|
matchedSimilarity: r.matchedSimilarity || undefined,
|
||||||
|
processingTimeMs: r.processingTimeMs,
|
||||||
|
modelVersion: r.modelVersion || undefined,
|
||||||
|
};
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
private async classifySynthetic(
|
||||||
|
features: AudioFeatures,
|
||||||
|
embedding: EmbeddingOutput
|
||||||
|
): Promise<number> {
|
||||||
|
const modelScore = await this.embeddingService.classify(embedding.vector);
|
||||||
|
|
||||||
|
const zcrAnomaly = Math.abs(features.zeroCrossingRate - 0.05) / 0.05;
|
||||||
|
const spectralAnomaly = Math.abs(features.spectralCentroid - 500) / 500;
|
||||||
|
|
||||||
|
const artifactScore = Math.min(1, (zcrAnomaly + spectralAnomaly) / 4);
|
||||||
|
|
||||||
|
return 0.7 * modelScore + 0.3 * artifactScore;
|
||||||
|
}
|
||||||
|
|
||||||
|
private determineVerdict(score: number): DetectionVerdict {
|
||||||
|
if (score >= this.syntheticThreshold) return DetectionVerdict.SYNTHETIC;
|
||||||
|
if (score <= this.uncertainThreshold) return DetectionVerdict.NATURAL;
|
||||||
|
return DetectionVerdict.UNCERTAIN;
|
||||||
|
}
|
||||||
|
|
||||||
|
private computeConfidence(score: number, verdict: DetectionVerdict): number {
|
||||||
|
if (verdict === DetectionVerdict.SYNTHETIC) {
|
||||||
|
return Math.min(1, (score - this.syntheticThreshold) / (1 - this.syntheticThreshold));
|
||||||
|
}
|
||||||
|
if (verdict === DetectionVerdict.NATURAL) {
|
||||||
|
return Math.min(1, (this.uncertainThreshold - score) / this.uncertainThreshold);
|
||||||
|
}
|
||||||
|
return 1 - Math.min(
|
||||||
|
Math.abs(score - this.uncertainThreshold) / (this.syntheticThreshold - this.uncertainThreshold),
|
||||||
|
1
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
125
services/voiceprint/src/analysis/BatchAnalysisService.ts
Normal file
125
services/voiceprint/src/analysis/BatchAnalysisService.ts
Normal file
@@ -0,0 +1,125 @@
|
|||||||
|
import prisma from "@shieldai/db";
|
||||||
|
import { AnalysisService } from "./AnalysisService";
|
||||||
|
import {
|
||||||
|
BatchAnalyzeInput,
|
||||||
|
AnalysisJobStatus,
|
||||||
|
AnalysisType,
|
||||||
|
AnalysisResultOutput,
|
||||||
|
} from "@shieldai/types";
|
||||||
|
|
||||||
|
export class BatchAnalysisService {
|
||||||
|
private analysisService: AnalysisService;
|
||||||
|
|
||||||
|
constructor() {
|
||||||
|
this.analysisService = new AnalysisService();
|
||||||
|
}
|
||||||
|
|
||||||
|
async analyzeBatch(
|
||||||
|
input: BatchAnalyzeInput,
|
||||||
|
userId: string
|
||||||
|
): Promise<BatchResult> {
|
||||||
|
const batchId = `batch_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
|
||||||
|
const results: AnalysisResultOutput[] = [];
|
||||||
|
const errors: Array<{ name: string; error: string }> = [];
|
||||||
|
|
||||||
|
for (const audioInput of input.audioBuffers) {
|
||||||
|
try {
|
||||||
|
const result = await this.analysisService.analyze(
|
||||||
|
{
|
||||||
|
audioBuffer: audioInput.buffer,
|
||||||
|
sampleRate: audioInput.sampleRate,
|
||||||
|
analysisType: input.analysisType || AnalysisType.SYNTHETIC_DETECTION,
|
||||||
|
},
|
||||||
|
userId
|
||||||
|
);
|
||||||
|
results.push(result);
|
||||||
|
} catch (err) {
|
||||||
|
const message = err instanceof Error ? err.message : "Analysis failed";
|
||||||
|
errors.push({ name: audioInput.name, error: message });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const batchJob = await prisma.analysisJob.create({
|
||||||
|
data: {
|
||||||
|
userId,
|
||||||
|
analysisType: AnalysisType.BATCH,
|
||||||
|
audioFilePath: `voiceprint/${userId}/${batchId}`,
|
||||||
|
status: errors.length === input.audioBuffers.length
|
||||||
|
? AnalysisJobStatus.FAILED
|
||||||
|
: AnalysisJobStatus.COMPLETED,
|
||||||
|
errorMessage:
|
||||||
|
errors.length > 0 ? `${errors.length} of ${input.audioBuffers.length} files failed` : undefined,
|
||||||
|
completedAt: new Date(),
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
return {
|
||||||
|
batchId,
|
||||||
|
jobId: batchJob.id,
|
||||||
|
totalFiles: input.audioBuffers.length,
|
||||||
|
successfulResults: results.length,
|
||||||
|
failedCount: errors.length,
|
||||||
|
results,
|
||||||
|
errors,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
async getBatchResult(batchJobId: string): Promise<BatchResult | null> {
|
||||||
|
const job = await prisma.analysisJob.findUnique({
|
||||||
|
where: { id: batchJobId },
|
||||||
|
include: { result: true },
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!job) return null;
|
||||||
|
|
||||||
|
const childJobs = await prisma.analysisJob.findMany({
|
||||||
|
where: {
|
||||||
|
userId: job.userId,
|
||||||
|
createdAt: { gte: job.createdAt, lt: new Date(job.createdAt.getTime() + 60000) },
|
||||||
|
id: { not: job.id },
|
||||||
|
},
|
||||||
|
include: { result: true },
|
||||||
|
});
|
||||||
|
|
||||||
|
const results: AnalysisResultOutput[] = [];
|
||||||
|
const errors: Array<{ name: string; error: string }> = [];
|
||||||
|
|
||||||
|
for (const childJob of childJobs) {
|
||||||
|
if (childJob.result) {
|
||||||
|
const r = childJob.result;
|
||||||
|
results.push({
|
||||||
|
jobId: childJob.id,
|
||||||
|
syntheticScore: r.syntheticScore,
|
||||||
|
verdict: r.verdict,
|
||||||
|
confidence: r.confidence,
|
||||||
|
matchedEnrollmentId: r.matchedEnrollmentId || undefined,
|
||||||
|
matchedSimilarity: r.matchedSimilarity || undefined,
|
||||||
|
processingTimeMs: r.processingTimeMs,
|
||||||
|
modelVersion: r.modelVersion || undefined,
|
||||||
|
});
|
||||||
|
} else if (childJob.errorMessage) {
|
||||||
|
errors.push({ name: childJob.audioFilePath, error: childJob.errorMessage });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
batchId: job.audioFilePath.split("/").pop() || job.id,
|
||||||
|
jobId: job.id,
|
||||||
|
totalFiles: childJobs.length,
|
||||||
|
successfulResults: results.length,
|
||||||
|
failedCount: errors.length,
|
||||||
|
results,
|
||||||
|
errors,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface BatchResult {
|
||||||
|
batchId: string;
|
||||||
|
jobId: string;
|
||||||
|
totalFiles: number;
|
||||||
|
successfulResults: number;
|
||||||
|
failedCount: number;
|
||||||
|
results: AnalysisResultOutput[];
|
||||||
|
errors: Array<{ name: string; error: string }>;
|
||||||
|
}
|
||||||
196
services/voiceprint/src/embedding/EmbeddingService.ts
Normal file
196
services/voiceprint/src/embedding/EmbeddingService.ts
Normal file
@@ -0,0 +1,196 @@
|
|||||||
|
import { spawn } from "child_process";
|
||||||
|
|
||||||
|
const EMBEDDING_DIM = 192;
|
||||||
|
const MODEL_VERSION = "ecapa-tdnn-0.1.0-mock";
|
||||||
|
|
||||||
|
export class EmbeddingService {
|
||||||
|
private mlServiceUrl: string;
|
||||||
|
|
||||||
|
constructor() {
|
||||||
|
this.mlServiceUrl = process.env.VOICEPRINT_ML_URL || "http://localhost:8001";
|
||||||
|
}
|
||||||
|
|
||||||
|
async extract(audioBuffer: Buffer): Promise<EmbeddingOutput> {
|
||||||
|
const mlAvailable = await this.checkMLService();
|
||||||
|
|
||||||
|
if (mlAvailable) {
|
||||||
|
return this.extractViaML(audioBuffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
return this.extractMock(audioBuffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
async classify(embedding: number[]): Promise<number> {
|
||||||
|
const mlAvailable = await this.checkMLService();
|
||||||
|
|
||||||
|
if (mlAvailable) {
|
||||||
|
return this.classifyViaML(embedding);
|
||||||
|
}
|
||||||
|
|
||||||
|
return this.classifyMock(embedding);
|
||||||
|
}
|
||||||
|
|
||||||
|
getModelVersion(): string {
|
||||||
|
return MODEL_VERSION;
|
||||||
|
}
|
||||||
|
|
||||||
|
private async extractViaML(audioBuffer: Buffer): Promise<EmbeddingOutput> {
|
||||||
|
return new Promise((resolve, reject) => {
|
||||||
|
const jsonInput = audioBuffer.toString("base64");
|
||||||
|
const proc = spawn("python3", [
|
||||||
|
"-c",
|
||||||
|
`
|
||||||
|
import urllib.request, json, sys
|
||||||
|
req = urllib.request.Request(
|
||||||
|
"${this.mlServiceUrl}/embedding",
|
||||||
|
data=json.dumps({"audio": "${jsonInput.substring(0, 5000)}"}).encode(),
|
||||||
|
headers={"Content-Type": "application/json"}
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
with urllib.request.urlopen(req, timeout=60) as resp:
|
||||||
|
data = json.loads(resp.read())
|
||||||
|
sys.stdout.write(json.dumps({"ok": True, "vector": data.get("embedding", []), "dim": data.get("dimension", ${EMBEDDING_DIM})}))
|
||||||
|
except Exception as e:
|
||||||
|
sys.stdout.write(json.dumps({"ok": False, "error": str(e)}))
|
||||||
|
`,
|
||||||
|
]);
|
||||||
|
|
||||||
|
let output = "";
|
||||||
|
proc.stdout.on("data", (chunk) => { output += chunk.toString(); });
|
||||||
|
proc.on("close", (code) => {
|
||||||
|
try {
|
||||||
|
const result = JSON.parse(output);
|
||||||
|
if (result.ok && result.vector.length === EMBEDDING_DIM) {
|
||||||
|
resolve({ vector: result.vector, dimension: EMBEDDING_DIM });
|
||||||
|
} else {
|
||||||
|
resolve(this.generateMockFromBuffer(audioBuffer));
|
||||||
|
}
|
||||||
|
} catch {
|
||||||
|
resolve(this.generateMockFromBuffer(audioBuffer));
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
private async classifyViaML(embedding: number[]): Promise<number> {
|
||||||
|
return new Promise((resolve) => {
|
||||||
|
const proc = spawn("python3", [
|
||||||
|
"-c",
|
||||||
|
`
|
||||||
|
import urllib.request, json, sys
|
||||||
|
req = urllib.request.Request(
|
||||||
|
"${this.mlServiceUrl}/classify",
|
||||||
|
data=json.dumps({"embedding": ${JSON.stringify(embedding)}}).encode(),
|
||||||
|
headers={"Content-Type": "application/json"}
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
with urllib.request.urlopen(req, timeout=30) as resp:
|
||||||
|
data = json.loads(resp.read())
|
||||||
|
sys.stdout.write(json.dumps({"score": data.get("synthetic_score", 0.5)}))
|
||||||
|
except:
|
||||||
|
sys.stdout.write(json.dumps({"score": 0.5}))
|
||||||
|
`,
|
||||||
|
]);
|
||||||
|
|
||||||
|
let output = "";
|
||||||
|
proc.stdout.on("data", (chunk) => { output += chunk.toString(); });
|
||||||
|
proc.on("close", () => {
|
||||||
|
try {
|
||||||
|
const result = JSON.parse(output);
|
||||||
|
resolve(result.score || 0.5);
|
||||||
|
} catch {
|
||||||
|
resolve(0.5);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
private async extractMock(audioBuffer: Buffer): Promise<EmbeddingOutput> {
|
||||||
|
return this.generateMockFromBuffer(audioBuffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
private async classifyMock(embedding: number[]): Promise<number> {
|
||||||
|
const mean = embedding.reduce((s, v) => s + v, 0) / embedding.length;
|
||||||
|
const variance = embedding.reduce((s, v) => s + (v - mean) ** 2, 0) / embedding.length;
|
||||||
|
const stdDev = Math.sqrt(variance);
|
||||||
|
|
||||||
|
const syntheticIndicators = [
|
||||||
|
stdDev < 0.1 ? 0.8 : 0.2,
|
||||||
|
Math.abs(mean) > 0.5 ? 0.7 : 0.3,
|
||||||
|
this.hasArtifacts(embedding) ? 0.9 : 0.1,
|
||||||
|
];
|
||||||
|
|
||||||
|
return syntheticIndicators.reduce((s, v) => s + v, 0) / syntheticIndicators.length;
|
||||||
|
}
|
||||||
|
|
||||||
|
private generateMockFromBuffer(audioBuffer: Buffer): EmbeddingOutput {
|
||||||
|
const seed = this.computeSeed(audioBuffer);
|
||||||
|
const rng = this.createRNG(seed);
|
||||||
|
const vector: number[] = [];
|
||||||
|
|
||||||
|
for (let i = 0; i < EMBEDDING_DIM; i++) {
|
||||||
|
const u1 = rng();
|
||||||
|
const u2 = rng();
|
||||||
|
const gauss = Math.sqrt(-2 * Math.log(u1)) * Math.cos(2 * Math.PI * u2);
|
||||||
|
vector.push(parseFloat(gauss.toFixed(6)));
|
||||||
|
}
|
||||||
|
|
||||||
|
const norm = Math.sqrt(vector.reduce((s, v) => s + v * v, 0));
|
||||||
|
const normalized = vector.map((v) => parseFloat((v / norm).toFixed(6)));
|
||||||
|
|
||||||
|
return { vector: normalized, dimension: EMBEDDING_DIM };
|
||||||
|
}
|
||||||
|
|
||||||
|
private hasArtifacts(embedding: number[]): boolean {
|
||||||
|
const window = 16;
|
||||||
|
let artifactCount = 0;
|
||||||
|
|
||||||
|
for (let i = 0; i < embedding.length - window; i += window) {
|
||||||
|
const slice = embedding.slice(i, i + window);
|
||||||
|
const localMean = slice.reduce((s, v) => s + v, 0) / slice.length;
|
||||||
|
const localVar = slice.reduce((s, v) => s + (v - localMean) ** 2, 0) / slice.length;
|
||||||
|
|
||||||
|
if (localVar < 0.001) artifactCount++;
|
||||||
|
}
|
||||||
|
|
||||||
|
return artifactCount > embedding.length / window / 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
private async checkMLService(): Promise<boolean> {
|
||||||
|
return new Promise((resolve) => {
|
||||||
|
const proc = spawn("python3", [
|
||||||
|
"-c",
|
||||||
|
`
|
||||||
|
import urllib.request, sys
|
||||||
|
try:
|
||||||
|
urllib.request.urlopen("${this.mlServiceUrl}/health", timeout=2)
|
||||||
|
sys.exit(0)
|
||||||
|
except:
|
||||||
|
sys.exit(1)
|
||||||
|
`,
|
||||||
|
]);
|
||||||
|
proc.on("close", (code) => resolve(code === 0));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
private computeSeed(buffer: Buffer): number {
|
||||||
|
let hash = 0;
|
||||||
|
const sampleSize = Math.min(buffer.length, 1024);
|
||||||
|
for (let i = 0; i < sampleSize; i += 4) {
|
||||||
|
hash = ((hash << 5) - hash + buffer.readInt32LE(i)) | 0;
|
||||||
|
}
|
||||||
|
return Math.abs(hash);
|
||||||
|
}
|
||||||
|
|
||||||
|
private createRNG(seed: number): () => number {
|
||||||
|
return () => {
|
||||||
|
seed = (seed * 1664525 + 1013904223) & 0xffffffff;
|
||||||
|
return (seed >>> 0) / 0xffffffff;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface EmbeddingOutput {
|
||||||
|
vector: number[];
|
||||||
|
dimension: number;
|
||||||
|
}
|
||||||
151
services/voiceprint/src/enrollment/VoiceEnrollmentService.ts
Normal file
151
services/voiceprint/src/enrollment/VoiceEnrollmentService.ts
Normal file
@@ -0,0 +1,151 @@
|
|||||||
|
import prisma from "@shieldai/db";
|
||||||
|
import { EmbeddingService } from "../embedding/EmbeddingService";
|
||||||
|
import { FAISSIndex } from "../indexer/FAISSIndex";
|
||||||
|
import { AudioPreprocessor } from "../preprocessor/AudioPreprocessor";
|
||||||
|
import { VoiceEnrollmentInput, VoiceEnrollmentOutput } from "@shieldai/types";
|
||||||
|
|
||||||
|
export class VoiceEnrollmentService {
|
||||||
|
private embeddingService: EmbeddingService;
|
||||||
|
private faissIndex: FAISSIndex;
|
||||||
|
private preprocessor: AudioPreprocessor;
|
||||||
|
|
||||||
|
constructor() {
|
||||||
|
this.embeddingService = new EmbeddingService();
|
||||||
|
this.faissIndex = new FAISSIndex();
|
||||||
|
this.preprocessor = new AudioPreprocessor();
|
||||||
|
}
|
||||||
|
|
||||||
|
async enroll(input: VoiceEnrollmentInput, userId: string): Promise<VoiceEnrollmentOutput> {
|
||||||
|
const preprocessed = await this.preprocessor.preprocess(input.audioBuffer, input.sampleRate);
|
||||||
|
|
||||||
|
const embedding = await this.embeddingService.extract(preprocessed.audio);
|
||||||
|
|
||||||
|
const enrollment = await prisma.voiceEnrollment.create({
|
||||||
|
data: {
|
||||||
|
userId,
|
||||||
|
label: input.label,
|
||||||
|
embeddingVector: embedding.vector,
|
||||||
|
embeddingDim: embedding.dimension,
|
||||||
|
sampleRate: preprocessed.sampleRate,
|
||||||
|
durationSec: preprocessed.durationSec,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
await this.faissIndex.add(enrollment.id, embedding.vector);
|
||||||
|
|
||||||
|
return {
|
||||||
|
id: enrollment.id,
|
||||||
|
label: enrollment.label,
|
||||||
|
embeddingDim: enrollment.embeddingDim,
|
||||||
|
sampleRate: enrollment.sampleRate,
|
||||||
|
durationSec: enrollment.durationSec,
|
||||||
|
createdAt: enrollment.createdAt,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
async listEnrollments(userId: string): Promise<VoiceEnrollmentOutput[]> {
|
||||||
|
const enrollments = await prisma.voiceEnrollment.findMany({
|
||||||
|
where: { userId },
|
||||||
|
orderBy: { createdAt: "desc" },
|
||||||
|
select: {
|
||||||
|
id: true,
|
||||||
|
label: true,
|
||||||
|
embeddingDim: true,
|
||||||
|
sampleRate: true,
|
||||||
|
durationSec: true,
|
||||||
|
createdAt: true,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
return enrollments.map((e) => ({
|
||||||
|
id: e.id,
|
||||||
|
label: e.label,
|
||||||
|
embeddingDim: e.embeddingDim,
|
||||||
|
sampleRate: e.sampleRate,
|
||||||
|
durationSec: e.durationSec,
|
||||||
|
createdAt: e.createdAt,
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
|
async removeEnrollment(userId: string, enrollmentId: string): Promise<boolean> {
|
||||||
|
const enrollment = await prisma.voiceEnrollment.findFirst({
|
||||||
|
where: { id: enrollmentId, userId },
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!enrollment) {
|
||||||
|
throw new Error(`Enrollment ${enrollmentId} not found for user ${userId}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
await prisma.voiceEnrollment.delete({ where: { id: enrollmentId } });
|
||||||
|
await this.faissIndex.remove(enrollmentId);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
async getEnrollment(enrollmentId: string): Promise<VoiceEnrollmentOutput | null> {
|
||||||
|
const enrollment = await prisma.voiceEnrollment.findUnique({
|
||||||
|
where: { id: enrollmentId },
|
||||||
|
select: {
|
||||||
|
id: true,
|
||||||
|
label: true,
|
||||||
|
embeddingDim: true,
|
||||||
|
sampleRate: true,
|
||||||
|
durationSec: true,
|
||||||
|
createdAt: true,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!enrollment) return null;
|
||||||
|
|
||||||
|
return {
|
||||||
|
id: enrollment.id,
|
||||||
|
label: enrollment.label,
|
||||||
|
embeddingDim: enrollment.embeddingDim,
|
||||||
|
sampleRate: enrollment.sampleRate,
|
||||||
|
durationSec: enrollment.durationSec,
|
||||||
|
createdAt: enrollment.createdAt,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
async matchVoice(
|
||||||
|
audioBuffer: Buffer,
|
||||||
|
userId: string,
|
||||||
|
threshold: number = 0.75
|
||||||
|
): Promise<MatchResult | null> {
|
||||||
|
const preprocessed = await this.preprocessor.preprocess(audioBuffer);
|
||||||
|
const embedding = await this.embeddingService.extract(preprocessed.audio);
|
||||||
|
|
||||||
|
const matches = await this.faissIndex.search(embedding.vector, 5);
|
||||||
|
|
||||||
|
const enrollmentIds = matches.map((m) => m.id);
|
||||||
|
const enrollments = await prisma.voiceEnrollment.findMany({
|
||||||
|
where: {
|
||||||
|
id: { in: enrollmentIds },
|
||||||
|
userId,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
let bestMatch: MatchResult | null = null;
|
||||||
|
|
||||||
|
for (const match of matches) {
|
||||||
|
const enrollment = enrollments.find((e) => e.id === match.id);
|
||||||
|
if (enrollment && match.similarity >= threshold) {
|
||||||
|
if (!bestMatch || match.similarity > bestMatch.similarity) {
|
||||||
|
bestMatch = {
|
||||||
|
enrollmentId: enrollment.id,
|
||||||
|
label: enrollment.label,
|
||||||
|
similarity: match.similarity,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return bestMatch;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface MatchResult {
|
||||||
|
enrollmentId: string;
|
||||||
|
label: string;
|
||||||
|
similarity: number;
|
||||||
|
}
|
||||||
6
services/voiceprint/src/index.ts
Normal file
6
services/voiceprint/src/index.ts
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
// Public entry point of @shieldai/voiceprint: re-exports every service so
// consumers can import from the package root.
export * from "./preprocessor/AudioPreprocessor";
export * from "./enrollment/VoiceEnrollmentService";
export * from "./analysis/AnalysisService";
export * from "./analysis/BatchAnalysisService";
export * from "./embedding/EmbeddingService";
export * from "./indexer/FAISSIndex";
|
||||||
86
services/voiceprint/src/indexer/FAISSIndex.ts
Normal file
86
services/voiceprint/src/indexer/FAISSIndex.ts
Normal file
@@ -0,0 +1,86 @@
|
|||||||
|
export class FAISSIndex {
|
||||||
|
private store: Map<string, number[]> = new Map();
|
||||||
|
private readonly dimension = 192;
|
||||||
|
private initialized = false;
|
||||||
|
|
||||||
|
async initialize(): Promise<void> {
|
||||||
|
if (this.initialized) return;
|
||||||
|
this.initialized = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
async add(id: string, vector: number[]): Promise<void> {
|
||||||
|
this.normalizeInPlace(vector);
|
||||||
|
this.store.set(id, [...vector]);
|
||||||
|
}
|
||||||
|
|
||||||
|
async remove(id: string): Promise<void> {
|
||||||
|
this.store.delete(id);
|
||||||
|
}
|
||||||
|
|
||||||
|
async search(
|
||||||
|
queryVector: number[],
|
||||||
|
topK: number = 5
|
||||||
|
): Promise<SearchResult[]> {
|
||||||
|
const normalized = [...queryVector];
|
||||||
|
this.normalizeInPlace(normalized);
|
||||||
|
|
||||||
|
const scores: Array<{ id: string; similarity: number }> = [];
|
||||||
|
|
||||||
|
for (const [id, vector] of this.store.entries()) {
|
||||||
|
const similarity = this.cosineSimilarity(normalized, vector);
|
||||||
|
scores.push({ id, similarity });
|
||||||
|
}
|
||||||
|
|
||||||
|
scores.sort((a, b) => b.similarity - a.similarity);
|
||||||
|
return scores.slice(0, topK).map((s, i) => ({ rank: i + 1, id: s.id, similarity: s.similarity }));
|
||||||
|
}
|
||||||
|
|
||||||
|
async count(): Promise<number> {
|
||||||
|
return this.store.size;
|
||||||
|
}
|
||||||
|
|
||||||
|
async clear(): Promise<void> {
|
||||||
|
this.store.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
async loadFromDatabase(): Promise<void> {
|
||||||
|
const prisma = (await import("@shieldai/db")).default;
|
||||||
|
const enrollments = await prisma.voiceEnrollment.findMany({
|
||||||
|
select: { id: true, embeddingVector: true },
|
||||||
|
});
|
||||||
|
|
||||||
|
for (const enrollment of enrollments) {
|
||||||
|
this.store.set(enrollment.id, enrollment.embeddingVector);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private cosineSimilarity(a: number[], b: number[]): number {
|
||||||
|
let dotProduct = 0;
|
||||||
|
let normA = 0;
|
||||||
|
let normB = 0;
|
||||||
|
|
||||||
|
for (let i = 0; i < a.length; i++) {
|
||||||
|
dotProduct += a[i] * b[i];
|
||||||
|
normA += a[i] * a[i];
|
||||||
|
normB += b[i] * b[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
const denominator = Math.sqrt(normA) * Math.sqrt(normB);
|
||||||
|
return denominator > 0 ? dotProduct / denominator : 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
private normalizeInPlace(vector: number[]): void {
|
||||||
|
const norm = Math.sqrt(vector.reduce((s, v) => s + v * v, 0));
|
||||||
|
if (norm > 0) {
|
||||||
|
for (let i = 0; i < vector.length; i++) {
|
||||||
|
vector[i] /= norm;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface SearchResult {
|
||||||
|
rank: number;
|
||||||
|
id: string;
|
||||||
|
similarity: number;
|
||||||
|
}
|
||||||
327
services/voiceprint/src/preprocessor/AudioPreprocessor.ts
Normal file
327
services/voiceprint/src/preprocessor/AudioPreprocessor.ts
Normal file
@@ -0,0 +1,327 @@
|
|||||||
|
import { spawn } from "child_process";
|
||||||
|
import { randomBytes } from "crypto";
|
||||||
|
import { tmpdir } from "os";
|
||||||
|
import { join } from "path";
|
||||||
|
|
||||||
|
export class AudioPreprocessor {
|
||||||
|
private readonly targetSampleRate = 16000;
|
||||||
|
private readonly channels = 1;
|
||||||
|
|
||||||
|
  /**
   * Converts input audio to the service's target format (16 kHz mono, per
   * the class constants) via an external Python preprocessing step, falling
   * back to a pure-JS path when that fails.
   *
   * @param audioBuffer raw input audio.
   * @param inputSampleRate sample rate of the input, if known; otherwise it
   *        is detected from the buffer (detectSampleRate — defined later in
   *        this file, semantics not verified here).
   *
   * NOTE(review): the temp files created here do not appear to be deleted on
   * either path — looks like a temp-dir leak; confirm writeTempFile's
   * behavior before relying on this in long-running processes.
   */
  async preprocess(audioBuffer: Buffer, inputSampleRate?: number): Promise<PreprocessedAudio> {
    const tempInput = this.writeTempFile(audioBuffer, ".wav");
    const tempOutput = this.writeTempFile(Buffer.alloc(0), ".wav");

    // Despite the name, this is the *input* rate handed to the preprocessor.
    const outputSampleRate = inputSampleRate || this.detectSampleRate(audioBuffer);

    try {
      await this.runPythonPreprocess(tempInput, tempOutput, outputSampleRate);

      const processed = await this.readFile(tempOutput);
      return {
        audio: processed,
        sampleRate: this.targetSampleRate,
        channels: this.channels,
        // 2 bytes per 16-bit sample.
        durationSec: processed.length / (this.targetSampleRate * this.channels * 2),
      };
    } catch (err) {
      // Any failure in the Python path falls back to the JS implementation.
      const fallback = await this.jsFallback(audioBuffer, outputSampleRate);
      return fallback;
    }
  }
|
||||||
|
|
||||||
|
async preprocessBatch(
|
||||||
|
inputs: Array<{ buffer: Buffer; sampleRate?: number }>
|
||||||
|
): Promise<PreprocessedAudio[]> {
|
||||||
|
const promises = inputs.map(async (input) => {
|
||||||
|
return this.preprocess(input.buffer, input.sampleRate);
|
||||||
|
});
|
||||||
|
return Promise.all(promises);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Simple energy-based voice activity detection.
 * Splits the signal into 20 ms windows, keeps only windows whose RMS exceeds
 * a fixed threshold, and concatenates the voiced windows back together
 * in their original order.
 */
applyVAD(audioBuffer: Buffer, sampleRate: number): Buffer {
  const samples = this.bufferToInt16(audioBuffer);
  const rmsThreshold = 500; // empirical silence cutoff for int16 PCM
  const windowLen = Math.floor(sampleRate * 0.02); // 20 ms analysis window

  const voiced: number[] = [];
  for (let start = 0; start < samples.length; start += windowLen) {
    const win = samples.slice(start, start + windowLen);
    let energy = 0;
    for (const s of win) energy += s * s;
    const rms = Math.sqrt(energy / win.length);
    if (rms > rmsThreshold) {
      for (const s of win) voiced.push(s);
    }
  }
  return this.int16ToBuffer(voiced);
}
|
||||||
|
|
||||||
|
/**
 * Peak-normalizes 16-bit PCM audio to 95% of full scale.
 * Returns the input unchanged when the signal is all zeros.
 *
 * Fix: the previous version found the peak with
 * `Math.max(...int16Array.map(...))`, which passes every sample as a call
 * argument and overflows the call stack for long recordings (typically
 * beyond ~100k samples). The peak is now found with a plain loop.
 */
normalizeAudio(audioBuffer: Buffer): Buffer {
  const samples = this.bufferToInt16(audioBuffer);

  let maxAmplitude = 0;
  for (const v of samples) {
    const a = Math.abs(v);
    if (a > maxAmplitude) maxAmplitude = a;
  }
  if (maxAmplitude === 0) return audioBuffer; // pure silence: nothing to scale

  const targetMax = 32767 * 0.95; // 5% headroom so rounding cannot clip
  const scale = targetMax / maxAmplitude;
  return this.int16ToBuffer(samples.map((v) => Math.round(v * scale)));
}
|
||||||
|
|
||||||
|
async extractFeatures(audioBuffer: Buffer): Promise<AudioFeatures> {
|
||||||
|
const int16Array = this.bufferToInt16(audioBuffer);
|
||||||
|
const sampleRate = this.targetSampleRate;
|
||||||
|
const windowSize = Math.floor(sampleRate * 0.025);
|
||||||
|
const hopLength = Math.floor(sampleRate * 0.01);
|
||||||
|
|
||||||
|
const mfccs: number[][] = [];
|
||||||
|
const numCoeffs = 13;
|
||||||
|
|
||||||
|
for (let i = 0; i < int16Array.length - windowSize; i += hopLength) {
|
||||||
|
const frame = int16Array.slice(i, i + windowSize);
|
||||||
|
const coeffs = this.computeMFCC(frame, numCoeffs);
|
||||||
|
mfccs.push(coeffs);
|
||||||
|
}
|
||||||
|
|
||||||
|
const zeroCrossingRate = this.computeZCR(int16Array);
|
||||||
|
const spectralCentroid = this.computeSpectralCentroid(int16Array);
|
||||||
|
const spectralRollOff = this.computeSpectralRollOff(int16Array);
|
||||||
|
|
||||||
|
return {
|
||||||
|
mfccs,
|
||||||
|
zeroCrossingRate,
|
||||||
|
spectralCentroid,
|
||||||
|
spectralRollOff,
|
||||||
|
durationSec: int16Array.length / sampleRate,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
private async runPythonPreprocess(
|
||||||
|
inputPath: string,
|
||||||
|
outputPath: string,
|
||||||
|
inputSampleRate: number
|
||||||
|
): Promise<void> {
|
||||||
|
return new Promise((resolve, reject) => {
|
||||||
|
const mlServiceUrl = process.env.VOICEPRINT_ML_URL || "http://localhost:8001";
|
||||||
|
const proc = spawn("python3", [
|
||||||
|
"-c",
|
||||||
|
`
|
||||||
|
import urllib.request, json, sys
|
||||||
|
req = urllib.request.Request(
|
||||||
|
"${mlServiceUrl}/preprocess",
|
||||||
|
data=json.dumps({"input_path": "${inputPath}", "output_path": "${outputPath}", "input_sr": ${inputSampleRate}}).encode(),
|
||||||
|
headers={"Content-Type": "application/json"}
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
with urllib.request.urlopen(req, timeout=30) as resp:
|
||||||
|
sys.exit(0) if resp.status == 200 else sys.exit(1)
|
||||||
|
except:
|
||||||
|
sys.exit(1)
|
||||||
|
`,
|
||||||
|
]);
|
||||||
|
|
||||||
|
proc.on("close", (code) => {
|
||||||
|
code === 0 ? resolve() : reject(new Error(`Python preprocess exited with code ${code}`));
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
private async jsFallback(
|
||||||
|
audioBuffer: Buffer,
|
||||||
|
inputSampleRate: number
|
||||||
|
): Promise<PreprocessedAudio> {
|
||||||
|
let processed = audioBuffer;
|
||||||
|
|
||||||
|
if (inputSampleRate !== this.targetSampleRate) {
|
||||||
|
processed = this.resample(audioBuffer, inputSampleRate, this.targetSampleRate);
|
||||||
|
}
|
||||||
|
|
||||||
|
processed = this.applyVAD(processed, this.targetSampleRate);
|
||||||
|
processed = this.normalizeAudio(processed);
|
||||||
|
|
||||||
|
return {
|
||||||
|
audio: processed,
|
||||||
|
sampleRate: this.targetSampleRate,
|
||||||
|
channels: this.channels,
|
||||||
|
durationSec: processed.length / (this.targetSampleRate * this.channels * 2),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Linear-interpolation resampler for 16-bit PCM.
 * Each output sample blends the two nearest source samples.
 */
private resample(buffer: Buffer, fromRate: number, toRate: number): Buffer {
  const src = this.bufferToInt16(buffer);
  const step = fromRate / toRate;
  const outLen = Math.round(src.length / step);

  const out: number[] = [];
  for (let i = 0; i < outLen; i++) {
    const pos = i * step;
    const lo = Math.floor(pos);
    const hi = Math.min(lo + 1, src.length - 1);
    const t = pos - lo;
    out.push(Math.round(src[lo] * (1 - t) + src[hi] * t));
  }
  return this.int16ToBuffer(out);
}
|
||||||
|
|
||||||
|
/**
 * Best-effort sample-rate detection from a canonical RIFF/WAVE header.
 * Falls back to 16 kHz for raw PCM or unrecognized containers.
 *
 * Fix: the previous version read the 32-bit value at offset 20, which in a
 * canonical WAV file is the audio-format + channel-count fields, not the
 * sample rate. With the "fmt " chunk id at byte 12, the sample rate lives
 * at byte 24 of the file.
 */
private detectSampleRate(buffer: Buffer): number {
  const DEFAULT_RATE = 16000;
  if (buffer.length < 44) return DEFAULT_RATE; // too short for a WAV header

  const isCanonicalWav =
    buffer.toString("ascii", 0, 4) === "RIFF" &&
    buffer.toString("ascii", 8, 12) === "WAVE" &&
    buffer.toString("ascii", 12, 16) === "fmt ";
  if (isCanonicalWav) {
    const sr = buffer.readUInt32LE(24); // fmt chunk: sample rate at byte 24
    if (sr >= 8000 && sr <= 48000) return sr;
  }
  return DEFAULT_RATE;
}
|
||||||
|
|
||||||
|
/**
 * Decodes a little-endian 16-bit PCM buffer into an array of samples.
 *
 * Fix: `new Array(buffer.length / 2)` threw a RangeError ("Invalid array
 * length") whenever the buffer had an odd byte count. The sample count is
 * now floored, so a trailing partial sample is ignored instead of crashing.
 */
private bufferToInt16(buffer: Buffer): number[] {
  const sampleCount = Math.floor(buffer.length / 2);
  const samples: number[] = new Array(sampleCount);
  for (let i = 0; i < sampleCount; i++) {
    samples[i] = buffer.readInt16LE(i * 2);
  }
  return samples;
}
|
||||||
|
|
||||||
|
/**
 * Encodes an array of samples as a little-endian 16-bit PCM buffer.
 * Values are assumed to already be within the int16 range.
 */
private int16ToBuffer(arr: number[]): Buffer {
  const out = Buffer.alloc(arr.length * 2);
  arr.forEach((sample, idx) => {
    out.writeInt16LE(sample, idx * 2);
  });
  return out;
}
|
||||||
|
|
||||||
|
/**
 * Computes MFCC-style coefficients for one frame: naive DFT magnitude
 * spectrum, 20-band triangular filterbank, log band energies, then a DCT.
 * The DCT index starts at 1, so the 0th (overall energy) coefficient is
 * skipped. (Previously the DFT's inner loop variable shadowed the outer
 * `n`; the rewrite uses distinct names.)
 */
private computeMFCC(frame: number[], numCoeffs: number): number[] {
  const frameLen = frame.length;
  // Next power of two above frameLen, doubled for extra frequency resolution.
  const fftSize = Math.pow(2, Math.ceil(Math.log2(frameLen)) + 1);
  const binCount = fftSize / 2 + 1;

  // Magnitude spectrum via direct DFT (O(K*N); frames are short).
  const spectrum: number[] = new Array(binCount);
  for (let k = 0; k < binCount; k++) {
    let re = 0;
    let im = 0;
    for (let t = 0; t < frameLen; t++) {
      const angle = (2 * Math.PI * k * t) / fftSize;
      re += frame[t] * Math.cos(angle);
      im -= frame[t] * Math.sin(angle);
    }
    spectrum[k] = Math.sqrt(re * re + im * im);
  }

  // Per-band log energies through the triangular filterbank.
  const numFilters = 20;
  const filterbank = this.createFilterbank(binCount, numFilters);
  const logEnergies: number[] = [];
  for (let band = 0; band < numFilters; band++) {
    let energy = 0;
    for (let k = 0; k < binCount; k++) {
      energy += filterbank[band][k] * spectrum[k] * spectrum[k];
    }
    logEnergies.push(Math.log(Math.max(energy, 1e-10))); // floor avoids log(0)
  }

  // DCT of the log energies, coefficients 1..numCoeffs.
  const coeffs: number[] = [];
  for (let d = 0; d < numCoeffs; d++) {
    let sum = 0;
    for (let band = 0; band < numFilters; band++) {
      sum +=
        logEnergies[band] *
        Math.cos(((2 * band + 1) * (d + 1) * Math.PI) / (2 * numFilters));
    }
    coeffs.push(sum);
  }
  return coeffs;
}
|
||||||
|
|
||||||
|
/**
 * Builds a bank of triangular bandpass filters over bins roughly spanning
 * 20–5500 Hz (assuming an 8 kHz Nyquist mapping for bin indices).
 *
 * NOTE(review): filter centers are spaced linearly, not on the mel scale —
 * confirm that is intentional for these "MFCC" features.
 */
private createFilterbank(numBins: number, numFilters: number): number[][] {
  const bank: number[][] = Array.from({ length: numFilters }, () =>
    new Array(numBins).fill(0)
  );

  const lowBin = Math.floor((20 * numBins) / 8000);
  const highBin = Math.floor((5500 * numBins) / 8000);
  const spacing = (highBin - lowBin) / (numFilters + 1);

  for (let m = 1; m <= numFilters; m++) {
    const left = Math.floor(lowBin + (m - 1) * spacing);
    const center = Math.floor(lowBin + m * spacing);
    const right = Math.floor(lowBin + (m + 1) * spacing);
    const filter = bank[m - 1];

    // Rising slope from the left edge up to (exclusive of) the center bin.
    for (let k = left; k < center; k++) {
      if (k > 0) filter[k] = (k - left) / (center - left);
    }
    // Falling slope from the center down to (exclusive of) the right edge.
    if (right > center) {
      for (let k = center; k < right; k++) {
        filter[k] = (right - k) / (right - center);
      }
    }
  }
  return bank;
}
|
||||||
|
|
||||||
|
/**
 * Zero-crossing rate: the fraction of samples at which the signal changes
 * sign (treating 0 as non-positive).
 *
 * Fix: empty input previously returned NaN via 0/0; it now returns 0.
 */
private computeZCR(int16: number[]): number {
  if (int16.length === 0) return 0; // guard 0/0 -> NaN
  let crossings = 0;
  for (let i = 1; i < int16.length; i++) {
    const wasPositive = int16[i - 1] > 0;
    const isPositive = int16[i] > 0;
    if (wasPositive !== isPositive) crossings++;
  }
  return crossings / int16.length;
}
|
||||||
|
|
||||||
|
/**
 * Energy-weighted mean sample index; returns 0 for an all-zero signal.
 *
 * NOTE(review): despite the name, this runs over time-domain samples, not a
 * spectrum, so it is a temporal centroid — confirm intended.
 */
private computeSpectralCentroid(int16: number[]): number {
  let weightedSum = 0;
  let totalEnergy = 0;
  int16.forEach((sample, idx) => {
    const energy = sample * sample;
    weightedSum += idx * energy;
    totalEnergy += energy;
  });
  return totalEnergy > 0 ? weightedSum / totalEnergy : 0;
}
|
||||||
|
|
||||||
|
/**
 * Index below which 85% of the signal's total energy is contained.
 *
 * NOTE(review): computed over time-domain samples, not a spectrum —
 * confirm intended.
 */
private computeSpectralRollOff(int16: number[]): number {
  let totalEnergy = 0;
  for (const sample of int16) totalEnergy += sample * sample;
  const threshold = totalEnergy * 0.85;

  let running = 0;
  for (let i = 0; i < int16.length; i++) {
    running += int16[i] * int16[i];
    if (running >= threshold) return i;
  }
  return int16.length - 1;
}
|
||||||
|
|
||||||
|
/**
 * Writes content to a uniquely named file under the OS temp directory and
 * returns its path. The caller is responsible for cleaning the file up.
 */
private writeTempFile(content: Buffer, ext: string): string {
  const name = `vp_${randomBytes(8).toString("hex")}${ext}`;
  const path = join(tmpdir(), name);
  require("fs").writeFileSync(path, content);
  return path;
}
|
||||||
|
|
||||||
|
private async readFile(path: string): Promise<Buffer> {
|
||||||
|
return require("fs").readFileSync(path);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Result of preprocessing one audio clip. */
export interface PreprocessedAudio {
  /** Processed PCM audio; the pipeline reads/writes 16-bit LE samples. */
  audio: Buffer;
  /** Sample rate of `audio` in Hz. */
  sampleRate: number;
  /** Channel count used in the duration calculation. */
  channels: number;
  /** Duration of `audio` in seconds (bytes / (rate * channels * 2)). */
  durationSec: number;
}
|
||||||
|
|
||||||
|
/** Frame-level and summary acoustic features extracted from a clip. */
export interface AudioFeatures {
  /** Per-frame MFCC coefficient vectors (13 coefficients per frame). */
  mfccs: number[][];
  /** Fraction of samples at which the waveform crosses zero. */
  zeroCrossingRate: number;
  /** Energy-weighted centroid — computed over time-domain samples here. */
  spectralCentroid: number;
  /** Index below which ~85% of signal energy lies. */
  spectralRollOff: number;
  /** Clip duration in seconds. */
  durationSec: number;
}
|
||||||
8
services/voiceprint/tsconfig.json
Normal file
8
services/voiceprint/tsconfig.json
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
{
|
||||||
|
"extends": "../../tsconfig.json",
|
||||||
|
"compilerOptions": {
|
||||||
|
"outDir": "./dist",
|
||||||
|
"rootDir": "./src"
|
||||||
|
},
|
||||||
|
"include": ["src/**/*"]
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user