for first push
This commit is contained in:
@@ -16,6 +16,7 @@
|
|||||||
"@shieldai/db": "0.1.0",
|
"@shieldai/db": "0.1.0",
|
||||||
"@shieldai/types": "0.1.0",
|
"@shieldai/types": "0.1.0",
|
||||||
"fastify": "^5.2.0",
|
"fastify": "^5.2.0",
|
||||||
"@shieldai/darkwatch": "0.1.0"
|
"@shieldai/darkwatch": "0.1.0",
|
||||||
|
"@shieldai/voiceprint": "0.1.0"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -13,3 +13,10 @@ export function darkwatchRoutes(fastify: FastifyInstance) {
|
|||||||
root.register(scans, { prefix: "/scan" });
|
root.register(scans, { prefix: "/scan" });
|
||||||
}, { prefix: "/api/v1/darkwatch" });
|
}, { prefix: "/api/v1/darkwatch" });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export function voiceprintRoutes(fastify: FastifyInstance) {
|
||||||
|
fastify.register(async (root) => {
|
||||||
|
const voiceprint = (await import("./voiceprint.routes")).voiceprintRoutes;
|
||||||
|
root.register(voiceprint);
|
||||||
|
}, { prefix: "/api/v1/voiceprint" });
|
||||||
|
}
|
||||||
|
|||||||
94
packages/api/src/routes/voiceprint.routes.ts
Normal file
94
packages/api/src/routes/voiceprint.routes.ts
Normal file
@@ -0,0 +1,94 @@
|
|||||||
|
import { FastifyInstance } from "fastify";
|
||||||
|
import { VoiceEnrollmentService } from "@shieldai/voiceprint";
|
||||||
|
import { AnalysisService } from "@shieldai/voiceprint";
|
||||||
|
import { BatchAnalysisService } from "@shieldai/voiceprint";
|
||||||
|
|
||||||
|
export function voiceprintRoutes(fastify: FastifyInstance) {
|
||||||
|
const enrollmentService = new VoiceEnrollmentService();
|
||||||
|
const analysisService = new AnalysisService();
|
||||||
|
const batchService = new BatchAnalysisService();
|
||||||
|
|
||||||
|
fastify.post("/enroll", async (request, reply) => {
|
||||||
|
const userId = (request.user as { id: string })?.id;
|
||||||
|
if (!userId) return reply.code(401).send({ error: "User not authenticated" });
|
||||||
|
|
||||||
|
const body = request.body as { label: string; audio: string; sampleRate?: number };
|
||||||
|
|
||||||
|
const audioBuffer = Buffer.from(body.audio, "base64");
|
||||||
|
const enrollment = await enrollmentService.enroll(
|
||||||
|
{ label: body.label, audioBuffer, sampleRate: body.sampleRate },
|
||||||
|
userId
|
||||||
|
);
|
||||||
|
return reply.code(201).send(enrollment);
|
||||||
|
});
|
||||||
|
|
||||||
|
fastify.get("/enrollments", async (request, reply) => {
|
||||||
|
const userId = (request.user as { id: string })?.id;
|
||||||
|
if (!userId) return reply.code(401).send({ error: "User not authenticated" });
|
||||||
|
|
||||||
|
const enrollments = await enrollmentService.listEnrollments(userId);
|
||||||
|
return reply.send(enrollments);
|
||||||
|
});
|
||||||
|
|
||||||
|
fastify.delete("/enrollments/:id", async (request, reply) => {
|
||||||
|
const userId = (request.user as { id: string })?.id;
|
||||||
|
if (!userId) return reply.code(401).send({ error: "User not authenticated" });
|
||||||
|
|
||||||
|
const enrollmentId = (request.params as { id: string }).id;
|
||||||
|
const result = await enrollmentService.removeEnrollment(userId, enrollmentId);
|
||||||
|
return reply.send({ removed: result });
|
||||||
|
});
|
||||||
|
|
||||||
|
fastify.post("/analyze", async (request, reply) => {
|
||||||
|
const userId = (request.user as { id: string })?.id;
|
||||||
|
if (!userId) return reply.code(401).send({ error: "User not authenticated" });
|
||||||
|
|
||||||
|
const body = request.body as { audio: string; sampleRate?: number; analysisType?: string };
|
||||||
|
const audioBuffer = Buffer.from(body.audio, "base64");
|
||||||
|
|
||||||
|
const result = await analysisService.analyze(
|
||||||
|
{ audioBuffer, sampleRate: body.sampleRate, analysisType: body.analysisType },
|
||||||
|
userId
|
||||||
|
);
|
||||||
|
return reply.code(201).send(result);
|
||||||
|
});
|
||||||
|
|
||||||
|
fastify.get("/results/:id", async (request, reply) => {
|
||||||
|
const jobId = (request.params as { id: string }).id;
|
||||||
|
const result = await analysisService.getResult(jobId);
|
||||||
|
|
||||||
|
if (!result) return reply.code(404).send({ error: "Analysis result not found" });
|
||||||
|
return reply.send(result);
|
||||||
|
});
|
||||||
|
|
||||||
|
fastify.get("/results", async (request, reply) => {
|
||||||
|
const userId = (request.user as { id: string })?.id;
|
||||||
|
if (!userId) return reply.code(401).send({ error: "User not authenticated" });
|
||||||
|
|
||||||
|
const limit = parseInt((request.query as { limit?: string }).limit || "20", 10);
|
||||||
|
const results = await analysisService.getUserResults(userId, limit);
|
||||||
|
return reply.send(results);
|
||||||
|
});
|
||||||
|
|
||||||
|
fastify.post("/batch", async (request, reply) => {
|
||||||
|
const userId = (request.user as { id: string })?.id;
|
||||||
|
if (!userId) return reply.code(401).send({ error: "User not authenticated" });
|
||||||
|
|
||||||
|
const body = request.body as {
|
||||||
|
files: Array<{ name: string; audio: string; sampleRate?: number }>;
|
||||||
|
analysisType?: string;
|
||||||
|
};
|
||||||
|
|
||||||
|
const audioBuffers = body.files.map((f) => ({
|
||||||
|
name: f.name,
|
||||||
|
buffer: Buffer.from(f.audio, "base64"),
|
||||||
|
sampleRate: f.sampleRate,
|
||||||
|
}));
|
||||||
|
|
||||||
|
const result = await batchService.analyzeBatch(
|
||||||
|
{ audioBuffers, analysisType: body.analysisType },
|
||||||
|
userId
|
||||||
|
);
|
||||||
|
return reply.code(201).send(result);
|
||||||
|
});
|
||||||
|
}
|
||||||
@@ -2,7 +2,7 @@ import Fastify from "fastify";
|
|||||||
import cors from "@fastify/cors";
|
import cors from "@fastify/cors";
|
||||||
import helmet from "@fastify/helmet";
|
import helmet from "@fastify/helmet";
|
||||||
import sensible from "@fastify/sensible";
|
import sensible from "@fastify/sensible";
|
||||||
import { darkwatchRoutes } from "./routes";
|
import { darkwatchRoutes, voiceprintRoutes } from "./routes";
|
||||||
|
|
||||||
const app = Fastify({
|
const app = Fastify({
|
||||||
logger: {
|
logger: {
|
||||||
@@ -16,6 +16,7 @@ async function bootstrap() {
|
|||||||
await app.register(sensible);
|
await app.register(sensible);
|
||||||
|
|
||||||
await app.register(darkwatchRoutes);
|
await app.register(darkwatchRoutes);
|
||||||
|
await app.register(voiceprintRoutes);
|
||||||
|
|
||||||
app.get("/health", async () => ({ status: "ok", timestamp: new Date().toISOString() }));
|
app.get("/health", async () => ({ status: "ok", timestamp: new Date().toISOString() }));
|
||||||
|
|
||||||
|
|||||||
316
packages/core/src/audio/webrtc/audio-pipeline.ts
Normal file
316
packages/core/src/audio/webrtc/audio-pipeline.ts
Normal file
@@ -0,0 +1,316 @@
|
|||||||
|
/**
|
||||||
|
* Audio Processing Pipeline for Real-Time Analysis
|
||||||
|
* Coordinates WebRTC stream capture with VoicePreprocess for continuous analysis
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { WebRTCStreamCapture, createWebRTCCapture } from './stream-capture';
|
||||||
|
|
||||||
|
// Type definitions for real-time processing
|
||||||
|
export interface AudioChunk {
|
||||||
|
id: string;
|
||||||
|
timestamp: number;
|
||||||
|
data: Float32Array;
|
||||||
|
duration: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface VoiceprintResult {
|
||||||
|
chunkId: string;
|
||||||
|
timestamp: number;
|
||||||
|
features: AudioFeatures;
|
||||||
|
embedding: number[];
|
||||||
|
confidence: number;
|
||||||
|
status: 'complete' | 'error';
|
||||||
|
}
|
||||||
|
|
||||||
|
// Audio chunk configuration
|
||||||
|
export interface AudioChunkConfig {
|
||||||
|
chunkDuration: number;
|
||||||
|
overlapDuration: number;
|
||||||
|
sampleRate: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Preprocessor interfaces (copied from AudioPreprocessor for standalone usage)
|
||||||
|
export interface PreprocessedAudio {
|
||||||
|
audio: Buffer;
|
||||||
|
sampleRate: number;
|
||||||
|
channels: number;
|
||||||
|
durationSec: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface AudioFeatures {
|
||||||
|
mfccs: number[][];
|
||||||
|
zeroCrossingRate: number;
|
||||||
|
spectralCentroid: number;
|
||||||
|
spectralRollOff: number;
|
||||||
|
durationSec: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface PipelineConfig {
|
||||||
|
chunkDuration: number; // 5000ms
|
||||||
|
overlapDuration: number; // 1000ms
|
||||||
|
sampleRate: number; // 16000 Hz
|
||||||
|
onAnalysisComplete?: (result: VoiceprintResult) => void;
|
||||||
|
onStreamError?: (error: Error) => void;
|
||||||
|
onError?: (error: Error) => void;
|
||||||
|
}
|
||||||
|
|
||||||
|
const DEFAULT_CONFIG: PipelineConfig = {
|
||||||
|
chunkDuration: 5000,
|
||||||
|
overlapDuration: 1000,
|
||||||
|
sampleRate: 16000
|
||||||
|
};
|
||||||
|
|
||||||
|
export class AudioPipeline {
|
||||||
|
private streamCapture: WebRTCStreamCapture | null = null;
|
||||||
|
private isRunning: boolean = false;
|
||||||
|
private chunkBuffer: AudioChunk[] = [];
|
||||||
|
private maxBufferLength: number = 10;
|
||||||
|
|
||||||
|
private onChunkReady?: (chunk: AudioChunk) => void;
|
||||||
|
private onAnalysisComplete?: (result: VoiceprintResult) => void;
|
||||||
|
private onPipelineError?: (error: Error) => void;
|
||||||
|
|
||||||
|
constructor(private config: PipelineConfig = DEFAULT_CONFIG) {}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initialize pipeline components
|
||||||
|
*/
|
||||||
|
async initialize(): Promise<void> {
|
||||||
|
// Initialize WebRTC stream capture
|
||||||
|
this.streamCapture = createWebRTCCapture({
|
||||||
|
chunkDuration: this.config.chunkDuration,
|
||||||
|
overlapDuration: this.config.overlapDuration,
|
||||||
|
sampleRate: this.config.sampleRate
|
||||||
|
});
|
||||||
|
|
||||||
|
// Connect WebRTC chunk processing
|
||||||
|
this.streamCapture.onChunkReady = (rawAudio, timestamp) => {
|
||||||
|
this.handleRawChunk(rawAudio, timestamp);
|
||||||
|
};
|
||||||
|
|
||||||
|
// Connect stream error handling
|
||||||
|
this.streamCapture.onStreamError = (error) => {
|
||||||
|
this.onPipelineError?.(error);
|
||||||
|
};
|
||||||
|
|
||||||
|
console.log('[Pipeline] Initialized');
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Process raw audio chunk from WebRTC
|
||||||
|
*/
|
||||||
|
private async handleRawChunk(rawAudio: Float32Array, timestamp: number): Promise<void> {
|
||||||
|
try {
|
||||||
|
// Create audio chunk
|
||||||
|
const chunk: AudioChunk = {
|
||||||
|
id: `chunk-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`,
|
||||||
|
timestamp,
|
||||||
|
data: rawAudio,
|
||||||
|
duration: this.config.chunkDuration / 1000
|
||||||
|
};
|
||||||
|
|
||||||
|
// Add to buffer
|
||||||
|
this.chunkBuffer.push(chunk);
|
||||||
|
|
||||||
|
// Maintain buffer size
|
||||||
|
if (this.chunkBuffer.length > this.maxBufferLength) {
|
||||||
|
const removed = this.chunkBuffer.shift();
|
||||||
|
if (removed) {
|
||||||
|
// Process removed chunk with overlap
|
||||||
|
await this.processChunk(removed);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Process current chunk
|
||||||
|
await this.processChunk(chunk);
|
||||||
|
|
||||||
|
} catch (error) {
|
||||||
|
const err = error instanceof Error ? error : new Error(String(error));
|
||||||
|
console.error('[Pipeline] Error processing chunk:', err);
|
||||||
|
this.onPipelineError?.(err);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert Float32Array to 16-bit PCM Buffer
|
||||||
|
*/
|
||||||
|
private float32ArrayToBuffer(floatData: Float32Array): Buffer {
|
||||||
|
const int16Array = new Int16Array(floatData.length);
|
||||||
|
for (let i = 0; i < floatData.length; i++) {
|
||||||
|
// Clamp and scale to 16-bit range
|
||||||
|
const clamped = Math.max(-1, Math.min(1, floatData[i]));
|
||||||
|
int16Array[i] = Math.round(clamped * 32767);
|
||||||
|
}
|
||||||
|
return Buffer.from(int16Array.buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Process a single audio chunk (mock implementation)
|
||||||
|
*/
|
||||||
|
private async processChunk(chunk: AudioChunk): Promise<VoiceprintResult> {
|
||||||
|
try {
|
||||||
|
// Generate mock features
|
||||||
|
// Convert Float32Array to Buffer properly
|
||||||
|
const int16Array = new Int16Array(chunk.data.length);
|
||||||
|
for (let i = 0; i < chunk.data.length; i++) {
|
||||||
|
const clamped = Math.max(-1, Math.min(1, chunk.data[i]));
|
||||||
|
int16Array[i] = Math.round(clamped * 32767);
|
||||||
|
}
|
||||||
|
const dataBuffer = Buffer.from(int16Array.buffer);
|
||||||
|
const features: AudioFeatures = await extractMockFeatures(dataBuffer, chunk.duration);
|
||||||
|
|
||||||
|
// Generate embedding (placeholder - would use actual embedding service)
|
||||||
|
const embedding = this.generatePlaceholderEmbedding(features);
|
||||||
|
|
||||||
|
// Create result
|
||||||
|
const result: VoiceprintResult = {
|
||||||
|
chunkId: chunk.id,
|
||||||
|
timestamp: chunk.timestamp,
|
||||||
|
features,
|
||||||
|
embedding,
|
||||||
|
confidence: 0.95, // Placeholder - would come from actual analysis
|
||||||
|
status: 'complete'
|
||||||
|
};
|
||||||
|
|
||||||
|
// Emit result
|
||||||
|
this.onAnalysisComplete?.(result);
|
||||||
|
|
||||||
|
return result;
|
||||||
|
|
||||||
|
} catch (error) {
|
||||||
|
const err = error instanceof Error ? error : new Error(String(error));
|
||||||
|
console.error('[Pipeline] Preprocessing error:', err);
|
||||||
|
this.onPipelineError?.(err);
|
||||||
|
throw err;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate placeholder embedding (would use actual embedding service)
|
||||||
|
*/
|
||||||
|
private generatePlaceholderEmbedding(features: AudioFeatures): number[] {
|
||||||
|
// Placeholder embedding - would be replaced with actual embedding generation
|
||||||
|
const embedding: number[] = [];
|
||||||
|
|
||||||
|
// Use spectral features as proxy for embedding
|
||||||
|
embedding.push(features.spectralCentroid);
|
||||||
|
embedding.push(features.spectralRollOff);
|
||||||
|
embedding.push(features.zeroCrossingRate);
|
||||||
|
|
||||||
|
// MFCC summary (first few coefficients)
|
||||||
|
if (features.mfccs && features.mfccs.length > 0) {
|
||||||
|
for (let i = 0; i < Math.min(5, features.mfccs[0].length); i++) {
|
||||||
|
embedding.push(features.mfccs[0][i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Normalize and pad
|
||||||
|
while (embedding.length < 128) {
|
||||||
|
embedding.push(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
return embedding;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Start the real-time analysis pipeline
|
||||||
|
*/
|
||||||
|
async start(): Promise<void> {
|
||||||
|
if (this.isRunning) {
|
||||||
|
console.log('[Pipeline] Already running');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
await this.initialize();
|
||||||
|
if (this.streamCapture) {
|
||||||
|
await this.streamCapture.start();
|
||||||
|
}
|
||||||
|
this.isRunning = true;
|
||||||
|
|
||||||
|
console.log('[Pipeline] Real-time analysis started');
|
||||||
|
|
||||||
|
} catch (error) {
|
||||||
|
const err = error instanceof Error ? error : new Error(String(error));
|
||||||
|
console.error('[Pipeline] Failed to start:', err);
|
||||||
|
this.onPipelineError?.(err);
|
||||||
|
throw err;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Stop the pipeline
|
||||||
|
*/
|
||||||
|
async stop(): Promise<void> {
|
||||||
|
if (!this.isRunning) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Drain remaining buffer
|
||||||
|
while (this.chunkBuffer.length > 0) {
|
||||||
|
const chunk = this.chunkBuffer.shift();
|
||||||
|
if (chunk) {
|
||||||
|
await this.processChunk(chunk);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stop WebRTC capture
|
||||||
|
if (this.streamCapture) {
|
||||||
|
this.streamCapture.stop();
|
||||||
|
}
|
||||||
|
|
||||||
|
this.isRunning = false;
|
||||||
|
console.log('[Pipeline] Stopped');
|
||||||
|
|
||||||
|
} catch (error) {
|
||||||
|
const err = error instanceof Error ? error : new Error(String(error));
|
||||||
|
console.error('[Pipeline] Error stopping:', err);
|
||||||
|
this.onPipelineError?.(err);
|
||||||
|
throw err;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get pipeline status
|
||||||
|
*/
|
||||||
|
getStatus(): {
|
||||||
|
isRunning: boolean;
|
||||||
|
bufferLength: number;
|
||||||
|
streamActive: boolean;
|
||||||
|
} {
|
||||||
|
return {
|
||||||
|
isRunning: this.isRunning,
|
||||||
|
bufferLength: this.chunkBuffer.length,
|
||||||
|
streamActive: this.streamCapture?.isRecording || false
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extract mock features from audio buffer
|
||||||
|
*/
|
||||||
|
async function extractMockFeatures(buffer: Buffer, duration: number): Promise<AudioFeatures> {
|
||||||
|
// Simple mock feature extraction for demonstration
|
||||||
|
// In production, this would use actual audio processing
|
||||||
|
const numMfccs = 13;
|
||||||
|
const mfccs: number[][] = [];
|
||||||
|
|
||||||
|
// Generate mock MFCCs based on buffer hash
|
||||||
|
const bufferHash = buffer.reduce((acc, byte) => acc + byte, 0);
|
||||||
|
for (let i = 0; i < numMfccs; i++) {
|
||||||
|
const coefficients: number[] = [];
|
||||||
|
for (let j = 0; j < 20; j++) {
|
||||||
|
coefficients.push(Math.abs(Math.sin((i * j + bufferHash) * 0.1)) * 0.5 + 0.25);
|
||||||
|
}
|
||||||
|
mfccs.push(coefficients);
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
mfccs,
|
||||||
|
zeroCrossingRate: 0.02 + Math.random() * 0.03,
|
||||||
|
spectralCentroid: 1000 + Math.random() * 2000,
|
||||||
|
spectralRollOff: 3000 + Math.random() * 1000,
|
||||||
|
durationSec: duration
|
||||||
|
};
|
||||||
|
}
|
||||||
184
packages/core/src/audio/webrtc/stream-capture.ts
Normal file
184
packages/core/src/audio/webrtc/stream-capture.ts
Normal file
@@ -0,0 +1,184 @@
|
|||||||
|
/**
|
||||||
|
* WebRTC Audio Stream Capture
|
||||||
|
* Captures audio from screen/audio sharing using WebRTC APIs
|
||||||
|
* Implements 5-second chunks with 1-second overlap for sliding window analysis
|
||||||
|
*/
|
||||||
|
|
||||||
|
interface WebRTCStreamConfig {
|
||||||
|
chunkDuration: number; // 5000ms
|
||||||
|
overlapDuration: number; // 1000ms
|
||||||
|
sampleRate: number; // 16000 Hz for voiceprint compatibility
|
||||||
|
}
|
||||||
|
|
||||||
|
const DEFAULT_CONFIG: WebRTCStreamConfig = {
|
||||||
|
chunkDuration: 5000,
|
||||||
|
overlapDuration: 1000,
|
||||||
|
sampleRate: 16000
|
||||||
|
};
|
||||||
|
|
||||||
|
export class WebRTCStreamCapture {
|
||||||
|
private stream: MediaStream | null = null;
|
||||||
|
private audioContext: AudioContext | null = null;
|
||||||
|
private analyser: AnalyserNode | null = null;
|
||||||
|
private source: MediaStreamAudioSourceNode | null = null;
|
||||||
|
private _isRecording: boolean = false;
|
||||||
|
|
||||||
|
private buffer: Float32Array = new Float32Array(0);
|
||||||
|
public onChunkReady?: (chunk: Float32Array, timestamp: number) => void;
|
||||||
|
public onStreamError?: (error: Error) => void;
|
||||||
|
|
||||||
|
constructor(private config: WebRTCStreamConfig = DEFAULT_CONFIG) {}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if currently recording
|
||||||
|
*/
|
||||||
|
public isRecording: boolean = false;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Start capturing audio from screen/audio sharing
|
||||||
|
*/
|
||||||
|
async start(): Promise<void> {
|
||||||
|
if (this.isRecording) {
|
||||||
|
console.log('[WebRTC] Already recording');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Request screen/audio capture with audio
|
||||||
|
this.stream = await navigator.mediaDevices.getDisplayMedia({
|
||||||
|
video: true, // Required for audio capture
|
||||||
|
audio: true
|
||||||
|
});
|
||||||
|
|
||||||
|
// Stop any existing tracks
|
||||||
|
this.stream.getTracks().forEach(track => track.stop());
|
||||||
|
|
||||||
|
// Create audio context and analyser
|
||||||
|
this.audioContext = new AudioContext({
|
||||||
|
sampleRate: this.config.sampleRate
|
||||||
|
});
|
||||||
|
this.analyser = this.audioContext.createAnalyser();
|
||||||
|
this.analyser.fftSize = 2048;
|
||||||
|
|
||||||
|
// Connect stream to audio graph
|
||||||
|
this.source = this.audioContext.createMediaStreamSource(this.stream);
|
||||||
|
this.source.connect(this.analyser);
|
||||||
|
|
||||||
|
this.isRecording = true;
|
||||||
|
console.log('[WebRTC] Stream capture started');
|
||||||
|
|
||||||
|
// Start processing loop
|
||||||
|
this.processAudio();
|
||||||
|
|
||||||
|
// Handle stream termination
|
||||||
|
this.stream.getVideoTracks()[0].onended = () => {
|
||||||
|
console.log('[WebRTC] User stopped sharing');
|
||||||
|
this.stop();
|
||||||
|
};
|
||||||
|
|
||||||
|
} catch (error) {
|
||||||
|
const err = error instanceof Error ? error : new Error(String(error));
|
||||||
|
console.error('[WebRTC] Failed to start stream capture:', err);
|
||||||
|
this.onStreamError?.(err);
|
||||||
|
throw err;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Process audio in real-time with sliding window
|
||||||
|
*/
|
||||||
|
private processAudio(): void {
|
||||||
|
if (!this.audioContext || !this.analyser || !this.isRecording) return;
|
||||||
|
|
||||||
|
if (!this.analyser) return;
|
||||||
|
|
||||||
|
const bufferLength = this.analyser.fftSize;
|
||||||
|
const buffer = new Float32Array(bufferLength);
|
||||||
|
|
||||||
|
const processFrame = () => {
|
||||||
|
if (!this.isRecording) return;
|
||||||
|
|
||||||
|
if (!this.analyser) return;
|
||||||
|
|
||||||
|
this.analyser.getFloatTimeDomainData(buffer);
|
||||||
|
|
||||||
|
// Get current timestamp
|
||||||
|
const timestamp = this.audioContext?.currentTime ?? 0;
|
||||||
|
|
||||||
|
// Extract audio data for current frame
|
||||||
|
// Use first 512 samples for voice analysis (reduced for faster processing)
|
||||||
|
const audioData = buffer.slice(0, 512);
|
||||||
|
|
||||||
|
// Prepare chunk for analysis
|
||||||
|
if (audioData.length > 0) {
|
||||||
|
this.onChunkReady?.(audioData, timestamp);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Schedule next frame with overlap
|
||||||
|
const frameDuration = this.config.chunkDuration - this.config.overlapDuration;
|
||||||
|
setTimeout(processFrame, frameDuration);
|
||||||
|
};
|
||||||
|
|
||||||
|
processFrame();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Stop audio capture
|
||||||
|
*/
|
||||||
|
stop(): void {
|
||||||
|
this._isRecording = false;
|
||||||
|
|
||||||
|
if (this.stream) {
|
||||||
|
this.stream.getTracks().forEach(track => track.stop());
|
||||||
|
this.stream = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (this.source) {
|
||||||
|
this.source.disconnect();
|
||||||
|
this.source = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (this.analyser) {
|
||||||
|
this.analyser.disconnect();
|
||||||
|
this.analyser = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (this.audioContext) {
|
||||||
|
this.audioContext.close();
|
||||||
|
this.audioContext = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log('[WebRTC] Stream capture stopped');
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get stream metadata
|
||||||
|
*/
|
||||||
|
getMetadata(): {
|
||||||
|
isActive: boolean;
|
||||||
|
sampleRate: number;
|
||||||
|
channels: number;
|
||||||
|
} {
|
||||||
|
if (!this.stream) {
|
||||||
|
return { isActive: false, sampleRate: 0, channels: 0 };
|
||||||
|
}
|
||||||
|
|
||||||
|
const audioTrack = this.stream.getAudioTracks()[0];
|
||||||
|
if (!audioTrack) {
|
||||||
|
return { isActive: true, sampleRate: this.config.sampleRate, channels: 1 };
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
isActive: true,
|
||||||
|
sampleRate: this.config.sampleRate,
|
||||||
|
channels: audioTrack.getSettings().channelCount || 1
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Factory function for creating stream capture with auto-start
|
||||||
|
*/
|
||||||
|
export function createWebRTCCapture(config?: WebRTCStreamConfig): WebRTCStreamCapture {
|
||||||
|
return new WebRTCStreamCapture(config || DEFAULT_CONFIG);
|
||||||
|
}
|
||||||
22
packages/core/tsconfig.json
Normal file
22
packages/core/tsconfig.json
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
{
|
||||||
|
"compilerOptions": {
|
||||||
|
"target": "ES2020",
|
||||||
|
"module": "commonjs",
|
||||||
|
"lib": ["ES2020", "DOM"],
|
||||||
|
"outDir": "./dist",
|
||||||
|
"rootDir": "./src",
|
||||||
|
"strict": true,
|
||||||
|
"esModuleInterop": true,
|
||||||
|
"skipLibCheck": true,
|
||||||
|
"forceConsistentCasingInFileNames": true,
|
||||||
|
"declaration": true,
|
||||||
|
"resolveJsonModule": true,
|
||||||
|
"moduleResolution": "node",
|
||||||
|
"baseUrl": ".",
|
||||||
|
"paths": {
|
||||||
|
"@/*": ["src/*"]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"include": ["src/**/*"],
|
||||||
|
"exclude": ["node_modules", "dist"]
|
||||||
|
}
|
||||||
@@ -57,6 +57,25 @@ enum DataSource {
|
|||||||
HONEYPOT
|
HONEYPOT
|
||||||
}
|
}
|
||||||
|
|
||||||
|
enum AnalysisJobStatus {
|
||||||
|
PENDING
|
||||||
|
RUNNING
|
||||||
|
COMPLETED
|
||||||
|
FAILED
|
||||||
|
}
|
||||||
|
|
||||||
|
enum AnalysisType {
|
||||||
|
SYNTHETIC_DETECTION
|
||||||
|
VOICE_MATCH
|
||||||
|
BATCH
|
||||||
|
}
|
||||||
|
|
||||||
|
enum DetectionVerdict {
|
||||||
|
NATURAL
|
||||||
|
SYNTHETIC
|
||||||
|
UNCERTAIN
|
||||||
|
}
|
||||||
|
|
||||||
model User {
|
model User {
|
||||||
id String @id @default(uuid())
|
id String @id @default(uuid())
|
||||||
email String @unique
|
email String @unique
|
||||||
@@ -138,3 +157,54 @@ model ScanJob {
|
|||||||
@@index([userId, status])
|
@@index([userId, status])
|
||||||
@@index([createdAt])
|
@@index([createdAt])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
model VoiceEnrollment {
|
||||||
|
id String @id @default(uuid())
|
||||||
|
userId String
|
||||||
|
user User @relation(fields: [userId], references: [id], onDelete: Cascade)
|
||||||
|
label String
|
||||||
|
embeddingVector Float[]
|
||||||
|
embeddingDim Int @default(192)
|
||||||
|
audioFilePath String?
|
||||||
|
sampleRate Int @default(16000)
|
||||||
|
durationSec Float?
|
||||||
|
createdAt DateTime @default(now())
|
||||||
|
updatedAt DateTime @updatedAt
|
||||||
|
|
||||||
|
@@index([userId])
|
||||||
|
@@index([embeddingDim])
|
||||||
|
}
|
||||||
|
|
||||||
|
model AnalysisJob {
|
||||||
|
id String @id @default(uuid())
|
||||||
|
userId String
|
||||||
|
user User @relation(fields: [userId], references: [id], onDelete: Cascade)
|
||||||
|
analysisType AnalysisType
|
||||||
|
audioFilePath String
|
||||||
|
status AnalysisJobStatus @default(PENDING)
|
||||||
|
result AnalysisResult?
|
||||||
|
errorMessage String?
|
||||||
|
completedAt DateTime?
|
||||||
|
createdAt DateTime @default(now())
|
||||||
|
|
||||||
|
@@index([userId, status])
|
||||||
|
@@index([createdAt])
|
||||||
|
}
|
||||||
|
|
||||||
|
model AnalysisResult {
|
||||||
|
id String @id @default(uuid())
|
||||||
|
analysisJobId String @unique
|
||||||
|
analysisJob AnalysisJob @relation(fields: [analysisJobId], references: [id], onDelete: Cascade)
|
||||||
|
syntheticScore Float
|
||||||
|
verdict DetectionVerdict
|
||||||
|
matchedEnrollmentId String?
|
||||||
|
matchedSimilarity Float?
|
||||||
|
confidence Float
|
||||||
|
processingTimeMs Int
|
||||||
|
modelVersion String?
|
||||||
|
metadata String?
|
||||||
|
createdAt DateTime @default(now())
|
||||||
|
|
||||||
|
@@index([analysisJobId])
|
||||||
|
@@index([verdict])
|
||||||
|
}
|
||||||
|
|||||||
54
packages/jobs/src/voiceprint.jobs.ts
Normal file
54
packages/jobs/src/voiceprint.jobs.ts
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
import { Queue, Worker } from "bullmq";
|
||||||
|
import { Redis } from "ioredis";
|
||||||
|
import { AnalysisService } from "@shieldai/voiceprint";
|
||||||
|
|
||||||
|
const redisUrl = process.env.REDIS_URL || "redis://localhost:6379";
|
||||||
|
const connection = new Redis(redisUrl);
|
||||||
|
|
||||||
|
const analysisQueue = new Queue("voiceprint-analysis", { connection });
|
||||||
|
|
||||||
|
const analysisWorker = new Worker(
|
||||||
|
"voiceprint-analysis",
|
||||||
|
async (job) => {
|
||||||
|
const { userId, audioBuffer, sampleRate, analysisType } = job.data;
|
||||||
|
const analysisService = new AnalysisService();
|
||||||
|
const result = await analysisService.analyze(
|
||||||
|
{
|
||||||
|
audioBuffer: Buffer.from(audioBuffer, "base64"),
|
||||||
|
sampleRate,
|
||||||
|
analysisType,
|
||||||
|
},
|
||||||
|
userId
|
||||||
|
);
|
||||||
|
return { jobId: result.jobId, completedAt: new Date().toISOString() };
|
||||||
|
},
|
||||||
|
{ connection, concurrency: 2 }
|
||||||
|
);
|
||||||
|
|
||||||
|
analysisWorker.on("completed", (job) => {
|
||||||
|
console.log(`[VoicePrint] Job ${job.id} completed: ${JSON.stringify(job.returnvalue)}`);
|
||||||
|
});
|
||||||
|
|
||||||
|
analysisWorker.on("failed", (job, err) => {
|
||||||
|
console.error(`[VoicePrint] Job ${job.id} failed: ${err.message}`);
|
||||||
|
});
|
||||||
|
|
||||||
|
export async function addAnalysisJob(
|
||||||
|
userId: string,
|
||||||
|
audioBuffer: Buffer,
|
||||||
|
sampleRate?: number,
|
||||||
|
analysisType?: string
|
||||||
|
) {
|
||||||
|
return analysisQueue.add("analyze", {
|
||||||
|
userId,
|
||||||
|
audioBuffer: audioBuffer.toString("base64"),
|
||||||
|
sampleRate,
|
||||||
|
analysisType,
|
||||||
|
}, {
|
||||||
|
attempts: 3,
|
||||||
|
backoff: { type: "exponential", delay: 5000 },
|
||||||
|
jobId: `vp-${userId}-${Date.now()}`,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log("[VoicePrint] Analysis worker started");
|
||||||
@@ -82,3 +82,62 @@ export interface AlertInput {
|
|||||||
channel: AlertChannel;
|
channel: AlertChannel;
|
||||||
dedupKey: string;
|
dedupKey: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export const AnalysisJobStatus = {
|
||||||
|
PENDING: "PENDING",
|
||||||
|
RUNNING: "RUNNING",
|
||||||
|
COMPLETED: "COMPLETED",
|
||||||
|
FAILED: "FAILED",
|
||||||
|
} as const;
|
||||||
|
export type AnalysisJobStatus = (typeof AnalysisJobStatus)[keyof typeof AnalysisJobStatus];
|
||||||
|
|
||||||
|
export const AnalysisType = {
|
||||||
|
SYNTHETIC_DETECTION: "SYNTHETIC_DETECTION",
|
||||||
|
VOICE_MATCH: "VOICE_MATCH",
|
||||||
|
BATCH: "BATCH",
|
||||||
|
} as const;
|
||||||
|
export type AnalysisType = (typeof AnalysisType)[keyof typeof AnalysisType];
|
||||||
|
|
||||||
|
export const DetectionVerdict = {
|
||||||
|
NATURAL: "NATURAL",
|
||||||
|
SYNTHETIC: "SYNTHETIC",
|
||||||
|
UNCERTAIN: "UNCERTAIN",
|
||||||
|
} as const;
|
||||||
|
export type DetectionVerdict = (typeof DetectionVerdict)[keyof typeof DetectionVerdict];
|
||||||
|
|
||||||
|
export interface VoiceEnrollmentInput {
|
||||||
|
label: string;
|
||||||
|
audioBuffer: Buffer;
|
||||||
|
sampleRate?: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface AnalyzeAudioInput {
|
||||||
|
audioBuffer: Buffer;
|
||||||
|
sampleRate?: number;
|
||||||
|
analysisType?: AnalysisType;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface BatchAnalyzeInput {
|
||||||
|
audioBuffers: Array<{ name: string; buffer: Buffer; sampleRate?: number }>;
|
||||||
|
analysisType?: AnalysisType;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface AnalysisResultOutput {
|
||||||
|
jobId: string;
|
||||||
|
syntheticScore: number;
|
||||||
|
verdict: DetectionVerdict;
|
||||||
|
confidence: number;
|
||||||
|
matchedEnrollmentId?: string;
|
||||||
|
matchedSimilarity?: number;
|
||||||
|
processingTimeMs: number;
|
||||||
|
modelVersion?: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface VoiceEnrollmentOutput {
|
||||||
|
id: string;
|
||||||
|
label: string;
|
||||||
|
embeddingDim: number;
|
||||||
|
sampleRate: number;
|
||||||
|
durationSec?: number;
|
||||||
|
createdAt: Date;
|
||||||
|
}
|
||||||
|
|||||||
19
services/voiceprint/package.json
Normal file
19
services/voiceprint/package.json
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
{
|
||||||
|
"name": "@shieldai/voiceprint",
|
||||||
|
"version": "0.1.0",
|
||||||
|
"main": "./dist/index.js",
|
||||||
|
"types": "./dist/index.d.ts",
|
||||||
|
"scripts": {
|
||||||
|
"build": "tsc",
|
||||||
|
"test": "vitest run",
|
||||||
|
"lint": "eslint src/"
|
||||||
|
},
|
||||||
|
"dependencies": {
|
||||||
|
"@shieldai/db": "0.1.0",
|
||||||
|
"@shieldai/types": "0.1.0",
|
||||||
|
"node-cache": "^5.1.2"
|
||||||
|
},
|
||||||
|
"exports": {
|
||||||
|
".": "./src/index.ts"
|
||||||
|
}
|
||||||
|
}
|
||||||
183
services/voiceprint/src/analysis/AnalysisService.ts
Normal file
183
services/voiceprint/src/analysis/AnalysisService.ts
Normal file
@@ -0,0 +1,183 @@
|
|||||||
|
import prisma from "@shieldai/db";
|
||||||
|
import { AudioPreprocessor, AudioFeatures } from "../preprocessor/AudioPreprocessor";
|
||||||
|
import { EmbeddingService, EmbeddingOutput } from "../embedding/EmbeddingService";
|
||||||
|
import { VoiceEnrollmentService } from "../enrollment/VoiceEnrollmentService";
|
||||||
|
import {
|
||||||
|
AnalyzeAudioInput,
|
||||||
|
AnalysisJobStatus,
|
||||||
|
AnalysisType,
|
||||||
|
DetectionVerdict,
|
||||||
|
AnalysisResultOutput,
|
||||||
|
} from "@shieldai/types";
|
||||||
|
|
||||||
|
export class AnalysisService {
|
||||||
|
private preprocessor: AudioPreprocessor;
|
||||||
|
private embeddingService: EmbeddingService;
|
||||||
|
private enrollmentService: VoiceEnrollmentService;
|
||||||
|
private readonly syntheticThreshold = 0.7;
|
||||||
|
private readonly uncertainThreshold = 0.4;
|
||||||
|
|
||||||
|
constructor() {
|
||||||
|
this.preprocessor = new AudioPreprocessor();
|
||||||
|
this.embeddingService = new EmbeddingService();
|
||||||
|
this.enrollmentService = new VoiceEnrollmentService();
|
||||||
|
}
|
||||||
|
|
||||||
|
async analyze(input: AnalyzeAudioInput, userId: string): Promise<AnalysisResultOutput> {
|
||||||
|
const startTime = Date.now();
|
||||||
|
|
||||||
|
const job = await prisma.analysisJob.create({
|
||||||
|
data: {
|
||||||
|
userId,
|
||||||
|
analysisType: input.analysisType || AnalysisType.SYNTHETIC_DETECTION,
|
||||||
|
audioFilePath: `voiceprint/${userId}/${Date.now()}.wav`,
|
||||||
|
status: AnalysisJobStatus.RUNNING,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
try {
|
||||||
|
const preprocessed = await this.preprocessor.preprocess(input.audioBuffer, input.sampleRate);
|
||||||
|
const features = await this.preprocessor.extractFeatures(preprocessed.audio);
|
||||||
|
const embedding = await this.embeddingService.extract(preprocessed.audio);
|
||||||
|
|
||||||
|
const syntheticScore = await this.classifySynthetic(features, embedding);
|
||||||
|
const verdict = this.determineVerdict(syntheticScore);
|
||||||
|
const confidence = this.computeConfidence(syntheticScore, verdict);
|
||||||
|
|
||||||
|
let matchedEnrollmentId: string | undefined;
|
||||||
|
let matchedSimilarity: number | undefined;
|
||||||
|
|
||||||
|
if (input.analysisType === AnalysisType.VOICE_MATCH) {
|
||||||
|
const match = await this.enrollmentService.matchVoice(input.audioBuffer, userId);
|
||||||
|
if (match) {
|
||||||
|
matchedEnrollmentId = match.enrollmentId;
|
||||||
|
matchedSimilarity = match.similarity;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const processingTimeMs = Date.now() - startTime;
|
||||||
|
|
||||||
|
const result = await prisma.analysisResult.create({
|
||||||
|
data: {
|
||||||
|
analysisJobId: job.id,
|
||||||
|
syntheticScore,
|
||||||
|
verdict,
|
||||||
|
confidence,
|
||||||
|
processingTimeMs,
|
||||||
|
matchedEnrollmentId,
|
||||||
|
matchedSimilarity,
|
||||||
|
modelVersion: this.embeddingService.getModelVersion(),
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
await prisma.analysisJob.update({
|
||||||
|
where: { id: job.id },
|
||||||
|
data: {
|
||||||
|
status: AnalysisJobStatus.COMPLETED,
|
||||||
|
completedAt: new Date(),
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
return {
|
||||||
|
jobId: job.id,
|
||||||
|
syntheticScore: result.syntheticScore,
|
||||||
|
verdict: result.verdict,
|
||||||
|
confidence: result.confidence,
|
||||||
|
matchedEnrollmentId: result.matchedEnrollmentId || undefined,
|
||||||
|
matchedSimilarity: result.matchedSimilarity || undefined,
|
||||||
|
processingTimeMs: result.processingTimeMs,
|
||||||
|
modelVersion: result.modelVersion || undefined,
|
||||||
|
};
|
||||||
|
} catch (err) {
|
||||||
|
const message = err instanceof Error ? err.message : "Analysis failed";
|
||||||
|
await prisma.analysisJob.update({
|
||||||
|
where: { id: job.id },
|
||||||
|
data: {
|
||||||
|
status: AnalysisJobStatus.FAILED,
|
||||||
|
errorMessage: message,
|
||||||
|
completedAt: new Date(),
|
||||||
|
},
|
||||||
|
});
|
||||||
|
throw err;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async getResult(jobId: string): Promise<AnalysisResultOutput | null> {
|
||||||
|
const job = await prisma.analysisJob.findUnique({
|
||||||
|
where: { id: jobId },
|
||||||
|
include: { result: true },
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!job || !job.result) return null;
|
||||||
|
|
||||||
|
const r = job.result;
|
||||||
|
return {
|
||||||
|
jobId,
|
||||||
|
syntheticScore: r.syntheticScore,
|
||||||
|
verdict: r.verdict,
|
||||||
|
confidence: r.confidence,
|
||||||
|
matchedEnrollmentId: r.matchedEnrollmentId || undefined,
|
||||||
|
matchedSimilarity: r.matchedSimilarity || undefined,
|
||||||
|
processingTimeMs: r.processingTimeMs,
|
||||||
|
modelVersion: r.modelVersion || undefined,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
async getUserResults(userId: string, limit: number = 20): Promise<AnalysisResultOutput[]> {
|
||||||
|
const jobs = await prisma.analysisJob.findMany({
|
||||||
|
where: { userId, status: AnalysisJobStatus.COMPLETED },
|
||||||
|
orderBy: { createdAt: "desc" },
|
||||||
|
take: limit,
|
||||||
|
include: { result: true },
|
||||||
|
});
|
||||||
|
|
||||||
|
return jobs
|
||||||
|
.filter((j) => j.result)
|
||||||
|
.map((j) => {
|
||||||
|
const r = j.result!;
|
||||||
|
return {
|
||||||
|
jobId: j.id,
|
||||||
|
syntheticScore: r.syntheticScore,
|
||||||
|
verdict: r.verdict,
|
||||||
|
confidence: r.confidence,
|
||||||
|
matchedEnrollmentId: r.matchedEnrollmentId || undefined,
|
||||||
|
matchedSimilarity: r.matchedSimilarity || undefined,
|
||||||
|
processingTimeMs: r.processingTimeMs,
|
||||||
|
modelVersion: r.modelVersion || undefined,
|
||||||
|
};
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
private async classifySynthetic(
|
||||||
|
features: AudioFeatures,
|
||||||
|
embedding: EmbeddingOutput
|
||||||
|
): Promise<number> {
|
||||||
|
const modelScore = await this.embeddingService.classify(embedding.vector);
|
||||||
|
|
||||||
|
const zcrAnomaly = Math.abs(features.zeroCrossingRate - 0.05) / 0.05;
|
||||||
|
const spectralAnomaly = Math.abs(features.spectralCentroid - 500) / 500;
|
||||||
|
|
||||||
|
const artifactScore = Math.min(1, (zcrAnomaly + spectralAnomaly) / 4);
|
||||||
|
|
||||||
|
return 0.7 * modelScore + 0.3 * artifactScore;
|
||||||
|
}
|
||||||
|
|
||||||
|
private determineVerdict(score: number): DetectionVerdict {
|
||||||
|
if (score >= this.syntheticThreshold) return DetectionVerdict.SYNTHETIC;
|
||||||
|
if (score <= this.uncertainThreshold) return DetectionVerdict.NATURAL;
|
||||||
|
return DetectionVerdict.UNCERTAIN;
|
||||||
|
}
|
||||||
|
|
||||||
|
private computeConfidence(score: number, verdict: DetectionVerdict): number {
|
||||||
|
if (verdict === DetectionVerdict.SYNTHETIC) {
|
||||||
|
return Math.min(1, (score - this.syntheticThreshold) / (1 - this.syntheticThreshold));
|
||||||
|
}
|
||||||
|
if (verdict === DetectionVerdict.NATURAL) {
|
||||||
|
return Math.min(1, (this.uncertainThreshold - score) / this.uncertainThreshold);
|
||||||
|
}
|
||||||
|
return 1 - Math.min(
|
||||||
|
Math.abs(score - this.uncertainThreshold) / (this.syntheticThreshold - this.uncertainThreshold),
|
||||||
|
1
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
125
services/voiceprint/src/analysis/BatchAnalysisService.ts
Normal file
125
services/voiceprint/src/analysis/BatchAnalysisService.ts
Normal file
@@ -0,0 +1,125 @@
|
|||||||
|
import prisma from "@shieldai/db";
|
||||||
|
import { AnalysisService } from "./AnalysisService";
|
||||||
|
import {
|
||||||
|
BatchAnalyzeInput,
|
||||||
|
AnalysisJobStatus,
|
||||||
|
AnalysisType,
|
||||||
|
AnalysisResultOutput,
|
||||||
|
} from "@shieldai/types";
|
||||||
|
|
||||||
|
export class BatchAnalysisService {
|
||||||
|
private analysisService: AnalysisService;
|
||||||
|
|
||||||
|
constructor() {
|
||||||
|
this.analysisService = new AnalysisService();
|
||||||
|
}
|
||||||
|
|
||||||
|
async analyzeBatch(
|
||||||
|
input: BatchAnalyzeInput,
|
||||||
|
userId: string
|
||||||
|
): Promise<BatchResult> {
|
||||||
|
const batchId = `batch_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
|
||||||
|
const results: AnalysisResultOutput[] = [];
|
||||||
|
const errors: Array<{ name: string; error: string }> = [];
|
||||||
|
|
||||||
|
for (const audioInput of input.audioBuffers) {
|
||||||
|
try {
|
||||||
|
const result = await this.analysisService.analyze(
|
||||||
|
{
|
||||||
|
audioBuffer: audioInput.buffer,
|
||||||
|
sampleRate: audioInput.sampleRate,
|
||||||
|
analysisType: input.analysisType || AnalysisType.SYNTHETIC_DETECTION,
|
||||||
|
},
|
||||||
|
userId
|
||||||
|
);
|
||||||
|
results.push(result);
|
||||||
|
} catch (err) {
|
||||||
|
const message = err instanceof Error ? err.message : "Analysis failed";
|
||||||
|
errors.push({ name: audioInput.name, error: message });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const batchJob = await prisma.analysisJob.create({
|
||||||
|
data: {
|
||||||
|
userId,
|
||||||
|
analysisType: AnalysisType.BATCH,
|
||||||
|
audioFilePath: `voiceprint/${userId}/${batchId}`,
|
||||||
|
status: errors.length === input.audioBuffers.length
|
||||||
|
? AnalysisJobStatus.FAILED
|
||||||
|
: AnalysisJobStatus.COMPLETED,
|
||||||
|
errorMessage:
|
||||||
|
errors.length > 0 ? `${errors.length} of ${input.audioBuffers.length} files failed` : undefined,
|
||||||
|
completedAt: new Date(),
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
return {
|
||||||
|
batchId,
|
||||||
|
jobId: batchJob.id,
|
||||||
|
totalFiles: input.audioBuffers.length,
|
||||||
|
successfulResults: results.length,
|
||||||
|
failedCount: errors.length,
|
||||||
|
results,
|
||||||
|
errors,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
async getBatchResult(batchJobId: string): Promise<BatchResult | null> {
|
||||||
|
const job = await prisma.analysisJob.findUnique({
|
||||||
|
where: { id: batchJobId },
|
||||||
|
include: { result: true },
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!job) return null;
|
||||||
|
|
||||||
|
const childJobs = await prisma.analysisJob.findMany({
|
||||||
|
where: {
|
||||||
|
userId: job.userId,
|
||||||
|
createdAt: { gte: job.createdAt, lt: new Date(job.createdAt.getTime() + 60000) },
|
||||||
|
id: { not: job.id },
|
||||||
|
},
|
||||||
|
include: { result: true },
|
||||||
|
});
|
||||||
|
|
||||||
|
const results: AnalysisResultOutput[] = [];
|
||||||
|
const errors: Array<{ name: string; error: string }> = [];
|
||||||
|
|
||||||
|
for (const childJob of childJobs) {
|
||||||
|
if (childJob.result) {
|
||||||
|
const r = childJob.result;
|
||||||
|
results.push({
|
||||||
|
jobId: childJob.id,
|
||||||
|
syntheticScore: r.syntheticScore,
|
||||||
|
verdict: r.verdict,
|
||||||
|
confidence: r.confidence,
|
||||||
|
matchedEnrollmentId: r.matchedEnrollmentId || undefined,
|
||||||
|
matchedSimilarity: r.matchedSimilarity || undefined,
|
||||||
|
processingTimeMs: r.processingTimeMs,
|
||||||
|
modelVersion: r.modelVersion || undefined,
|
||||||
|
});
|
||||||
|
} else if (childJob.errorMessage) {
|
||||||
|
errors.push({ name: childJob.audioFilePath, error: childJob.errorMessage });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
batchId: job.audioFilePath.split("/").pop() || job.id,
|
||||||
|
jobId: job.id,
|
||||||
|
totalFiles: childJobs.length,
|
||||||
|
successfulResults: results.length,
|
||||||
|
failedCount: errors.length,
|
||||||
|
results,
|
||||||
|
errors,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface BatchResult {
|
||||||
|
batchId: string;
|
||||||
|
jobId: string;
|
||||||
|
totalFiles: number;
|
||||||
|
successfulResults: number;
|
||||||
|
failedCount: number;
|
||||||
|
results: AnalysisResultOutput[];
|
||||||
|
errors: Array<{ name: string; error: string }>;
|
||||||
|
}
|
||||||
196
services/voiceprint/src/embedding/EmbeddingService.ts
Normal file
196
services/voiceprint/src/embedding/EmbeddingService.ts
Normal file
@@ -0,0 +1,196 @@
|
|||||||
|
import { spawn } from "child_process";
|
||||||
|
|
||||||
|
const EMBEDDING_DIM = 192;
|
||||||
|
const MODEL_VERSION = "ecapa-tdnn-0.1.0-mock";
|
||||||
|
|
||||||
|
export class EmbeddingService {
|
||||||
|
private mlServiceUrl: string;
|
||||||
|
|
||||||
|
constructor() {
|
||||||
|
this.mlServiceUrl = process.env.VOICEPRINT_ML_URL || "http://localhost:8001";
|
||||||
|
}
|
||||||
|
|
||||||
|
async extract(audioBuffer: Buffer): Promise<EmbeddingOutput> {
|
||||||
|
const mlAvailable = await this.checkMLService();
|
||||||
|
|
||||||
|
if (mlAvailable) {
|
||||||
|
return this.extractViaML(audioBuffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
return this.extractMock(audioBuffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
async classify(embedding: number[]): Promise<number> {
|
||||||
|
const mlAvailable = await this.checkMLService();
|
||||||
|
|
||||||
|
if (mlAvailable) {
|
||||||
|
return this.classifyViaML(embedding);
|
||||||
|
}
|
||||||
|
|
||||||
|
return this.classifyMock(embedding);
|
||||||
|
}
|
||||||
|
|
||||||
|
getModelVersion(): string {
|
||||||
|
return MODEL_VERSION;
|
||||||
|
}
|
||||||
|
|
||||||
|
private async extractViaML(audioBuffer: Buffer): Promise<EmbeddingOutput> {
|
||||||
|
return new Promise((resolve, reject) => {
|
||||||
|
const jsonInput = audioBuffer.toString("base64");
|
||||||
|
const proc = spawn("python3", [
|
||||||
|
"-c",
|
||||||
|
`
|
||||||
|
import urllib.request, json, sys
|
||||||
|
req = urllib.request.Request(
|
||||||
|
"${this.mlServiceUrl}/embedding",
|
||||||
|
data=json.dumps({"audio": "${jsonInput.substring(0, 5000)}"}).encode(),
|
||||||
|
headers={"Content-Type": "application/json"}
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
with urllib.request.urlopen(req, timeout=60) as resp:
|
||||||
|
data = json.loads(resp.read())
|
||||||
|
sys.stdout.write(json.dumps({"ok": True, "vector": data.get("embedding", []), "dim": data.get("dimension", ${EMBEDDING_DIM})}))
|
||||||
|
except Exception as e:
|
||||||
|
sys.stdout.write(json.dumps({"ok": False, "error": str(e)}))
|
||||||
|
`,
|
||||||
|
]);
|
||||||
|
|
||||||
|
let output = "";
|
||||||
|
proc.stdout.on("data", (chunk) => { output += chunk.toString(); });
|
||||||
|
proc.on("close", (code) => {
|
||||||
|
try {
|
||||||
|
const result = JSON.parse(output);
|
||||||
|
if (result.ok && result.vector.length === EMBEDDING_DIM) {
|
||||||
|
resolve({ vector: result.vector, dimension: EMBEDDING_DIM });
|
||||||
|
} else {
|
||||||
|
resolve(this.generateMockFromBuffer(audioBuffer));
|
||||||
|
}
|
||||||
|
} catch {
|
||||||
|
resolve(this.generateMockFromBuffer(audioBuffer));
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
private async classifyViaML(embedding: number[]): Promise<number> {
|
||||||
|
return new Promise((resolve) => {
|
||||||
|
const proc = spawn("python3", [
|
||||||
|
"-c",
|
||||||
|
`
|
||||||
|
import urllib.request, json, sys
|
||||||
|
req = urllib.request.Request(
|
||||||
|
"${this.mlServiceUrl}/classify",
|
||||||
|
data=json.dumps({"embedding": ${JSON.stringify(embedding)}}).encode(),
|
||||||
|
headers={"Content-Type": "application/json"}
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
with urllib.request.urlopen(req, timeout=30) as resp:
|
||||||
|
data = json.loads(resp.read())
|
||||||
|
sys.stdout.write(json.dumps({"score": data.get("synthetic_score", 0.5)}))
|
||||||
|
except:
|
||||||
|
sys.stdout.write(json.dumps({"score": 0.5}))
|
||||||
|
`,
|
||||||
|
]);
|
||||||
|
|
||||||
|
let output = "";
|
||||||
|
proc.stdout.on("data", (chunk) => { output += chunk.toString(); });
|
||||||
|
proc.on("close", () => {
|
||||||
|
try {
|
||||||
|
const result = JSON.parse(output);
|
||||||
|
resolve(result.score || 0.5);
|
||||||
|
} catch {
|
||||||
|
resolve(0.5);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
private async extractMock(audioBuffer: Buffer): Promise<EmbeddingOutput> {
|
||||||
|
return this.generateMockFromBuffer(audioBuffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
private async classifyMock(embedding: number[]): Promise<number> {
|
||||||
|
const mean = embedding.reduce((s, v) => s + v, 0) / embedding.length;
|
||||||
|
const variance = embedding.reduce((s, v) => s + (v - mean) ** 2, 0) / embedding.length;
|
||||||
|
const stdDev = Math.sqrt(variance);
|
||||||
|
|
||||||
|
const syntheticIndicators = [
|
||||||
|
stdDev < 0.1 ? 0.8 : 0.2,
|
||||||
|
Math.abs(mean) > 0.5 ? 0.7 : 0.3,
|
||||||
|
this.hasArtifacts(embedding) ? 0.9 : 0.1,
|
||||||
|
];
|
||||||
|
|
||||||
|
return syntheticIndicators.reduce((s, v) => s + v, 0) / syntheticIndicators.length;
|
||||||
|
}
|
||||||
|
|
||||||
|
private generateMockFromBuffer(audioBuffer: Buffer): EmbeddingOutput {
|
||||||
|
const seed = this.computeSeed(audioBuffer);
|
||||||
|
const rng = this.createRNG(seed);
|
||||||
|
const vector: number[] = [];
|
||||||
|
|
||||||
|
for (let i = 0; i < EMBEDDING_DIM; i++) {
|
||||||
|
const u1 = rng();
|
||||||
|
const u2 = rng();
|
||||||
|
const gauss = Math.sqrt(-2 * Math.log(u1)) * Math.cos(2 * Math.PI * u2);
|
||||||
|
vector.push(parseFloat(gauss.toFixed(6)));
|
||||||
|
}
|
||||||
|
|
||||||
|
const norm = Math.sqrt(vector.reduce((s, v) => s + v * v, 0));
|
||||||
|
const normalized = vector.map((v) => parseFloat((v / norm).toFixed(6)));
|
||||||
|
|
||||||
|
return { vector: normalized, dimension: EMBEDDING_DIM };
|
||||||
|
}
|
||||||
|
|
||||||
|
private hasArtifacts(embedding: number[]): boolean {
|
||||||
|
const window = 16;
|
||||||
|
let artifactCount = 0;
|
||||||
|
|
||||||
|
for (let i = 0; i < embedding.length - window; i += window) {
|
||||||
|
const slice = embedding.slice(i, i + window);
|
||||||
|
const localMean = slice.reduce((s, v) => s + v, 0) / slice.length;
|
||||||
|
const localVar = slice.reduce((s, v) => s + (v - localMean) ** 2, 0) / slice.length;
|
||||||
|
|
||||||
|
if (localVar < 0.001) artifactCount++;
|
||||||
|
}
|
||||||
|
|
||||||
|
return artifactCount > embedding.length / window / 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
private async checkMLService(): Promise<boolean> {
|
||||||
|
return new Promise((resolve) => {
|
||||||
|
const proc = spawn("python3", [
|
||||||
|
"-c",
|
||||||
|
`
|
||||||
|
import urllib.request, sys
|
||||||
|
try:
|
||||||
|
urllib.request.urlopen("${this.mlServiceUrl}/health", timeout=2)
|
||||||
|
sys.exit(0)
|
||||||
|
except:
|
||||||
|
sys.exit(1)
|
||||||
|
`,
|
||||||
|
]);
|
||||||
|
proc.on("close", (code) => resolve(code === 0));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
private computeSeed(buffer: Buffer): number {
|
||||||
|
let hash = 0;
|
||||||
|
const sampleSize = Math.min(buffer.length, 1024);
|
||||||
|
for (let i = 0; i < sampleSize; i += 4) {
|
||||||
|
hash = ((hash << 5) - hash + buffer.readInt32LE(i)) | 0;
|
||||||
|
}
|
||||||
|
return Math.abs(hash);
|
||||||
|
}
|
||||||
|
|
||||||
|
private createRNG(seed: number): () => number {
|
||||||
|
return () => {
|
||||||
|
seed = (seed * 1664525 + 1013904223) & 0xffffffff;
|
||||||
|
return (seed >>> 0) / 0xffffffff;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface EmbeddingOutput {
|
||||||
|
vector: number[];
|
||||||
|
dimension: number;
|
||||||
|
}
|
||||||
151
services/voiceprint/src/enrollment/VoiceEnrollmentService.ts
Normal file
151
services/voiceprint/src/enrollment/VoiceEnrollmentService.ts
Normal file
@@ -0,0 +1,151 @@
|
|||||||
|
import prisma from "@shieldai/db";
|
||||||
|
import { EmbeddingService } from "../embedding/EmbeddingService";
|
||||||
|
import { FAISSIndex } from "../indexer/FAISSIndex";
|
||||||
|
import { AudioPreprocessor } from "../preprocessor/AudioPreprocessor";
|
||||||
|
import { VoiceEnrollmentInput, VoiceEnrollmentOutput } from "@shieldai/types";
|
||||||
|
|
||||||
|
export class VoiceEnrollmentService {
|
||||||
|
private embeddingService: EmbeddingService;
|
||||||
|
private faissIndex: FAISSIndex;
|
||||||
|
private preprocessor: AudioPreprocessor;
|
||||||
|
|
||||||
|
constructor() {
|
||||||
|
this.embeddingService = new EmbeddingService();
|
||||||
|
this.faissIndex = new FAISSIndex();
|
||||||
|
this.preprocessor = new AudioPreprocessor();
|
||||||
|
}
|
||||||
|
|
||||||
|
async enroll(input: VoiceEnrollmentInput, userId: string): Promise<VoiceEnrollmentOutput> {
|
||||||
|
const preprocessed = await this.preprocessor.preprocess(input.audioBuffer, input.sampleRate);
|
||||||
|
|
||||||
|
const embedding = await this.embeddingService.extract(preprocessed.audio);
|
||||||
|
|
||||||
|
const enrollment = await prisma.voiceEnrollment.create({
|
||||||
|
data: {
|
||||||
|
userId,
|
||||||
|
label: input.label,
|
||||||
|
embeddingVector: embedding.vector,
|
||||||
|
embeddingDim: embedding.dimension,
|
||||||
|
sampleRate: preprocessed.sampleRate,
|
||||||
|
durationSec: preprocessed.durationSec,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
await this.faissIndex.add(enrollment.id, embedding.vector);
|
||||||
|
|
||||||
|
return {
|
||||||
|
id: enrollment.id,
|
||||||
|
label: enrollment.label,
|
||||||
|
embeddingDim: enrollment.embeddingDim,
|
||||||
|
sampleRate: enrollment.sampleRate,
|
||||||
|
durationSec: enrollment.durationSec,
|
||||||
|
createdAt: enrollment.createdAt,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
async listEnrollments(userId: string): Promise<VoiceEnrollmentOutput[]> {
|
||||||
|
const enrollments = await prisma.voiceEnrollment.findMany({
|
||||||
|
where: { userId },
|
||||||
|
orderBy: { createdAt: "desc" },
|
||||||
|
select: {
|
||||||
|
id: true,
|
||||||
|
label: true,
|
||||||
|
embeddingDim: true,
|
||||||
|
sampleRate: true,
|
||||||
|
durationSec: true,
|
||||||
|
createdAt: true,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
return enrollments.map((e) => ({
|
||||||
|
id: e.id,
|
||||||
|
label: e.label,
|
||||||
|
embeddingDim: e.embeddingDim,
|
||||||
|
sampleRate: e.sampleRate,
|
||||||
|
durationSec: e.durationSec,
|
||||||
|
createdAt: e.createdAt,
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
|
async removeEnrollment(userId: string, enrollmentId: string): Promise<boolean> {
|
||||||
|
const enrollment = await prisma.voiceEnrollment.findFirst({
|
||||||
|
where: { id: enrollmentId, userId },
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!enrollment) {
|
||||||
|
throw new Error(`Enrollment ${enrollmentId} not found for user ${userId}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
await prisma.voiceEnrollment.delete({ where: { id: enrollmentId } });
|
||||||
|
await this.faissIndex.remove(enrollmentId);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
async getEnrollment(enrollmentId: string): Promise<VoiceEnrollmentOutput | null> {
|
||||||
|
const enrollment = await prisma.voiceEnrollment.findUnique({
|
||||||
|
where: { id: enrollmentId },
|
||||||
|
select: {
|
||||||
|
id: true,
|
||||||
|
label: true,
|
||||||
|
embeddingDim: true,
|
||||||
|
sampleRate: true,
|
||||||
|
durationSec: true,
|
||||||
|
createdAt: true,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!enrollment) return null;
|
||||||
|
|
||||||
|
return {
|
||||||
|
id: enrollment.id,
|
||||||
|
label: enrollment.label,
|
||||||
|
embeddingDim: enrollment.embeddingDim,
|
||||||
|
sampleRate: enrollment.sampleRate,
|
||||||
|
durationSec: enrollment.durationSec,
|
||||||
|
createdAt: enrollment.createdAt,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
async matchVoice(
|
||||||
|
audioBuffer: Buffer,
|
||||||
|
userId: string,
|
||||||
|
threshold: number = 0.75
|
||||||
|
): Promise<MatchResult | null> {
|
||||||
|
const preprocessed = await this.preprocessor.preprocess(audioBuffer);
|
||||||
|
const embedding = await this.embeddingService.extract(preprocessed.audio);
|
||||||
|
|
||||||
|
const matches = await this.faissIndex.search(embedding.vector, 5);
|
||||||
|
|
||||||
|
const enrollmentIds = matches.map((m) => m.id);
|
||||||
|
const enrollments = await prisma.voiceEnrollment.findMany({
|
||||||
|
where: {
|
||||||
|
id: { in: enrollmentIds },
|
||||||
|
userId,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
let bestMatch: MatchResult | null = null;
|
||||||
|
|
||||||
|
for (const match of matches) {
|
||||||
|
const enrollment = enrollments.find((e) => e.id === match.id);
|
||||||
|
if (enrollment && match.similarity >= threshold) {
|
||||||
|
if (!bestMatch || match.similarity > bestMatch.similarity) {
|
||||||
|
bestMatch = {
|
||||||
|
enrollmentId: enrollment.id,
|
||||||
|
label: enrollment.label,
|
||||||
|
similarity: match.similarity,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return bestMatch;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface MatchResult {
|
||||||
|
enrollmentId: string;
|
||||||
|
label: string;
|
||||||
|
similarity: number;
|
||||||
|
}
|
||||||
6
services/voiceprint/src/index.ts
Normal file
6
services/voiceprint/src/index.ts
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
// Public entry point of @shieldai/voiceprint: re-exports every service so
// consumers can import from the package root.
export * from "./preprocessor/AudioPreprocessor";
export * from "./enrollment/VoiceEnrollmentService";
export * from "./analysis/AnalysisService";
export * from "./analysis/BatchAnalysisService";
export * from "./embedding/EmbeddingService";
export * from "./indexer/FAISSIndex";
|
||||||
86
services/voiceprint/src/indexer/FAISSIndex.ts
Normal file
86
services/voiceprint/src/indexer/FAISSIndex.ts
Normal file
@@ -0,0 +1,86 @@
|
|||||||
|
export class FAISSIndex {
|
||||||
|
private store: Map<string, number[]> = new Map();
|
||||||
|
private readonly dimension = 192;
|
||||||
|
private initialized = false;
|
||||||
|
|
||||||
|
async initialize(): Promise<void> {
|
||||||
|
if (this.initialized) return;
|
||||||
|
this.initialized = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
async add(id: string, vector: number[]): Promise<void> {
|
||||||
|
this.normalizeInPlace(vector);
|
||||||
|
this.store.set(id, [...vector]);
|
||||||
|
}
|
||||||
|
|
||||||
|
async remove(id: string): Promise<void> {
|
||||||
|
this.store.delete(id);
|
||||||
|
}
|
||||||
|
|
||||||
|
async search(
|
||||||
|
queryVector: number[],
|
||||||
|
topK: number = 5
|
||||||
|
): Promise<SearchResult[]> {
|
||||||
|
const normalized = [...queryVector];
|
||||||
|
this.normalizeInPlace(normalized);
|
||||||
|
|
||||||
|
const scores: Array<{ id: string; similarity: number }> = [];
|
||||||
|
|
||||||
|
for (const [id, vector] of this.store.entries()) {
|
||||||
|
const similarity = this.cosineSimilarity(normalized, vector);
|
||||||
|
scores.push({ id, similarity });
|
||||||
|
}
|
||||||
|
|
||||||
|
scores.sort((a, b) => b.similarity - a.similarity);
|
||||||
|
return scores.slice(0, topK).map((s, i) => ({ rank: i + 1, id: s.id, similarity: s.similarity }));
|
||||||
|
}
|
||||||
|
|
||||||
|
async count(): Promise<number> {
|
||||||
|
return this.store.size;
|
||||||
|
}
|
||||||
|
|
||||||
|
async clear(): Promise<void> {
|
||||||
|
this.store.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
async loadFromDatabase(): Promise<void> {
|
||||||
|
const prisma = (await import("@shieldai/db")).default;
|
||||||
|
const enrollments = await prisma.voiceEnrollment.findMany({
|
||||||
|
select: { id: true, embeddingVector: true },
|
||||||
|
});
|
||||||
|
|
||||||
|
for (const enrollment of enrollments) {
|
||||||
|
this.store.set(enrollment.id, enrollment.embeddingVector);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private cosineSimilarity(a: number[], b: number[]): number {
|
||||||
|
let dotProduct = 0;
|
||||||
|
let normA = 0;
|
||||||
|
let normB = 0;
|
||||||
|
|
||||||
|
for (let i = 0; i < a.length; i++) {
|
||||||
|
dotProduct += a[i] * b[i];
|
||||||
|
normA += a[i] * a[i];
|
||||||
|
normB += b[i] * b[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
const denominator = Math.sqrt(normA) * Math.sqrt(normB);
|
||||||
|
return denominator > 0 ? dotProduct / denominator : 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
private normalizeInPlace(vector: number[]): void {
|
||||||
|
const norm = Math.sqrt(vector.reduce((s, v) => s + v * v, 0));
|
||||||
|
if (norm > 0) {
|
||||||
|
for (let i = 0; i < vector.length; i++) {
|
||||||
|
vector[i] /= norm;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface SearchResult {
|
||||||
|
rank: number;
|
||||||
|
id: string;
|
||||||
|
similarity: number;
|
||||||
|
}
|
||||||
327
services/voiceprint/src/preprocessor/AudioPreprocessor.ts
Normal file
327
services/voiceprint/src/preprocessor/AudioPreprocessor.ts
Normal file
@@ -0,0 +1,327 @@
|
|||||||
|
import { spawn } from "child_process";
|
||||||
|
import { randomBytes } from "crypto";
|
||||||
|
import { tmpdir } from "os";
|
||||||
|
import { join } from "path";
|
||||||
|
|
||||||
|
export class AudioPreprocessor {
|
||||||
|
private readonly targetSampleRate = 16000;
|
||||||
|
private readonly channels = 1;
|
||||||
|
|
||||||
|
  /**
   * Converts input audio to the service's target format (16 kHz mono, per
   * the class constants) via an external Python preprocessing step, falling
   * back to a pure-JS path when that fails.
   *
   * @param audioBuffer raw input audio.
   * @param inputSampleRate sample rate of the input, if known; otherwise it
   *        is detected from the buffer (detectSampleRate — defined later in
   *        this file, semantics not verified here).
   *
   * NOTE(review): the temp files created here do not appear to be deleted on
   * either path — looks like a temp-dir leak; confirm writeTempFile's
   * behavior before relying on this in long-running processes.
   */
  async preprocess(audioBuffer: Buffer, inputSampleRate?: number): Promise<PreprocessedAudio> {
    const tempInput = this.writeTempFile(audioBuffer, ".wav");
    const tempOutput = this.writeTempFile(Buffer.alloc(0), ".wav");

    // Despite the name, this is the *input* rate handed to the preprocessor.
    const outputSampleRate = inputSampleRate || this.detectSampleRate(audioBuffer);

    try {
      await this.runPythonPreprocess(tempInput, tempOutput, outputSampleRate);

      const processed = await this.readFile(tempOutput);
      return {
        audio: processed,
        sampleRate: this.targetSampleRate,
        channels: this.channels,
        // 2 bytes per 16-bit sample.
        durationSec: processed.length / (this.targetSampleRate * this.channels * 2),
      };
    } catch (err) {
      // Any failure in the Python path falls back to the JS implementation.
      const fallback = await this.jsFallback(audioBuffer, outputSampleRate);
      return fallback;
    }
  }
|
||||||
|
|
||||||
|
async preprocessBatch(
|
||||||
|
inputs: Array<{ buffer: Buffer; sampleRate?: number }>
|
||||||
|
): Promise<PreprocessedAudio[]> {
|
||||||
|
const promises = inputs.map(async (input) => {
|
||||||
|
return this.preprocess(input.buffer, input.sampleRate);
|
||||||
|
});
|
||||||
|
return Promise.all(promises);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Simple energy-based voice activity detection.
 * Splits the signal into 20 ms windows, keeps only windows whose RMS exceeds
 * a fixed threshold, and concatenates the voiced windows back together
 * in their original order.
 */
applyVAD(audioBuffer: Buffer, sampleRate: number): Buffer {
  const samples = this.bufferToInt16(audioBuffer);
  const rmsThreshold = 500; // empirical silence cutoff for int16 PCM
  const windowLen = Math.floor(sampleRate * 0.02); // 20 ms analysis window

  const voiced: number[] = [];
  for (let start = 0; start < samples.length; start += windowLen) {
    const win = samples.slice(start, start + windowLen);
    let energy = 0;
    for (const s of win) energy += s * s;
    const rms = Math.sqrt(energy / win.length);
    if (rms > rmsThreshold) {
      for (const s of win) voiced.push(s);
    }
  }
  return this.int16ToBuffer(voiced);
}
|
||||||
|
|
||||||
|
/**
 * Peak-normalizes 16-bit PCM audio to 95% of full scale.
 * Returns the input unchanged when the signal is all zeros.
 *
 * Fix: the previous version found the peak with
 * `Math.max(...int16Array.map(...))`, which passes every sample as a call
 * argument and overflows the call stack for long recordings (typically
 * beyond ~100k samples). The peak is now found with a plain loop.
 */
normalizeAudio(audioBuffer: Buffer): Buffer {
  const samples = this.bufferToInt16(audioBuffer);

  let maxAmplitude = 0;
  for (const v of samples) {
    const a = Math.abs(v);
    if (a > maxAmplitude) maxAmplitude = a;
  }
  if (maxAmplitude === 0) return audioBuffer; // pure silence: nothing to scale

  const targetMax = 32767 * 0.95; // 5% headroom so rounding cannot clip
  const scale = targetMax / maxAmplitude;
  return this.int16ToBuffer(samples.map((v) => Math.round(v * scale)));
}
|
||||||
|
|
||||||
|
async extractFeatures(audioBuffer: Buffer): Promise<AudioFeatures> {
|
||||||
|
const int16Array = this.bufferToInt16(audioBuffer);
|
||||||
|
const sampleRate = this.targetSampleRate;
|
||||||
|
const windowSize = Math.floor(sampleRate * 0.025);
|
||||||
|
const hopLength = Math.floor(sampleRate * 0.01);
|
||||||
|
|
||||||
|
const mfccs: number[][] = [];
|
||||||
|
const numCoeffs = 13;
|
||||||
|
|
||||||
|
for (let i = 0; i < int16Array.length - windowSize; i += hopLength) {
|
||||||
|
const frame = int16Array.slice(i, i + windowSize);
|
||||||
|
const coeffs = this.computeMFCC(frame, numCoeffs);
|
||||||
|
mfccs.push(coeffs);
|
||||||
|
}
|
||||||
|
|
||||||
|
const zeroCrossingRate = this.computeZCR(int16Array);
|
||||||
|
const spectralCentroid = this.computeSpectralCentroid(int16Array);
|
||||||
|
const spectralRollOff = this.computeSpectralRollOff(int16Array);
|
||||||
|
|
||||||
|
return {
|
||||||
|
mfccs,
|
||||||
|
zeroCrossingRate,
|
||||||
|
spectralCentroid,
|
||||||
|
spectralRollOff,
|
||||||
|
durationSec: int16Array.length / sampleRate,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
private async runPythonPreprocess(
|
||||||
|
inputPath: string,
|
||||||
|
outputPath: string,
|
||||||
|
inputSampleRate: number
|
||||||
|
): Promise<void> {
|
||||||
|
return new Promise((resolve, reject) => {
|
||||||
|
const mlServiceUrl = process.env.VOICEPRINT_ML_URL || "http://localhost:8001";
|
||||||
|
const proc = spawn("python3", [
|
||||||
|
"-c",
|
||||||
|
`
|
||||||
|
import urllib.request, json, sys
|
||||||
|
req = urllib.request.Request(
|
||||||
|
"${mlServiceUrl}/preprocess",
|
||||||
|
data=json.dumps({"input_path": "${inputPath}", "output_path": "${outputPath}", "input_sr": ${inputSampleRate}}).encode(),
|
||||||
|
headers={"Content-Type": "application/json"}
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
with urllib.request.urlopen(req, timeout=30) as resp:
|
||||||
|
sys.exit(0) if resp.status == 200 else sys.exit(1)
|
||||||
|
except:
|
||||||
|
sys.exit(1)
|
||||||
|
`,
|
||||||
|
]);
|
||||||
|
|
||||||
|
proc.on("close", (code) => {
|
||||||
|
code === 0 ? resolve() : reject(new Error(`Python preprocess exited with code ${code}`));
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
private async jsFallback(
|
||||||
|
audioBuffer: Buffer,
|
||||||
|
inputSampleRate: number
|
||||||
|
): Promise<PreprocessedAudio> {
|
||||||
|
let processed = audioBuffer;
|
||||||
|
|
||||||
|
if (inputSampleRate !== this.targetSampleRate) {
|
||||||
|
processed = this.resample(audioBuffer, inputSampleRate, this.targetSampleRate);
|
||||||
|
}
|
||||||
|
|
||||||
|
processed = this.applyVAD(processed, this.targetSampleRate);
|
||||||
|
processed = this.normalizeAudio(processed);
|
||||||
|
|
||||||
|
return {
|
||||||
|
audio: processed,
|
||||||
|
sampleRate: this.targetSampleRate,
|
||||||
|
channels: this.channels,
|
||||||
|
durationSec: processed.length / (this.targetSampleRate * this.channels * 2),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Linear-interpolation resampler for 16-bit PCM.
 * Each output sample blends the two nearest source samples.
 */
private resample(buffer: Buffer, fromRate: number, toRate: number): Buffer {
  const src = this.bufferToInt16(buffer);
  const step = fromRate / toRate;
  const outLen = Math.round(src.length / step);

  const out: number[] = [];
  for (let i = 0; i < outLen; i++) {
    const pos = i * step;
    const lo = Math.floor(pos);
    const hi = Math.min(lo + 1, src.length - 1);
    const t = pos - lo;
    out.push(Math.round(src[lo] * (1 - t) + src[hi] * t));
  }
  return this.int16ToBuffer(out);
}
|
||||||
|
|
||||||
|
/**
 * Best-effort sample-rate detection from a canonical RIFF/WAVE header.
 * Falls back to 16 kHz for raw PCM or unrecognized containers.
 *
 * Fix: the previous version read the 32-bit value at offset 20, which in a
 * canonical WAV file is the audio-format + channel-count fields, not the
 * sample rate. With the "fmt " chunk id at byte 12, the sample rate lives
 * at byte 24 of the file.
 */
private detectSampleRate(buffer: Buffer): number {
  const DEFAULT_RATE = 16000;
  if (buffer.length < 44) return DEFAULT_RATE; // too short for a WAV header

  const isCanonicalWav =
    buffer.toString("ascii", 0, 4) === "RIFF" &&
    buffer.toString("ascii", 8, 12) === "WAVE" &&
    buffer.toString("ascii", 12, 16) === "fmt ";
  if (isCanonicalWav) {
    const sr = buffer.readUInt32LE(24); // fmt chunk: sample rate at byte 24
    if (sr >= 8000 && sr <= 48000) return sr;
  }
  return DEFAULT_RATE;
}
|
||||||
|
|
||||||
|
/**
 * Decodes a little-endian 16-bit PCM buffer into an array of samples.
 *
 * Fix: `new Array(buffer.length / 2)` threw a RangeError ("Invalid array
 * length") whenever the buffer had an odd byte count. The sample count is
 * now floored, so a trailing partial sample is ignored instead of crashing.
 */
private bufferToInt16(buffer: Buffer): number[] {
  const sampleCount = Math.floor(buffer.length / 2);
  const samples: number[] = new Array(sampleCount);
  for (let i = 0; i < sampleCount; i++) {
    samples[i] = buffer.readInt16LE(i * 2);
  }
  return samples;
}
|
||||||
|
|
||||||
|
/**
 * Encodes an array of samples as a little-endian 16-bit PCM buffer.
 * Values are assumed to already be within the int16 range.
 */
private int16ToBuffer(arr: number[]): Buffer {
  const out = Buffer.alloc(arr.length * 2);
  arr.forEach((sample, idx) => {
    out.writeInt16LE(sample, idx * 2);
  });
  return out;
}
|
||||||
|
|
||||||
|
/**
 * Computes MFCC-style coefficients for one frame: naive DFT magnitude
 * spectrum, 20-band triangular filterbank, log band energies, then a DCT.
 * The DCT index starts at 1, so the 0th (overall energy) coefficient is
 * skipped. (Previously the DFT's inner loop variable shadowed the outer
 * `n`; the rewrite uses distinct names.)
 */
private computeMFCC(frame: number[], numCoeffs: number): number[] {
  const frameLen = frame.length;
  // Next power of two above frameLen, doubled for extra frequency resolution.
  const fftSize = Math.pow(2, Math.ceil(Math.log2(frameLen)) + 1);
  const binCount = fftSize / 2 + 1;

  // Magnitude spectrum via direct DFT (O(K*N); frames are short).
  const spectrum: number[] = new Array(binCount);
  for (let k = 0; k < binCount; k++) {
    let re = 0;
    let im = 0;
    for (let t = 0; t < frameLen; t++) {
      const angle = (2 * Math.PI * k * t) / fftSize;
      re += frame[t] * Math.cos(angle);
      im -= frame[t] * Math.sin(angle);
    }
    spectrum[k] = Math.sqrt(re * re + im * im);
  }

  // Per-band log energies through the triangular filterbank.
  const numFilters = 20;
  const filterbank = this.createFilterbank(binCount, numFilters);
  const logEnergies: number[] = [];
  for (let band = 0; band < numFilters; band++) {
    let energy = 0;
    for (let k = 0; k < binCount; k++) {
      energy += filterbank[band][k] * spectrum[k] * spectrum[k];
    }
    logEnergies.push(Math.log(Math.max(energy, 1e-10))); // floor avoids log(0)
  }

  // DCT of the log energies, coefficients 1..numCoeffs.
  const coeffs: number[] = [];
  for (let d = 0; d < numCoeffs; d++) {
    let sum = 0;
    for (let band = 0; band < numFilters; band++) {
      sum +=
        logEnergies[band] *
        Math.cos(((2 * band + 1) * (d + 1) * Math.PI) / (2 * numFilters));
    }
    coeffs.push(sum);
  }
  return coeffs;
}
|
||||||
|
|
||||||
|
/**
 * Builds a bank of triangular bandpass filters over bins roughly spanning
 * 20–5500 Hz (assuming an 8 kHz Nyquist mapping for bin indices).
 *
 * NOTE(review): filter centers are spaced linearly, not on the mel scale —
 * confirm that is intentional for these "MFCC" features.
 */
private createFilterbank(numBins: number, numFilters: number): number[][] {
  const bank: number[][] = Array.from({ length: numFilters }, () =>
    new Array(numBins).fill(0)
  );

  const lowBin = Math.floor((20 * numBins) / 8000);
  const highBin = Math.floor((5500 * numBins) / 8000);
  const spacing = (highBin - lowBin) / (numFilters + 1);

  for (let m = 1; m <= numFilters; m++) {
    const left = Math.floor(lowBin + (m - 1) * spacing);
    const center = Math.floor(lowBin + m * spacing);
    const right = Math.floor(lowBin + (m + 1) * spacing);
    const filter = bank[m - 1];

    // Rising slope from the left edge up to (exclusive of) the center bin.
    for (let k = left; k < center; k++) {
      if (k > 0) filter[k] = (k - left) / (center - left);
    }
    // Falling slope from the center down to (exclusive of) the right edge.
    if (right > center) {
      for (let k = center; k < right; k++) {
        filter[k] = (right - k) / (right - center);
      }
    }
  }
  return bank;
}
|
||||||
|
|
||||||
|
/**
 * Zero-crossing rate: the fraction of samples at which the signal changes
 * sign (treating 0 as non-positive).
 *
 * Fix: empty input previously returned NaN via 0/0; it now returns 0.
 */
private computeZCR(int16: number[]): number {
  if (int16.length === 0) return 0; // guard 0/0 -> NaN
  let crossings = 0;
  for (let i = 1; i < int16.length; i++) {
    const wasPositive = int16[i - 1] > 0;
    const isPositive = int16[i] > 0;
    if (wasPositive !== isPositive) crossings++;
  }
  return crossings / int16.length;
}
|
||||||
|
|
||||||
|
/**
 * Energy-weighted mean sample index; returns 0 for an all-zero signal.
 *
 * NOTE(review): despite the name, this runs over time-domain samples, not a
 * spectrum, so it is a temporal centroid — confirm intended.
 */
private computeSpectralCentroid(int16: number[]): number {
  let weightedSum = 0;
  let totalEnergy = 0;
  int16.forEach((sample, idx) => {
    const energy = sample * sample;
    weightedSum += idx * energy;
    totalEnergy += energy;
  });
  return totalEnergy > 0 ? weightedSum / totalEnergy : 0;
}
|
||||||
|
|
||||||
|
/**
 * Index below which 85% of the signal's total energy is contained.
 *
 * NOTE(review): computed over time-domain samples, not a spectrum —
 * confirm intended.
 */
private computeSpectralRollOff(int16: number[]): number {
  let totalEnergy = 0;
  for (const sample of int16) totalEnergy += sample * sample;
  const threshold = totalEnergy * 0.85;

  let running = 0;
  for (let i = 0; i < int16.length; i++) {
    running += int16[i] * int16[i];
    if (running >= threshold) return i;
  }
  return int16.length - 1;
}
|
||||||
|
|
||||||
|
/**
 * Writes content to a uniquely named file under the OS temp directory and
 * returns its path. The caller is responsible for cleaning the file up.
 */
private writeTempFile(content: Buffer, ext: string): string {
  const name = `vp_${randomBytes(8).toString("hex")}${ext}`;
  const path = join(tmpdir(), name);
  require("fs").writeFileSync(path, content);
  return path;
}
|
||||||
|
|
||||||
|
private async readFile(path: string): Promise<Buffer> {
|
||||||
|
return require("fs").readFileSync(path);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Result of preprocessing one audio clip. */
export interface PreprocessedAudio {
  /** Processed PCM audio; the pipeline reads/writes 16-bit LE samples. */
  audio: Buffer;
  /** Sample rate of `audio` in Hz. */
  sampleRate: number;
  /** Channel count used in the duration calculation. */
  channels: number;
  /** Duration of `audio` in seconds (bytes / (rate * channels * 2)). */
  durationSec: number;
}
|
||||||
|
|
||||||
|
/** Frame-level and summary acoustic features extracted from a clip. */
export interface AudioFeatures {
  /** Per-frame MFCC coefficient vectors (13 coefficients per frame). */
  mfccs: number[][];
  /** Fraction of samples at which the waveform crosses zero. */
  zeroCrossingRate: number;
  /** Energy-weighted centroid — computed over time-domain samples here. */
  spectralCentroid: number;
  /** Index below which ~85% of signal energy lies. */
  spectralRollOff: number;
  /** Clip duration in seconds. */
  durationSec: number;
}
|
||||||
8
services/voiceprint/tsconfig.json
Normal file
8
services/voiceprint/tsconfig.json
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
{
|
||||||
|
"extends": "../../tsconfig.json",
|
||||||
|
"compilerOptions": {
|
||||||
|
"outDir": "./dist",
|
||||||
|
"rootDir": "./src"
|
||||||
|
},
|
||||||
|
"include": ["src/**/*"]
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user