for first push
This commit is contained in:
316
packages/core/src/audio/webrtc/audio-pipeline.ts
Normal file
316
packages/core/src/audio/webrtc/audio-pipeline.ts
Normal file
@@ -0,0 +1,316 @@
|
||||
/**
|
||||
* Audio Processing Pipeline for Real-Time Analysis
|
||||
* Coordinates WebRTC stream capture with VoicePreprocess for continuous analysis
|
||||
*/
|
||||
|
||||
import { WebRTCStreamCapture, createWebRTCCapture } from './stream-capture';
|
||||
|
||||
// Type definitions for real-time processing

/** One captured slice of audio awaiting analysis. */
export interface AudioChunk {
  id: string;          // unique identifier ("chunk-<ms>-<random suffix>")
  timestamp: number;   // capture timestamp supplied by the stream source
  data: Float32Array;  // raw float PCM samples
  duration: number;    // chunk length in seconds
}

/** Result emitted for each analyzed chunk. */
export interface VoiceprintResult {
  chunkId: string;     // id of the AudioChunk this result belongs to
  timestamp: number;   // timestamp copied from the source chunk
  features: AudioFeatures;
  embedding: number[]; // fixed-length embedding vector (padded to 128)
  confidence: number;  // analysis confidence (currently a 0.95 placeholder)
  status: 'complete' | 'error';
}

// Audio chunk configuration
export interface AudioChunkConfig {
  chunkDuration: number;   // chunk length in ms
  overlapDuration: number; // overlap between consecutive chunks in ms
  sampleRate: number;      // samples per second
}

// Preprocessor interfaces (copied from AudioPreprocessor for standalone usage)
export interface PreprocessedAudio {
  audio: Buffer;       // encoded audio payload — format depends on the preprocessor; confirm with producer
  sampleRate: number;
  channels: number;
  durationSec: number;
}

/** Feature set produced by (mock) feature extraction. */
export interface AudioFeatures {
  mfccs: number[][];          // MFCC matrix (13 x 20 in the mock extractor)
  zeroCrossingRate: number;
  spectralCentroid: number;
  spectralRollOff: number;
  durationSec: number;
}

/** Pipeline configuration plus optional result/error callbacks. */
interface PipelineConfig {
  chunkDuration: number; // 5000ms
  overlapDuration: number; // 1000ms
  sampleRate: number; // 16000 Hz
  onAnalysisComplete?: (result: VoiceprintResult) => void;
  onStreamError?: (error: Error) => void;
  onError?: (error: Error) => void;
}

// Defaults: 5 s chunks with 1 s overlap at 16 kHz.
const DEFAULT_CONFIG: PipelineConfig = {
  chunkDuration: 5000,
  overlapDuration: 1000,
  sampleRate: 16000
};
|
||||
|
||||
export class AudioPipeline {
|
||||
private streamCapture: WebRTCStreamCapture | null = null;
|
||||
private isRunning: boolean = false;
|
||||
private chunkBuffer: AudioChunk[] = [];
|
||||
private maxBufferLength: number = 10;
|
||||
|
||||
private onChunkReady?: (chunk: AudioChunk) => void;
|
||||
private onAnalysisComplete?: (result: VoiceprintResult) => void;
|
||||
private onPipelineError?: (error: Error) => void;
|
||||
|
||||
constructor(private config: PipelineConfig = DEFAULT_CONFIG) {}
|
||||
|
||||
/**
|
||||
* Initialize pipeline components
|
||||
*/
|
||||
async initialize(): Promise<void> {
|
||||
// Initialize WebRTC stream capture
|
||||
this.streamCapture = createWebRTCCapture({
|
||||
chunkDuration: this.config.chunkDuration,
|
||||
overlapDuration: this.config.overlapDuration,
|
||||
sampleRate: this.config.sampleRate
|
||||
});
|
||||
|
||||
// Connect WebRTC chunk processing
|
||||
this.streamCapture.onChunkReady = (rawAudio, timestamp) => {
|
||||
this.handleRawChunk(rawAudio, timestamp);
|
||||
};
|
||||
|
||||
// Connect stream error handling
|
||||
this.streamCapture.onStreamError = (error) => {
|
||||
this.onPipelineError?.(error);
|
||||
};
|
||||
|
||||
console.log('[Pipeline] Initialized');
|
||||
}
|
||||
|
||||
/**
|
||||
* Process raw audio chunk from WebRTC
|
||||
*/
|
||||
private async handleRawChunk(rawAudio: Float32Array, timestamp: number): Promise<void> {
|
||||
try {
|
||||
// Create audio chunk
|
||||
const chunk: AudioChunk = {
|
||||
id: `chunk-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`,
|
||||
timestamp,
|
||||
data: rawAudio,
|
||||
duration: this.config.chunkDuration / 1000
|
||||
};
|
||||
|
||||
// Add to buffer
|
||||
this.chunkBuffer.push(chunk);
|
||||
|
||||
// Maintain buffer size
|
||||
if (this.chunkBuffer.length > this.maxBufferLength) {
|
||||
const removed = this.chunkBuffer.shift();
|
||||
if (removed) {
|
||||
// Process removed chunk with overlap
|
||||
await this.processChunk(removed);
|
||||
}
|
||||
}
|
||||
|
||||
// Process current chunk
|
||||
await this.processChunk(chunk);
|
||||
|
||||
} catch (error) {
|
||||
const err = error instanceof Error ? error : new Error(String(error));
|
||||
console.error('[Pipeline] Error processing chunk:', err);
|
||||
this.onPipelineError?.(err);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert Float32Array to 16-bit PCM Buffer
|
||||
*/
|
||||
private float32ArrayToBuffer(floatData: Float32Array): Buffer {
|
||||
const int16Array = new Int16Array(floatData.length);
|
||||
for (let i = 0; i < floatData.length; i++) {
|
||||
// Clamp and scale to 16-bit range
|
||||
const clamped = Math.max(-1, Math.min(1, floatData[i]));
|
||||
int16Array[i] = Math.round(clamped * 32767);
|
||||
}
|
||||
return Buffer.from(int16Array.buffer);
|
||||
}
|
||||
|
||||
/**
|
||||
* Process a single audio chunk (mock implementation)
|
||||
*/
|
||||
private async processChunk(chunk: AudioChunk): Promise<VoiceprintResult> {
|
||||
try {
|
||||
// Generate mock features
|
||||
// Convert Float32Array to Buffer properly
|
||||
const int16Array = new Int16Array(chunk.data.length);
|
||||
for (let i = 0; i < chunk.data.length; i++) {
|
||||
const clamped = Math.max(-1, Math.min(1, chunk.data[i]));
|
||||
int16Array[i] = Math.round(clamped * 32767);
|
||||
}
|
||||
const dataBuffer = Buffer.from(int16Array.buffer);
|
||||
const features: AudioFeatures = await extractMockFeatures(dataBuffer, chunk.duration);
|
||||
|
||||
// Generate embedding (placeholder - would use actual embedding service)
|
||||
const embedding = this.generatePlaceholderEmbedding(features);
|
||||
|
||||
// Create result
|
||||
const result: VoiceprintResult = {
|
||||
chunkId: chunk.id,
|
||||
timestamp: chunk.timestamp,
|
||||
features,
|
||||
embedding,
|
||||
confidence: 0.95, // Placeholder - would come from actual analysis
|
||||
status: 'complete'
|
||||
};
|
||||
|
||||
// Emit result
|
||||
this.onAnalysisComplete?.(result);
|
||||
|
||||
return result;
|
||||
|
||||
} catch (error) {
|
||||
const err = error instanceof Error ? error : new Error(String(error));
|
||||
console.error('[Pipeline] Preprocessing error:', err);
|
||||
this.onPipelineError?.(err);
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate placeholder embedding (would use actual embedding service)
|
||||
*/
|
||||
private generatePlaceholderEmbedding(features: AudioFeatures): number[] {
|
||||
// Placeholder embedding - would be replaced with actual embedding generation
|
||||
const embedding: number[] = [];
|
||||
|
||||
// Use spectral features as proxy for embedding
|
||||
embedding.push(features.spectralCentroid);
|
||||
embedding.push(features.spectralRollOff);
|
||||
embedding.push(features.zeroCrossingRate);
|
||||
|
||||
// MFCC summary (first few coefficients)
|
||||
if (features.mfccs && features.mfccs.length > 0) {
|
||||
for (let i = 0; i < Math.min(5, features.mfccs[0].length); i++) {
|
||||
embedding.push(features.mfccs[0][i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Normalize and pad
|
||||
while (embedding.length < 128) {
|
||||
embedding.push(0);
|
||||
}
|
||||
|
||||
return embedding;
|
||||
}
|
||||
|
||||
/**
|
||||
* Start the real-time analysis pipeline
|
||||
*/
|
||||
async start(): Promise<void> {
|
||||
if (this.isRunning) {
|
||||
console.log('[Pipeline] Already running');
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
await this.initialize();
|
||||
if (this.streamCapture) {
|
||||
await this.streamCapture.start();
|
||||
}
|
||||
this.isRunning = true;
|
||||
|
||||
console.log('[Pipeline] Real-time analysis started');
|
||||
|
||||
} catch (error) {
|
||||
const err = error instanceof Error ? error : new Error(String(error));
|
||||
console.error('[Pipeline] Failed to start:', err);
|
||||
this.onPipelineError?.(err);
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Stop the pipeline
|
||||
*/
|
||||
async stop(): Promise<void> {
|
||||
if (!this.isRunning) {
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
// Drain remaining buffer
|
||||
while (this.chunkBuffer.length > 0) {
|
||||
const chunk = this.chunkBuffer.shift();
|
||||
if (chunk) {
|
||||
await this.processChunk(chunk);
|
||||
}
|
||||
}
|
||||
|
||||
// Stop WebRTC capture
|
||||
if (this.streamCapture) {
|
||||
this.streamCapture.stop();
|
||||
}
|
||||
|
||||
this.isRunning = false;
|
||||
console.log('[Pipeline] Stopped');
|
||||
|
||||
} catch (error) {
|
||||
const err = error instanceof Error ? error : new Error(String(error));
|
||||
console.error('[Pipeline] Error stopping:', err);
|
||||
this.onPipelineError?.(err);
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get pipeline status
|
||||
*/
|
||||
getStatus(): {
|
||||
isRunning: boolean;
|
||||
bufferLength: number;
|
||||
streamActive: boolean;
|
||||
} {
|
||||
return {
|
||||
isRunning: this.isRunning,
|
||||
bufferLength: this.chunkBuffer.length,
|
||||
streamActive: this.streamCapture?.isRecording || false
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract mock features from audio buffer
|
||||
*/
|
||||
async function extractMockFeatures(buffer: Buffer, duration: number): Promise<AudioFeatures> {
|
||||
// Simple mock feature extraction for demonstration
|
||||
// In production, this would use actual audio processing
|
||||
const numMfccs = 13;
|
||||
const mfccs: number[][] = [];
|
||||
|
||||
// Generate mock MFCCs based on buffer hash
|
||||
const bufferHash = buffer.reduce((acc, byte) => acc + byte, 0);
|
||||
for (let i = 0; i < numMfccs; i++) {
|
||||
const coefficients: number[] = [];
|
||||
for (let j = 0; j < 20; j++) {
|
||||
coefficients.push(Math.abs(Math.sin((i * j + bufferHash) * 0.1)) * 0.5 + 0.25);
|
||||
}
|
||||
mfccs.push(coefficients);
|
||||
}
|
||||
|
||||
return {
|
||||
mfccs,
|
||||
zeroCrossingRate: 0.02 + Math.random() * 0.03,
|
||||
spectralCentroid: 1000 + Math.random() * 2000,
|
||||
spectralRollOff: 3000 + Math.random() * 1000,
|
||||
durationSec: duration
|
||||
};
|
||||
}
|
||||
184
packages/core/src/audio/webrtc/stream-capture.ts
Normal file
184
packages/core/src/audio/webrtc/stream-capture.ts
Normal file
@@ -0,0 +1,184 @@
|
||||
/**
|
||||
* WebRTC Audio Stream Capture
|
||||
* Captures audio from screen/audio sharing using WebRTC APIs
|
||||
* Implements 5-second chunks with 1-second overlap for sliding window analysis
|
||||
*/
|
||||
|
||||
interface WebRTCStreamConfig {
|
||||
chunkDuration: number; // 5000ms
|
||||
overlapDuration: number; // 1000ms
|
||||
sampleRate: number; // 16000 Hz for voiceprint compatibility
|
||||
}
|
||||
|
||||
const DEFAULT_CONFIG: WebRTCStreamConfig = {
|
||||
chunkDuration: 5000,
|
||||
overlapDuration: 1000,
|
||||
sampleRate: 16000
|
||||
};
|
||||
|
||||
export class WebRTCStreamCapture {
|
||||
private stream: MediaStream | null = null;
|
||||
private audioContext: AudioContext | null = null;
|
||||
private analyser: AnalyserNode | null = null;
|
||||
private source: MediaStreamAudioSourceNode | null = null;
|
||||
private _isRecording: boolean = false;
|
||||
|
||||
private buffer: Float32Array = new Float32Array(0);
|
||||
public onChunkReady?: (chunk: Float32Array, timestamp: number) => void;
|
||||
public onStreamError?: (error: Error) => void;
|
||||
|
||||
constructor(private config: WebRTCStreamConfig = DEFAULT_CONFIG) {}
|
||||
|
||||
/**
|
||||
* Check if currently recording
|
||||
*/
|
||||
public isRecording: boolean = false;
|
||||
|
||||
/**
|
||||
* Start capturing audio from screen/audio sharing
|
||||
*/
|
||||
async start(): Promise<void> {
|
||||
if (this.isRecording) {
|
||||
console.log('[WebRTC] Already recording');
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
// Request screen/audio capture with audio
|
||||
this.stream = await navigator.mediaDevices.getDisplayMedia({
|
||||
video: true, // Required for audio capture
|
||||
audio: true
|
||||
});
|
||||
|
||||
// Stop any existing tracks
|
||||
this.stream.getTracks().forEach(track => track.stop());
|
||||
|
||||
// Create audio context and analyser
|
||||
this.audioContext = new AudioContext({
|
||||
sampleRate: this.config.sampleRate
|
||||
});
|
||||
this.analyser = this.audioContext.createAnalyser();
|
||||
this.analyser.fftSize = 2048;
|
||||
|
||||
// Connect stream to audio graph
|
||||
this.source = this.audioContext.createMediaStreamSource(this.stream);
|
||||
this.source.connect(this.analyser);
|
||||
|
||||
this.isRecording = true;
|
||||
console.log('[WebRTC] Stream capture started');
|
||||
|
||||
// Start processing loop
|
||||
this.processAudio();
|
||||
|
||||
// Handle stream termination
|
||||
this.stream.getVideoTracks()[0].onended = () => {
|
||||
console.log('[WebRTC] User stopped sharing');
|
||||
this.stop();
|
||||
};
|
||||
|
||||
} catch (error) {
|
||||
const err = error instanceof Error ? error : new Error(String(error));
|
||||
console.error('[WebRTC] Failed to start stream capture:', err);
|
||||
this.onStreamError?.(err);
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Process audio in real-time with sliding window
|
||||
*/
|
||||
private processAudio(): void {
|
||||
if (!this.audioContext || !this.analyser || !this.isRecording) return;
|
||||
|
||||
if (!this.analyser) return;
|
||||
|
||||
const bufferLength = this.analyser.fftSize;
|
||||
const buffer = new Float32Array(bufferLength);
|
||||
|
||||
const processFrame = () => {
|
||||
if (!this.isRecording) return;
|
||||
|
||||
if (!this.analyser) return;
|
||||
|
||||
this.analyser.getFloatTimeDomainData(buffer);
|
||||
|
||||
// Get current timestamp
|
||||
const timestamp = this.audioContext?.currentTime ?? 0;
|
||||
|
||||
// Extract audio data for current frame
|
||||
// Use first 512 samples for voice analysis (reduced for faster processing)
|
||||
const audioData = buffer.slice(0, 512);
|
||||
|
||||
// Prepare chunk for analysis
|
||||
if (audioData.length > 0) {
|
||||
this.onChunkReady?.(audioData, timestamp);
|
||||
}
|
||||
|
||||
// Schedule next frame with overlap
|
||||
const frameDuration = this.config.chunkDuration - this.config.overlapDuration;
|
||||
setTimeout(processFrame, frameDuration);
|
||||
};
|
||||
|
||||
processFrame();
|
||||
}
|
||||
|
||||
/**
|
||||
* Stop audio capture
|
||||
*/
|
||||
stop(): void {
|
||||
this._isRecording = false;
|
||||
|
||||
if (this.stream) {
|
||||
this.stream.getTracks().forEach(track => track.stop());
|
||||
this.stream = null;
|
||||
}
|
||||
|
||||
if (this.source) {
|
||||
this.source.disconnect();
|
||||
this.source = null;
|
||||
}
|
||||
|
||||
if (this.analyser) {
|
||||
this.analyser.disconnect();
|
||||
this.analyser = null;
|
||||
}
|
||||
|
||||
if (this.audioContext) {
|
||||
this.audioContext.close();
|
||||
this.audioContext = null;
|
||||
}
|
||||
|
||||
console.log('[WebRTC] Stream capture stopped');
|
||||
}
|
||||
|
||||
/**
|
||||
* Get stream metadata
|
||||
*/
|
||||
getMetadata(): {
|
||||
isActive: boolean;
|
||||
sampleRate: number;
|
||||
channels: number;
|
||||
} {
|
||||
if (!this.stream) {
|
||||
return { isActive: false, sampleRate: 0, channels: 0 };
|
||||
}
|
||||
|
||||
const audioTrack = this.stream.getAudioTracks()[0];
|
||||
if (!audioTrack) {
|
||||
return { isActive: true, sampleRate: this.config.sampleRate, channels: 1 };
|
||||
}
|
||||
|
||||
return {
|
||||
isActive: true,
|
||||
sampleRate: this.config.sampleRate,
|
||||
channels: audioTrack.getSettings().channelCount || 1
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Factory function for creating stream capture with auto-start
|
||||
*/
|
||||
export function createWebRTCCapture(config?: WebRTCStreamConfig): WebRTCStreamCapture {
|
||||
return new WebRTCStreamCapture(config || DEFAULT_CONFIG);
|
||||
}
|
||||
22
packages/core/tsconfig.json
Normal file
22
packages/core/tsconfig.json
Normal file
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2020",
|
||||
"module": "commonjs",
|
||||
"lib": ["ES2020", "DOM"],
|
||||
"outDir": "./dist",
|
||||
"rootDir": "./src",
|
||||
"strict": true,
|
||||
"esModuleInterop": true,
|
||||
"skipLibCheck": true,
|
||||
"forceConsistentCasingInFileNames": true,
|
||||
"declaration": true,
|
||||
"resolveJsonModule": true,
|
||||
"moduleResolution": "node",
|
||||
"baseUrl": ".",
|
||||
"paths": {
|
||||
"@/*": ["src/*"]
|
||||
}
|
||||
},
|
||||
"include": ["src/**/*"],
|
||||
"exclude": ["node_modules", "dist"]
|
||||
}
|
||||
Reference in New Issue
Block a user