for first push
This commit is contained in:
316
packages/core/src/audio/webrtc/audio-pipeline.ts
Normal file
316
packages/core/src/audio/webrtc/audio-pipeline.ts
Normal file
@@ -0,0 +1,316 @@
|
||||
/**
|
||||
* Audio Processing Pipeline for Real-Time Analysis
|
||||
* Coordinates WebRTC stream capture with VoicePreprocess for continuous analysis
|
||||
*/
|
||||
|
||||
import { WebRTCStreamCapture, createWebRTCCapture } from './stream-capture';
|
||||
|
||||
// Type definitions for real-time processing

/** One captured slice of audio awaiting analysis. */
export interface AudioChunk {
  id: string;          // unique identifier ("chunk-<ms>-<random suffix>")
  timestamp: number;   // capture timestamp supplied by the stream source
  data: Float32Array;  // raw float PCM samples
  duration: number;    // chunk length in seconds
}

/** Result emitted for each analyzed chunk. */
export interface VoiceprintResult {
  chunkId: string;     // id of the AudioChunk this result belongs to
  timestamp: number;   // timestamp copied from the source chunk
  features: AudioFeatures;
  embedding: number[]; // fixed-length embedding vector (padded to 128)
  confidence: number;  // analysis confidence (currently a 0.95 placeholder)
  status: 'complete' | 'error';
}

// Audio chunk configuration
export interface AudioChunkConfig {
  chunkDuration: number;   // chunk length in ms
  overlapDuration: number; // overlap between consecutive chunks in ms
  sampleRate: number;      // samples per second
}

// Preprocessor interfaces (copied from AudioPreprocessor for standalone usage)
export interface PreprocessedAudio {
  audio: Buffer;       // encoded audio payload — format depends on the preprocessor; confirm with producer
  sampleRate: number;
  channels: number;
  durationSec: number;
}

/** Feature set produced by (mock) feature extraction. */
export interface AudioFeatures {
  mfccs: number[][];          // MFCC matrix (13 x 20 in the mock extractor)
  zeroCrossingRate: number;
  spectralCentroid: number;
  spectralRollOff: number;
  durationSec: number;
}

/** Pipeline configuration plus optional result/error callbacks. */
interface PipelineConfig {
  chunkDuration: number; // 5000ms
  overlapDuration: number; // 1000ms
  sampleRate: number; // 16000 Hz
  onAnalysisComplete?: (result: VoiceprintResult) => void;
  onStreamError?: (error: Error) => void;
  onError?: (error: Error) => void;
}

// Defaults: 5 s chunks with 1 s overlap at 16 kHz.
const DEFAULT_CONFIG: PipelineConfig = {
  chunkDuration: 5000,
  overlapDuration: 1000,
  sampleRate: 16000
};
|
||||
|
||||
export class AudioPipeline {
|
||||
private streamCapture: WebRTCStreamCapture | null = null;
|
||||
private isRunning: boolean = false;
|
||||
private chunkBuffer: AudioChunk[] = [];
|
||||
private maxBufferLength: number = 10;
|
||||
|
||||
private onChunkReady?: (chunk: AudioChunk) => void;
|
||||
private onAnalysisComplete?: (result: VoiceprintResult) => void;
|
||||
private onPipelineError?: (error: Error) => void;
|
||||
|
||||
constructor(private config: PipelineConfig = DEFAULT_CONFIG) {}
|
||||
|
||||
/**
|
||||
* Initialize pipeline components
|
||||
*/
|
||||
async initialize(): Promise<void> {
|
||||
// Initialize WebRTC stream capture
|
||||
this.streamCapture = createWebRTCCapture({
|
||||
chunkDuration: this.config.chunkDuration,
|
||||
overlapDuration: this.config.overlapDuration,
|
||||
sampleRate: this.config.sampleRate
|
||||
});
|
||||
|
||||
// Connect WebRTC chunk processing
|
||||
this.streamCapture.onChunkReady = (rawAudio, timestamp) => {
|
||||
this.handleRawChunk(rawAudio, timestamp);
|
||||
};
|
||||
|
||||
// Connect stream error handling
|
||||
this.streamCapture.onStreamError = (error) => {
|
||||
this.onPipelineError?.(error);
|
||||
};
|
||||
|
||||
console.log('[Pipeline] Initialized');
|
||||
}
|
||||
|
||||
/**
|
||||
* Process raw audio chunk from WebRTC
|
||||
*/
|
||||
private async handleRawChunk(rawAudio: Float32Array, timestamp: number): Promise<void> {
|
||||
try {
|
||||
// Create audio chunk
|
||||
const chunk: AudioChunk = {
|
||||
id: `chunk-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`,
|
||||
timestamp,
|
||||
data: rawAudio,
|
||||
duration: this.config.chunkDuration / 1000
|
||||
};
|
||||
|
||||
// Add to buffer
|
||||
this.chunkBuffer.push(chunk);
|
||||
|
||||
// Maintain buffer size
|
||||
if (this.chunkBuffer.length > this.maxBufferLength) {
|
||||
const removed = this.chunkBuffer.shift();
|
||||
if (removed) {
|
||||
// Process removed chunk with overlap
|
||||
await this.processChunk(removed);
|
||||
}
|
||||
}
|
||||
|
||||
// Process current chunk
|
||||
await this.processChunk(chunk);
|
||||
|
||||
} catch (error) {
|
||||
const err = error instanceof Error ? error : new Error(String(error));
|
||||
console.error('[Pipeline] Error processing chunk:', err);
|
||||
this.onPipelineError?.(err);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert Float32Array to 16-bit PCM Buffer
|
||||
*/
|
||||
private float32ArrayToBuffer(floatData: Float32Array): Buffer {
|
||||
const int16Array = new Int16Array(floatData.length);
|
||||
for (let i = 0; i < floatData.length; i++) {
|
||||
// Clamp and scale to 16-bit range
|
||||
const clamped = Math.max(-1, Math.min(1, floatData[i]));
|
||||
int16Array[i] = Math.round(clamped * 32767);
|
||||
}
|
||||
return Buffer.from(int16Array.buffer);
|
||||
}
|
||||
|
||||
/**
|
||||
* Process a single audio chunk (mock implementation)
|
||||
*/
|
||||
private async processChunk(chunk: AudioChunk): Promise<VoiceprintResult> {
|
||||
try {
|
||||
// Generate mock features
|
||||
// Convert Float32Array to Buffer properly
|
||||
const int16Array = new Int16Array(chunk.data.length);
|
||||
for (let i = 0; i < chunk.data.length; i++) {
|
||||
const clamped = Math.max(-1, Math.min(1, chunk.data[i]));
|
||||
int16Array[i] = Math.round(clamped * 32767);
|
||||
}
|
||||
const dataBuffer = Buffer.from(int16Array.buffer);
|
||||
const features: AudioFeatures = await extractMockFeatures(dataBuffer, chunk.duration);
|
||||
|
||||
// Generate embedding (placeholder - would use actual embedding service)
|
||||
const embedding = this.generatePlaceholderEmbedding(features);
|
||||
|
||||
// Create result
|
||||
const result: VoiceprintResult = {
|
||||
chunkId: chunk.id,
|
||||
timestamp: chunk.timestamp,
|
||||
features,
|
||||
embedding,
|
||||
confidence: 0.95, // Placeholder - would come from actual analysis
|
||||
status: 'complete'
|
||||
};
|
||||
|
||||
// Emit result
|
||||
this.onAnalysisComplete?.(result);
|
||||
|
||||
return result;
|
||||
|
||||
} catch (error) {
|
||||
const err = error instanceof Error ? error : new Error(String(error));
|
||||
console.error('[Pipeline] Preprocessing error:', err);
|
||||
this.onPipelineError?.(err);
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate placeholder embedding (would use actual embedding service)
|
||||
*/
|
||||
private generatePlaceholderEmbedding(features: AudioFeatures): number[] {
|
||||
// Placeholder embedding - would be replaced with actual embedding generation
|
||||
const embedding: number[] = [];
|
||||
|
||||
// Use spectral features as proxy for embedding
|
||||
embedding.push(features.spectralCentroid);
|
||||
embedding.push(features.spectralRollOff);
|
||||
embedding.push(features.zeroCrossingRate);
|
||||
|
||||
// MFCC summary (first few coefficients)
|
||||
if (features.mfccs && features.mfccs.length > 0) {
|
||||
for (let i = 0; i < Math.min(5, features.mfccs[0].length); i++) {
|
||||
embedding.push(features.mfccs[0][i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Normalize and pad
|
||||
while (embedding.length < 128) {
|
||||
embedding.push(0);
|
||||
}
|
||||
|
||||
return embedding;
|
||||
}
|
||||
|
||||
/**
|
||||
* Start the real-time analysis pipeline
|
||||
*/
|
||||
async start(): Promise<void> {
|
||||
if (this.isRunning) {
|
||||
console.log('[Pipeline] Already running');
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
await this.initialize();
|
||||
if (this.streamCapture) {
|
||||
await this.streamCapture.start();
|
||||
}
|
||||
this.isRunning = true;
|
||||
|
||||
console.log('[Pipeline] Real-time analysis started');
|
||||
|
||||
} catch (error) {
|
||||
const err = error instanceof Error ? error : new Error(String(error));
|
||||
console.error('[Pipeline] Failed to start:', err);
|
||||
this.onPipelineError?.(err);
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Stop the pipeline
|
||||
*/
|
||||
async stop(): Promise<void> {
|
||||
if (!this.isRunning) {
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
// Drain remaining buffer
|
||||
while (this.chunkBuffer.length > 0) {
|
||||
const chunk = this.chunkBuffer.shift();
|
||||
if (chunk) {
|
||||
await this.processChunk(chunk);
|
||||
}
|
||||
}
|
||||
|
||||
// Stop WebRTC capture
|
||||
if (this.streamCapture) {
|
||||
this.streamCapture.stop();
|
||||
}
|
||||
|
||||
this.isRunning = false;
|
||||
console.log('[Pipeline] Stopped');
|
||||
|
||||
} catch (error) {
|
||||
const err = error instanceof Error ? error : new Error(String(error));
|
||||
console.error('[Pipeline] Error stopping:', err);
|
||||
this.onPipelineError?.(err);
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get pipeline status
|
||||
*/
|
||||
getStatus(): {
|
||||
isRunning: boolean;
|
||||
bufferLength: number;
|
||||
streamActive: boolean;
|
||||
} {
|
||||
return {
|
||||
isRunning: this.isRunning,
|
||||
bufferLength: this.chunkBuffer.length,
|
||||
streamActive: this.streamCapture?.isRecording || false
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract mock features from audio buffer
|
||||
*/
|
||||
async function extractMockFeatures(buffer: Buffer, duration: number): Promise<AudioFeatures> {
|
||||
// Simple mock feature extraction for demonstration
|
||||
// In production, this would use actual audio processing
|
||||
const numMfccs = 13;
|
||||
const mfccs: number[][] = [];
|
||||
|
||||
// Generate mock MFCCs based on buffer hash
|
||||
const bufferHash = buffer.reduce((acc, byte) => acc + byte, 0);
|
||||
for (let i = 0; i < numMfccs; i++) {
|
||||
const coefficients: number[] = [];
|
||||
for (let j = 0; j < 20; j++) {
|
||||
coefficients.push(Math.abs(Math.sin((i * j + bufferHash) * 0.1)) * 0.5 + 0.25);
|
||||
}
|
||||
mfccs.push(coefficients);
|
||||
}
|
||||
|
||||
return {
|
||||
mfccs,
|
||||
zeroCrossingRate: 0.02 + Math.random() * 0.03,
|
||||
spectralCentroid: 1000 + Math.random() * 2000,
|
||||
spectralRollOff: 3000 + Math.random() * 1000,
|
||||
durationSec: duration
|
||||
};
|
||||
}
|
||||
184
packages/core/src/audio/webrtc/stream-capture.ts
Normal file
184
packages/core/src/audio/webrtc/stream-capture.ts
Normal file
@@ -0,0 +1,184 @@
|
||||
/**
|
||||
* WebRTC Audio Stream Capture
|
||||
* Captures audio from screen/audio sharing using WebRTC APIs
|
||||
* Implements 5-second chunks with 1-second overlap for sliding window analysis
|
||||
*/
|
||||
|
||||
interface WebRTCStreamConfig {
|
||||
chunkDuration: number; // 5000ms
|
||||
overlapDuration: number; // 1000ms
|
||||
sampleRate: number; // 16000 Hz for voiceprint compatibility
|
||||
}
|
||||
|
||||
const DEFAULT_CONFIG: WebRTCStreamConfig = {
|
||||
chunkDuration: 5000,
|
||||
overlapDuration: 1000,
|
||||
sampleRate: 16000
|
||||
};
|
||||
|
||||
export class WebRTCStreamCapture {
|
||||
private stream: MediaStream | null = null;
|
||||
private audioContext: AudioContext | null = null;
|
||||
private analyser: AnalyserNode | null = null;
|
||||
private source: MediaStreamAudioSourceNode | null = null;
|
||||
private _isRecording: boolean = false;
|
||||
|
||||
private buffer: Float32Array = new Float32Array(0);
|
||||
public onChunkReady?: (chunk: Float32Array, timestamp: number) => void;
|
||||
public onStreamError?: (error: Error) => void;
|
||||
|
||||
constructor(private config: WebRTCStreamConfig = DEFAULT_CONFIG) {}
|
||||
|
||||
/**
|
||||
* Check if currently recording
|
||||
*/
|
||||
public isRecording: boolean = false;
|
||||
|
||||
/**
|
||||
* Start capturing audio from screen/audio sharing
|
||||
*/
|
||||
async start(): Promise<void> {
|
||||
if (this.isRecording) {
|
||||
console.log('[WebRTC] Already recording');
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
// Request screen/audio capture with audio
|
||||
this.stream = await navigator.mediaDevices.getDisplayMedia({
|
||||
video: true, // Required for audio capture
|
||||
audio: true
|
||||
});
|
||||
|
||||
// Stop any existing tracks
|
||||
this.stream.getTracks().forEach(track => track.stop());
|
||||
|
||||
// Create audio context and analyser
|
||||
this.audioContext = new AudioContext({
|
||||
sampleRate: this.config.sampleRate
|
||||
});
|
||||
this.analyser = this.audioContext.createAnalyser();
|
||||
this.analyser.fftSize = 2048;
|
||||
|
||||
// Connect stream to audio graph
|
||||
this.source = this.audioContext.createMediaStreamSource(this.stream);
|
||||
this.source.connect(this.analyser);
|
||||
|
||||
this.isRecording = true;
|
||||
console.log('[WebRTC] Stream capture started');
|
||||
|
||||
// Start processing loop
|
||||
this.processAudio();
|
||||
|
||||
// Handle stream termination
|
||||
this.stream.getVideoTracks()[0].onended = () => {
|
||||
console.log('[WebRTC] User stopped sharing');
|
||||
this.stop();
|
||||
};
|
||||
|
||||
} catch (error) {
|
||||
const err = error instanceof Error ? error : new Error(String(error));
|
||||
console.error('[WebRTC] Failed to start stream capture:', err);
|
||||
this.onStreamError?.(err);
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Process audio in real-time with sliding window
|
||||
*/
|
||||
private processAudio(): void {
|
||||
if (!this.audioContext || !this.analyser || !this.isRecording) return;
|
||||
|
||||
if (!this.analyser) return;
|
||||
|
||||
const bufferLength = this.analyser.fftSize;
|
||||
const buffer = new Float32Array(bufferLength);
|
||||
|
||||
const processFrame = () => {
|
||||
if (!this.isRecording) return;
|
||||
|
||||
if (!this.analyser) return;
|
||||
|
||||
this.analyser.getFloatTimeDomainData(buffer);
|
||||
|
||||
// Get current timestamp
|
||||
const timestamp = this.audioContext?.currentTime ?? 0;
|
||||
|
||||
// Extract audio data for current frame
|
||||
// Use first 512 samples for voice analysis (reduced for faster processing)
|
||||
const audioData = buffer.slice(0, 512);
|
||||
|
||||
// Prepare chunk for analysis
|
||||
if (audioData.length > 0) {
|
||||
this.onChunkReady?.(audioData, timestamp);
|
||||
}
|
||||
|
||||
// Schedule next frame with overlap
|
||||
const frameDuration = this.config.chunkDuration - this.config.overlapDuration;
|
||||
setTimeout(processFrame, frameDuration);
|
||||
};
|
||||
|
||||
processFrame();
|
||||
}
|
||||
|
||||
/**
|
||||
* Stop audio capture
|
||||
*/
|
||||
stop(): void {
|
||||
this._isRecording = false;
|
||||
|
||||
if (this.stream) {
|
||||
this.stream.getTracks().forEach(track => track.stop());
|
||||
this.stream = null;
|
||||
}
|
||||
|
||||
if (this.source) {
|
||||
this.source.disconnect();
|
||||
this.source = null;
|
||||
}
|
||||
|
||||
if (this.analyser) {
|
||||
this.analyser.disconnect();
|
||||
this.analyser = null;
|
||||
}
|
||||
|
||||
if (this.audioContext) {
|
||||
this.audioContext.close();
|
||||
this.audioContext = null;
|
||||
}
|
||||
|
||||
console.log('[WebRTC] Stream capture stopped');
|
||||
}
|
||||
|
||||
/**
|
||||
* Get stream metadata
|
||||
*/
|
||||
getMetadata(): {
|
||||
isActive: boolean;
|
||||
sampleRate: number;
|
||||
channels: number;
|
||||
} {
|
||||
if (!this.stream) {
|
||||
return { isActive: false, sampleRate: 0, channels: 0 };
|
||||
}
|
||||
|
||||
const audioTrack = this.stream.getAudioTracks()[0];
|
||||
if (!audioTrack) {
|
||||
return { isActive: true, sampleRate: this.config.sampleRate, channels: 1 };
|
||||
}
|
||||
|
||||
return {
|
||||
isActive: true,
|
||||
sampleRate: this.config.sampleRate,
|
||||
channels: audioTrack.getSettings().channelCount || 1
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Factory function for creating stream capture with auto-start
|
||||
*/
|
||||
export function createWebRTCCapture(config?: WebRTCStreamConfig): WebRTCStreamCapture {
|
||||
return new WebRTCStreamCapture(config || DEFAULT_CONFIG);
|
||||
}
|
||||
22
packages/core/tsconfig.json
Normal file
22
packages/core/tsconfig.json
Normal file
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2020",
|
||||
"module": "commonjs",
|
||||
"lib": ["ES2020", "DOM"],
|
||||
"outDir": "./dist",
|
||||
"rootDir": "./src",
|
||||
"strict": true,
|
||||
"esModuleInterop": true,
|
||||
"skipLibCheck": true,
|
||||
"forceConsistentCasingInFileNames": true,
|
||||
"declaration": true,
|
||||
"resolveJsonModule": true,
|
||||
"moduleResolution": "node",
|
||||
"baseUrl": ".",
|
||||
"paths": {
|
||||
"@/*": ["src/*"]
|
||||
}
|
||||
},
|
||||
"include": ["src/**/*"],
|
||||
"exclude": ["node_modules", "dist"]
|
||||
}
|
||||
Reference in New Issue
Block a user