web security audit fixes
This commit is contained in:
@@ -41,6 +41,11 @@ interface WavHeader {
|
||||
|
||||
/** Maximum allowed audio duration in seconds */
|
||||
const MAX_DURATION_SEC = 30;
|
||||
/** Default cap on raw input size in bytes (5 MiB). */
const DEFAULT_MAX_INPUT_BYTES = 5 * 1024 * 1024;

/**
 * Resolve the raw-input size cap from an optional override string.
 *
 * The override is parsed as a base-10 integer. If it is missing, empty,
 * non-numeric, or not a positive safe integer, the default is returned
 * instead. Without this check a bad override parses to NaN, and any
 * `length > NaN` comparison is false, which would silently turn the
 * size guard off.
 *
 * @param raw - Override value, typically read from the environment.
 * @returns A positive integer number of bytes.
 */
function resolveMaxInputBytes(raw: string | undefined): number {
  const parsed = Number.parseInt(raw ?? "", 10);
  return Number.isSafeInteger(parsed) && parsed > 0 ? parsed : DEFAULT_MAX_INPUT_BYTES;
}

/**
 * Maximum raw WAV file size before processing (default 5MB). Prevents memory exhaustion.
 * Overridable via the VOICEPRINT_MAX_INPUT_BYTES environment variable; invalid
 * overrides fall back to the default.
 */
const MAX_INPUT_BYTES = resolveMaxInputBytes(process.env.VOICEPRINT_MAX_INPUT_BYTES);
|
||||
/** Target normalization level in dBFS */
|
||||
const TARGET_DBFS = -3;
|
||||
/** Frame size for VAD in milliseconds */
|
||||
@@ -359,16 +364,27 @@ function computeQualityMetrics(samples: Float64Array): {
|
||||
|
||||
/**
|
||||
* Main audio preprocessing pipeline:
|
||||
* 1. Parse WAV header
|
||||
* 2. Read PCM samples
|
||||
* 3. Convert to mono
|
||||
* 4. Resample to 16kHz
|
||||
* 5. Normalize to -3 dBFS
|
||||
* 6. VAD silence trimming
|
||||
* 7. Limit to 30 seconds
|
||||
* 8. Convert to 16-bit PCM
|
||||
* 1. Validate input size
|
||||
* 2. Parse WAV header
|
||||
* 3. Validate duration from header (reject too-long audio before decoding)
|
||||
* 4. Read PCM samples
|
||||
* 5. Convert to mono
|
||||
* 6. Resample to 16kHz
|
||||
* 7. Normalize to -3 dBFS
|
||||
* 8. VAD silence trimming
|
||||
* 9. Limit to 30 seconds
|
||||
* 10. Convert to 16-bit PCM
|
||||
*/
|
||||
export async function preprocessAudio(inputBuffer: Buffer): Promise<ProcessedAudio> {
|
||||
// Reject oversized input early to prevent memory exhaustion
|
||||
if (inputBuffer.length > MAX_INPUT_BYTES) {
|
||||
throw new Error(
|
||||
`Audio file too large: ${(inputBuffer.length / 1024 / 1024).toFixed(1)}MB. ` +
|
||||
`Maximum ${(MAX_INPUT_BYTES / 1024 / 1024).toFixed(0)}MB. ` +
|
||||
`Please upload a shorter audio clip (max ${MAX_DURATION_SEC} seconds).`,
|
||||
);
|
||||
}
|
||||
|
||||
// Detect if it's a WAV by checking RIFF header
|
||||
const isWav =
|
||||
inputBuffer.length >= 4 &&
|
||||
@@ -382,6 +398,17 @@ export async function preprocessAudio(inputBuffer: Buffer): Promise<ProcessedAud
|
||||
}
|
||||
|
||||
const { header, dataOffset } = parseWavHeader(inputBuffer);
|
||||
|
||||
// Validate duration from header BEFORE allocating sample buffers.
|
||||
// This prevents loading multi-hour WAV files into memory.
|
||||
const totalSamples = Math.floor(header.dataSize / (header.bitsPerSample / 8) / header.numChannels);
|
||||
const durationSec = totalSamples / header.sampleRate;
|
||||
if (durationSec > MAX_DURATION_SEC + 30) {
|
||||
throw new Error(
|
||||
`Audio too long: ${durationSec.toFixed(1)}s. Maximum ${MAX_DURATION_SEC}s for analysis. ` +
|
||||
`Please trim your audio before uploading.`,
|
||||
);
|
||||
}
|
||||
let samples = readPcmSamples(inputBuffer, header, dataOffset);
|
||||
|
||||
// Convert to mono
|
||||
|
||||
Reference in New Issue
Block a user