web security audit fixes

This commit is contained in:
2026-06-02 10:30:42 -04:00
parent 36b087ae92
commit ab0d4857db
26 changed files with 1527 additions and 289 deletions

View File

@@ -41,6 +41,11 @@ interface WavHeader {
/** Maximum allowed audio duration in seconds */
const MAX_DURATION_SEC = 30;
/** Fallback cap on raw input size (5 MiB), used when no valid override is configured. */
const DEFAULT_MAX_INPUT_BYTES = 5 * 1024 * 1024;
/**
 * Resolves the raw-input size cap from an optional string override.
 *
 * The override is parsed with `parseInt(…, 10)`. If the result is not a
 * positive safe integer (e.g. the variable is empty, non-numeric, zero or
 * negative), the default is returned instead. Without this check a malformed
 * override parses to `NaN`, and any `length > NaN` comparison is always
 * false — which would silently disable the size guard rather than enforce it.
 *
 * @param raw - Override value as read from the environment, or `undefined` when unset.
 * @returns The size cap in bytes; always a positive safe integer.
 */
function resolveMaxInputBytes(raw: string | undefined): number {
  if (raw === undefined) {
    return DEFAULT_MAX_INPUT_BYTES;
  }
  const parsed = parseInt(raw, 10);
  return Number.isSafeInteger(parsed) && parsed > 0 ? parsed : DEFAULT_MAX_INPUT_BYTES;
}
/**
 * Maximum raw WAV file size before processing (default 5MB). Prevents memory exhaustion.
 * Overridable via the `VOICEPRINT_MAX_INPUT_BYTES` environment variable; invalid
 * overrides fall back to the default instead of disabling the limit.
 */
const MAX_INPUT_BYTES = resolveMaxInputBytes(process.env.VOICEPRINT_MAX_INPUT_BYTES);
/** Target normalization level in dBFS */
const TARGET_DBFS = -3;
/** Frame size for VAD in milliseconds */
@@ -359,16 +364,27 @@ function computeQualityMetrics(samples: Float64Array): {
/**
* Main audio preprocessing pipeline:
* 1. Parse WAV header
* 2. Read PCM samples
* 3. Convert to mono
* 4. Resample to 16kHz
* 5. Normalize to -3 dBFS
* 6. VAD silence trimming
* 7. Limit to 30 seconds
* 8. Convert to 16-bit PCM
* 1. Validate input size
* 2. Parse WAV header
* 3. Validate duration from header (reject too-long audio before decoding)
* 4. Read PCM samples
* 5. Convert to mono
* 6. Resample to 16kHz
* 7. Normalize to -3 dBFS
* 8. VAD silence trimming
* 9. Limit to 30 seconds
* 10. Convert to 16-bit PCM
*/
export async function preprocessAudio(inputBuffer: Buffer): Promise<ProcessedAudio> {
// Reject oversized input early to prevent memory exhaustion
if (inputBuffer.length > MAX_INPUT_BYTES) {
throw new Error(
`Audio file too large: ${(inputBuffer.length / 1024 / 1024).toFixed(1)}MB. ` +
`Maximum ${(MAX_INPUT_BYTES / 1024 / 1024).toFixed(0)}MB. ` +
`Please upload a shorter audio clip (max ${MAX_DURATION_SEC} seconds).`,
);
}
// Detect if it's a WAV by checking RIFF header
const isWav =
inputBuffer.length >= 4 &&
@@ -382,6 +398,17 @@ export async function preprocessAudio(inputBuffer: Buffer): Promise<ProcessedAud
}
const { header, dataOffset } = parseWavHeader(inputBuffer);
// Validate duration from header BEFORE allocating sample buffers.
// This prevents loading multi-hour WAV files into memory.
const totalSamples = Math.floor(header.dataSize / (header.bitsPerSample / 8) / header.numChannels);
const durationSec = totalSamples / header.sampleRate;
if (durationSec > MAX_DURATION_SEC + 30) {
throw new Error(
`Audio too long: ${durationSec.toFixed(1)}s. Maximum ${MAX_DURATION_SEC}s for analysis. ` +
`Please trim your audio before uploading.`,
);
}
let samples = readPcmSamples(inputBuffer, header, dataOffset);
// Convert to mono