Files
ShieldAI/services/spamshield/test/regex-validation.test.ts
Michael Freno b01b79d02a Add ReDoS validation for SpamRule.pattern field (FRE-4512)
- Create regex-validation utility with ReDoS detection (nested quantifiers,
  overlapping alternations, complexity limits)
- Add @db.VarChar(500) constraint on pattern field in Prisma schema
- Integrate validation in rule-engine at load time and evaluation time
- Add 46 unit tests covering syntax, ReDoS patterns, complexity, edge cases

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-05-02 07:23:39 -04:00

266 lines
9.3 KiB
TypeScript

import { describe, it, expect } from 'vitest';
import {
validateRegexPattern,
analyzeRegexComplexity,
isSafeRegexPattern,
RegexValidationError,
} from '../src/utils/regex-validation';
describe('Regex Validation', () => {
describe('validateRegexPattern', () => {
  /** A test title paired with the regex source the test feeds to the validator. */
  type PatternCase = readonly [string, string];

  /** Registers one test per case asserting that validation does not throw. */
  const itAccepts = (cases: readonly PatternCase[]): void => {
    for (const [title, pattern] of cases) {
      it(title, () => {
        expect(() => validateRegexPattern(pattern)).not.toThrow();
      });
    }
  };

  /** Registers one test per case asserting that validation throws RegexValidationError. */
  const itRejects = (cases: readonly PatternCase[]): void => {
    for (const [title, pattern] of cases) {
      it(title, () => {
        expect(() => validateRegexPattern(pattern)).toThrow(RegexValidationError);
      });
    }
  };

  describe('syntax validation', () => {
    itAccepts([
      ['accepts valid simple patterns', '\\d{3}-\\d{3}-\\d{4}'],
      ['accepts valid phone number patterns', '^\\+1\\d{10}$'],
      ['accepts valid character class patterns', '^[a-zA-Z0-9]+$'],
      ['accepts valid alternation patterns', '^(phone|fax|mobile)$'],
    ]);

    itRejects([
      ['rejects invalid regex syntax', '(unclosed'],
      ['rejects empty pattern', ''],
    ]);
  });

  describe('ReDoS detection - nested quantifiers', () => {
    itRejects([
      ['detects nested quantifier (a+)*', '(a+)*'],
      ['detects nested quantifier (a*)+', '(a*)+'],
      ['detects nested quantifier ([0-9]+)+', '([0-9]+)+'],
      ['detects nested quantifier (a|b+)*', '(a|b+)*'],
    ]);
  });

  describe('ReDoS detection - overlapping alternations', () => {
    itRejects([
      ['detects overlapping alternation (a|aa)*', '(a|aa)*'],
      ['detects overlapping alternation (abc|ab|a)*', '(abc|ab|a)*'],
      ['detects overlapping alternation (foo|foot)*', '(foo|foot)*'],
    ]);
  });

  describe('ReDoS detection - complexity limits', () => {
    it('rejects pattern exceeding max length', () => {
      // 600 literal characters, validated with the default limits.
      const oversized = 'a'.repeat(600);
      expect(() => validateRegexPattern(oversized)).toThrow(RegexValidationError);
    });

    it('rejects pattern exceeding max nesting depth', () => {
      // Eight nested capture groups around a single literal, checked against a limit of 3.
      const depth = 8;
      const nested = '('.repeat(depth) + 'a' + ')'.repeat(depth);
      expect(() => validateRegexPattern(nested, { maxNestingDepth: 3 })).toThrow(
        RegexValidationError,
      );
    });

    it('rejects pattern exceeding max alternations', () => {
      // 24 single-letter branches, checked against a limit of 5.
      const branches = '(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x)';
      expect(() => validateRegexPattern(branches, { maxAlternations: 5 })).toThrow(
        RegexValidationError,
      );
    });
  });

  describe('safe patterns', () => {
    const safeCases: readonly PatternCase[] = [
      ['accepts anchored phone pattern', '^\\+\\d{1,3}\\d{4,14}$'],
      ['accepts simple character class', '^[A-Z]{2}\\d{6}$'],
      ['accepts non-overlapping alternation', '^(spammer|blocker|filter)$'],
      ['accepts escaped special characters', '\\(\\d{3}\\) \\d{3}-\\d{4}'],
    ];

    // The returned metrics must not flag any of these patterns as ReDoS prone.
    for (const [title, pattern] of safeCases) {
      it(title, () => {
        const result = validateRegexPattern(pattern);
        expect(result.isRedosProne).toBe(false);
      });
    }
  });
});
describe('analyzeRegexComplexity', () => {
  it('returns correct metrics for simple pattern', () => {
    // The pattern source is 17 characters long and contains no groups or alternations.
    const { length, nestingDepth, alternationCount, groupCount } =
      analyzeRegexComplexity('\\d{3}-\\d{3}-\\d{4}');
    expect(length).toBe(17);
    expect(nestingDepth).toBe(0);
    expect(alternationCount).toBe(0);
    expect(groupCount).toBe(0);
  });

  it('returns correct metrics for pattern with groups', () => {
    // Three sibling capture groups: count is 3, depth stays at 1.
    const { groupCount, nestingDepth } = analyzeRegexComplexity('(\\d{3})-(\\d{3})-(\\d{4})');
    expect(groupCount).toBe(3);
    expect(nestingDepth).toBe(1);
  });

  it('returns correct metrics for pattern with alternations', () => {
    // Three branches are separated by two `|` operators.
    expect(analyzeRegexComplexity('(phone|fax|mobile)').alternationCount).toBe(2);
  });

  it('marks nested quantifier as ReDoS prone', () => {
    const { isRedosProne, issues } = analyzeRegexComplexity('(a+)*');
    expect(isRedosProne).toBe(true);
    expect(issues.length).toBeGreaterThan(0);
  });

  it('marks overlapping alternation as ReDoS prone', () => {
    expect(analyzeRegexComplexity('(a|aa)*').isRedosProne).toBe(true);
  });

  it('marks long pattern as ReDoS prone', () => {
    // 600 literal characters, analyzed with the default limits.
    const oversized = 'a'.repeat(600);
    expect(analyzeRegexComplexity(oversized).isRedosProne).toBe(true);
  });

  it('marks deep nesting as ReDoS prone', () => {
    // Eight nested capture groups, analyzed against a limit of 3.
    const depth = 8;
    const nested = '('.repeat(depth) + 'a' + ')'.repeat(depth);
    expect(analyzeRegexComplexity(nested, { maxNestingDepth: 3 }).isRedosProne).toBe(true);
  });

  it('marks high alternation count as ReDoS prone', () => {
    // 24 single-letter branches, analyzed against a limit of 5.
    const branches = '(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x)';
    expect(analyzeRegexComplexity(branches, { maxAlternations: 5 }).isRedosProne).toBe(true);
  });
});
describe('isSafeRegexPattern', () => {
  /** Asserts, in order, that isSafeRegexPattern returns `expected` for every pattern. */
  const expectVerdict = (patterns: readonly string[], expected: boolean): void => {
    for (const pattern of patterns) {
      expect(isSafeRegexPattern(pattern)).toBe(expected);
    }
  };

  it('returns true for safe patterns', () => {
    expectVerdict(['^\\+\\d{1,3}\\d{4,14}$', '^[A-Z]{2}\\d{6}$', '\\d{3}-\\d{3}-\\d{4}'], true);
  });

  it('returns false for nested quantifiers', () => {
    expectVerdict(['(a+)*', '([0-9]+)+'], false);
  });

  it('returns false for overlapping alternations', () => {
    expectVerdict(['(a|aa)*', '(foo|foot)*'], false);
  });

  it('returns false for invalid syntax', () => {
    // The boolean API reports a syntax error as `false`; this test expects no throw.
    expectVerdict(['(unclosed'], false);
  });

  it('returns false for empty pattern', () => {
    expectVerdict([''], false);
  });

  it('returns false for excessively long patterns', () => {
    expectVerdict(['a'.repeat(600)], false);
  });
});
describe('RegexValidationError', () => {
  /**
   * Runs validateRegexPattern on `pattern` and returns the RegexValidationError it throws.
   *
   * A bare try/catch around the call would let a test pass with zero assertions
   * if validation ever stopped throwing. This helper fails the calling test when
   * validation returns normally, and rethrows any error that is not a
   * RegexValidationError, so the field assertions below always execute.
   *
   * @param pattern - Regex source expected to be rejected by the validator.
   * @returns The RegexValidationError thrown by validateRegexPattern.
   * @throws Error when validateRegexPattern does not throw at all.
   */
  const captureValidationError = (pattern: string): RegexValidationError => {
    try {
      validateRegexPattern(pattern);
    } catch (err: unknown) {
      if (err instanceof RegexValidationError) {
        return err;
      }
      // Unexpected error type: surface it unchanged so the failure is diagnosable.
      throw err;
    }
    // Thrown outside the try block so it is not swallowed by the catch above.
    throw new Error('Expected validateRegexPattern to throw, but it returned normally');
  };

  it('includes pattern and reason in error', () => {
    const pattern = '(a+)*';
    const validationErr = captureValidationError(pattern);
    expect(validationErr.name).toBe('RegexValidationError');
    expect(validationErr.pattern).toBe(pattern);
    expect(validationErr.reason).toContain('ReDoS risk');
  });

  it('includes syntax error message', () => {
    const pattern = '(unclosed';
    const validationErr = captureValidationError(pattern);
    expect(validationErr.pattern).toBe(pattern);
    expect(validationErr.reason).toContain('Invalid regex syntax');
  });

  it('includes length error message', () => {
    const longPattern = 'a'.repeat(600);
    const validationErr = captureValidationError(longPattern);
    expect(validationErr.reason).toContain('exceeds maximum');
  });
});
describe('edge cases', () => {
  /** Asserts, in order, that validateRegexPattern accepts every pattern without throwing. */
  const expectAllValid = (patterns: readonly string[]): void => {
    for (const pattern of patterns) {
      expect(() => validateRegexPattern(pattern)).not.toThrow();
    }
  };

  it('handles escaped characters correctly', () => {
    // Escaped parentheses are literals, so the inner `+` is not nested inside a group.
    expectAllValid(['\\(\\d+\\)']);
  });

  it('handles character classes without false positives', () => {
    expectAllValid(['[a-z]+', '[^0-9]+', '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+']);
  });

  it('handles non-capturing groups', () => {
    expectAllValid(['(?:abc)+']);
  });

  it('handles lookaheads', () => {
    expectAllValid(['(?=\\d{3})\\d+']);
  });

  it('handles quantifiers with ranges', () => {
    expectAllValid(['\\d{1,3}', '[a-z]{2,4}']);
  });

  it('handles Unicode property escapes', () => {
    // NOTE(review): `\p{L}` is only a Unicode property escape when the regex is
    // compiled with the `u`/`v` flag; which flags validateRegexPattern uses is
    // not visible from this file — confirm this exercises the intended path.
    expectAllValid(['\\p{L}+']);
  });

  it('handles multiline and dotall flags in pattern', () => {
    // NOTE(review): the pattern carries no flags itself; it only uses the
    // anchors and `.` whose meaning those flags would change.
    expectAllValid(['^.+$']);
  });

  it('counts quantifiers correctly', () => {
    // `+`, `*`, `?` and `{2}` are expected to be counted as one quantifier each.
    const { quantifierCount } = analyzeRegexComplexity('a+b*c?d{2}');
    expect(quantifierCount).toBe(4);
  });

  it('handles special characters in character classes', () => {
    expectAllValid(['[^\\w\\s]+']);
  });
});
});