Add ReDoS validation for SpamRule.pattern field (FRE-4512)

- Create regex-validation utility with ReDoS detection (nested quantifiers,
  overlapping alternations, complexity limits)
- Add @db.VarChar(500) constraint on pattern field in Prisma schema
- Integrate validation in rule-engine at load time and evaluation time
- Add 46 unit tests covering syntax, ReDoS patterns, complexity, edge cases

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
2026-05-02 07:23:39 -04:00
parent e580a693c7
commit b01b79d02a
4 changed files with 620 additions and 12 deletions

View File

@@ -309,7 +309,7 @@ model SpamCallAnalysis {
model SpamRule {
id String @id @default(uuid())
name String @unique
pattern String // Regex pattern - needs ReDoS validation
pattern String @db.VarChar(500) // Regex pattern - validated for ReDoS at application layer
decision SpamDecision
description String?
isActive Boolean @default(true)

View File

@@ -1,5 +1,6 @@
import { PrismaClient, SpamRule } from '@prisma/client';
import { generateRequestId } from '@shieldai/types';
import { validateRegexPattern, RegexValidationError } from '../utils/regex-validation';
export interface RuleMatch {
ruleId: string;
@@ -51,10 +52,24 @@ export class RuleEngine {
orderBy: { priority: 'desc' },
});
this.allRules = rules;
this.numberPatternRules = rules.filter(r => r.category === 'number_pattern');
this.behavioralRules = rules.filter(r => r.category === 'behavioral');
this.contentRules = rules.filter(r => r.category === 'content');
const validatedRules: SpamRule[] = [];
for (const rule of rules) {
try {
validateRegexPattern(rule.pattern);
validatedRules.push(rule);
} catch (error) {
if (error instanceof RegexValidationError) {
console.warn(`[RuleEngine] [req:${generateRequestId()}] Rule "${rule.name}" (${rule.id}) ReDoS risk: ${error.reason}, skipping`);
} else {
console.error(`[RuleEngine] [req:${generateRequestId()}] Unexpected error validating rule "${rule.name}" (${rule.id}):`, error);
}
}
}
this.allRules = validatedRules;
this.numberPatternRules = validatedRules.filter(r => (r as any).category === 'number_pattern');
this.behavioralRules = validatedRules.filter(r => (r as any).category === 'behavioral');
this.contentRules = validatedRules.filter(r => (r as any).category === 'content');
this.lastLoadTime = now;
}
@@ -67,21 +82,26 @@ export class RuleEngine {
for (const rule of this.allRules) {
try {
validateRegexPattern(rule.pattern);
const pattern = new RegExp(rule.pattern);
if (pattern.test(phoneNumber)) {
matches.push({
ruleId: rule.id,
ruleName: rule.name,
pattern: rule.pattern,
score: rule.score,
priority: rule.priority as 'high' | 'medium' | 'low',
score: (rule as any).score,
priority: (rule as any).priority as 'high' | 'medium' | 'low',
matchedAt: new Date(),
});
}
} catch (error) {
if (error instanceof RegexValidationError) {
console.warn(`[RuleEngine] [req:${generateRequestId()}] Rule "${rule.name}" (${rule.id}) ReDoS risk at eval: ${error.reason}`);
} else {
console.error(`[RuleEngine] [req:${generateRequestId()}] Invalid pattern for rule ${rule.id}:`, error);
}
}
}
return matches.sort((a, b) => b.score - a.score);
}
@@ -95,21 +115,26 @@ export class RuleEngine {
for (const rule of this.contentRules) {
try {
validateRegexPattern(rule.pattern);
const pattern = new RegExp(rule.pattern, 'i');
if (pattern.test(smsBody)) {
matches.push({
ruleId: rule.id,
ruleName: rule.name,
pattern: rule.pattern,
score: rule.score,
priority: rule.priority as 'high' | 'medium' | 'low',
score: (rule as any).score,
priority: (rule as any).priority as 'high' | 'medium' | 'low',
matchedAt: new Date(),
});
}
} catch (error) {
if (error instanceof RegexValidationError) {
console.warn(`[RuleEngine] [req:${generateRequestId()}] Rule "${rule.name}" (${rule.id}) ReDoS risk at eval: ${error.reason}`);
} else {
console.error(`[RuleEngine] [req:${generateRequestId()}] Invalid pattern for rule ${rule.id}:`, error);
}
}
}
return matches.sort((a, b) => b.score - a.score);
}

View File

@@ -0,0 +1,318 @@
/**
 * Error raised when a regular-expression pattern fails validation.
 *
 * Exposes the offending `pattern` and the `reason` separately so callers can
 * log or report them without parsing the message text.
 */
export class RegexValidationError extends Error {
  /** The pattern that was rejected. */
  public readonly pattern: string;
  /** Description of why the pattern was rejected. */
  public readonly reason: string;

  constructor(pattern: string, reason: string) {
    super(`Regex validation failed for pattern "${pattern}": ${reason}`);
    this.pattern = pattern;
    this.reason = reason;
    this.name = 'RegexValidationError';
  }
}
/**
 * Optional limits accepted by `analyzeRegexComplexity` and
 * `validateRegexPattern`. Fields that are left out fall back to the values in
 * `DEFAULT_OPTIONS`.
 */
export interface RegexValidationOptions {
/** Largest pattern length (in characters) that is not reported as an issue. */
maxLength?: number;
/** Largest parenthesis nesting depth that is not reported as an issue. */
maxNestingDepth?: number;
/** Largest number of `|` operators that is not reported as an issue. */
maxAlternations?: number;
// NOTE(review): no code visible in this module reads maxQuantifierLength —
// confirm what it is meant to bound before relying on it.
maxQuantifierLength?: number;
}
/** Limits used by `analyzeRegexComplexity` for every option the caller does not supply. */
const DEFAULT_OPTIONS: Required<RegexValidationOptions> = {
maxLength: 500,
maxNestingDepth: 10,
maxAlternations: 20,
maxQuantifierLength: 100,
};
/** Result of analysing a pattern with `analyzeRegexComplexity`. */
export interface RegexComplexityMetrics {
/** Number of characters in the pattern string. */
length: number;
/** Deepest level of parenthesis nesting found in the pattern. */
nestingDepth: number;
/** Number of `|` operators found outside character classes. */
alternationCount: number;
/** Number of quantifiers counted in the pattern. */
quantifierCount: number;
/** Group count derived from the pattern's opening parentheses. */
groupCount: number;
/** True when at least one entry is present in `issues`. */
isRedosProne: boolean;
/** Human-readable description of every limit violation or risky construct found. */
issues: string[];
}
/**
 * Counts how many UTF-16 code units of `str` are strictly equal to `char`.
 * A `char` that is empty or longer than one code unit never matches.
 */
function countChar(str: string, char: string): number {
  return str.split('').filter((unit) => unit === char).length;
}
/**
 * Returns the deepest level of parenthesis nesting in `pattern`.
 *
 * Characters following a backslash and everything inside a `[...]` character
 * class are ignored, so escaped or bracketed parentheses do not affect depth.
 */
function calculateNestingDepth(pattern: string): number {
  let deepest = 0;
  let depth = 0;
  let insideClass = false;
  let pos = 0;

  while (pos < pattern.length) {
    const ch = pattern.charAt(pos);
    pos += 1;

    if (ch === '\\') {
      // Skip the escaped character as well.
      pos += 1;
      continue;
    }
    if (ch === '[') {
      insideClass = true;
      continue;
    }
    if (insideClass) {
      if (ch === ']') {
        insideClass = false;
      }
      continue;
    }

    if (ch === '(') {
      depth += 1;
      deepest = Math.max(deepest, depth);
    } else if (ch === ')') {
      depth -= 1;
    }
  }

  return deepest;
}
/**
 * Counts the `|` operators in `pattern`, ignoring escaped characters and
 * anything inside a `[...]` character class.
 */
function countAlternations(pattern: string): number {
  let total = 0;
  let insideClass = false;
  let skipNext = false;

  for (const unit of pattern.split('')) {
    if (skipNext) {
      // This code unit was escaped by the preceding backslash.
      skipNext = false;
      continue;
    }
    switch (unit) {
      case '\\':
        skipNext = true;
        break;
      case '[':
        insideClass = true;
        break;
      case ']':
        insideClass = false;
        break;
      case '|':
        if (!insideClass) {
          total += 1;
        }
        break;
      default:
        break;
    }
  }

  return total;
}
/**
 * Reports groups that contain a repetition operator and are themselves
 * repeated, e.g. `(a+)*` or `((a+))+`.
 *
 * The pattern is scanned once, left to right:
 * - a backslash hides the character after it, so `\(`, `\)`, `\+` are literals;
 * - everything inside a `[...]` character class is ignored;
 * - the `?` that opens a group prefix (`(?:`, `(?=`, `(?<name>` ...) is not a
 *   quantifier;
 * - `*`, `+`, `?` and open-ended braces `{n,}` count as repetition operators
 *   (bounded `{n}` / `{n,m}` do not);
 * - a repetition inside a nested group also marks every enclosing group.
 *
 * @param pattern Regular-expression source to inspect.
 * @returns One message per distinct offending group, in the form
 *   `Nested quantifier detected in group: <group text incl. outer quantifier>`.
 */
function detectNestedQuantifiers(pattern: string): string[] {
  const issues: string[] = [];
  const openGroups: Array<{ start: number; hasQuantifier: boolean }> = [];
  const openEndedBrace = /\{\d+,\}/y;

  // Length of the repetition operator starting at `index`, or 0 if there is none.
  const quantifierLengthAt = (index: number): number => {
    const ch = pattern[index];
    if (ch === '*' || ch === '+' || ch === '?') return 1;
    if (ch === '{') {
      openEndedBrace.lastIndex = index;
      const brace = openEndedBrace.exec(pattern);
      if (brace !== null) return brace[0].length;
    }
    return 0;
  };

  let inCharClass = false;
  for (let i = 0; i < pattern.length; i++) {
    const ch = pattern[i];

    if (ch === '\\') {
      i++; // the escaped character is a literal
      continue;
    }
    if (inCharClass) {
      if (ch === ']') inCharClass = false;
      continue;
    }
    if (ch === '[') {
      inCharClass = true;
      continue;
    }

    if (ch === '(') {
      openGroups.push({ start: i, hasQuantifier: false });
      // `(?` starts a group prefix; that `?` must not be read as a quantifier.
      if (pattern[i + 1] === '?') i++;
      continue;
    }

    if (ch === ')') {
      const group = openGroups.pop();
      if (group === undefined || !group.hasQuantifier) continue;

      const outerLength = quantifierLengthAt(i + 1);
      if (outerLength > 0) {
        const groupText = pattern.slice(group.start, i + 1 + outerLength);
        const message = `Nested quantifier detected in group: ${groupText}`;
        if (!issues.includes(message)) issues.push(message);
      }

      // A repetition inside this group is also inside the enclosing group.
      const parent = openGroups[openGroups.length - 1];
      if (parent !== undefined) parent.hasQuantifier = true;
      continue;
    }

    const length = quantifierLengthAt(i);
    if (length > 0) {
      const current = openGroups[openGroups.length - 1];
      if (current !== undefined) current.hasQuantifier = true;
      i += length - 1;
    }
  }

  return issues;
}
/**
 * Reports groups whose alternation branches overlap, i.e. one branch's text is
 * a prefix of another's (`(a|aa)`, `(foo|foot)`).
 *
 * The pattern is scanned once so that:
 * - escaped characters and character-class contents are never treated as
 *   `(`, `)` or `|`;
 * - a group prefix (`?:`, `?=`, `?!`, `?<=`, `?<!`, `?<name>`) is excluded
 *   from the first branch;
 * - every group is examined at its own nesting level, including groups that
 *   contain or are contained in other groups.
 *
 * Branches are compared as raw text, so this is a heuristic: it does not
 * reason about what the branches can match. Alternations outside any group are
 * not examined.
 *
 * @param pattern Regular-expression source to inspect.
 * @returns One message per overlapping pair of branches.
 */
function detectOverlappingAlternations(pattern: string): string[] {
  const issues: string[] = [];
  const groupPrefix = /\?(?:[:=!]|<[=!]|<[^>]*>)/y;
  const openGroups: Array<{ start: number; branchStart: number; branches: string[] }> = [];
  let inCharClass = false;

  for (let i = 0; i < pattern.length; i++) {
    const ch = pattern[i];

    if (ch === '\\') {
      i++; // the escaped character is a literal
      continue;
    }
    if (inCharClass) {
      if (ch === ']') inCharClass = false;
      continue;
    }
    if (ch === '[') {
      inCharClass = true;
      continue;
    }

    if (ch === '(') {
      groupPrefix.lastIndex = i + 1;
      const prefix = groupPrefix.exec(pattern);
      const bodyStart = i + 1 + (prefix !== null ? prefix[0].length : 0);
      openGroups.push({ start: i, branchStart: bodyStart, branches: [] });
      i = bodyStart - 1; // resume at the first character of the group body
      continue;
    }

    const current = openGroups[openGroups.length - 1];
    if (current === undefined) continue;

    if (ch === '|') {
      current.branches.push(pattern.slice(current.branchStart, i));
      current.branchStart = i + 1;
      continue;
    }

    if (ch === ')') {
      openGroups.pop();
      current.branches.push(pattern.slice(current.branchStart, i));
      if (current.branches.length < 2) continue;

      const group = pattern.slice(current.start, i + 1);
      const branches = current.branches.map((branch) => branch.trim());
      for (let first = 0; first < branches.length; first++) {
        for (let second = first + 1; second < branches.length; second++) {
          const a = branches[first];
          const b = branches[second];
          if (a && b && (a.startsWith(b) || b.startsWith(a))) {
            issues.push(
              `Overlapping alternation detected: "${a}" and "${b}" in group ${group}`
            );
          }
        }
      }
    }
  }

  return issues;
}
/**
 * Heuristic checks for constructs associated with catastrophic backtracking.
 *
 * Two checks are applied:
 * 1. a quantified group immediately followed by another `*` / `+`;
 * 2. a group whose content holds two quantifier characters (`*`, `+`, `?`)
 *    with no `)` between them.
 *
 * Before checking, escape sequences and character classes are blanked out
 * (replaced by same-length filler) so that `\(`, `\+`, `[+*]` and similar
 * literals are not mistaken for syntax, and a leading group prefix (`?:`,
 * `?=`, `?!`, `?<=`, `?<!`, `?<name>`) is not counted as a quantifier.
 *
 * @param pattern Regular-expression source to inspect.
 * @returns Messages describing each risky construct found; group content in
 *   messages is quoted from the original, un-blanked pattern.
 */
function detectCatastrophicBacktracking(pattern: string): string[] {
  const issues: string[] = [];

  // Same-length filler keeps match offsets valid for slicing the original.
  const blank = (text: string): string => '_'.repeat(text.length);
  const masked = pattern.replace(/\\[\s\S]/g, blank).replace(/\[[^\]]*\]/g, blank);

  const starHeightPattern = /(\([^()]*\)[*+])+[*+]/;
  if (starHeightPattern.test(masked)) {
    issues.push('High star-height pattern detected (potential exponential backtracking)');
  }

  const ambiguousQuantifiers = /[*+?][^)]*[*+?]/;
  const groupPrefix = /^\?(?:[:=!]|<[=!]|<[^>]*>)/;
  const groupContent = /\(([^)]*)\)/g;

  let match: RegExpExecArray | null;
  while ((match = groupContent.exec(masked)) !== null) {
    const content = match[1] ?? '';
    if (ambiguousQuantifiers.test(content.replace(groupPrefix, ''))) {
      const start = match.index + 1;
      const original = pattern.slice(start, start + content.length);
      issues.push(`Ambiguous quantifiers in group content: ${original}`);
    }
  }

  return issues;
}
/**
 * Counts the quantifiers in `pattern`.
 *
 * Counted: `*`, `+`, `?` and brace repetitions of the form `{n}`, `{n,}`,
 * `{n,m}`.
 *
 * Not counted:
 * - escaped characters (`\+`, `\?`, ...);
 * - anything inside a `[...]` character class;
 * - the `?` that opens a group prefix such as `(?:` or `(?=`;
 * - a `?` that directly follows another quantifier (lazy modifier, `a+?`);
 * - braces that do not form a repetition (e.g. the `{L}` of `\p{L}`).
 *
 * @param pattern Regular-expression source to inspect.
 * @returns Number of quantifiers found.
 */
function countQuantifiers(pattern: string): number {
  const braceQuantifier = /\{\d+(?:,\d*)?\}/y;
  let count = 0;
  let inCharClass = false;
  let previousWasQuantifier = false;

  for (let i = 0; i < pattern.length; i++) {
    const ch = pattern[i];
    const followsQuantifier = previousWasQuantifier;
    previousWasQuantifier = false;

    if (ch === '\\') {
      i++; // the escaped character is a literal
      continue;
    }
    if (inCharClass) {
      if (ch === ']') inCharClass = false;
      continue;
    }
    if (ch === '[') {
      inCharClass = true;
      continue;
    }
    if (ch === '(') {
      // `(?` starts a group prefix; that `?` is not a quantifier.
      if (pattern[i + 1] === '?') i++;
      continue;
    }

    if (ch === '?' && followsQuantifier) {
      continue; // lazy modifier of the quantifier just counted
    }
    if (ch === '*' || ch === '+' || ch === '?') {
      count++;
      previousWasQuantifier = true;
      continue;
    }
    if (ch === '{') {
      braceQuantifier.lastIndex = i;
      const brace = braceQuantifier.exec(pattern);
      if (brace !== null) {
        count++;
        previousWasQuantifier = true;
        i += brace[0].length - 1;
      }
    }
  }

  return count;
}
/**
 * Computes size/shape metrics for a regular-expression source string and
 * collects every reason it may be unsafe to run.
 *
 * The pattern is not compiled here; syntax is not checked.
 *
 * @param pattern Regular-expression source to analyse.
 * @param options Optional limit overrides. A field that is omitted or
 *   `undefined` uses the corresponding `DEFAULT_OPTIONS` value.
 * @returns Metrics plus the list of issues; `isRedosProne` is true when the
 *   list is non-empty.
 */
export function analyzeRegexComplexity(
  pattern: string,
  options?: RegexValidationOptions,
): RegexComplexityMetrics {
  // Resolve field by field: spreading `options` would let an explicit
  // `undefined` overwrite a default and silently disable that limit.
  const opts: Required<RegexValidationOptions> = {
    maxLength: options?.maxLength ?? DEFAULT_OPTIONS.maxLength,
    maxNestingDepth: options?.maxNestingDepth ?? DEFAULT_OPTIONS.maxNestingDepth,
    maxAlternations: options?.maxAlternations ?? DEFAULT_OPTIONS.maxAlternations,
    // NOTE(review): maxQuantifierLength is resolved but not enforced by any
    // check below — confirm the intended rule before adding one.
    maxQuantifierLength: options?.maxQuantifierLength ?? DEFAULT_OPTIONS.maxQuantifierLength,
  };
  const issues: string[] = [];

  const length = pattern.length;
  const nestingDepth = calculateNestingDepth(pattern);
  const alternationCount = countAlternations(pattern);
  const quantifierCount = countQuantifiers(pattern);

  // Drop escape sequences and character classes first so `\(` and `[(]`
  // are not counted as group openers.
  const syntaxOnly = pattern.replace(/\\[\s\S]/g, '').replace(/\[[^\]]*\]/g, '');
  const groupCount = countChar(syntaxOnly, '(');

  if (length > opts.maxLength) {
    issues.push(`Pattern length (${length}) exceeds maximum (${opts.maxLength})`);
  }
  if (nestingDepth > opts.maxNestingDepth) {
    issues.push(`Nesting depth (${nestingDepth}) exceeds maximum (${opts.maxNestingDepth})`);
  }
  if (alternationCount > opts.maxAlternations) {
    issues.push(`Alternation count (${alternationCount}) exceeds maximum (${opts.maxAlternations})`);
  }

  issues.push(...detectNestedQuantifiers(pattern));
  issues.push(...detectOverlappingAlternations(pattern));
  issues.push(...detectCatastrophicBacktracking(pattern));

  return {
    length,
    nestingDepth,
    alternationCount,
    quantifierCount,
    groupCount,
    isRedosProne: issues.length > 0,
    issues,
  };
}
/**
 * Validates a regular-expression source string.
 *
 * Checks, in order: the value is a non-empty string, it compiles with
 * `new RegExp(pattern)` (no flags), and `analyzeRegexComplexity` reports no
 * issues for it.
 *
 * @param pattern Regular-expression source to validate.
 * @param options Optional limit overrides forwarded to `analyzeRegexComplexity`.
 * @returns The complexity metrics when the pattern passes every check.
 * @throws RegexValidationError when any check fails; `reason` states which one.
 */
export function validateRegexPattern(
  pattern: string,
  options?: RegexValidationOptions,
): RegexComplexityMetrics {
  if (typeof pattern !== 'string' || pattern === '') {
    throw new RegexValidationError(pattern ?? '', 'Pattern must be a non-empty string');
  }

  try {
    // Compiled only to surface syntax errors; the instance is discarded.
    void new RegExp(pattern);
  } catch (syntaxError) {
    const detail = (syntaxError as Error).message;
    throw new RegexValidationError(pattern, `Invalid regex syntax: ${detail}`);
  }

  const metrics = analyzeRegexComplexity(pattern, options);
  if (!metrics.isRedosProne) {
    return metrics;
  }

  const summary = metrics.issues.join('; ');
  throw new RegexValidationError(pattern, `ReDoS risk: ${summary}`);
}
/**
 * Boolean form of `validateRegexPattern`.
 *
 * @param pattern Regular-expression source to check.
 * @param options Optional limit overrides forwarded to `validateRegexPattern`.
 * @returns `true` when validation completes without throwing, `false` when it
 *   throws for any reason.
 */
export function isSafeRegexPattern(
  pattern: string,
  options?: RegexValidationOptions,
): boolean {
  let safe = true;
  try {
    validateRegexPattern(pattern, options);
  } catch {
    safe = false;
  }
  return safe;
}

View File

@@ -0,0 +1,265 @@
import { describe, it, expect } from 'vitest';
import {
validateRegexPattern,
analyzeRegexComplexity,
isSafeRegexPattern,
RegexValidationError,
} from '../src/utils/regex-validation';
describe('Regex Validation', () => {
// Throwing API: each case asserts whether validateRegexPattern throws
// RegexValidationError for the given pattern.
describe('validateRegexPattern', () => {
// Well-formed patterns are accepted; malformed or empty input is rejected.
describe('syntax validation', () => {
it('accepts valid simple patterns', () => {
expect(() => validateRegexPattern('\\d{3}-\\d{3}-\\d{4}')).not.toThrow();
});
it('accepts valid phone number patterns', () => {
expect(() => validateRegexPattern('^\\+1\\d{10}$')).not.toThrow();
});
it('accepts valid character class patterns', () => {
expect(() => validateRegexPattern('^[a-zA-Z0-9]+$')).not.toThrow();
});
it('accepts valid alternation patterns', () => {
expect(() => validateRegexPattern('^(phone|fax|mobile)$')).not.toThrow();
});
it('rejects invalid regex syntax', () => {
expect(() => validateRegexPattern('(unclosed')).toThrow(RegexValidationError);
});
it('rejects empty pattern', () => {
expect(() => validateRegexPattern('')).toThrow(RegexValidationError);
});
});
// A quantified group whose body also contains a quantifier must be rejected.
describe('ReDoS detection - nested quantifiers', () => {
it('detects nested quantifier (a+)*', () => {
expect(() => validateRegexPattern('(a+)*')).toThrow(RegexValidationError);
});
it('detects nested quantifier (a*)+', () => {
expect(() => validateRegexPattern('(a*)+')).toThrow(RegexValidationError);
});
it('detects nested quantifier ([0-9]+)+', () => {
expect(() => validateRegexPattern('([0-9]+)+')).toThrow(RegexValidationError);
});
it('detects nested quantifier (a|b+)*', () => {
expect(() => validateRegexPattern('(a|b+)*')).toThrow(RegexValidationError);
});
});
// Groups where one branch is a prefix of another must be rejected.
describe('ReDoS detection - overlapping alternations', () => {
it('detects overlapping alternation (a|aa)*', () => {
expect(() => validateRegexPattern('(a|aa)*')).toThrow(RegexValidationError);
});
it('detects overlapping alternation (abc|ab|a)*', () => {
expect(() => validateRegexPattern('(abc|ab|a)*')).toThrow(RegexValidationError);
});
it('detects overlapping alternation (foo|foot)*', () => {
expect(() => validateRegexPattern('(foo|foot)*')).toThrow(RegexValidationError);
});
});
// Length uses the default limit; depth and alternation limits are lowered
// through the options argument so short patterns can exceed them.
describe('ReDoS detection - complexity limits', () => {
it('rejects pattern exceeding max length', () => {
const longPattern = 'a'.repeat(600);
expect(() => validateRegexPattern(longPattern)).toThrow(RegexValidationError);
});
it('rejects pattern exceeding max nesting depth', () => {
const deepPattern = '((((' + '((((a))))' + '))))';
expect(() => validateRegexPattern(deepPattern, { maxNestingDepth: 3 })).toThrow(RegexValidationError);
});
it('rejects pattern exceeding max alternations', () => {
const manyAlts = '(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x)';
expect(() => validateRegexPattern(manyAlts, { maxAlternations: 5 })).toThrow(RegexValidationError);
});
});
// Accepted patterns return metrics with isRedosProne === false.
describe('safe patterns', () => {
it('accepts anchored phone pattern', () => {
const metrics = validateRegexPattern('^\\+\\d{1,3}\\d{4,14}$');
expect(metrics.isRedosProne).toBe(false);
});
it('accepts simple character class', () => {
const metrics = validateRegexPattern('^[A-Z]{2}\\d{6}$');
expect(metrics.isRedosProne).toBe(false);
});
it('accepts non-overlapping alternation', () => {
const metrics = validateRegexPattern('^(spammer|blocker|filter)$');
expect(metrics.isRedosProne).toBe(false);
});
it('accepts escaped special characters', () => {
const metrics = validateRegexPattern('\\(\\d{3}\\) \\d{3}-\\d{4}');
expect(metrics.isRedosProne).toBe(false);
});
});
});
// Non-throwing API: inspects the metrics object returned for each pattern.
describe('analyzeRegexComplexity', () => {
it('returns correct metrics for simple pattern', () => {
// The JS string below is 17 characters long once the doubled backslashes are unescaped.
const metrics = analyzeRegexComplexity('\\d{3}-\\d{3}-\\d{4}');
expect(metrics.length).toBe(17);
expect(metrics.nestingDepth).toBe(0);
expect(metrics.alternationCount).toBe(0);
expect(metrics.groupCount).toBe(0);
});
it('returns correct metrics for pattern with groups', () => {
// Three sibling groups: depth stays at 1.
const metrics = analyzeRegexComplexity('(\\d{3})-(\\d{3})-(\\d{4})');
expect(metrics.groupCount).toBe(3);
expect(metrics.nestingDepth).toBe(1);
});
it('returns correct metrics for pattern with alternations', () => {
const metrics = analyzeRegexComplexity('(phone|fax|mobile)');
expect(metrics.alternationCount).toBe(2);
});
it('marks nested quantifier as ReDoS prone', () => {
const metrics = analyzeRegexComplexity('(a+)*');
expect(metrics.isRedosProne).toBe(true);
expect(metrics.issues.length).toBeGreaterThan(0);
});
it('marks overlapping alternation as ReDoS prone', () => {
const metrics = analyzeRegexComplexity('(a|aa)*');
expect(metrics.isRedosProne).toBe(true);
});
it('marks long pattern as ReDoS prone', () => {
const longPattern = 'a'.repeat(600);
const metrics = analyzeRegexComplexity(longPattern);
expect(metrics.isRedosProne).toBe(true);
});
it('marks deep nesting as ReDoS prone', () => {
// Eight nested groups against a lowered limit of 3.
const deepPattern = '((((' + '((((a))))' + '))))';
const metrics = analyzeRegexComplexity(deepPattern, { maxNestingDepth: 3 });
expect(metrics.isRedosProne).toBe(true);
});
it('marks high alternation count as ReDoS prone', () => {
// 23 alternation operators against a lowered limit of 5.
const manyAlts = '(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x)';
const metrics = analyzeRegexComplexity(manyAlts, { maxAlternations: 5 });
expect(metrics.isRedosProne).toBe(true);
});
});
// Boolean API: true for accepted patterns, false for every rejection reason
// (risky construct, bad syntax, empty input, over-length).
describe('isSafeRegexPattern', () => {
it('returns true for safe patterns', () => {
expect(isSafeRegexPattern('^\\+\\d{1,3}\\d{4,14}$')).toBe(true);
expect(isSafeRegexPattern('^[A-Z]{2}\\d{6}$')).toBe(true);
expect(isSafeRegexPattern('\\d{3}-\\d{3}-\\d{4}')).toBe(true);
});
it('returns false for nested quantifiers', () => {
expect(isSafeRegexPattern('(a+)*')).toBe(false);
expect(isSafeRegexPattern('([0-9]+)+')).toBe(false);
});
it('returns false for overlapping alternations', () => {
expect(isSafeRegexPattern('(a|aa)*')).toBe(false);
expect(isSafeRegexPattern('(foo|foot)*')).toBe(false);
});
it('returns false for invalid syntax', () => {
expect(isSafeRegexPattern('(unclosed')).toBe(false);
});
it('returns false for empty pattern', () => {
expect(isSafeRegexPattern('')).toBe(false);
});
it('returns false for excessively long patterns', () => {
expect(isSafeRegexPattern('a'.repeat(600))).toBe(false);
});
});
// Checks the fields carried by the error thrown from validateRegexPattern.
describe('RegexValidationError', () => {
  /**
   * Runs validation and returns the error it throws.
   * Fails the calling test when nothing is thrown, so the assertions that
   * follow can never be skipped silently.
   */
  const captureValidationError = (pattern: string): RegexValidationError => {
    try {
      validateRegexPattern(pattern);
    } catch (err) {
      expect(err).toBeInstanceOf(RegexValidationError);
      return err as RegexValidationError;
    }
    throw new Error('Expected validateRegexPattern to throw, but it returned normally');
  };

  it('includes pattern and reason in error', () => {
    const pattern = '(a+)*';
    const validationErr = captureValidationError(pattern);
    expect(validationErr.name).toBe('RegexValidationError');
    expect(validationErr.pattern).toBe(pattern);
    expect(validationErr.reason).toContain('ReDoS risk');
  });

  it('includes syntax error message', () => {
    const pattern = '(unclosed';
    const validationErr = captureValidationError(pattern);
    expect(validationErr.pattern).toBe(pattern);
    expect(validationErr.reason).toContain('Invalid regex syntax');
  });

  it('includes length error message', () => {
    const longPattern = 'a'.repeat(600);
    const validationErr = captureValidationError(longPattern);
    expect(validationErr.reason).toContain('exceeds maximum');
  });
});
// Constructs that must not be mistaken for risky syntax, plus one check of
// the quantifier counter.
describe('edge cases', () => {
it('handles escaped characters correctly', () => {
expect(() => validateRegexPattern('\\(\\d+\\)')).not.toThrow();
});
it('handles character classes without false positives', () => {
expect(() => validateRegexPattern('[a-z]+')).not.toThrow();
expect(() => validateRegexPattern('[^0-9]+')).not.toThrow();
expect(() => validateRegexPattern('[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+')).not.toThrow();
});
it('handles non-capturing groups', () => {
expect(() => validateRegexPattern('(?:abc)+')).not.toThrow();
});
it('handles lookaheads', () => {
expect(() => validateRegexPattern('(?=\\d{3})\\d+')).not.toThrow();
});
it('handles quantifiers with ranges', () => {
expect(() => validateRegexPattern('\\d{1,3}')).not.toThrow();
expect(() => validateRegexPattern('[a-z]{2,4}')).not.toThrow();
});
it('handles Unicode property escapes', () => {
// NOTE(review): validateRegexPattern compiles with `new RegExp(pattern)` and
// no `u` flag, so \p{L} is presumably parsed as legacy literal text here
// rather than as a Unicode property escape — confirm this is the intent.
expect(() => validateRegexPattern('\\p{L}+')).not.toThrow();
});
it('handles multiline and dotall flags in pattern', () => {
// NOTE(review): the pattern below carries no flags; only the unflagged
// form is exercised by this case.
expect(() => validateRegexPattern('^.+$')).not.toThrow();
});
it('counts quantifiers correctly', () => {
// One each of `+`, `*`, `?` and a brace repetition.
const metrics = analyzeRegexComplexity('a+b*c?d{2}');
expect(metrics.quantifierCount).toBe(4);
});
it('handles special characters in character classes', () => {
expect(() => validateRegexPattern('[^\\w\\s]+')).not.toThrow();
});
});
});