Add ReDoS validation for SpamRule.pattern field (FRE-4512)
- Create regex-validation utility with ReDoS detection (nested quantifiers, overlapping alternations, complexity limits)
- Add @db.VarChar(500) constraint on pattern field in Prisma schema
- Integrate validation in rule-engine at load time and evaluation time
- Add 46 unit tests covering syntax, ReDoS patterns, complexity, edge cases

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
318
services/spamshield/src/utils/regex-validation.ts
Normal file
318
services/spamshield/src/utils/regex-validation.ts
Normal file
@@ -0,0 +1,318 @@
|
||||
/**
 * Error thrown when a regex pattern is rejected by validation.
 *
 * The rejected pattern and the reason are exposed as separate read-only
 * fields so callers can report them without parsing `message`.
 */
export class RegexValidationError extends Error {
  /**
   * @param pattern - The pattern text that was rejected.
   * @param reason - Human-readable explanation of the rejection.
   */
  constructor(
    public readonly pattern: string,
    public readonly reason: string,
  ) {
    super(`Regex validation failed for pattern "${pattern}": ${reason}`);
    this.name = 'RegexValidationError';
  }
}
|
||||
|
||||
/**
 * Optional limits applied when analysing a regex pattern.
 *
 * Any field left out falls back to the module default.
 */
export interface RegexValidationOptions {
  /** Maximum allowed pattern length in characters (module default: 500). */
  maxLength?: number;
  /** Maximum allowed depth of nested groups (module default: 10). */
  maxNestingDepth?: number;
  /** Maximum allowed number of `|` alternation operators (module default: 20). */
  maxAlternations?: number;
  /**
   * Module default: 100.
   * NOTE(review): no code visible in this file reads this option, and its
   * intended meaning (quantifier count vs. repetition bound) is not evident
   * from here — confirm before relying on it.
   */
  maxQuantifierLength?: number;
}
|
||||
|
||||
/** Limits used for every option the caller does not supply. */
const DEFAULT_OPTIONS: Required<RegexValidationOptions> = {
  maxLength: 500,
  maxNestingDepth: 10,
  maxAlternations: 20,
  maxQuantifierLength: 100,
};
|
||||
|
||||
/** Result of analysing a regex pattern's structure and ReDoS risk. */
export interface RegexComplexityMetrics {
  /** Length of the pattern text in characters. */
  length: number;
  /** Deepest level of nested groups found in the pattern. */
  nestingDepth: number;
  /** Number of `|` alternation operators counted in the pattern. */
  alternationCount: number;
  /** Number of quantifiers counted in the pattern. */
  quantifierCount: number;
  /** Number of groups counted in the pattern. */
  groupCount: number;
  /** True when at least one entry is present in {@link issues}. */
  isRedosProne: boolean;
  /** Human-readable description of every limit violation or risk found. */
  issues: string[];
}
|
||||
|
||||
/**
 * Counts how many positions in `str` hold exactly `char`.
 *
 * The comparison is per UTF-16 code unit, so `char` is expected to be a
 * single character; a longer string never matches.
 *
 * @param str - Text to scan.
 * @param char - Single character to look for.
 * @returns Number of occurrences of `char` in `str`.
 */
function countChar(str: string, char: string): number {
  let count = 0;
  for (let i = 0; i < str.length; i++) {
    if (str[i] === char) count++;
  }
  return count;
}
|
||||
|
||||
/**
 * Computes the deepest level of group nesting in a regex pattern.
 *
 * Escaped characters and everything inside a character class (`[...]`) are
 * ignored, so `\(` and `[(]` do not count as groups.
 *
 * @param pattern - Regex source text.
 * @returns The maximum number of simultaneously open groups.
 */
function calculateNestingDepth(pattern: string): number {
  let maxDepth = 0;
  let currentDepth = 0;
  let inCharClass = false;
  let escaped = false;

  for (let i = 0; i < pattern.length; i++) {
    const char = pattern[i];

    // The character after a backslash is a literal/escape, never structure.
    if (escaped) {
      escaped = false;
      continue;
    }
    if (char === '\\') {
      escaped = true;
      continue;
    }

    if (char === '[') {
      inCharClass = true;
      continue;
    }
    if (char === ']' && inCharClass) {
      inCharClass = false;
      continue;
    }
    if (inCharClass) continue;

    if (char === '(') {
      currentDepth++;
      if (currentDepth > maxDepth) {
        maxDepth = currentDepth;
      }
    } else if (char === ')' && currentDepth > 0) {
      // Never go below zero: a stray ")" in malformed input must not hide
      // the depth of groups that follow it.
      currentDepth--;
    }
  }

  return maxDepth;
}
|
||||
|
||||
/**
 * Counts the `|` alternation operators in a regex pattern.
 *
 * Escaped pipes (`\|`) and pipes inside a character class (`[...]`) are
 * literals and are not counted.
 *
 * @param pattern - Regex source text.
 * @returns Number of alternation operators at any nesting level.
 */
function countAlternations(pattern: string): number {
  let count = 0;
  let inCharClass = false;
  let escaped = false;

  for (let i = 0; i < pattern.length; i++) {
    const char = pattern[i];

    // The character after a backslash is a literal/escape, never an operator.
    if (escaped) {
      escaped = false;
      continue;
    }
    if (char === '\\') {
      escaped = true;
      continue;
    }

    if (char === '[') {
      inCharClass = true;
      continue;
    }
    if (char === ']' && inCharClass) {
      inCharClass = false;
      continue;
    }
    if (inCharClass) continue;

    if (char === '|') {
      count++;
    }
  }

  return count;
}
|
||||
|
||||
/**
 * Finds groups that contain a quantifier and are themselves quantified,
 * e.g. `(a+)+` or `((a)+)*`.
 *
 * The pattern is scanned once, left to right, with a stack of open groups:
 * - escaped characters (`\+`) and character-class members (`[+*]`) are
 *   literals and never count as quantifiers;
 * - the `?` directly after `(` (as in `(?:`, `(?=`, `(?<name>`) is group
 *   syntax, not a quantifier;
 * - `*`, `+`, `?` and the unbounded brace form `{n,}` count as quantifiers;
 * - a quantifier inside a nested group also counts for every enclosing
 *   group, so `((a+)b)+` is reported.
 *
 * @param pattern - Regex source text.
 * @returns One message per distinct offending group, in order of appearance.
 *   Each message ends with the group text including its outer quantifier.
 */
function detectNestedQuantifiers(pattern: string): string[] {
  const issues: string[] = [];
  const reported = new Set<string>();
  // Sticky so it only matches exactly at the index we point it to.
  const unboundedBrace = /\{\d+,\}/y;

  // Length of the quantifier token starting at `index`, or 0 if none.
  const quantifierLengthAt = (index: number): number => {
    const char = pattern[index];
    if (char === '*' || char === '+' || char === '?') return 1;
    if (char !== '{') return 0;
    unboundedBrace.lastIndex = index;
    return unboundedBrace.test(pattern) ? unboundedBrace.lastIndex - index : 0;
  };

  const openGroups: Array<{ start: number; hasQuantifier: boolean }> = [];
  let inCharClass = false;
  let i = 0;

  while (i < pattern.length) {
    const char = pattern[i];

    if (char === '\\') {
      i += 2; // skip the escaped character as well
      continue;
    }
    if (inCharClass) {
      if (char === ']') inCharClass = false;
      i++;
      continue;
    }
    if (char === '[') {
      inCharClass = true;
      i++;
      continue;
    }

    if (char === '(') {
      openGroups.push({ start: i, hasQuantifier: false });
      // Step over the group-syntax "?" so it is not mistaken for a quantifier.
      i += pattern[i + 1] === '?' ? 2 : 1;
      continue;
    }

    if (char === ')') {
      const group = openGroups.pop();
      const outerLength = quantifierLengthAt(i + 1);
      if (group !== undefined) {
        if (group.hasQuantifier && outerLength > 0) {
          const groupText = pattern.slice(group.start, i + 1 + outerLength);
          const issue = `Nested quantifier detected in group: ${groupText}`;
          if (!reported.has(issue)) {
            reported.add(issue);
            issues.push(issue);
          }
        }
        // Repetition inside (or applied to) this group is also repetition
        // inside the enclosing group.
        const parent = openGroups[openGroups.length - 1];
        if (parent !== undefined && (group.hasQuantifier || outerLength > 0)) {
          parent.hasQuantifier = true;
        }
      }
      i += 1 + outerLength;
      continue;
    }

    const quantifierLength = quantifierLengthAt(i);
    if (quantifierLength > 0) {
      const enclosing = openGroups[openGroups.length - 1];
      if (enclosing !== undefined) enclosing.hasQuantifier = true;
      i += quantifierLength;
      continue;
    }

    i++;
  }

  return issues;
}
|
||||
|
||||
/**
 * Finds groups whose alternation branches overlap, i.e. one branch is a
 * textual prefix of another (`(a|ab)`, `(?:spam|spammer)`), including
 * identical branches.
 *
 * The pattern is scanned once with a stack of open groups:
 * - group syntax such as `?:`, `?=`, `?!`, `?<=`, `?<!` and `?<name>` is
 *   stripped before the first branch is compared;
 * - `\|` and `|` inside a character class are literals and do not split
 *   branches;
 * - each group is split only on its own `|` operators; a nested group is
 *   checked separately and appears verbatim inside the outer branch text.
 *
 * Alternations that are not inside any group are not examined.
 * Branches are compared after trimming surrounding whitespace, and empty
 * branches are skipped.
 *
 * @param pattern - Regex source text.
 * @returns One message per overlapping branch pair.
 */
function detectOverlappingAlternations(pattern: string): string[] {
  const issues: string[] = [];
  // Sticky: matches group syntax only directly after the "(" we point it at.
  // "<=" and "<!" must be tried before the named-group form "<name>".
  const groupPrefix = /\?(?::|=|!|<=|<!|<[^>]*>)/y;

  const openGroups: Array<{
    start: number;
    branchStart: number;
    branches: string[];
  }> = [];
  let inCharClass = false;
  let i = 0;

  while (i < pattern.length) {
    const char = pattern[i];

    if (char === '\\') {
      i += 2; // skip the escaped character as well
      continue;
    }
    if (inCharClass) {
      if (char === ']') inCharClass = false;
      i++;
      continue;
    }
    if (char === '[') {
      inCharClass = true;
      i++;
      continue;
    }

    if (char === '(') {
      groupPrefix.lastIndex = i + 1;
      const contentStart = groupPrefix.test(pattern)
        ? groupPrefix.lastIndex
        : i + 1;
      openGroups.push({ start: i, branchStart: contentStart, branches: [] });
      i = contentStart;
      continue;
    }

    const group = openGroups[openGroups.length - 1];
    if (group !== undefined && char === '|') {
      group.branches.push(pattern.slice(group.branchStart, i));
      group.branchStart = i + 1;
    } else if (group !== undefined && char === ')') {
      openGroups.pop();
      group.branches.push(pattern.slice(group.branchStart, i));

      const groupText = pattern.slice(group.start, i + 1);
      const branches = group.branches.map((branch) => branch.trim());
      for (let x = 0; x < branches.length; x++) {
        for (let y = x + 1; y < branches.length; y++) {
          const a = branches[x];
          const b = branches[y];
          if (a && b && (a.startsWith(b) || b.startsWith(a))) {
            issues.push(
              `Overlapping alternation detected: "${a}" and "${b}" in group ${groupText}`,
            );
          }
        }
      }
    }

    i++;
  }

  return issues;
}
|
||||
|
||||
/**
 * Detects two structures associated with catastrophic backtracking:
 *
 * 1. High star height: a group that contains unbounded repetition (`*`, `+`
 *    or `{n,}`, directly or in a nested group) and is itself followed by
 *    unbounded repetition, e.g. `(a+)+` or `((a)+)*`. Reported at most once.
 * 2. Ambiguous quantifiers: a group with two or more `*`/`+`/`?` quantifiers
 *    at its own nesting level, e.g. `(a+b+)`. Reported once per group with
 *    the text between the group's parentheses.
 *
 * Only real quantifiers are counted. Escaped characters, character-class
 * members, the group-syntax `?` after `(`, and the lazy modifier `?` that
 * follows another quantifier (`.*?`, `a{2,3}?`) are not quantifiers.
 *
 * @param pattern - Regex source text.
 * @returns The star-height message first (if any), then the ambiguity
 *   messages in the order their groups close.
 */
function detectCatastrophicBacktracking(pattern: string): string[] {
  const ambiguityIssues: string[] = [];
  let highStarHeight = false;
  // Sticky: matches a brace quantifier only at the index we point it to.
  const braceQuantifier = /\{\d+(?:,\d*)?\}/y;

  const openGroups: Array<{
    start: number;
    quantifiers: number;
    repeatsUnbounded: boolean;
  }> = [];
  let inCharClass = false;
  let afterQuantifier = false;
  let i = 0;

  while (i < pattern.length) {
    const char = pattern[i];
    // Remember whether the previous token was a quantifier so a following
    // "?" can be recognised as a lazy modifier; reset for the next token.
    const followsQuantifier = afterQuantifier;
    afterQuantifier = false;

    if (char === '\\') {
      i += 2; // skip the escaped character as well
      continue;
    }
    if (inCharClass) {
      if (char === ']') inCharClass = false;
      i++;
      continue;
    }
    if (char === '[') {
      inCharClass = true;
      i++;
      continue;
    }

    if (char === '(') {
      openGroups.push({ start: i, quantifiers: 0, repeatsUnbounded: false });
      // Step over the group-syntax "?" so it is not counted as a quantifier.
      i += pattern[i + 1] === '?' ? 2 : 1;
      continue;
    }

    if (char === ')') {
      const group = openGroups.pop();
      if (group !== undefined) {
        if (group.quantifiers >= 2) {
          ambiguityIssues.push(
            `Ambiguous quantifiers in group content: ${pattern.slice(group.start + 1, i)}`,
          );
        }
        if (group.repeatsUnbounded) {
          // Peek at what follows the group; the main loop still processes
          // that quantifier as part of the enclosing group.
          const next = pattern[i + 1];
          let outerUnbounded = next === '*' || next === '+';
          if (next === '{') {
            braceQuantifier.lastIndex = i + 1;
            outerUnbounded =
              braceQuantifier.test(pattern) &&
              pattern[braceQuantifier.lastIndex - 2] === ',';
          }
          if (outerUnbounded) highStarHeight = true;

          const parent = openGroups[openGroups.length - 1];
          if (parent !== undefined) parent.repeatsUnbounded = true;
        }
      }
      i++;
      continue;
    }

    if (char === '*' || char === '+' || char === '?') {
      const isLazyModifier = char === '?' && followsQuantifier;
      if (!isLazyModifier) {
        const enclosing = openGroups[openGroups.length - 1];
        if (enclosing !== undefined) {
          enclosing.quantifiers++;
          if (char !== '?') enclosing.repeatsUnbounded = true;
        }
        afterQuantifier = true;
      }
      i++;
      continue;
    }

    if (char === '{') {
      braceQuantifier.lastIndex = i;
      if (braceQuantifier.test(pattern)) {
        const end = braceQuantifier.lastIndex;
        // "{n,}" ends in ",}" and has no upper bound.
        if (pattern[end - 2] === ',') {
          const enclosing = openGroups[openGroups.length - 1];
          if (enclosing !== undefined) enclosing.repeatsUnbounded = true;
        }
        afterQuantifier = true;
        i = end;
        continue;
      }
    }

    i++;
  }

  return highStarHeight
    ? [
        'High star-height pattern detected (potential exponential backtracking)',
        ...ambiguityIssues,
      ]
    : ambiguityIssues;
}
|
||||
|
||||
/**
 * Counts the quantifier tokens in a regex pattern.
 *
 * Counted: `*`, `+`, `?` and well-formed brace quantifiers (`{n}`, `{n,}`,
 * `{n,m}`), each as one token.
 *
 * Not counted: escaped characters, character-class members, the `?` that
 * directly follows `(` (group syntax such as `(?:`), the lazy modifier `?`
 * that follows another quantifier, and braces that do not form a quantifier.
 *
 * @param pattern - Regex source text.
 * @returns Number of quantifier tokens.
 */
function countQuantifiers(pattern: string): number {
  // Sticky: matches a brace quantifier only at the index we point it to.
  const braceQuantifier = /\{\d+(?:,\d*)?\}/y;
  let count = 0;
  let inCharClass = false;
  let afterQuantifier = false;
  let i = 0;

  while (i < pattern.length) {
    const char = pattern[i];
    // Remember whether the previous token was a quantifier so a following
    // "?" can be recognised as a lazy modifier; reset for the next token.
    const followsQuantifier = afterQuantifier;
    afterQuantifier = false;

    if (char === '\\') {
      i += 2; // skip the escaped character as well
      continue;
    }
    if (inCharClass) {
      if (char === ']') inCharClass = false;
      i++;
      continue;
    }
    if (char === '[') {
      inCharClass = true;
      i++;
      continue;
    }

    if (char === '(') {
      // Step over the group-syntax "?" so it is not counted as a quantifier.
      i += pattern[i + 1] === '?' ? 2 : 1;
      continue;
    }

    if (char === '*' || char === '+' || char === '?') {
      const isLazyModifier = char === '?' && followsQuantifier;
      if (!isLazyModifier) {
        count++;
        afterQuantifier = true;
      }
      i++;
      continue;
    }

    if (char === '{') {
      braceQuantifier.lastIndex = i;
      if (braceQuantifier.test(pattern)) {
        count++;
        afterQuantifier = true;
        i = braceQuantifier.lastIndex;
        continue;
      }
    }

    i++;
  }

  return count;
}
|
||||
|
||||
/**
 * Returns `pattern` with escaped characters and character classes removed,
 * leaving only text in which `(` really opens a group.
 */
function stripEscapesAndCharClasses(pattern: string): string {
  let structural = '';
  let inCharClass = false;

  for (let i = 0; i < pattern.length; i++) {
    const char = pattern[i];
    if (char === '\\') {
      i++; // drop the escaped character as well
      continue;
    }
    if (inCharClass) {
      if (char === ']') inCharClass = false;
      continue;
    }
    if (char === '[') {
      inCharClass = true;
      continue;
    }
    structural += char;
  }

  return structural;
}

/**
 * Measures a regex pattern and collects every limit violation and ReDoS
 * warning, without throwing.
 *
 * Limits come from `options`; a missing or `undefined` field falls back to
 * the module default, so passing `{ maxLength: undefined }` cannot disable
 * a check.
 *
 * When the pattern is longer than `maxLength`, only the length, nesting and
 * alternation limits are evaluated. The structural detectors are skipped so
 * that an oversized (possibly hostile) pattern cannot make the analysis
 * itself expensive.
 *
 * NOTE(review): `maxQuantifierLength` is accepted but not enforced here; its
 * intended meaning is not evident from this file — confirm before wiring it
 * up.
 *
 * @param pattern - Regex source text. It is not compiled here, so it does
 *   not have to be syntactically valid.
 * @param options - Optional overrides for the default limits.
 * @returns The measured metrics; `isRedosProne` is true when `issues` is
 *   non-empty.
 */
export function analyzeRegexComplexity(
  pattern: string,
  options?: RegexValidationOptions,
): RegexComplexityMetrics {
  const maxLength = options?.maxLength ?? DEFAULT_OPTIONS.maxLength;
  const maxNestingDepth =
    options?.maxNestingDepth ?? DEFAULT_OPTIONS.maxNestingDepth;
  const maxAlternations =
    options?.maxAlternations ?? DEFAULT_OPTIONS.maxAlternations;
  const issues: string[] = [];

  const length = pattern.length;
  const nestingDepth = calculateNestingDepth(pattern);
  const alternationCount = countAlternations(pattern);
  const quantifierCount = countQuantifiers(pattern);
  // Count only real group openers, consistent with how nesting depth and
  // alternations ignore escapes and character classes.
  const groupCount = countChar(stripEscapesAndCharClasses(pattern), '(');

  const withinLength = length <= maxLength;
  if (!withinLength) {
    issues.push(`Pattern length (${length}) exceeds maximum (${maxLength})`);
  }

  if (nestingDepth > maxNestingDepth) {
    issues.push(`Nesting depth (${nestingDepth}) exceeds maximum (${maxNestingDepth})`);
  }

  if (alternationCount > maxAlternations) {
    issues.push(`Alternation count (${alternationCount}) exceeds maximum (${maxAlternations})`);
  }

  if (withinLength) {
    issues.push(...detectNestedQuantifiers(pattern));
    issues.push(...detectOverlappingAlternations(pattern));
    issues.push(...detectCatastrophicBacktracking(pattern));
  }

  return {
    length,
    nestingDepth,
    alternationCount,
    quantifierCount,
    groupCount,
    isRedosProne: issues.length > 0,
    issues,
  };
}
|
||||
|
||||
/**
 * Validates a regex pattern and returns its complexity metrics.
 *
 * Checks, in order: the pattern is a non-empty string, it compiles with
 * `new RegExp(pattern)` (no flags), and the complexity analysis reports no
 * issues.
 *
 * @param pattern - Regex source text to validate.
 * @param options - Optional overrides for the default limits.
 * @returns The metrics of a pattern that passed every check.
 * @throws {RegexValidationError} If the pattern is empty or not a string,
 *   has invalid syntax, or the analysis reports at least one issue.
 */
export function validateRegexPattern(
  pattern: string,
  options?: RegexValidationOptions,
): RegexComplexityMetrics {
  // Runtime guard: callers outside the type system may pass anything.
  if (typeof pattern !== 'string' || pattern.length === 0) {
    throw new RegexValidationError(
      // Keep the error's `pattern` field a string even for non-string input.
      String(pattern ?? ''),
      'Pattern must be a non-empty string',
    );
  }

  try {
    new RegExp(pattern);
  } catch (err: unknown) {
    const detail = err instanceof Error ? err.message : String(err);
    throw new RegexValidationError(pattern, `Invalid regex syntax: ${detail}`);
  }

  const metrics = analyzeRegexComplexity(pattern, options);

  if (metrics.isRedosProne) {
    throw new RegexValidationError(
      pattern,
      `ReDoS risk: ${metrics.issues.join('; ')}`,
    );
  }

  return metrics;
}
|
||||
|
||||
/**
 * Boolean form of {@link validateRegexPattern}.
 *
 * @param pattern - Regex source text to validate.
 * @param options - Optional overrides for the default limits.
 * @returns `true` when validation succeeds; `false` when it throws for any
 *   reason.
 */
export function isSafeRegexPattern(
  pattern: string,
  options?: RegexValidationOptions,
): boolean {
  try {
    validateRegexPattern(pattern, options);
    return true;
  } catch {
    // Any failure means the pattern must not be used.
    return false;
  }
}
|
||||
Reference in New Issue
Block a user