diff --git a/packages/db/prisma/schema.prisma b/packages/db/prisma/schema.prisma index dc6538d..6d57397 100644 --- a/packages/db/prisma/schema.prisma +++ b/packages/db/prisma/schema.prisma @@ -309,7 +309,7 @@ model SpamCallAnalysis { model SpamRule { id String @id @default(uuid()) name String @unique - pattern String // Regex pattern - needs ReDoS validation + pattern String @db.VarChar(500) // Regex pattern - validated for ReDoS at application layer decision SpamDecision description String? isActive Boolean @default(true) diff --git a/services/spamshield/src/engine/rule-engine.ts b/services/spamshield/src/engine/rule-engine.ts index 18500a7..6af3e46 100644 --- a/services/spamshield/src/engine/rule-engine.ts +++ b/services/spamshield/src/engine/rule-engine.ts @@ -1,5 +1,6 @@ import { PrismaClient, SpamRule } from '@prisma/client'; import { generateRequestId } from '@shieldai/types'; +import { validateRegexPattern, RegexValidationError } from '../utils/regex-validation'; export interface RuleMatch { ruleId: string; @@ -38,7 +39,7 @@ export class RuleEngine { async loadActiveRules(): Promise { const now = new Date(); - + if (this.config.enableCache && this.lastLoadTime) { const elapsed = now.getTime() - this.lastLoadTime.getTime(); if (elapsed < this.config.loadIntervalMs) { @@ -51,10 +52,24 @@ export class RuleEngine { orderBy: { priority: 'desc' }, }); - this.allRules = rules; - this.numberPatternRules = rules.filter(r => r.category === 'number_pattern'); - this.behavioralRules = rules.filter(r => r.category === 'behavioral'); - this.contentRules = rules.filter(r => r.category === 'content'); + const validatedRules: SpamRule[] = []; + for (const rule of rules) { + try { + validateRegexPattern(rule.pattern); + validatedRules.push(rule); + } catch (error) { + if (error instanceof RegexValidationError) { + console.warn(`[RuleEngine] [req:${generateRequestId()}] Rule "${rule.name}" (${rule.id}) ReDoS risk: ${error.reason}, skipping`); + } else { + 
console.error(`[RuleEngine] [req:${generateRequestId()}] Unexpected error validating rule "${rule.name}" (${rule.id}):`, error); + } + } + } + + this.allRules = validatedRules; + this.numberPatternRules = validatedRules.filter(r => (r as any).category === 'number_pattern'); + this.behavioralRules = validatedRules.filter(r => (r as any).category === 'behavioral'); + this.contentRules = validatedRules.filter(r => (r as any).category === 'content'); this.lastLoadTime = now; } @@ -67,19 +82,24 @@ export class RuleEngine { for (const rule of this.allRules) { try { + validateRegexPattern(rule.pattern); const pattern = new RegExp(rule.pattern); if (pattern.test(phoneNumber)) { matches.push({ ruleId: rule.id, ruleName: rule.name, pattern: rule.pattern, - score: rule.score, - priority: rule.priority as 'high' | 'medium' | 'low', + score: (rule as any).score, + priority: (rule as any).priority as 'high' | 'medium' | 'low', matchedAt: new Date(), }); } } catch (error) { - console.error(`[RuleEngine] [req:${generateRequestId()}] Invalid pattern for rule ${rule.id}:`, error); + if (error instanceof RegexValidationError) { + console.warn(`[RuleEngine] [req:${generateRequestId()}] Rule "${rule.name}" (${rule.id}) ReDoS risk at eval: ${error.reason}`); + } else { + console.error(`[RuleEngine] [req:${generateRequestId()}] Invalid pattern for rule ${rule.id}:`, error); + } } } @@ -95,19 +115,24 @@ export class RuleEngine { for (const rule of this.contentRules) { try { + validateRegexPattern(rule.pattern); const pattern = new RegExp(rule.pattern, 'i'); if (pattern.test(smsBody)) { matches.push({ ruleId: rule.id, ruleName: rule.name, pattern: rule.pattern, - score: rule.score, - priority: rule.priority as 'high' | 'medium' | 'low', + score: (rule as any).score, + priority: (rule as any).priority as 'high' | 'medium' | 'low', matchedAt: new Date(), }); } } catch (error) { - console.error(`[RuleEngine] [req:${generateRequestId()}] Invalid pattern for rule ${rule.id}:`, error); + if 
/**
 * Error raised when a regular-expression source string fails validation.
 *
 * `pattern` is the offending source text and `reason` is a human-readable
 * explanation (syntax error or the list of detected ReDoS risks).
 */
export class RegexValidationError extends Error {
  constructor(
    public readonly pattern: string,
    public readonly reason: string,
  ) {
    super(`Regex validation failed for pattern "${pattern}": ${reason}`);
    this.name = 'RegexValidationError';
    // Keeps `instanceof RegexValidationError` working even when the code is
    // down-levelled to a target where subclassing Error loses the prototype.
    Object.setPrototypeOf(this, new.target.prototype);
  }
}

/** Optional limits applied by {@link analyzeRegexComplexity}. */
export interface RegexValidationOptions {
  /** Maximum number of characters allowed in the pattern source. */
  maxLength?: number;
  /** Maximum depth of nested groups. */
  maxNestingDepth?: number;
  /** Maximum number of `|` alternations outside character classes. */
  maxAlternations?: number;
  /** Accepted for interface compatibility; no check in this module reads it. */
  maxQuantifierLength?: number;
}

const DEFAULT_OPTIONS: Required<RegexValidationOptions> = {
  maxLength: 500,
  maxNestingDepth: 10,
  maxAlternations: 20,
  maxQuantifierLength: 100,
};

/** Structural metrics and detected risks for one pattern. */
export interface RegexComplexityMetrics {
  length: number;
  nestingDepth: number;
  alternationCount: number;
  quantifierCount: number;
  groupCount: number;
  isRedosProne: boolean;
  issues: string[];
}

/** A quantifier found in the pattern source. */
interface QuantifierToken {
  /** Source text of the quantifier, including a trailing lazy `?`. */
  text: string;
  /** True for `*`, `+`, `?` and `{n,}`; false for `{n}` and `{n,m}`. */
  risky: boolean;
}

/** Book-keeping for one open group while scanning. */
interface GroupFrame {
  /** Index of the opening `(`, or -1 for the pattern's top level. */
  open: number;
  /** Index of the first body character (after `?:`, `?=`, `?<name>` ...). */
  bodyStart: number;
  /** Index where the alternative currently being read starts. */
  altStart: number;
  /** Source text of the alternatives completed so far at this level. */
  alternatives: string[];
  /** Risky quantifiers applied directly inside this group. */
  riskyQuantifiers: number;
  /** Whether any risky quantifier occurs in this group at any depth. */
  containsRisky: boolean;
}

/** Everything collected by a single pass over the pattern. */
interface ScanResult {
  nestingDepth: number;
  alternationCount: number;
  quantifierCount: number;
  groupCount: number;
  nestedQuantifierIssues: string[];
  overlappingIssues: string[];
  backtrackingIssues: string[];
}

// Sticky so that it only matches exactly at `lastIndex`.
const COUNTED_QUANTIFIER = /\{(\d+)(,(\d*))?\}/y;

/** Reads the quantifier starting at `at`, or returns undefined if there is none. */
function readQuantifier(pattern: string, at: number): QuantifierToken | undefined {
  const ch = pattern.charAt(at);
  let end: number;
  let risky: boolean;

  if (ch === '*' || ch === '+' || ch === '?') {
    end = at + 1;
    risky = true;
  } else if (ch === '{') {
    COUNTED_QUANTIFIER.lastIndex = at;
    const counted = COUNTED_QUANTIFIER.exec(pattern);
    if (counted === null) {
      return undefined; // a literal brace such as the one in `\p{L}`
    }
    end = at + counted[0].length;
    // Only the open-ended form `{n,}` can repeat without limit.
    risky = counted[2] !== undefined && counted[3] === '';
  } else {
    return undefined;
  }

  if (pattern.charAt(end) === '?') {
    end++; // lazy modifier belongs to the same quantifier
  }
  return { text: pattern.slice(at, end), risky };
}

/** Returns the index just past the character class that opens at `open`. */
function skipCharClass(pattern: string, open: number): number {
  let i = open + 1;
  while (i < pattern.length) {
    const ch = pattern.charAt(i);
    if (ch === '\\') {
      i += 2;
      continue;
    }
    i++;
    if (ch === ']') {
      return i;
    }
  }
  return pattern.length;
}

/** Number of characters after `(` that form a group prefix such as `?:`. */
function groupPrefixLength(pattern: string, open: number): number {
  if (pattern.charAt(open + 1) !== '?') {
    return 0;
  }
  const kind = pattern.charAt(open + 2);
  if (kind === ':' || kind === '=' || kind === '!') {
    return 2;
  }
  if (kind === '<') {
    const next = pattern.charAt(open + 3);
    if (next === '=' || next === '!') {
      return 3;
    }
    const nameEnd = pattern.indexOf('>', open + 3);
    if (nameEnd !== -1) {
      return nameEnd - open;
    }
  }
  // Malformed prefix: still skip the `?` so it is not read as a quantifier.
  return 1;
}

function createFrame(open: number, bodyStart: number): GroupFrame {
  return {
    open,
    bodyStart,
    altStart: bodyStart,
    alternatives: [],
    riskyQuantifiers: 0,
    containsRisky: false,
  };
}

/** Reports alternatives of one group where one is a textual prefix of another. */
function findOverlaps(alternatives: readonly string[], groupText: string): string[] {
  const found: string[] = [];
  const candidates = alternatives.filter((alt) => alt.length > 0);
  candidates.forEach((a, index) => {
    for (const b of candidates.slice(index + 1)) {
      if (a.startsWith(b) || b.startsWith(a)) {
        found.push(`Overlapping alternation detected: "${a}" and "${b}" in group ${groupText}`);
      }
    }
  });
  return found;
}

/**
 * Walks the pattern once, honouring escapes, character classes and group
 * prefixes, and collects metrics plus heuristic ReDoS findings.
 *
 * Tolerates syntactically invalid input: a stray `)` is ignored and groups
 * left open at the end are simply never inspected.
 */
function scanPattern(pattern: string): ScanResult {
  const result: ScanResult = {
    nestingDepth: 0,
    alternationCount: 0,
    quantifierCount: 0,
    groupCount: 0,
    nestedQuantifierIssues: [],
    overlappingIssues: [],
    backtrackingIssues: [],
  };

  const enclosing: GroupFrame[] = [];
  let current = createFrame(-1, 0);
  let i = 0;

  while (i < pattern.length) {
    const ch = pattern.charAt(i);
    let closed: GroupFrame | undefined;

    if (ch === '(') {
      result.groupCount++;
      enclosing.push(current);
      const bodyStart = i + 1 + groupPrefixLength(pattern, i);
      current = createFrame(i, bodyStart);
      result.nestingDepth = Math.max(result.nestingDepth, enclosing.length);
      i = bodyStart;
      continue;
    }

    if (ch === '|') {
      result.alternationCount++;
      current.alternatives.push(pattern.slice(current.altStart, i));
      current.altStart = i + 1;
      i++;
      continue;
    }

    if (ch === ')') {
      const parent = enclosing.pop();
      if (parent !== undefined) {
        current.alternatives.push(pattern.slice(current.altStart, i));
        closed = current;
        current = parent;
      }
      i++;
    } else if (ch === '\\') {
      i += 2; // the escaped character is always a literal atom
    } else if (ch === '[') {
      i = skipCharClass(pattern, i);
    } else {
      i++;
    }

    // `i` now sits just past an atom (or a closed group); see what repeats it.
    const atomEnd = i;
    const quantifier = readQuantifier(pattern, i);
    if (quantifier !== undefined) {
      result.quantifierCount++;
      if (quantifier.risky) {
        current.riskyQuantifiers++;
        current.containsRisky = true;
      }
      i += quantifier.text.length;
    }

    if (closed === undefined) {
      continue;
    }

    const groupText = pattern.slice(closed.open, atomEnd);
    if (quantifier !== undefined && quantifier.risky && closed.containsRisky) {
      result.nestedQuantifierIssues.push(
        `Nested quantifier detected in group: ${groupText}${quantifier.text}`,
      );
    }
    result.overlappingIssues.push(...findOverlaps(closed.alternatives, groupText));
    if (closed.riskyQuantifiers >= 2) {
      const body = pattern.slice(closed.bodyStart, atomEnd - 1);
      result.backtrackingIssues.push(`Ambiguous quantifiers in group content: ${body}`);
    }
    // A quantifier buried in a child group is still "inside" every ancestor,
    // so wrapping in extra parentheses cannot hide it from the nested check.
    if (closed.containsRisky) {
      current.containsRisky = true;
    }
  }

  return result;
}

/**
 * Computes structural metrics for `pattern` and lists heuristic ReDoS risks.
 *
 * Checks performed: the three configured limits (length, nesting depth,
 * alternation count); a group repeated by `*`, `+`, `?` or `{n,}` whose body
 * contains such a quantifier at any depth; alternatives in one group where
 * one is a textual prefix of another; and two or more such quantifiers
 * applied directly inside one group. Bounded `{n}` / `{n,m}` repetitions are
 * counted in `quantifierCount` but never reported as a risk.
 *
 * Does not verify that `pattern` is valid regex syntax and does not throw.
 *
 * @param pattern Regular-expression source text.
 * @param options Limit overrides; an omitted or `undefined` field falls back
 *   to the default for that field.
 * @returns The metrics, with `isRedosProne` true when `issues` is non-empty.
 */
export function analyzeRegexComplexity(
  pattern: string,
  options?: RegexValidationOptions,
): RegexComplexityMetrics {
  // `??` rather than object spread: an explicit `undefined` must not silently
  // switch a limit off.
  const maxLength = options?.maxLength ?? DEFAULT_OPTIONS.maxLength;
  const maxNestingDepth = options?.maxNestingDepth ?? DEFAULT_OPTIONS.maxNestingDepth;
  const maxAlternations = options?.maxAlternations ?? DEFAULT_OPTIONS.maxAlternations;

  const scan = scanPattern(pattern);
  const length = pattern.length;
  const issues: string[] = [];

  if (length > maxLength) {
    issues.push(`Pattern length (${length}) exceeds maximum (${maxLength})`);
  }
  if (scan.nestingDepth > maxNestingDepth) {
    issues.push(`Nesting depth (${scan.nestingDepth}) exceeds maximum (${maxNestingDepth})`);
  }
  if (scan.alternationCount > maxAlternations) {
    issues.push(`Alternation count (${scan.alternationCount}) exceeds maximum (${maxAlternations})`);
  }
  issues.push(
    ...scan.nestedQuantifierIssues,
    ...scan.overlappingIssues,
    ...scan.backtrackingIssues,
  );

  return {
    length,
    nestingDepth: scan.nestingDepth,
    alternationCount: scan.alternationCount,
    quantifierCount: scan.quantifierCount,
    groupCount: scan.groupCount,
    isRedosProne: issues.length > 0,
    issues,
  };
}

/**
 * Validates `pattern` and returns its metrics when it is acceptable.
 *
 * @param pattern Regular-expression source text.
 * @param options Limit overrides passed through to {@link analyzeRegexComplexity}.
 * @returns Metrics for the accepted pattern (`isRedosProne` is always false).
 * @throws RegexValidationError when the pattern is empty or not a string,
 *   when `new RegExp(pattern)` rejects it, or when any ReDoS risk is found.
 */
export function validateRegexPattern(
  pattern: string,
  options?: RegexValidationOptions,
): RegexComplexityMetrics {
  if (!pattern || typeof pattern !== 'string') {
    throw new RegexValidationError(pattern ?? '', 'Pattern must be a non-empty string');
  }

  try {
    new RegExp(pattern);
  } catch (err) {
    throw new RegexValidationError(
      pattern,
      `Invalid regex syntax: ${(err as Error).message}`,
    );
  }

  const metrics = analyzeRegexComplexity(pattern, options);

  if (metrics.isRedosProne) {
    throw new RegexValidationError(
      pattern,
      `ReDoS risk: ${metrics.issues.join('; ')}`,
    );
  }

  return metrics;
}

/**
 * Boolean convenience wrapper around {@link validateRegexPattern}.
 *
 * @returns True when validation succeeds; false when it throws for any reason.
 */
export function isSafeRegexPattern(
  pattern: string,
  options?: RegexValidationOptions,
): boolean {
  try {
    validateRegexPattern(pattern, options);
    return true;
  } catch {
    return false;
  }
}
from '../src/utils/regex-validation'; + +describe('Regex Validation', () => { + describe('validateRegexPattern', () => { + describe('syntax validation', () => { + it('accepts valid simple patterns', () => { + expect(() => validateRegexPattern('\\d{3}-\\d{3}-\\d{4}')).not.toThrow(); + }); + + it('accepts valid phone number patterns', () => { + expect(() => validateRegexPattern('^\\+1\\d{10}$')).not.toThrow(); + }); + + it('accepts valid character class patterns', () => { + expect(() => validateRegexPattern('^[a-zA-Z0-9]+$')).not.toThrow(); + }); + + it('accepts valid alternation patterns', () => { + expect(() => validateRegexPattern('^(phone|fax|mobile)$')).not.toThrow(); + }); + + it('rejects invalid regex syntax', () => { + expect(() => validateRegexPattern('(unclosed')).toThrow(RegexValidationError); + }); + + it('rejects empty pattern', () => { + expect(() => validateRegexPattern('')).toThrow(RegexValidationError); + }); + }); + + describe('ReDoS detection - nested quantifiers', () => { + it('detects nested quantifier (a+)*', () => { + expect(() => validateRegexPattern('(a+)*')).toThrow(RegexValidationError); + }); + + it('detects nested quantifier (a*)+', () => { + expect(() => validateRegexPattern('(a*)+')).toThrow(RegexValidationError); + }); + + it('detects nested quantifier ([0-9]+)+', () => { + expect(() => validateRegexPattern('([0-9]+)+')).toThrow(RegexValidationError); + }); + + it('detects nested quantifier (a|b+)*', () => { + expect(() => validateRegexPattern('(a|b+)*')).toThrow(RegexValidationError); + }); + }); + + describe('ReDoS detection - overlapping alternations', () => { + it('detects overlapping alternation (a|aa)*', () => { + expect(() => validateRegexPattern('(a|aa)*')).toThrow(RegexValidationError); + }); + + it('detects overlapping alternation (abc|ab|a)*', () => { + expect(() => validateRegexPattern('(abc|ab|a)*')).toThrow(RegexValidationError); + }); + + it('detects overlapping alternation (foo|foot)*', () => { + expect(() => 
validateRegexPattern('(foo|foot)*')).toThrow(RegexValidationError); + }); + }); + + describe('ReDoS detection - complexity limits', () => { + it('rejects pattern exceeding max length', () => { + const longPattern = 'a'.repeat(600); + expect(() => validateRegexPattern(longPattern)).toThrow(RegexValidationError); + }); + + it('rejects pattern exceeding max nesting depth', () => { + const deepPattern = '((((' + '((((a))))' + '))))'; + expect(() => validateRegexPattern(deepPattern, { maxNestingDepth: 3 })).toThrow(RegexValidationError); + }); + + it('rejects pattern exceeding max alternations', () => { + const manyAlts = '(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x)'; + expect(() => validateRegexPattern(manyAlts, { maxAlternations: 5 })).toThrow(RegexValidationError); + }); + }); + + describe('safe patterns', () => { + it('accepts anchored phone pattern', () => { + const metrics = validateRegexPattern('^\\+\\d{1,3}\\d{4,14}$'); + expect(metrics.isRedosProne).toBe(false); + }); + + it('accepts simple character class', () => { + const metrics = validateRegexPattern('^[A-Z]{2}\\d{6}$'); + expect(metrics.isRedosProne).toBe(false); + }); + + it('accepts non-overlapping alternation', () => { + const metrics = validateRegexPattern('^(spammer|blocker|filter)$'); + expect(metrics.isRedosProne).toBe(false); + }); + + it('accepts escaped special characters', () => { + const metrics = validateRegexPattern('\\(\\d{3}\\) \\d{3}-\\d{4}'); + expect(metrics.isRedosProne).toBe(false); + }); + }); + }); + + describe('analyzeRegexComplexity', () => { + it('returns correct metrics for simple pattern', () => { + const metrics = analyzeRegexComplexity('\\d{3}-\\d{3}-\\d{4}'); + expect(metrics.length).toBe(17); + expect(metrics.nestingDepth).toBe(0); + expect(metrics.alternationCount).toBe(0); + expect(metrics.groupCount).toBe(0); + }); + + it('returns correct metrics for pattern with groups', () => { + const metrics = analyzeRegexComplexity('(\\d{3})-(\\d{3})-(\\d{4})'); + 
expect(metrics.groupCount).toBe(3); + expect(metrics.nestingDepth).toBe(1); + }); + + it('returns correct metrics for pattern with alternations', () => { + const metrics = analyzeRegexComplexity('(phone|fax|mobile)'); + expect(metrics.alternationCount).toBe(2); + }); + + it('marks nested quantifier as ReDoS prone', () => { + const metrics = analyzeRegexComplexity('(a+)*'); + expect(metrics.isRedosProne).toBe(true); + expect(metrics.issues.length).toBeGreaterThan(0); + }); + + it('marks overlapping alternation as ReDoS prone', () => { + const metrics = analyzeRegexComplexity('(a|aa)*'); + expect(metrics.isRedosProne).toBe(true); + }); + + it('marks long pattern as ReDoS prone', () => { + const longPattern = 'a'.repeat(600); + const metrics = analyzeRegexComplexity(longPattern); + expect(metrics.isRedosProne).toBe(true); + }); + + it('marks deep nesting as ReDoS prone', () => { + const deepPattern = '((((' + '((((a))))' + '))))'; + const metrics = analyzeRegexComplexity(deepPattern, { maxNestingDepth: 3 }); + expect(metrics.isRedosProne).toBe(true); + }); + + it('marks high alternation count as ReDoS prone', () => { + const manyAlts = '(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x)'; + const metrics = analyzeRegexComplexity(manyAlts, { maxAlternations: 5 }); + expect(metrics.isRedosProne).toBe(true); + }); + }); + + describe('isSafeRegexPattern', () => { + it('returns true for safe patterns', () => { + expect(isSafeRegexPattern('^\\+\\d{1,3}\\d{4,14}$')).toBe(true); + expect(isSafeRegexPattern('^[A-Z]{2}\\d{6}$')).toBe(true); + expect(isSafeRegexPattern('\\d{3}-\\d{3}-\\d{4}')).toBe(true); + }); + + it('returns false for nested quantifiers', () => { + expect(isSafeRegexPattern('(a+)*')).toBe(false); + expect(isSafeRegexPattern('([0-9]+)+')).toBe(false); + }); + + it('returns false for overlapping alternations', () => { + expect(isSafeRegexPattern('(a|aa)*')).toBe(false); + expect(isSafeRegexPattern('(foo|foot)*')).toBe(false); + }); + + it('returns false for 
invalid syntax', () => { + expect(isSafeRegexPattern('(unclosed')).toBe(false); + }); + + it('returns false for empty pattern', () => { + expect(isSafeRegexPattern('')).toBe(false); + }); + + it('returns false for excessively long patterns', () => { + expect(isSafeRegexPattern('a'.repeat(600))).toBe(false); + }); + }); + + describe('RegexValidationError', () => { + it('includes pattern and reason in error', () => { + const pattern = '(a+)*'; + try { + validateRegexPattern(pattern); + } catch (err) { + const validationErr = err as RegexValidationError; + expect(validationErr.name).toBe('RegexValidationError'); + expect(validationErr.pattern).toBe(pattern); + expect(validationErr.reason).toContain('ReDoS risk'); + } + }); + + it('includes syntax error message', () => { + const pattern = '(unclosed'; + try { + validateRegexPattern(pattern); + } catch (err) { + const validationErr = err as RegexValidationError; + expect(validationErr.pattern).toBe(pattern); + expect(validationErr.reason).toContain('Invalid regex syntax'); + } + }); + + it('includes length error message', () => { + const longPattern = 'a'.repeat(600); + try { + validateRegexPattern(longPattern); + } catch (err) { + const validationErr = err as RegexValidationError; + expect(validationErr.reason).toContain('exceeds maximum'); + } + }); + }); + + describe('edge cases', () => { + it('handles escaped characters correctly', () => { + expect(() => validateRegexPattern('\\(\\d+\\)')).not.toThrow(); + }); + + it('handles character classes without false positives', () => { + expect(() => validateRegexPattern('[a-z]+')).not.toThrow(); + expect(() => validateRegexPattern('[^0-9]+')).not.toThrow(); + expect(() => validateRegexPattern('[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+')).not.toThrow(); + }); + + it('handles non-capturing groups', () => { + expect(() => validateRegexPattern('(?:abc)+')).not.toThrow(); + }); + + it('handles lookaheads', () => { + expect(() => validateRegexPattern('(?=\\d{3})\\d+')).not.toThrow(); + 
}); + + it('handles quantifiers with ranges', () => { + expect(() => validateRegexPattern('\\d{1,3}')).not.toThrow(); + expect(() => validateRegexPattern('[a-z]{2,4}')).not.toThrow(); + }); + + it('handles Unicode property escapes', () => { + expect(() => validateRegexPattern('\\p{L}+')).not.toThrow(); + }); + + it('handles multiline and dotall flags in pattern', () => { + expect(() => validateRegexPattern('^.+$')).not.toThrow(); + }); + + it('counts quantifiers correctly', () => { + const metrics = analyzeRegexComplexity('a+b*c?d{2}'); + expect(metrics.quantifierCount).toBe(4); + }); + + it('handles special characters in character classes', () => { + expect(() => validateRegexPattern('[^\\w\\s]+')).not.toThrow(); + }); + }); +});