- Create regex-validation utility with ReDoS detection (nested quantifiers, overlapping alternations, complexity limits)
- Add @db.VarChar(500) constraint on pattern field in Prisma schema
- Integrate validation in rule-engine at load time and evaluation time
- Add 46 unit tests covering syntax, ReDoS patterns, complexity, edge cases

Co-Authored-By: Paperclip <noreply@paperclip.ing>
319 lines
7.2 KiB
TypeScript
319 lines
7.2 KiB
TypeScript
/**
 * Error thrown when a regular-expression pattern is rejected by validation.
 *
 * The offending pattern and the rejection reason are exposed as read-only
 * fields so callers can inspect them without parsing the message text.
 */
export class RegexValidationError extends Error {
  /**
   * @param pattern - The pattern source that failed validation.
   * @param reason - Human-readable explanation of why it was rejected.
   */
  constructor(
    public readonly pattern: string,
    public readonly reason: string,
  ) {
    super(`Regex validation failed for pattern "${pattern}": ${reason}`);
    // When compiled to an ES5 target, `super()` on a built-in such as Error
    // returns a plain Error and drops this subclass from the prototype chain.
    // Restoring it keeps `instanceof RegexValidationError` working everywhere.
    Object.setPrototypeOf(this, new.target.prototype);
    this.name = 'RegexValidationError';
  }
}
|
|
|
|
/**
 * Optional limits applied when a pattern is analyzed. Any field left out
 * falls back to the matching entry of `DEFAULT_OPTIONS`.
 */
export interface RegexValidationOptions {
  /** Largest accepted pattern length, measured with `String.prototype.length`. */
  maxLength?: number;
  /** Deepest accepted level of parenthesis nesting. */
  maxNestingDepth?: number;
  /** Largest accepted number of `|` alternation operators. */
  maxAlternations?: number;
  /**
   * Quantifier-related bound.
   * NOTE(review): no check visible alongside this declaration reads this
   * value; confirm its intended meaning with whichever code consumes it.
   */
  maxQuantifierLength?: number;
}

/** Limits used for every option the caller does not supply. */
const DEFAULT_OPTIONS: Required<RegexValidationOptions> = {
  maxLength: 500,
  maxNestingDepth: 10,
  maxAlternations: 20,
  maxQuantifierLength: 100,
};
|
|
|
|
/** Measurements and findings produced by analyzing one regex pattern. */
export interface RegexComplexityMetrics {
  /** Length of the pattern source in UTF-16 code units. */
  length: number;
  /** Deepest level of parenthesis nesting found in the pattern. */
  nestingDepth: number;
  /** Number of `|` alternation operators counted in the pattern. */
  alternationCount: number;
  /** Number of quantifiers counted in the pattern. */
  quantifierCount: number;
  /** Number of group openings counted in the pattern. */
  groupCount: number;
  /** `true` whenever `issues` contains at least one entry. */
  isRedosProne: boolean;
  /** Human-readable description of every limit violation or risky construct found. */
  issues: string[];
}
|
|
|
|
/**
 * Counts the UTF-16 code units of `str` that are strictly equal to `char`.
 *
 * The comparison is made one code unit at a time, so an empty or
 * multi-character `char` never matches and the result is 0.
 *
 * @param str - Text to scan.
 * @param char - Single character to look for.
 * @returns Number of occurrences of `char` in `str`.
 */
function countChar(str: string, char: string): number {
  // split('') yields individual code units, the same granularity as indexing.
  return str.split('').filter((unit) => unit === char).length;
}
|
|
|
|
/**
 * Computes the deepest level of parenthesis nesting in a regex source.
 *
 * Characters preceded by a backslash and characters inside a `[...]`
 * character class are ignored, since parentheses there are literals.
 *
 * @param pattern - Regex source text. It does not need to be syntactically valid.
 * @returns The maximum number of simultaneously open groups (0 when there are none).
 */
function calculateNestingDepth(pattern: string): number {
  let maxDepth = 0;
  let currentDepth = 0;
  let inCharClass = false;
  let escaped = false;

  for (let i = 0; i < pattern.length; i++) {
    const char = pattern[i];

    if (escaped) {
      // This character was escaped by the previous backslash: treat as literal.
      escaped = false;
      continue;
    }

    if (char === '\\') {
      escaped = true;
      continue;
    }

    if (inCharClass) {
      // Only an unescaped `]` ends the class; everything else is a literal.
      if (char === ']') inCharClass = false;
      continue;
    }

    if (char === '[') {
      inCharClass = true;
      continue;
    }

    if (char === '(') {
      currentDepth++;
      maxDepth = Math.max(maxDepth, currentDepth);
    } else if (char === ')' && currentDepth > 0) {
      // Never go below zero: an unmatched `)` in a malformed pattern must not
      // cause groups that follow it to be under-counted.
      currentDepth--;
    }
  }

  return maxDepth;
}
|
|
|
|
/**
 * Counts the `|` alternation operators in a regex source.
 *
 * A `|` that is escaped with a backslash or that sits inside a `[...]`
 * character class is a literal and is not counted.
 *
 * @param pattern - Regex source text.
 * @returns Number of unescaped `|` characters outside character classes.
 */
function countAlternations(pattern: string): number {
  let total = 0;
  let insideClass = false;
  let skipNext = false;

  for (let idx = 0; idx < pattern.length; idx += 1) {
    if (skipNext) {
      // The previous character was a backslash, so this one is a literal.
      skipNext = false;
      continue;
    }

    switch (pattern.charAt(idx)) {
      case '\\':
        skipNext = true;
        break;
      case '[':
        insideClass = true;
        break;
      case ']':
        insideClass = false;
        break;
      case '|':
        if (!insideClass) total += 1;
        break;
      default:
        break;
    }
  }

  return total;
}
|
|
|
|
/**
 * Reports groups that contain a quantifier and are themselves quantified,
 * e.g. `(a+)+` or `(a*)*`.
 *
 * This is a purely textual heuristic: it looks for `(`, at least one
 * character, a `*`/`+`/`?`, then `)` immediately followed by `*`/`+`/`?`,
 * with no `)` in between. It does not understand escapes or character
 * classes, and it does not consider `{n,m}` as the outer quantifier.
 *
 * @param pattern - Regex source text.
 * @returns One message per distinct matching group text, in order of first appearance.
 */
function detectNestedQuantifiers(pattern: string): string[] {
  const quantifiedGroups = pattern.match(/\(([^)]+[*+?][^)]*)\)[*+?]/g) ?? [];

  // The same group text can occur several times in a pattern; report it once.
  return Array.from(new Set(quantifiedGroups)).map(
    (group) => `Nested quantifier detected in group: ${group}`,
  );
}
|
|
|
|
/**
 * Reports groups whose alternatives overlap because one alternative is a
 * prefix of another, e.g. `(a|ab)`.
 *
 * Only innermost `( ... )` spans that contain no `)` are inspected, and the
 * alternatives are compared as plain text after trimming surrounding
 * whitespace. A leading group marker (`?:`, `?=`, `?!`, `?<=`, `?<!`,
 * `?<name>`) is removed first so it is not mistaken for part of the first
 * alternative.
 *
 * @param pattern - Regex source text.
 * @returns One message per overlapping pair of alternatives found.
 */
function detectOverlappingAlternations(pattern: string): string[] {
  const issues: string[] = [];
  // Syntax that may directly follow `(` and is not part of the group's content.
  const groupPrefix = /^\?(?::|=|!|<=|<!|<[^>]*>)/;
  const groups = pattern.match(/\(([^)]+)\)/g) ?? [];

  for (const group of groups) {
    const innerContent = group.slice(1, -1).replace(groupPrefix, '');
    const alternatives = innerContent.split('|').map((alt) => alt.trim());

    if (alternatives.length < 2) continue;

    for (let i = 0; i < alternatives.length; i++) {
      const a = alternatives[i];
      // Empty alternatives are skipped: every string starts with ''.
      if (!a) continue;

      for (let j = i + 1; j < alternatives.length; j++) {
        const b = alternatives[j];

        if (b && (a.startsWith(b) || b.startsWith(a))) {
          issues.push(
            `Overlapping alternation detected: "${a}" and "${b}" in group ${group}`
          );
        }
      }
    }
  }

  return issues;
}
|
|
|
|
/**
 * Applies two textual heuristics for constructs associated with excessive
 * backtracking.
 *
 * 1. One or more consecutive quantified groups (`(...)*` or `(...)+`, with no
 *    nested parentheses) immediately followed by another `*` or `+`.
 * 2. A group whose content (text between `(` and the next `)`) contains two
 *    or more of `*`, `+`, `?`. A leading group marker (`?:`, `?=`, `?!`,
 *    `?<=`, `?<!`, `?<name>`) is ignored for this test, because its `?` is
 *    group syntax rather than a quantifier.
 *
 * Escapes and character classes are not interpreted, and a lazy quantifier
 * such as `+?` counts as two quantifier characters.
 *
 * @param pattern - Regex source text.
 * @returns One message per finding; empty when nothing was detected.
 */
function detectCatastrophicBacktracking(pattern: string): string[] {
  const issues: string[] = [];

  const starHeightPattern = /(\([^()]*\)[*+])+[*+]/;
  if (starHeightPattern.test(pattern)) {
    issues.push('High star-height pattern detected (potential exponential backtracking)');
  }

  const ambiguousQuantifiers = /[*+?][^)]*[*+?]/;
  // Syntax that may directly follow `(` and is not part of the group's content.
  const groupPrefix = /^\?(?::|=|!|<=|<!|<[^>]*>)/;
  const groupContents = pattern.match(/(?<=\()[^)]*(?=\))/g) ?? [];

  for (const content of groupContents) {
    if (ambiguousQuantifiers.test(content.replace(groupPrefix, ''))) {
      // Report the content exactly as written so it can be located in the pattern.
      issues.push(`Ambiguous quantifiers in group content: ${content}`);
    }
  }

  return issues;
}
|
|
|
|
/**
 * Counts the quantifiers in a regex source: each `*`, `+`, `?`, and each
 * `{...}` span that has a closing brace.
 *
 * Not counted, because they are not quantifiers:
 * - characters escaped with a backslash;
 * - anything inside a `[...]` character class;
 * - the `?` that directly follows an opening `(` (as in `(?:`, `(?=`, `(?<name>`).
 *
 * A lazy modifier such as the `?` in `+?` is still counted as its own quantifier.
 *
 * @param pattern - Regex source text.
 * @returns Number of quantifiers found.
 */
function countQuantifiers(pattern: string): number {
  let count = 0;
  let escaped = false;
  let inCharClass = false;
  let afterGroupOpen = false;

  for (let i = 0; i < pattern.length; i++) {
    const char = pattern[i];
    // Remember whether the previous significant character opened a group,
    // then reset so the flag only ever applies to the very next character.
    const followsGroupOpen = afterGroupOpen;
    afterGroupOpen = false;

    if (escaped) {
      escaped = false;
      continue;
    }

    if (char === '\\') {
      escaped = true;
      continue;
    }

    if (inCharClass) {
      if (char === ']') inCharClass = false;
      continue;
    }

    if (char === '[') {
      inCharClass = true;
      continue;
    }

    if (char === '(') {
      afterGroupOpen = true;
      continue;
    }

    // `(?` introduces a group construct; that `?` does not quantify anything.
    if (char === '?' && followsGroupOpen) continue;

    if (char === '*' || char === '+' || char === '?') {
      count++;
      continue;
    }

    if (char === '{') {
      const closingBrace = pattern.indexOf('}', i);
      if (closingBrace !== -1) {
        count++;
        // Jump to the closing brace so the span's content is not re-scanned.
        i = closingBrace;
      }
    }
  }

  return count;
}
|
|
|
|
/**
 * Measures a regex source and collects every limit violation and risky
 * construct found. This function never throws on findings; it reports them
 * in the returned `issues` list.
 *
 * The pattern is analyzed as text only and is not compiled here, so it does
 * not need to be syntactically valid.
 *
 * Notes:
 * - `groupCount` is the raw number of `(` characters in the pattern,
 *   including escaped ones and ones inside character classes.
 * - `maxQuantifierLength` is accepted in `options` but no check in this
 *   function uses it.
 *
 * @param pattern - Regex source text.
 * @param options - Limits overriding the defaults. A field that is missing
 *   or nullish falls back to its default.
 * @returns The metrics, with `isRedosProne` set when at least one issue was recorded.
 */
export function analyzeRegexComplexity(
  pattern: string,
  options?: RegexValidationOptions,
): RegexComplexityMetrics {
  // Resolve each limit with `??` instead of spreading `options` over the
  // defaults: a key explicitly set to `undefined` would otherwise replace the
  // default, and every `x > undefined` comparison would silently pass.
  const maxLength = options?.maxLength ?? DEFAULT_OPTIONS.maxLength;
  const maxNestingDepth = options?.maxNestingDepth ?? DEFAULT_OPTIONS.maxNestingDepth;
  const maxAlternations = options?.maxAlternations ?? DEFAULT_OPTIONS.maxAlternations;

  const length = pattern.length;
  const nestingDepth = calculateNestingDepth(pattern);
  const alternationCount = countAlternations(pattern);
  const quantifierCount = countQuantifiers(pattern);
  const groupCount = countChar(pattern, '(');

  const issues: string[] = [];

  if (length > maxLength) {
    issues.push(`Pattern length (${length}) exceeds maximum (${maxLength})`);
  }

  if (nestingDepth > maxNestingDepth) {
    issues.push(`Nesting depth (${nestingDepth}) exceeds maximum (${maxNestingDepth})`);
  }

  if (alternationCount > maxAlternations) {
    issues.push(`Alternation count (${alternationCount}) exceeds maximum (${maxAlternations})`);
  }

  // Structural heuristics, appended in a fixed order after the limit checks.
  issues.push(
    ...detectNestedQuantifiers(pattern),
    ...detectOverlappingAlternations(pattern),
    ...detectCatastrophicBacktracking(pattern),
  );

  return {
    length,
    nestingDepth,
    alternationCount,
    quantifierCount,
    groupCount,
    isRedosProne: issues.length > 0,
    issues,
  };
}
|
|
|
|
/**
 * Validates a regex source and returns its complexity metrics.
 *
 * Checks run in this order: the input must be a non-empty string, it must
 * compile with `new RegExp(pattern)` (no flags), and the complexity analysis
 * must record no issues.
 *
 * @param pattern - Regex source text to validate.
 * @param options - Limits forwarded to `analyzeRegexComplexity`.
 * @returns The metrics of a pattern that passed every check.
 * @throws RegexValidationError if the input is empty or not a string, if it
 *   is not valid regex syntax, or if the analysis reports any issue.
 */
export function validateRegexPattern(
  pattern: string,
  options?: RegexValidationOptions,
): RegexComplexityMetrics {
  // The declared type cannot be trusted at runtime (plain-JS callers,
  // deserialized data), so inspect the value as `unknown`.
  const raw: unknown = pattern;
  if (typeof raw !== 'string' || raw.length === 0) {
    // Coerce so the error's `pattern` field really holds a string.
    const shown = typeof raw === 'string' ? raw : String(raw ?? '');
    throw new RegexValidationError(shown, 'Pattern must be a non-empty string');
  }

  try {
    // Compiled only to check syntax; the resulting RegExp is discarded.
    new RegExp(pattern);
  } catch (err) {
    const detail = err instanceof Error ? err.message : String(err);
    throw new RegexValidationError(pattern, `Invalid regex syntax: ${detail}`);
  }

  const metrics = analyzeRegexComplexity(pattern, options);

  if (metrics.isRedosProne) {
    throw new RegexValidationError(
      pattern,
      `ReDoS risk: ${metrics.issues.join('; ')}`,
    );
  }

  return metrics;
}
|
|
|
|
/**
 * Boolean form of `validateRegexPattern`: reports whether validation
 * completes without throwing.
 *
 * Every thrown value is treated as "not safe"; the error itself is discarded.
 *
 * @param pattern - Regex source text to check.
 * @param options - Limits forwarded to `validateRegexPattern`.
 * @returns `true` if validation succeeded, `false` if it threw.
 */
export function isSafeRegexPattern(
  pattern: string,
  options?: RegexValidationOptions,
): boolean {
  let accepted = true;

  try {
    validateRegexPattern(pattern, options);
  } catch {
    accepted = false;
  }

  return accepted;
}
|