Regex Performance and Safety โ Avoid Catastrophic Backtracking
Write safe, efficient regex patterns while avoiding performance pitfalls, ReDoS vulnerabilities, and catastrophic backtracking. Comprehensive guide to regex optimization and security best practices.
Regex Performance and Safety โ Avoid Catastrophic Backtracking
Regular expressions are powerful tools, but poorly written patterns can create serious performance and security vulnerabilities. This comprehensive guide covers regex optimization techniques, catastrophic backtracking prevention, ReDoS (Regular Expression Denial of Service) protection, and best practices for writing safe, efficient regex patterns.
Table of Contents
- Understanding Regex Performance
- Catastrophic Backtracking Explained
- ReDoS Vulnerabilities and Prevention
- Performance Optimization Techniques
- Safe Pattern Design Principles
- Testing and Performance Analysis
- Real-world Vulnerability Examples
- Monitoring and Detection
- Best Practices and Guidelines
Understanding Regex Performance
How Regex Engines Work
Understanding regex engine internals is crucial for writing efficient patterns:
Backtracking Engines vs. Automaton Engines
- Backtracking Engines: JavaScript, Python, Java, .NET, PHP - can have exponential worst-case performance
- Automaton Engines: Go, Rust, RE2 - guaranteed linear time but less feature-complete
- Hybrid Engines: Some engines combine both approaches
Performance Characteristics
class RegexPerformanceAnalyzer {
constructor() {
this.performanceMetrics = {
linear: 'O(n)', // Best case
polynomial: 'O(n^k)', // Moderate backtracking
exponential: 'O(2^n)' // Catastrophic backtracking
};
}
analyzePattern(pattern, testString) {
const startTime = process.hrtime.bigint();
let result;
let timedOut = false;
// Set timeout to prevent hanging
const timeout = setTimeout(() => {
timedOut = true;
}, 5000); // 5 second timeout
try {
const regex = new RegExp(pattern);
result = regex.test(testString);
clearTimeout(timeout);
} catch (error) {
clearTimeout(timeout);
return {
error: error.message,
performance: 'ERROR'
};
}
if (timedOut) {
return {
result: false,
performance: 'TIMEOUT',
timeComplexity: this.performanceMetrics.exponential,
warning: 'Potential ReDoS vulnerability detected'
};
}
const endTime = process.hrtime.bigint();
const duration = Number(endTime - startTime) / 1000000; // Convert to milliseconds
return {
result,
duration,
performance: this.classifyPerformance(duration, testString.length),
timeComplexity: this.estimateComplexity(duration, testString.length)
};
}
classifyPerformance(duration, inputLength) {
if (duration > 1000) return 'VERY_SLOW';
if (duration > 100) return 'SLOW';
if (duration > 10) return 'MODERATE';
return 'FAST';
}
estimateComplexity(duration, inputLength) {
// Rough estimation based on duration vs input length
const ratio = duration / inputLength;
if (ratio > 10) return this.performanceMetrics.exponential;
if (ratio > 1) return this.performanceMetrics.polynomial;
return this.performanceMetrics.linear;
}
// Test pattern with progressively larger inputs
testScalability(pattern, baseInput, maxIterations = 10) {
const results = [];
for (let i = 1; i <= maxIterations; i++) {
const testInput = baseInput.repeat(i);
const analysis = this.analyzePattern(pattern, testInput);
results.push({
iteration: i,
inputLength: testInput.length,
duration: analysis.duration,
performance: analysis.performance
});
// Stop if performance degrades significantly
if (analysis.performance === 'VERY_SLOW' || analysis.performance === 'TIMEOUT') {
break;
}
}
return {
results,
scalabilityIssue: this.detectScalabilityIssue(results)
};
}
detectScalabilityIssue(results) {
if (results.length < 3) return false;
// Check if duration increases exponentially
const durations = results.map(r => r.duration || 0);
for (let i = 2; i < durations.length; i++) {
const growthRatio = durations[i] / durations[i-1];
if (growthRatio > 2 && durations[i] > 50) {
return {
detected: true,
description: 'Exponential performance degradation detected',
severity: 'HIGH'
};
}
}
return { detected: false };
}
}
Catastrophic Backtracking Explained
What is Catastrophic Backtracking?
Catastrophic backtracking occurs when regex engines explore an exponential number of possible matches, causing performance to degrade dramatically.
Common Patterns That Cause Problems
class BacktrackingAnalyzer {
constructor() {
this.vulnerablePatterns = {
// Nested quantifiers - extremely dangerous
nestedQuantifiers: {
examples: [
'(a+)+', // a+ nested in ()+
'(a*)*', // a* nested in ()*
'(a+)*b', // Common ReDoS pattern
'(.*)+', // Any character repeated
'([a-zA-Z0-9]+)*' // Character class with nested quantifiers
],
description: 'Nested quantifiers create exponential backtracking',
severity: 'CRITICAL'
},
// Alternation with overlap
alternationOverlap: {
examples: [
'(a|a)*', // Overlapping alternatives
'(.*|.+)', // Redundant alternatives
'(a|ab)*', // Prefix overlap
'(\\w+|\\w*)+', // Similar alternatives with quantifiers
],
description: 'Overlapping alternatives cause excessive backtracking',
severity: 'HIGH'
},
// Grouping with quantifiers
groupingProblems: {
examples: [
'(a|b|c)*d', // May backtrack extensively if 'd' not found
'(.*)=(.*)', // Greedy matching can cause issues
'(\\w+\\s*)+', // Repeated groups with optional spaces
],
description: 'Certain grouping patterns can cause performance issues',
severity: 'MEDIUM'
},
// Lookaheads and lookbehinds
lookaroundIssues: {
examples: [
'(?=.*a)(?=.*b).*', // Multiple positive lookaheads
'.*(?<=a.*)b', // Complex lookbehind
'(?=.+)(?=.*\\d).+', // Redundant lookaheads
],
description: 'Complex lookarounds can be computationally expensive',
severity: 'MEDIUM'
}
};
}
analyzePattern(pattern) {
const vulnerabilities = [];
// Check for nested quantifiers
const nestedQuantifierPattern = /\([^)]*[+*?][^)]*\)[+*?]/g;
if (nestedQuantifierPattern.test(pattern)) {
vulnerabilities.push({
type: 'NESTED_QUANTIFIERS',
severity: 'CRITICAL',
description: 'Nested quantifiers detected - high ReDoS risk',
recommendation: 'Use atomic groups or possessive quantifiers if supported'
});
}
// Check for dangerous combinations
const dangerousCombinations = [
{ pattern: /\(\.[+*]\)[+*]/, type: 'EXPONENTIAL_DOT_STAR' },
{ pattern: /\([^)]*\|[^)]*\)[+*]/, type: 'ALTERNATION_WITH_QUANTIFIER' },
{ pattern: /\+.*\+|\*.*\*/, type: 'MULTIPLE_QUANTIFIERS' }
];
dangerousCombinations.forEach(({ pattern: testPattern, type }) => {
if (testPattern.test(pattern)) {
vulnerabilities.push({
type,
severity: 'HIGH',
description: `Dangerous pattern detected: ${type.replace(/_/g, ' ').toLowerCase()}`,
recommendation: this.getRecommendation(type)
});
}
});
// Check for overlapping alternatives
const alternationPattern = /\([^)]*\|[^)]*\)/g;
const alternations = pattern.match(alternationPattern);
if (alternations) {
alternations.forEach(alt => {
if (this.hasOverlappingAlternatives(alt)) {
vulnerabilities.push({
type: 'OVERLAPPING_ALTERNATIVES',
severity: 'MEDIUM',
description: 'Overlapping alternatives detected',
location: alt,
recommendation: 'Ensure alternatives are mutually exclusive'
});
}
});
}
return {
pattern,
vulnerabilities,
riskLevel: this.calculateRiskLevel(vulnerabilities),
recommendations: this.generateRecommendations(vulnerabilities)
};
}
hasOverlappingAlternatives(alternationGroup) {
// Remove parentheses and split by pipe
const alternatives = alternationGroup.slice(1, -1).split('|');
// Check for obvious overlaps
for (let i = 0; i < alternatives.length; i++) {
for (let j = i + 1; j < alternatives.length; j++) {
const alt1 = alternatives[i].trim();
const alt2 = alternatives[j].trim();
// Check if one alternative is a prefix of another
if (alt1.startsWith(alt2) || alt2.startsWith(alt1)) {
return true;
}
// Check for identical alternatives
if (alt1 === alt2) {
return true;
}
}
}
return false;
}
getRecommendation(type) {
const recommendations = {
'EXPONENTIAL_DOT_STAR': 'Replace (.*) with more specific patterns or use possessive quantifiers',
'ALTERNATION_WITH_QUANTIFIER': 'Avoid quantifiers on alternation groups',
'MULTIPLE_QUANTIFIERS': 'Use atomic groups or rework pattern to avoid multiple quantifiers'
};
return recommendations[type] || 'Review pattern for potential optimization';
}
calculateRiskLevel(vulnerabilities) {
const criticalCount = vulnerabilities.filter(v => v.severity === 'CRITICAL').length;
const highCount = vulnerabilities.filter(v => v.severity === 'HIGH').length;
const mediumCount = vulnerabilities.filter(v => v.severity === 'MEDIUM').length;
if (criticalCount > 0) return 'CRITICAL';
if (highCount > 1) return 'HIGH';
if (highCount > 0 || mediumCount > 2) return 'MEDIUM';
if (mediumCount > 0) return 'LOW';
return 'SAFE';
}
generateRecommendations(vulnerabilities) {
const recommendations = [];
if (vulnerabilities.some(v => v.type === 'NESTED_QUANTIFIERS')) {
recommendations.push({
priority: 'HIGH',
action: 'Replace nested quantifiers with atomic groups or possessive quantifiers',
example: 'Change (a+)+ to (?>a+)+ or a++'
});
}
if (vulnerabilities.some(v => v.type === 'OVERLAPPING_ALTERNATIVES')) {
recommendations.push({
priority: 'MEDIUM',
action: 'Make alternatives mutually exclusive',
example: 'Change (a|ab) to (ab|a) or use word boundaries'
});
}
recommendations.push({
priority: 'LOW',
action: 'Test pattern with long inputs to verify performance',
example: 'Use automated testing with progressively larger inputs'
});
return recommendations;
}
// Demonstrate catastrophic backtracking with examples
demonstrateCatastrophicBacktracking() {
const examples = [
{
pattern: '(a+)+b',
description: 'Nested quantifiers causing exponential backtracking',
goodInput: 'aaab',
badInput: 'aaaaaaaaaaaaaaaaaaaaX', // No 'b' at end
explanation: 'When there\'s no \'b\' at the end, the engine tries all possible ways to match a+'
},
{
pattern: '(a|a)*b',
description: 'Overlapping alternatives with quantifier',
goodInput: 'aaab',
badInput: 'aaaaaaaaaaaaaaaaaaaaX',
explanation: 'Both alternatives match \'a\', creating exponential possibilities'
},
{
pattern: '^(.*)+$',
description: 'Nested .* patterns',
goodInput: 'test',
badInput: 'a'.repeat(20),
explanation: 'The inner .* can match any substring, leading to massive backtracking'
}
];
return examples.map(example => ({
...example,
saferAlternative: this.suggestSaferAlternative(example.pattern)
}));
}
suggestSaferAlternative(pattern) {
const alternatives = {
'(a+)+b': 'a+b',
'(a|a)*b': 'a*b',
'^(.*)+$': '^.*$' // or just .* if anchors aren't needed
};
return alternatives[pattern] || 'Pattern needs manual review';
}
}
ReDoS Vulnerabilities and Prevention
Understanding ReDoS Attacks
ReDoS (Regular Expression Denial of Service) attacks exploit vulnerable regex patterns to cause excessive CPU usage, potentially bringing down services.
ReDoS Detection and Prevention
class ReDoSProtection {
constructor(options = {}) {
this.maxExecutionTime = options.maxExecutionTime || 100; // milliseconds
this.maxInputLength = options.maxInputLength || 10000;
this.enableTimeoutProtection = options.enableTimeoutProtection !== false;
this.knownVulnerablePatterns = [
{
pattern: /\([^)]*[+*][^)]*\)[+*]/,
description: 'Nested quantifiers',
severity: 'CRITICAL'
},
{
pattern: /\(\.[+*]\)[+*]/,
description: 'Nested .* or .+ patterns',
severity: 'CRITICAL'
},
{
pattern: /\([^)]*\|[^)]*\)[+*]/,
description: 'Alternation with quantifiers',
severity: 'HIGH'
}
];
}
// Safe regex execution with timeout protection
safeRegexTest(pattern, input, options = {}) {
const startTime = Date.now();
const timeout = options.timeout || this.maxExecutionTime;
return new Promise((resolve, reject) => {
// Input validation
if (input.length > this.maxInputLength) {
resolve({
result: false,
error: 'Input too long',
inputLength: input.length,
maxLength: this.maxInputLength
});
return;
}
// Pre-scan pattern for known vulnerabilities
const vulnerabilityCheck = this.scanForVulnerabilities(pattern);
if (vulnerabilityCheck.hasVulnerabilities && !options.bypassSafety) {
resolve({
result: false,
error: 'Potentially unsafe pattern detected',
vulnerabilities: vulnerabilityCheck.vulnerabilities
});
return;
}
let timeoutId;
let completed = false;
if (this.enableTimeoutProtection) {
timeoutId = setTimeout(() => {
if (!completed) {
completed = true;
resolve({
result: false,
error: 'Regex execution timeout',
executionTime: Date.now() - startTime,
timeout: true
});
}
}, timeout);
}
try {
const regex = new RegExp(pattern, options.flags || '');
const result = regex.test(input);
if (!completed) {
completed = true;
clearTimeout(timeoutId);
const executionTime = Date.now() - startTime;
resolve({
result,
executionTime,
safe: executionTime < timeout
});
}
} catch (error) {
if (!completed) {
completed = true;
clearTimeout(timeoutId);
resolve({
result: false,
error: error.message,
exception: true
});
}
}
});
}
scanForVulnerabilities(pattern) {
const vulnerabilities = [];
this.knownVulnerablePatterns.forEach(({ pattern: vulnPattern, description, severity }) => {
if (vulnPattern.test(pattern)) {
vulnerabilities.push({
description,
severity,
pattern: vulnPattern.source
});
}
});
return {
hasVulnerabilities: vulnerabilities.length > 0,
vulnerabilities,
riskLevel: this.calculateRiskLevel(vulnerabilities)
};
}
calculateRiskLevel(vulnerabilities) {
if (vulnerabilities.some(v => v.severity === 'CRITICAL')) return 'CRITICAL';
if (vulnerabilities.some(v => v.severity === 'HIGH')) return 'HIGH';
if (vulnerabilities.some(v => v.severity === 'MEDIUM')) return 'MEDIUM';
return 'LOW';
}
// Generate safe alternative patterns
generateSafeAlternatives(unsafePattern) {
const alternatives = [];
// Replace nested quantifiers
if (/\([^)]*[+*][^)]*\)[+*]/.test(unsafePattern)) {
alternatives.push({
pattern: unsafePattern.replace(/\(([^)]*[+*][^)]*)\)[+*]/, '$1'),
description: 'Removed nested quantifiers',
explanation: 'Flattened nested quantifiers to prevent exponential backtracking'
});
}
// Replace .* with more specific patterns
if (/\(\.\*\)[+*]/.test(unsafePattern)) {
alternatives.push({
pattern: unsafePattern.replace(/\(\.\*\)[+*]/, '.*'),
description: 'Simplified .* pattern',
explanation: 'Removed redundant grouping around .*'
});
}
// Replace alternation issues
if (/\([^)]*\|[^)]*\)[+*]/.test(unsafePattern)) {
alternatives.push({
pattern: unsafePattern.replace(/\(([^)]*\|[^)]*)\)[+*]/, '(?:$1)*'),
description: 'Used non-capturing group',
explanation: 'Converted to non-capturing group to reduce backtracking'
});
}
return alternatives;
}
// Automated ReDoS testing
testForReDoS(pattern, maxTestLength = 50) {
const testResults = [];
// Generate test strings that commonly trigger ReDoS
const testGenerators = [
// Repeated character without matching suffix
(len) => 'a'.repeat(len) + 'X',
// Alternating patterns
(len) => ('ab'.repeat(len / 2)) + 'X',
// Mixed characters
(len) => ('abc'.repeat(len / 3)) + 'X',
// Special characters
(len) => ('a1b2'.repeat(len / 4)) + 'X'
];
for (let length = 5; length <= maxTestLength; length += 5) {
for (const generator of testGenerators) {
const testString = generator(length);
const startTime = process.hrtime.bigint();
try {
const regex = new RegExp(pattern);
const result = regex.test(testString);
const endTime = process.hrtime.bigint();
const duration = Number(endTime - startTime) / 1000000; // Convert to ms
testResults.push({
inputLength: length,
testString: testString.substring(0, 20) + (testString.length > 20 ? '...' : ''),
duration,
result,
suspicious: duration > 50 // Flag if taking more than 50ms
});
// Stop testing if we detect a potential issue
if (duration > 1000) {
testResults.push({
inputLength: length,
testString: 'TEST_ABORTED',
duration: '> 1000ms',
result: false,
suspicious: true,
warning: 'Testing stopped due to excessive execution time'
});
break;
}
} catch (error) {
testResults.push({
inputLength: length,
testString: testString.substring(0, 20) + '...',
error: error.message,
suspicious: true
});
}
}
}
return {
pattern,
testResults,
verdict: this.analyzeTestResults(testResults)
};
}
analyzeTestResults(results) {
const suspiciousResults = results.filter(r => r.suspicious);
if (suspiciousResults.length === 0) {
return {
status: 'SAFE',
description: 'No performance issues detected'
};
}
const highDurationResults = results.filter(r => typeof r.duration === 'number' && r.duration > 100);
if (highDurationResults.length > 0) {
const maxDuration = Math.max(...highDurationResults.map(r => r.duration));
return {
status: 'VULNERABLE',
description: `Potential ReDoS vulnerability detected. Max execution time: ${maxDuration.toFixed(2)}ms`,
severity: maxDuration > 1000 ? 'CRITICAL' : 'HIGH',
recommendations: [
'Review pattern for nested quantifiers',
'Consider using atomic groups or possessive quantifiers',
'Implement timeout protection in production'
]
};
}
return {
status: 'SUSPICIOUS',
description: 'Some test cases showed suspicious behavior',
severity: 'MEDIUM',
recommendations: [
'Monitor pattern performance in production',
'Consider additional testing with larger inputs'
]
};
}
// Real-time monitoring for ReDoS attacks
createMonitor() {
const executionTimes = [];
const alertThreshold = 500; // milliseconds
const maxHistorySize = 1000;
return {
recordExecution: (duration, pattern, input) => {
executionTimes.push({
timestamp: Date.now(),
duration,
pattern: pattern.substring(0, 50),
inputLength: input.length
});
// Keep history manageable
if (executionTimes.length > maxHistorySize) {
executionTimes.splice(0, 100);
}
// Check for potential attack
if (duration > alertThreshold) {
return {
alert: true,
severity: duration > 2000 ? 'CRITICAL' : 'HIGH',
message: `Slow regex execution detected: ${duration}ms`,
pattern,
inputLength: input.length
};
}
return { alert: false };
},
getStats: () => {
const recentExecutions = executionTimes.filter(
ex => Date.now() - ex.timestamp < 60000 // Last minute
);
if (recentExecutions.length === 0) {
return { noData: true };
}
const durations = recentExecutions.map(ex => ex.duration);
const avg = durations.reduce((a, b) => a + b) / durations.length;
const max = Math.max(...durations);
const slowQueries = recentExecutions.filter(ex => ex.duration > alertThreshold).length;
return {
totalExecutions: recentExecutions.length,
averageDuration: avg.toFixed(2),
maxDuration: max,
slowExecutions: slowQueries,
potentialAttacks: slowQueries > 5
};
}
};
}
}
Performance Optimization Techniques
Pattern Optimization Strategies
class RegexOptimizer {
constructor() {
this.optimizationRules = [
{
name: 'Remove unnecessary groups',
detector: /\([^)]*\)(?![+*?{])/,
optimizer: (pattern) => pattern.replace(/\(([^)|]+)\)(?![+*?{])/g, '$1')
},
{
name: 'Use character classes',
detector: /\[a-z\]|\[A-Z\]|\[0-9\]/,
optimizer: (pattern) => pattern
.replace(/\[a-z\]/g, '\\p{Ll}')
.replace(/\[A-Z\]/g, '\\p{Lu}')
.replace(/\[0-9\]/g, '\\d')
},
{
name: 'Anchor appropriately',
detector: /^[^$^].*[^$^]$/,
optimizer: (pattern) => {
// Add anchors if they would help performance
if (!/^\^/.test(pattern) && !/\$$/.test(pattern)) {
return `^${pattern}$`;
}
return pattern;
}
}
];
}
optimizePattern(pattern, context = {}) {
let optimized = pattern;
const appliedOptimizations = [];
// 1. Remove redundant escapes
const redundantEscapes = /\\([^\\\[\](){}.*+?^$|\-dwsWDS])/g;
if (redundantEscapes.test(optimized)) {
optimized = optimized.replace(redundantEscapes, '$1');
appliedOptimizations.push('Removed redundant escapes');
}
// 2. Optimize character classes
optimized = this.optimizeCharacterClasses(optimized, appliedOptimizations);
// 3. Optimize quantifiers
optimized = this.optimizeQuantifiers(optimized, appliedOptimizations);
// 4. Optimize alternation
optimized = this.optimizeAlternation(optimized, appliedOptimizations);
// 5. Add anchors if appropriate
if (context.anchored) {
optimized = this.addAnchors(optimized, appliedOptimizations);
}
// 6. Use atomic groups if supported
if (context.supportsAtomicGroups) {
optimized = this.addAtomicGroups(optimized, appliedOptimizations);
}
return {
original: pattern,
optimized,
appliedOptimizations,
estimatedSpeedGain: this.estimateSpeedGain(pattern, optimized)
};
}
optimizeCharacterClasses(pattern, optimizations) {
let optimized = pattern;
// Combine adjacent character ranges
const adjacentRanges = /\[([a-z])\-([a-z])([a-z])\-([a-z])\]/g;
if (adjacentRanges.test(optimized)) {
optimized = optimized.replace(adjacentRanges, (match, start1, end1, start2, end2) => {
if (end1.charCodeAt(0) + 1 === start2.charCodeAt(0)) {
optimizations.push('Combined adjacent character ranges');
return `[${start1}-${end2}]`;
}
return match;
});
}
// Replace common character classes with shorthand
const commonClasses = {
'[0-9]': '\\d',
'[a-zA-Z0-9_]': '\\w',
'[\\t\\n\\r\\f\\v ]': '\\s',
'[a-z]': '[a-z]', // Keep as is for clarity
'[A-Z]': '[A-Z]' // Keep as is for clarity
};
Object.entries(commonClasses).forEach(([longForm, shortForm]) => {
if (optimized.includes(longForm) && shortForm !== longForm) {
optimized = optimized.replace(new RegExp(longForm.replace(/[\[\]\\]/g, '\\$&'), 'g'), shortForm);
optimizations.push(`Replaced ${longForm} with ${shortForm}`);
}
});
return optimized;
}
optimizeQuantifiers(pattern, optimizations) {
let optimized = pattern;
// Replace {1} with no quantifier
if (optimized.includes('{1}')) {
optimized = optimized.replace(/{1}/g, '');
optimizations.push('Removed unnecessary {1} quantifiers');
}
// Replace {0,1} with ?
if (optimized.includes('{0,1}')) {
optimized = optimized.replace(/{0,1}/g, '?');
optimizations.push('Replaced {0,1} with ?');
}
// Replace {1,} with +
if (optimized.includes('{1,}')) {
optimized = optimized.replace(/{1,}/g, '+');
optimizations.push('Replaced {1,} with +');
}
// Replace {0,} with *
if (optimized.includes('{0,}')) {
optimized = optimized.replace(/{0,}/g, '*');
optimizations.push('Replaced {0,} with *');
}
return optimized;
}
optimizeAlternation(pattern, optimizations) {
let optimized = pattern;
// Factor out common prefixes in alternation
const alternationPattern = /\(([^)|]+)\|([^)]+)\)/g;
optimized = optimized.replace(alternationPattern, (match, alt1, alt2) => {
const commonPrefix = this.findCommonPrefix([alt1, alt2]);
if (commonPrefix.length > 1) {
const suffix1 = alt1.substring(commonPrefix.length);
const suffix2 = alt2.substring(commonPrefix.length);
optimizations.push(`Factored out common prefix: ${commonPrefix}`);
return `${commonPrefix}(${suffix1}|${suffix2})`;
}
return match;
});
// Order alternatives by frequency (if context provided)
// This would need frequency data to implement properly
return optimized;
}
addAnchors(pattern, optimizations) {
// Add anchors if pattern doesn't already have them
if (!pattern.startsWith('^') && !pattern.endsWith('$')) {
optimizations.push('Added anchors for better performance');
return `^${pattern}$`;
}
return pattern;
}
addAtomicGroups(pattern, optimizations) {
let optimized = pattern;
// Convert capturing groups to atomic where safe
const nestedQuantifiers = /\(([^)]+[+*])\)[+*]/g;
optimized = optimized.replace(nestedQuantifiers, (match, group) => {
optimizations.push('Converted to atomic group to prevent backtracking');
return `(?>$1)+`; // Atomic group syntax (PCRE, .NET)
});
return optimized;
}
findCommonPrefix(strings) {
if (strings.length === 0) return '';
let prefix = '';
const firstString = strings[0];
for (let i = 0; i < firstString.length; i++) {
const char = firstString[i];
if (strings.every(str => str[i] === char)) {
prefix += char;
} else {
break;
}
}
return prefix;
}
estimateSpeedGain(original, optimized) {
let gain = 0;
// Estimate based on optimization types
if (optimized.length < original.length) {
gain += 5; // Shorter patterns are generally faster
}
if (optimized.includes('>')) { // Atomic groups
gain += 20;
}
if (optimized.startsWith('^') || optimized.endsWith('$')) {
gain += 10; // Anchors can significantly speed up matching
}
if (optimized.includes('\\d') || optimized.includes('\\w') || optimized.includes('\\s')) {
gain += 5; // Character class shortcuts
}
return Math.min(gain, 50); // Cap at 50% estimated gain
}
// Performance benchmarking
benchmark(patterns, testInputs, iterations = 1000) {
const results = [];
patterns.forEach((pattern, patternIndex) => {
const patternResults = {
pattern,
totalTime: 0,
averageTime: 0,
minTime: Infinity,
maxTime: 0,
inputResults: []
};
testInputs.forEach((input, inputIndex) => {
const inputResults = {
input: input.substring(0, 50) + (input.length > 50 ? '...' : ''),
inputLength: input.length,
times: [],
average: 0
};
const regex = new RegExp(pattern);
// Warm up
for (let i = 0; i < 10; i++) {
regex.test(input);
}
// Actual benchmark
for (let i = 0; i < iterations; i++) {
const startTime = process.hrtime.bigint();
regex.test(input);
const endTime = process.hrtime.bigint();
const duration = Number(endTime - startTime) / 1000000; // Convert to milliseconds
inputResults.times.push(duration);
patternResults.totalTime += duration;
if (duration < patternResults.minTime) patternResults.minTime = duration;
if (duration > patternResults.maxTime) patternResults.maxTime = duration;
}
inputResults.average = inputResults.times.reduce((a, b) => a + b) / inputResults.times.length;
patternResults.inputResults.push(inputResults);
});
patternResults.averageTime = patternResults.totalTime / (testInputs.length * iterations);
results.push(patternResults);
});
return results;
}
}
Safe Pattern Design Principles
Safe Pattern Construction
class SafePatternBuilder {
constructor() {
this.safePatterns = {
// Email validation - safe alternatives
email: {
basic: /^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/,
strict: /^[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/
},
// URL validation - avoiding catastrophic backtracking
url: {
basic: /^https?:\/\/[a-zA-Z0-9.-]+(?:\.[a-zA-Z]{2,})?(?:\/[^\s]*)?$/,
strict: /^https?:\/\/(?:[a-zA-Z0-9-]+\.)+[a-zA-Z]{2,}(?:\/[^\s<>"]*)?$/
},
// Phone number validation
phone: {
us: /^\+?1?[-\s]?\(?[2-9]\d{2}\)?[-\s]?\d{3}[-\s]?\d{4}$/,
international: /^\+[1-9]\d{1,14}$/
},
// Password strength - safe complexity checking
password: {
strong: /^(?=.*[a-z])(?=.*[A-Z])(?=.*\d)(?=.*[@$!%*?&])[A-Za-z\d@$!%*?&]{8,}$/,
medium: /^(?=.*[a-z])(?=.*[A-Z])(?=.*\d)[A-Za-z\d@$!%*?&]{6,}$/
}
};
this.antiPatterns = [
{
pattern: /\([^)]*[+*][^)]*\)[+*]/,
description: 'Nested quantifiers',
alternative: 'Use atomic groups or flatten the pattern'
},
{
pattern: /\(\.\*\)[+*]/,
description: 'Nested .* patterns',
alternative: 'Use .* without nesting or be more specific'
},
{
pattern: /\([^)]*\|[^)]*\)[+*]/,
description: 'Alternation with quantifiers',
alternative: 'Ensure alternatives are mutually exclusive'
}
];
}
buildSafePattern(requirements) {
const builder = new PatternBuilder();
// Start with basic structure
if (requirements.anchored) {
builder.anchor();
}
// Add character classes safely
requirements.parts.forEach(part => {
switch (part.type) {
case 'letters':
builder.addLetters(part.options);
break;
case 'digits':
builder.addDigits(part.options);
break;
case 'literal':
builder.addLiteral(part.value);
break;
case 'optional':
builder.addOptional(part.pattern);
break;
case 'repeated':
builder.addRepeated(part.pattern, part.min, part.max);
break;
}
});
const pattern = builder.build();
const safety = this.validatePatternSafety(pattern);
return {
pattern,
safety,
recommendations: safety.issues.map(issue => issue.recommendation)
};
}
validatePatternSafety(pattern) {
const issues = [];
// Check against known anti-patterns
this.antiPatterns.forEach(antiPattern => {
if (antiPattern.pattern.test(pattern)) {
issues.push({
type: 'ANTI_PATTERN',
description: antiPattern.description,
severity: 'HIGH',
recommendation: antiPattern.alternative
});
}
});
// Check for excessive complexity
const complexity = this.calculateComplexity(pattern);
if (complexity > 20) {
issues.push({
type: 'HIGH_COMPLEXITY',
description: `Pattern complexity score: ${complexity}`,
severity: 'MEDIUM',
recommendation: 'Consider simplifying the pattern'
});
}
// Check for potential performance issues
const performanceIssues = this.detectPerformanceIssues(pattern);
issues.push(...performanceIssues);
return {
safe: issues.length === 0,
riskLevel: this.calculateRiskLevel(issues),
issues,
complexity
};
}
calculateComplexity(pattern) {
let complexity = 0;
// Count various regex elements
complexity += (pattern.match(/[+*?]/g) || []).length; // Quantifiers
complexity += (pattern.match(/\|/g) || []).length; // Alternations
complexity += (pattern.match(/\(/g) || []).length; // Groups
complexity += (pattern.match(/\[/g) || []).length; // Character classes
complexity += (pattern.match(/\\./g) || []).length; // Escapes
complexity += (pattern.match(/\{\d+,?\d*\}/g) || []).length; // Range quantifiers
// Nested structures increase complexity exponentially
const nestedStructures = (pattern.match(/\([^)]*\)[+*?]/g) || []).length;
complexity += nestedStructures * 5;
return complexity;
}
detectPerformanceIssues(pattern) {
const issues = [];
// Check for patterns that could cause exponential time
const exponentialPatterns = [
{ regex: /\(.*\)\*/g, description: 'Group with .* followed by quantifier' },
{ regex: /\(.+\)\+/g, description: 'Group with .+ followed by quantifier' },
{ regex: /\([^)]*[+*][^)]*\)[+*]/g, description: 'Nested quantifiers' }
];
exponentialPatterns.forEach(({ regex, description }) => {
if (regex.test(pattern)) {
issues.push({
type: 'EXPONENTIAL_TIME',
description,
severity: 'CRITICAL',
recommendation: 'Rewrite to avoid exponential time complexity'
});
}
});
// Check for excessive backtracking potential
if (/\(.*\|.*\)\*/.test(pattern)) {
issues.push({
type: 'EXCESSIVE_BACKTRACKING',
description: 'Alternation with quantifier can cause excessive backtracking',
severity: 'HIGH',
recommendation: 'Make alternatives mutually exclusive or use atomic groups'
});
}
return issues;
}
calculateRiskLevel(issues) {
if (issues.some(issue => issue.severity === 'CRITICAL')) return 'CRITICAL';
if (issues.some(issue => issue.severity === 'HIGH')) return 'HIGH';
if (issues.some(issue => issue.severity === 'MEDIUM')) return 'MEDIUM';
return 'LOW';
}
// Generate safe alternatives for common use cases
generateSafeAlternatives(useCase, requirements = {}) {
const alternatives = {
'email-validation': [
{
pattern: /^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/,
description: 'Basic email validation - fast and safe',
pros: ['Linear time complexity', 'Good for most use cases'],
cons: ['Not RFC 5322 compliant', 'May reject some valid emails']
},
{
pattern: /^[\w._%+-]+@[\w.-]+\.[a-zA-Z]{2,}$/,
description: 'Simplified with word characters',
pros: ['Very fast', 'Simple to understand'],
cons: ['Less strict than first option']
}
],
'url-validation': [
{
pattern: /^https?:\/\/[\w.-]+(?:\.[a-zA-Z]{2,})?(?:\/[^\s]*)?$/,
description: 'Basic URL validation',
pros: ['Safe from ReDoS', 'Covers most URLs'],
cons: ['May not handle all URL formats']
}
],
'phone-validation': [
{
pattern: /^\+?[1-9]\d{1,14}$/,
description: 'International phone number format',
pros: ['Simple and fast', 'International compatible'],
cons: ['Very basic validation']
},
{
pattern: /^\+?1?[-\s]?\(?[2-9]\d{2}\)?[-\s]?\d{3}[-\s]?\d{4}$/,
description: 'US phone number with formatting',
pros: ['Handles common US formats'],
cons: ['US-specific']
}
]
};
return alternatives[useCase] || [];
}
// Test pattern safety with automated tests
runSafetyTests(pattern, maxTestTime = 1000) {
const tests = [
// Test with progressively longer inputs
...Array.from({ length: 10 }, (_, i) => ({
name: `Length test ${(i + 1) * 10}`,
input: 'a'.repeat((i + 1) * 10) + 'X',
expectation: 'Should complete in reasonable time'
})),
// Test with alternating patterns
...Array.from({ length: 5 }, (_, i) => ({
name: `Alternating pattern ${i + 1}`,
input: ('ab'.repeat((i + 1) * 10)) + 'X',
expectation: 'Should not cause exponential backtracking'
})),
// Test with nested repetitions
{
name: 'Nested repetition test',
input: 'a'.repeat(20) + 'b'.repeat(20) + 'X',
expectation: 'Should handle nested structures safely'
}
];
const results = tests.map(test => {
const startTime = Date.now();
let result, error, timedOut = false;
const timeout = setTimeout(() => {
timedOut = true;
}, maxTestTime);
try {
const regex = new RegExp(pattern);
result = regex.test(test.input);
clearTimeout(timeout);
} catch (e) {
error = e.message;
clearTimeout(timeout);
}
const duration = Date.now() - startTime;
return {
...test,
result: timedOut ? 'TIMEOUT' : result,
duration,
error,
passed: !timedOut && !error && duration < 100
};
});
const passedTests = results.filter(r => r.passed).length;
const failedTests = results.filter(r => !r.passed);
return {
pattern,
totalTests: tests.length,
passedTests,
failedTests: failedTests.length,
passRate: (passedTests / tests.length * 100).toFixed(1),
results,
recommendation: failedTests.length > 0 ?
'Pattern may have performance or safety issues' :
'Pattern appears safe for production use'
};
}
}
// Helper class for building patterns safely
class PatternBuilder {
constructor() {
this.parts = [];
this.anchored = false;
}
anchor() {
this.anchored = true;
return this;
}
addLetters(options = {}) {
const { caseInsensitive = false, unicode = false } = options;
if (unicode) {
this.parts.push(caseInsensitive ? '[\\p{L}]' : '[\\p{Lu}\\p{Ll}]');
} else {
this.parts.push(caseInsensitive ? '[a-zA-Z]' : '[a-z]|[A-Z]');
}
return this;
}
addDigits(options = {}) {
this.parts.push('\\d');
return this;
}
addLiteral(text) {
// Escape special regex characters
const escaped = text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
this.parts.push(escaped);
return this;
}
addOptional(pattern) {
this.parts.push(`(?:${pattern})?`);
return this;
}
addRepeated(pattern, min = 0, max = null) {
if (max === null) {
if (min === 0) {
this.parts.push(`(?:${pattern})*`);
} else if (min === 1) {
this.parts.push(`(?:${pattern})+`);
} else {
this.parts.push(`(?:${pattern}){${min},}`);
}
} else {
this.parts.push(`(?:${pattern}){${min},${max}}`);
}
return this;
}
build() {
let pattern = this.parts.join('');
if (this.anchored) {
pattern = `^${pattern}$`;
}
return pattern;
}
}
Testing and Performance Analysis
Automated Performance Testing
class RegexPerformanceTester {
constructor(options = {}) {
this.defaultTimeout = options.timeout || 5000;
this.maxInputLength = options.maxInputLength || 1000;
this.performanceThresholds = {
excellent: 1, // < 1ms
good: 10, // < 10ms
acceptable: 100, // < 100ms
poor: 1000, // < 1000ms
terrible: Infinity // >= 1000ms
};
}
async comprehensiveTest(pattern, options = {}) {
const testSuite = {
pattern,
timestamp: new Date().toISOString(),
tests: {
basicFunctionality: await this.testBasicFunctionality(pattern),
performanceScaling: await this.testPerformanceScaling(pattern),
edgeCases: await this.testEdgeCases(pattern),
securityVulnerabilities: await this.testSecurityVulnerabilities(pattern),
memoryUsage: await this.testMemoryUsage(pattern)
}
};
testSuite.overall = this.calculateOverallScore(testSuite.tests);
testSuite.recommendations = this.generateRecommendations(testSuite.tests);
return testSuite;
}
async testBasicFunctionality(pattern) {
const tests = [
{ input: '', expected: false, description: 'Empty string' },
{ input: 'test', expected: true, description: 'Basic matching' },
{ input: 'TEST', expected: false, description: 'Case sensitivity' },
{ input: 'test123', expected: true, description: 'Alphanumeric' },
{ input: 'test@example.com', expected: true, description: 'Complex string' }
];
const results = [];
try {
const regex = new RegExp(pattern);
for (const test of tests) {
const startTime = process.hrtime.bigint();
const result = regex.test(test.input);
const endTime = process.hrtime.bigint();
const duration = Number(endTime - startTime) / 1000000;
results.push({
...test,
actual: result,
duration,
passed: result === test.expected
});
}
} catch (error) {
return {
error: error.message,
passed: false
};
}
const passedTests = results.filter(r => r.passed).length;
return {
totalTests: tests.length,
passedTests,
passRate: (passedTests / tests.length * 100).toFixed(1),
results,
passed: passedTests === tests.length
};
}
async testPerformanceScaling(pattern) {
const inputLengths = [10, 50, 100, 250, 500, 1000];
const results = [];
try {
const regex = new RegExp(pattern);
for (const length of inputLengths) {
const testInputs = [
'a'.repeat(length),
'a'.repeat(length) + 'X', // Non-matching suffix
('ab'.repeat(length / 2)).substring(0, length),
('abc'.repeat(length / 3)).substring(0, length)
];
const lengthResults = [];
for (const input of testInputs) {
const measurements = [];
// Take multiple measurements for accuracy
for (let i = 0; i < 5; i++) {
const startTime = process.hrtime.bigint();
const timeoutPromise = new Promise((_, reject) => {
setTimeout(() => reject(new Error('Timeout')), this.defaultTimeout);
});
const testPromise = new Promise((resolve) => {
regex.test(input);
const endTime = process.hrtime.bigint();
const duration = Number(endTime - startTime) / 1000000;
resolve(duration);
});
try {
const duration = await Promise.race([testPromise, timeoutPromise]);
measurements.push(duration);
} catch (error) {
measurements.push(this.defaultTimeout);
break;
}
}
const avgDuration = measurements.reduce((a, b) => a + b) / measurements.length;
const maxDuration = Math.max(...measurements);
lengthResults.push({
inputType: this.categorizeInput(input),
inputLength: input.length,
avgDuration,
maxDuration,
measurements,
performance: this.categorizePerformance(avgDuration)
});
}
results.push({
inputLength: length,
results: lengthResults,
worstPerformance: Math.max(...lengthResults.map(r => r.avgDuration))
});
// Stop testing if performance becomes unacceptable
const worstDuration = Math.max(...lengthResults.map(r => r.avgDuration));
if (worstDuration > this.performanceThresholds.poor) {
break;
}
}
} catch (error) {
return {
error: error.message,
scalabilityIssue: true
};
}
const scalabilityAnalysis = this.analyzeScalability(results);
return {
results,
scalability: scalabilityAnalysis,
recommendation: scalabilityAnalysis.issue ?
'Pattern shows poor scalability - consider optimization' :
'Pattern scales acceptably with input size'
};
}
analyzeScalability(results) {
if (results.length < 3) {
return { insufficient_data: true };
}
const durations = results.map(r => r.worstPerformance);
const growthRates = [];
for (let i = 1; i < durations.length; i++) {
const growthRate = durations[i] / durations[i - 1];
growthRates.push(growthRate);
}
const avgGrowthRate = growthRates.reduce((a, b) => a + b) / growthRates.length;
const exponentialGrowth = growthRates.some(rate => rate > 3);
const consistentSlowdown = growthRates.filter(rate => rate > 1.5).length > growthRates.length / 2;
return {
averageGrowthRate: avgGrowthRate.toFixed(2),
exponentialGrowth,
consistentSlowdown,
issue: exponentialGrowth || consistentSlowdown,
complexity: this.estimateTimeComplexity(growthRates)
};
}
estimateTimeComplexity(growthRates) {
const avgGrowthRate = growthRates.reduce((a, b) => a + b) / growthRates.length;
if (avgGrowthRate > 2) return 'O(2^n) - Exponential';
if (avgGrowthRate > 1.5) return 'O(n^2) - Quadratic';
if (avgGrowthRate > 1.2) return 'O(n log n) - Quasilinear';
return 'O(n) - Linear';
}
categorizeInput(input) {
if (/^a+$/.test(input)) return 'repeated_char';
if (/^(\w{2,3})+$/.test(input)) return 'repeated_pattern';
if (/X$/.test(input)) return 'non_matching_suffix';
return 'mixed';
}
categorizePerformance(duration) {
if (duration < this.performanceThresholds.excellent) return 'excellent';
if (duration < this.performanceThresholds.good) return 'good';
if (duration < this.performanceThresholds.acceptable) return 'acceptable';
if (duration < this.performanceThresholds.poor) return 'poor';
return 'terrible';
}
async testEdgeCases(pattern) {
const edgeCases = [
{ input: '', description: 'Empty string' },
{ input: ' ', description: 'Single space' },
{ input: '\n', description: 'Newline character' },
{ input: '\t', description: 'Tab character' },
{ input: 'a'.repeat(10000), description: 'Very long string' },
{ input: '๐ฉ', description: 'Unicode emoji' },
{ input: 'ัะตัั', description: 'Non-ASCII characters' },
{ input: '\u0000', description: 'Null character' },
{ input: '\xff\xfe', description: 'Binary data' }
];
const results = [];
try {
const regex = new RegExp(pattern, 'u'); // Unicode flag for better support
for (const testCase of edgeCases) {
try {
const startTime = Date.now();
const result = regex.test(testCase.input);
const duration = Date.now() - startTime;
results.push({
...testCase,
result,
duration,
error: null,
passed: duration < 1000 // Should complete within 1 second
});
} catch (error) {
results.push({
...testCase,
result: null,
duration: null,
error: error.message,
passed: false
});
}
}
} catch (error) {
return {
error: `Failed to create regex: ${error.message}`,
passed: false
};
}
const passedTests = results.filter(r => r.passed).length;
return {
totalTests: edgeCases.length,
passedTests,
passRate: (passedTests / edgeCases.length * 100).toFixed(1),
results,
passed: results.every(r => r.passed)
};
}
async testSecurityVulnerabilities(pattern) {
const vulnerabilityTests = [
{
name: 'ReDoS with repeated characters',
inputs: ['a'.repeat(50) + 'X', 'a'.repeat(100) + 'X', 'a'.repeat(200) + 'X'],
maxAcceptableDuration: 100
},
{
name: 'ReDoS with alternating patterns',
inputs: [('ab'.repeat(25)) + 'X', ('ab'.repeat(50)) + 'X', ('abc'.repeat(33)) + 'X'],
maxAcceptableDuration: 100
},
{
name: 'ReDoS with nested structures',
inputs: [('a1b2'.repeat(12)) + 'X', ('test'.repeat(25)) + 'X'],
maxAcceptableDuration: 100
}
];
const results = [];
let overallSecure = true;
try {
const regex = new RegExp(pattern);
for (const test of vulnerabilityTests) {
const testResults = [];
for (const input of test.inputs) {
const startTime = Date.now();
try {
const timeoutPromise = new Promise((_, reject) => {
setTimeout(() => reject(new Error('Potential ReDoS detected')), 2000);
});
const testPromise = new Promise((resolve) => {
const result = regex.test(input);
const duration = Date.now() - startTime;
resolve({ result, duration });
});
const { result, duration } = await Promise.race([testPromise, timeoutPromise]);
const secure = duration <= test.maxAcceptableDuration;
if (!secure) overallSecure = false;
testResults.push({
input: input.length > 50 ? `${input.substring(0, 50)}...` : input,
inputLength: input.length,
result,
duration,
secure
});
} catch (error) {
overallSecure = false;
testResults.push({
input: input.length > 50 ? `${input.substring(0, 50)}...` : input,
inputLength: input.length,
result: null,
duration: '>2000',
secure: false,
error: error.message
});
}
}
results.push({
...test,
results: testResults,
passed: testResults.every(r => r.secure)
});
}
} catch (error) {
return {
error: `Security test failed: ${error.message}`,
secure: false
};
}
return {
tests: results,
secure: overallSecure,
vulnerabilities: results.filter(r => !r.passed).map(r => r.name),
recommendation: overallSecure ?
'No obvious ReDoS vulnerabilities detected' :
'Potential ReDoS vulnerabilities found - pattern needs review'
};
}
async testMemoryUsage(pattern) {
// Note: Memory testing in JavaScript is limited, but we can do basic checks
const memoryTests = [
{ name: 'Small input', input: 'a'.repeat(100) },
{ name: 'Medium input', input: 'a'.repeat(1000) },
{ name: 'Large input', input: 'a'.repeat(10000) }
];
const results = [];
try {
const regex = new RegExp(pattern);
for (const test of memoryTests) {
const beforeMemory = process.memoryUsage();
// Run test multiple times to amplify memory usage
for (let i = 0; i < 1000; i++) {
regex.test(test.input);
}
const afterMemory = process.memoryUsage();
results.push({
...test,
memoryIncrease: {
heapUsed: afterMemory.heapUsed - beforeMemory.heapUsed,
heapTotal: afterMemory.heapTotal - beforeMemory.heapTotal
}
});
}
} catch (error) {
return {
error: `Memory test failed: ${error.message}`,
passed: false
};
}
const maxMemoryIncrease = Math.max(...results.map(r => r.memoryIncrease.heapUsed));
return {
results,
maxMemoryIncrease,
efficient: maxMemoryIncrease < 1000000, // Less than 1MB increase
recommendation: maxMemoryIncrease > 1000000 ?
'Pattern may have memory efficiency issues' :
'Memory usage appears acceptable'
};
}
calculateOverallScore(tests) {
let score = 100;
const penalties = {
basicFunctionality: tests.basicFunctionality?.passed ? 0 : 30,
performanceScaling: tests.performanceScaling?.scalability?.issue ? 25 : 0,
edgeCases: tests.edgeCases?.passed ? 0 : 15,
security: tests.securityVulnerabilities?.secure ? 0 : 40,
memory: tests.memoryUsage?.efficient ? 0 : 10
};
score -= Object.values(penalties).reduce((a, b) => a + b, 0);
return {
score: Math.max(0, score),
grade: this.calculateGrade(score),
penalties
};
}
calculateGrade(score) {
if (score >= 90) return 'A';
if (score >= 80) return 'B';
if (score >= 70) return 'C';
if (score >= 60) return 'D';
return 'F';
}
generateRecommendations(tests) {
const recommendations = [];
if (!tests.basicFunctionality?.passed) {
recommendations.push({
priority: 'HIGH',
category: 'Functionality',
message: 'Pattern has basic functionality issues - review and fix'
});
}
if (tests.performanceScaling?.scalability?.issue) {
recommendations.push({
priority: 'HIGH',
category: 'Performance',
message: 'Pattern shows poor scalability - consider optimization or redesign'
});
}
if (!tests.securityVulnerabilities?.secure) {
recommendations.push({
priority: 'CRITICAL',
category: 'Security',
message: 'Pattern has potential ReDoS vulnerabilities - immediate review required'
});
}
if (!tests.edgeCases?.passed) {
recommendations.push({
priority: 'MEDIUM',
category: 'Edge Cases',
message: 'Pattern fails on some edge cases - consider additional testing'
});
}
if (!tests.memoryUsage?.efficient) {
recommendations.push({
priority: 'LOW',
category: 'Memory',
message: 'Pattern may use excessive memory - monitor in production'
});
}
return recommendations;
}
}
Real-world Vulnerability Examples
Historical ReDoS Vulnerabilities
class ReDoSExamples {
constructor() {
this.historicalVulnerabilities = [
{
name: 'npm slugify',
vulnerability: 'CVE-2017-16021',
pattern: '/[^\\w\\s$*_+~.()\'"!\\-:@]+/g',
description: 'Character class with nested quantifiers causing ReDoS',
exploitInput: 'a'.repeat(100000),
fix: 'Simplified character class and added input length limits',
impact: 'HIGH - Can cause server DoS'
},
{
name: 'ms package',
vulnerability: 'CVE-2017-16014',
pattern: '/^(-?\\d+\.\\d+)\\s*(\\S*)$/g',
description: 'Exponential backtracking in time parsing',
exploitInput: 'a '.repeat(50000),
fix: 'Rewritten without catastrophic backtracking',
impact: 'MEDIUM - DoS in time parsing'
},
{
name: 'validator.js email',
vulnerability: 'ReDoS in email validation',
pattern: '/^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$/',
description: 'Safe pattern - no vulnerability',
exploitInput: null,
fix: 'Pattern was already safe',
impact: 'NONE - Good example of safe pattern'
}
];
}
demonstrateVulnerability(vulnerabilityName) {
const vuln = this.historicalVulnerabilities.find(v => v.name === vulnerabilityName);
if (!vuln) {
throw new Error(`Vulnerability ${vulnerabilityName} not found`);
}
if (!vuln.exploitInput) {
return {
vulnerability: vuln,
message: 'This pattern is safe and has no known vulnerability',
safe: true
};
}
const startTime = Date.now();
let timedOut = false;
const timeout = setTimeout(() => {
timedOut = true;
}, 5000);
try {
const regex = new RegExp(vuln.pattern.slice(1, -1)); // Remove /.../ delimiters
const result = regex.test(vuln.exploitInput);
clearTimeout(timeout);
const duration = Date.now() - startTime;
return {
vulnerability: vuln,
exploitSuccessful: duration > 1000 || timedOut,
duration: timedOut ? '>5000ms' : `${duration}ms`,
result,
safe: false
};
} catch (error) {
clearTimeout(timeout);
return {
vulnerability: vuln,
error: error.message,
safe: false
};
}
}
generateSafeAlternatives(vulnerablePattern) {
const alternatives = [];
// Common fixes for ReDoS vulnerabilities
const fixes = [
{
name: 'Remove nested quantifiers',
apply: (pattern) => pattern.replace(/\([^)]*[+*][^)]*\)[+*]/g, match => {
// Simplify nested quantifiers
return match.replace(/\)([+*])$/, ')').replace(/([+*])/, '');
})
},
{
name: 'Use atomic groups (where supported)',
apply: (pattern) => pattern.replace(/\(([^)]+)\)([+*])/g, '(?>$1)$2')
},
{
name: 'Add input length limits',
apply: (pattern) => pattern, // This would be implemented at the application level
note: 'Implement input length validation before regex processing'
},
{
name: 'Use possessive quantifiers (where supported)',
apply: (pattern) => pattern.replace(/([+*?])(?!\?)/g, '$1+')
}
];
fixes.forEach(fix => {
try {
const fixed = fix.apply(vulnerablePattern);
if (fixed !== vulnerablePattern) {
alternatives.push({
name: fix.name,
pattern: fixed,
note: fix.note
});
}
} catch (error) {
// Skip fixes that can't be applied
}
});
return alternatives;
}
// Real-world safe patterns for common use cases
getSafePatterns() {
return {
email: {
pattern: /^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/,
description: 'Safe email validation - linear time complexity',
tested: true,
maxRecommendedInputLength: 320 // RFC 5321 limit
},
url: {
pattern: /^https?:\/\/[a-zA-Z0-9.-]+(?:\.[a-zA-Z]{2,})?(?:\/[^\s]*)?$/,
description: 'Safe URL validation - avoids catastrophic backtracking',
tested: true,
maxRecommendedInputLength: 2000
},
phone: {
pattern: /^\+?[1-9]\d{1,14}$/,
description: 'International phone number - simple and safe',
tested: true,
maxRecommendedInputLength: 20
},
ipAddress: {
pattern: /^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/,
description: 'IPv4 address validation - safe and accurate',
tested: true,
maxRecommendedInputLength: 15
},
password: {
pattern: /^(?=.*[a-z])(?=.*[A-Z])(?=.*\d)[a-zA-Z\d@$!%*?&]{8,}$/,
description: 'Password strength check - uses safe lookaheads',
tested: true,
maxRecommendedInputLength: 128
},
creditCard: {
pattern: /^[0-9]{13,19}$/,
description: 'Credit card number - simple digit validation',
tested: true,
maxRecommendedInputLength: 19,
note: 'Use Luhn algorithm for actual validation'
}
};
}
// Benchmark safe patterns vs vulnerable patterns
benchmarkSafetyComparison() {
const comparisons = [
{
name: 'Email validation',
vulnerable: '(.*@.*)+',
safe: '^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$',
testInput: 'a'.repeat(50) + '@' + 'b'.repeat(50)
},
{
name: 'URL validation',
vulnerable: '(.*://.*)+',
safe: '^https?://[a-zA-Z0-9.-]+(?:\\.[a-zA-Z]{2,})?(?:/[^\\s]*)?$',
testInput: 'http://' + 'a'.repeat(100) + '.com/' + 'b'.repeat(100)
},
{
name: 'Generic text',
vulnerable: '(a+)+b',
safe: 'a+b',
testInput: 'a'.repeat(50) + 'X'
}
];
const results = comparisons.map(comp => {
const vulnerableResult = this.benchmarkPattern(comp.vulnerable, comp.testInput);
const safeResult = this.benchmarkPattern(comp.safe, comp.testInput);
return {
name: comp.name,
vulnerable: {
pattern: comp.vulnerable,
...vulnerableResult
},
safe: {
pattern: comp.safe,
...safeResult
},
improvement: vulnerableResult.duration ?
`${(vulnerableResult.duration / safeResult.duration).toFixed(1)}x faster` :
'Prevented timeout'
};
});
return results;
}
benchmarkPattern(pattern, input, timeout = 1000) {
const startTime = Date.now();
let result, timedOut = false;
const timer = setTimeout(() => {
timedOut = true;
}, timeout);
try {
const regex = new RegExp(pattern);
result = regex.test(input);
clearTimeout(timer);
} catch (error) {
clearTimeout(timer);
return {
error: error.message,
duration: null
};
}
if (timedOut) {
return {
result: 'TIMEOUT',
duration: null,
timedOut: true
};
}
return {
result,
duration: Date.now() - startTime,
timedOut: false
};
}
}
Monitoring and Detection
Production Monitoring
class RegexMonitor {
constructor(options = {}) {
this.options = {
maxExecutionTime: 100,
alertThreshold: 500,
maxHistorySize: 10000,
...options
};
this.executionHistory = [];
this.alerts = [];
this.patterns = new Map(); // Track pattern usage and performance
}
instrumentRegex(originalRegex, metadata = {}) {
const monitor = this;
// Create a wrapper that tracks performance
const instrumentedRegex = {
test: function(input) {
return monitor.monitorExecution(() => originalRegex.test(input), {
method: 'test',
pattern: originalRegex.source,
inputLength: input.length,
...metadata
});
},
exec: function(input) {
return monitor.monitorExecution(() => originalRegex.exec(input), {
method: 'exec',
pattern: originalRegex.source,
inputLength: input.length,
...metadata
});
},
match: function(input) {
return monitor.monitorExecution(() => input.match(originalRegex), {
method: 'match',
pattern: originalRegex.source,
inputLength: input.length,
...metadata
});
},
replace: function(input, replacement) {
return monitor.monitorExecution(() => input.replace(originalRegex, replacement), {
method: 'replace',
pattern: originalRegex.source,
inputLength: input.length,
...metadata
});
},
// Expose original properties
source: originalRegex.source,
flags: originalRegex.flags,
global: originalRegex.global,
ignoreCase: originalRegex.ignoreCase,
multiline: originalRegex.multiline
};
return instrumentedRegex;
}
monitorExecution(operation, metadata) {
const startTime = process.hrtime.bigint();
const executionId = this.generateExecutionId();
let result, error;
try {
result = operation();
} catch (e) {
error = e;
}
const endTime = process.hrtime.bigint();
const duration = Number(endTime - startTime) / 1000000; // Convert to milliseconds
const executionRecord = {
id: executionId,
timestamp: Date.now(),
duration,
error: error ? error.message : null,
...metadata
};
this.recordExecution(executionRecord);
if (error) {
throw error;
}
return result;
}
recordExecution(record) {
// Add to history
this.executionHistory.push(record);
// Maintain history size
if (this.executionHistory.length > this.options.maxHistorySize) {
this.executionHistory = this.executionHistory.slice(-this.options.maxHistorySize);
}
// Update pattern statistics
this.updatePatternStats(record);
// Check for alerts
this.checkForAlerts(record);
}
updatePatternStats(record) {
const patternKey = record.pattern;
if (!this.patterns.has(patternKey)) {
this.patterns.set(patternKey, {
executions: 0,
totalTime: 0,
maxTime: 0,
minTime: Infinity,
errors: 0,
averageInputLength: 0,
totalInputLength: 0,
firstSeen: Date.now(),
lastSeen: Date.now()
});
}
const stats = this.patterns.get(patternKey);
stats.executions++;
stats.totalTime += record.duration;
stats.maxTime = Math.max(stats.maxTime, record.duration);
stats.minTime = Math.min(stats.minTime, record.duration);
stats.totalInputLength += record.inputLength || 0;
stats.averageInputLength = stats.totalInputLength / stats.executions;
stats.lastSeen = Date.now();
if (record.error) {
stats.errors++;
}
this.patterns.set(patternKey, stats);
}
checkForAlerts(record) {
const alerts = [];
// Slow execution alert
if (record.duration > this.options.alertThreshold) {
alerts.push({
type: 'SLOW_EXECUTION',
severity: record.duration > 2000 ? 'CRITICAL' : 'HIGH',
message: `Regex execution took ${record.duration.toFixed(2)}ms`,
pattern: record.pattern,
metadata: record
});
}
// Error alert
if (record.error) {
alerts.push({
type: 'EXECUTION_ERROR',
severity: 'HIGH',
message: `Regex execution failed: ${record.error}`,
pattern: record.pattern,
metadata: record
});
}
// Pattern frequency alert (potential ReDoS attack)
const recentExecutions = this.getRecentExecutions(record.pattern, 60000); // Last minute
if (recentExecutions.length > 100) {
const avgDuration = recentExecutions.reduce((sum, exec) => sum + exec.duration, 0) / recentExecutions.length;
if (avgDuration > 50) {
alerts.push({
type: 'POTENTIAL_REDOS_ATTACK',
severity: 'CRITICAL',
message: `High frequency of slow executions detected for pattern: ${recentExecutions.length} executions in 1 minute`,
pattern: record.pattern,
averageDuration: avgDuration,
metadata: record
});
}
}
// Process alerts
alerts.forEach(alert => this.handleAlert(alert));
}
handleAlert(alert) {
this.alerts.push(alert);
// Emit event or send notification
console.warn(`[REGEX ALERT] ${alert.severity}: ${alert.message}`);
// Keep alert history manageable
if (this.alerts.length > 1000) {
this.alerts = this.alerts.slice(-1000);
}
}
getRecentExecutions(pattern, timeWindowMs) {
const cutoff = Date.now() - timeWindowMs;
return this.executionHistory.filter(exec =>
exec.pattern === pattern && exec.timestamp > cutoff
);
}
generateExecutionId() {
return `exec_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`;
}
getPerformanceReport(timeRangeMs = 3600000) { // Default: last hour
const cutoff = Date.now() - timeRangeMs;
const recentExecutions = this.executionHistory.filter(exec => exec.timestamp > cutoff);
if (recentExecutions.length === 0) {
return { message: 'No executions in the specified time range' };
}
// Overall statistics
const totalExecutions = recentExecutions.length;
const totalDuration = recentExecutions.reduce((sum, exec) => sum + exec.duration, 0);
const averageDuration = totalDuration / totalExecutions;
const maxDuration = Math.max(...recentExecutions.map(exec => exec.duration));
const errorCount = recentExecutions.filter(exec => exec.error).length;
// Performance distribution
const durations = recentExecutions.map(exec => exec.duration);
durations.sort((a, b) => a - b);
const p50 = durations[Math.floor(durations.length * 0.5)];
const p95 = durations[Math.floor(durations.length * 0.95)];
const p99 = durations[Math.floor(durations.length * 0.99)];
// Top patterns by execution count
const patternCounts = {};
recentExecutions.forEach(exec => {
patternCounts[exec.pattern] = (patternCounts[exec.pattern] || 0) + 1;
});
const topPatterns = Object.entries(patternCounts)
.sort((a, b) => b[1] - a[1])
.slice(0, 10)
.map(([pattern, count]) => ({
pattern: pattern.length > 50 ? pattern.substring(0, 50) + '...' : pattern,
executions: count,
percentage: ((count / totalExecutions) * 100).toFixed(1)
}));
// Slowest patterns
const patternPerformance = {};
recentExecutions.forEach(exec => {
if (!patternPerformance[exec.pattern]) {
patternPerformance[exec.pattern] = { durations: [], errors: 0 };
}
patternPerformance[exec.pattern].durations.push(exec.duration);
if (exec.error) patternPerformance[exec.pattern].errors++;
});
const slowestPatterns = Object.entries(patternPerformance)
.map(([pattern, data]) => ({
pattern: pattern.length > 50 ? pattern.substring(0, 50) + '...' : pattern,
averageDuration: (data.durations.reduce((a, b) => a + b) / data.durations.length).toFixed(2),
maxDuration: Math.max(...data.durations).toFixed(2),
executions: data.durations.length,
errors: data.errors
}))
.sort((a, b) => parseFloat(b.averageDuration) - parseFloat(a.averageDuration))
.slice(0, 10);
return {
timeRange: `${timeRangeMs / 60000} minutes`,
summary: {
totalExecutions,
averageDuration: averageDuration.toFixed(2),
maxDuration: maxDuration.toFixed(2),
errorRate: ((errorCount / totalExecutions) * 100).toFixed(2),
performancePercentiles: {
p50: p50.toFixed(2),
p95: p95.toFixed(2),
p99: p99.toFixed(2)
}
},
topPatterns,
slowestPatterns,
alerts: this.getRecentAlerts(timeRangeMs)
};
}
getRecentAlerts(timeRangeMs) {
const cutoff = Date.now() - timeRangeMs;
return this.alerts
.filter(alert => alert.timestamp > cutoff)
.sort((a, b) => b.timestamp - a.timestamp)
.slice(0, 20);
}
// Health check for regex performance
getHealthStatus() {
const recentExecutions = this.getRecentExecutions(null, 300000); // Last 5 minutes
if (recentExecutions.length === 0) {
return { status: 'UNKNOWN', message: 'No recent regex executions' };
}
const avgDuration = recentExecutions.reduce((sum, exec) => sum + exec.duration, 0) / recentExecutions.length;
const errorRate = recentExecutions.filter(exec => exec.error).length / recentExecutions.length;
const slowExecutions = recentExecutions.filter(exec => exec.duration > this.options.alertThreshold).length;
let status = 'HEALTHY';
let issues = [];
if (errorRate > 0.05) { // More than 5% errors
status = 'UNHEALTHY';
issues.push(`High error rate: ${(errorRate * 100).toFixed(1)}%`);
}
if (avgDuration > this.options.maxExecutionTime) {
status = status === 'HEALTHY' ? 'DEGRADED' : status;
issues.push(`Average execution time high: ${avgDuration.toFixed(2)}ms`);
}
if (slowExecutions > recentExecutions.length * 0.1) { // More than 10% slow
status = status === 'HEALTHY' ? 'DEGRADED' : status;
issues.push(`${slowExecutions} slow executions detected`);
}
return {
status,
issues,
metrics: {
averageDuration: avgDuration.toFixed(2),
errorRate: (errorRate * 100).toFixed(2),
totalExecutions: recentExecutions.length
}
};
}
}
Best Practices and Guidelines
Comprehensive Best Practices
class RegexBestPractices {
static getGuidelines() {
return {
performance: {
title: 'Performance Guidelines',
rules: [
{
rule: 'Avoid nested quantifiers',
example: {
bad: '(a+)+',
good: 'a+',
explanation: 'Nested quantifiers create exponential time complexity'
}
},
{
rule: 'Use atomic groups when possible',
example: {
bad: '(\\w+)*',
good: '(?>\\w+)*',
explanation: 'Atomic groups prevent backtracking'
}
},
{
rule: 'Anchor patterns when appropriate',
example: {
bad: 'test.*',
good: '^test.*$',
explanation: 'Anchors reduce search space'
}
},
{
rule: 'Be specific with character classes',
example: {
bad: '.*@.*',
good: '[\\w._%+-]+@[\\w.-]+',
explanation: 'Specific classes are faster than wildcards'
}
}
]
},
security: {
title: 'Security Guidelines',
rules: [
{
rule: 'Implement timeout protection',
example: {
code: `
const timeoutMs = 1000;
const timeoutPromise = new Promise((_, reject) =>
setTimeout(() => reject(new Error('Regex timeout')), timeoutMs)
);
const regexPromise = new Promise((resolve) => resolve(regex.test(input)));
const result = await Promise.race([regexPromise, timeoutPromise]);
`,
explanation: 'Always protect against ReDoS with timeouts'
}
},
{
rule: 'Validate input length',
example: {
code: `
const MAX_INPUT_LENGTH = 10000;
if (input.length > MAX_INPUT_LENGTH) {
throw new Error('Input too long');
}
`,
explanation: 'Limit input size to prevent DoS attacks'
}
},
{
rule: 'Use whitelist approach',
example: {
bad: '/[^