my( $pattern )= @_;
my $error;
my %helper= (
'\w' => '[_[:alnum:]]',
'\W' => '[^_[:alnum:]]',
'\s' => '[[:space:]]',
'\S' => '[^[:space:]]',
'\d' => '[0-9]',
'\D' => '[^0-9]',
'\n' => "\n",
'\t' => "\t",
'\b' => '[[:<:]]',
'\B' => '[[:>:]]',
'\A' => '^',
'\z' => '$',
'\Z' => "\n?\$",
'^' => "(^|\n)",
'$' => "(\n|\$)",
);
my $prev= '';
my @replace;
my @mult= ( 0 );
while( $pattern =~ m< # Tokenize the potential regex
\G # Don't let it skip bits
( # Return what we find
\\x.. # A hexadecimal character
| \\[012][0-7]{0,2} # An octal character
| \\. # A boring escaped character
| [^\[\{] # Not '[' nor '{' so treat as a token
+ #}
| \{(?=\D) # Literal but unescaped '{'
+ #}
| \{\d+,?\d*} # Bounded repetitions
| \[ # '[' starts a character class
\^? # '^' can go in front of the literal
+']'
\]? # ']' at start is taken literally
(?: # Inside, there can be some subtok
+ens
[^\]] # Not '[' so isn't a subtoken
| \[(?=[^.=:]) # '[' but not '[.', '[=', nor '[
+:'
| \[ # Must be '[' of '[.', '[=', or
+'[:'
[^\]]* # Anything but the closing ']'
\] # ']' closes out subtoken
)* # Any number of subtokens
\] # ']' closes out the character class
| (.) # Found something invalid (sets $2)
)
>xsg
) {
my $token= $1;
if( defined $2 ) {
$error= "Your regex appears to be invalid."
. " I understood '"
. $q->escapeHTML(
substr( $pattern, 0, pos($pattern)-1 )
) . "' but not '"
. $q->escapeHTML(
substr( $pattern, pos($pattern)-1 )
) . "'.";
last;
}
if( ')' eq $prev ) {
if( 0 < $mult[-1]
and $token =~ /^[+*]/
|| $token =~ /^{./ #}
) {
$error= "We don't allow a () that contains"
. " +, *, or {} to have +, *, or {} applied to it"
. " (to prevent excess server load).";
last;
}
my $mult= pop @mult;
$mult[-1] += $mult;
}
if( '(' eq $token ) {
push @mult, 0;
} elsif( $token =~ /^[+*]/ || $token =~ /^{./ ) { #}
$mult[-1]++;
}
if( exists $helper{$token} ) {
push @replace, pos($pattern), length($token), $helper{$tok
+en};
} elsif( $token =~ /^\\x([0-9a-fA-F][0-9a-fA-F])\z/ ) {
push @replace, pos($pattern), length($token), chr(hex($1))
+;
} elsif( $token =~ /^\\([012][0-7]{0,2})\z/ ) {
push @replace, pos($pattern), length($token), chr(oct($1))
+;
} elsif( $token =~ /^\\\w\z/ ) {
$error= "Illegal escape ($token) in regex.";
last;
}
$prev= $token;
}
if( 6 < $mult[0] ) {
$error= "Your regex had $mult[0] instances of +, *, or {}."
. " We don't allow more than 6 instances"
. " (to prevent excess server load).";
}
if( $error ) {
return "\\9$error";
}
while( @replace ) {
my( $pos, $len, $str )= splice @replace, -3;
substr( $pattern, $pos-$len, $len )= $str;
}
return $pattern;
|