Beefy Boxes and Bandwidth Generously Provided by pair Networks
Perl-Sensitive Sunglasses
 
PerlMonks  

safe mysql regexes

by tye (Cardinal)
on Jan 28, 2003 at 20:18 UTC ( #230722=snippet: print w/ replies, xml ) Need Help??

Description:

I wrote this a while ago to allow users to enter regular expressions in Super Search. Unfortunately, the CPU consumption of even a simple regex in the context of Super Search is too much load so most users aren't allowed to use them.

But if you need to accept regular expressions from users for use with MySQL, then this code might be useful to you.

    my( $pattern )= @_;
    my $error;
    my %helper= (
        '\w' => '[_[:alnum:]]',
        '\W' => '[^_[:alnum:]]',
        '\s' => '[[:space:]]',
        '\S' => '[^[:space:]]',
        '\d' => '[0-9]',
        '\D' => '[^0-9]',
        '\n' => "\n",
        '\t' => "\t",
        '\b' => '[[:<:]]',
        '\B' => '[[:>:]]',
        '\A' => '^',
        '\z' => '$',
        '\Z' => "\n?\$",
        '^'  => "(^|\n)",
        '$'  => "(\n|\$)",
    );
    my $prev= '';
    my @replace;
    my @mult= ( 0 );
    while(  $pattern =~ m<      # Tokenize the potential regex
          \G                    # Don't let it skip bits
            (                   # Return what we find
              \\x..             # A hexadecimal character
            | \\[012][0-7]{0,2} # An octal character
            | \\.               # A boring escaped character
            | [^\[\{]           # Not '[' nor '{' so treat as a token 
+  #}
            | \{(?=\D)          # Literal but unescaped '{'           
+  #}
            | \{\d+,?\d*}       # Bounded repetitions
            | \[                # '[' starts a character class
                \^?             #  '^' can go in front of the literal 
+']'
                \]?             #   ']' at start is taken literally
                (?:             #     Inside, there can be some subtok
+ens
                  [^\]]         #       Not '[' so isn't a subtoken
                | \[(?=[^.=:])  #       '[' but not '[.', '[=', nor '[
+:'
                | \[            #       Must be '[' of '[.', '[=', or 
+'[:'
                    [^\]]*      #       Anything but the closing ']'
                  \]            #       ']' closes out subtoken
                )*              #     Any number of subtokens
              \]                # ']' closes out the character class
            | (.)               # Found something invalid (sets $2)
            )
        >xsg
    ) {
        my $token= $1;
        if(  defined $2  ) {
            $error= "Your regex appears to be invalid."
              . " I understood '"
              . $q->escapeHTML(
                  substr( $pattern, 0, pos($pattern)-1 )
                ) . "' but not '"
              . $q->escapeHTML(
                  substr( $pattern, pos($pattern)-1 )
                ) . "'.";
            last;
        }
        if(  ')' eq $prev  ) {
            if(    0 < $mult[-1]
              and      $token =~ /^[+*]/
                   ||  $token =~ /^{./                  #}
            ) {
                $error= "We don't allow a () that contains"
                    . " +, *, or {} to have +, *, or {} applied to it"
                    . " (to prevent excess server load).";
                last;
            }
            my $mult= pop @mult;
            $mult[-1] += $mult;
        }
        if(  '(' eq $token  ) {
            push @mult, 0;
        } elsif(  $token =~ /^[+*]/  ||  $token =~ /^{./  ) {       #}
            $mult[-1]++;
        }
        if(  exists $helper{$token}  ) {
            push @replace, pos($pattern), length($token), $helper{$tok
+en};
        } elsif(  $token =~ /^\\x([0-9a-fA-F][0-9a-fA-F])\z/  ) {
            push @replace, pos($pattern), length($token), chr(hex($1))
+;
        } elsif(  $token =~ /^\\([012][0-7]{0,2})\z/  ) {
            push @replace, pos($pattern), length($token), chr(oct($1))
+;
        } elsif(  $token =~ /^\\\w\z/  ) {
            $error= "Illegal escape ($token) in regex.";
            last;
        }
        $prev= $token;
    }
    if(  6 < $mult[0]  ) {
        $error= "Your regex had $mult[0] instances of +, *, or {}."
            . " We don't allow more than 6 instances"
            . " (to prevent excess server load).";
    }
    if(  $error  ) {
        return "\\9$error";
    }
    while(  @replace  ) {
        my( $pos, $len, $str )= splice @replace, -3;
        substr( $pattern, $pos-$len, $len )= $str;
    }
    return $pattern;
Comment on safe mysql regexes
Download Code
Re: safe mysql regexes
by zby (Vicar) on Feb 13, 2003 at 14:15 UTC
    I imagine that a library of functions for secure user input for diverse purposes might be usefull.

Back to Snippets Section

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: snippet [id://230722]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others pondering the Monastery: (9)
As of 2014-11-26 21:40 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    My preferred Perl binaries come from:














    Results (174 votes), past polls