# Example: m//o with an interpolated string variable.
$BIG_REGEX = " ... ";
sub do_something {
    # The expression is "fixed" into place on first use.
    # !! It's a constant.
    if ( $sexpert =~ m/$BIG_REGEX/o) { ... }
}
do_something(); # the expression is compiled and fixed into place
do_something(); # the compiled expression is directly re-used
####
# Example: the same match without /o.
$HUGE_REGEX = " ... ";
sub do_it {
    # The expression is recompiled whenever $HUGE_REGEX changes
    if ( $sexpert =~ m/$HUGE_REGEX/) { ... }
}
do_it(); # the expression is compiled
do_it(); # the string didn't change so the compiled regex is re-used
$HUGE_REGEX = " !!! ";
do_it(); # the string changed so the regex is recompiled.
do_it(); # the new regex re-used.
####
# Example: matching directly against a qr// object.
# I need a better way to phrase this. Ideas?
$BIGGER_REGEX = qr/ ... /;
# The contents of qr// are a constant expression so are compiled at compile-time.
sub do_it_again {
    if ( $sexpert =~ $BIGGER_REGEX ) { ... }
}
do_it_again(); # the pre-compiled regex is used directly.
do_it_again(); # ditto
# This qr// was compiled at compile-time. Store a copy in $BIGGER_REGEX. No recompilation
# occurs now.
$BIGGER_REGEX = qr/ !!! /;
do_it_again(); # Like before, a pre-compiled regex is used directly. Nothing special happens.
####
# Example: a literal pattern, followed by the operations it runs.
$data =~ m/[spectal]{9}/; # Match against $data
# match(/"[spectal]{9}"/)
####
# Example: a pattern held in a plain string, used bare and inside m//.
$re = "[spectal]{9}";
$data =~ $re;
# match()
# regcomp()    # Compile the expression
# regcreset
$data =~ m/$re/;
# match()
# regcomp()    # Compile the expression
# regcreset
####
# Example: a constant qr// object (the matches against it follow).
$qr = qr/[spectal]{9}/; # This was compiled during BEGIN{} and has no runtime effect.
$data =~ $qr;
# match()
# regcomp()    # Re-use the expression
# regcreset
$data =~ m/$qr/;
# match()
# regcomp()    # Re-use the expression
# regcreset
####
# Example: a qr// built from an interpolated string.
$re = '[spectal]{9}';
$qr = qr/$re/; # Compile the expression
# qr()
# regcomp()    # Compile the expression
# regcreset
$data =~ $qr;
# match()
# regcomp()    # Re-use the expression
# regcreset
$data =~ m/$qr/;
# match()
# regcomp()    # Re-use the expression
# regcreset
####
# Example: interpolating more than one fragment into a single match.
$qr_a = qr/\w/; # Pre-compiled during BEGIN {}
$qr_b = qr/\d/; # Pre-compiled during BEGIN {}
# Stringify both $qr_a and $qr_b then compile new regex. The previous pre-compilation is not used
$data =~ m/$qr_a$qr_b/;
# match()
# regcomp()    # Compile the expression
# regcreset
$re_a = "\\w";
$re_b = "\\d";
$data =~ m/$re_a$re_b/;
# match()
# regcomp()    # Compile the expression
# regcreset
####
# Example: /o on a match that interpolates a string.
$re = '[spectal]{9}';
$data =~ m/$re/o;
# match()
# regcomp()    # Compile the expression and remove this step
# regcreset
####
# Example: /o on the qr// itself; the later match goes through $qr.
$re = '[spectal]{9}';
$qr = qr/$re/o;
# qr()
# regcomp()    # Compile the expression and remove this step
# regcreset
$data =~ m/$qr/;
# match()
# regcomp()    # Re-use the expression
# regcreset
####
# Example: /o on a match that interpolates an already-compiled qr// object.
# (qr/ ... / stands in for any precompiled expression.)
$qr = qr/ ... /;
$data =~ m/$qr/o;
# match()
# regcomp()    # Associate the precompiled expression and remove this step
# regcreset
####
# This is a convenient source of data to match against. (Plato's _The_Republic_)
$data = q[I WENT down yesterday to the Piraeus with Glaucon the son of Ariston, that I might offer up my prayers to the goddess; and also because I wanted to see in what manner they would celebrate the festival, which was a new thing. I was delighted with the procession of the inhabitants; but that of the Thracians was equally, if not more, beautiful. When we had finished our prayers and viewed the spectacle, we turned in the direction of the city; and at that instant Polemarchus the son of Cephalus chanced to catch sight of us from a distance as we were starting on our way home, and told his servant to run and bid us wait for him. 
The servant took hold of me by the cloak behind, and said: Polemarchus desires you to wait.];

# NOTE(review): cmpthese() is not imported anywhere in the visible source;
# presumably `use Benchmark qw(cmpthese);` appears elsewhere - confirm.

# Both of these constructs produce the same structure. Any benchmarking
# differences between these should be attributed to system noise. qr() merely
# copies the already-compiled expression into the target. Perl has already compiled
# the regular expression during the initial parsing so this qr[] can be reused
# and is a very fast assignment.
sub qr_0_a { $qr = qr[[spectal]{9}];  1 }
sub qr_0_b { $qr = qr[[spectal]{9}]o; 1 }
cmpthese( 0, { qr_0_a => \&qr_0_a, qr_0_b => \&qr_0_b } );
# Benchmark: running qr_0_a, qr_0_b, each for at least 3 CPU seconds...
#     qr_0_a:  4 wallclock secs ( 3.11 usr +  0.00 sys =  3.11 CPU) @ 14585.21/s (n=45360)
#     qr_0_b:  4 wallclock secs ( 3.10 usr +  0.00 sys =  3.10 CPU) @ 14631.94/s (n=45359)
#           Rate qr_0_a qr_0_b
# qr_0_a 14585/s     --    -0%
# qr_0_b 14632/s     0%     --

# This demonstrates that in both cases it is a simple assignment. *No* compilation
# occurs and the /o flag is completely useless here.
sub qr_1_a { $qr = qr[[spectal]{9}];  $data =~ $qr; 1 }
sub qr_1_b { $qr = qr[[spectal]{9}]o; $data =~ $qr; 1 }
cmpthese( 0, { qr_1_a => \&qr_1_a, qr_1_b => \&qr_1_b } );
# Benchmark: running qr_1_a, qr_1_b, each for at least 3 CPU seconds...
#     qr_1_a:  3 wallclock secs ( 3.20 usr +  0.00 sys =  3.20 CPU) @ 2692.50/s (n=8616)
#     qr_1_b:  4 wallclock secs ( 3.21 usr +  0.00 sys =  3.21 CPU) @ 2684.11/s (n=8616)
#          Rate qr_1_b qr_1_a
# qr_1_b 2684/s     --    -0%
# qr_1_a 2692/s     0%     --

# Again, no difference that is not attributable to system noise.
sub qr_1_c { $qr = qr[[spectal]{9}];  $data =~ m/$qr/o; 1 }
sub qr_1_d { $qr = qr[[spectal]{9}]o; $data =~ m/$qr/o; 1 }
cmpthese( 0, { qr_1_a => \&qr_1_a, qr_1_b => \&qr_1_b, qr_1_c => \&qr_1_c, qr_1_d => \&qr_1_d } );
# Benchmark: running qr_1_a, qr_1_b, qr_1_c, qr_1_d, each for at least 3 CPU seconds...
#     qr_1_a:  3 wallclock secs ( 3.12 usr +  0.00 sys =  3.12 CPU) @ 2702.56/s (n=8432)
#     qr_1_b:  2 wallclock secs ( 3.13 usr +  0.00 sys =  3.13 CPU) @ 2693.93/s (n=8432)
#     qr_1_c:  5 wallclock secs ( 3.21 usr +  0.00 sys =  3.21 CPU) @ 2747.35/s (n=8819)
#     qr_1_d:  2 wallclock secs ( 3.21 usr +  0.00 sys =  3.21 CPU) @ 2744.55/s (n=8810)
#          Rate qr_1_b qr_1_a qr_1_d qr_1_c
# qr_1_b 2694/s     --    -0%    -2%    -2%
# qr_1_a 2703/s     0%     --    -2%    -2%
# qr_1_d 2745/s     2%     2%     --    -0%
# qr_1_c 2747/s     2%     2%     0%     --

# This shows the very slight difference between c/d and a/b. In a/b
# the regcomp on the match() is a very fast operation. In c/d every match()
# except the first has no regcomp() operation.

# Appending /o on a qr// expression has the same effect as on a m//
# expression. In the following code the qr// expression only compiles once
# and all repetitions after the first go-round merely copy the Regex object.
# So the effect is only relevant if you are interpolating something into your
# regular expression. As always, compilation only occurs the first time and then
# /o prevents the compilation from occurring again. Without /o the expression would
# be properly recompiled each time 'round. The difference is between having $qro
# reflect the currently interpolated $_ variable and having it permanently fixed as
# '9' which is the first value to be used. I don't know of any circumstance when this
# is the required behaviour so I'd categorize any use of /o with qr// as a bug.

# Count backwards from 9
for (qw( 9 8 7 6 5 4 3 2 1 0 )) {
    $qro = qr[[spectal]{$_}]o;
}

# Compare against a freshly built qr// rather than a hard-coded string: the
# stringified form is "(?-xism:[spectal]{9})" on older perls but
# "(?^:[spectal]{9})" from 5.14 on, so a literal would only suit one of them.
if ( $qro ne qr[[spectal]{9}] ) {
    die "qr//o was recompiled inside the loop: got $qro";
}
# $qro still stringifies like qr[[spectal]{9}] which demonstrates that the
# compilation step was removed after its one-time-only execution.

# All of these "variations" produce the same data and follow the same
# execution path. Any speed differences should be attributed to system noise
# as in a very real sense - they are 100% identical.
# Since $qr and $qro contain
# precompiled regular expressions it is *nearly* as efficient as a plain match like
# $data =~ /[spectal]{9}/. The overhead is the regcreset() and regcomp() operations.
# Since $qr and $qro already contain compiled regular expressions regcomp() skips
# all the compilation and returns very quickly. This is a low-overhead operation.
$qr  = qr[[spectal]{9}];
$qro = qr[[spectal]{9}]o;

# Match $data against each object, bare (a/b) and interpolated into // (c/d).
sub qr_2_a { $data =~ $qr }
sub qr_2_b { $data =~ $qro }
sub qr_2_c { $data =~ /$qr/ }
sub qr_2_d { $data =~ /$qro/ }
cmpthese( 0, { qr_2_a => \&qr_2_a, qr_2_b => \&qr_2_b, qr_2_c => \&qr_2_c, qr_2_d => \&qr_2_d } );
# Benchmark: running qr_2_a, qr_2_b, qr_2_c, qr_2_d, each for at least 3 CPU seconds...
#     qr_2_a:  4 wallclock secs ( 3.17 usr +  0.00 sys =  3.17 CPU) @ 3850.47/s (n=12206)
#     qr_2_b:  2 wallclock secs ( 3.07 usr +  0.00 sys =  3.07 CPU) @ 3812.70/s (n=11705)
#     qr_2_c:  3 wallclock secs ( 3.15 usr +  0.00 sys =  3.15 CPU) @ 3836.51/s (n=12085)
#     qr_2_d:  3 wallclock secs ( 3.17 usr +  0.00 sys =  3.17 CPU) @ 3812.30/s (n=12085)
#          Rate qr_2_d qr_2_b qr_2_c qr_2_a
# qr_2_d 3812/s     --    -0%    -1%    -1%
# qr_2_b 3813/s     0%     --    -1%    -1%
# qr_2_c 3837/s     1%     1%     --    -0%
# qr_2_a 3850/s     1%     1%     0%     --

# A small difference but it's just system noise. There is no actual difference here.

# All of these "variations" produce the same data and follow the same
# execution path. Any speed differences should be attributed to system noise
# as in a very real sense - they are 100% identical. Since $qr and $qro contain
# precompiled regular expressions it is *nearly* as efficient as a plain match like
# $data =~ /[spectal]{9}/. The overhead is the regcreset() and regcomp() operations.
# Since $qr and $qro already contain compiled regular expressions regcomp() skips
# all the compilation and returns very quickly. This is a low-overhead operation.
# Two further variants: the same interpolated matches with /o on the match itself.
sub qr_2_e {
    return $data =~ /$qr/o;
}

sub qr_2_f {
    return $data =~ /$qro/o;
}

# Run all six variants side by side.
cmpthese(
    0,
    {
        qr_2_a => \&qr_2_a,
        qr_2_b => \&qr_2_b,
        qr_2_c => \&qr_2_c,
        qr_2_d => \&qr_2_d,
        qr_2_e => \&qr_2_e,
        qr_2_f => \&qr_2_f,
    }
);
# Benchmark: running qr_2_a, qr_2_b, qr_2_c, qr_2_d, qr_2_e, qr_2_f, each for at least 3 CPU seconds...
#     qr_2_a:  4 wallclock secs ( 3.26 usr +  0.00 sys =  3.26 CPU) @ 3864.72/s (n=12599)
#     qr_2_b:  5 wallclock secs ( 3.05 usr +  0.00 sys =  3.05 CPU) @ 3880.33/s (n=11835)
#     qr_2_c:  3 wallclock secs ( 3.11 usr +  0.00 sys =  3.11 CPU) @ 3858.52/s (n=12000)
#     qr_2_d:  3 wallclock secs ( 3.11 usr +  0.00 sys =  3.11 CPU) @ 3858.52/s (n=12000)
#     qr_2_e:  4 wallclock secs ( 3.09 usr +  0.00 sys =  3.09 CPU) @ 3950.16/s (n=12206)
#     qr_2_f:  5 wallclock secs ( 3.09 usr +  0.00 sys =  3.09 CPU) @ 3911.00/s (n=12085)
#          Rate qr_2_d qr_2_c qr_2_a qr_2_b qr_2_f qr_2_e
# qr_2_d 3859/s     --    -0%    -0%    -1%    -1%    -2%
# qr_2_c 3859/s     0%     --    -0%    -1%    -1%    -2%
# qr_2_a 3865/s     0%     0%     --    -0%    -1%    -2%
# qr_2_b 3880/s     1%     1%     0%     --    -1%    -2%
# qr_2_f 3911/s     1%     1%     1%     1%     --    -1%
# qr_2_e 3950/s     2%     2%     2%     2%     1%     --

# e/f are in reality ever so slightly faster than a/b/c/d which are all equivalent.
# This just shows that the speed gain is small enough to disappear right into
# system noise.

# If the /o flag is added onto /$qr/ then that expression becomes forever bound to
# whatever value $qr contained at the time it was first executed. It turns out that /o
# works by having the regcomp() operation remove itself from the executing program.