Beefy Boxes and Bandwidth Generously Provided by pair Networks
No such thing as a small change
 
PerlMonks  

Re: Slow Regex - How to Optimize

by GrandFather (Sage)
on Aug 30, 2005 at 23:32 UTC ( #487981=note: print w/ replies, xml ) Need Help??


in reply to Slow Regex - How to Optimize

Benchmark results for the curious

Rate Original dave_the_m rnahi borisz ikegami + anon Original 17.7/s -- -97% -99% -99% -100% + -100% dave_the_m 603/s 3313% -- -76% -82% -87% + -87% rnahi 2502/s 14054% 315% -- -26% -46% + -47% borisz 3392/s 19088% 462% 36% -- -27% + -28% ikegami 4642/s 26158% 669% 86% 37% -- + -2% anon 4736/s 26693% 685% 89% 40% 2% + --
use strict; use warnings; use Benchmark 'cmpthese'; my @sub_code = <DATA>; my %SUBS = ( sqrt => 1, CloseHandle => 2, addch => 2, Beep => 2, FillConsoleOutputCharacter => 2, GetConsoleScreenBufferInfo => 2, GetLargestConsoleWindowSize => 2, GetStdHandle => 2, move => 2, ReadConsoleInput => 2, SetConsoleCursorPosition => 2, SetConsoleMode => 2, SetConsoleTextAttribute => 2, SetConsoleTitle => 2, WriteConsole => 2 ); cmpthese( -1, { 'Original' => 'original ()', 'dave_the_m' => 'dave_the_m ()', 'rnahi' => 'rnahi ()', 'borisz' => 'borisz ()', 'ikegami' => 'ikegami ()', 'anon' => 'anon ()', } ); sub anon { my @subs; foreach my $line ( @sub_code ) { if ($line =~ /[^a-zA-Z]([a-zA-Z_]+[a-zA-Z_0-9]*)[^a-zA-Z]*\(/ and exists $SUBS{$1}) { push @subs, $1 } } } sub ikegami { my @subs; foreach my $line ( @sub_code ) { if ( $line =~ /(\w+)\(/ ) { if ( exists $SUBS{$1} ) { push( @subs, $1 ); } } } } sub original { my @subs; foreach my $line ( @sub_code ) { foreach my $sub ( keys %SUBS ) { push @subs, $sub if $line =~ /[^a-zA-Z]$sub[^a-zA-Z]*\(/; } } } sub dave_the_m { my @subs; foreach my $sub ( keys %SUBS ) { my $re = qr/[^a-zA-Z]$sub[^a-zA-Z]*\(/; foreach my $line ( @sub_code ) { if ( $line =~ $re ) { push( @subs, $sub) } } } } sub rnahi { my @subs; my $code = join("", @sub_code); foreach my $sub ( keys %SUBS ) { while ( $code =~ /\b$sub\b\(/g ) { push( @subs, $sub ) ; } } } sub borisz { my @subs; my $str = join '|', sort { length $b <=> length $a } keys %SUBS; my $re = qr/[^a-zA-Z]($str)[^a-zA-Z]*\(/; /$re/ and push @subs, $1 for ( @sub_code ); } __DATA__ /* Dumb curses implementation for Win32 port */ #include "curses.h" static HANDLE _conin; static HANDLE _conout; void wclear( Window * w ) { COORD sz, xy = { 0, 0 }; DWORD wrote; sz = GetLargestConsoleWindowSize( _conout ); FillConsoleOutputCharacter( _conout, ' ', sz.X * sz.Y, xy, &wrote +); } void attron( unsigned short attr ) { SetConsoleTextAttribute( _conout, attr ); } void attroff( unsigned short attr ) { SetConsoleTextAttribute( _conout, FOREGROUND_RED | FOREGROUND_GREE +N | FOREGROUND_BLUE ); } void move( int y, int x ) { COORD xy = { x, y }; SetConsoleCursorPosition( _conout, xy ); } void mvaddstr( int y, int x, const char * str ) { DWORD wrote; move( y,x ); WriteConsole( _conout, str, strlen( str ), &wrote, 0 ); } void addch( int ch ) { DWORD wrote; WriteConsole( _conout, &ch, 1, &wrote, 0 ); } void mvaddch( int y, int x, int ch ) { move( y, x ); addch( ch ); } void wclrtoeol( Window * w ) { CONSOLE_SCREEN_BUFFER_INFO info; DWORD wrote; GetConsoleScreenBufferInfo( _conout, &info ); FillConsoleOutputCharacter( _conout, ' ', info.dwSize.X - info.dwC +ursorPosition.X, info.dwCursorPosition, &wrote ); } void wrefresh( Window * w ) { /* Do nothing - all changes are immediate */ } int wgetch( Window * w ) { INPUT_RECORD in; DWORD got; while( 1 ) { do ReadConsoleInput( _conin, &in, 1, &got ); while( KEY_EVENT != in.EventType || 0 == in.Event.KeyEvent.bKe +yDown ); /* Translate direction keys into vi(1) motion */ switch( in.Event.KeyEvent.wVirtualKeyCode ) { case VK_LEFT: return 'h'; case VK_RIGHT: return 'l'; case VK_UP: return 'k'; case VK_DOWN: return 'j'; /* Ignore standard modifier keys */ case VK_SHIFT: case VK_CONTROL: case VK_MENU: continue; } return in.Event.KeyEvent.uChar.AsciiChar; } } void beep( void ) { Beep( 650, 250 ); } void noecho( void ) { /* Do nothing */ } void raw( void ) { SetConsoleMode( _conin, ENABLE_PROCESSED_INPUT ); } int initscr( void ) { _conin = GetStdHandle( STD_INPUT_HANDLE ); _conout = GetStdHandle( STD_OUTPUT_HANDLE ); SetConsoleTitle( "Sudoku" ); return 1; } void endwin( void ) { CloseHandle( _conin ); CloseHandle( _conout ); }

Perl is Huffman encoded by design.


Comment on Re: Slow Regex - How to Optimize
Select or Download Code
Replies are listed 'Best First'.
Re^2: Slow Regex - How to Optimize
by ikegami (Pope) on Aug 31, 2005 at 04:40 UTC

    Two notes:

    1) borisz's solution should be even faster on Perl 5.10 with due to a new regexp engine optimization.

    2) My solution and anonymonk's assume that %SUBS keys aren't regexps.

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: note [id://487981]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others lurking in the Monastery: (5)
As of 2015-08-01 03:51 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    The top three priorities of my open tasks are (in descending order of likelihood to be worked on) ...









    Results (285 votes), past polls