Re^2: Find out which subpattern matched in regex

in reply to Re: Find out which subpattern matched in regex
in thread Find out which subpattern matched in regex

Thanks for your comment.

I did a quick benchmark and it turns out that the nested-loop solution is the fastest in most cases. In all other cases - I believe it's when the regular expressions converge into a nice tree-like structure - both named captures and unnamed captures are about the same.

Here's my code sample:

#!/usr/bin/perl -w

use 5.010;
use strict;
use warnings;
use Test::More tests => 3;

use Benchmark qw(cmpthese);

# Candidate patterns: three compiled regexes followed by the integers
# 100..999, which are used as plain digit patterns to lengthen the list.
my @reglist = ( qr/food?/, qr/b[a4]rd?/, qr/baz(?:o+ka)?/, 100..999);
my @lines = (qw(foobarbaz b4rd perl bazooooka football));

# Expected label for each line: "r<index>" of the pattern that matched,
# or the empty string when nothing matched.
my @expect = ("r0", "r1", "", "r2", "r0");

# Verify that all three implementations agree before timing them; a
# benchmark of functions that return different answers is meaningless.
is_deeply(which_reg_loop(\@reglist, \@lines), \@expect, "which_reg_loop")
    and
is_deeply(which_reg_capt(\@reglist, \@lines), \@expect, "which_reg_capt")
    and
is_deeply(which_reg_named(\@reglist, \@lines), \@expect, "which_reg_named")
    or die "Results differ, no bench";

# Grow the input to 5000 lines.  The parentheses are required: without
# them `x` is scalar repetition and would repeat the element count as a
# string, leaving @lines with a single "555..." element.
@lines = (@lines) x 1000;

# A negative count tells Benchmark to run each candidate for at least
# that many CPU seconds rather than a fixed number of iterations.
cmpthese ( -1, {
    loop => sub {
        which_reg_loop(\@reglist, \@lines);
    },
    capt => sub {
        which_reg_capt(\@reglist, \@lines);
    },
    named => sub {
        which_reg_named(\@reglist, \@lines);
    },
});

# which_reg_loop(\@reglist, \@lines)
#
# For every string in @lines, try the patterns of @reglist in order and
# record "r<index>" for the first pattern that matches, or '' when none
# does.  Returns a reference to the array of labels, one per input line.
sub which_reg_loop {
    my ($reglist, $lines) = @_;

    my @labels;
    for my $line (@{$lines}) {
        my $label = '';
        for my $idx (0 .. $#{$reglist}) {
            if ($line =~ $reglist->[$idx]) {
                # First hit wins; later patterns are not consulted.
                $label = "r$idx";
                last;
            }
        }
        push @labels, $label;
    }
    return \@labels;
}

# which_reg_capt(\@reglist, \@lines)
#
# Joins every pattern of @reglist into one alternation, wrapping each in
# its own positional capture group, and matches each line once.  The index
# of the first defined capture identifies the alternative that matched.
# Returns a reference to an array holding "r<index>" per line, or '' for
# lines that match nothing.
#
# NOTE(review): capture positions equal list indices only while the
# supplied patterns contain no capturing groups of their own -- confirm
# against callers.  The combined pattern reports the alternative that
# matches at the leftmost position in the string, which is not necessarily
# the lowest-indexed pattern that matches somewhere in it.
sub which_reg_capt {
    my ($reglist, $lines) = @_;

    # An empty alternation would compile to a pattern that matches every
    # string and, having no groups, returns (1) in list context -- which
    # would be misread as a hit for index 0.
    return [ ('') x @{$lines} ] if !@{$reglist};

    my $alternation = join '|', map { "($_)" } @{$reglist};
    my $giant = qr/$alternation/;

    my @labels;
    for my $line (@{$lines}) {
        my @hits = $line =~ $giant;
        my $label = '';
        for my $idx (0 .. $#hits) {
            # Test definedness, not truth: a group that captured "0" or
            # the empty string still took part in the match.
            next if !defined $hits[$idx];
            $label = "r$idx";
            last;
        }
        push @labels, $label;
    }
    return \@labels;
}

# which_reg_named(\@reglist, \@lines)
#
# Joins every pattern of @reglist into one alternation, wrapping pattern
# number N in a named capture group "rN", and matches each line once.  The
# name of the group that took part in the match is read back from %+.
# Returns a reference to an array holding that name per line, or '' for
# lines that match nothing.
#
# NOTE(review): assumes the supplied patterns define no named groups of
# their own; otherwise %+ can hold more than one key for a line -- confirm
# against callers.
sub which_reg_named {
    my ($reglist, $lines) = @_;

    # With no patterns the alternation would match every string while
    # leaving %+ empty, so the map below would emit nothing per line and
    # the result would no longer line up with the input.
    return [ ('') x @{$lines} ] if !@{$reglist};

    my $alternation = join '|',
        map { "(?<r$_>$reglist->[$_])" } 0 .. $#{$reglist};
    my $giant = qr/$alternation/;

    # %+ exposes only the named groups that are defined after the match.
    my @ret = map { $_ =~ $giant ? (keys %+) : '' } @{$lines};
    return \@ret;
}
[download]

In Section Seekers of Perl Wisdom