I liked the problem and so I tried to come up with a solution to it. Because I didn't like the idea of having to re-invent the Perl RegEx parser completely, there are a number of limitations to my program:
And the code is, of course, not optimized, and I am not copletely sure whether it is completely bug-free. Any comments are welcome and I would also be highly interested in a Perlgolf version of this one :-). Well, here it is:
#!perl
use strict;
use warnings;
sub ParensMatch {
my @string = split //, shift;
my $num = 0;
for (@string) {
if ($_ eq '(') { ++$num }
elsif ($_ eq ')') { --$num; return undef if ($num < 0) }
}
($num == 0) ? return 'match' : return undef;
}
sub OrOutsideParens {
my @segments = split /\|/, shift;
return undef if (@segments == 1);
my $tot = '';
my $num = @segments;
for (@segments) {
return undef if (--$num == 0);
$tot .= $_;
return 'Yes' if (ParensMatch($tot));
}
}
sub Combinations {
return $_[0] if (@_ == 1);
return '' if (@_ < 1);
@_ = map { ($_ ne '' and $_ =~ /\|/) ? [split(/\|/, $_)] : [$_] } @
+_;
while (@_ > 1) {
my $second = pop;
my $first = pop;
my $tot = [];
for my $fval (@$first) {
for (@$second) {
push @$tot, $fval ? $_ ? "$fval$_" : $fval : $_ ? $_ : '';
}
}
push @_, $tot;
}
return join('|', @{$_[0]});
}
sub ParseRegex {
my $regex = shift;
if (defined $regex and $regex =~ /\|/ and OrOutsideParens($regex))
+{
my @snippets = split /\|/, $regex;
my $cur = '';
my @regsnipp;
for (@snippets) {
$cur .= '|' unless ($cur eq '');
$cur .= $_;
if (ParensMatch($cur)) {
push @regsnipp, ParseRegex($cur);
$cur = '';
}
}
die 'Unmatched | in RegEx' if ($cur ne '');
$regex = join '|', @regsnipp;
} elsif (defined $regex and $regex =~ /\((.*)\)((\{(\d+),(\d+)\})|\
+?)?/) {
my ($before, $after, $first, $second, $third, $fourth, $fifth) =
+ ($`, $', $1, $2, $3, $4, $5);
my $parsedRegex = ParseRegex($first);
if ($second) {
if ($third) {
$regex = Combinations(($parsedRegex) x $fourth);
$parsedRegex = join '|', map { Combinations($regex, ($pars
+edRegex) x $_) } (0..$fifth - $fourth);
} else {
$parsedRegex = "|$parsedRegex";
}
}
$regex = Combinations(ParseRegex($before), $parsedRegex, ParseRe
+gex($after));
} elsif (defined $regex and $regex =~ /\{(\d+),(\d+)\}/) {
my ($before, $after) = ($`, $');
($before, $after) = (ParseRegex($before), ParseRegex($after));
my $parsedMinimum = Combinations(($before) x $4);
$regex = Combinations(join '|', map { Combinations($parsedMinimu
+m, ($before) x $_) } (0..$2 - $1), $after);
} elsif (defined $regex and $regex =~ /(.)\?/) {
my ($before, $after, $first) = ($`, $', $1);
($before, $after) = (ParseRegex($before), ParseRegex($after));
$regex = Combinations($before, $after) . '|' . Combinations($bef
+ore, $first, $after);
}
return $regex;
}
sub getRegexStrings {
my $regex = shift;
my %seen = map { $_ => 1 } split(/\|/, ParseRegex($regex));
return join($/, sort keys %seen);
}
for my $regex (<DATA>) {
chomp $regex;
if (ParensMatch($regex)) {
print "$/RegEx: <$regex>$/";
print "Matching strings:$/$/" . getRegexStrings($regex) . $/ x 2
+;
} else {
print "$/Mismatched parens in RegEx <$regex>$/$/";
}
}
__DATA__
ab?(c|d){0,3}
abac?
(a|(b(c|d)))d?