(although using eval to convert the arrayref returned from parse into a closure like your tst could be interesting even if compile could end up longer than parse)
Ah, heck with it — compile turned out to be only slightly longer than search...
#!/usr/bin/perl
# -*- CPerl -*-
use strict;
use warnings;
# sparse range structure:
# array of arrayref/number
# single integers represented as number
# contigous ranges as arrayref [LOW,HIGH] inclusive
# given: string "A,B,C,X-Y,Z"
# return: sorted array [A, B, C, [X, Y], Z]
sub parse ($) {
my @elements = split /,/, shift;
foreach (@elements) {
s/\s+//g; # prune whitespace
next unless m/\d+-\d+/; # skip conversion if single integer
$_ = [split /-/, $_]; # convert ranges to arrayrefs
}
# sort range set
@elements = sort {(ref($a)?$a->[0]:$a) <=> (ref($b)?$b->[0]:$b)} @el
+ements;
# merge overlapping loose elements into preceding ranges
for (my $i = 0; $i < $#elements; $i++) {
next unless ref $elements[$i]; # skip single integers
while ($i+1 <= $#elements and $elements[$i+1] <= $elements[$i][1])
{ splice @elements, $i+1, 1 } # remove elements included in r
+ange
}
# coalesce contiguous integers into ranges
for (my $i = 0; $i < $#elements; $i++) {
next if ref $elements[$i]; # skip ranges
if ($elements[$i]+1 == $elements[$i+1]) {
my $j = 1+$i;
$j++ while !ref($elements[$j]) && $elements[$j]+1 == $elements[$
+j+1];
splice @elements, $i, 1+$j-$i, [$elements[$i], $elements[$j]];
}
}
# merge adjacent loose elements into succeeding ranges
for (my $i = 0; $i < $#elements; $i++) {
next if ref $elements[$i]; # skip ranges
next unless ref $elements[$i+1]; # but next element is a range
# There can be at most one such element, since contiguous integers
+ were
# coalesced into ranges above.
if ($elements[$i]+1 == $elements[$i+1][0])
{ splice @elements, $i, 2, [$elements[$i], $elements[$i+1][1]] }
}
# merge adjacent loose elements into preceding ranges
for (my $i = 0; $i < $#elements; $i++) {
next unless ref $elements[$i]; # skip single integers
next if ref $elements[$i+1]; # but next element is a single int
+eger
# There can be at most one such element, since contiguous integers
+ were
# coalesced into ranges above.
if ($elements[$i][1]+1 == $elements[$i+1])
{ splice @elements, $i, 2, [$elements[$i][0], $elements[$i+1]] }
}
# merge overlapping ranges
for (my $i = 0; $i < $#elements; $i++) {
next unless ref $elements[$i] and ref $elements[$i+1];
splice @elements, $i, 2, [$elements[$i][0], $elements[$i+1][1]]
if $elements[$i][1] >= $elements[$i+1][0];
}
# merge adjacent ranges
for (my $i = 0; $i < $#elements; $i++) {
next unless ref $elements[$i] and ref $elements[$i+1];
splice @elements, $i, 2, [$elements[$i][0], $elements[$i+1][1]]
if $elements[$i][1]+1 == $elements[$i+1][0];
}
return \@elements;
}
# given: sorted array from sub parse and integer
# return true if the integer is in the sorted array
sub search ($$) {
my $set = shift;
my $num = shift;
my $left = 0; my $right = $#$set;
my $i = $#$set >> 1; # bitshift for integer /2
while ($left < $right) {
if (ref($set->[$i])) { # evaluate a range
return 1 # number within this range
if $num >= $set->[$i][0] && $num <= $set->[$i][1];
if ($num > $set->[$i][0]) { $left = $i+1 }
else { $right = $i-1 }
} else { # evaluate a single integer
return 1 # number matched
if $num == $set->[$i];
if ($num > $set->[$i]) { $left = $i+1 }
else { $right = $i-1 }
}
$i = ($left + $right) >> 1; # bitshift for integer /2
}
# last check
if (ref($set->[$i])) {
return $num >= $set->[$i][0] && $num <= $set->[$i][1]
} else {
return $num == $set->[$i]
}
}
# given: sorted array from sub parse
# return: closure accepting an integer and returning true iff it is in
+ the set
sub wrap ($) {
my $set = shift;
return sub { return search $set, shift };
}
# given: sorted array from sub parse
# return: closure same as from wrap but using unrolled binary search
sub compile ($) {
my $set = shift;
my $expand; $expand = sub {
my $mid = $#_ >> 1;
my $branchcond; my $matchcond;
if (ref $_[$mid]) { # pivot on range
$branchcond = "\$X > $_[$mid][0]";
$matchcond = "\$X >= $_[$mid][0] && \$X <= $_[$mid][1]";
} else { # pivot on single integer
$branchcond = "\$X > $_[$mid]";
$matchcond = "\$X == $_[$mid]";
}
my $fragment = " return $matchcond;\n";
if ($mid+1 <= $#_) {
$fragment =
(" return 1 if $matchcond;\n".
" if ($branchcond) {\n".
$expand->(@_[$mid+1 .. $#_]).
" }");
if ($mid-1 >= 0) {
$fragment .=
(" else {\n".
$expand->(@_[0 .. $mid-1]).
" }\n");
} else {
$fragment .= "\n";
}
}
return $fragment
};
my $tree = $expand->(@$set);
return eval <<CODE
sub {
my \$X = shift;
$tree
return !1; # false value for no match if end reached
}
CODE
}
my $Set = parse shift;
my $Test = wrap $Set;
my $Clos = compile $Set;
use Data::Dumper;
print Data::Dumper->new([$Set],[qw(Set)])->Indent(0)->Dump,"\n";
print Data::Dumper->new([$Clos], [qw(Closure)])->Deparse(1)->Dump;
sub sym ($) { $_[0] ? 'X' : ' ' }
printf "%4s | basic | wrapped | compile\n", '';
foreach (@ARGV) {
printf "%4d | %1s | %1s | %1s\n",
$_, sym(search $Set, $_), sym($Test->($_)), sym($Clos->($_));
}
Edit 2021-02-02 by jcb: Use B::Deparse with Data::Dumper to also output the compiled closure. (line 169 inserted)