in reply to Nonrepeating characters in an RE
Here are a couple of solutions that assume that in a string being tested for character repetition, the only characters that will be checked for repetition are those that appear in a "template". E.g., if the template is 'abc', the strings 'xxx' and 'xxxabcxxx' have no repetition, and the strings 'abca' and 'xaxax' do.
Hash solution (likely the fastest, but I haven't Benchmark-ed):
Win8 Strawberry 5.8.9.5 (32) Tue 08/16/2022 3:05:36
C:\@Work\Perl\monks
>perl
use strict;
use warnings;
use Test::More;
use Test::NoWarnings;
my @Tests = (
'NONE of these strings have repeats',
# repeats
# present
# template string true/false
[ '', '', '', ],
[ '', 'a', '', ],
[ '', 'aa', '', ],
[ '', 'aaa', '', ],
[ '', 'abc', '', ],
[ 'cba', '', '', ],
[ 'cba', 'a', '', ],
[ 'cba', 'abc', '', ],
[ 'cba', 'xxxxxx', '', ],
[ 'cba', 'xxxaxxx', '', ],
[ 'cba', 'xxxabcxxx', '', ],
[ 'cba', 'xbxxcxxax', '', ],
[ 'abcde', '', '', ],
[ 'abcde', 'a', '', ],
[ 'abcde', 'abc', '', ],
[ 'abcde', 'xxxxxx', '', ],
[ 'abcde', 'xxxaxxx', '', ],
[ 'abcde', 'xxxabcxxx', '', ],
[ 'abcde', 'xbxxcxxax', '', ],
[ 'abcdef', '', '', ],
[ 'abcdef', 'a', '', ],
[ 'abcdef', 'abc', '', ],
[ 'abcdef', 'xxxxxx', '', ],
[ 'abcdef', 'xxxaxxx', '', ],
[ 'abcdef', 'xxxabcxxx', '', ],
[ 'abcdef', 'xbxxcxxax', '', ],
[ 'abcdefa', '', '', ],
[ 'abcdefa', 'a', '', ],
[ 'abcdefa', 'abc', '', ],
[ 'abcdefa', 'xxxxxx', '', ],
[ 'abcdefa', 'xxxaxxx', '', ],
[ 'abcdefa', 'xxxabcxxx', '', ],
[ 'abcdefa', 'xbxxcxxax', '', ],
[ 'abacdaefa', '', '', ],
[ 'abacdaefa', 'a', '', ],
[ 'abacdaefa', 'abc', '', ],
[ 'abacdaefa', 'xxxxxx', '', ],
[ 'abacdaefa', 'xxxaxxx', '', ],
[ 'abacdaefa', 'xxxabcxxx', '', ],
[ 'abacdaefa', 'xbxxcxxax', '', ],
'ALL of these strings have repeats',
[ 'abc', 'aa', 1, ],
[ 'abc', 'aab', 1, ],
[ 'abc', 'aba', 1, ],
[ 'abc', 'baa', 1, ],
[ 'abcdefa', 'aa', 1, ],
[ 'abcdefa', 'aab', 1, ],
[ 'abcdefa', 'aba', 1, ],
[ 'abcdefa', 'baa', 1, ],
[ 'abacdaefa', 'aa', 1, ],
[ 'abacdaefa', 'aab', 1, ],
[ 'abacdaefa', 'aba', 1, ],
[ 'abacdaefa', 'baa', 1, ],
);
my @additional = qw(Test::NoWarnings); # these add 1 test
plan 'tests' => (scalar grep { ref eq 'ARRAY' } @Tests)
+ @additional;
VECTOR:
for my $ar_vector (@Tests) {
if (not ref $ar_vector) { # it's a comment
note $ar_vector;
next VECTOR;
}
my ($template, $string, $expected_repeats) = @$ar_vector;
my $got_repeats = repeats($template, $string);
is $got_repeats, $expected_repeats,
"'$template' -> '$string' " . ($expected_repeats ? '' : 'NO ')
+ . 'repeats'
;
} # end for VECTOR
{ # begin closure for repeats()
my $current_template;
my %template_char;
sub repeats {
my ($template,
$string,
) = @_;
if (! defined $current_template || $current_template ne $template)
+ {
%template_char = map { $_ => 1 } split '', $template;
}
my %seen;
++$seen{$_} for grep $template_char{$_}, split '', $string;
return 0 < grep { $_ > 1 } values %seen;
}
} # end closure for repeats()
^Z
1..53
# NONE of these strings have repeats
ok 1 - '' -> '' NO repeats
ok 2 - '' -> 'a' NO repeats
ok 3 - '' -> 'aa' NO repeats
ok 4 - '' -> 'aaa' NO repeats
ok 5 - '' -> 'abc' NO repeats
ok 6 - 'cba' -> '' NO repeats
ok 7 - 'cba' -> 'a' NO repeats
ok 8 - 'cba' -> 'abc' NO repeats
ok 9 - 'cba' -> 'xxxxxx' NO repeats
ok 10 - 'cba' -> 'xxxaxxx' NO repeats
ok 11 - 'cba' -> 'xxxabcxxx' NO repeats
ok 12 - 'cba' -> 'xbxxcxxax' NO repeats
ok 13 - 'abcde' -> '' NO repeats
ok 14 - 'abcde' -> 'a' NO repeats
ok 15 - 'abcde' -> 'abc' NO repeats
ok 16 - 'abcde' -> 'xxxxxx' NO repeats
ok 17 - 'abcde' -> 'xxxaxxx' NO repeats
ok 18 - 'abcde' -> 'xxxabcxxx' NO repeats
ok 19 - 'abcde' -> 'xbxxcxxax' NO repeats
ok 20 - 'abcdef' -> '' NO repeats
ok 21 - 'abcdef' -> 'a' NO repeats
ok 22 - 'abcdef' -> 'abc' NO repeats
ok 23 - 'abcdef' -> 'xxxxxx' NO repeats
ok 24 - 'abcdef' -> 'xxxaxxx' NO repeats
ok 25 - 'abcdef' -> 'xxxabcxxx' NO repeats
ok 26 - 'abcdef' -> 'xbxxcxxax' NO repeats
ok 27 - 'abcdefa' -> '' NO repeats
ok 28 - 'abcdefa' -> 'a' NO repeats
ok 29 - 'abcdefa' -> 'abc' NO repeats
ok 30 - 'abcdefa' -> 'xxxxxx' NO repeats
ok 31 - 'abcdefa' -> 'xxxaxxx' NO repeats
ok 32 - 'abcdefa' -> 'xxxabcxxx' NO repeats
ok 33 - 'abcdefa' -> 'xbxxcxxax' NO repeats
ok 34 - 'abacdaefa' -> '' NO repeats
ok 35 - 'abacdaefa' -> 'a' NO repeats
ok 36 - 'abacdaefa' -> 'abc' NO repeats
ok 37 - 'abacdaefa' -> 'xxxxxx' NO repeats
ok 38 - 'abacdaefa' -> 'xxxaxxx' NO repeats
ok 39 - 'abacdaefa' -> 'xxxabcxxx' NO repeats
ok 40 - 'abacdaefa' -> 'xbxxcxxax' NO repeats
# ALL of these strings have repeats
ok 41 - 'abc' -> 'aa' repeats
ok 42 - 'abc' -> 'aab' repeats
ok 43 - 'abc' -> 'aba' repeats
ok 44 - 'abc' -> 'baa' repeats
ok 45 - 'abcdefa' -> 'aa' repeats
ok 46 - 'abcdefa' -> 'aab' repeats
ok 47 - 'abcdefa' -> 'aba' repeats
ok 48 - 'abcdefa' -> 'baa' repeats
ok 49 - 'abacdaefa' -> 'aa' repeats
ok 50 - 'abacdaefa' -> 'aab' repeats
ok 51 - 'abacdaefa' -> 'aba' repeats
ok 52 - 'abacdaefa' -> 'baa' repeats
ok 53 - no warnings
Regex approach:
Win8 Strawberry 5.8.9.5 (32) Tue 08/16/2022 3:56:38
C:\@Work\Perl\monks
>perl
use strict;
use warnings;
use Test::More;
use Test::NoWarnings;
my @Tests = (
'NONE of these strings have repeats',
# repeats
# present
# template string true/false
[ '', '', '', ],
[ '', 'a', '', ],
[ '', 'aa', '', ],
[ '', 'aaa', '', ],
[ '', 'abc', '', ],
[ 'cba', '', '', ],
[ 'cba', 'a', '', ],
[ 'cba', 'abc', '', ],
[ 'cba', 'xxxxxx', '', ],
[ 'cba', 'xxxaxxx', '', ],
[ 'cba', 'xxxabcxxx', '', ],
[ 'cba', 'xbxxcxxax', '', ],
[ 'abcde', '', '', ],
[ 'abcde', 'a', '', ],
[ 'abcde', 'abc', '', ],
[ 'abcde', 'xxxxxx', '', ],
[ 'abcde', 'xxxaxxx', '', ],
[ 'abcde', 'xxxabcxxx', '', ],
[ 'abcde', 'xbxxcxxax', '', ],
[ 'abcdef', '', '', ],
[ 'abcdef', 'a', '', ],
[ 'abcdef', 'abc', '', ],
[ 'abcdef', 'xxxxxx', '', ],
[ 'abcdef', 'xxxaxxx', '', ],
[ 'abcdef', 'xxxabcxxx', '', ],
[ 'abcdef', 'xbxxcxxax', '', ],
[ 'abcdefa', '', '', ],
[ 'abcdefa', 'a', '', ],
[ 'abcdefa', 'abc', '', ],
[ 'abcdefa', 'xxxxxx', '', ],
[ 'abcdefa', 'xxxaxxx', '', ],
[ 'abcdefa', 'xxxabcxxx', '', ],
[ 'abcdefa', 'xbxxcxxax', '', ],
[ 'abacdaefa', '', '', ],
[ 'abacdaefa', 'a', '', ],
[ 'abacdaefa', 'abc', '', ],
[ 'abacdaefa', 'xxxxxx', '', ],
[ 'abacdaefa', 'xxxaxxx', '', ],
[ 'abacdaefa', 'xxxabcxxx', '', ],
[ 'abacdaefa', 'xbxxcxxax', '', ],
'ALL of these strings have repeats',
[ 'abc', 'aa', 1, ],
[ 'abc', 'aab', 1, ],
[ 'abc', 'aba', 1, ],
[ 'abc', 'baa', 1, ],
[ 'abcdefa', 'aa', 1, ],
[ 'abcdefa', 'aab', 1, ],
[ 'abcdefa', 'aba', 1, ],
[ 'abcdefa', 'baa', 1, ],
[ 'abacdaefa', 'aa', 1, ],
[ 'abacdaefa', 'aab', 1, ],
[ 'abacdaefa', 'aba', 1, ],
[ 'abacdaefa', 'baa', 1, ],
);
my @additional = qw(Test::NoWarnings); # these add 1 test
plan 'tests' => (scalar grep { ref eq 'ARRAY' } @Tests)
+ @additional;
VECTOR:
for my $ar_vector (@Tests) {
if (not ref $ar_vector) { # it's a comment
note $ar_vector;
next VECTOR;
}
my ($template, $string, $expected_result) = @$ar_vector;
my $rx_repeated = build_rx_repeated($template);
# note "\$rx_repeated $rx_repeated \n"; # for debug
my $got_result = $string =~ $rx_repeated;
is $got_result, $expected_result,
"'$template' -> '$string' " . ($expected_result ? '' : 'NO ')
+. 'repeats'
;
} # end for VECTOR
sub build_rx_repeated {
my ($template,
) = @_;
# for empty-string template, must build a regex that
# always fails (to find repeats).
return length $template ? qr{ ([\Q$template\E]) .*? \1 }xms : qr/(
+?!)/;
}
^Z
1..53
# NONE of these strings have repeats
ok 1 - '' -> '' NO repeats
ok 2 - '' -> 'a' NO repeats
ok 3 - '' -> 'aa' NO repeats
ok 4 - '' -> 'aaa' NO repeats
ok 5 - '' -> 'abc' NO repeats
ok 6 - 'cba' -> '' NO repeats
ok 7 - 'cba' -> 'a' NO repeats
ok 8 - 'cba' -> 'abc' NO repeats
ok 9 - 'cba' -> 'xxxxxx' NO repeats
ok 10 - 'cba' -> 'xxxaxxx' NO repeats
ok 11 - 'cba' -> 'xxxabcxxx' NO repeats
ok 12 - 'cba' -> 'xbxxcxxax' NO repeats
ok 13 - 'abcde' -> '' NO repeats
ok 14 - 'abcde' -> 'a' NO repeats
ok 15 - 'abcde' -> 'abc' NO repeats
ok 16 - 'abcde' -> 'xxxxxx' NO repeats
ok 17 - 'abcde' -> 'xxxaxxx' NO repeats
ok 18 - 'abcde' -> 'xxxabcxxx' NO repeats
ok 19 - 'abcde' -> 'xbxxcxxax' NO repeats
ok 20 - 'abcdef' -> '' NO repeats
ok 21 - 'abcdef' -> 'a' NO repeats
ok 22 - 'abcdef' -> 'abc' NO repeats
ok 23 - 'abcdef' -> 'xxxxxx' NO repeats
ok 24 - 'abcdef' -> 'xxxaxxx' NO repeats
ok 25 - 'abcdef' -> 'xxxabcxxx' NO repeats
ok 26 - 'abcdef' -> 'xbxxcxxax' NO repeats
ok 27 - 'abcdefa' -> '' NO repeats
ok 28 - 'abcdefa' -> 'a' NO repeats
ok 29 - 'abcdefa' -> 'abc' NO repeats
ok 30 - 'abcdefa' -> 'xxxxxx' NO repeats
ok 31 - 'abcdefa' -> 'xxxaxxx' NO repeats
ok 32 - 'abcdefa' -> 'xxxabcxxx' NO repeats
ok 33 - 'abcdefa' -> 'xbxxcxxax' NO repeats
ok 34 - 'abacdaefa' -> '' NO repeats
ok 35 - 'abacdaefa' -> 'a' NO repeats
ok 36 - 'abacdaefa' -> 'abc' NO repeats
ok 37 - 'abacdaefa' -> 'xxxxxx' NO repeats
ok 38 - 'abacdaefa' -> 'xxxaxxx' NO repeats
ok 39 - 'abacdaefa' -> 'xxxabcxxx' NO repeats
ok 40 - 'abacdaefa' -> 'xbxxcxxax' NO repeats
# ALL of these strings have repeats
ok 41 - 'abc' -> 'aa' repeats
ok 42 - 'abc' -> 'aab' repeats
ok 43 - 'abc' -> 'aba' repeats
ok 44 - 'abc' -> 'baa' repeats
ok 45 - 'abcdefa' -> 'aa' repeats
ok 46 - 'abcdefa' -> 'aab' repeats
ok 47 - 'abcdefa' -> 'aba' repeats
ok 48 - 'abcdefa' -> 'baa' repeats
ok 49 - 'abacdaefa' -> 'aa' repeats
ok 50 - 'abacdaefa' -> 'aab' repeats
ok 51 - 'abacdaefa' -> 'aba' repeats
ok 52 - 'abacdaefa' -> 'baa' repeats
ok 53 - no warnings
Also tested under Strawberry 5.30.3.1.
Update: Oops... My first posting had some Frankenstein code, the sewn-together bits of a few ideas. The heart of the OPed build_rx_repeated() function was
my ($rx) =
map { length() ? qr{ ([\Q$_\E]) .*? \1 }xms : qr/(?!)/ }
join '',
split '', $template
;
return $rx;
I've calmed down a bit and the code's less crazy now.
Give a man a fish: <%-{-{-{-<
|
|