use 5.010; # needs perl version 5.10 regex extensions
use warnings;
use strict;
use Test::More 'no_plan';
use Test::NoWarnings;
my @tests = (
'no changes to these',
[ ('') x 2 ], # unchanged pair
[ ('`') x 2 ],
[ ('``') x 2 ],
[ ('```') x 2 ],
[ ('<') x 2 ],
[ ('<<') x 2 ],
[ ('<<<') x 2 ],
[ ('``<>``<>``') x 2 ],
[ ('is `not <this> one ') x 2 ],
[ ('is not <this> ` one ') x 2 ],
[ ('is `not ` <this> one ') x 2 ],
[ ('is not <this> ` one`') x 2 ],
[ ('is not ``<this> ` one`') x 2 ],
[ ('is `not` <this> ` one`') x 2 ],
[ ('is \\\\`not <this> one ') x 2 ],
[ ('is not <this> \\\\` one ') x 2 ],
[ ('is \\\\`not ` <this> one ') x 2 ],
[ ('is `not \\\\` <this> one ') x 2 ],
[ ('is not <this> \\\\` one `') x 2 ],
[ ('is not <this> ` one \\\\`') x 2 ],
[ ('``<A>``<B>``<C>``<D>`') x 2 ],
'all these should change',
[ '`<A>``<B>``<C>``<D>`',
'`<A>``<B>``<C>``<D>`',
],
[ '`<A><B><C><D>`', # new: multiple < in bt group
'`<A><B><C><D>`',
],
[ '`<A>\<B>\\\\<C><D>`', # new: < may be escaped
'`<A>\<B>\\\\<C><D>`',
],
[ '`<A>``<B>``<C>``<D>``',
'`<A>``<B>``<C>``<D>``',
],
[ '`<A> <a a>``<B> \<b b>``<C> \\\\<c c>``<D> <d d>``',
'`<A> <a a>``<B> \<b b>``<C> \\\\<c c>``<D> <
+d d>``',
],
[ '```<A>``<B>``<C>``<D>`',
'```<A>``<B>``<C>``<D>`',
],
[ 'u `v <A> ` w `x <B> y` z',
'u `v <A> ` w `x <B> y` z',
],
[ 'u \` `v <A> ` w `x <B> y` z',
'u \` `v <A> ` w `x <B> y` z',
],
[ 'u `v \` <A> ` w `x <B> y` z',
'u `v \` <A> ` w `x <B> y` z',
],
[ 'u `v <A> \` ` w `x <B> y` z',
'u `v <A> \` ` w `x <B> y` z',
],
[ 'u `v <A> ` \` w `x <B> y` z',
'u `v <A> ` \` w `x <B> y` z',
],
[ 'u `v <A> ` w \` `x <B> y` z',
'u `v <A> ` w \` `x <B> y` z',
],
[ 'u `v <A> ` w `x \` <B> y` z',
'u `v <A> ` w `x \` <B> y` z',
],
[ 'u `v <A> ` w `x <B> \` y` z',
'u `v <A> ` w `x <B> \` y` z',
],
[ 'u `v <A> ` w `x <B> y` \` z',
'u `v <A> ` w `x <B> y` \` z',
],
[ 'is `my <string>` that `also <this> one` too',
'is `my <string>` that `also <this> one` too',
],
[ 'is \\\\` `my \\\\` <string>` that `also <this> \\\\` one` \\\\` t
+oo',
'is \\\\` `my \\\\` <string>` that `also <this> \\\\` one` \
+\\\` too',
],
[ 'x \\\\` `y \\\\` <A>` z `v <B> \\\\` w` \\\\` x',
'x \\\\` `y \\\\` <A>` z `v <B> \\\\` w` \\\\` x',
],
[ 'x y \\\\` <A>` z `v <B> \\\\` w` \\\\` x',
'x y \\\\` <A>` z `v <B> \\\\` w` \\\\` x',
],
[ 'is \`my <NO> that `but <this> one` yes',
'is \`my <NO> that `but <this> one` yes',
],
'(per Corion pm#1222697)',
[ 'is \\\\`my <this> that `but <this> one` no',
'is \\\\`my <this> that `but <this> one` no',
],
'is this acceptable for < that are unbalanced?',
[ '`<\<\\\\<``\\\\<\<<`', '`<\<\\\\<``\\\\<\<<`', ],
[ 'is `this <UN <BALANCED> `ok?',
'is `this <UN <BALANCED> `ok?',
],
);
FUNT:
for my $func_name (qw(replace_lt_5 replace_lt_6)) {
note "\n=== testing $func_name() ===\n\n";
*replace_lt = do { no strict 'refs'; *$func_name; };
VECTOR:
for my $ar_vector (@tests) {
if (not ref $ar_vector) {
note $ar_vector;
next VECTOR;
}
my ($string, $expected) = @$ar_vector;
is replace_lt($string), $expected, qq{'$string' -> '$expected'};
} # end for VECTOR
} # end for FUNT
note "\n=== done testing functions ===\n\n";
done_testing;
exit;
# Functions UNder Test #############################################
sub replace_lt_5 { # needs 5.10+ regex extensions
# handles multiple < in bt group and escaped <
my ($string,
) = @_;
# replace ALL < patterns in all backtick (bt) groups in a string.
# there may be multiple < patterns in a bt group.
# a backtick group begins and ends with a ` (backtick)
# that is NOT escaped. if the escape is itself escaped,
# it does NOT affect the bt. so:
# ` can begin or end a bt group;
# \` can NOT begin or end a bt group (ordinary char);
# \\` escape is escaped: bt can begin/end a bt group.
# similarly, < (lt) patterns may be escaped and the escape may
# be escaped:
# < subject to replacement;
# \< escaped: < NOT subject to replacement;
# \\< escape is escaped: < subject to replacement.
$string =~ s{
# match to just before valid lt in a valid bt group.
(?: \G (?! \A) # start just after some valid lt already found
(?:
(?&TO_NEXT_LT_IN_THIS_BT_GROUP)
|
(?&TO_FIRST_LT_IN_NEXT_BT_GROUP)
)
|
\A (?&TO_FIRST_LT_IN_FIRST_BT_GROUP)
)
# grab and replace lt.
\K # ignore everything matched so far
(?&TRU_LT) # replace this
(?(DEFINE)
# an un-unescaped backtick: may start a bt group.
(?<TRU_BT> (?<! (?<! \\) \\) `)
# an un-unescaped less-than: may be replaced.
(?<TRU_LT> (?<! (?<! \\) \\) <)
# any character NOT a true backtick.
(?<NOT_BT> (?! (?&TRU_BT)) .)
# any character NOT a true backtick and also NOT an lt.
(?<NOT_BT_LT> (?! (?&TRU_LT)) (?&NOT_BT))
# a bt-group NOT containing an lt.
(?<EMPTY_BT> (?> (?&TRU_BT) (?&NOT_BT_LT)*+ (?&TRU_BT)))
# non-bt-group-with-lt stuff to ignore.
(?<IGNORE> (?: (?&NOT_BT)*+ (?&EMPTY_BT)*+)*+)
# match to first valid lt in first valid bt group.
(?<TO_FIRST_LT_IN_FIRST_BT_GROUP>
(?&IGNORE) # ignore non-group stuff
(?&TRU_BT) (?&NOT_BT_LT)*+ # positioned just before lt
(?= (?&NOT_BT)*+ (?&TRU_BT)) # rest is valid bt
)
# match to first valid lt in NEXT valid bt group.
(?<TO_FIRST_LT_IN_NEXT_BT_GROUP>
(?&NOT_BT)*+ (?&TRU_BT) # to end of current group
(?&TO_FIRST_LT_IN_FIRST_BT_GROUP) # actually in NEXT group
)
# match to next valid lt in current ASSUMED-valid bt group.
(?<TO_NEXT_LT_IN_THIS_BT_GROUP> (?&NOT_BT_LT)*+)
) # end (DEFINE)
}
{<}xmsg;
return $string;
} # end sub replace_lt_5()
sub replace_lt_6 { # needs 5.10+ regex extensions
# handles multiple < in bt group and escaped <
my ($string,
) = @_;
# replace ALL < patterns in all backtick (bt) groups in a string.
# there may be multiple < patterns in a bt group.
# a backtick group begins and ends with a ` (backtick)
# that is NOT escaped. if the escape is itself escaped,
# it does NOT affect the bt. so:
# ` can begin or end a bt group;
# \` can NOT begin or end a bt group (ordinary char);
# \\` escape is escaped: bt can begin/end a bt group.
# similarly, < (lt) patterns may be escaped and the escape may
# be escaped:
# < subject to replacement;
# \< escaped: < NOT subject to replacement;
# \\< escape is escaped: < subject to replacement.
# an un-unescaped backtick: may start a bt group.
my $tru_bt = qr{ (?<! (?<! \\) \\) ` }xmso;
# an un-unescaped less-than: may be replaced.
my $tru_lt = qr{ (?<! (?<! \\) \\) < }xmso;
# any character NOT a true backtick.
my $not_bt = qr{ (?! $tru_bt) . }xmso;
# any character NOT a true backtick and also NOT an lt.
my $not_bt_lt = qr{ (?! $tru_lt) $not_bt }xmso;
# a bt-group NOT containing an lt.
my $empty_bt = qr{ (?> $tru_bt $not_bt_lt*+ $tru_bt) }xmso;
# non-bt-group-with-lt stuff to ignore.
my $ignore = qr{ (?: $not_bt*+ $empty_bt*+)*+ }xmso;
# match to first valid lt in first valid bt group.
my $to_first_lt_in_first_bt_group = qr{
$ignore # ignore non-group stuff
$tru_bt $not_bt_lt*+ # positioned just before lt
(?= $not_bt*+ $tru_bt) # rest is valid bt
}xmso;
# match to first valid lt in NEXT valid bt group.
my $to_first_lt_in_next_bt_group = qr{
$not_bt*+ $tru_bt # to end of current group
$to_first_lt_in_first_bt_group # actually in NEXT group
}xmso;
# match to next valid lt in current ASSUMED-valid bt group.
my $to_next_lt_in_this_bt_group = qr{ $not_bt_lt*+ }xmso;
$string =~ s{
# match to just before valid lt in a valid bt group.
(?: \G (?! \A) # start just after some valid lt already found
(?:
$to_next_lt_in_this_bt_group
|
$to_first_lt_in_next_bt_group
)
|
\A $to_first_lt_in_first_bt_group
)
# grab and replace lt.
\K # ignore everything matched so far
$tru_lt # replace this
}
{<}xmsgo;
return $string;
} # end sub replace_lt_6()
I won't post the output. And more test cases never hurt.
at the end is for my current development testing. Set it to the replacement string, or replace it with a string literal, e.g.,