Here's a way to match/extract ordinal substrings. Note this won't match an improper ordinal like '1th' or '4rd'. (Update: It's also case-sensitive.) Tested on Windows 7 under ActiveState 5.8.9 and Strawberry 5.14.4.
use warnings
# FATAL => 'ALL'
;
use strict;
use Test::More
# tests => ?? + 1 # Test::NoWarnings adds 1 test
'no_plan'
;
use Test::NoWarnings;
my $rx_ordinal = qr{
# ordered alternation: 1st match wins
(?<= [04-9]) th # most common case (check first!)
|
(?<= 1 [123]) th # 11th 12th 13th (most common exceptions?)
|
(?<= (?<! 1) 1) st # 01st 1st 101st 21st, but not 11st 111st
|
(?<= (?<! 1) 2) nd # 02nd 2nd 22nd 222nd 432nd, not 12nd 312nd
|
(?<= (?<! 1) 3) rd # 03rd 3rd 33rd 123rd, not 13rd 313rd
}xms;
my $ident = qr{ [A-Z\d]+ }xms;
my $sep = qr{ [-_] }xms;
my $ordinal = qr{ $rx_ordinal \b }xms;
my $group = qr{ $ident (?: $sep $ident){1,} $ordinal? }xms;
VECTOR:
for my $ar_vector (
[ 'access-list INSIDE_IN remark Web Users To Web Server',
qw(INSIDE_IN) ],
[ 'access-list INSIDE_IN extended permit tcp object-group WEB-CLIE
+NT object-group WEB-SERVER object-group WEB_TCP',
qw(INSIDE_IN WEB-CLIENT WEB-SERVER WEB_TCP) ],
[ 'access-list INSIDE_IN remark EMAIL To EMAIL Server',
qw(INSIDE_IN) ],
[ 'access-list INSIDE_IN extended permit tcp object-group EMAIL-CL
+IENT object-group EMAIL-SERVER object-group SMTP_TCP',
qw(INSIDE_IN EMAIL-CLIENT EMAIL-SERVER SMTP_TCP) ],
[ 'access-list INSIDE_IN remark SRVR to Client',
qw(INSIDE_IN) ],
[ 'access-list INSIDE_IN extended permit tcp object-group MYSRVR_I
+P-S_2nd object-group MYCLIENTS-IP_1st object-group WEB_TCP',
qw(INSIDE_IN MYSRVR_IP-S_2nd MYCLIENTS-IP_1st WEB_TCP) ],
) {
if (not ref $ar_vector) {
note $ar_vector;
next VECTOR;
}
my ($string, @expect) = @$ar_vector;
is_deeply [ $string =~ m{ ($group) }xmsg ], \@expect, "@expect";
} # end for VECTOR