use strict;
use warnings;
# Split a URI-like string into scheme, "www" prefix, host and port.
#
# Takes one string and returns, on a successful match, a six-element list:
#   1. scheme  - "http" or "https"; undef when the string has no scheme
#   2. www     - "www" when the host starts with a whole "www" label,
#                otherwise the empty string (never undef)
#   3. host    - the host without the "www." prefix, e.g. "example.de"
#   4. domain  - everything in the host before its last dot
#   5. tld     - the last label of the host
#   6. port    - the digits after ":"; undef when no port is given
# Returns the empty list when the string does not match at all.
#
# The pattern is deliberately unanchored, so it searches for the first
# position in the string where a host can be recognised.
sub split_uri {
    my ($uri) = @_;

    my @parts = $uri =~ m{
        (https?)?                        # 1: optional scheme
        (?:://)?                         #    optional scheme separator
        ((?:www(?=\.))?)                 # 2: "www" only as a complete label,
                                         #    so "web.de" keeps its leading "w"
        \.?                              #    dot that follows the prefix
        ( ([^:/?\#]+) \. ([^:/?\#]+) )   # 3: host, 4: domain, 5: tld; the
                                         #    negated classes stop at port,
                                         #    path, query and fragment, so dots
                                         #    further right cannot be picked up
        (?: : (\d{0,10}) )?              # 6: optional port
    }x;

    return @parts;
}

# Sample inputs. Plain quoted strings are used instead of qw() because
# two of the entries contain "#", which makes qw() emit a
# "Possible attempt to put comments in qw() list" warning.
my @sample_uris = (
    'https://www.example.de',
    'http://www.example.de',
    'https://example.de',
    'http://example.de',
    'www.example.de',
    'example.de:123',
    'http://www.example.de:445/can?this=happen&too=1#lalala',
    'http://www.example.de/can?this=happen&too=1#foo',
    'http://www.example.de:445',
);

for my $uri (@sample_uris) {
    print "in ($uri):\n";
    my @spl = split_uri($uri);

    # Undefined captures (missing scheme or port) are shown as "-".
    print 'out: ', join(', ', map { defined $_ ? $_ : '-' } @spl), "\n\n";
}
__DATA__
in (https://www.example.de):
out: https, www, example.de, example, de, -
in (http://www.example.de):
out: http, www, example.de, example, de, -
in (https://example.de):
out: https, , example.de, example, de, -
in (http://example.de):
out: http, , example.de, example, de, -
in (www.example.de):
out: -, www, example.de, example, de, -
in (example.de:123):
out: -, , example.de, example, de, 123
in (http://www.example.de:445/can?this=happen&too=1#lalala):
out: http, www, example.de, example, de, 445
in (http://www.example.de/can?this=happen&too=1#foo):
out: http, www, example.de, example, de, -
in (http://www.example.de:445):
out: http, www, example.de, example, de, 445
Update: Added '/' to character class and example '#foo'
|