use warnings; use strict;

my $regex1 = qr/([a-zA-Z]+(\d*)+)|((\d*)+[a-zA-Z]+)/;		#default words, and words with numbers
my $regex2 = qr/(\w|\d|\.)+@(\w|\d|\.)+/;			#email addresses
my $regex3 = qr/(\w+)\s?(\d)+/;					#word and number combinations: Number 1, Assignment 2, Vol 	
my $regex4 = qr/(\w+)'(\w){0,2}/;				#contractions in English
my $regex5 = qr/(\w)(\/|&)(\w)/;				#abbreviations with slash: c/o, i/o, etc.
my $regex6 = qr/(M).{1,2}\.(\s([A-Z]{1}[a-z]+))?/;		#formal titles: Dr., Mr., Mrs. Agenstein, etc.
my $regex7 = qr/(\w+)-(\w+)(-\w+)?/;				#hyphenated words: cat-like, face-to-face, etc.
my $regex8 = qr/([A-Z]\.?){3}/;					#3-letter abbreviations, using uppercase only, no space, 
my $regex9 = qr/[a-zA-Z]{3}\.\s?(\d+)/;				#3-letter abbreviations containing numbers, mixed case, 
my $regex10 = qr/\$\d+/;					#money expressions $xxxx format
my $regex11 = qr/\$\d+(.\d{2})?/;				#money expressions $xxxx.xx format
my $regex12 = qr/(http:\/\/.*)|(w{3}\.(.)*)/;			#websites beginning with www. or http://
my $regex13 = qr/(\(\d{3}\)\s(\d{3})-(\d{4}))/;			#phone numbers, no country code (xxx) xxx-xxxx

my @regarray = ($regex13, $regex12, $regex11, $regex10, $regex9, $regex8, $regex7, $regex6, $regex5, $regex4, $regex3, $regex2, $regex1);

$/ = undef;
my $text = <DATA>;

TOK: while(1) {
	for my $i (0 .. @regarray-1) {
		my $re = $regarray[$i];
		if ($text =~ /\G($re)/gc) {
			my $word = $1;
			if (12 != $i) {
				printf "%02d (%s)\n", $i, $word;
			}
			next TOK;
		}
	}
	if ($text =~ /\G./gcs) {
		1;
	} elsif ($text =~ /\G\z/gc) {
		print "end of text\n";
		last;
	} else {
		die;
	}
}

__DATA__
Some text here