use warnings; use strict; my $regex1 = qr/([a-zA-Z]+(\d*)+)|((\d*)+[a-zA-Z]+)/; #default words, and words with numbers my $regex2 = qr/(\w|\d|\.)+@(\w|\d|\.)+/; #email addresses my $regex3 = qr/(\w+)\s?(\d)+/; #word and number combinations: Number 1, Assignment 2, Vol my $regex4 = qr/(\w+)'(\w){0,2}/; #contractions in English my $regex5 = qr/(\w)(\/|&)(\w)/; #abbreviations with slash: c/o, i/o, etc. my $regex6 = qr/(M).{1,2}\.(\s([A-Z]{1}[a-z]+))?/; #formal titles: Dr., Mr., Mrs. Agenstein, etc. my $regex7 = qr/(\w+)-(\w+)(-\w+)?/; #hyphenated words: cat-like, face-to-face, etc. my $regex8 = qr/([A-Z]\.?){3}/; #3-letter abbreviations, using uppercase only, no space, my $regex9 = qr/[a-zA-Z]{3}\.\s?(\d+)/; #3-letter abbreviations containing numbers, mixed case, my $regex10 = qr/\$\d+/; #money expressions $xxxx format my $regex11 = qr/\$\d+(.\d{2})?/; #money expressions $xxxx.xx format my $regex12 = qr/(http:\/\/.*)|(w{3}\.(.)*)/; #websites beginning with www. or http:// my $regex13 = qr/(\(\d{3}\)\s(\d{3})-(\d{4}))/; #phone numbers, no country code (xxx) xxx-xxxx my @regarray = ($regex13, $regex12, $regex11, $regex10, $regex9, $regex8, $regex7, $regex6, $regex5, $regex4, $regex3, $regex2, $regex1); $/ = undef; my $text = ; TOK: while(1) { for my $i (0 .. @regarray-1) { my $re = $regarray[$i]; if ($text =~ /\G($re)/gc) { my $word = $1; if (12 != $i) { printf "%02d (%s)\n", $i, $word; } next TOK; } } if ($text =~ /\G./gcs) { 1; } elsif ($text =~ /\G\z/gc) { print "end of text\n"; last; } else { die; } } __DATA__ Some text here