# unabbrev.pm
# Fixed the unabbrev.pm file up some.
#
# NOTE(review): this source holds the unabbrev.pm helpers followed by the
# address_parse.pl script. The script below does not call uword()/unabrev()
# itself; it only recognises already-expanded words (north, street, ...).

use strict;
use warnings;

# uword($text)
#
# Split $text on whitespace, expand each word with unabrev(), and return the
# expanded words joined by single spaces. An undefined or empty $text yields
# the empty string.
sub uword {
    my ($text) = @_;
    $text = '' unless defined $text;
    return join ' ', map { unabrev($_) } split ' ', $text;
}

# unabrev($word)
#
# Expand a single lower-case address abbreviation (or known misspelling) to
# its full form and return it. Words that match no rule, and words without
# any word character, are returned unchanged. At most one rule is applied:
# the rules are chained with `or`, so the first successful substitution ends
# the chain.
sub unabrev {
    my $word = shift;
    return $word unless defined $word and $word =~ /\w/;

    # Alias $_ to $word so the rule table below stays compact.
    for ($word) {
        # One really big expression
           s/^e$/east/
        or s/^w$/west/
        or s/^(?:n|no)$/north/
        or s/^(?:s|so)$/south/
        or s/^ne$/north east/
        or s/^nw$/north west/
        or s/^se$/south east/
        or s/^sw$/south west/
        or s/^(?:avs|aves)$/avenue south/
        or s/^beachrd$/beach road/
        or s/^ccedar$/cedar/
        or s/^(?:adn|add'n)$/addition/
        or s/^appache$/apache/
        or s/^apt$/apartment/
        or s/^apts$/apartments/
        or s/^(?:av|ave)$/avenue/
        # Anchored at the end: the unanchored form turned "beach" into
        # "beachh" and "bchch" into "beachch".
        or s/^(?:bch|bchch|beac)$/beach/
        # Prefix rule on purpose: "bx12" becomes "box 12". The pattern has
        # no capture group, so the replacement is the literal "box ".
        or s/^(?:bx|b0x)/box /
        or s/^blvd$/boulevard/
        or s/^brg$/burg/
        or s/^bldg$/building/
        or s/^cen$/center/
        or s/^(?:centeral|cental)$/central/
        or s/^char$/character/
        or s/^chas$/chase/
        or s/^ches$/chesapeake/
        or s/^chig$/chicago/
        or s/^cir$/circle/
        or s/^(?:cty|co|cnty)$/county/
        # Anchored at the end: the unanchored form turned "court" into
        # "courtt" and swallowed "ctr" before the center rule below.
        or s/^(?:ct|crt|cour)$/court/
        or s/^cr$/curve/
        or s/^crk$/creek/
        or s/^crl$/curl/
        or s/^(?:crystaln|crytl)$/crystal/
        or s/^ctr$/center/
        or s/^dist$/district/
        or s/^(?:drv|drve|dr)$/drive/
        or s/^est$/estate/
        or s/^fst$/forest/
        or s/^ft$/fort/
        or s/^(?:govt|govern|gov't)$/government/
        or s/^(?:grv|grov)$/grove/
        or s/^hgld$/highland/
        or s/^hglds$/highlands/
        or s/^(?:hgt|hht|height|ht|hghtss|hghts)$/heights/
        or s/^(?:hy|hyw|hwy)$/highway/
        or s/^isl$/island/
        or s/^(?:jct|jction|jctn|junctn|juncton)$/junction/
        or s/^(?:jctns|jcts)$/junctions/
        or s/^l00p$/loop/
        or s/^(?:lk|lak)$/lake/
        or s/^lks$/lakes/
        or s/^li'l$/lil/
        or s/^(?:la|lanes|ln)$/lane/
        or s/^ml$/mill/
        or s/^mls$/mills/
        or s/^mkt$/market/
        or s/^(?:mt|mnt)$/mount/
        or s/^mpls$/minneapolis/
        or s/^(?:mtn|mntain|mntn)$/mountain/
        or s/^(?:mntns|mtns)$/mountains/
        or s/^(?:nth|nrth)$/north/
        or s/^nrthbrk$/northbrook/
        or s/^(?:unorg|unorgized)$/unorganized/
        or s/^ph$/penthouse/
        or s/^(?:pk|prk)$/park/
        or s/^(?:pkwy|parkwy|pkway|pky)$/parkway/
        or s/^pl$/place/
        or s/^plaz$/plaza/
        or s/^(?:pobox|po)$/box/
        or s/^prct$/precinct/
        or s/^pres$/president/
        or s/^pt$/point/
        or s/^pts$/points/
        or s/^qtr$/quarter/
        or s/^qtrs$/quarters/
        or s/^(?:r|rt)$/route/
        or s/^rd$/road/
        or s/^rdg$/ridge/
        or s/^resor$/resort/
        or s/^(?:ri|rv|riv|rvr)$/river/
        or s/^(?:rte|rr|rural)$/route/
        or s/^(?:rs|rst)$/rest/
        or s/^rverview$/riverview/
        or s/^(?:shr|shoar)$/shore/
        or s/^(?:shoars|shrs)$/shores/
        or s/^(?:spgs|spngs|sprngs)$/springs/
        or s/^(?:st|str)$/street/
        or s/^svc$/service/
        or s/^terr$/terrace/
        or s/^twp$/township/
        or s/^(?:tr|trl|trails|trls)$/trail/
        or s/^trlr$/trailer/
        or s/^vac$/vacation/;
    }

    return $word;
}

1;

# address_parse.pl
#!/usr/bin/perl
#
# Reads tab-separated records (id, address, ...) from the files named on the
# command line or from standard input, and prints one tab-separated line per
# record with the columns:
#
#   id  house  odd  fraction  street  direction  type  unit
#
# Columns that could not be determined are printed as the literal \N.

use strict;
use warnings;

# Zero-based positions of the id and address columns in an input record.
our $ID      = 0;
our $ADDRESS = 1;

while (my $input = <>) {
    # Without the chomp, a line that has no tab kept its newline inside the
    # id and broke the output row in two.
    chomp $input;

    my ($id, $address) = (split /\t/, $input)[$ID, $ADDRESS];
    $address = '' unless defined $address;

    my %field = _parse_address($address);
    print _format_record($id, \%field), "\n";
}

# _parse_address($address)
#
# Break one address string into its components. Returns a flat key/value
# list with any of the keys house, odd, fraction, street, direction, type
# and unit; a key is absent when that component was not found.
sub _parse_address {
    my ($address) = @_;

    # split ' ' (unlike split /\s+/) ignores leading whitespace, so a padded
    # address still has its house number in $words[0].
    my @words = split ' ', $address;
    my %field;

    # Leading all-digit word is the house number; odd is 'e' for an even
    # number and 'o' for an odd one.
    if (@words and $words[0] =~ /^\d+$/) {
        $field{house} = shift @words;
        $field{odd}   = $field{house} % 2 == 0 ? 'e' : 'o';
    }

    # Optional "1/2" fraction at the front of what remains.
    if (@words and $words[0] =~ /^1\/2$/) {
        $field{fraction} = shift @words;
    }

    # Unit: scanning from the end, the first unit keyword found and every
    # word after it.
    for (my $i = $#words; $i >= 0; $i--) {
        next unless is_unit($words[$i]);
        $field{unit} = join ' ', @words[$i .. $#words];
        $#words = $i - 1;
        last;
    }

    # No unit keyword: a trailing word that ends in a digit is the unit.
    if (!defined $field{unit} and @words and $words[-1] =~ /\d+$/) {
        $field{unit} = pop @words;
    }

    # Trailing direction: "east"/"west", optionally preceded by
    # "north"/"south", or a lone "north"/"south".
    my $last = @words ? $words[-1] : '';
    if (is_ew($last)) {
        $field{direction} = pop @words;
        my $before = @words ? $words[-1] : '';
        if (is_ns($before)) {
            $field{direction} = pop(@words) . ' ' . $field{direction};
        }
    }
    elsif (is_ns($last)) {
        $field{direction} = pop @words;
    }

    # No trailing direction: collect every direction word found anywhere in
    # the address and remove it from the street words.
    unless (exists $field{direction}) {
        my @directions;
        for my $word (@words) {
            next unless is_ns($word) or is_ew($word);
            push @directions, $word;
            $word = '';    # aliased: blanks the element of @words
        }
        $field{direction} = join ' ', @directions if @directions;
    }
    @words = grep { /\w/ } @words;

    # Street type: the last word that looks like one.
    for (my $i = $#words; $i >= 0; $i--) {
        next unless is_type($words[$i]);
        $field{type} = splice @words, $i, 1;
        last;
    }

    # "p o" / "p 0" pairs are folded into the unit as a "po" prefix.
    for (my $i = 0; $i < $#words; $i++) {
        next unless $words[$i] eq 'p' and $words[$i + 1] =~ /^(?:o|0)$/;
        $words[$i] = $words[$i + 1] = '';
        $field{unit} = exists $field{unit} ? "po $field{unit}" : 'po';
    }

    # Whatever is left is the street name. Blanked words are dropped first
    # and the test is on length, so a street of "0" is kept and an address
    # with nothing left has no street key at all.
    my $street = join ' ', grep { /\w/ } @words;
    $field{street} = $street if length $street;

    return %field;
}

# _format_record($id, \%field)
#
# Build the tab-separated output line (without a trailing newline) for one
# record. Undefined columns become the literal \N. Runs of spaces are
# squeezed to one space, and spaces next to a tab are removed.
sub _format_record {
    my ($id, $field) = @_;

    my $line = join "\t",
        map { defined $_ ? $_ : '\\N' }
        ($id, @{$field}{qw(house odd fraction street direction type unit)});

    # One pass is enough: after these three substitutions no double space
    # and no space adjacent to a tab can remain.
    $line =~ s/ +/ /g;
    $line =~ s/ +\t/\t/g;
    $line =~ s/\t +/\t/g;

    return $line;
}

# is_ns($word): true when $word is exactly "north" or "south".
sub is_ns {
    my ($word) = @_;
    return unless defined $word;
    return $word =~ /^(?:north|south)$/;
}

# is_ew($word): true when $word is exactly "east" or "west".
sub is_ew {
    my ($word) = @_;
    return unless defined $word;
    return $word =~ /^(?:east|west)$/;
}

# is_unit($word): true when $word contains one of the unit keywords.
# NOTE(review): the match is a substring match, so "flower" (lower) or
# "pilot" (lot) also count as units -- confirm whether whole-word matching
# was intended before anchoring.
sub is_unit {
    my ($word) = @_;
    return unless defined $word;
    return $word =~ /(?:box|apartment|lot|suite|campus|lower|upper|floor|gymnasium|hall|building)\s*/;
}

# is_type($word): true when $word contains one of the street-type keywords
# (several known misspellings included).
# The pattern uses /x so that it can be wrapped across lines; previously the
# wrapping left literal whitespace inside it and "hill" and "trail" could
# never match.
# NOTE(review): the match is a substring match, so "broadway" (way) also
# counts as a type -- confirm whether whole-word matching was intended.
sub is_type {
    my ($word) = @_;
    return unless defined $word;
    return $word =~ /(?:
          alley | avenue | aveue | avneu | bay | boulevard | circle
        | court | courtt | courttt | cove | crest | curve | dale | drive
        | grove | highway | hill | knoll | lane | mall | orchard | park
        | parkway | pass | pines | place | plaza | raod | ridge | road
        | route | square | state | street | summit | ter | terrace
        | trail | walk | way
    )/x;
}