http://www.perlmonks.org?node_id=11100159

Dear monks. Some years ago I write small program which can validate coordinates (locations on the Earth) from user input. I didn't to restrict format of input and I wanted support English as well as Finnish.
#!/usr/bin/perl use strict; sub conv { my $in = shift; $in =~ s/[NWSEIPL]//g; my $sign; if ($in =~ s/([-]+)//) { $sign = $1; } my $dd = $in; if ( $in =~ /([\d+-]+)\s+(\d+)\s+([\d\.]+)/ ) { $dd = $1 + $2/60 + $3/3600; } elsif ( $in =~ /([\d+-]+)\s+(\d+[\d\.]+)/ ) { $dd = $1 + $2/60; } if ($sign ) { $dd = -1 * $dd } return $dd; } sub bar { $_ = shift; print "\n$_\n"; my $comma_count =0; my $space_count =0; my $dot_count =0; my %H; my ($FIN, $ENG); my $err = 0; my @lst; my $latc = 1; my $lonc=1; my ($lat, $lon); #strip all unneeded tr/a-z/A-Z/; s/[^0-9NWSEIPL,.+-]/ /g; s/\s+/ /g; s/([NS])\s*([0-9. ]+)\s*,?\s*([EW])\s*([0-9. ]+)/$2$1,$4$3/; s/([EW])\s*([0-9. ]+)\s*,?\s*([NS])\s*([0-9. ]+)/$4$3,$2$1/; s/([PE])\s*([0-9. ]+)\s*,?\s*([IL])\s*([0-9. ]+)/$2$1,$4$3/; s/([IL])\s*([0-9. ]+)\s*,?\s*([PE])\s*([0-9. ]+)/$4$3,$2$1/; s/([0-9. ]+)\s*([NS])\s*,?\s*([0-9. ]+)\s*([EW])/$2$1,$4$3/; s/([0-9. ]+)\s*([EW])\s*,?\s*([0-9. ]+)\s*([NS])/$4$3,$2$1/; s/([0-9. ]+)\s*([PE])\s*,?\s*([0-9. ]+)\s*([IL])/$2$1,$4$3/; s/([0-9. ]+)\s*([IL])\s*,?\s*([0-9. ]+)\s*([PE])/$4$3,$2$1/; s/([0-9+-]+)\s+(\d+)\s+([0-9.]+)\s+([0-9+-]+)\s+(\d+)\s+([0-9.]+)/ +$1 $2 $3, $4 $5 $6/; s/([0-9+-]+)\s+([0-9.]+)\s+([0-9+-]+)\s+([0-9.]+)/$1 $2, $3 $4/; s/\s+/ /g; print "$_\n"; $comma_count = ($_ =~ tr/,//); $space_count = ($_ =~ tr/ //); $dot_count = ($_ =~ tr/.//); $H{E} = ($_ =~ tr/E//); $H{I} = ($_ =~ tr/I//); $H{P} = ($_ =~ tr/P//); $H{L} = ($_ =~ tr/L//); $H{W} = ($_ =~ tr/W//); $H{N} = ($_ =~ tr/N//); $H{S} = ($_ =~ tr/S//); $ENG = $H{W} + $H{N} + $H{S}; $FIN = $H{L} + $H{P} + $H{I}; if(($ENG >0) && ($FIN > 0)) { print "mixed languages\n"; $err++ } elsif (($FIN>0) && ( ($H{I}+$H{L})>1 || ($H{E}+$H{P})>1 )) { print "too many lats or lons\n"; $err++ } elsif (($ENG>0) && ( ($H{N}+$H{S})>1 || ($H{W}+$H{E})>1 )) { print "too many lats or lons\n"; $err++ } elsif ($H{E}>1) { print "too many E\n"; $err++ } elsif ($H{I}>1) { print "too many I\n"; $err++ } elsif ($H{L}>1) { print "too many L\n"; $err++ } elsif ($H{P}>1) { print "too many P\n"; $err++ } elsif ($H{W}>1) { print "too many W\n"; $err++ } elsif ($H{S}>1) { print "too many S\n"; $err++ } elsif ($H{N}>1) { print "too many N\n"; $err++ } elsif ($comma_count == 3) { my $n = 1; s/(,)/!--$n ? '.' : $1/ge; $n = 2; s/(,)/!--$n ? '.' : $1/ge; @lst = (split(/,/,$_)); } elsif ($comma_count == 2 && $space_count == 1) { s/ /./g; @lst = (split(/ /,$_)); } elsif ($comma_count == 2 && $space_count != 1) { s/,/./g; @lst = (split(/ /,$_)); } elsif ($comma_count == 1) { @lst = (split(/,/,$_)); } elsif ($comma_count > 3) { print "too many commas\n"; $err++ } elsif ($dot_count > 2) { print "too many dots\n"; $err++; } if ($ENG >0) { if (s/S//) { $latc=-1; } if (s/W//) { $lonc=-1; } s/[NE]//g; } elsif ($FIN >0) { if (s/E//) { $latc=-1; } if (s/L//) { $lonc=-1; } s/[IP]//g; } if($err > 0) { print "Err: $err\n"; $err=0; } else { $lst[0] =~ s/^\s+|\s+$//; $lst[1] =~ s/^\s+|\s+$//; $lat = conv($lst[0]); $lon = conv($lst[1]); $lat *= $latc; $lon *= $lonc; my $coords = "$lat, $lon"; if ($coords =~ /^[-+]?([1-8]?\d(\.\d+)?|90(\.0+)?),\s*[-+]?(18 +0(\.0+)?|((1[0-7]\d)|([1-9]?\d))(\.\d+)?)$/) { print "all ok\n"; } else { print "too big numbers\n"; } #Print even there is too big numbers in lat or lon print "LAT: $lat LON: $lon\n" } } #OK my @test =( 'N65, E25', '65P, I25', '25 E, 32 L', '65 0\' 45.414" N 25 28\' 17.231" E', '65 0\' 45.414" N, 25 28\' 17.231" E', 'S 65 0\' 45.414" W 25 28\' 17.231" ', 'S65 0\' 45.414" E, 25 28\' 17.231"', '47.9805, -116.5586', '47 58.8300 -116 33.5160', '47 58 49 -116 33 30', '50 58.8300 70 33.5160', '50 58.8300 -70 33.5160', '+50 58 83.00 70 33 77.44' ); for(@test) { bar($_); } undef(@test); #NOT OK print "\nNot ok cases\n"; my @test =( 'N65, N25', '65P, E25', '91, 47', '65P, W25', '100 E, 91 N', '47.9805, -186.5586', ); for(@test) { bar($_); }

Replies are listed 'Best First'.
Re: Coordinate validator
by jwkrahn (Monsignor) on May 18, 2019 at 04:35 UTC

    I ran your program which output:

    Not ok cases N65, N25 N65, N25 too many lats or lons Err: 1 65P, E25 65P, E25 too many lats or lons Err: 1 91, 47 91, 47 numbers are too big LAT: 91 LON: 47 65P, W25 65P, W25 mixed languages Err: 1 100 E, 91 N N91 ,E100 numbers are too big LAT: 91 LON: 100 47.9805, -186.5586 47.9805, -186.5586 numbers are too big LAT: 47.9805 LON: -186.5586

    You have 13 error tests in your program:

    1: 74 if(($ENG >0) && ($FIN > 0)) { 75 print "mixed languages\n"; 76 $err++ 77 } 2: 78 elsif (($FIN>0) && ( ($H{I}+$H{L})>1 || ($H{E}+$H{P})> +1 )) { 79 print "too many lats or lons\n"; 80 $err++ 81 } 3: 82 elsif (($ENG>0) && ( ($H{N}+$H{S})>1 || ($H{W}+$H{E})> +1 )) { 83 print "too many lats or lons\n"; 84 $err++ 85 } 4: 86 elsif ($H{E}>1) { 87 print "too many E\n"; 88 $err++ 89 } 5: 90 elsif ($H{I}>1) { 91 print "too many I\n"; 92 $err++ 93 } 6: 94 elsif ($H{L}>1) { 95 print "too many L\n"; 96 $err++ 97 } 7: 98 elsif ($H{P}>1) { 99 print "too many P\n"; 100 $err++ 101 } 8: 102 elsif ($H{W}>1) { 103 print "too many W\n"; 104 $err++ 105 } 9: 106 elsif ($H{S}>1) { 107 print "too many S\n"; 108 $err++ 109 } 10: 110 elsif ($H{N}>1) { 111 print "too many N\n"; 112 $err++ 113 } ... 11: 133 elsif ($comma_count > 3) { 134 print "too many commas\n"; 135 $err++ 136 } 12: 137 elsif ($dot_count > 2) { 138 print "too many dots\n"; 139 $err++; 140 } ... 13: 169 if ($coords =~ /^[-+]?([1-8]?\d(\.\d+)?|90(\.0 ++)?),\s*[-+]?(180(\.0+)?|((1[0-7]\d)|([1-9]?\d))(\.\d+)? 169 )$/) +{ 170 print "all ok\n"; 171 } else { 172 print "too big numbers\n"; 173 }

    But only 1, 2, 3 and 13 are actually tested for in your test data.

    Also, tests 4 through 10 can be combined into one test:

    elsif ( my @keys = grep $H{ $_ } > 1, keys %H ) { for ( @keys ) { print "too many $_\n"; $err++ } }

    Although this test will never execute because tests 2 and 3 take presedense.

    Also this:

    30 my %H; ... 61 $H{E} = ($_ =~ tr/E//); 62 63 $H{I} = ($_ =~ tr/I//); 64 $H{P} = ($_ =~ tr/P//); 65 $H{L} = ($_ =~ tr/L//); 66 67 $H{W} = ($_ =~ tr/W//); 68 $H{N} = ($_ =~ tr/N//); 69 $H{S} = ($_ =~ tr/S//);

    Could be simplified a bit:

    my %H = ( E => tr/E//, I => tr/I//, P => tr/P//, L => tr/L//, W => tr/W//, N => tr/N//, S => tr/S//, );

    And finally, you use tr/// for counting but you could also use it in a few other places:

    7 $in =~ s/[NWSEIPL]//g; $in =~ tr/NWSEIPL//d; 39 s/[^0-9NWSEIPL,.+-]/ /g; tr/0-9NWSEIPL,.+-/ /c; 122 s/ /./g; tr/ /./; 126 s/,/./g; tr/,/./; 148 s/[NE]//g; tr/NE//d; 156 s/[IP]//g; tr/IP//d;
Re: Coordinate validator
by FreeBeerReekingMonk (Deacon) on Jun 10, 2019 at 13:03 UTC
    I like the comma_count solution, you checked all bordercases. Maybe offer a perlgolf competition on this code :)

    Particularly this code ($comma_count==3): s/(,)/!--$n ? '.' : $1/ge;

    I liked that approach.

    Found a bug though. This one is successful, but gives a bad coordinate back:

    47 58.8300, -116 33,5160