http://www.perlmonks.org?node_id=150070
Category: Utilities
Author/Contact Info fmogavero
fmogo@mninter.net
Description: This script will read a file byte by byte and send messages to the screen to signal bytes that are out of the ASCII text range. A client sent us a file with bad data and it totally hosed their data in the database.
use strict;
use warnings;

# Scan a file byte by byte and report every byte that falls outside the
# ASCII text range accepted by this script: printable characters 32..126
# plus line feed (10).  Every other value -- including tab (9) and
# carriage return (13) -- is reported together with the line number and
# the zero-based byte offset at which it was found.
#
# Usage: perl <this script> FILE

my $path = $ARGV[0];
die "usage: $0 FILE\n" unless defined $path && length $path;

# Three-argument open with a lexical handle: the file name is never
# interpreted as a mode or a pipe, and the handle is scoped to this script.
open(my $input, '<', $path) or die "can't open $path: $!\n";

# Inspect raw bytes: no CRLF translation and no decoding layers, otherwise
# the values and offsets reported below would not match the file on disk.
binmode($input);

# -s yields an empty value for a zero-length file, so fall back to 0.
my $filesize = (-s $input) || 0;

print "File size is $filesize bytes.\n";

my $position = 0;    # zero-based offset of the byte being examined
my $line     = 1;    # one-based number of the line containing that byte
my $last_val;        # value of the most recently read byte (undef if none)
my $byte;

while (1) {

    # read() returns undef on error and 0 at end of file.  Reading until
    # EOF (instead of counting up to a precomputed size) guarantees the
    # final byte is examined too.
    my $got = read($input, $byte, 1);
    die "read error on $path at offset $position: $!\n" unless defined $got;
    last if $got == 0;

    my $val = ord $byte;

    if ( ($val < 32 && $val != 10) || $val > 126 ) {
        print "Line $line byte value $val at offset $position is out of ASCII text range!\n";
    }

    # Every line feed ends the current line; bytes after it belong to the
    # next one.
    $line++ if $val == 10;

    $last_val = $val;
    $position++;
}

close($input) or warn "can't close $path: $!\n";

# Lines terminated by a line feed, plus one more if the file ends with an
# unterminated final line.  An empty file has zero lines.
my $line_count = $line - 1;
$line_count++ if defined $last_val && $last_val != 10;

print "$line_count lines in file!\n";