#!/usr/bin/perl -w
#
# Test program for the dump_data() subroutine.
#
# 061124 liverpole -- created
# 061128 liverpole -- updated to incorporate graff's suggestions
# 090110 liverpole -- updated to allow ref to either a scalar or list of bytes
# 090117 liverpole -- fixed bug in seek (now sysseek); allow options hash to
#                     be passed as a hash ref instead.
###############################################################################
 
# Strict
use strict;
use warnings;
 
 
# Libraries
use File::Basename;
use FileHandle;
use Data::Dumper;
use Getopt::Long;
 
 
# Globals
#
#   $iam ........ basename of this script, used in usage and error messages
#   $b_array .... true when the 'array' switch was given
#   $b_string ... true when the 'string' switch was given
#   $syntax ..... usage text, printed when the command line is unusable
#
my $iam = basename $0;
my $b_array  = 0;
my $b_string = 0;
my $syntax = "
    syntax:  $iam  [switches]  <file>

    Displays a binary dump of given <file>, or of the contents of STDIN
    if '-' is given instead of the filename.

    Switches:
      string .... Read file into a string, and display the string of bytes
      array ..... Read file into an array, and display the array of bytes

      start <offset> ... Starting offset (default = 0)
      end   <offset> ... Ending offset (default = -1 (eof))
      left  <string> ... Left separator char (default = '|')
      mid   <string> ... Middle separator char (default = '|')
      right <string> ... Right separator char (default = '|')
      bin   <string> ... Non-printing binary char (default = '.')
      afmt  <string> ... Address printf format (default '%08x')
      bfmt  <string> ... Bytes printf format (default '%02x')
      max   <number> ... Max bytes to display (default = -1 (entire file))
      count <number> ... Total bytes per line (default = 16)

    Examples:
        Show bytes 100 through 200 of STDIN:
            % perl dumptest.pl -start 100 -end 200 -

        Show all bytes of file 'binary.dat'
            % perl dumptest.pl binary.dat

";
 
# Command-line
#
# The two boolean switches select the in-memory test modes; every other
# switch is collected in $h_opts and handed to dump_data() untouched.
#
my $h_opts = { };
my $result = GetOptions (
    "array"   => \$b_array,
    "string"  => \$b_string,
    "start=s" => \$h_opts->{'start'},
    "end=s"   => \$h_opts->{'end'},
    "left=s"  => \$h_opts->{'left'},
    "mid=s"   => \$h_opts->{'mid'},
    "right=s" => \$h_opts->{'right'},
    "bin=s"   => \$h_opts->{'bin'},
    "afmt=s"  => \$h_opts->{'afmt'},
    "bfmt=s"  => \$h_opts->{'bfmt'},
    "max=s"   => \$h_opts->{'max'},
    "count=s" => \$h_opts->{'count'},
);

# GetOptions() has already warned about each bad switch; show the usage
# text and stop instead of silently running with the switch ignored.
$result or die $syntax;

# Taking a reference to each hash slot above created every key with an
# undefined value.  Drop the ones the user did not supply, so that only
# explicitly requested options are passed on.
for my $key (keys %$h_opts) {
    defined $h_opts->{$key} or delete $h_opts->{$key};
}
 
 
# Main program
#
# Takes the first remaining argument as the input name and dumps it in one
# of three ways:  directly from the file (default), from a string holding
# the file's bytes ('string' switch), and/or from an array holding one byte
# per element ('array' switch).
#
my $fname = shift;

# Test for defined/non-empty rather than truth, so a file named "0" works
(defined($fname) and length($fname)) or die $syntax;

if ($b_string or $b_array) {
    # Slurp the raw bytes; '-' means STDIN, as the usage text promises
    my $b_stdin = ($fname eq '-');
    my $fh;
    if ($b_stdin) {
        $fh = \*STDIN;
    } else {
        open($fh, "<", $fname)
            or die "$iam:  failed to read '$fname' ($!)\n";
    }
    binmode $fh;    # No newline translation; this is binary data

    # Start from "" (not undef) so an empty input dumps cleanly
    my $string = "";
    my $bytes;
    while (1) {
        my $nread = read($fh, $bytes, 1024);
        defined($nread) or die "$iam:  error reading '$fname' ($!)\n";
        $nread or last;
        $string .= $bytes;
    }
    $b_stdin or close $fh;

    if ($b_string) {
        # Display bytes read into a string
        print "=== Dump of string ===\n";
        dump_data(\$string, %$h_opts);
    }

    if ($b_array) {
        # Display bytes read into an array (one byte per element)
        my @bytes = split(//, $string);
        print "=== Dump of array ===\n";
        dump_data(\@bytes, %$h_opts);
    }
} else {
    # Display bytes directly from file
    print "=== Dump of file '$fname' ===\n";
    dump_data($fname, %$h_opts);
}


 

 
###############################################################################
# dump_data()
#
# Displays a hex/ascii dump of some bytes.  The first argument $1 selects
# where the bytes come from:
#
#   - a filename ............. the file is opened (in binary mode) and read
#   - '-' or undef ........... STDIN is read (in binary mode)
#   - a filehandle ........... read as-is (glob, glob ref or IO object)
#   - a scalar reference ..... the referenced string supplies the bytes
#   - an array reference ..... each element supplies one byte (as a char)
#
# It is followed by an optional hash (or hash reference) containing keys
# and values which modify the behavior:
#
#   Key   => value   Description                  Default (meaning)
#  ---------------------------------------------------------------------------
#   start => offset  Starting offset              0
#   end   => offset  Ending offset (inclusive)    -1  ("end-of-file")
#   left  => string  Left separator char          '|'
#   mid   => string  Middle separator char        '|'
#   right => string  Right separator char         '|'
#   bin   => string  Non-printing binary char     '.'
#   afmt  => string  Address printf format        '%08x'
#   bfmt  => string  Bytes printf format          '%02x'
#   max   => number  Max bytes to display         -1  (no limit)
#   count => number  Total bytes per line         16
#   out   => ref     Scalar ref to store output   0   (print to STDOUT)
#
# If both 'end' and 'max' are given, whichever allows fewer bytes wins.  An
# 'end' which lies before 'start' displays nothing.
#
# For example, the following illustrates a dump of the first 128 bytes
# of a .JPG file with no optional arguments:
#
#   00000000|ff d8 ff e0 00 10 4a 46|49 46 00 01 01 01 00 48|......JFIF.....H
#   00000010|00 48 00 00 ff e1 25 88|45 78 69 66 00 00 49 49|.H....%.Exif..II
#   00000020|2a 00 08 00 00 00 09 00|0f 01 02 00 06 00 00 00|*...............
#   00000030|7a 00 00 00 10 01 02 00|16 00 00 00 80 00 00 00|z...............
#   00000040|12 01 03 00 01 00 00 00|01 00 00 00 1a 01 05 00|................
#   00000050|01 00 00 00 96 00 00 00|1b 01 05 00 01 00 00 00|................
#   00000060|9e 00 00 00 28 01 03 00|01 00 00 00 02 00 00 00|....(...........
#   00000070|32 01 02 00 14 00 00 00|a6 00 00 00 13 02 03 00|2...............
#
# The output format is as follows:  offset (address) of the line's first
# byte on the left, then the left separator char "|" followed by 16 bytes of
# data (with the middle separator char "|" splitting the data to make it
# more readable; it is only used when 'count' is even), followed by the
# right separator char "|" and the ascii representation of each byte (with
# "." for non-printing chars).  A short final line is padded with spaces so
# that the ascii column stays aligned.
#
# When printing to STDOUT, a blank line is printed before the dump.  When
# 'out' is given, nothing at all is printed; the dump lines are appended to
# the referenced scalar instead.
#
# Returns the filehandle that was read (or the reference that was passed
# in).  Returns nothing if STDIN (or an unseekable handle) reaches
# end-of-file before 'start' bytes could be skipped.
#
# Dies if a named file does not exist or cannot be opened, if 'start' lies
# beyond the end of a string or array, if 'start' is negative, or if
# 'count' is not at least 1.
#
# 090110 liverpole -- Modified to allow a reference to either a scalar from
# which to read the bytes, or a list of the bytes themselves.
#
# 090117 liverpole -- Changed seek() to sysseek() and fixed the arg order.
#                     Now allow options hash to be passed as a reference.
###############################################################################
sub dump_data {
    my ($fh, @opts) = @_;

    my $iam = "dump_data";
    require FileHandle;

    # Allow @opts to be a hash -or- a hashref for convenience
    my %opts = (@opts > 0 and ref $opts[0] eq 'HASH')? %{$opts[0]}: @opts;

    # Parse options, and assign defaults
    my $h_assign = sub {
        my ($key, $default) = @_;
        return defined($opts{$key})? $opts{$key}: $default;
    };
    my $start = int($h_assign->('start',  0));  # Starting offset
    my $end   = $h_assign->('end',   -1);       # Ending offset (inclusive)
    my $left  = $h_assign->('left',  "|");      # Left separator char
    my $mid   = $h_assign->('mid',   "|");      # Middle separator char
    my $right = $h_assign->('right', "|");      # Right separator char
    my $bin   = $h_assign->('bin',   ".");      # Non-printing binary char
    my $afmt  = $h_assign->('afmt',  "%08x");   # Address printf format
    my $bfmt  = $h_assign->('bfmt',  "%02x");   # Bytes printf format
    my $max   = $h_assign->('max',   -1);       # Max bytes to display
    my $count = int($h_assign->('count', 16));  # Total bytes per line
    my $pout  = $h_assign->('out',   0);        # Scalar ref to store output

    # Reject values which would otherwise crash (modulus by zero) or read
    # from the wrong end of the data (negative substr/array index).
    ($count >= 1) or die "$iam:  'count' must be at least 1\n";
    ($start >= 0) or die "$iam:  'start' must not be negative\n";

    # 'end' is an inclusive offset.  Turn it into a byte budget, and merge
    # that with 'max' so that the tighter of the two limits applies.  A
    # budget of zero (end < start) simply displays nothing.
    if ($end >= 0) {
        my $span = $end - $start + 1;
        ($span < 0) and $span = 0;
        ($max < 0 or $span < $max) and $max = $span;
    }

    # Work out where the bytes come from.  STDIN must be recognized first:
    # '-' and undef are not references, so the filename test would
    # otherwise claim them.
    #
    my $b_stdin = (!defined($fh) or (!ref($fh) and $fh eq '-'));
    my $a_bytes = 0;   # Use this for a byte array (eg. [$b0, $b1, $b3])
    my $s_bytes = 0;   # Use this for a string of bytes (eg. \$bytes)
    my $length  = 0;   # Number of bytes in $s_bytes or $a_bytes

    if ($b_stdin) {
        $fh = \*STDIN;
        binmode $fh;
    } elsif (ref $fh eq "SCALAR") {
        $s_bytes = $fh;
        $length  = defined($$s_bytes)? length($$s_bytes): 0;
    } elsif (ref $fh eq "ARRAY") {
        $a_bytes = $fh;
        $length  = @$a_bytes;
    } elsif (ref $fh eq "" and ref(\$fh) ne "GLOB") {
        # A plain string is a filename; open it for binary reading
        my $fname = $fh;
        (-e $fname) or die "$iam:  no such file '$fname'\n";
        $fh = FileHandle->new;
        open($fh, "<", $fname) or die "$iam:  can't read '$fname' ($!)\n";
        binmode $fh;
    }
    # Anything else (glob, glob ref, IO object) is used as a filehandle

    # Move to the starting offset.  Strings and arrays are indexed directly.
    # A real file can seek (seek, not sysseek, because the bytes are read
    # with the buffered getc); STDIN or any other unseekable handle must
    # have $start bytes read and discarded instead.
    #
    if ($start) {
        if ($s_bytes) {
            ($start >= $length)
                and die "$iam:  string is only $length bytes long\n";
        } elsif ($a_bytes) {
            ($start >= $length)
                and die "$iam:  array is only $length bytes long\n";
        } elsif ($b_stdin or !seek($fh, $start, 0)) {
            for (1 .. $start) {
                defined(getc($fh)) or return;
            }
        }
    }

    # Line layout.  $width is the size of the hex column for a full line:
    # $count formatted bytes joined by ($count - 1) separators, one of which
    # is $mid (instead of a single space) when the line splits into halves.
    #
    my $blen  = length(sprintf $bfmt, 255);
    my $half  = ($count % 2)? 0: ($count / 2);   # 0 = no middle separator
    my $width = $count * $blen + ($count - 1);
    $half and $width += length($mid) - 1;

    # State of the line being built
    my $offs = $start;   # Address of the first byte on the current line
    my $col  = 0;        # Number of bytes on the current line so far
    my $text = "";       # Hex column
    my $asc  = "";       # Ascii column

    # Define closure to emit the current line (if it holds any bytes)
    my $c_flush = sub {
        $col or return;

        # Pad a short line so the ascii column lines up with full lines
        my $pad = $width - length($text);
        ($pad > 0) and $text .= " " x $pad;

        my $out = sprintf $afmt, $offs;
        $out   .= sprintf "%s%s%s%s\n", $left, $text, $right, $asc;
        if ($pout) {
            $$pout .= $out;
        } else {
            print $out;
        }

        $offs += $count;
        ($col, $text, $asc) = (0, "", "");
    };

    # Define closure to process each byte of data
    my $c_insert = sub {
        my ($byte) = @_;
        $text .= sprintf $bfmt, $byte;
        $asc  .= ($byte < 32 || $byte > 126)? $bin: chr($byte);
        if (++$col == $count) {
            $c_flush->();
        } else {
            $text .= ($half and $col == $half)? $mid: " ";
        }
    };

    # Process the file or list.  The leading blank line belongs to the
    # printed form only; nothing is printed when the output is captured.
    $pout or print "\n";

    my $pos = $start;    # Index of the next byte in a string or array
    while ($max != 0) {
        my $c;
        if ($s_bytes) {
            ($pos >= $length) and last;
            $c = substr($$s_bytes, $pos, 1);
        } elsif ($a_bytes) {
            ($pos >= $length) and last;
            $c = $a_bytes->[$pos];
        } else {
            $c = getc($fh);
        }
        defined($c) or last;
        $c_insert->(ord $c);
        ++$pos;
        ($max > 0) and --$max;   # A negative $max means "no limit"
    }

    # Dump any final data, and return the filehandle
    $c_flush->();
    return $fh;
}