All-purpose data dump subroutine

This is a subroutine I've written probably several dozen times over the years, whether it was in BASIC, C, C++ or Perl.

It displays bytes of data from a file (like od on Unix/Linux). At the left of the display is the file offset; the middle part contains the hex (or decimal) representation of each byte, and the right shows the bytes displayed as ASCII characters.

For fun, I tried writing an "all-purpose" version in Perl, with the intent of being as flexible as possible; this is the result. The user can specify just about any parameter that might be useful to change. The subroutine even allows for either a filename or a filehandle as the first (and only mandatory) argument.

By default, the data is written to STDOUT, but it can be written to a string instead by passing a reference to that string, e.g. out => \$string.

Try playing with the different optional arguments, which are defined in the header of the subroutine comments.

Update: I've incorporated the very good suggestions made by graff below.

Update 2: See my updated code below, which lets the user pass, as the first argument, a reference, either to a string of bytes, or an array of bytes, and then displays those bytes.

#!/usr/bin/perl -w
#
# Test program for the dump_data() subroutine.
#
# 061124 liverpole -- created
# 061128 liverpole -- updated to incorporate graff's suggestions
###############################################################################
 
# Strict
use strict;
use warnings;
 
 
# Libraries
use File::Basename;
use FileHandle;
use Data::Dumper;
use Getopt::Long;
 
 
# Globals
my $iam = basename $0;

# Usage message, shown whenever the command line cannot be understood.
my $syntax = "
    syntax:  $iam  [switches]  <file>

    Displays a binary dump of given <file>, or of the contents of STDIN
    if '-' is given instead of the filename.

    Switches:
      start <offset> ... Starting offset (default = 0)
      end   <offset> ... Ending offset (default = -1 (eof))
      left  <string> ... Left separator char (default = '|')
      mid   <string> ... Middle separator char (default = '|')
      right <string> ... Right separator char (default = '|')
      bin   <string> ... Non-printing binary char (default = '.')
      afmt  <string> ... Address printf format (default '%08x')
      bfmt  <string> ... Bytes printf format (default '%02x')
      max   <number> ... Max bytes to display (default = -1 (entire file))
      count <number> ... Total bytes per line (default = 16)

    Examples:
        Show bytes 100 through 200 of STDIN:
            % perl dumptest.pl -start 100 -end 200 -

        Show all bytes of file 'binary.dat'
            % perl dumptest.pl binary.dat

";

# Command-line
#
# Every switch takes a value and is stored under the same name that
# dump_data() expects as an option key.
my $popts = { };
my $result = GetOptions (
    "start=s" => \$popts->{'start'},
    "end=s"   => \$popts->{'end'},
    "left=s"  => \$popts->{'left'},
    "mid=s"   => \$popts->{'mid'},
    "right=s" => \$popts->{'right'},
    "bin=s"   => \$popts->{'bin'},
    "afmt=s"  => \$popts->{'afmt'},
    "bfmt=s"  => \$popts->{'bfmt'},
    "max=s"   => \$popts->{'max'},
    "count=s" => \$popts->{'count'},
);

# An unknown or malformed switch is a usage error, not something to ignore.
$result or die $syntax;

# Taking \$popts->{...} above autovivifies every key with an undefined
# value; drop the switches the user did not supply.
delete @{$popts}{ grep { !defined $popts->{$_} } keys %$popts };


# Main program
#
# Test for definedness rather than truth so that a file named "0" works.
my $fname = shift;
defined $fname or die $syntax;
my $fh = dump_data($fname, %$popts);
 
 
###############################################################################
# dump_data()
#
# Given a filename or filehandle as the first argument, and an optional hash
# of arguments and values, displays a data dump of the given file.  If '-' is
# specified for the filename (or the filename is undefined or otherwise
# false), STDIN is used instead.
#
# The following optional arguments modify the behavior:
#
#   Key   => value   Description                  Default (meaning)
#  ---------------------------------------------------------------------------
#   start => offset  Starting file offset         0
#   end   => offset  Ending file offset           -1  ("end-of-file")
#   left  => string  Left separator char          '|'
#   mid   => string  Middle separator char        '|'
#   right => string  Right separator char         '|'
#   bin   => string  Non-printing binary char     '.'
#   afmt  => string  Address printf format        '%08x'
#   bfmt  => string  Bytes printf format          '%02x'
#   max   => number  Max bytes to display         -1  (entire file)
#   count => number  Total bytes per line         16
#   out   => ref     Scalar ref to store output   0   (print to STDOUT)
#
# The 'end' offset is inclusive.  An option whose value is undefined is
# treated as if it had not been given.
#
# For example, the following illustrates a dump of the first 128 bytes
# of a .JPG file with no optional arguments:
#
#   00000000|ff d8 ff e0 00 10 4a 46|49 46 00 01 01 01 00 48|......JFIF.....H
#   00000010|00 48 00 00 ff e1 25 88|45 78 69 66 00 00 49 49|.H....%.Exif..II
#   00000020|2a 00 08 00 00 00 09 00|0f 01 02 00 06 00 00 00|*...............
#   00000030|7a 00 00 00 10 01 02 00|16 00 00 00 80 00 00 00|z...............
#   00000040|12 01 03 00 01 00 00 00|01 00 00 00 1a 01 05 00|................
#   00000050|01 00 00 00 96 00 00 00|1b 01 05 00 01 00 00 00|................
#   00000060|9e 00 00 00 28 01 03 00|01 00 00 00 02 00 00 00|....(...........
#   00000070|32 01 02 00 14 00 00 00|a6 00 00 00 13 02 03 00|2...............
#
# The output format is as follows:  file offset (address) on the left, then
# the left separator char "|" followed by 16 bytes of data (with the middle
# separator char "|" splitting the data to make it more readable), followed
# by the right separator char "|" and the ascii representation of each byte
# (with "." for non-printing chars).  The middle separator is only used when
# 'count' is even.
#
# Returns the filehandle that was read (so that the caller may close it), or
# nothing if the input ended before the 'start' offset was reached.  Dies if
# a named file does not exist or cannot be opened, or if 'count' is not a
# positive integer.
###############################################################################
sub dump_data {
    my ($fh, %opts) = @_;

    # Loaded here as well so that the subroutine can be pasted into another
    # program without depending on that program's globals.
    require File::Basename;
    require FileHandle;
    my $prog = File::Basename::basename($0);

    # Merge the caller's options over the defaults
    my %default = (
        start => 0,          # Starting file offset
        end   => -1,         # Ending file offset (inclusive)
        left  => "|",        # Left separator char
        mid   => "|",        # Middle separator char
        right => "|",        # Right separator char
        bin   => ".",        # Non-printing binary char
        afmt  => "%08x",     # Address printf format
        bfmt  => "%02x",     # Bytes printf format
        max   => -1,         # Max bytes to display
        count => 16,         # Total bytes per line
        out   => 0,          # Scalar ref to store output
    );
    my %opt = map {
        ( $_ => (defined($opts{$_}) ? $opts{$_} : $default{$_}) )
    } keys %default;
    my ($start, $end, $left, $mid, $right, $bin) =
        @opt{qw(start end left mid right bin)};
    my ($afmt, $bfmt, $max, $count, $pout) =
        @opt{qw(afmt bfmt max count out)};

    # $count is used as a modulus below, so it must be a positive integer
    ($count =~ /\A\d+\z/ and $count > 0)
        or die "$prog:  'count' must be a positive integer (got '$count')\n";

    # If a file (or STDIN), open it
    my $b_stdin = (!($fh || 0) or $fh eq '-');
    if ($b_stdin) {
        # If $fh is '-' (or false), use STDIN
        $fh = \*STDIN;
        binmode $fh;
    } elsif (ref $fh eq "" and ref(\$fh) ne 'GLOB') {
        # If $fh is a filename, open it (a bare glob such as *STDIN is
        # already a filehandle, so it is left alone)
        my $fname = $fh;
        (-e $fname) or die "$prog:  no such file '$fname'\n";
        $fh = FileHandle->new;
        open($fh, '<', $fname)
            or die "$prog:  can't read '$fname' ($!)\n";
        binmode $fh;     # Never translate line endings in a binary dump
    }

    # Adjust the filepointer to the start.  If it's an actual file, seek
    # will work, otherwise (STDIN, or any other handle which cannot seek)
    # a total of $start bytes must be discarded first.
    my $offs = 0;
    if ($start) {
        unless (!$b_stdin and seek($fh, $start, 0)) {
            for (1 .. $start) {
                defined(getc($fh)) or return;
            }
        }
        $offs = $start;
    }

    # 'end' is just another limit on the number of bytes shown; folding it
    # into $max lets the read loop stop as soon as the last byte is reached.
    if ($end >= 0) {
        my $span = $end - $start + 1;
        $span = 0 if $span < 0;
        $max = $span if ($max < 0 or $span < $max);
    }

    # Per-line state
    my $dlen = length(sprintf $bfmt, 255) + 1;     # Width of one byte cell
    my $half = ($count % 2)? 0: ($count / 2);      # 0 = no middle separator
    my ($idx, $txt, $asc) = (0, "", "");

    # Closure to emit the pending line (if any), padding a short final line
    # so that the right separator stays aligned.
    my $flush = sub {
        length($txt) or return;
        my $missing = ($count - $idx % $count) % $count;
        my $pad     = $missing * $dlen - 1;
        ($pad > 0) and $txt .= " " x $pad;
        my $out = sprintf $afmt, $offs;
        $out   .= sprintf "%s%s%s%s\n", $left, $txt, $right, $asc;
        if ($pout) {
            $$pout .= $out;
        } else {
            print $out;
        }
        $offs += $count;
        $txt = $asc = "";
    };

    # Process the file.  The leading blank line belongs to the STDOUT
    # display only; nothing is printed when writing to a string.
    $pout or print "\n";
    while (($max < 0 or $max--) and defined(my $c = getc($fh))) {
        my $byte = ord $c;
        $txt .= sprintf $bfmt, $byte;
        $asc .= ($byte < 32 || $byte > 126)? $bin: chr($byte);
        if (++$idx % $count) {
            $txt .= ($half and 0 == $idx % $half)? $mid: " ";
        } else {
            $flush->();
        }
    }

    # Dump any final data, and return the filehandle
    $flush->();
    return $fh;
}
[download]
[download]

Comment on All-purpose data dump subroutine Select or Download Code

Replies are listed 'Best First'.
Re: All-purpose data dump subroutine by graff (Chancellor) on Nov 27, 2006 at 03:30 UTC
It can often be very useful to apply this sort of treatment to data being supplied on STDIN (in fact, I wouldn't call it "all-purpose" if it didn't support reading from STDIN). This would mean skipping the "sysopen()" call (and doing "binmode STDIN" instead) when the "$fh" arg is, e.g. "-" or "stdin" or \*STDIN, or whatever you want to support in this regard. Also, if the given option hash wants a non-zero start offset, you'll have to just read through and do nothing for that many bytes, since you can't use seek() on STDIN. These are minor issues that are easy patch in, given how you've written the code. The other thing is to have a "main()" function (a runnable script) that provides all the suitable command-line options (e.g. via Getopt::Long) that would allow the user to exercise the full potential of the dump_data subroutine (i.e. manipulate all the elements of %opts). Again, this is a pretty simple addition, given the code you've posted. Altogether, thanks for posting this, and ++!	[reply]
Re^2: All-purpose data dump subroutine by liverpole (Monsignor) on Nov 28, 2006 at 16:39 UTC
Some very nice comments! I've updated the code to work with STDIN as you suggested. Supplying '-' in place of the filename will achieve this. Also, as you've suggested, I added switches to test each of the parameters (except for that of writing the data to a string instead of STDOUT). Similarly, I've modified the syntax message to list each option. s''(q.S:$/9=(T1';s;(..)(..);$..=substr+crypt($1,$2),2,3;eg;print$..$/	[reply]
Re: All-purpose data dump subroutine by liverpole (Monsignor) on Jan 11, 2009 at 00:33 UTC
I've updated the subroutine `dump_data()` to allow the user to pass a reference, either to a string of bytes or a list of bytes, which are then displayed. Two new switches, --string and --array allow the testing of those cases. The original code is left above in case any errors were inadvertently introduced. Update: I've changed `seek` to `sysseek`, and fixed a bug in the order of its arguments. Additionally, a hash reference can be passed in place of the options hash.

#!/usr/bin/perl -w
#
# Test program for the dump_data() subroutine.
#
# 061124 liverpole -- created
# 061128 liverpole -- updated to incorporate graff's suggestions
# 090110 liverpole -- updated to allow ref to either a scalar or list of bytes
# 090117 liverpole -- fixed bug in seek (now sysseek); allow options hash to
#                     be passed as a hash ref instead.
###############################################################################

# Strict
use strict;
use warnings;

# Libraries
use File::Basename;
use FileHandle;
use Data::Dumper;
use Getopt::Long;

# Globals
my $iam      = basename $0;
my $b_array  = 0;
my $b_string = 0;

# Usage message, shown whenever the command line cannot be understood.
my $syntax = "
    syntax:  $iam  [switches]  <file>

    Displays a binary dump of given <file>, or of the contents of STDIN
    if '-' is given instead of the filename.

    Switches:
      string .... Read file into a string, and display the string of bytes
      array ..... Read file into an array, and display the array of bytes
      start <offset> ... Starting offset (default = 0)
      end   <offset> ... Ending offset (default = -1 (eof))
      left  <string> ... Left separator char (default = '|')
      mid   <string> ... Middle separator char (default = '|')
      right <string> ... Right separator char (default = '|')
      bin   <string> ... Non-printing binary char (default = '.')
      afmt  <string> ... Address printf format (default '%08x')
      bfmt  <string> ... Bytes printf format (default '%02x')
      max   <number> ... Max bytes to display (default = -1 (entire file))
      count <number> ... Total bytes per line (default = 16)

    Examples:
        Show bytes 100 through 200 of STDIN:
            % perl dumptest.pl -start 100 -end 200 -

        Show all bytes of file 'binary.dat'
            % perl dumptest.pl binary.dat

";

# Command-line
my $h_opts = { };
my $result = GetOptions (
    "array"   => \$b_array,
    "string"  => \$b_string,
    "start=s" => \$h_opts->{'start'},
    "end=s"   => \$h_opts->{'end'},
    "left=s"  => \$h_opts->{'left'},
    "mid=s"   => \$h_opts->{'mid'},
    "right=s" => \$h_opts->{'right'},
    "bin=s"   => \$h_opts->{'bin'},
    "afmt=s"  => \$h_opts->{'afmt'},
    "bfmt=s"  => \$h_opts->{'bfmt'},
    "max=s"   => \$h_opts->{'max'},
    "count=s" => \$h_opts->{'count'},
);

# An unknown or malformed switch is a usage error, not something to ignore.
$result or die $syntax;

# Taking \$h_opts->{...} above autovivifies every key with an undefined
# value; drop the switches the user did not supply.
delete @{$h_opts}{ grep { !defined $h_opts->{$_} } keys %$h_opts };

# Main program
#
# Test for definedness rather than truth so that a file named "0" works.
my $fname = shift;
defined $fname or die $syntax;

if ($b_string or $b_array) {
    # Slurp the whole file as raw bytes
    my $fh = FileHandle->new;
    open($fh, "<", $fname) or die "$iam:  failed to read '$fname' ($!)\n";
    binmode $fh;
    my $string = do { local $/; <$fh> };
    defined $string or $string = "";     # Empty file
    close $fh;

    if ($b_string) {
        # Display bytes read into a string
        print "=== Dump of string ===\n";
        dump_data(\$string, %$h_opts);
    }

    if ($b_array) {
        # Display bytes read into an array
        my @bytes = split(//, $string);
        print "=== Dump of array ===\n";
        dump_data([ @bytes ], %$h_opts);
    }
} else {
    # Display bytes directly from file
    print "=== Dump of file '$fname' ===\n";
    my $fh = dump_data($fname, %$h_opts);
}

###############################################################################
# dump_data()
#
# Given a filename or filehandle as the first argument, and an optional hash
# (or hash reference) containing keys and values, displays a data dump of the
# given file.  If '-' is specified for the filename (or the filename is
# undefined or otherwise false), STDIN is used instead.
#
# The first argument may also be a reference to a string of bytes, or a
# reference to an array holding one byte (a single character) per element;
# in that case the bytes are read from the string or array.
#
# The following optional arguments modify the behavior:
#
#   Key   => value   Description                  Default (meaning)
#  ---------------------------------------------------------------------------
#   start => offset  Starting file offset         0
#   end   => offset  Ending file offset           -1  ("end-of-file")
#   left  => string  Left separator char          '|'
#   mid   => string  Middle separator char        '|'
#   right => string  Right separator char         '|'
#   bin   => string  Non-printing binary char     '.'
#   afmt  => string  Address printf format        '%08x'
#   bfmt  => string  Bytes printf format          '%02x'
#   max   => number  Max bytes to display         -1  (entire file)
#   count => number  Total bytes per line         16
#   out   => ref     Scalar ref to store output   0   (print to STDOUT)
#
# The 'end' offset is inclusive.  An option whose value is undefined is
# treated as if it had not been given.
#
# For example, the following illustrates a dump of the first 128 bytes
# of a .JPG file with no optional arguments:
#
#   00000000|ff d8 ff e0 00 10 4a 46|49 46 00 01 01 01 00 48|......JFIF.....H
#   00000010|00 48 00 00 ff e1 25 88|45 78 69 66 00 00 49 49|.H....%.Exif..II
#   00000020|2a 00 08 00 00 00 09 00|0f 01 02 00 06 00 00 00|*...............
#   00000030|7a 00 00 00 10 01 02 00|16 00 00 00 80 00 00 00|z...............
#   00000040|12 01 03 00 01 00 00 00|01 00 00 00 1a 01 05 00|................
#   00000050|01 00 00 00 96 00 00 00|1b 01 05 00 01 00 00 00|................
#   00000060|9e 00 00 00 28 01 03 00|01 00 00 00 02 00 00 00|....(...........
#   00000070|32 01 02 00 14 00 00 00|a6 00 00 00 13 02 03 00|2...............
#
# The output format is as follows:  file offset (address) on the left, then
# the left separator char "|" followed by 16 bytes of data (with the middle
# separator char "|" splitting the data to make it more readable), followed
# by the right separator char "|" and the ascii representation of each byte
# (with "." for non-printing chars).  The middle separator is only used when
# 'count' is even.
#
# Returns the first argument as it was used (the filehandle, or the string
# or array reference), or nothing if STDIN ended before the 'start' offset
# was reached.  Dies if a named file does not exist or cannot be opened, if
# 'start' lies beyond the end of a string or array, or if 'count' is not a
# positive integer.
#
# 090110 liverpole -- Modified to allow a reference to either a scalar from
#                     which to read the bytes, or a list of the bytes
#                     themselves.
#
# 090117 liverpole -- Changed seek() to sysseek() and fixed the arg order.
#                     Now allow options hash to be passed as a reference.
###############################################################################
sub dump_data {
    my ($fh, @opts) = @_;
    my $iam = "dump_data";

    require FileHandle;

    # Allow @opts to be a hash -or- a hashref for convenience
    my %opts = (@opts > 0 and ref $opts[0] eq 'HASH')? %{$opts[0]}: @opts;

    # Merge the caller's options over the defaults
    my %default = (
        start => 0,          # Starting file offset
        end   => -1,         # Ending file offset (inclusive)
        left  => "|",        # Left separator char
        mid   => "|",        # Middle separator char
        right => "|",        # Right separator char
        bin   => ".",        # Non-printing binary char
        afmt  => "%08x",     # Address printf format
        bfmt  => "%02x",     # Bytes printf format
        max   => -1,         # Max bytes to display
        count => 16,         # Total bytes per line
        out   => 0,          # Scalar ref to store output
    );
    my %opt = map {
        ( $_ => (defined($opts{$_}) ? $opts{$_} : $default{$_}) )
    } keys %default;
    my ($start, $end, $left, $mid, $right, $bin) =
        @opt{qw(start end left mid right bin)};
    my ($afmt, $bfmt, $max, $count, $pout) =
        @opt{qw(afmt bfmt max count out)};

    # $count is used as a modulus below, so it must be a positive integer
    ($count =~ /\A\d+\z/ and $count > 0)
        or die "$iam:  'count' must be a positive integer (got '$count')\n";

    # Work out where the bytes come from, and position that source at
    # $start.  $next_char returns the next byte as a single character, or
    # undef when the source is exhausted.
    my $b_stdin = (!($fh || 0) or $fh eq '-');
    my $next_char;
    if (ref $fh eq "SCALAR") {
        # A string of bytes (eg. \$bytes)
        my $length = defined($$fh)? length($$fh): 0;
        ($start and $start >= $length)
            and die "$iam:  string is only $length bytes long\n";
        my $pos = $start;
        $next_char = sub { ($pos < $length)? substr($$fh, $pos++, 1): undef };
    } elsif (ref $fh eq "ARRAY") {
        # An array of bytes (eg. [$b0, $b1, $b2])
        my $size = @$fh;
        ($start and $start >= $size)
            and die "$iam:  array is only $size bytes long\n";
        my $pos = $start;
        $next_char = sub { ($pos < $size)? $fh->[$pos++]: undef };
    } else {
        # STDIN must be tested before the plain-string case, because '-'
        # is itself a plain string and would be mistaken for a filename.
        if ($b_stdin) {
            $fh = \*STDIN;
            binmode $fh;
        } elsif (ref $fh eq "" and ref(\$fh) ne 'GLOB') {
            # If $fh is a filename, open it
            my $fname = $fh;
            (-e $fname) or die "$iam:  no such file '$fname'\n";
            $fh = FileHandle->new;
            open($fh, '<', $fname)
                or die "$iam:  can't read '$fname' ($!)\n";
            binmode $fh;
        }

        # If it's an actual file, seek will work, otherwise a total of
        # $start bytes must be discarded first.  seek() is used rather than
        # sysseek() because the bytes are read with the buffered getc().
        if ($start) {
            unless (!$b_stdin and seek($fh, $start, 0)) {
                for (1 .. $start) {
                    defined(getc($fh)) or return;
                }
            }
        }
        my $in = $fh;
        $next_char = sub { getc($in) };
    }

    # Addresses are shown relative to the start of the data, whatever the
    # source.
    my $offs = $start? $start: 0;

    # 'end' is just another limit on the number of bytes shown; folding it
    # into $max (without overriding a smaller 'max' from the caller) keeps
    # the read loop finite for every source.
    if ($end >= 0) {
        my $span = $end - $start + 1;
        $span = 0 if $span < 0;
        $max = $span if ($max < 0 or $span < $max);
    }

    # Per-line state
    my $dlen = length(sprintf $bfmt, 255) + 1;     # Width of one byte cell
    my $half = ($count % 2)? 0: ($count / 2);      # 0 = no middle separator
    my ($idx, $text, $asc) = (0, "", "");

    # Closure to emit the pending line (if any), padding a short final line
    # so that the right separator stays aligned.
    my $c_flush = sub {
        length($text) or return;
        my $missing = ($count - $idx % $count) % $count;
        my $pad     = $missing * $dlen - 1;
        ($pad > 0) and $text .= " " x $pad;
        my $out = sprintf $afmt, $offs;
        $out   .= sprintf "%s%s%s%s\n", $left, $text, $right, $asc;
        if ($pout) {
            $$pout .= $out;
        } else {
            print $out;
        }
        $offs += $count;
        $text = $asc = "";
    };

    # Process the file or list.  The leading blank line belongs to the
    # STDOUT display only; nothing is printed when writing to a string.
    $pout or print "\n";
    while (($max < 0 or $max--) and defined(my $c = $next_char->())) {
        my $byte = ord $c;
        $text .= sprintf $bfmt, $byte;
        $asc  .= ($byte < 32 || $byte > 126)? $bin: chr($byte);
        if (++$idx % $count) {
            $text .= ($half and 0 == $idx % $half)? $mid: " ";
        } else {
            $c_flush->();
        }
    }

    # Dump any final data, and return the filehandle
    $c_flush->();
    return $fh;
}
[download]
s''(q.S:$/9=(T1';s;(..)(..);$..=substr+crypt($1,$2),2,3;eg;print$..$/	[reply] [d/l] [select]

Back to Cool Uses for Perl