#!/usr/bin/perl
=head1 NAME

uhead -- Unicode-aware version of Unix "head"

=head1 SYNOPSIS

 uhead -c N [file ...]     show first N Unicode chars from file(s)

=head1 DESCRIPTION

This does what the standard "head -c N" command (GNU version) would do
(i.e. show the first N characters from one or more files), with just
the following differences:

=over 4

=item *

The "-c N" option is required (not optional)

=item *

N refers to a number of UTF-8 encoded Unicode characters rather than
bytes

=item *

"Negative" values for N are not supported (you cannot elect to view
all but the last N characters)

=back

If no files are provided on the command line, it will read from STDIN
instead.  (But if it notices that STDIN is actually the user's tty, not
a pipe or redirection from a file, it will exit with a suitable error
message.)

=head1 AUTHOR

David Graff <graff(at)ldc.upenn.edu>

=cut
use strict;
use warnings;

my $Usage = "Usage: $0 -c N [file ...]\n";

# The only supported invocation is "-c N [file ...]", where N is a
# non-negative decimal integer.
die $Usage
    unless @ARGV > 1
    and $ARGV[0] eq '-c'
    and $ARGV[1] =~ /\A[0-9]+\z/;

shift;                    # discard the "-c" flag
my $show_chrs = shift;    # number of characters to show per input

# STDIN is used only when no files were named.  In that case it must be a
# pipe or a redirection: reading from an interactive terminal is refused
# with an explanatory message.  Named files always take precedence, even
# when STDIN happens not to be a tty (cron jobs, pipelines, ...).
my $use_stdin = !@ARGV;
if ( $use_stdin and -t STDIN ) {
    die "You need to provide some data (pipe or file(s))\n$Usage";
}

# :encoding(UTF-8) validates the byte stream, unlike the lax :utf8 layer.
binmode STDOUT, ':encoding(UTF-8)';

if ($use_stdin) {
    binmode STDIN, ':encoding(UTF-8)';
    my $head = read_head( \*STDIN, $show_chrs, 'STDIN' );
    print $head, "\n" if defined $head;
}
else {
    my $nfiles = @ARGV;

    for my $file (@ARGV) {
        my $fh;
        if ( !open $fh, '<:encoding(UTF-8)', $file ) {
            warn "open failed on $file: $!\n";
            next;
        }

        my $head = read_head( $fh, $show_chrs, $file );
        close $fh;
        next unless defined $head;    # read_head has already warned

        # Like head(1), label each chunk when several files were named.
        print "\n==> $file <==\n" if $nfiles > 1;
        print $head, "\n";
    }
}

# read_head( $fh, $count, $label )
#
# Reads up to $count characters (not bytes -- the handle is expected to
# carry a decoding layer) from $fh.  Returns the text read, which is the
# empty string at end of file.  On a read error it warns, naming $label,
# and returns undef.
sub read_head {
    my ( $fh, $count, $label ) = @_;

    my $head = '';
    my $got  = read $fh, $head, $count;
    if ( !defined $got ) {
        warn "read failed on $label: $!\n";
        return;
    }
    return $head;
}