Beefy Boxes and Bandwidth Generously Provided by pair Networks
Perl Monk, Perl Meditation
 
PerlMonks  

file-search: File search with minimum horizontal scrolling of output

by parv (Parson)
on Oct 08, 2006 at 04:51 UTC ( [id://576937]=sourcecode: print w/replies, xml ) Need Help??
Category: Text processing
Author/Contact Info Parv, email (parv underscore at yahoo dot com) or /msg
Description:

(From POD) The main reason for existence of this program is to minimize horizontal scrolling by displaying the file name only once (on a line of its own) before display of the lines matched, and by collapsing tabs and multiple spaces. Other reasons are to strip the current directory from the file name paths, and to have matched text highlighted.

Updated version can be found in http://www103.pair.com/parv/comp/src/perl/ as file-search-\d+[.]\d{2}


#!/usr/local/bin/perl

$VERSION = '0.01';

use warnings; use strict;

use File::Spec;
use Pod::Usage;
use Getopt::Long qw(:config gnu_compat no_ignore_case no_debug);

my %default_opt =
  (
    'show-default-opt' => undef
  , 'verbose' => undef

  , 'ignore-case' => undef
  , 'invert-match' => undef

  , 'bin-search' => undef
  , 'recursive' => undef

  , 'line-numbers' => 1
  , 'number-width' => 4

  , 'collapse-space' => 1
  , 'blank-line' => undef
  , 'highlight' => 1

  , 'cleanse-path' => 1
  , 'realpath' => undef
  , 'strip-curdir' => 1
  );

my %opt = %default_opt;
my ( $pattern , @files ) = process_args();

my $re_flags = 'x';
$re_flags .= 'i' if $opt{'ignore-case'};
($pattern) = map qr($_) , "(?$re_flags)$pattern" ;

#  Used elsewhere in multiple places.
my $spec = 'File::Spec';
foreach my $file ( @files )
{
  search_file( $file , $pattern , \%opt );
}

exit;

sub search_file
{
  my ( $file , $pattern , $opt ) = @_;

  unless ( defined $pattern )
  {
    die "Pattern is not defined\n";
    return;
  }

  return if -l $file ;

  if ( -d _ )
  {
    search_directory( $file , $pattern , $opt ) if $opt->{'recursive'}
+ ;
    return;
  }

  unless ( -T $file || $opt->{'bin-search'} )
  {
    warn "Failed 'ASCII test', skipped $file\n" if $opt->{'verbose'};
    return;
  }

  if ( ! -f _ )
  {
    warn "Non-regular file, skipped $file\n" if $opt->{'verbose'};
    return;
  }

  my $save;
  return unless real_search( $file , \$save , $opt );

  my $alt = get_alternate_name( $file , $opt ) || $file ;
  $alt = make_term_bold( $alt , qr{.+} ) if $opt->{'highlight'} ;

  printf "==>>>  %s\n" , $alt;
  print $save ;
  print "\n";
}

sub search_directory
{
  my ( $dir , $re , $opt ) = @_;

  my $dh;
  unless ( opendir $dh , $dir )
  {
    warn "Cannot open directory $dir: $!";
    return;
  }

  while ( my $file = readdir $dh )
  {
    next if $file eq $spec->curdir or $file eq $spec->updir ;
    search_file( $spec->catfile( $dir , $file ) , $re , $opt );
  }

  return;
}

sub real_search
{
  my ( $file , $save , $opt ) = @_;

  my ( $in , $close ) = open_file( $file );
  return unless $in;

  my ( $lines , $matches );
  my $space = qr{ [ \t]+ }x;
  my $result = result_format( $opt );
  while ( my $line = <$in> )
  {
    if ( $opt->{'invert-match'} )
    {
      next if $line =~ m/$pattern/;
    }
    else
    {
      next if $line !~ m/$pattern/;
    }
    $lines++;

    if ( $opt->{'collapse-space'} )
    {
      $line =~ s/^$space//;
      $line =~ s/$space+$//;
      $line =~ s/$space/ /g;
    }

    if ( !$opt->{'invert-match'}  && $opt->{'highlight'} )
    {
      $line = make_term_bold( $line , $pattern );
    }

    $$save .= $result->( $. , $line );
  }

  $close->();
  return $lines;
}

sub result_format
{
  my ( $opt ) = @_;

  my $line_fmt = '%' . $opt->{'number-width'} . "d  %s";
  my $newline = $opt->{'blank-line'} ? "\n" : '';
  return
    $opt->{'line-numbers'}
    ? sub { sprintf $line_fmt , $_[0] , $_[1]  . $newline }
    : sub { $_[1] . $newline } ;
}

sub make_term_bold
{
  my ( $in , $re ) = @_;
  my ( $bold , $norm ) = ( "\e[1m" , "\e[0m" );
  $in =~ s/($re)/$bold$1$norm/g;
  return $in;
}

sub get_alternate_name
{
  my ( $name , $opt ) = @_;

  return unless $opt;

  if ( $opt->{'realpath'} || $opt->{'strip-curdir'} )
  {
    require Cwd;
    import Cwd qw( abs_path getcwd );
    $name = abs_path( $name );
    $name = strip_current_dir( $name ) if $opt->{'strip-curdir'};
    return $name;
  }

  return $name if !$opt->{'cleanse-path'};
  return $spec->canonpath( $name );
}

sub strip_current_dir
{
  my ( $path ) = @_;
  my $curdir = quotemeta getcwd();
  $path =~ s{^ $curdir / }//x;
  return $path;
}

sub open_file
{
  my ( $file ) = @_;
  my $fh;
  unless ( open $fh , '<' , $file )
  {
    warn "Cannot open $file: $!";
    return;
  }
  return ( $fh , sub { close $fh or die "Cannot close $file: $!" } ) ;
}

sub process_args
{
  GetOptions
  (
    'h|help' => \$opt{'help'}

  , 'D|show-default-opt' => \$opt{'show-default-opt'}
  , 'S|show-set-opt'     => \$opt{'show-set-opt'}

  , 'q|quiet'  => sub { $opt{'verbose'} = undef }
  , 'verbose+' => \$opt{'verbose'}

  , 'v|invert-match' => \$opt{'invert-match'}
  , 'i|ignore-case!' => \$opt{'ignore-case'}

  , 'r|recursive!' => \$opt{'recursive'}
  , 'B|bin-search' => \$opt{'bin-search'}

  , 'n|line-numbers!'  => \$opt{'line-numbers'}
  , 'w|number-width=i' => \$opt{'number-width'}

  , 'C|collapse-space!'    => \$opt{'collapse-space'}
  , 'b|blank-line!' => \$opt{'blank-line'}
  , 'H|highlight!'  => \$opt{'highlight'}

  , 'c|cleanse-path!' => \$opt{'cleanse-path'}
  , 'R|realpath!'     => \$opt{'realpath'}
  , 's|strip-curdir!' => \$opt{'strip-curdir'}
  )
    || die pod2usage('-exitval'  => 2 , '-verbose'  => 1);


  show_options( $opt{'show-set-opt'} ? \%opt : \%default_opt , 'exit' 
+)
    if $opt{'show-default-opt'} or $opt{'show-set-opt'} ;

  pod2usage('-exitval' => 0 , '-verbose' => 3)
    if $opt{'help'};


  #  Check if any arguments remain which will be file names
  pod2usage( '-msg' => ''
            , '-exitval' => 1
            , '-verbose' => 1
          )
    if scalar @ARGV < 2;

  return @ARGV;
}

sub show_options
{
  my ( $opt , $exit ) = @_;

  my $out;
  my $max = ( sort { $b <=> $a } map length( $_ ) , keys %{ $opt } )[0
+];
  $max++;
  my $fmt = '%' . $max . "s: %s\n";

  foreach my $k ( sort keys %{ $opt } )
  {
    my $v = $opt->{ $k };
    $out .=
      sprintf $fmt
        , $k , ( !$v ? 'no' : $v == 1 ? 'yes' : $v ) ;
  }
  print $out;
  exit 0 if $exit;
}


__END__

=pod

=head1 NAME

file-search - Search for regular expressions in text files.

=head1 SYNOPSIS

To see default options ...

  file-search -show-default-opt


To search case-insensitively, recursively, highlight text matched,
and preserve spaces & tabs ...

  file-search \
    -ignore-case -recursive -highlight -nocollapse-space \
    '(pat|s)tern' \
    file(s) | directory(ies)


=head1 DESCRIPTION

The main reason for existence of this program is to minimize
horizontal scrolling by displaying the file name only once (on a line
of its own) before display of the lines matched, and by collapsing
tabs and multiple spaces.  Other reasons are to strip the current
directory from the file name paths, and to have matched text
highlighted.

Below are first few lines of output of
C<file-search collapse file-search> with default options, namely
highlight the matched text (actual escpace character has been replaced
by '\e' solely to keep this file "text") ...

  ==>>>  \e[1mfile-search\e[0m
    23  , '\e[1mcollapse\e[0m-space' => 1
   131  if ( $opt->{'\e[1mcollapse\e[0m-space'} )
   230  , 'C|\e[1mcollapse\e[0m-space!' => \$opt{'\e[1mcollapse\e[0m-s
+pace'}
   ...


After the options have been taken into account, first parameter is
taken to be a Perl regular experssion, and rest as the files to search
for.  Directories are skipped if I<-recursive> option is not given.

=head1 OPTIONS

Some of the options can be negated by prefixing it with "no" as listed
below; the last option will override preivous one.  For exmaple, If
I<-norecursive> is followed by I<-recursive>, files will be
recursively searched when a directory is encountered.

=head2 General Options

=over 4

=item B<-help> | B<-h>

Shows this message.

=item B<-quiet> | B<-q>

Produce grave error messages only.

=item B<-show-default-opt> | B<-D>

Show default options.

=item B<-show-set-opt> | B<-S>

Show options given on command line.

=item B<-verbose>

Produce extra messages.  Specifying it multiple times causes more outp
+ut
accordingly.

=back

=head2 Search & Display Options

=over 4

=item B<-bin-search> | B<-B>

Search through binary files (as determined by L<-T> function).

=item B<-blank-line> | B<-b>

Put a blank line after each line where the pattern matches.

B<-noblank-line> turns off this option.

=item B<-cleanse-path> | B<-c>

Do sane path clean up (for the file name to De displayed) without
touching the file system.  See L<File::Spec::canonpath()>.

B<-nocleanse-path> turns off this option.

=item B<-collapse-space> | B<-C>

Display lines after removing multiple spaces and tabs.

B<-nocollapse-space> turns off this option.

=item B<-highlight> | B<-H>

Highlight the text which matches given pattern.

B<-nohighlight> turns off this option.

=item B<-ignore-case> | B<-i>

Do case-insensitive pattern matching.

B<-noignore-case> turns off this option.

=item B<-invert-match> | B<-v>

Show lines which do not match the given pattern(s).

=item B<-line-numbers> | B<-n>

Show line numbers.

B<-noline-numbers> turns off this option.

=item B<-number-width> I<number> | B<-w> I<number>

Number of characters to use to format line numbers.

=item B<-realpath> | B<-R>

Display real path of a file.  See L<realpath(3)>.

B<-norealpath> turns off this option.

=item B<-recursive> | B<-r>

If a directory is given as one of the files, search through all the fi
+les in it.

B<-norecursive> turns off this option.

=item B<-strip-curdir> | B<-s>

Remove current directory path from file names displayed.

B<-nostrip-curdir> turns off this option.

=back


=head1 DEPENDENCY

=over 2

=item *

Cwd

=item *

File::Spec

=item *

Getopt::Long

=back

=head1 TO DO

=over 2

=item *

Ability to see context of given number of lines.

=item *

Possibly use Term::* module(s) to highlight.

=back

=head1 BUGS

=over 2

=item *

When highlighting is used, raw sequence is inserted in the output.
That seems to work well for L<xterm(1)> in my environment, but may not
elsewhere.  Piping output to C<less -R> seems to work too under
Cygwin-X in L<xterm(1)>.

=back

=head1 SEE ALSO

L<grep(1)>

=head1 AUTHOR, LICENSE, DISTRIBUTION, ETC.

Parv, parv_@yahoo.com

MODIFIED:  Oct 07 2006

This software is free to be used in any form only if proper credit is
given.  I am not responsible for any kind of damage or loss.  Use it
at your own risk.

=cut

Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Node Status?
node history
Node Type: sourcecode [id://576937]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this?Last hourOther CB clients
Other Users?
Others lurking in the Monastery: (5)
As of 2024-03-29 13:35 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found