Beefy Boxes and Bandwidth Generously Provided by pair Networks
Pathologically Eclectic Rubbish Lister
 
PerlMonks  

diotalevi's grep

by diotalevi (Canon)
on Sep 14, 2006 at 22:29 UTC ( #573020=sourcecode: print w/ replies, xml ) Need Help??

Category: Utility Scripts
Author/Contact Info
Description:

This grep is much like everyone else's perl reimplementation of grep. It's only distinguishing features are automatically looking inside bzip2, gzip, zip, and tar files. It borrows the pretty formatting used by petdance in ack.

This started life as an improved version of the grep that comes with the Solaris which isn't recursive.

#!/usr/bin/perl
## no critic VersionVar
use strict;
use warnings;

use Getopt::Long 'GetOptions';
use autouse 'File::Find'      => 'find';
use autouse 'Pod::Usage'      => 'pod2usage';
use autouse 'Term::ANSIColor' => 'colored';

use autouse 'IPC::Open3' => 'open3';
$SIG{CHLD} = 'IGNORE';

use vars qw( $TextOnly );

main();
exit;

sub main {

    # Fetch parameters.
    GetOptions(
        man  => sub { pod2usage( -verbose => 2 ) },
        help => sub { pod2usage( -verbose => 1 ) },

        t        => \$TextOnly,
        l        => \my ($filename_only),
        w        => \my ($word),
        i        => \my ($ignore_case),
        Q        => \my ($quotemeta),
        h        => \my ($no_filename),
        n        => \my ($line_no),
        R        => \my ($no_recursive),
        v        => \my ($invert_match),
        plain    => \my ($no_ansicolor),
        'name=s' => \my ($filename_rx),
        )
        or pod2usage( -verbose => 0 );
    my ( $match, @srcs ) = @ARGV;
    if ( not @srcs ) {
        @srcs = '.';    ## no critic Noisy
    }

    # Validate parameters.
    if ( not defined $match ) {
        pod2usage( -verbose => 0 );
    }

    # Pre-process the pattern and then compile it.
    if ($quotemeta) {
        $match = quotemeta $match;
    }
    if ($word) {
        $match = "\\b$match\\b";
    }
    if ($ignore_case) {
        $match = "(?i)$match";
    }
    my $match_rx = qr/$match/;

    # Get a function which formats the output for whatever was
    # requested. All info is passed through the globals
    # $File::Find::rel_name, $., and $_. The input will contain
    # whatever linebreak is currently active so most things don't need
    # to add one.
    my $prev_file = '';
    my $formatter = (
        $line_no && $no_filename
        ? sub { "$.:" . shift }
        : $line_no ? sub {
            if ( $File::Find::name ne $prev_file ) {
                $prev_file = $File::Find::name;
                return ( ( $prev_file eq '' ? '' : "\n" )
                    . colored( $File::Find::name, 'bold green' ) . "\n
+$.:"
                        . shift );
            }
            else {
                return "$.:" . shift;
            }
            }
        : $no_filename   ? sub {shift}
        : $filename_only ? sub {
            if ( $File::Find::name ne $prev_file ) {
                $prev_file = $File::Find::name;
                return ( ( $prev_file eq '' ? '' : "\n" )
                    . colored( $File::Find::name, 'bold green' )
                        . "\n" );
            }
            else {
                return;
            }
            }
        : sub {
            if ( $File::Find::name ne $prev_file ) {
                $prev_file = $File::Find::name;
                return ( ( $prev_file eq '' ? '' : "\n" )
                    . colored( $File::Find::name, 'bold green' ) . "\n
+"
                        . shift );
            }
            else {
                return shift;
            }
        }
    );

    my $grep_file_fn = sub {
        grep_file(
            ignore_rcs   => 1,
            plain        => $no_ansicolor,
            match_rx     => $match_rx,
            filename_rx  => $filename_rx,
            formatter    => $formatter,
            invert_match => $invert_match,
            match_once   => $filename_only
        );
    };

    # Here's the main loop. For each source directory/file, search it.
    for my $src (@srcs) {

        # Examine all files in $src.
        if ($no_recursive) {

            # Mimic the API of File::Find for grep_file().
            # local $File::Find::dir = unimplemented
            ## no critic
            local $File::Find::name = $src;
            local $_                = $src;

            $grep_file_fn->();
        }
        else {
            find( $grep_file_fn, $src );
        }
    }

    return 0;
}

sub open_file_harder {
    my ($filename) = @_;
    return if not defined $filename;

    if ( my ($extension) = $filename =~ /(\.[^.]+)\z/mx ) {
        my @readers = (
            [ qr/\.t(?:ar\.)?gz\z/ => qw( gzcat ),    $filename ],
            [ qr/\.zip\z/,         => qw( unzip -p ), $filename ],
            [ qr/\.Z\z/            => qw( zcat ),     $filename ],
            [ qr/\.gz\z/           => qw( gzcat ),    $filename ],
            [ qr/\.bz2\z/          => qw( bzcat ),    $filename ],
        );
        for my $reader (@readers) {
            my ( $pattern, @command ) = @{$reader};
            if ( $extension =~ $pattern ) {
                open3( undef, my $fh, undef, @command );
                return $fh;
            }
        }
    }

    open my $fh, '<', $filename
        or die "Couldn't open $filename: $!";
    return $fh;
}

sub grep_file {
    my %p            = @_;
    my $match_rx     = $p{match_rx};
    my $formatter    = $p{formatter};
    my $invert_match = $p{invert_match};
    my $plain        = $p{plain};
    my $match_once   = $p{match_once};

    my $filename = $_;

    # Ignore CVS stuff.
    return if $File::Find::name =~ m{/CVS/?};

    # If there is a pattern required of filenames, try that one
    # first. This requires no checks to the FS so I'm doing this
    # before the next stuff.
    return
        if defined $p{filename_rx}
        and not $filename =~ $p{filename_rx};

    # Ignore non-existant files.
    return if not -f $filename;

    # Ignore non-text files if that's what was requested.
    return if $TextOnly and not -T _;

    eval {
        my $fh = open_file_harder($filename);

    LINE: while ( my $line = <$fh> ) {

            # If the line matches the pattern print it as a formatted
            # line.
            my $matched;

            if ($plain) {
                $matched = ( $line =~ /$match_rx/mx );
            }
            else {
                $matched = ( $line =~ /$match_rx/mx );

                $line
                    =~ s/((?:$match_rx)+)/ colored( "$1", 'yellow on_b
+lack' ) /gemx;
            }

            # Given    Match         then exclusive or is great here.
            #            0   1
            #          +---+---+
            # Invert 0 |   | X |
            #        1 | X |   |
            if ( $matched xor $invert_match ) {
                print $formatter->($line);
                last LINE if $match_once;
            }
        }
    };

    return 1;
}

__END__

=head1 NAME

dgrep - A recursive grep that uses perl regular expressions.

=head1 SYNOPSIS

dgrep [options] [file ...]

 Options:
   -help      Prints this help message
   -man       Prints the manual

   -t         Searches only `text' files
   -w         Matches only "words" using \b...\b
   -i         Case-insensitive matching
   -Q         Ignore perl meta-characters

   -v         Invert output, match lines that don't match the pattern
   -h         Exclude filename from output
   -n         Include line number in output

   -R         Disable recursion, no directories.
   -plain     Disable highliting of matched text
   -name EXPR Only open files matching this regular expression

=head1 OPTIONS

=over 4

=item B<-help>

Prints a simple message on usage and then exits.

=item B<-man>

Prints the manual and then exits.

=item B<-t>

Only `text' files are searched.

=item B<-w>

When matching, the pattern is surrounded by perl\'s \b assertion. That
is, the match must be on a "word" boundary, either starting or
finishing. To perl, "word" is locale specific but generally means any
alphanumeric character and underscore.

=item B<-i>

Match without regard to casing. This is affected by locale.

=item B<-Q>

Pattern is a literal string. All regex metacharacters will be escaped
using the quotemeta() function.

=item B<-v>

Print only lines which do B<not> match the pattern. This is equivalent
to grep\'s -v parameter.

=item B<-h>

Omit the filename from the output when a line is matched. This is
semi-equivalent to grep\'s -h parameter.

=item B<-n>

Print the line number.

=item B<-R>

Do not recurse into any subdirectories.

=item B<-plain>

C<dgrep> automatically inserts ANSI escape codes to highlight matched
text. Use the C<-plain> option to disable that.

=item B<-name> EXPR

C<dgrep> usually searches every file and directory, recursively. When
C<-name EXPR> is used, only filenames matching this regular expression
are searched.

=back

=head1 DESCRIPTION

B<dgrep> is an "improved" version of the grep that comes with the Sun
box. It is normally recursive, accepts perl regular expressions, and
optionally prints the filename the match was found in.

=cut

Comment on diotalevi's grep
Download Code
Re: diotalevi's grep
by parv (Priest) on Mar 13, 2007 at 18:55 UTC

    Just curious ... had there been any updates (especially related to any bugs (not that I have used it)) which are not reflected in OP? And, is the posted program being maintained?

      There's been no compelling reason to change it.

      ⠤⠤ ⠙⠊⠕⠞⠁⠇⠑⠧⠊

        Ok then; thanks much. I wanted to verify as I have been giving it (along with ack, and tcgrep) as references (for grep alternatives).

Back to Code Catacombs

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: sourcecode [id://573020]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others studying the Monastery: (6)
As of 2014-12-28 12:53 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    Is guessing a good strategy for surviving in the IT business?





    Results (181 votes), past polls