Beefy Boxes and Bandwidth Generously Provided by pair Networks
P is for Practical
 
PerlMonks  

Re: search for a pattern in file without opening the file (mem efficient)

by ikegami (Pope)
on Jan 30, 2007 at 22:51 UTC ( #597459=note: print w/ replies, xml ) Need Help??


in reply to search for a pattern in file without opening the file

Ignoring "without opening these files", what do you want returned: the name of the files with a matching pattern, or the matching lines from the files?

Version that outputs the name of the files with a matching pattern:

use strict;
use warnings;
use File::Glob qw( bsd_glob );

# Print the name of every file in the current directory that contains at
# least one line matching $re. Files that cannot be opened are reported on
# STDERR and skipped.

# Inputs
my @files = bsd_glob('*');
# NOTE(review): as written, this matches "sourc" followed by zero or more
# "e" at the start of a line. Kept exactly as posted; confirm the intent.
my $re = qr/^source*/;

my @matching_files;

FILE:
foreach my $file_name (@files) {
    # The handle must be declared in the loop body's scope. A "my" inside
    # an if() condition is visible only within that if statement, so a read
    # loop placed after it would see a different, unopened $fh.
    open(my $fh, '<', $file_name) or do {
        warn("Unable to open file \"$file_name\": $!\n");
        next FILE;
    };

    while (my $line = <$fh>) {
        if ($line =~ /$re/) {
            push(@matching_files, $file_name);
            last;    # One match is enough to list the file.
        }
    }

    close($fh);
}

# Output
print("$_\n") foreach @matching_files;

Version that outputs the lines that match the pattern:

use strict;
use warnings;
use File::Glob qw( bsd_glob );

# Print every line, from every file in the current directory, that matches
# $re. Files that cannot be opened are reported on STDERR and skipped.

# Inputs
my @files = bsd_glob('*');
# NOTE(review): as written, this matches "sourc" followed by zero or more
# "e" at the start of a line. Kept exactly as posted; confirm the intent.
my $re = qr/^source*/;

my @matching_lines;

FILE:
foreach my $file_name (@files) {
    # The handle must be declared in the loop body's scope. A "my" inside
    # an if() condition is visible only within that if statement, so a read
    # loop placed after it would see a different, unopened $fh.
    open(my $fh, '<', $file_name) or do {
        warn("Unable to open file \"$file_name\": $!\n");
        next FILE;
    };

    while (my $line = <$fh>) {
        push(@matching_lines, $line) if $line =~ /$re/;
    }

    close($fh);
}

# Output (lines still carry their original line endings)
print foreach @matching_lines;

A combination:

use strict;
use warnings;
use File::Glob qw( bsd_glob );

# For every file in the current directory, print "file,line_number: line"
# for each line that matches $re. Files that cannot be opened are reported
# on STDERR and skipped.

# Inputs
my @files = bsd_glob('*');
# NOTE(review): as written, this matches "sourc" followed by zero or more
# "e" at the start of a line. Kept exactly as posted; confirm the intent.
my $re = qr/^source*/;

my @matches;

FILE:
foreach my $file_name (@files) {
    # The handle must be declared in the loop body's scope. A "my" inside
    # an if() condition is visible only within that if statement, so a read
    # loop placed after it would see a different, unopened $fh.
    open(my $fh, '<', $file_name) or do {
        warn("Unable to open file \"$file_name\": $!\n");
        next FILE;
    };

    while (my $line = <$fh>) {
        if ($line =~ /$re/) {
            # Save the file name, the line number and the line content.
            # $. is the line number of the handle that was read last.
            push(@matches, [ $file_name, $., $line ]);
        }
    }

    close($fh);
}

# Output
print($_->[0], ',', $_->[1], ': ', $_->[2]) foreach @matches;

These solutions are memory efficient.
(Returning the results as an iterator would be even more so!)


Comment on Re: search for a pattern in file without opening the file (mem efficient)
Select or Download Code
Re^2: search for a pattern in file without opening the file (iter)
by ikegami (Pope) on Jan 30, 2007 at 23:17 UTC

    Here's a version that only keeps one line in memory at a time (as opposed to every match). It is done elegantly (from the perspective of the function's user) using an iterator.

    use strict;
    use warnings;
    use File::Glob qw( bsd_glob );

    # search_files($re, @files)
    #
    # Build an iterator over all lines in @files that match $re.
    #
    #   $re    - pattern to match each line against (e.g. a qr// object).
    #   @files - names of the files to scan, in order.
    #
    # Returns a code reference. Each call to it returns the next match as
    # the list ( $file_name, $line_number, $line ), or an empty list once
    # every file has been exhausted. Only the current line is held in
    # memory. Files that cannot be opened produce a warning and are skipped.
    sub search_files {
        my ($re, @files) = @_;

        my $fh;           # Handle of the file currently being scanned.
        my $file_name;    # Its name; undef means "no file is open".

        return sub {
            while (1) {
                if (not defined $file_name) {
                    return if not @files;    # Nothing left to scan.

                    $file_name = shift(@files);
                    if (!open($fh, '<', $file_name)) {
                        warn("Unable to open file \"$file_name\": $!\n");
                        undef $file_name;
                        next;                # Try the next file.
                    }
                }

                while (defined(my $line = <$fh>)) {
                    if ($line =~ /$re/) {
                        # $. is the line number within the current file.
                        return ( $file_name, $., $line );
                    }
                }

                # End of this file: release it and move on to the next one.
                close($fh);
                undef $fh;
                undef $file_name;
            }
        };
    }

    {
        my @files = bsd_glob('*');
        # NOTE(review): as written, this matches "sourc" followed by zero or
        # more "e" at the start of a line. Kept as posted; confirm the intent.
        my $re = qr/^source*/;

        my $iter = search_files($re, @files);

        # The list assignment yields the element count in boolean context,
        # so the loop ends when the iterator returns an empty list.
        while (my ($file_name, $line_num, $line) = $iter->()) {
            print("$file_name,$line_num: $line");
        }
    }

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: note [id://597459]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others perusing the Monastery: (2)
As of 2015-07-06 01:11 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    The top three priorities of my open tasks are (in descending order of likelihood to be worked on) ...









    Results (68 votes), past polls