Beefy Boxes and Bandwidth Generously Provided by pair Networks
laziness, impatience, and hubris
 
PerlMonks  

Moriarty's scratchpad

by Moriarty (Abbot)
on Jun 05, 2004 at 11:48 UTC ( [id://361492]=scratchpad: print w/replies, xml ) Need Help??

Latest Problem

#!/usr/bin/perl -w
#
# Copies <input file> to <output file> line by line. Lines that begin with
# "<CP>" are inspected: the 9 characters at offset 36 are taken as a key
# (leading zeros removed) and looked up in the set of keys loaded from
# <extracts file> (one key per line). When the key is listed, the first
# three-character group of the form "<digit><digit>0" that is immediately
# followed by six Y/N flags is rewritten to "910", and the modified line is
# echoed to STDOUT. Every input line, modified or not, is written to the
# output file.
#
# Usage: <script> <input file> <output file> <extracts file>
use strict;
use warnings;

if (@ARGV < 3) {
    die "usage = $0 <input file> <output file> <extracts file>\n";
}

my ($input_path, $output_path, $extracts_path) = @ARGV;

# Load the extract keys into a hash so each lookup is a single hash probe.
my %extract_list;
open my $extracts_fh, '<', $extracts_path
    or die "Can't open $extracts_path: $!\n";
while (my $key = <$extracts_fh>) {
    chomp $key;
    $extract_list{$key}++;
}
close $extracts_fh;

my $extract_cnt = scalar keys %extract_list;
print "$extract_cnt extracts loaded\n";

open my $in_fh, '<', $input_path
    or die "Can't open $input_path for reading: $!\n";
open my $out_fh, '>', $output_path
    or die "Can't open $output_path for writing: $!\n";

my $num_cp    = 0;    # number of "<CP>" lines seen so far
my $extracted = 0;    # number of "<CP>" lines whose key was in the extract set

while (my $line = <$in_fh>) {
    if (substr($line, 0, 4) eq '<CP>') {
        $num_cp++;

        # Progress indicator: overwrite the same terminal line every 100 records.
        if ($num_cp % 100 == 0) {
            printf("%d\r", $num_cp);
        }

        # Guard against short lines so substr never returns undef.
        my $tfn = length($line) > 36 ? substr($line, 36, 9) : '';

        # Strip all leading zeros in one anchored substitution. A loop that
        # tests the first character numerically never terminates when the key
        # is empty, all zeros, or starts with a non-digit.
        $tfn =~ s/^0+//;

        if (exists $extract_list{$tfn}) {
            $line =~ s/[0-9][0-9]0([YN][YN][YN][YN][YN][YN])/910$1/;
            $extracted++;
            print $line;
        }
    }

    print {$out_fh} $line;
}

close $in_fh;

# Buffered write errors only surface at close, so check it.
close $out_fh
    or die "Can't close $output_path: $!\n";

print "$extracted documents extracted\n";
Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Chatterbox?
and the web crawler heard nothing...

How do I use this?Last hourOther CB clients
Other Users?
Others browsing the Monastery: (5)
As of 2024-09-18 15:16 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?
    The PerlMonks site front end has:





    Results (25 votes). Check out past polls.

    Notices?
    erzuuli‥ 🛈The London Perl and Raku Workshop takes place on 26th Oct 2024. If your company depends on Perl, please consider sponsoring and/or attending.