Beefy Boxes and Bandwidth Generously Provided by pair Networks
Don't ask to ask, just ask
 
PerlMonks  

Moriarty's scratchpad

by Moriarty (Abbot)
on Jun 05, 2004 at 11:48 UTC ( [id://361492]=scratchpad: print w/replies, xml ) Need Help??

Latest Problem

#!/usr/bin/perl
use strict;
use warnings;

# Copies <input file> to <output file> line by line. Lines that begin with
# "<CP>" are counted; for each one, the 9-character field at offset 36 is
# read, its leading zeros are removed, and the result is looked up in the
# list loaded from <extracts file> (one entry per line). When it is listed,
# the first "dd0" + six Y/N flags sequence on the line is rewritten to
# "910" + the same flags, the modified line is echoed to STDOUT, and the
# extracted counter is incremented. All other lines are copied unchanged.
#
# NOTE(review): the meaning of the "<CP>" marker, the offset-36 field and
# the "910" code is not visible in this script -- confirm against the file
# format specification.

if (@ARGV < 3) {
    die "usage = $0 <input file> <output file> <extracts file>\n";
}
my ($input_path, $output_path, $extracts_path) = @ARGV;

# Load the extract list into a hash so each lookup is a single key test.
my %is_wanted;
open my $extracts_fh, '<', $extracts_path
    or die "Can't open $extracts_path: $!\n";
while (my $entry = <$extracts_fh>) {
    chomp $entry;
    # Skip blank lines so a stray empty line is neither counted as an
    # extract nor able to match a field that consisted solely of zeros.
    next unless length $entry;
    $is_wanted{$entry}++;
}
close $extracts_fh;

my $extract_count = scalar keys %is_wanted;
print "$extract_count extracts loaded\n";

open my $in_fh, '<', $input_path
    or die "Can't open $input_path for reading: $!\n";
open my $out_fh, '>', $output_path
    or die "Can't open $output_path for writing: $!\n";

my $checkpoints_seen = 0;
my $extracted        = 0;

LINE:
while (my $line = <$in_fh>) {
    if (substr($line, 0, 4) ne '<CP>') {
        print {$out_fh} $line;
        next LINE;
    }

    $checkpoints_seen++;
    # Progress indicator: "\r" rewrites the same terminal line each time.
    printf("%d\r", $checkpoints_seen) if $checkpoints_seen % 100 == 0;

    # Guard against short lines: substr() past the end of the string
    # returns undef and warns.
    my $key = length($line) > 36 ? substr($line, 36, 9) : '';

    # Strip all leading zeros in one pass. The previous character-by-
    # character loop tested with numeric '==', which is also true for an
    # empty or non-numeric first character and therefore never terminated
    # on such fields.
    $key =~ s/^0+//;

    # The lookup table must be dereferenced consistently; the earlier
    # '$extract_list{...}' form named a hash that was never declared.
    if (exists $is_wanted{$key}) {
        $line =~ s/[0-9][0-9]0([YN]{6})/910${1}/;
        $extracted++;
        print $line;
    }
    print {$out_fh} $line;
}

close $in_fh;
# Buffered write errors only surface at close, so check it.
close $out_fh or die "Can't close $output_path: $!\n";

print "$extracted documents extracted\n";
Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Last hour | Other CB clients
Other Users?
Others studying the Monastery: (2)
As of 2025-06-20 22:04 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found

    Notices?
    erzuuli: Anonymous Monks are no longer allowed to use Super Search, due to excessive use of this resource by robots.