#! perl -slw
# Script 1 of 3: threaded node fetcher.
#
# Reads node ids (one per line) from the file named by the first argument,
# fetches the XML rendering of each node from perlmonks.com with a pool of
# worker threads, stores it as c:/perlmonks/<id>.xml, and finally prints a
# summary grouped by outcome (HTTP status code, or "exists" when a non-empty
# copy was already on disk).
#
# Switches on the #! line: -s lets -THREADS=n on the command line set
# $THREADS; -l makes print append a newline; -w enables warnings.
use strict;
use threads qw[ yield async ];
use Thread::Queue;
use LWP::Simple;
$| = 1;

our $THREADS ||= 4;

my $Qwork    = Thread::Queue->new;
my $Qresults = Thread::Queue->new;

# Worker thread body.
#
# Takes node ids from $Qwork until it receives an undef end-of-work marker,
# and reports one "<id> returned <status>" line per id on $Qresults.
# Blocking on dequeue (rather than polling pending() and then dequeuing)
# avoids the race where two workers see the same last item and one of them
# blocks forever.
sub work {
    my $tid = threads->self->tid();

    while ( defined( my $work = $Qwork->dequeue ) ) {
        my $file = "c:/perlmonks/$work.xml";

        print "$tid checking: $work";

        # -s is true only for an existing, non-empty file.
        if ( -s $file ) {
            $Qresults->enqueue("$work returned exists not fetched");
            next;
        }

        print "$tid fetching: $work";
        my $rc = getstore(
            "http://perlmonks.com/index.pl?node_id=$work&displaytype=xml",
            $file
        );
        $Qresults->enqueue("$work returned $rc");
    }
    return;
}

my @t = map { threads->new( \&work ) } 1 .. $THREADS;

open my $ids_fh, '<', $ARGV[0] or die "$ARGV[ 0 ] : $!";
while ( my $id = <$ids_fh> ) {
    chomp $id;
    next unless length $id;    # a blank line is not a node id

    $Qwork->enqueue($id);

    # Throttle the feeder so the queue does not grow without bound.
    sleep 1 while $Qwork->pending > 100;
}
close $ids_fh;

# One end-of-work marker per worker, then wait for all of them to finish.
$Qwork->enqueue( (undef) x $THREADS );
$_->join for @t;

# All workers have exited, so the results queue can be drained without
# blocking.
my %summary;
while ( defined( my $result = $Qresults->dequeue_nb ) ) {
    my ( $node, $status ) = $result =~ m[(\S+) returned (\S+)] or next;
    push @{ $summary{$status} }, $node;
}

# NOTE(review): key and count are printed with no separator between them
# (e.g. status 200 with 5 nodes prints "2005"); kept as in the original.
for my $key ( sort keys %summary ) {
    print $key, scalar @{ $summary{$key} };
}

# printf rather than print: under -l, print would append a newline.
printf 'Enter to see details or ^C';
<STDIN>;

for my $key ( sort keys %summary ) {
    print $key;
    my $details = join ' ', @{ $summary{$key} };

    # Wrap at roughly 80 columns, breaking on spaces. The captured text
    # must be put back, otherwise the substitution deletes the details.
    $details =~ s[(.{1,80}) ][$1\n]g;
    print $details;
}

####
#! perl -slw
# Script 2 of 3: word-list builder.
#
# Collects every distinct word (three or more characters: letters, with
# apostrophes allowed inside) found in the input files and writes them,
# sorted, one per line to words.dat.
#
# NOTE(review): G is not shown here; presumably it expands wildcards in
# @ARGV -- verify.
# NOTE(review): words are stored with their original case, while the
# indexer below searches lower-cased text -- confirm whether the input is
# expected to be lower case already.
use strict;
use G;

my %words;
while ( <> ) {
    $words{$_}++ for m[\b([a-zA-Z][a-zA-Z']+[a-zA-Z])\b]g;
}

open my $words_fh, '>', 'words.dat' or die $!;
print {$words_fh} $_ for sort keys %words;
close $words_fh or die $!;

####
#! perl -slw
# Script 3 of 3: bit-vector indexer.
#
# For each input file, builds a bit vector with one bit per entry of the
# word list; bit N is set when word N occurs (as a substring) in the
# lower-cased file contents. Writes one line per file to index.dat:
#   <file>(<number of bits set>) : [<bit string>]
use strict;
use G;
$| = 1;

# Word list, one per line. Reconstructed as words.dat, the file written by
# the word-list builder above -- the original file name was lost from the
# source text.
chomp( my @words = do {
    open my $fh, '<', 'words.dat' or die $!;
    <$fh>;
} );
print "loaded: " . @words . ' words';

# Slurp mode: each <> read below returns one whole file.
local $/;

my %index;
@index{ @ARGV } = ('') x @ARGV;

while ( <> ) {
    my $text = lc;
    for my $n ( 0 .. $#words ) {
        vec( $index{$ARGV}, $n, 1 ) = 1
            if index( $text, $words[$n] ) >= 0;
    }
}

open my $index_fh, '>', 'index.dat' or die $!;
for my $file ( sort keys %index ) {
    my $count = unpack '%b*', $index{$file};    # bit-sum: number of bits set
    my $bits  = unpack 'b*',  $index{$file};
    print {$index_fh} "$file($count) : [$bits]";
}
close $index_fh or die $!;

####
# Sample listing (file : words found).
# NOTE(review): this does not match the index.dat line format written
# above; presumably it comes from a separate reporting step -- verify.
#
# .\171594.txt : all an and anonymous asked at back be better but by com concerning contain create directories even excluding expression following for gone has have hours in index jun last list looking monks of on over pl probably question renders replies round seekers simple thanks that the this to want wisdom without would
# .\171599.txt : am and are at be being brothers but by com comes create darkness directories doubt enlightenment etiquette help here if in index jun light list living me my no not of on order piece pl re replies reply seeking so someone strong sure tell that the thread to unsure until way weak will with without