Right, here is the code with the bug.
As you will see, the actual webpages contain four different document numbers (COM (2000) 862, COM (2000) 847, COM (2000) 844, COM (2000) 840) whereas the program always delivers the same one (COM (2000) 862), which is the one of the first webpage accessed.
Presumably, this is due to an internal storage variable of the GET routine, which is appended and not replaced each time a new GET request is processed.
#! C:/programme/perl
use LWP::Simple;
use LWP::UserAgent;
use HTML::Stripper;
use warnings;
use strict;
our $stripper = HTML::Stripper->new( skip_cdata => 1, strip_ws => 1 );
our $ID;
our @ID=(161060, 160920, 160999, 160899);
our $count=1;
foreach $ID (@ID) {
my $content;
my $content_full;
my $url="http://europa.eu.int/prelex/detail_dossier_real.cfm?CL=en&Do
+sId="."$ID";
$content_full=" ";
$content_full=get($url);
$content=$stripper->strip_html($content_full);
our $i_type=index($content, " COM ");
our $d_type=substr($content, $i_type+1,3);
our $d_year=substr($content, $i_type+6,4);
our $d_number=substr($content, $i_type+12,3);
our $proposal="$d_type "."\($d_year\)"." $d_number";
print "Proposal\: $proposal \n";
open DB, ">> C:/programme/perl/test/prelex.dta"
or die "Problem: $!";
flock (DB, 2);
print DB "$proposal\n";
close DB;
}
|