Interesting
13.zip is a 51meg zip file with 10,000 members
13/00/00.lst is the first
13/99/99.lst is the last
13/50/50.lst is roughly in the middle
use strict;
my %zbl;
use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ;
use IO::File;
use Archive::Zip qw( :ERROR_CODES :CONSTANTS );
use File::Basename;
compare('13.zip','13/00/00.lst');
compare('13.zip','13/50/50.lst');
compare('13.zip','13/99/99.lst');
compare('13.zip','xx/99/99.lst');
exit;
sub compare {
my $zf=shift;
my $mf=shift;
print 'testing:'.$zf.' '.$mf."\n";
my ($wzbl,$t1zbl,$t2zbl,$lzbl)=zbl($zf,$mf);
my ($wzio,$t1zio,$t2zio,$lzio)=zio($zf,$mf);
if ($wzio eq $wzbl) {print "are same\n"}
else { print "are different\n"; }
print 'io :open'.sprintf('%4d',$t1zio).' read:'.sprintf('%4d',$t2zio
+).' lines'.sprintf('%4d',$lzio)."\n";
print 'zbl:open'.sprintf('%4d',$t1zbl).' read:'.sprintf('%4d',$t2zbl
+).' lines'.sprintf('%4d',$lzbl)."\n";
print "\n";
}
sub zbl {
my $zf=shift;
my $mf=shift;
my $wzbl;
my $time0=time;
my $member=zipbyline_start($zf,$mf);
my $time1=time;
my $lines=0;
while (my $line=zipbyline_read($member)){ $wzbl.=$line; $lines++; }
zipbyline_close($member);
my $dt1=$time1-$time0;
my $dt2=time-$time1;
return ($wzbl,$dt1,$dt2,$lines);
}
sub zio {
my $zf=shift;
my $mf=shift;
my $wio;
my $time0=time;
my $file;
eval {
$file=new IO::Uncompress::Unzip($zf, Name =>$mf) or die "IO::Un
+compress::unzip failed: $UnzipError\n";
};
my $time1=time;
my $lines=0;
if ($@) { print $@."\n"; }
else {
while (my $line=<$file>) { $wio.=$line; $lines++; }
close $file;
}
my $dt1=$time1-$time0;
my $dt2=time-$time1;
return ($wio,$dt1,$dt2,$lines);
}
sub zipbyline_start {
my $zf=shift;
my $mf=shift;
my $zip = Archive::Zip->new();
unless ( $zip->read( $zf ) == AZ_OK ) { die 'read error';}
my ( $member, $status, $bufferRef );
$member = $zip->memberNamed( $mf );
if ($member) {
$member->desiredCompressionMethod( COMPRESSION_STORED );
$status = $member->rewindData();
die "error $status" unless $status == AZ_OK;
$zbl{$member}='';
}
else {$member={};$zbl{$member}=undef; }
return $member;
} # zbl start
sub zipbyline_read {
my $member=shift;
my ( $status, $bufferRef );
if (!defined $zbl{$member}) { return undef;}
my $nl=index($zbl{$member},"\n");
while ( ( $nl == -1) && ! $member->readIsDone() ) {
( $bufferRef, $status ) = $member->readChunk(1000);
die "error $status"
if $status != AZ_OK && $status != AZ_STREAM_END;
# do something with $bufferRef:
$zbl{$member}.=$$bufferRef;
$nl=index($zbl{$member},"\n");
} # while
if ($nl == -1 ) {my $line=$zbl{$member}; $zbl{$member}=undef; return
+ $line;}
my $line=substr($zbl{$member},0,$nl+1);
$zbl{$member}=substr($zbl{$member},$nl+1);
return $line;
} # zbl
sub zipbyline_close {
my $member=shift;
delete $zbl{$member};
} # zbl close
result
testing:13.zip 13/00/00.lst
are same
io :open 2 read: 0 lines 96
zbl:open 1 read: 0 lines 96
testing:13.zip 13/50/50.lst
are same
io :open 101 read: 0 lines 198
zbl:open 1 read: 0 lines 198
testing:13.zip 13/99/99.lst
are same
io :open 192 read: 0 lines 69
zbl:open 2 read: 0 lines 69
testing:13.zip xx/99/99.lst
IO::Uncompress::unzip failed: Cannot find 'xx/99/99.lst'
are same
io :open 199 read: 0 lines 0
zbl:open 1 read: 0 lines 0
I made changes to the zip_by_line routines to not die on a missing member, and to handle files that didnt terminate with \n
Im going to stick with zip_by_line for now, it probably needs more work tho.
|