Beefy Boxes and Bandwidth Generously Provided by pair Networks
good chemistry is complicated,
and a little bit messy -LW
 
PerlMonks  

Archive::Ar - pure perl way to handle Ar archives (comments encouraged)

by JayBonci (Curate)
on Apr 08, 2003 at 07:17 UTC ( #248850=perlcraft: print w/ replies, xml ) Need Help??

   1: package Archive::Ar;
   2: 
   3: ###########################################################
   4: #	Archive::Ar - Pure perl module to handle ar achives
   5: #	
   6: #	Copyright 2003 - Jay Bonci <jaybonci@cpan.org>
   7: #	Licensed under the same terms as perl itself
   8: #
   9: ###########################################################
  10: 
  11: use strict;
  12: use Exporter;
  13: use File::Spec;
  14: use Time::Local;
  15: 
  16: use vars qw($VERSION @ISA @EXPORT);
  17: $VERSION = '1.1';
  18: 
  19: use constant ARMAG => "!<arch>\n";
  20: use constant SARMAG => length(ARMAG);
  21: use constant ARFMAG => "`\n";
  22: 
  23: @ISA=qw(Exporter);
  24: @EXPORT=qw/read read_memory list_files add_files add_data write get_content DEBUG/;
  25: 
  26: sub new {
  27: 	my ($class, $filenameorhandle, $debug) = @_;
  28: 
  29: 	my $this = {};
  30: 
  31: 	my $obj = bless $this, $class;
  32: 
  33: 	$obj->{_verbose} = 0;
  34: 	$obj->_initValues();
  35: 
  36: 
  37: 	if($debug)
  38: 	{
  39: 		$obj->DEBUG();
  40: 	}
  41: 
  42: 	if($filenameorhandle){
  43: 		unless($obj->read($filenameorhandle)){
  44: 			$obj->_dowarn("new() failed on filename or filehandle read");
  45: 			return;
  46: 		}		
  47: 	}
  48: 
  49: 	return $obj;
  50: }
  51: 
  52: sub read
  53: {
  54: 	my ($this, $filenameorhandle) = @_;
  55: 
  56: 	my $retval;
  57: 
  58: 	$this->_initValues();
  59: 
  60: 	if(ref $filenameorhandle eq "GLOB")
  61: 	{
  62: 		unless($retval = $this->_readFromFilehandle($filenameorhandle))
  63: 		{
  64: 			$this->_dowarn("Read from filehandle failed");
  65: 			return;
  66: 		}
  67: 	}else
  68: 	{
  69: 		unless($retval = $this->_readFromFilename($filenameorhandle))
  70: 		{
  71: 			$this->_dowarn("Read from filename failed");
  72: 			return;
  73: 		}
  74: 	}
  75: 
  76: 
  77: 	unless($this->_parseData())
  78: 	{
  79: 		$this->_dowarn("read() failed on data structure analysis. Probable bad file");
  80: 		return; 
  81: 	}
  82: 
  83: 	
  84: 	return $retval;
  85: }
  86: 
  87: sub read_memory
  88: {
  89: 	my ($this, $data) = @_;
  90: 
  91: 	$this->_initValues();
  92: 
  93: 	unless($data)
  94: 	{
  95: 		$this->_dowarn("read_memory() can't continue because no data was given");
  96: 		return;
  97: 	}
  98: 
  99: 	$this->{_filedata} = $data;
 100: 
 101: 	unless($this->_parseData())
 102: 	{
 103: 		$this->_dowarn("read_memory() failed on data structure analysis. Probable bad file");
 104: 		return;
 105: 	}
 106: 
 107: 	return length($data);
 108: }
 109: 
 110: sub list_files
 111: {
 112: 	my($this) = @_;
 113: 
 114: 	return \@{$this->{_files}};
 115: 
 116: }
 117: 
 118: sub add_files
 119: {
 120: 	my($this, $filenameorarray, @otherfiles) = @_;
 121: 	
 122: 	my $filelist;
 123: 
 124: 	if(ref $filenameorarray eq "ARRAY")
 125: 	{
 126: 		$filelist = $filenameorarray;
 127: 	}else
 128: 	{
 129: 		$filelist = [$filenameorarray];
 130: 		if(@otherfiles)
 131: 		{
 132: 			push @$filelist, @otherfiles;
 133: 		}
 134: 	}
 135: 
 136: 	my $filecount = 0;
 137: 
 138: 	foreach my $filename (@$filelist)
 139: 	{
 140: 		my @props = stat($filename);
 141: 		unless(@props)
 142: 		{
 143: 			$this->_dowarn("Could not stat() filename. add_files() for this file failed");
 144: 			next;
 145: 		}
 146: 		my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,$atime,$mtime,$ctime,$blksize,$blocks) = @props;  
 147: 		
 148: 		my $header = {
 149: 			"date" => $mtime,
 150: 			"uid"  => $uid,
 151: 			"gid"  => $gid, 
 152: 			"mode" => $mode,
 153: 			"size" => $size,
 154: 		};
 155: 
 156: 		local $/ = undef;
 157: 		unless(open HANDLE, $filename)
 158: 		{
 159: 			$this->_dowarn("Could not open filename. add_files() for this file failed");
 160: 			next;
 161: 		}
 162: 		$header->{data} = <HANDLE>;
 163: 		close HANDLE;
 164: 
 165: 		# fix the filename
 166: 
 167: 		(undef, undef, $filename) = File::Spec->splitpath($filename);
 168: 		$header->{name} = $filename;
 169: 
 170: 		$this->_addFile($header);
 171: 
 172: 		$filecount++;
 173: 	}
 174: 
 175: 	return $filecount;
 176: }
 177: 
 178: sub add_data
 179: {
 180: 	my($this, $filename, $data, $params) = @_;
 181: 	unless ($filename)
 182: 	{
 183: 		$this->_dowarn("No filename given; add_data() can't proceed");
 184: 		return;
 185: 	}
 186: 
 187: 	$params ||= {};
 188: 	$data ||= "";
 189: 	
 190: 	(undef, undef, $filename) = File::Spec->splitpath($filename);
 191: 	
 192: 	$params->{name} = $filename;	
 193: 	$params->{size} = length($data);
 194: 	$params->{data} = $data;
 195: 	$params->{uid} ||= 0;
 196: 	$params->{gid} ||= 0;
 197: 	$params->{date} ||= timelocal(localtime());
 198: 	$params->{mode} ||= "100644";
 199: 	
 200: 	unless($this->_addFile($params))
 201: 	{
 202: 		$this->_dowarn("add_data failed due to a failure in _addFile");
 203: 		return;
 204: 	}
 205: 
 206: 	return $params->{size}; 	
 207: }
 208: 
 209: sub write
 210: {
 211: 	my($this, $filename) = @_;
 212: 
 213: 	my $outstr;
 214: 
 215: 	$outstr= ARMAG;
 216: 	foreach(@{$this->{_files}})
 217: 	{
 218: 		my $content = $this->get_content($_);
 219: 		unless($content)
 220: 		{
 221: 			$this->_dowarn("Internal Error. $_ file in _files list but no filedata");
 222: 			next;
 223: 		}
 224: 		
 225: 
 226: 		# For whatever reason, the uids and gids get stripped
 227: 		# if they are zero. We'll blank them here to emulate that
 228: 
 229: 		$content->{uid} ||= "";
 230: 		$content->{gid} ||= "";
 231: 
 232: 		$outstr.= pack("A16A12A6A6A8A10", @$content{qw/name date uid gid mode size/});
 233: 		$outstr.= ARFMAG;
 234: 		$outstr.= $content->{data};
 235: 	}
 236: 
 237: 	return $outstr unless $filename;
 238: 
 239: 	unless(open HANDLE, ">$filename")
 240: 	{
 241: 		$this->_dowarn("Can't open filename $filename");
 242: 		return;
 243: 	}
 244: 	print HANDLE $outstr;
 245: 	close HANDLE;
 246: 	return length($outstr);
 247: }
 248: 
 249: sub get_content
 250: {
 251: 	my ($this, $filename) = @_;
 252: 
 253: 	unless($filename)
 254: 	{
 255: 		$this->_dowarn("get_content can't continue without a filename");
 256: 		return;
 257: 	}
 258: 
 259: 	unless(exists($this->{_filehash}->{$filename}))
 260: 	{
 261: 		$this->_dowarn("get_content failed because there is not a file named $filename");
 262: 		return;
 263: 	}
 264: 
 265: 	return $this->{_filehash}->{$filename};
 266: }
 267: 
 268: sub DEBUG
 269: {
 270: 	my($this, $verbose) = @_;
 271: 	$verbose = 1 unless(defined($verbose) and int($verbose) == 0);
 272: 	$this->{_verbose} = $verbose;
 273: 	return;
 274: 
 275: }
 276: 
 277: sub _parseData
 278: {
 279: 	my($this) = @_;
 280: 
 281: 	unless($this->{_filedata})
 282: 	{
 283: 		$this->_dowarn("Cannot parse this archive. It appears to be blank");
 284: 		return;
 285: 	}
 286: 
 287: 	my $scratchdata = $this->{_filedata};
 288: 
 289: 	unless(substr($scratchdata, 0, SARMAG, "") eq ARMAG)
 290: 	{
 291: 		$this->_dowarn("Bad magic header token. Either this file is not an ar archive, or it is damaged. If you are sure of the file integrity, Archive::Ar may not support this type of ar archive currently. Please report this as a bug");
 292: 		return "";
 293: 	}
 294: 
 295: 	while($scratchdata =~ /\S/)
 296: 	{
 297: 
 298: 		if($scratchdata =~ s/^(.{58})`\n//m)		
 299: 		{
 300: 			my @fields = unpack("A16A12A6A6A8A10", $1);
 301: 
 302: 			for(0..@fields)
 303: 			{
 304: 				$fields[$_] ||= "";
 305: 				$fields[$_] =~ s/\s*$//g;
 306: 			}
 307: 
 308: 			my $headers = {};
 309: 			@$headers{qw/name date uid gid mode size/} = @fields;
 310: 
 311: 			$headers->{data} = substr($scratchdata, 0, $headers->{size}, "");
 312: 
 313: 			$this->_addFile($headers);
 314: 		}else{
 315: 			$this->_dowarn("File format appears to be corrupt. The file header is not of the right size, or does not exist at all");
 316: 			return;
 317: 		}
 318: 	}
 319: 
 320: 	return scalar($this->{_files});
 321: }
 322: 
 323: sub _readFromFilename
 324: {
 325: 	my ($this, $filename) = @_;
 326: 
 327: 	my $handle;
 328: 	open $handle, $filename or return;
 329: 	return $this->_readFromFilehandle($handle);
 330: }
 331: 
 332: 
 333: sub _readFromFilehandle
 334: {
 335: 	my ($this, $filehandle) = @_;
 336: 	return unless $filehandle;
 337: 
 338: 	#handle has to be open
 339: 	return unless(fileno $filehandle);
 340: 
 341: 	local $/ = undef;
 342: 	$this->{_filedata} = <$filehandle>;
 343: 	close $filehandle;
 344: 
 345: 	return length($this->{_filedata});
 346: }
 347: 
 348: sub _addFile
 349: {
 350: 	my ($this, $file) = @_;
 351: 
 352: 	return unless $file;
 353: 
 354: 	foreach(qw/name date uid gid mode size data/)
 355: 	{
 356: 		unless(exists($file->{$_}))
 357: 		{
 358: 			$this->_dowarn("Can't _addFile because virtual file is missing $_ parameter");
 359: 			return;
 360: 		}
 361: 	}
 362: 	
 363: 	if(exists($this->{_filehash}->{$file->{name}}))
 364: 	{
 365: 		$this->_dowarn("Can't _addFile because virtual file already exists with that name in the archive");
 366: 		return;
 367: 	}
 368: 
 369: 	push @{$this->{_files}}, $file->{name};
 370: 	$this->{_filehash}->{$file->{name}} = $file;
 371: 
 372: 	return $file->{name};
 373: }
 374: 
 375: sub _initValues
 376: {
 377: 	my ($this) = @_;
 378: 
 379: 	$this->{_files} = [];
 380: 	$this->{_filehash} = {};
 381: 	$this->{_filedata} ="";
 382: 
 383: 	return;
 384: }
 385: 
 386: sub _dowarn
 387: {
 388: 	my ($this, $warning) = @_;
 389: 
 390: 	if($this->{_verbose})
 391: 	{
 392: 		warn "DEBUG: $warning";
 393: 	}
 394: 
 395: 	return;
 396: }
 397: 
 398: 1;
 399: 
 400: 
 401: =head1 NAME
 402: 
 403: Archive::Ar - Interface for manipulating ar archives
 404: 
 405: =head1 SYNOPSIS
 406: 
 407: use Archive::Ar;
 408: 
 409: my $ar = new Archive::Ar("./foo.ar");
 410: 
 411: $ar->add_data("newfile.txt","Some contents", $properties);
 412: 
 413: $ar->add_files("./bar.tar.gz", "bat.pl")
 414: $ar->add_files(["./again.gz"]);
 415: 
 416: my $filedata = $ar->get_content("bar.tar.gz");
 417: 
 418: my @files = $ar->list_files();
 419: $ar->read("foo.deb");
 420: 
 421: $ar->write("outbound.ar");
 422: 
 423: $ar->DEBUG();
 424: 
 425: 
 426: =head1 DESCRIPTION
 427: 
 428: Archive::Ar is a pure-perl way to handle standard ar archives.  
 429: 
 430: This is useful if you have those types of old archives on the system, but it 
 431: is also useful because .deb packages for the Debian GNU/Linux distribution are 
 432: ar archives. This is one building block in a future chain of modules to build, 
 433: manipulate, extrace, and test debian modules with no platform or architecture 
 434: independance.
 435: 
 436: You may notice that the API to Archive::Ar is similar to Archive::Tar, and
 437: this was done intentionally to keep similarity between the Archive::*
 438: modules
 439: 
 440: 
 441: =head2 Class Methods
 442: 
 443: =over 4
 444: 
 445: =item new()
 446: =item new($filename);
 447: =item new(*GLOB, $debug);
 448: 
 449: Returns a new Archive::Ar object.  Without a filename or glob, it returns an
 450: empty object.  If passed a filename as a scalar or in a GLOB, it will attempt
 451: to populate from either of those sources.  If it fails, you will receive 
 452: undef, instead of an object reference. 
 453: 
 454: This also can take a second optional debugging parameter.  This acts exactly
 455: as if DEBUG() is called on the object before it is returned.  If you have a
 456: new() that keeps failing, this should help.
 457: 
 458: =item read($filename)
 459: =item read(*GLOB);
 460: 
 461: This reads a new file into the object, removing any ar archive already
 462: represented in the object.  Any calls to DEBUG() are not lost by reading
 463: in a new file. Returns the number of bytes read, undef on failure.
 464: 
 465: =item read_memory($data)
 466: 
 467: This read information from the first parameter, and attempts to parse and treat
 468: it like an ar archive. Like read(), it will wipe out whatever you have in the
 469: object and replace it with the contents of the new archive, even if it fails.
 470: Returns the number of bytes read (processed) if successful, undef otherwise.
 471: 
 472: =item list_files()
 473: 
 474: This lists the files contained inside of the archive by filename, as an 
 475: array.
 476: 
 477: =item add_files("filename1", "filename2")
 478: =item add_files(["filename1", "filename2"])
 479: 
 480: Takes an array or an arrayref of filenames to add to the ar archive, in order.
 481: The filenames can be paths to files, in which case the path information is 
 482: stripped off.  Filenames longer than 16 characters are truncated when written
 483: to disk in the format, so keep that in mind when adding files.
 484: 
 485: Due to the nature of the ar archive format, add_files() will store the uid,
 486: gid, mode, size, and creation date of the file as returned by stat(); 
 487: 
 488: add_files() returns the number of files sucessfully added, or undef on failure.
 489: 
 490: =item add_data("filename", $filedata)
 491: 
 492: Takes an filename and a set of data to represent it. Unlike add_files, add_data
 493: is a virtual add, and does not require data on disk to be present. The
 494: data is a hash that looks like:
 495: 
 496: $filedata = {
 497:         "data" => $data,
 498:         "uid" => $uid, #defaults to zero
 499:         "gid" => $gid, #defaults to zero
 500:         "date" => $date,  #date in epoch seconds. Defaults to now.
 501:         "mode" => $mode, #defaults to "100644";
 502: }
 503: 
 504: You cannot add_data over another file however.  This returns the file length in 
 505: bytes if it is successful, undef otherwise.
 506: 
 507: =item write()
 508: =item write("filename.ar")
 509: 
 510: This method will return the data as an .ar archive, or will write to the 
 511: filename present if specified.  If given a filename, write() will return the 
 512: length of the file written, in bytes, or undef on failure.  If the filename
 513: already exists, it will overwrite that file.
 514: 
 515: =item get_content("filename")
 516: 
 517: This returns a hash with the file content in it, including the data that the 
 518: file would naturally contain.  If the file does not exist or no filename is
 519: given, this returns undef. On success, a hash is returned with the following
 520: keys:
 521: 
 522: name - The file name
 523: date - The file date (in epoch seconds)
 524: uid  - The uid of the file
 525: gid  - The gid of the file
 526: mode - The mode permissions
 527: size - The size (in bytes) of the file
 528: data - The contained data
 529: 
 530: =item DEBUG()
 531: 
 532: This method turns on debugging.  Optionally this can be done by passing in a 
 533: value as the second parameter to new.  While verbosity is enabled, 
 534: Archive::Ar will toss a warn() if there is a suspicious condition or other 
 535: problem while proceeding. This should help iron out any problems you have
 536: while using the module.
 537: 
 538: =head1 CHANGES
 539: 
 540: =over 4
 541:  
 542: =item Version 1.1
 543: 
 544: Documentation cleanups
 545: 
 546: =item Version 1.0
 547: 
 548: This is the initial public release for CPAN, so everything is new.
 549: 
 550: =head1 TODO
 551: 
 552: A better unit test suite perhaps. I have a private one, but a public one would be
 553: nice if there was good file faking module.
 554: 
 555: Fix / investigate stuff in the BUGS section.
 556: 
 557: =head1 BUGS
 558: 
 559: To be honest, I'm not sure of a couple of things. The first is that I know 
 560: of ar archives made on old AIX systems (pre 4.3?) that have a different header
 561: with a different magic string, etc.  This module perfectly (hopefully) handles
 562: ar archives made with the modern ar command from the binutils distribtuion. If
 563: anyone knows of anyway to produce these old-style AIX archives, or would like
 564: to produce a few for testing, I would be much grateful.
 565: 
 566: There's no really good reason why this module /shouldn't/ run on Win32 
 567: platforms, but admittedly, this might change when we have a file exporting 
 568: function that supports owner and permission writing.
 569: 
 570: If you read in and write out a file, you get different md5sums, but it's still
 571: a valid archive. I'm still investigating this, and consider it a minor bug.
 572: 
 573: =head1 COPYRIGHT
 574: 
 575: Archive::Ar is copyright 2003 Jay Bonci E<lt>jaybonci@cpan.orgE<gt>. 
 576: This program is free software; you can redistribute it and/or modify it under
 577: the same terms as Perl itself.
 578: 
 579: =cut

Comment on Archive::Ar - pure perl way to handle Ar archives (comments encouraged)
Download Code
Re: Archive::Ar - pure perl way to handle Ar archives (comments encouraged)
by dash2 (Hermit) on May 04, 2003 at 20:06 UTC
    I don't know if this works - away from my trusty Linuxbox - but it is a good idea. A pure perl Archive::Zip would be a really _brilliant_ idea - great for app packaging.

    A few comments just from reading the source:

    • It's a good idea to stick to one subroutine naming convention: like_this or likeThis. In Perl, like_this is preferred as more readable.
    • You have an OO interface, so why export functions (esp. automatically)? AFAICS Archive::Tar doesn't do this.
    • ... especially as you are exporting functions named the same as Perl core functions (e.g. "read")
    • Hmm... do you come from Java? I don't see anything wrong with using $this instead of $self, it is just a bit unusual.
    • int $verbose == 0: no need. Just and not $verbose would be fine.
    • Similarly, return unless fileno $filehandle; is fine, and avoids non-standard brackets.

    I'm just looking for nitpicks, you understand. It seems like very nice, clear code.

      Heya, thanks for taking a look. In response:
      1. Typically all of the functions anyone would want to use are like_this, and everything that's "internal" is _likeThis or _like_this. I guess it's just a matter of style.
      2. I wasn't sure about the exporting, and learned my mistake several versions out. It's fixed in he one that's in CPAN.
      3. I actually come from C. The $self vs. $this thing
      4. I use $verbose == 0 because calling DEBUG() without parameters turns it on. With a 0 parameter, it turns it off. It needs to be numerically false and existing.
      5. The unless fileno thing is a good suggestion, and i'll update it when I put through some module fixes tonight.
      It needs some compatibility testing. A pure perl Archive::Zip does sound like a good idea. hmmmm....

          --jaybonci

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: perlcraft [id://248850]
Approved by Three
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others taking refuge in the Monastery: (8)
As of 2014-08-23 08:42 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    The best computer themed movie is:











    Results (172 votes), past polls