Beefy Boxes and Bandwidth Generously Provided by pair Networks
Pathologically Eclectic Rubbish Lister
 
PerlMonks  

Archive::Ar - pure perl way to handle Ar archives (comments encouraged)

by JayBonci (Curate)
on Apr 08, 2003 at 07:17 UTC ( #248850=perlcraft: print w/ replies, xml ) Need Help??

   1: package Archive::Ar;
   2: 
   3: ###########################################################
   4: #	Archive::Ar - Pure perl module to handle ar achives
   5: #	
   6: #	Copyright 2003 - Jay Bonci <jaybonci@cpan.org>
   7: #	Licensed under the same terms as perl itself
   8: #
   9: ###########################################################
  10: 
  11: use strict;
  12: use Exporter;
  13: use File::Spec;
  14: use Time::Local;
  15: 
  16: use vars qw($VERSION @ISA @EXPORT);
  17: $VERSION = '1.1';
  18: 
  19: use constant ARMAG => "!<arch>\n";
  20: use constant SARMAG => length(ARMAG);
  21: use constant ARFMAG => "`\n";
  22: 
  23: @ISA=qw(Exporter);
  24: @EXPORT=qw/read read_memory list_files add_files add_data write get_content DEBUG/;
  25: 
  26: sub new {
  27: 	my ($class, $filenameorhandle, $debug) = @_;
  28: 
  29: 	my $this = {};
  30: 
  31: 	my $obj = bless $this, $class;
  32: 
  33: 	$obj->{_verbose} = 0;
  34: 	$obj->_initValues();
  35: 
  36: 
  37: 	if($debug)
  38: 	{
  39: 		$obj->DEBUG();
  40: 	}
  41: 
  42: 	if($filenameorhandle){
  43: 		unless($obj->read($filenameorhandle)){
  44: 			$obj->_dowarn("new() failed on filename or filehandle read");
  45: 			return;
  46: 		}		
  47: 	}
  48: 
  49: 	return $obj;
  50: }
  51: 
  52: sub read
  53: {
  54: 	my ($this, $filenameorhandle) = @_;
  55: 
  56: 	my $retval;
  57: 
  58: 	$this->_initValues();
  59: 
  60: 	if(ref $filenameorhandle eq "GLOB")
  61: 	{
  62: 		unless($retval = $this->_readFromFilehandle($filenameorhandle))
  63: 		{
  64: 			$this->_dowarn("Read from filehandle failed");
  65: 			return;
  66: 		}
  67: 	}else
  68: 	{
  69: 		unless($retval = $this->_readFromFilename($filenameorhandle))
  70: 		{
  71: 			$this->_dowarn("Read from filename failed");
  72: 			return;
  73: 		}
  74: 	}
  75: 
  76: 
  77: 	unless($this->_parseData())
  78: 	{
  79: 		$this->_dowarn("read() failed on data structure analysis. Probable bad file");
  80: 		return; 
  81: 	}
  82: 
  83: 	
  84: 	return $retval;
  85: }
  86: 
  87: sub read_memory
  88: {
  89: 	my ($this, $data) = @_;
  90: 
  91: 	$this->_initValues();
  92: 
  93: 	unless($data)
  94: 	{
  95: 		$this->_dowarn("read_memory() can't continue because no data was given");
  96: 		return;
  97: 	}
  98: 
  99: 	$this->{_filedata} = $data;
 100: 
 101: 	unless($this->_parseData())
 102: 	{
 103: 		$this->_dowarn("read_memory() failed on data structure analysis. Probable bad file");
 104: 		return;
 105: 	}
 106: 
 107: 	return length($data);
 108: }
 109: 
 110: sub list_files
 111: {
 112: 	my($this) = @_;
 113: 
 114: 	return \@{$this->{_files}};
 115: 
 116: }
 117: 
 118: sub add_files
 119: {
 120: 	my($this, $filenameorarray, @otherfiles) = @_;
 121: 	
 122: 	my $filelist;
 123: 
 124: 	if(ref $filenameorarray eq "ARRAY")
 125: 	{
 126: 		$filelist = $filenameorarray;
 127: 	}else
 128: 	{
 129: 		$filelist = [$filenameorarray];
 130: 		if(@otherfiles)
 131: 		{
 132: 			push @$filelist, @otherfiles;
 133: 		}
 134: 	}
 135: 
 136: 	my $filecount = 0;
 137: 
 138: 	foreach my $filename (@$filelist)
 139: 	{
 140: 		my @props = stat($filename);
 141: 		unless(@props)
 142: 		{
 143: 			$this->_dowarn("Could not stat() filename. add_files() for this file failed");
 144: 			next;
 145: 		}
 146: 		my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,$atime,$mtime,$ctime,$blksize,$blocks) = @props;  
 147: 		
 148: 		my $header = {
 149: 			"date" => $mtime,
 150: 			"uid"  => $uid,
 151: 			"gid"  => $gid, 
 152: 			"mode" => $mode,
 153: 			"size" => $size,
 154: 		};
 155: 
 156: 		local $/ = undef;
 157: 		unless(open HANDLE, $filename)
 158: 		{
 159: 			$this->_dowarn("Could not open filename. add_files() for this file failed");
 160: 			next;
 161: 		}
 162: 		$header->{data} = <HANDLE>;
 163: 		close HANDLE;
 164: 
 165: 		# fix the filename
 166: 
 167: 		(undef, undef, $filename) = File::Spec->splitpath($filename);
 168: 		$header->{name} = $filename;
 169: 
 170: 		$this->_addFile($header);
 171: 
 172: 		$filecount++;
 173: 	}
 174: 
 175: 	return $filecount;
 176: }
 177: 
 178: sub add_data
 179: {
 180: 	my($this, $filename, $data, $params) = @_;
 181: 	unless ($filename)
 182: 	{
 183: 		$this->_dowarn("No filename given; add_data() can't proceed");
 184: 		return;
 185: 	}
 186: 
 187: 	$params ||= {};
 188: 	$data ||= "";
 189: 	
 190: 	(undef, undef, $filename) = File::Spec->splitpath($filename);
 191: 	
 192: 	$params->{name} = $filename;	
 193: 	$params->{size} = length($data);
 194: 	$params->{data} = $data;
 195: 	$params->{uid} ||= 0;
 196: 	$params->{gid} ||= 0;
 197: 	$params->{date} ||= timelocal(localtime());
 198: 	$params->{mode} ||= "100644";
 199: 	
 200: 	unless($this->_addFile($params))
 201: 	{
 202: 		$this->_dowarn("add_data failed due to a failure in _addFile");
 203: 		return;
 204: 	}
 205: 
 206: 	return $params->{size}; 	
 207: }
 208: 
 209: sub write
 210: {
 211: 	my($this, $filename) = @_;
 212: 
 213: 	my $outstr;
 214: 
 215: 	$outstr= ARMAG;
 216: 	foreach(@{$this->{_files}})
 217: 	{
 218: 		my $content = $this->get_content($_);
 219: 		unless($content)
 220: 		{
 221: 			$this->_dowarn("Internal Error. $_ file in _files list but no filedata");
 222: 			next;
 223: 		}
 224: 		
 225: 
 226: 		# For whatever reason, the uids and gids get stripped
 227: 		# if they are zero. We'll blank them here to emulate that
 228: 
 229: 		$content->{uid} ||= "";
 230: 		$content->{gid} ||= "";
 231: 
 232: 		$outstr.= pack("A16A12A6A6A8A10", @$content{qw/name date uid gid mode size/});
 233: 		$outstr.= ARFMAG;
 234: 		$outstr.= $content->{data};
 235: 	}
 236: 
 237: 	return $outstr unless $filename;
 238: 
 239: 	unless(open HANDLE, ">$filename")
 240: 	{
 241: 		$this->_dowarn("Can't open filename $filename");
 242: 		return;
 243: 	}
 244: 	print HANDLE $outstr;
 245: 	close HANDLE;
 246: 	return length($outstr);
 247: }
 248: 
 249: sub get_content
 250: {
 251: 	my ($this, $filename) = @_;
 252: 
 253: 	unless($filename)
 254: 	{
 255: 		$this->_dowarn("get_content can't continue without a filename");
 256: 		return;
 257: 	}
 258: 
 259: 	unless(exists($this->{_filehash}->{$filename}))
 260: 	{
 261: 		$this->_dowarn("get_content failed because there is not a file named $filename");
 262: 		return;
 263: 	}
 264: 
 265: 	return $this->{_filehash}->{$filename};
 266: }
 267: 
 268: sub DEBUG
 269: {
 270: 	my($this, $verbose) = @_;
 271: 	$verbose = 1 unless(defined($verbose) and int($verbose) == 0);
 272: 	$this->{_verbose} = $verbose;
 273: 	return;
 274: 
 275: }
 276: 
 277: sub _parseData
 278: {
 279: 	my($this) = @_;
 280: 
 281: 	unless($this->{_filedata})
 282: 	{
 283: 		$this->_dowarn("Cannot parse this archive. It appears to be blank");
 284: 		return;
 285: 	}
 286: 
 287: 	my $scratchdata = $this->{_filedata};
 288: 
 289: 	unless(substr($scratchdata, 0, SARMAG, "") eq ARMAG)
 290: 	{
 291: 		$this->_dowarn("Bad magic header token. Either this file is not an ar archive, or it is damaged. If you are sure of the file integrity, Archive::Ar may not support this type of ar archive currently. Please report this as a bug");
 292: 		return "";
 293: 	}
 294: 
 295: 	while($scratchdata =~ /\S/)
 296: 	{
 297: 
 298: 		if($scratchdata =~ s/^(.{58})`\n//m)		
 299: 		{
 300: 			my @fields = unpack("A16A12A6A6A8A10", $1);
 301: 
 302: 			for(0..@fields)
 303: 			{
 304: 				$fields[$_] ||= "";
 305: 				$fields[$_] =~ s/\s*$//g;
 306: 			}
 307: 
 308: 			my $headers = {};
 309: 			@$headers{qw/name date uid gid mode size/} = @fields;
 310: 
 311: 			$headers->{data} = substr($scratchdata, 0, $headers->{size}, "");
 312: 
 313: 			$this->_addFile($headers);
 314: 		}else{
 315: 			$this->_dowarn("File format appears to be corrupt. The file header is not of the right size, or does not exist at all");
 316: 			return;
 317: 		}
 318: 	}
 319: 
 320: 	return scalar($this->{_files});
 321: }
 322: 
 323: sub _readFromFilename
 324: {
 325: 	my ($this, $filename) = @_;
 326: 
 327: 	my $handle;
 328: 	open $handle, $filename or return;
 329: 	return $this->_readFromFilehandle($handle);
 330: }
 331: 
 332: 
 333: sub _readFromFilehandle
 334: {
 335: 	my ($this, $filehandle) = @_;
 336: 	return unless $filehandle;
 337: 
 338: 	#handle has to be open
 339: 	return unless(fileno $filehandle);
 340: 
 341: 	local $/ = undef;
 342: 	$this->{_filedata} = <$filehandle>;
 343: 	close $filehandle;
 344: 
 345: 	return length($this->{_filedata});
 346: }
 347: 
 348: sub _addFile
 349: {
 350: 	my ($this, $file) = @_;
 351: 
 352: 	return unless $file;
 353: 
 354: 	foreach(qw/name date uid gid mode size data/)
 355: 	{
 356: 		unless(exists($file->{$_}))
 357: 		{
 358: 			$this->_dowarn("Can't _addFile because virtual file is missing $_ parameter");
 359: 			return;
 360: 		}
 361: 	}
 362: 	
 363: 	if(exists($this->{_filehash}->{$file->{name}}))
 364: 	{
 365: 		$this->_dowarn("Can't _addFile because virtual file already exists with that name in the archive");
 366: 		return;
 367: 	}
 368: 
 369: 	push @{$this->{_files}}, $file->{name};
 370: 	$this->{_filehash}->{$file->{name}} = $file;
 371: 
 372: 	return $file->{name};
 373: }
 374: 
 375: sub _initValues
 376: {
 377: 	my ($this) = @_;
 378: 
 379: 	$this->{_files} = [];
 380: 	$this->{_filehash} = {};
 381: 	$this->{_filedata} ="";
 382: 
 383: 	return;
 384: }
 385: 
 386: sub _dowarn
 387: {
 388: 	my ($this, $warning) = @_;
 389: 
 390: 	if($this->{_verbose})
 391: 	{
 392: 		warn "DEBUG: $warning";
 393: 	}
 394: 
 395: 	return;
 396: }
 397: 
 398: 1;
 399: 
 400: 
 401: =head1 NAME
 402: 
 403: Archive::Ar - Interface for manipulating ar archives
 404: 
 405: =head1 SYNOPSIS
 406: 
 407: use Archive::Ar;
 408: 
 409: my $ar = new Archive::Ar("./foo.ar");
 410: 
 411: $ar->add_data("newfile.txt","Some contents", $properties);
 412: 
 413: $ar->add_files("./bar.tar.gz", "bat.pl")
 414: $ar->add_files(["./again.gz"]);
 415: 
 416: my $filedata = $ar->get_content("bar.tar.gz");
 417: 
 418: my @files = $ar->list_files();
 419: $ar->read("foo.deb");
 420: 
 421: $ar->write("outbound.ar");
 422: 
 423: $ar->DEBUG();
 424: 
 425: 
 426: =head1 DESCRIPTION
 427: 
 428: Archive::Ar is a pure-perl way to handle standard ar archives.  
 429: 
 430: This is useful if you have those types of old archives on the system, but it 
 431: is also useful because .deb packages for the Debian GNU/Linux distribution are 
 432: ar archives. This is one building block in a future chain of modules to build, 
 433: manipulate, extrace, and test debian modules with no platform or architecture 
 434: independance.
 435: 
 436: You may notice that the API to Archive::Ar is similar to Archive::Tar, and
 437: this was done intentionally to keep similarity between the Archive::*
 438: modules
 439: 
 440: 
 441: =head2 Class Methods
 442: 
 443: =over 4
 444: 
 445: =item new()
 446: =item new($filename);
 447: =item new(*GLOB, $debug);
 448: 
 449: Returns a new Archive::Ar object.  Without a filename or glob, it returns an
 450: empty object.  If passed a filename as a scalar or in a GLOB, it will attempt
 451: to populate from either of those sources.  If it fails, you will receive 
 452: undef, instead of an object reference. 
 453: 
 454: This also can take a second optional debugging parameter.  This acts exactly
 455: as if DEBUG() is called on the object before it is returned.  If you have a
 456: new() that keeps failing, this should help.
 457: 
 458: =item read($filename)
 459: =item read(*GLOB);
 460: 
 461: This reads a new file into the object, removing any ar archive already
 462: represented in the object.  Any calls to DEBUG() are not lost by reading
 463: in a new file. Returns the number of bytes read, undef on failure.
 464: 
 465: =item read_memory($data)
 466: 
 467: This read information from the first parameter, and attempts to parse and treat
 468: it like an ar archive. Like read(), it will wipe out whatever you have in the
 469: object and replace it with the contents of the new archive, even if it fails.
 470: Returns the number of bytes read (processed) if successful, undef otherwise.
 471: 
 472: =item list_files()
 473: 
 474: This lists the files contained inside of the archive by filename, as an 
 475: array.
 476: 
 477: =item add_files("filename1", "filename2")
 478: =item add_files(["filename1", "filename2"])
 479: 
 480: Takes an array or an arrayref of filenames to add to the ar archive, in order.
 481: The filenames can be paths to files, in which case the path information is 
 482: stripped off.  Filenames longer than 16 characters are truncated when written
 483: to disk in the format, so keep that in mind when adding files.
 484: 
 485: Due to the nature of the ar archive format, add_files() will store the uid,
 486: gid, mode, size, and creation date of the file as returned by stat(); 
 487: 
 488: add_files() returns the number of files sucessfully added, or undef on failure.
 489: 
 490: =item add_data("filename", $filedata)
 491: 
 492: Takes an filename and a set of data to represent it. Unlike add_files, add_data
 493: is a virtual add, and does not require data on disk to be present. The
 494: data is a hash that looks like:
 495: 
 496: $filedata = {
 497:         "data" => $data,
 498:         "uid" => $uid, #defaults to zero
 499:         "gid" => $gid, #defaults to zero
 500:         "date" => $date,  #date in epoch seconds. Defaults to now.
 501:         "mode" => $mode, #defaults to "100644";
 502: }
 503: 
 504: You cannot add_data over another file however.  This returns the file length in 
 505: bytes if it is successful, undef otherwise.
 506: 
 507: =item write()
 508: =item write("filename.ar")
 509: 
 510: This method will return the data as an .ar archive, or will write to the 
 511: filename present if specified.  If given a filename, write() will return the 
 512: length of the file written, in bytes, or undef on failure.  If the filename
 513: already exists, it will overwrite that file.
 514: 
 515: =item get_content("filename")
 516: 
 517: This returns a hash with the file content in it, including the data that the 
 518: file would naturally contain.  If the file does not exist or no filename is
 519: given, this returns undef. On success, a hash is returned with the following
 520: keys:
 521: 
 522: name - The file name
 523: date - The file date (in epoch seconds)
 524: uid  - The uid of the file
 525: gid  - The gid of the file
 526: mode - The mode permissions
 527: size - The size (in bytes) of the file
 528: data - The contained data
 529: 
 530: =item DEBUG()
 531: 
 532: This method turns on debugging.  Optionally this can be done by passing in a 
 533: value as the second parameter to new.  While verbosity is enabled, 
 534: Archive::Ar will toss a warn() if there is a suspicious condition or other 
 535: problem while proceeding. This should help iron out any problems you have
 536: while using the module.
 537: 
 538: =head1 CHANGES
 539: 
 540: =over 4
 541:  
 542: =item Version 1.1
 543: 
 544: Documentation cleanups
 545: 
 546: =item Version 1.0
 547: 
 548: This is the initial public release for CPAN, so everything is new.
 549: 
 550: =head1 TODO
 551: 
 552: A better unit test suite perhaps. I have a private one, but a public one would be
 553: nice if there was good file faking module.
 554: 
 555: Fix / investigate stuff in the BUGS section.
 556: 
 557: =head1 BUGS
 558: 
 559: To be honest, I'm not sure of a couple of things. The first is that I know 
 560: of ar archives made on old AIX systems (pre 4.3?) that have a different header
 561: with a different magic string, etc.  This module perfectly (hopefully) handles
 562: ar archives made with the modern ar command from the binutils distribtuion. If
 563: anyone knows of anyway to produce these old-style AIX archives, or would like
 564: to produce a few for testing, I would be much grateful.
 565: 
 566: There's no really good reason why this module /shouldn't/ run on Win32 
 567: platforms, but admittedly, this might change when we have a file exporting 
 568: function that supports owner and permission writing.
 569: 
 570: If you read in and write out a file, you get different md5sums, but it's still
 571: a valid archive. I'm still investigating this, and consider it a minor bug.
 572: 
 573: =head1 COPYRIGHT
 574: 
 575: Archive::Ar is copyright 2003 Jay Bonci E<lt>jaybonci@cpan.orgE<gt>. 
 576: This program is free software; you can redistribute it and/or modify it under
 577: the same terms as Perl itself.
 578: 
 579: =cut

Comment on Archive::Ar - pure perl way to handle Ar archives (comments encouraged)
Download Code
Replies are listed 'Best First'.
Re: Archive::Ar - pure perl way to handle Ar archives (comments encouraged)
by dash2 (Hermit) on May 04, 2003 at 20:06 UTC
    I don't know if this works - away from my trusty Linuxbox - but it is a good idea. A pure perl Archive::Zip would be a really _brilliant_ idea - great for app packaging.

    A few comments just from reading the source:

    • It's a good idea to stick to one subroutine naming convention: like_this or likeThis. In Perl, like_this is preferred as more readable.
    • You have an OO interface, so why export functions (esp. automatically)? AFAICS Archive::Tar doesn't do this.
    • ... especially as you are exporting functions named the same as Perl core functions (e.g. "read")
    • Hmm... do you come from Java? I don't see anything wrong with using $this instead of $self, it is just a bit unusual.
    • int $verbose == 0: no need. Just and not $verbose would be fine.
    • Similarly, return unless fileno $filehandle; is fine, and avoids non-standard brackets.

    I'm just looking for nitpicks, you understand. It seems like very nice, clear code.

      Heya, thanks for taking a look. In response:
      1. Typically all of the functions anyone would want to use are like_this, and everything that's "internal" is _likeThis or _like_this. I guess it's just a matter of style.
      2. I wasn't sure about the exporting, and learned my mistake several versions out. It's fixed in he one that's in CPAN.
      3. I actually come from C. The $self vs. $this thing
      4. I use $verbose == 0 because calling DEBUG() without parameters turns it on. With a 0 parameter, it turns it off. It needs to be numerically false and existing.
      5. The unless fileno thing is a good suggestion, and i'll update it when I put through some module fixes tonight.
      It needs some compatibility testing. A pure perl Archive::Zip does sound like a good idea. hmmmm....

          --jaybonci

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: perlcraft [id://248850]
Approved by Three
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others rifling through the Monastery: (8)
As of 2015-07-31 05:06 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    The top three priorities of my open tasks are (in descending order of likelihood to be worked on) ...









    Results (274 votes), past polls