Beefy Boxes and Bandwidth Generously Provided by pair Networks
Welcome to the Monastery
 
PerlMonks  

dar - pure perl directory archiver

by Ctrl-z (Friar)
on May 06, 2005 at 19:45 UTC ( [id://454795]=sourcecode: print w/replies, xml ) Need Help??
Category: Utility Scripts
Author/Contact Info Ctrl-z
Description: Simple directory archiver akin to tar, but works on Win32. See pod.
#!/usr/local/bin/perl

use strict;
use warnings;

=pod

    dar.pl: directory archiver akin to tar

    This is the result of a procrastination period during which I need
+ed 
    to get files from Windows onto Solaris quickly and easily.

    2 observations:

    1. I ALWAYS use the same args with tar: tar -cvf <DIR>.tar <DIR>
    2. Tar on Windows does not preserve case in filenames

    This code is pure perl. A rough benchmark against tar: 

        61.5Mb directory (6,363 files - 418 directories)

        tar: 66.4Mb took 33 seconds using 1.04Mb RAM
        dar: 61.9Mb took 47 seconds using 2.13Mb RAM

    The Good:
    * cvf/xvf arguments are implied
    * Extraction or Compression is implied from (lack of) parameter su
+ffix
    * Changing the .dar filename changes the output directory 
    * Text file-format, something like HTTP
    * Output is smaller than tar equivalent

    The Bad:
    * Treatment of UNIX links is undefined
    * Does not retain file stats
    * No conversion between DOS line-endings

    The Ugly:
    * Code is not very perlish as I considered rewriting in C (yeah ri
+ght!)

    Tested fleetingly on Linux, Window XP and Solaris - USE AT YOUR OW
+N RISK!
   
=cut

our $FILE_SEP  =  "/";
our $FIELD_SEP = "\t";
our $REC_SEP   = "\n";

our $TYPE_DIR  = "d";
our $TYPE_TEXT = "t";
our $TYPE_BIN  = "b";

our $BUFSIZ    = 1024;
our $CHMOD     = 0770;

our $TAG       = "DAR";
our $VERSION   = 0.1;
our $PLATFORM  = $^O;



exit main(@ARGV);

#
#
#
sub main
{
    usage() unless scalar(@_);
    
    my $arg = shift;
    my $dir = ($arg =~ /\.dar$/) ? 0 : 1;

    extract($arg) if !$dir;
    archive($arg) if  $dir;
    return 0;
}

#
#
#
sub usage
{
    print "Usage:\n";
    print "dar <DIRECTORY>        archive <DIRECTORY> in <DIRECTORY>.d
+ar\n";
    print "dar <DIRECTORY>.dar    extract <DIRECORY>.dar to <DIRECTORY
+>\n";
    print "\n";
    print "To extract to a different directory, rename the .dar file\n
+";
    exit(0);
}

#
# 
#
sub fopen
{
    my $path = shift;
    my $f;
    open($f, $path) or die "$path: $!";
    binmode($f);
    #my $s = select($f);$|++;select($s);
    return $f;
}

#
# Invoked foreach directory entry
#
sub callback
{
    my($file, $prefix, $heap, $out) = @_;
    my($type, $sz, $name, $buf, $in, $entry);

    ($name = $file) =~ s#^$prefix##;

    if(-d $file)
    {
        $type = $TYPE_DIR; 
        $sz   = 0;
    }
    else
    {
        $type = (-B $file) ? $TYPE_BIN : $TYPE_TEXT ;
        $in   = fopen($file);
        while(1)
        {
            my $read = read($in, $buf, $BUFSIZ);
            last if $read == 0;
            # TODO: fix text line-endings...
            print $heap $buf;
            $sz += $read;
        }
    }

    $entry = $type.$FIELD_SEP.$sz.$FIELD_SEP.$name.$REC_SEP;
    print $out   $entry;
    print STDOUT $entry;
}

#
# Traverse directory structure
#
sub traverse
{
    my $dir      = shift;
    my $callback = shift;
    my $full;

    opendir(my $d, $dir) or die $!;
    while(my $f = readdir($d))
    {
        next if $f =~ /^\.\.?$/;
        $full = $dir.$FILE_SEP.$f;
        &$callback($full, @_);
        traverse($full, $callback, @_) if -d $full;
    }
    closedir($d);
}

#
# Create a .dar file
#
sub archive
{
    my $dir = shift;
    my $buf;
    if(-d $dir)
    {
        my $heap = fopen(">$dir.heap");
        my $out  = fopen(">$dir.dar");

        print $out $TAG,$FIELD_SEP,$VERSION,$FIELD_SEP,$PLATFORM,$REC_
+SEP;
        traverse($dir, \&callback, $dir, $heap, $out);
        print $out $REC_SEP;
        close($heap);

        $heap = fopen("$dir.heap");
        while( read($heap, $buf, $BUFSIZ) )
        {
            print $out $buf;
        }
        close $heap;
        unlink("$dir.heap");
    }
    else
    {
        usage();
    }
}

#
# Explode a .dar file
#
sub extract
{
    my $dar     = shift;
    my $dir     = $dar;
       $dir     =~ s#\.dar$##io;
    my @entries = ();
    my ($in, $t, $v, $p, $buf);

    $in         = fopen($dar);
    ($t,$v,$p)  = split /$FIELD_SEP/, <$in>;

    die "'$dar' is not a dar file" unless $t eq $TAG;

    mkdir($dir);
    chmod $CHMOD, $dir;

    # read filepaths header
    while(<$in>)
    {
        chomp;
        last unless length($_);
        push @entries, $_;
    }

    # extract file contents from 'heap'
    foreach(@entries)
    {
        print STDOUT $_,"\n";
        my($type, $sz, $name) = split($FIELD_SEP, $_);
        my $full = $dir.$name;

        if($type eq $TYPE_DIR)
        {
            mkdir($full);
        }
        else
        {
            my $out = fopen(">".$full);
            my $len = 0;
            my $tot = 0;

            while($tot < $sz)
            {
                $len = read($in, $buf, ($sz-$tot));
                # TODO: fix text line-endings...
                print $out $buf;
                $tot += $len;
            }
            close($out);
        }
        chmod $CHMOD, $full;
    }
}
Replies are listed 'Best First'.
Re: dar - pure perl directory archiver
by merlyn (Sage) on May 06, 2005 at 19:57 UTC
      Archive::Tar does work on windows, as does it's tar clone, ptar.

      MJD says "you can't just make shit up and expect the computer to know what you mean, retardo!"
      I run a Win32 PPM repository for perl 5.6.x and 5.8.x -- I take requests (README).
      ** The third rule of perl club is a statement of fact: pod is sexy.

      No, it doesn't work. It flattens the directory structure and dumps all of the files in the root directory. For the same benchmark above it takes around 74 seconds and uses 80Mb of memory to do this.



      time was, I could move my arms like a bird and...
Re: dar - pure perl directory archiver
by chb (Deacon) on May 25, 2005 at 12:07 UTC
    hm, it seems like you have a name clash with a much older project, unfortunately in the same application domain: http://dar.linux.free.fr/

      Thanks for the heads up - looks good. Not as easy to sneak over FTP though ;-)




      time was, I could move my arms like a bird and...

Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Node Status?
node history
Node Type: sourcecode [id://454795]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this?Last hourOther CB clients
Other Users?
Others goofing around in the Monastery: (2)
As of 2024-11-03 03:55 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?
    chatterbot is...






    Results (13 votes). Check out past polls.