http://www.perlmonks.org?node_id=168061
Category: Utilities
Author/Contact Info /msg educated_foo
Description: This module creates self-extracting compressed programs using a variety of algorithms. The resulting line-noise is a legal Perl program that will decompress and run on any system with a modern Perl. I'd like to put this on CPAN (with a different name, of course), but I thought I'd put it up here first for three reasons: First, I'd like feedback. Second, there's good golfing to be had in shortening the "compressed" compression routines. Third, I enjoyed writing these, and thought someone else might want to add another algorithm (Markov-model-based coding, anyone?).

Caveat: Yes, I know what strict is, and usually use it. No, I didn't in this case.

Update: Please see Compress::SelfExtracting for the current version.

package Compress;
use Digest::MD5 'md5_hex';

############################################################
# Blech.  Try to be compatible...

sub newheap;            # somehow...

# Try for my C++-based heap.
eval q{
    use STL;
    sub newheap {
    return new STL::Heap sub { $_[0]->[0] > $_[1]->[0] };
    }
};
if ($@) {
# Okay, try for Heap::Binary
eval q{
use Heap;
package MyHE;

sub new
{
    my $class = shift;
    $class = ref($class) || $class;
    return bless [undef, shift], $class;
}

sub heap
{
    my $self = shift;
    if (@_) {
    $self->[0] = shift;
    }
    $self->[0];
}

sub thing
{
    my $self = shift;
    $self->[1];
}

sub cmp
{
    my ($this, $that) = @_;
    return $this->[1][0] <=> $that->[1][0];
}

package Myheap;
use Heap::Binary;

sub new
{
    my $class = shift;
    $class = ref $class || $class;
    return bless [0, new Heap::Binary], $class;
}

sub push
{
    my $self = shift;
    $self->[1]->add(MyHE->new($_)) foreach @_;
    $self->[0] += @_;
}

sub pop
{
    my $self = shift;
    $self->[0]--;
    $self->[1]->extract_minimum->thing;
}

sub size
{
    my $self = shift;
    $self->[0];
}

package Compress;
sub newheap
{
    new Myheap;
}
};
}

if ($@) {
    die "Can't find a usable priority queue on your system:\n$@";
}

sub import
{
    my $me = shift;
    %O = (standalone => 1,
      type => 'LZW',
      uu => 1);
    if (@_) {
    my $o0 = shift;
    my $caller = caller;
    if ($o0 eq 'decompress') {
        *{"$caller\::decompress"} = \&decompress;
    } elsif ($o0 eq 'compress') {
        *{"$caller\::compress"} = \&compress;
    } else {
        # put it back.
        unshift @_, $o0;
    }
    # The rest of the arguments are name=value options.
    my %o = @_;
    @O{keys %o} = values %o;
    }
}

sub compress
{
    my $data = shift;
    my %o = @_;
    @O{keys %o} = values %o;
    my $cdata = &{"Compress::$O{type}::compress"}($data, \%O);
    if ($O{uu}) {
    $cdata = pack 'u', $cdata;
    }
    if ($O{standalone}) {
    my $sa = &{"Compress::$O{type}::standalone"}(\%O);
    return $sa.$cdata;
    } else {
    return join("\n",
            "use FilterCompress ".join(', ', map {
            "$_ => '$O{$_}'"
            } grep!/decompress|file|data/,keys %O).";",
            md5_hex($data),
            $cdata);
    }
}

sub decompress
{
    my $data = shift;
    my %o = @_;
    @O{keys %o} = values %o;
    if ($data =~ /^([0-9a-f]+)\n(.*)\z/so) {
    if ($O{uu}) {
        $data = unpack 'u', $2;
    } else {
        $data = $2;
    }
    $data = &{"Compress::$O{type}::decompress"}($data, \%O);
    my $cksum = md5_hex($data);
    die "Bad checksum ($1 != $cksum) for code:\n$data\n--\n"
        unless $cksum eq $1;
    } else {
    die "Filter::Compress: $0 doesn't look compressed:\n$data\n";
    }
    $data;
}

############################################################
package Compress::LZ77;

sub import { }

sub compress
{
    my $str = shift;
    die "Sorry, code too long\n" if length($str) >= 1<<16;
    my @rep;
    my $la = 0;
    while ($la < length $str) {
    my $n = 1;
    my ($tmp, $p);
    $p = 0;
    while ($la + $n < length $str
           && $n < 255
           && ($tmp = index(substr($str, 0, $la),
                substr($str, $la, $n),
                $p)) >= 0) {
        $p = $tmp;
        $n++;
    }
    --$n;
    my $c = substr($str, $la + $n, 1);
    push @rep, [$p, $n, ord $c];
    $la += $n + 1;
    }
    join('', map { pack 'SCC', @$_ } @rep);
}

sub decompress
{
    my $str = shift;
    my $ret = '';
    while (length $str) {
    my ($s, $l, $c) = unpack 'SCC', $str;
    $ret .= substr($ret, $s, $l).chr$c;
    $str=substr($str,4);
    }
    $ret;
}

sub standalone
{
    my $O = shift;
    my $ret = <<'EOC';
BEGIN{open$^W=0;$/=$!;$_=<0>;s/^.*?}\n//s;#UUDEC#while(length)
{($o,$l,$c)=unpack'SCC',$_;$r.=substr($r,$o,$l).chr$c;$_=substr
$_,4}eval$r;exit}
EOC
    if ($O->{uu}) {
    $ret =~ s/#UUDEC#/\$_=unpack'u',\$_;/;
    } else {
    $ret =~ s/#UUDEC#//;
    }
    $ret;
}

############################################################
package Compress::LZSS;

sub import { }

sub compress
{
    my $str = shift;
    die "Sorry, code too long\n" if length($str) >= 1<<16;
    my @rep;
    my $la = 0;
    while ($la < length $str) {
    my $n = 1;
    my ($tmp, $p);
    $p = 0;
    while ($la + $n < length $str
           && $n < 255
           && ($tmp = index(substr($str, 0, $la),
                substr($str, $la, $n),
                $p)) >= 0) {
        $p = $tmp;
        $n++;
    }
    --$n;
    if ($n < 2) {
        push @rep, "\0".substr($str, $la, 1);
        ++$la;
    } else {
        push @rep, pack 'CS', $n, $p;
        $la += $n;
    }
    }
    join('', @rep);
}

sub decompress
{
    my $str = shift;
    my $ret = '';
    my $o = 0;
    while ($o < length $str) {
    my $n = unpack 'C', substr($str, $o);
    if ($n == 0) {
        $ret .= substr($str, $o + 1, 1);
        $o += 2;
    } else {
        my $p = unpack 'S', substr($str, $o + 1);
        $ret .= substr($ret, $p, $n);
        $o += 3;
    }
    }
    $ret;
}

sub standalone
{
    my $ret = <<'END';
BEGIN{open$^W=$o=0;$/=$!;$_=<0>;s/^.*?}\n//s;#UUDEC#while
($o<length){$n=unpack'C',substr$_,$o++;$r.=($n?substr($r,
(unpack'S',substr$_,$o++),$n):(substr$_,$o,1));$o++}eval
$r;exit}
END
    if (shift->{uu}) {
    $ret =~ s/#UUDEC#/\$_=unpack'u',\$_;/
    } else {
    $ret =~ s/#UUDEC#//;
    }
    $ret;
}

############################################################
package Compress::LZW;

my (%LZ, %UNLZ, %SA);

sub import
{
    %LZ = (12 => sub {
         my $v = '';
         for my $i (0..$#_) {
             vec($v, 3*$i, 4) = $_[$i]/256;
             vec($v, 3*$i+1, 4) = ($_[$i]/16)%16;
             vec($v, 3*$i+2, 4) = $_[$i]%16;
         }
         $v;
         },
         16 => sub { pack 'S*', @_ });
    %UNLZ = (12 => sub {
           my $code = shift;
           my @code;
           foreach (0..length($code)*2/3 - 1) {
               push @code, (vec($code, 3*$_, 4)<<8)
                     | (vec($code, 3*$_+1, 4)<<4)
                 | (vec($code, 3*$_+2, 4));
           }
           @code;
           },
           16 => sub { unpack 'S*', shift; });
    # Now the self-extracting glop:
    my $ANY_16 = <<'EOC';
BEGIN{open$^W=0;$/=$!;%d=map{($_,chr)}0..($n=255);($s=<0>)
=~s/^.*?}\n//s;eval join'',map{($C,$P)=@d{$_,$p};$p=$_;if
(!defined$P){$d{$p}}elsif(defined$C){$d{++$n}=$P.substr$C,0,
1;$C}else{$d{++$n}=$P.substr$P,0,1}}unpack'S*',#UUDEC#;exit}
EOC
    (my $u16 = $ANY_16) =~ s/#UUDEC#/unpack'u',\$s/;
    (my $n16 = $ANY_16) =~ s/#UUDEC#/\$s/;
    my $ANY_12 = <<'EOC';
BEGIN{open$^W=0;$/=$!;%d=map{($_,chr)}0..($n=255);($s=<0>)=~s/^.*?}\n/
+/s;
#UUDEC#eval join'',map{($C,$P)=@d{$_,$p};$p=$_;if(!defined$P){$C}elsif
(defined$C){$d{++$n}=$P.substr$C,0,1;$C}else{$d{++$n}=$P.substr$P,0,1}
+}map{
vec($s,3*$_,4)<<8|vec($s,3*$_+1,4)<<4|vec$s,3*$_+2,4}0..length($s)*2/3
+-1;exit}
EOC
    (my $u12 = $ANY_12) =~ s/#UUDEC#/\$s=unpack'u',\$s;/;
    (my $n12 = $ANY_12) =~ s/#UUDEC#//;
    %SA = ('12u0' => $n12, '12u1' => $u12, '16u0' => $n16, '16u1' => $
+u16);
}

sub compress
{
    my ($str, $O) = @_;
    my $p = ''; my %d = map{(chr $_, $_)} 0..255;
    my @o = ();
    my $ncw = 256;
    for (split '', $str) {
    if (exists $d{$p.$_}) {
        $p .= $_;
    } else {
        push @o, $d{$p};
        $d{$p.$_} = $ncw++;
        $p = $_;
    }
    }
    push @o, $d{$p};
    if ($O->{bits} != 16 && $ncw < 1<<12) {
    $O->{bits} = 12;
    return $LZ{12}->(@o);
    } elsif ($ncw < 1<<16) {
    $O->{bits} = 16;
    return $LZ{16}->(@o);
    } else {
    die "Sorry, code-word overflow";
    }
}

sub decompress
{
    my %d = (map{($_, chr $_)} 0..255);
    my $ncw = 256;
    my $ret = '';
    my ($str, $O) = @_;
    my ($p, @code) = $UNLZ{$O->{bits}}->($str);
    $ret .= $d{$p};
    for (@code) {
    if (exists $d{$_}) {
        $ret .= $d{$_};
        $d{$ncw++} = $d{$p}.substr($d{$_}, 0, 1);
    } else {
        my $dp = $d{$p};
        warn unless $_ == $ncw++;
        $ret .= ($d{$_} = $dp.substr($dp, 0, 1));
    }
    $p = $_;
    }
    $ret;
}

sub standalone
{
    my $o = shift;
    return $SA{"$o->{bits}u$o->{uu}"};
}

############################################################
package Compress::Huffman;

# Compute bit-codes from tree.
sub tree2str
{
    my ($str, $x) = @_;
    if (!defined $x->[2]) {
    $rep{$x->[1]} = $str;
    } else {
    tree2str($str.'0', $x->[1]);
    tree2str($str.'1', $x->[2]);
    }
}

sub compress
{
    my %p = ();
    my $s = shift;
    my @chars;
    if (ref $s eq 'ARRAY') {
    @chars = @$s;
    } else {
    @chars = split '', $s;
    }
    for (@chars) {
    $p{$_}++;
    }
    my $h = Compress::newheap();
    $h->push(map { [ $p{$_}, $_, undef ] } keys %p);
    while ($h->size > 1) {
    my ($x, $y) = ($h->pop, $h->pop);
    $h->push( [$x->[0] + $y->[0], $x, $y] );
    }
    local %rep = ();        # gets filled in by tree2str.
    tree2str('', $h->pop);
    if ($::DEBUG) {
    foreach (sort keys %rep) {
        print STDERR "$_ <- $rep{$_}\n";
    }
    }
    my $data = '';
    for (@chars) {
    $data .= $rep{$_};
    }
    my $nbits = length($data);
    my $tree = pack 'CL', scalar keys %rep, $nbits;
    print STDERR "len = ", scalar keys %rep, "nbits = $nbits\n" if $::
+DEBUG;
    while (my ($k, $v) = each %rep) {
    die "Sorry, Huffman code too long ($v)\n" if length $v >= 32;
    $tree .= pack('Cb32', ord($k), '0'x(31 - length $v).'1'.$v);
    }
    $data = pack 'b*', $data.('0'x((8 - $nbits%8) % 8));
    print STDERR length($data), " bytes of data\n" if $::DEBUG;
    $tree.$data;
}

sub decompress
{
    my $str = shift;
    my ($len, $nbits) = unpack 'CL', $str;
    $str = substr($str, 5);
    print STDERR "len = $len, nbits = $nbits\n" if $::DEBUG;
    my %rep;
    for (0..$len - 1) {
    my ($c, $x) = unpack 'Cb32', substr($str, 5*$_, 5);
    $x =~ s/^0*1//;
    die "Duplicate: $x -> $c" if exists $rep{$x};
    $rep{$x} = chr $c;
    }
    if ($::DEBUG) {
    foreach (sort keys %rep) {
        print STDERR "$_ <- $rep{$_}\n";
    }
    }
    $str = substr($str, 5*$len);
    print STDERR length $str, " bytes of data\n" if $::DEBUG;
    my $data = unpack "b$nbits", $str;
    my $ret = '';
    my $n;
    while (length $data > 0) {
    $n = 1;
    while (!exists($rep{substr($data, 0, $n)})) {
        $n++;
        die $n if $n > length $data;
    }
    $ret .= $rep{substr($data, 0, $n)};
    $data = substr($data, $n);
    }
    $ret;
}

sub standalone
{
    my $ret = <<'EOC';
BEGIN{open 0;$/=$!;($s=<0>)=~s/^.*?}\n//s;#UUDEC#($l,$L)=unpack'CL',$s
+;$s=
substr$s,5;for(1..$l){($c,$x)=unpack'Cb32',$s;$x=~s/^0*1//;$r{$x}=chr$
+c;$s
=substr$s,5}$_=unpack"b$L",$s;while(length){$n=1;1while!exists$r{subst
+r$_,
0,$n++};$r.=$r{substr$_,0,--$n};$_=substr$_,$n}eval$r;exit}
EOC
    if (shift->{uu}) {
    $ret =~ s/#UUDEC#/\$s=unpack'u',\$s;/;
    } else {
    $ret =~ s/#UUDEC#//;
    }
    $ret;
}

############################################################
package Compress::BWT;
# Burrows-Wheeler Transform block-sorting compression (i.e. bzip).
#
# This implementation is a straightforward translation of this Dr
# Dobbs' piece: http://www.ddj.com/documents/s=957/ddj9609f/.  Also
# see
# http://gatekeeper.dec.com/pub/DEC/SRC/research-reports/SRC-124.ps.gz
# for the original, which IMO better describes the block-sorting.
#

import Compress::Huffman;

sub import { }

##############################
# BWT block-sorting

sub BLKSIZE() { 16*1024 }    # unused, so this sucks for big files.
sub QSORT_SIZE() { 5 }        # when to use qsort instead of counting 
+sort.
sub _counting_sort
{
    my ($p, $o) = @_;
    if ($::DEBUG) {
    ++$calls;
    if ($o > $maxdepth) {
        $maxdepth = $o;
        print STDERR "$o\r";
    }
    }
    my @a;
    foreach (@$p) {
    push @{$a[ord substr($s, $_+$o, 1)]}, $_;
    }
    my @ret;
    foreach (@a) {
    next unless ref $_;
    if (@$_ == 1) {
        push @ret, $_->[0];
    } elsif (@$_ < QSORT_SIZE) {
        my $tmp = $o+1;
        push @ret, sort { substr($s, $a+$tmp).substr($s, 0, $a+$o) cmp
                  substr($s, $b+$tmp).substr($s, 0, $b+$o) }
        @$_;
    } else {
        push @ret, _counting_sort($_, $o+1);
    }
    }
    @ret;
}

sub counting_sort
{
    local $s = shift;
    local $^W = 0;
    my $l = length $s;
    $s .= $s;
    local $maxdepth = 0;
    local $calls = 0;
    my @ret = _counting_sort([0..$l-1], 0);
    print STDERR "Counting sort max depth $maxdepth, calls = $calls\n"
    if $::DEBUG;
    @ret;
}

sub BWT
{
    my $str = shift;
    my $slow;
    if (length $str > BLKSIZE) {
    $slow = 1;
    warn "BWT will be very slow for ", length $str, " bytes\n";
    }
    my $d = 0;
    my ($pi, @L);
    my @posns = counting_sort($str);
    # This is quite a bit slower than counting sort.
#     my @posns = sort { substr($str, $a).substr($str, 0, $a-1) cmp
#                substr($str, $b).substr($str, 0, $b-1) }
#     (0 .. length($str) - 1);
    my $i;
    foreach $i (0..$#posns) {
    if ($posns[$i] == 0) {
        $pi = $i;
    }
    push @L, ord(substr($str, $posns[$i] - 1, 1));
    }
    ($pi, \@L);
}

sub unBWT
{
    my ($pi, $L) = @_;
    my (@P, @C);
    my @ret;
    print STDERR "length = ".@$L."\n" if $::DEBUG;
    for (0..$#{$L}) {
    my $c = $L->[$_];
    $P[$_] = $C[$c] || 0;
    $C[$c]++;
    }
    my $sum = 0;
    {
    no warnings;
    for (@C) {
        $sum += $_;
        $_ = $sum - $_;
    }
    }
    for (reverse 0..$#{$L}) {
    my $c = $L->[$pi];
    $ret[$_] = $c;
    $pi = $P[$pi] + $C[$c];
    }
    die unless @ret == @$L;
    return \@ret;
}

##############################
# Move-to-front coder

sub MTF
{
    my $L = shift;
    my @ret;
    my @c = 0..255;
    foreach (@$L) {
    for my $i (0..$#c) {
        if ($c[$i] == $_) {
        push @ret, $i;
        splice @c, $i, 1;
        unshift @c, $_;
        last;
        }
    }
    }
    \@ret;
}

sub unMTF
{
    my $L = shift;
    my @ret;
    my @c = 0..255;
    foreach (@$L) {
    my $x = $c[$_];
    push @ret, $x;
    splice @c, $_, 1;
    unshift @c, $x;
    }
    \@ret;
}

##############################
# Run-length coder

sub RLE
{
    my @ret;
    my $l = shift;
    my $c = $l->[0];
    my $n = 1;
    foreach (@{$l}[1..$#{$l}]) {
    if ($c != $_) {
        push @ret, $c, $n;
        $n = 1;
        $c = $_;
    } else {
        if (++$n > 255) {
        push @ret, $c, 255;
        $n = 1;
        }
    }
    }
    push @ret, $c, $n;
    if ($::DEBUG) {
    my $i = 0;
    while ($i < @ret) {
        print STDERR "$ret[$i], $ret[$i+1]\n";
        $i += 2;
    }
    }
    \@ret;
}

sub unRLE
{
    my @l = @{shift @_};
    my @ret;
    die unless @l % 2 == 0;
    my ($c, $n);
    while (@l) {
    $c = shift @l;
    $n = shift @l;
    print STDERR "$c, $n\n" if $::DEBUG;
    push @ret, $c for 1..$n;
    }
    \@ret;
}

##############################
# Main compression routines

sub compress
{
    my ($str, $O) = @_;
    print STDERR "BWT..." if $::DEBUG;
    my ($pi, $L) = BWT($str);
    print STDERR "\nMTF..." if $::DEBUG;
    $L = MTF($L);
    print STDERR "\nRLE..." if $::DEBUG;
    $L = RLE($L);
    print STDERR "\nHuffman..." if $::DEBUG;
    $L = Compress::Huffman::compress(pack('L', $pi)
                         .join('', map { chr } @$L),
                         $O);
    print STDERR "done\n" if $::DEBUG;
    return $L;
}

sub decompress
{
    my $str = shift;
    # Huffman decode to a string:
    $str = Compress::Huffman::decompress($str);
    my $pi = unpack 'L', $str;
    $str = [map {ord} split '', substr($str, 4)];
    $str = unRLE($str);
    $str = unMTF($str);
    $str = unBWT($pi, $str);
    join '', map { chr } @$str;
}

# Oh, yeah.
sub standalone
{
    my $ret = <<'EOC';
BEGIN{open$^W=0;$/=$!;($s=<0>)=~s/^.*?}\n//s;#UUDEC#($l,$L)=unpack'CL'
+,$s;
$s=substr$s,5;for(1..$l){($c,$x)=unpack'Cb32',$s;$x=~s/^0*1//;$r{$x}=c
+hr$c;
$s=substr$s,5}$_=unpack"b$L",$s;while(length){$n=1;1while!exists$r{sub
+str
$_,0,$n++};$r.=$r{substr$_,0,--$n};$_=substr$_,$n}$P=unpack'L',$r;@l=m
+ap{ord
}split'',substr$r,4;while(@l){push@R,(shift@l)x shift@l}@c=0..255;for(
+@R){
push@M,$x=$c[$_];splice@c,$_,1;unshift@c,$x}for(0..$#M){$c=$M[$_];$P[$
+_]=
$C[$c]++}for(@C){$s+=$_;$_=$s-$_}for(reverse 0..$#M){$c=$M[$P];$r[$_]=
+$c;
$P=$P[$P]+$C[$c]}eval join'',map{chr}@r;exit}
EOC
    if (shift->{uu}) {
    $ret =~ s/#UUDEC#/\$s=unpack'u',\$s;/;
    } else {
    $ret =~ s/#UUDEC#//;
    }
    $ret;
}

package Filter::Compress;
import Compress::LZW;

1;
__END__
=head1 NAME

Compress -- Compress your code.

=head1 SYNOPSIS

  use Compress 'compress', OPTIONS;

  compress $data, OPTIONS ... ;

=head1 DESCRIPTION

C<Compress> allows you to create pure-Perl self-extracting scripts
using a variety of compression algorithms.  These scripts will then
run on any system with a recent version of Perl.  The module exports a
function C<compress()> that takes an input program and configuration
parameters, and returns an equivalent compressed program.

=head2 Options

=over

=item type

C<Compress> currently supports five types of compression.  LZW
and LZSS are probably the most useful.

=over

=item BWT -- Burrows-Wheeler Transform (bzip)

B<Note>: BWT currently only uses a single block, and is unusably slow
on files larger than about 12 kilobytes.  Furthermore, the standalone
decompression code is significantly larger than that for other
methods.

=item LZW -- Lempel-Ziv 78-based algorithm

=item LZ77 -- Lempel-Ziv 77

=item LZSS -- a variant of LZ77

=item Huffman -- Huffman character-frequency coding

=back

=item standalone (default: yes)

Create a self-extracting script, rather than one using
C<FilterCompress> (included in the POD appendices for the moment).

=item uu (default: no)

Create a uucompressed script.  The result will be one third larger,
but will still be runnable, and will be 8-bit clean.  Interestingly,
novice programmers may find it hard to distinguish between the
decompression code and the UU-encoded data.

=back 

=head1 NOTES

The Huffman (and hence BWT) algorithms require a priority queue
implementation.  One possibility is the C<Heap> module, available on
CPAN.

=head1 AUTHOR

Sean O'Rourke, E<lt>seano@cpan.orgE<gt>

Bug reports welcome, patches even more welcome.

=head1 COPYRIGHT

Copyright (C) 2002 Sean O'Rourke.  All rights reserved, some wrongs
reversed.  This module is distributed under the same terms as Perl
itself.  Let me know if you actually find it useful.

=head1 APPENDIX: FilterCompress

  package FilterCompress;
  use Compress 'decompress';
  
  my %O;
  
  sub import {
      my $me = shift;
      %O = @_;
  }
  
  use Filter::Simple sub {
      # XXX: I don't know why this gets called with empty data, but th
+at
      # really pisses decompress() off.
      $_ = decompress($_, %O) if length;
  };
  
  1;

=head1 APPENDIX: compress.pl

  #!/usr/bin/env perl
  
  use Getopt::Long;
  use Compress 'compress';
  use strict;
  
  sub usage
  {
      print STDERR <<END;
  Usage: $0 [OPTIONS] [file]
      --type LZW | LZSS | LZ77 | BWT | Huffman
      --standalone
      --uu
  END
      exit 1;
  }
  
  my %types;
  for (qw/BWT LZW LZSS LZ77 Huffman/) {
      $types{$_} = $types{lc $_} = $_;
  }
      
  my %O = (standalone => 1, type => 'LZW', bits => 12, uu => 0);
  GetOptions(\%O, 'type:s', 'standalone', 'uu', 'bits:i', 'help')
      or usage();
  $O{type} = $types{$O{type}};
  usage() if $O{help} || !$O{type};
  
  undef $/;
  my $data = <>;
  print compress $data, %O;
  exit;  

=head1 APPENDIX: simple test program

  #!/usr/bin/env perl
  
  use Compress 'compress';
  require FilterCompress;
  use strict;
  $|++;
  
  my $test_output = "Hello, compressed world";
  my $test = <<END;
  #!/usr/bin/perl
  use strict;
  
  print STDOUT '$test_output';
  END
  
  my $tmp = "test.$$.plz";
  my $n = 1;
  for my $type (qw/BWT LZW LZSS LZ77 Huffman/) {
      for my $uu (0, 1) {
      for my $sa (0, 1) {
          if ($type eq 'LZW') {
          for my $bits (12, 16) {
              test_it(type => $type, uu => $uu, bits => $bits,
                  standalone => $sa);
          }
          } else {
          test_it(type => $type, uu => $uu, standalone => $sa);
          }
      }
      }
  }
  
  unlink "test.$$";
  
  sub test_it
  {
      open(O, ">$tmp") or die "$tmp: $!";
      print O compress $test, @_;
      close O;
      open I, "perl -w $tmp |";
      my $res = <I>;
      close I or die "perl -w $tmp: $! ($?)";
      die "bad output $tmp.out: '$res'\n" unless $res eq $test_output;
      unlink "$tmp";
      print "ok $n\n";
      ++$n;
  }

=cut

__END__

# You don't see this.

# Burrows-Wheeler decompressor, saved with a few comments.  Otherwise,
# this would be completely incomprehensible in no-time flat.

BEGIN{open$^W=0;$/=$!;($s=<0>)=~s/^.*?}\n//s;#UUDEC#($l,$L)=unpack'CL'
+,$s;$s=substr$s,5;for(1..$l){($c,$x)=unpack'Cb32',$s;$x=~s/^0*1//;$r{
+$x}=chr$c;$s=substr$s,5}$_=unpack"b$L",$s;while(length){$n=1;1while!e
+xists$r{substr($_,0,$n++)};$r.=$r{substr$_,0,--$n};$_=substr$_,$n
}$P=unpack'L',$r;        # get $pi.
@l=map{ord}split'',substr($r,4);
# un-RLE:
while(@l){push@R,(shift@l)x shift@l}# un-MTF:
@c=0..255;for(@R){push@M,$x=$c[$_];splice@c,$_,1;unshift@c,$x}# un-BWT
+:
for(0..$#M){$c=$M[$_];$P[$_]=$C[$c]++||0}for(@C){$s+=$_;$_=$s-$_;
}for(reverse 0..$#M){$c=$M[$P];$r[$_]=$c;$P=$P[$P]+$C[$c]
}eval join'',map{chr}@r;exit}
Replies are listed 'Best First'.
Re: Self-extracting compressed code!
by Anonymous Monk on May 22, 2002 at 11:48 UTC
    This is a really cool post.

    But TMTOWTDI:

    use Text::Quote; Text::Quote->quote_prop('compress_at',1); undef $/; print "use Text::Quote;\nundef\$/;\nprint eval(<DATA>);\n__DATA__\n".T +ext::Quote->quote(<>);
    Which is not meant to detract from your very cool code. This one cheats for sure. :-)
      Apologies.

      I wrote this.

      The reason its an Anony Monk is just so stupid Im not even going to tell you what happened.

      But to follow up, id gladly make Text::Quote fallback to your module if Compress::ZLib wasn't handy.

      Yves / DeMerphq
      ---
      Writing a good benchmark isnt as easy as it might look.

        Cool. I hadn't seen Text::Quote, much less realized from its unassuming name that it did compression. I'm curious -- what problem or itch inspired you?

        As for using the self-extracting code as a fallback, the algorithms requiring Huffman coding (bzip and Huffman) are probably not a good choice, since they require a priority queue module, which you're less likely to have than Compress::Zlib. I've only done "compressed", stand-alone versions of the decompression routines. Indeed, I hadn't thought about it as a zlib alternative on the compression end of things.

        The idea is that while I may have a bunch of strange modules on my system, the system where my script is running may just have the bare bones. This way I can compress the script, then use it elsewhere without adding any new module dependencies (like those Stuffit self-extracting archives that were so popular before all compression software was free). So if you changed T::Q to use this, you'd probably want to give the user the option of using it even if they happen to have Compress::Zlib on the system where T::Q is running.

        /s