#!/usr/bin/perl -w use strict; # [A-Za-z0-9.] => unchanged # " " => "_" # "-" => "--" # isolated punctation => "-[a-z0-3]" # other => "-" . ( [A-Z4-7] | [_89][\w.] )+ . "-" # punctuation _=control, 8,9=8-bit my $eight1= pack "C*", 0x80..0xBF; my $eight2= pack "C*", 0xC0..0xFF; my $ctrl= pack "C*", 0..31,128; my $punct= q-!"#$%&'()*+,/:;<=>?@[\\]^_`{|}~-; my $code= join "", '0'..'9','a'..'z','A'..'Z','_','.'; sub encode { local( $_ )= shift; s{(-|[^-a-zA-Z0-9. ]+)}{ if( "-" eq $1 ) { "--"; } elsif( 1 == length($1) && 0 <= index($punct,$1) ) { my $p= $1; $p =~ tr.!"#$%&'()*+,/:;<=>?@[\\]^_`{|}~.a-z0-3.; "-$p"; } else { my $g= $1; $g =~ s{([\Q$punct\E])|([$ctrl])|([$eight1])|([$eight2])}{ my $c= $1 || $2 || $3 || $4; if( $1 ) { $c =~ tr.!"#$%&'()*+,/:;<=>?@[\\]^_`{|}~.A-Z4-7.; $c; } elsif( $2 ) { "_" . substr( $code, index($ctrl,$c), 1 ); } elsif( $3 ) { "8" . substr( $code, index($eight1,$c), 1 ); } else { "9" . substr( $code, index($eight2,$c), 1 ); } }ge; "-$g-"; } }ge; tr/ /_/; return $_; } sub decode { local( $_ )= shift; s{(_)|-(-)|-([a-z0-3])|-((?:[A-Z4-7]|[_89][\w.])+)-}{ if( $1 ) { " "; } elsif( $2 ) { "-"; } elsif( ! $4 ) { my $g= $3; $g =~ tr.a-z0-3.!"#$%&'()*+,/:;<=>?@[\\]^_`{|}~.; $g; } else { my $g= $4; $g =~ s{([A-Z4-7])|([_89])([\w.])}{ #{ local( $^W); # warn "else: 1-($1) 2-($2) 3-($3)\n"; #} my $g= $1 || $3; if( $1 ) { $g =~ tr.A-Z4-7.!"#$%&'()*+,/:;<=>?@[\\]^_`{|}~.; $g; } elsif( "_" eq $2 ) { substr( $ctrl, index($code,$g), 1 ); } elsif( "8" eq $2 ) { substr( $eight1, index($code,$g), 1 ); } else { substr( $eight2, index($code,$g), 1 ); } }ge; $g; } }ge; return $_; } my $decode= grep /^-d/, @ARGV; binmode( $decode ? \*STDOUT : \*STDIN ); if( grep /^-e/, @ARGV ) { my $data= do { local( $/ ); }; print $decode ? decode($data) : encode($data); } else { while( ) { s#\r?\n$##; print $decode ? decode($_) : encode($_), "\n"; } }