use strict;
use warnings;

use Devel::Size qw(total_size);

use constant DICT => "2of4brif.txt";
use constant TRIE => "2of4brif.trie";

#=========================================================================================
# CPU timing helper.
#
#   get_cpu()        returns the user CPU time used by this process so far (seconds).
#   get_cpu($start)  returns the user CPU time used since $start (a value previously
#                    returned by get_cpu()), formatted as 'N.NN Secs'.

sub get_cpu {
    my $t = (times)[0];    # element 0 of times() is this process's user CPU time
    if (@_) {
        $t = sprintf('%4.2f Secs', $t - $_[0]);
    }
    return $t;
}

#=========================================================================================
# Main sequence: build the trie from the dictionary, write it to disk, read it back,
# then walk both tries to confirm the round trip preserved every node.
# Progress and timings go to STDERR.

my $took;

print STDERR "Loading dictionary '", DICT, "'";
$took = get_cpu();
my $trie = load_dict(DICT);
print STDERR "... done -- ", get_cpu($took), "\n";

print STDERR "Writing trie '", TRIE, "'";
$took = get_cpu();
# $now is the highest node index assigned while writing; $was is the number of
# nodes visited by write_node.  They are kept for inspection but not reported.
my ($now, $was) = write_trie(TRIE, $trie);
print STDERR "... done -- ", get_cpu($took), "\n";

print STDERR "Reading trie '", TRIE, "'";
$took = get_cpu();
my $check = read_trie(TRIE);
print STDERR "... done -- ", get_cpu($took), "\n";

print STDERR "Walking the tries";
$took = get_cpu();
walk('', $trie, $check);
print STDERR "... done -- ", get_cpu($took), "\n";

printf STDERR "Original trie is: %3.1fM, new trie is: %3.1fM\n",
    total_size($trie) / (1024 * 1024),
    total_size($check) / (1024 * 1024);

#=========================================================================================
# Loading the Dictionary into the Trie.
#
# Constructs a trie from the dictionary file $d_name (one word per line).  Each node
# is a hash keyed by a single character; the key '!' (with an undef value) marks the
# end of a word.  Returns the root hash reference, or undef if no words were read.
#
# Blank lines are skipped.  Words containing whitespace or '!' cannot be represented
# in the on-disk trie format (whitespace separates entries, '!' is the end-of-word
# marker), so they are rejected here rather than producing a corrupt trie later.
# Dies if the file cannot be opened or contains such a word.

sub load_dict {
    my ($d_name) = @_;

    open(my $fh, '<', $d_name) or die("Unable to open dictionary '$d_name': $!\n");

    my $trie = undef;
    while (my $word = <$fh>) {
        $word =~ s/\s+$//;    # drop the line ending and any trailing whitespace
        next if $word eq '';

        if ($word =~ /[\s!]/) {
            die("Unsupported word '$word' at line $. of '$d_name': "
              . "words may not contain whitespace or '!'\n");
        }

        # $p always refers to the slot that holds (or will hold) the current node;
        # dereferencing it as a hash autovivifies the node on first use.
        my $p = \$trie;
        for my $ch (split(//, $word), '!') {
            $p = \( $$p->{$ch} );
        }
    }
    close $fh;

    return $trie;
}

#=========================================================================================
# Writing the Trie.
#
# File format: one node per line, children before parents, root last.  A line is a
# space-separated list of entries; '!' on its own marks end-of-word, any other entry
# is a character followed by the upper-case hex index of the child node it leads to.
# Index 1 is the preset "end of word only" node, so the first line of the file is
# node 2.  Nodes with identical text share one index, so identical subtrees are
# written only once.

my %node_map;      # serialised node text => hex index already assigned to it
my $idx;           # highest node index assigned so far
my $node_count;    # number of nodes visited by write_node

# Writes $trie to the file $t_name.  Returns the list ($idx, $node_count): the
# highest node index assigned and the number of nodes visited.
# Dies if the file cannot be created or if closing it reports a write error.
sub write_trie {
    my ($t_name, $trie) = @_;

    open(my $fh, '>', $t_name) or die("Unable to create trie '$t_name': $!\n");

    $node_count = 0;
    $idx        = 1;                  # Index 0 reserved for what '!' points at !
    %node_map   = ('!' => "$idx");    # Preset end of word node

    write_node($fh, $trie);

    # Buffered write errors only surface when the handle is closed.
    close($fh) or die("Unable to write trie '$t_name': $!\n");

    return ($idx, $node_count);
}

# Recursively writes $node and everything below it to $fh, children first, and
# returns the hex index under which $node is (or already was) recorded.
sub write_node {
    my ($fh, $node) = @_;

    $node_count++;
    $node = {} unless defined $node;    # an empty dictionary yields an undef root

    # The end-of-word marker goes first and is never recursed into.  It is picked
    # out explicitly rather than relying on '!' sorting ahead of the other keys.
    my @n = exists $node->{'!'} ? ('!') : ();

    foreach my $ch (sort grep { $_ ne '!' } keys %$node) {
        my $p = write_node($fh, $node->{$ch});
        push @n, $ch . $p;
    }

    my $n = join(' ', @n);
    my $p = $node_map{$n};
    unless (defined $p) {
        $p = $node_map{$n} = sprintf('%X', ++$idx);    # Assign new index and record
        print $fh $n, "\n";                            # Output new node
    }

    return $p;
}

#=========================================================================================
# Reading the Trie.

my @nodes;    # index => node hash, populated while a file is being read

# Reads a trie written by write_trie from $t_name and returns its root node.
# Child references are resolved by index, so nodes that share an index in the file
# share one hash in memory.  Dies if the file cannot be opened.
sub read_trie {
    my ($t_name) = @_;

    open(my $fh, '<', $t_name) or die("Unable to open trie '$t_name': $!\n");

    # Index 0 is the undef that '!' points at; index 1 is the end-of-word node.
    @nodes = (undef, { '!' => undef });

    while (my $line = <$fh>) {
        my %node;
        for my $entry (split(' ', $line)) {
            # First character is the key, the remainder is the child's hex index.
            my ($c, $p) = unpack('AA*', $entry);
            $node{$c} = $nodes[hex($p)];    # Note hex('') = 0
        }
        push @nodes, \%node;
    }
    close $fh;

    my $trie = pop @nodes;    # the root is the last node written
    @nodes = ();
    return $trie;
}

#=========================================================================================
# Walk two tries to check they are identical
#
# $w is the prefix spelled out so far (used in error messages); $ra and $rb are the
# corresponding nodes of the two tries.  Dies on the first difference found.

sub walk {
    my ($w, $ra, $rb) = @_;

    my @ac = sort keys %$ra;
    my @bc = sort keys %$rb;

    if (@ac != @bc) {
        die "node length mismatch \@ '$w': (@ac) vs (@bc)";
    }
    for my $i (0 .. $#ac) {
        if ($ac[$i] ne $bc[$i]) {
            die "node mismatch \@ '$w': (@ac) vs (@bc)";
        }
    }

    foreach my $ch (@ac) {
        my $ad = $ra->{$ch};
        my $bd = $rb->{$ch};
        if ($ch eq '!') {
            # The end-of-word marker must not lead anywhere in either trie.
            if (defined($ad)) { die "'!' with defined down ($ad) in 'a' \@ '$w'"; }
            if (defined($bd)) { die "'!' with defined down ($bd) in 'b' \@ '$w'"; }
        }
        else {
            walk($w . $ch, $ad, $bd);
        }
    }
}