Beefy Boxes and Bandwidth Generously Provided by pair Networks
We don't bite newbies here... much
 
PerlMonks  

Re^3: A Better Word Morph Builder

by Limbic~Region (Chancellor)
on Jun 30, 2006 at 17:32 UTC ( #558631=note: print w/ replies, xml ) Need Help??


in reply to Re^2: A Better Word Morph Builder
in thread A Better Word Morph Builder

Ieronim,
I have taken the best parts of all the versions and re-written them in what I believe to be relatively clean code. I have not yet benchmarked this but I expect it to be very close to your current best. Here is a list of features:

  • Command line argument parsing
  • Ability to handle non-dictionary words (configurable)
  • Ability to handle absense of Text::LevenshteinXS (automatic)
  • Ability to use different compiled databases (configurable)
  • Ability to create new compiled databases (configurable)
  • Speed and non-duplicated code
#!/usr/bin/perl use strict; use warnings; use Getopt::Std; use Storable; my %opt; get_args(\%opt); build_db(\%opt) if $opt{b}; my $db = retrieve $opt{d}; my $list = slice_db(\%opt, $db); die $list if ! ref $list; my $path = find_path($opt{f}, $opt{t}, $list); print $path; sub build_db { my $opt = shift @_; my (%data, $db); open(my $dict, '<', $opt->{n}) or die "Unable to open '$opt->{n}' +for reading: $!"; while (<$dict>) { chomp; push @{$data{length()}}, $_; } for my $len (keys %data) { my $end = $#{$data{$len}}; for my $i (0 .. $end - 1) { my $word = $data{$len}[$i]; for my $j ($i + 1 .. $end) { my $test = $data{$len}[$j]; ! exists $db->{$len}{$_} and $db->{$len}{$_} = [] for +$word, $test; if (distance($word, $test) == 1) { push @{$db->{$len}{$word}}, $test; push @{$db->{$len}{$test}}, $word; } } } } store $db, $opt->{d}; } sub distance { no warnings 'redefine'; eval { require Text::LevenshteinXS; }; *main::distance = ! $@ ? \&Text::LevenshteinXS::distance : sub { my ($src, $tgt) = @_; my $cnt; substr($src, $_, 1) ne substr($tgt, $_, 1) && ++$cnt for 0 .. +length($src) - 1; return $cnt; }; &distance; } sub find_path { my ($src, $tgt, $list) = @_; my @src_rel = [$src]; my @tgt_rel = [$tgt]; my %src_off = ($src => 0); my %tgt_off = ($tgt => 0); my ($src_pos, $tgt_pos, %src_cnt, %tgt_cnt) = (0, 0); while (1) { for my $dir (qw/src tgt/) { my ($rel, $off, $opp, $cnt, $pos, $val) = $dir eq 'src' ? (\@src_rel, \%src_off, \%tgt_off, \%src_cnt, \$src_p +os, $src) : (\@tgt_rel, \%tgt_off, \%src_off, \%tgt_cnt, \$tgt_p +os, $tgt); my $end = $#$rel; return "Unable to work with $val" if $cnt->{$$pos}++ > 2; for my $i ($$pos .. $#$rel) { my @prefix = @{$rel->[$i]}; my $search = pop @prefix; push @prefix, $i; for my $word (@{$list->{$search}}) { next if $off->{$word}; push @$rel, [@prefix, $word]; $off->{$word} = $#$rel; return solution( $word, \%src_off, \@src_rel, \%tgt_off, \@tgt_ +rel ) if defined $opp->{$word}; } } $$pos = $end + 1; } } } sub get_args { my $opt = shift @_; my $Usage = qq{Usage: $0 -f <from> -t <to> -d <database> [-b -n <f +ile>] [-l] -h : This help message. -f : The word you want to build the bridge (f)rom -t : The word you want to build the bridge (t)o -d : The name of the (d)atabase file -b : Instruct for database to be (b)uild # optional -f : The file (n)ame to build database from # required with -b -l : Allow (l)leniency in the -f and -t words not being in the + database } . "\n"; getopts('hf:t:d:bn:l', $opt) or die $Usage; die $Usage if $opt->{h}; die $Usage if ! $opt->{f} || ! $opt->{t} || ! $opt->{d}; die $Usage if $opt->{b} && ! $opt->{n}; } sub slice_db { my ($opt, $db) = @_; my $len = length($opt->{t}); return "Unequal length of end-points" if $len != length($opt->{f} +); return "No words of length $len found" if ! exists $db->{$len}; my $list = $db->{$len}; if ($opt->{l}) { for my $word ($opt->{f}, $opt->{t}) { if (! exists $list->{$word}) { for (grep distance($_, $word) == 1, keys %$list) { push @{$list->{$word}}, $_; push @{$list->{$_}}, $word; } return "Unable to work with $word" if ! @{$list->{$wor +d}}; } } } return $list; } sub solution { my ($word, $src_off, $src_rel, $tgt_off, $tgt_rel) = @_; my @tgt_pth = @{$tgt_rel->[$tgt_off->{$word}]}; my @src_pth = @{$src_rel->[$src_off->{$word}]}; my $path = pop @tgt_pth; my $i = @tgt_pth; $path .= "-" . $tgt_rel->[$tgt_pth[$i]]->[-1] while $i--; pop @src_pth; $i = @src_pth; $path = $src_rel->[$src_pth[$i]][-1] . "-$path" while $i--; return $path; }

Cheers - L~R


Comment on Re^3: A Better Word Morph Builder
Download Code

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: note [id://558631]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others cooling their heels in the Monastery: (8)
As of 2014-10-22 12:29 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    For retirement, I am banking on:










    Results (117 votes), past polls