Beefy Boxes and Bandwidth Generously Provided by pair Networks
No such thing as a small change

Comment on

( #3333=superdoc: print w/replies, xml ) Need Help??
I have taken the best parts of all the versions and re-written them in what I believe to be relatively clean code. I have not yet benchmarked this but I expect it to be very close to your current best. Here is a list of features:
  • Command line argument parsing
  • Ability to handle non-dictionary words (configurable)
  • Ability to handle absense of Text::LevenshteinXS (automatic)
  • Ability to use different compiled databases (configurable)
  • Ability to create new compiled databases (configurable)
  • Speed and non-duplicated code
#!/usr/bin/perl use strict; use warnings; use Getopt::Std; use Storable; my %opt; get_args(\%opt); build_db(\%opt) if $opt{b}; my $db = retrieve $opt{d}; my $list = slice_db(\%opt, $db); die $list if ! ref $list; my $path = find_path($opt{f}, $opt{t}, $list); print $path; sub build_db { my $opt = shift @_; my (%data, $db); open(my $dict, '<', $opt->{n}) or die "Unable to open '$opt->{n}' +for reading: $!"; while (<$dict>) { chomp; push @{$data{length()}}, $_; } for my $len (keys %data) { my $end = $#{$data{$len}}; for my $i (0 .. $end - 1) { my $word = $data{$len}[$i]; for my $j ($i + 1 .. $end) { my $test = $data{$len}[$j]; ! exists $db->{$len}{$_} and $db->{$len}{$_} = [] for +$word, $test; if (distance($word, $test) == 1) { push @{$db->{$len}{$word}}, $test; push @{$db->{$len}{$test}}, $word; } } } } store $db, $opt->{d}; } sub distance { no warnings 'redefine'; eval { require Text::LevenshteinXS; }; *main::distance = ! $@ ? \&Text::LevenshteinXS::distance : sub { my ($src, $tgt) = @_; my $cnt; substr($src, $_, 1) ne substr($tgt, $_, 1) && ++$cnt for 0 .. +length($src) - 1; return $cnt; }; &distance; } sub find_path { my ($src, $tgt, $list) = @_; my @src_rel = [$src]; my @tgt_rel = [$tgt]; my %src_off = ($src => 0); my %tgt_off = ($tgt => 0); my ($src_pos, $tgt_pos, %src_cnt, %tgt_cnt) = (0, 0); while (1) { for my $dir (qw/src tgt/) { my ($rel, $off, $opp, $cnt, $pos, $val) = $dir eq 'src' ? (\@src_rel, \%src_off, \%tgt_off, \%src_cnt, \$src_p +os, $src) : (\@tgt_rel, \%tgt_off, \%src_off, \%tgt_cnt, \$tgt_p +os, $tgt); my $end = $#$rel; return "Unable to work with $val" if $cnt->{$$pos}++ > 2; for my $i ($$pos .. $#$rel) { my @prefix = @{$rel->[$i]}; my $search = pop @prefix; push @prefix, $i; for my $word (@{$list->{$search}}) { next if $off->{$word}; push @$rel, [@prefix, $word]; $off->{$word} = $#$rel; return solution( $word, \%src_off, \@src_rel, \%tgt_off, \@tgt_ +rel ) if defined $opp->{$word}; } } $$pos = $end + 1; } } } sub get_args { my $opt = shift @_; my $Usage = qq{Usage: $0 -f <from> -t <to> -d <database> [-b -n <f +ile>] [-l] -h : This help message. -f : The word you want to build the bridge (f)rom -t : The word you want to build the bridge (t)o -d : The name of the (d)atabase file -b : Instruct for database to be (b)uild # optional -f : The file (n)ame to build database from # required with -b -l : Allow (l)leniency in the -f and -t words not being in the + database } . "\n"; getopts('hf:t:d:bn:l', $opt) or die $Usage; die $Usage if $opt->{h}; die $Usage if ! $opt->{f} || ! $opt->{t} || ! $opt->{d}; die $Usage if $opt->{b} && ! $opt->{n}; } sub slice_db { my ($opt, $db) = @_; my $len = length($opt->{t}); return "Unequal length of end-points" if $len != length($opt->{f} +); return "No words of length $len found" if ! exists $db->{$len}; my $list = $db->{$len}; if ($opt->{l}) { for my $word ($opt->{f}, $opt->{t}) { if (! exists $list->{$word}) { for (grep distance($_, $word) == 1, keys %$list) { push @{$list->{$word}}, $_; push @{$list->{$_}}, $word; } return "Unable to work with $word" if ! @{$list->{$wor +d}}; } } } return $list; } sub solution { my ($word, $src_off, $src_rel, $tgt_off, $tgt_rel) = @_; my @tgt_pth = @{$tgt_rel->[$tgt_off->{$word}]}; my @src_pth = @{$src_rel->[$src_off->{$word}]}; my $path = pop @tgt_pth; my $i = @tgt_pth; $path .= "-" . $tgt_rel->[$tgt_pth[$i]]->[-1] while $i--; pop @src_pth; $i = @src_pth; $path = $src_rel->[$src_pth[$i]][-1] . "-$path" while $i--; return $path; }

Cheers - L~R

In reply to Re^3: A Better Word Morph Builder by Limbic~Region
in thread A Better Word Morph Builder by Limbic~Region

Use:  <p> text here (a paragraph) </p>
and:  <code> code here </code>
to format your post; it's "PerlMonks-approved HTML":

  • Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!
  • Titles consisting of a single word are discouraged, and in most cases are disallowed outright.
  • Read Where should I post X? if you're not absolutely sure you're posting in the right place.
  • Please read these before you post! —
  • Posts may use any of the Perl Monks Approved HTML tags:
    a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr
  • You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)
            For:     Use:
    & &amp;
    < &lt;
    > &gt;
    [ &#91;
    ] &#93;
  • Link using PerlMonks shortcuts! What shortcuts can I use for linking?
  • See Writeup Formatting Tips and other pages linked from there for more info.
  • Log In?

    What's my password?
    Create A New User
    and all is quiet...

    How do I use this? | Other CB clients
    Other Users?
    Others meditating upon the Monastery: (10)
    As of 2018-06-18 23:28 GMT
    Find Nodes?
      Voting Booth?
      Should cpanminus be part of the standard Perl release?

      Results (111 votes). Check out past polls.