I wrote a program to solve a word ladder puzzle, and when I finished I wondered how my solution would compare to the approach taken by other Monks. This was a 'just for fun' project, but I found it an interesting challenge. First, a definition of the puzzle:
For example, this puzzle changes 'page' into 'book' in 5 steps (4 intermediate words):
You can get perl out of a monk using only 4 intermediate words, but it's not as fun. :)
My code is posted below. My questions are:
use strict;
use warnings;
#************************** USER SETTINGS ***************************
# The starting and ending words - all must be the same length.
# Arrays are used so multiple words can be provided (for solving the
# puzzle in pieces rather than all at once).
my @startword = qw( page );
my @endword = qw( book );
# The number of intermediate words allowed, must be > 1. For example,
# PAGE -> ___ -> ___ -> ___ -> ___ -> BOOK has 4 intermediate words
my $num_intermed_words = 4;
# The file containing all valid words (a dictionary). See the
# read_known_words sub for the format of the word file
my $word_file = '2of4brif_sorted.txt';
#****************************** MAIN ********************************
my $wordlength = length( $startword[0] );
if( $wordlength != length( $endword[0] ) )
{
die "Starting and ending words are not the same length!\n";
}
# read all valid words from the word (dictionary) file
my @all_words;
read_known_words( $word_file, $wordlength, \@all_words );
# create a hash of arrays that lists all possible intermediate words
# the starting word is level 1
# if there are 4 intermediate words the ending word is level 5
my %possible_words;
$possible_words{1} = \@startword;
$possible_words{ $num_intermed_words + 2 } = \@endword;
my $num_steps = int( $num_intermed_words / 2 );
# start at $startword and work to the midpoint
my $num_forsteps = $num_intermed_words % 2 ?
$num_steps + 1 : $num_steps;
foreach my $level ( 1 .. $num_forsteps )
{
print "level $level: ";
my $ref2backlevel = exists $possible_words{ $level - 1 } ?
$possible_words{ $level - 1 } : [ ];
$possible_words{ $level + 1 } =
find_within_one_letter( \@all_words,
$possible_words{$level},
$ref2backlevel );
}
# start at $endword and work to the midpoint
for( my $step = 1, my $level = $num_intermed_words + 2;
$step <= $num_steps; $step++, $level-- )
{
print "level $level: ";
my $ref2backlevel = exists $possible_words{ $level + 1 } ?
$possible_words{ $level + 1 } : [ ];
$possible_words{ $level - 1 } =
find_within_one_letter( \@all_words,
$possible_words{$level},
$ref2backlevel );
}
@all_words = (); # clear the dictionary so the memory can be reused
# create subsets based on prior results
my %final_words;
$final_words{1} = \@startword;
$final_words{ $num_intermed_words + 2 } = \@endword;
# compare the middle two sets of possible words
print 'level ', $num_forsteps + 1, ': ';
$final_words{ $num_forsteps + 1 } =
find_within_one_letter( $possible_words{ $num_forsteps + 1 },
$possible_words{ $num_forsteps + 2 },
[ ] );
# work backwards from the middle to the start word
for( my $level = $num_forsteps; $level > 1; $level-- )
{
print "level $level: ";
$final_words{$level} =
find_within_one_letter( $possible_words{$level},
$final_words{ $level + 1 }, [ ] );
}
# work forwards from the middle to the end word
foreach my $level ( $num_forsteps + 2 .. $num_intermed_words + 1 )
{
print "level $level: ";
$final_words{$level} =
find_within_one_letter( $possible_words{$level},
$final_words{ $level - 1 }, [ ] );
}
# Assemble all possible change paths.
# This is fine for most puzzles, but it does not scale well!! Perhaps
# a blocked approach would be better (list all possible solutions from
# the starting word to the midpoint, then all possible solutions from
# the midpoint to the ending word).
my @paths = @startword;
foreach my $level ( 2 .. $num_intermed_words + 2 )
{
my @temppaths;
foreach my $pathoption ( @paths )
{
my @pathwords = split( '-', $pathoption );
foreach my $leveloption ( @{ $final_words{$level} } )
{
# if this $leveloption is 1 char diff from the last word
# in $pathoption, keep it
my $edit_distance = calc_dist( $pathwords[-1],
$leveloption );
if( $edit_distance == 1 )
{
push( @temppaths,
join( '-', $pathoption, $leveloption ) );
}
}
}
@paths = @temppaths;
}
# Print results
my $outfile = join( '', $startword[0], '2', $endword[0],
'_in', $num_intermed_words, '.txt' );
open( my $out_fh, '>', $outfile )
or die "Error opening output file:\n$!";
print $out_fh 'options for converting \'', $startword[0], '\' to \'',
$endword[0], '\'', "\n";
print $out_fh "number of intermediate words = $num_intermed_words\n";
print $out_fh "dictionary file = $word_file\n";
foreach my $step ( sort { $a <=> $b } keys %final_words )
{
print $out_fh "\nstep $step options:\n",
join( "\n", @{ $final_words{$step} } );
}
print $out_fh "\n\nall possible conversion paths:\n";
foreach my $pathoption ( @paths )
{
print $out_fh "$pathoption\n";
}
close $out_fh;
#*************************** SUBFUNCTIONS ***************************
sub read_known_words
{
my ( $word_file, $word_length, $ref2allwords ) = @_;
open( my $wordfile, "<$word_file" )
or die "Error opening $word_file\n$!";
# find the right section and add the words to the array
while( defined( my $line = <$wordfile> ) )
{
if( $line =~ m/^# LENGTH $word_length\n/ )
{
last;
}
}
while( defined( my $line = <$wordfile> ) )
{
if( $line =~ m/^# LENGTH/ )
{
last;
}
else
{
chomp $line;
push( @{ $ref2allwords }, $line );
}
}
close $wordfile;
if( scalar @{ $ref2allwords } == 0 )
{
die "No words of length $word_length found in $word_file\n";
}
}
sub find_within_one_letter
{
my ( $ref2dictionary, $ref2startwords, $ref2backwords ) = @_;
print 'dictionary = ', scalar @{ $ref2dictionary },
', starting options = ', scalar @{ $ref2startwords };
# make the 'backwords' list into a hash for quick lookup
my %notin;
foreach my $word ( @{ $ref2backwords } )
{
$notin{$word} = 1;
}
# scan through the dictionary list and put words that are within
# 1 letter of a start word (but are not in %notin) into @results
my @results;
DICT_WORD:
foreach my $dictword ( @{ $ref2dictionary } )
{
if( exists( $notin{$dictword} ) )
{
next DICT_WORD;
}
foreach my $startword ( @{ $ref2startwords } )
{
my $edit_distance = calc_dist( $dictword, $startword );
if( $edit_distance == 1 )
{
push( @results, $dictword );
next DICT_WORD;
}
}
}
print ', found = ', scalar @results, "\n";
return \@results;
}
sub calc_dist
{
my ( $word1, $word2 ) = @_;
my $dist = 0;
for( my $i = 0; $i <= length( $word1 ) - 1; $i++ )
{
unless( substr( $word1, $i, 1 ) eq substr( $word2, $i, 1 ) )
{
$dist++;
}
}
return $dist;
}