use strict; use warnings; #************************** USER SETTINGS *************************** # The starting and ending words - all must be the same length. # Arrays are used so multiple words can be provided (for solving the # puzzle in pieces rather than all at once). my @startword = qw( page ); my @endword = qw( book ); # The number of intermediate words allowed, must be > 1. For example, # PAGE -> ___ -> ___ -> ___ -> ___ -> BOOK has 4 intermediate words my $num_intermed_words = 4; # The file containing all valid words (a dictionary). See the # read_known_words sub for the format of the word file my $word_file = '2of4brif_sorted.txt'; #****************************** MAIN ******************************** my $wordlength = length( $startword[0] ); if( $wordlength != length( $endword[0] ) ) { die "Starting and ending words are not the same length!\n"; } # read all valid words from the word (dictionary) file my @all_words; read_known_words( $word_file, $wordlength, \@all_words ); # create a hash of arrays that lists all possible intermediate words # the starting word is level 1 # if there are 4 intermediate words the ending word is level 5 my %possible_words; $possible_words{1} = \@startword; $possible_words{ $num_intermed_words + 2 } = \@endword; my $num_steps = int( $num_intermed_words / 2 ); # start at $startword and work to the midpoint my $num_forsteps = $num_intermed_words % 2 ? $num_steps + 1 : $num_steps; foreach my $level ( 1 .. $num_forsteps ) { print "level $level: "; my $ref2backlevel = exists $possible_words{ $level - 1 } ? $possible_words{ $level - 1 } : [ ]; $possible_words{ $level + 1 } = find_within_one_letter( \@all_words, $possible_words{$level}, $ref2backlevel ); } # start at $endword and work to the midpoint for( my $step = 1, my $level = $num_intermed_words + 2; $step <= $num_steps; $step++, $level-- ) { print "level $level: "; my $ref2backlevel = exists $possible_words{ $level + 1 } ? $possible_words{ $level + 1 } : [ ]; $possible_words{ $level - 1 } = find_within_one_letter( \@all_words, $possible_words{$level}, $ref2backlevel ); } @all_words = (); # clear the dictionary so the memory can be reused # create subsets based on prior results my %final_words; $final_words{1} = \@startword; $final_words{ $num_intermed_words + 2 } = \@endword; # compare the middle two sets of possible words print 'level ', $num_forsteps + 1, ': '; $final_words{ $num_forsteps + 1 } = find_within_one_letter( $possible_words{ $num_forsteps + 1 }, $possible_words{ $num_forsteps + 2 }, [ ] ); # work backwards from the middle to the start word for( my $level = $num_forsteps; $level > 1; $level-- ) { print "level $level: "; $final_words{$level} = find_within_one_letter( $possible_words{$level}, $final_words{ $level + 1 }, [ ] ); } # work forwards from the middle to the end word foreach my $level ( $num_forsteps + 2 .. $num_intermed_words + 1 ) { print "level $level: "; $final_words{$level} = find_within_one_letter( $possible_words{$level}, $final_words{ $level - 1 }, [ ] ); } # Assemble all possible change paths. # This is fine for most puzzles, but it does not scale well!! Perhaps # a blocked approach would be better (list all possible solutions from # the starting word to the midpoint, then all possible solutions from # the midpoint to the ending word). my @paths = @startword; foreach my $level ( 2 .. $num_intermed_words + 2 ) { my @temppaths; foreach my $pathoption ( @paths ) { my @pathwords = split( '-', $pathoption ); foreach my $leveloption ( @{ $final_words{$level} } ) { # if this $leveloption is 1 char diff from the last word # in $pathoption, keep it my $edit_distance = calc_dist( $pathwords[-1], $leveloption ); if( $edit_distance == 1 ) { push( @temppaths, join( '-', $pathoption, $leveloption ) ); } } } @paths = @temppaths; } # Print results my $outfile = join( '', $startword[0], '2', $endword[0], '_in', $num_intermed_words, '.txt' ); open( my $out_fh, '>', $outfile ) or die "Error opening output file:\n$!"; print $out_fh 'options for converting \'', $startword[0], '\' to \'', $endword[0], '\'', "\n"; print $out_fh "number of intermediate words = $num_intermed_words\n"; print $out_fh "dictionary file = $word_file\n"; foreach my $step ( sort { $a <=> $b } keys %final_words ) { print $out_fh "\nstep $step options:\n", join( "\n", @{ $final_words{$step} } ); } print $out_fh "\n\nall possible conversion paths:\n"; foreach my $pathoption ( @paths ) { print $out_fh "$pathoption\n"; } close $out_fh; #*************************** SUBFUNCTIONS *************************** sub read_known_words { my ( $word_file, $word_length, $ref2allwords ) = @_; open( my $wordfile, "<$word_file" ) or die "Error opening $word_file\n$!"; # find the right section and add the words to the array while( defined( my $line = <$wordfile> ) ) { if( $line =~ m/^# LENGTH $word_length\n/ ) { last; } } while( defined( my $line = <$wordfile> ) ) { if( $line =~ m/^# LENGTH/ ) { last; } else { chomp $line; push( @{ $ref2allwords }, $line ); } } close $wordfile; if( scalar @{ $ref2allwords } == 0 ) { die "No words of length $word_length found in $word_file\n"; } } sub find_within_one_letter { my ( $ref2dictionary, $ref2startwords, $ref2backwords ) = @_; print 'dictionary = ', scalar @{ $ref2dictionary }, ', starting options = ', scalar @{ $ref2startwords }; # make the 'backwords' list into a hash for quick lookup my %notin; foreach my $word ( @{ $ref2backwords } ) { $notin{$word} = 1; } # scan through the dictionary list and put words that are within # 1 letter of a start word (but are not in %notin) into @results my @results; DICT_WORD: foreach my $dictword ( @{ $ref2dictionary } ) { if( exists( $notin{$dictword} ) ) { next DICT_WORD; } foreach my $startword ( @{ $ref2startwords } ) { my $edit_distance = calc_dist( $dictword, $startword ); if( $edit_distance == 1 ) { push( @results, $dictword ); next DICT_WORD; } } } print ', found = ', scalar @results, "\n"; return \@results; } sub calc_dist { my ( $word1, $word2 ) = @_; my $dist = 0; for( my $i = 0; $i <= length( $word1 ) - 1; $i++ ) { unless( substr( $word1, $i, 1 ) eq substr( $word2, $i, 1 ) ) { $dist++; } } return $dist; }