use strict; use warnings; my @dictionary = slurp( 'dictionary.txt' ); my $translator = Node689350->new( { dict => \@dictionary } ); while (<>) { chomp; my $out = $translator->words_for_phone( $_ ); foreach my $o ( @{ $out } ) { print "$_: $o\n"; } } sub slurp { my $file = shift; open my $fh, '<', $file or die "Can't read '$file': $!"; chomp( my @lines = <$fh> ); close $fh; return @lines; } package Node689350; # Call as __PACKAGE__->new( { dict => [ ... ] } ) # where "..." is every word in the dictionary sub new { my $class = shift; my $self = shift; foreach my $w ( @{ $self->{dict} } ) { my $orig = $w; # save the original word (for output) # turn to all lowercase letters $w =~ tr/A-Z/a-z/; $w =~ tr/a-z//cd; # translate to the number that makes this word $w =~ tr/ejnqrwxdsyftamcivbkulopghz/01112223334455666777888999/; # Store this word as one possible result of this number push @{ $self->{word_for}{$w} }, $orig; } # I don't need the dictionary anymore delete $self->{dict}; return bless $self, $class; } # This function will NOT produce individual digits as "words" sub words_only { my $self = shift; return $self->words_for_phone( @_ ); } sub words_for_phone { my $self = shift; my $out = []; # If this was called from 'words_only', don't produce an individual digit my $words_only; { no warnings 'uninitialized'; my @call = caller 1; $words_only = ( $call[3] eq __PACKAGE__ . '::words_only' ); } while ( defined( my $phone = shift ) ) { # Turn the input phone number to digits only $phone =~ tr/0-9//cd; # Every truncation, longest first my @subphones = reverse map { substr $phone, 0, $_ } 1 .. length $phone; # Every truncation that has an associated word my @word_subphones = grep { exists $self->{word_for}{$_} } @subphones; # If there were no words found, # and we don't have to stick to words only, if ( ! @word_subphones && ! $words_only ) { # Get the first digit as a "word" my ( $w, $new_phone ) = ( $phone =~ m{ \A (.) (.*) \z }xms ); # If this is the last digit, return this as the only production if ( ! length $new_phone ) { return [ $w ]; } # Make the rest of the number into words (NOT DIGITS) foreach my $more ( @{ $self->words_only( $new_phone ) } ) { push @{ $out }, $w . ' ' . $more; } } SUBPHONE: foreach my $n ( @word_subphones ) { # This is the part of the number we didn't translate my $new_phone = substr $phone, length $n; # If there's no more left to translate, don't try if ( ! length $new_phone ) { push @{ $out }, @{ $self->{word_for}{$n} }; next SUBPHONE; } # for every translation we have, foreach my $w ( @{ $self->{word_for}{$n} } ) { # look for translations of the remaining number foreach my $more ( @{ $self->words_for_phone( $new_phone ) } ) { push @{ $out }, $w . ' ' . $more; } } } } return $out; } #### package main; use Test::More; my $sample_dict = <<'END_SAMPLE_DICT'; an blau Bo" Boot bo"s da Fee fern Fest fort je jemand mir Mix Mixer Name neu o"d Ort so Tor Torf Wasser END_SAMPLE_DICT ; my @sample_phones = grep { length } split /\s+/, q{ 112 5624-82 4824 0721/608-4067 10/783--5 1078-913-5 381482 04824 }; my $sample_output = <<'END_SAMPLE_OUTPUT'; 5624-82: mir Tor 5624-82: Mix Tor 4824: Torf 4824: fort 4824: Tor 4 10/783--5: neu o"d 5 10/783--5: je bo"s 5 10/783--5: je Bo" da 381482: so 1 Tor 04824: 0 Torf 04824: 0 fort 04824: 0 Tor 4 END_SAMPLE_OUTPUT ; if (0) { my %output_for = map { $_ => [] } @sample_phones; foreach my $output_line ( split /\n/, $sample_output ) { my ( $phone, $word ) = split /: /, $output_line; push @{ $output_for{ $phone } }, $word; } my $x = Node689350->new( { dict => [ split /\n/, $sample_dict ] } ); plan 'tests' => scalar @sample_phones; foreach my $phone ( @sample_phones ) { my $phone_words = $x->words_for_phone( $phone ); is_deeply( [ sort @{ $phone_words } ], [ sort @{ $output_for{ $phone } } ], "phone $phone" ); } } else { my @dictionary = slurp( 'dictionary.txt' ); my @input = slurp( 'input.txt' ); my @output = slurp( 'output.txt' ); my %output_for = map { $_ => [] } @input; foreach my $output_line ( @output ) { my ( $phone, $word ) = split /: /, $output_line; push @{ $output_for{ $phone } }, $word; } # This appears twice for some reason $output_for{ '2' } = [ '2' ]; plan 'tests' => scalar @input; my $x = Node689350->new( { dict => \@dictionary } ); foreach my $phone ( @input ) { my $phone_words = $x->words_for_phone( $phone ); is_deeply( [ sort @{ $phone_words } ], [ sort @{ $output_for{ $phone } } ], "phone $phone" ); } }