Beefy Boxes and Bandwidth Generously Provided by pair Networks
The stupid question is the question not asked
 
PerlMonks  

Anagram26

by japhy (Canon)
on Apr 29, 2004 at 21:41 UTC ( #349278=sourcecode: print w/ replies, xml ) Need Help??

Category: Text Processing
Author/Contact Info Jeff japhy Pinyan
Description: Yet Another Anagram Finder.
This module converts the string to be anagrammed into a 26-byte representation of itself, where each byte's ASCII value is the number of times a given letter appears in the string. A string like "preinitialization" becomes \2\0\0\0\1\0\0\0\5\0\0\1\0\2\1\1\0\1\0\2\0\0\0\0\0\1.

Then, candidate substring anagrams are converted into the 26-byte representations too, and then into regexes (so \3 becomes [\0-\3] and so on). The main string is matched against the regex, and if it succeeds, the values in that substring are removed from the main string.

Here's code that uses the module:

#!/usr/bin/perl

use Anagram26;
use strict;

my $file = shift;
my $words = build_word_list($file, @ARGV);

add_more_words($words, qw(
  a I
  I'm you're he's she's we're they're
  I'll you'll he'll she'll we'll they'll
  I've you've we've they've
  it's that's what's
  isn't can't won't don't doesn't
));

print "Anagrams of '@ARGV':\n";

while (my $word = get_anagram($words)) {
  print "-> $word\n";
}

# or wait until they're all computed
# and get them in a hash via:
# my $anagram_hash = anagrams($words);

And here's the module. It supports a forking interface (via the get_anagram() function) and a wait-till-the-end interface (via the anagrams() function).
package Anagram26;

use strict;
require Exporter;

@Anagram26::ISA = qw( Exporter );
@Anagram26::EXPORT = qw(
  build_word_list
  add_more_words
  get_anagram
  anagrams
);

my @VAL = ('\0', map "[\\0-\\$_]", 1 .. 255);
my $SPAWNED = 0;
my $CHILD;


sub build_word_list {
  my $file = shift;
  (my $chars = lc join "", @_) =~ tr/a-z//cd;
  my $len = length $chars;
  my $rx = qr/[^$chars]/i;
  my @words = ($chars);

  open WORDS, "< $file" or die "can't read $file: $!";
  while (<WORDS>) {
    chomp;
    next if $len < length or /$rx/;
    push @{ $words[length]{frequency(lc)} }, $_;
  }
  close WORDS;

  for (keys %{ $words[-1] }) {
    warn freq_to_str($_), " => (@{ $words[-1]{$_} })\n";
  }

  return \@words;
}


sub add_more_words {
  my $words = shift;

  for (@_) {
    (my $canon = lc $_) =~ tr/a-z//cd;
    push @{ $words->[length $canon]{frequency($canon)} }, $_;
  }
}


sub get_anagram {
  &spawn if $SPAWNED == 0;

  my $ana = <IN>;

  if (defined $ana) { chomp $ana }
  else { close IN }

  return $ana;
}


sub spawn {
  pipe IN, OUT;
  select((select(OUT), $|=1)[0]);

  $SPAWNED = 1;
  my $pid = fork;

  if ($pid) {
    $CHILD = $pid;
    close OUT;
  }
  elsif (defined $pid) {
    close IN;
    &anagrams;
    exit;
  }
  else {
    die "fork failed: $!";
  }
}


sub anagrams {
  my $words = shift;
  my $chars = $words->[0];
  my %seen;
  my $ana;

  $ana = sub {
    my ($str, $len, $current, $prune) = @_;
    my $rx = freq_to_rx($str);

    if ($len == 0) {
      for (expand(@$current)) {
        my $ana = join " ", sort split;
        if ($seen{$ana}++ == 0 and $SPAWNED) { print OUT "$ana\n" }
        return;
      }
    }

    for (reverse(1 .. $prune)) {
      my $l = $words->[$_];

      for my $w (grep /$rx/, keys %$l) {
        my $p = ($_, $len - $_)[$_ > $len/2];
        push @$current, $l->{$w};
        $ana->(remove($str, $w, $len), $current, $p);
        pop @$current;
      }
    }
  };

  $ana->(frequency($chars), length($chars), [], @$words-1);
  close OUT if $SPAWNED;
  return \%seen;
}


sub frequency {
  my $word = shift;
  my $s = "\0" x 26;
  ++vec($s, ord($_) - ord('a'), 8) for split //, $word;
  return $s;
}


sub remove {
  my ($str, $rem, $len) = @_;
  my $o = 0;
  vec($str, $o++, 8) -= ord, $len -= ord for split //, $rem;
  return ($str, $len);
}


sub freq_to_rx {
  my $rx = join "", @VAL[map ord, split //, shift];
  qr/^$rx$/;
}


sub freq_to_str {
  my $c = 'a';
  join "", map $c++ x ord, split //, shift;
}


sub expand {
  return @{ +shift } if @_ == 1;
  return map { my $f = $_; map "$f $_", expand(@_) } @{ +shift };
}


END {
  kill TERM => $CHILD if defined $CHILD;
}


1;

Comment on Anagram26
Select or Download Code

Back to Code Catacombs

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: sourcecode [id://349278]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others rifling through the Monastery: (9)
As of 2014-07-10 18:01 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    When choosing user names for websites, I prefer to use:








    Results (215 votes), past polls