Beefy Boxes and Bandwidth Generously Provided by pair Networks
XP is just a number
 
PerlMonks  

comment on

( #3333=superdoc: print w/replies, xml ) Need Help??

Greets,

Here's another entry for you, Limbic~Region. I believe it meets your criteria of checking the longest strings first.

#!/usr/bin/perl use strict; use warnings; use Inline 'C'; for my $file (@ARGV) { open(my $fh, '<', $file) or die "Unable to open '$file' for readin +g: $!"; while (<$fh>) { my ($set) = $_ =~ /^(\w+)/; #print "INPUT: $set\n"; powerset($set, length($set)); } } __END__ __C__ /* Generate a powerset for the set of all unique lowercase letters det +ected in * a string of length [len]. */ void powerset(char *set, unsigned len); /* Set up. */ void init(void); /* Start with a number. Print it's associated powerset if we haven't +seen it * before. Iterate over the bits, negating one each time and recurse. */ void reduce(long fingerprint); /* Print the letter associated with each bit in a 32-bit bitmask */ void decode_and_print(long fingerprint); /* 0x4000000 is 2**26. This array has one slot for every possible unor +dered * set of unique lower-case letters. * * Note: if memory consumption were a concern, this would be implement +ed using * a bit vector rather than an array of char. */ static char seen[0x4000000]; /* Map of individual letters to a bit mask with a single bit set. */ static long letter_masks[128]; void powerset(char *set, unsigned len) { long fingerprint = 0; unsigned i; /* call init the first time this function is invoked */ static bool init_flag = FALSE; if (!init_flag) { init(); init_flag = TRUE; } /* generate a bitmask representing this set */ for (i = 0; i < len; i++) { long bit = letter_masks[ set[i] ]; if (bit == 0) continue; fingerprint |= bit; } /* reduce the bitmask bit by bit */ reduce(fingerprint); } void init(void) { char letter; unsigned bitshift; /* zero out the "seen" array and the "letter_masks" array */ memset(seen, 0, sizeof(seen)); memset(letter_masks, 0, sizeof(letter_masks)); /* skip the null set, 'cause Limbic~Region sez so */ seen[0] = 1; /* assign one bit mask for each letter (assumes contiguity of a-z) + */ for (letter = 'a', bitshift = 0; letter <= 'z'; letter++, bitshift++ ) { letter_masks[letter] = 0x1 << bitshift; } return; } void reduce(long fingerprint) { unsigned bit; /* if we've seen this set, we've seen all its subsets, so bail */ if (seen[fingerprint]) return; /* first time we've seen this set, so print it */ seen[fingerprint] = 1; decode_and_print(fingerprint); /* iterate over the bits in the fingerprint */ for (bit = 1; bit <= 26; bit++) { long single_bit_mask = 0x1 << (bit - 1); if (fingerprint & single_bit_mask) { /* turn off one bit and recurse */ reduce( fingerprint ^ single_bit_mask ); } } } void decode_and_print(long fingerprint) { unsigned bit; char letter; /* shift one bit left, increment one letter */ for (bit = 1, letter = 'a'; bit <= 26; bit++, letter++ ) { /* print the letter if its associated bit is on */ long single_bit_mask = 0x1 << (bit - 1); if (fingerprint & single_bit_mask) { putc(letter, stdout); } } /* finish up with a newline */ putc('\n', stdout); }
--
Marvin Humphrey
Rectangular Research ― http://www.rectangular.com

In reply to Re: Powerset short-circuit optimization by creamygoodness
in thread Powerset short-circuit optimization by Limbic~Region

Title:
Use:  <p> text here (a paragraph) </p>
and:  <code> code here </code>
to format your post; it's "PerlMonks-approved HTML":



  • Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!
  • Titles consisting of a single word are discouraged, and in most cases are disallowed outright.
  • Read Where should I post X? if you're not absolutely sure you're posting in the right place.
  • Please read these before you post! —
  • Posts may use any of the Perl Monks Approved HTML tags:
    a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr
  • You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)
            For:     Use:
    & &amp;
    < &lt;
    > &gt;
    [ &#91;
    ] &#93;
  • Link using PerlMonks shortcuts! What shortcuts can I use for linking?
  • See Writeup Formatting Tips and other pages linked from there for more info.
  • Log In?
    Username:
    Password:

    What's my password?
    Create A New User
    Chatterbox?
    and the web crawler heard nothing...

    How do I use this? | Other CB clients
    Other Users?
    Others romping around the Monastery: (4)
    As of 2020-10-27 12:31 GMT
    Sections?
    Information?
    Find Nodes?
    Leftovers?
      Voting Booth?
      My favourite web site is:












      Results (256 votes). Check out past polls.

      Notices?