#!/usr/bin/env perl # # Generate a string with the longest non-repeating subsequences possible. # Include overlaps. # # Input N, size of the alphabet. # Input T, tuples (pairs, triples, quadruples, etc.) # # Start with all N-length permutations. # Create a graph of all pairs A(B...Y)Z, # such that every pair whose left member ends with B...Y, # the right member starts with B...Y (for some length n-1) # Find an Eulerian path through the permutations (visit every node only once) # The sequence of starting node, plus each additional ending letter, # is the De Bruijn sequence for this alphabet. use strict; use warnings; my \$n = (shift or 4); # N for alphabet size my \$t = (shift or \$n); # T for Tuples (pairs, triples, quadruples) my \$n1 = \$n - 1; my \$t1 = \$t - 1; my @alphabet = ('A'..'Z','0'..'9','a'..'z'); if (@alphabet < \$n) { die "Alphabet is smaller than \$n\n"; } # glob character of length 1 my \$alphabet = '{' . join(',', @alphabet[0..\$n1]) . '}'; # Generate all strings of length \$t in the given alphabet my \$glob_string = \$alphabet x \$t; my @nodes = glob("\$glob_string"); # Generate the graph of all strings that overlap in t-1 characters. my %graph; for my \$node1 (@nodes) { for my \$node2 (@nodes) { next if \$node1 eq \$node2; # If they overlap, add node2 to the array for node1 if (substr(\$node1,1,\$t1) eq substr(\$node2,0,\$t1)) { push @{\$graph{\$node1}}, \$node2; } } } # String starts with first node's full string. # Walk through the graph: # Delete the node behind # Add the last char of next node to string # Print result my \$node1 = \$nodes[0]; my \$q = 0; # print "\$q : \$node1\n"; # debug my \$string = \$node1; while (scalar keys %graph > 1) { my \$moved = 0; # "reverse" here somehow "does the right thing", and enables an # Eulerian circuit with no added logic. for my \$node2 (reverse @{\$graph{\$node1}}) { if (exists(\$graph{\$node2})) { \$string .= substr(\$node2,\$t1,1); # Add last char to string delete(\$graph{\$node1}); \$node1 = \$node2; \$moved = 1; # print ++\$q, " : \$node2\n"; # debug last; } } # Avoid endless loops on pathological cases unless (\$moved) { warn "Didn't find next node (\$node1)\n"; last; } } print "\$string\n"; exit;