#!/usr/bin/env perl # # Generate a string with the longest non-repeating subsequences possible. # Include overlaps. # # Input N, size of the alphabet. # Input T, tuples (pairs, triples, quadruples, etc.) # # Start with all N-length permutations. # Create a graph of all pairs A(B...Y)Z, # such that every pair whose left member ends with B...Y, # the right member starts with B...Y (for some length n-1) # Find an Eulerian path through the permutations (visit every node only once) # The sequence of starting node, plus each additional ending letter, # is the De Bruijn sequence for this alphabet. use strict; use warnings; my $n = (shift or 4); # N for alphabet size my $t = (shift or $n); # T for Tuples (pairs, triples, quadruples) my $n1 = $n - 1; my $t1 = $t - 1; my @alphabet = ('A'..'Z','0'..'9','a'..'z'); if (@alphabet < $n) { die "Alphabet is smaller than $n\n"; } # glob character of length 1 my $alphabet = '{' . join(',', @alphabet[0..$n1]) . '}'; # Generate all strings of length $t in the given alphabet my $glob_string = $alphabet x $t; my @nodes = glob("$glob_string"); # Generate the graph of all strings that overlap in t-1 characters. my %graph; for my $node1 (@nodes) { for my $node2 (@nodes) { next if $node1 eq $node2; # If they overlap, add node2 to the array for node1 if (substr($node1,1,$t1) eq substr($node2,0,$t1)) { push @{$graph{$node1}}, $node2; } } } # String starts with first node's full string. # Walk through the graph: # Delete the node behind # Add the last char of next node to string # Print result my $node1 = $nodes[0]; my $q = 0; # print "$q : $node1\n"; # debug my $string = $node1; while (scalar keys %graph > 1) { my $moved = 0; # "reverse" here somehow "does the right thing", and enables an # Eulerian circuit with no added logic. for my $node2 (reverse @{$graph{$node1}}) { if (exists($graph{$node2})) { $string .= substr($node2,$t1,1); # Add last char to string delete($graph{$node1}); $node1 = $node2; $moved = 1; # print ++$q, " : $node2\n"; # debug last; } } # Avoid endless loops on pathological cases unless ($moved) { warn "Didn't find next node ($node1)\n"; last; } } print "$string\n"; exit;