<?xml version="1.0" encoding="windows-1252"?>
<node id="94856" title="Transcramble - Random text generator" created="2001-07-08 22:06:47" updated="2005-08-10 01:14:54">
<type id="1042">
CUFP</type>
<author id="16834">
OeufMayo</author>
<data>
<field name="doctext">
&lt;p&gt;This small script, apart from being mostly useless, generates credible sentences from a source text.&lt;/p&gt;

&lt;p&gt;The idea is to keep track of all the possible words
following a given group of words. Once this hash of arrays
has been constructed, we pick a random starting point and
construct the new text by looking for words that may
follow the preceding group.&lt;/p&gt;

&lt;p&gt;The input text should be large enough to have a broad
vocabulary and sentences range. You can find good samples on
the [http://promo.net/pg/|Project Gutenberg] site.&lt;br /&gt;
You can also try to create interesting mixes of texts, by feeding two novels in the input file!&lt;/p&gt;

&lt;p&gt;Have fun!&lt;/p&gt;


&lt;READMORE&gt;
&lt;code&gt;#!/usr/bin/perl -w
#
# transcramble - random text generator
# author:  Briac Pilpré
#
# v.0.1 : initial release.
# v.0.2 : Added Storable support and Getopt::Mixed 
#         for a nicer interface.
#
# idea 'borrowed' from : http://www.eblong.com/zarf/markov/

use strict;
use locale;
use Storable;
use Getopt::Mixed;
use vars qw($VERSION
			$opt_g $opt_w $opt_s
			$opt_i $opt_h $opt_v
		);

$VERSION = 0.2;

Getopt::Mixed::getOptions(
	'g=i group&gt;g w=i words&gt;w s=s store&gt;s i=s input h help&gt;h v verbose&gt;v'
);

$|++;

# usage: transcramble group words stored_file &gt; output

if ($opt_h){
	print &lt;&lt;"_HELP_";
transcrambler v$VERSION

	usage: transcramble -g 3 -w 500 -i stored_chains &gt; output
	usage: transcramble -g 2 -w 400 -s chains_to_store &lt; input &gt; output

	-g --group : How to group words (1, 2 or 3)
	-w --words : Number of words to generate
	-i --input : Input file generated by the -s switch
	-s --store : Name of the file where the chains
	             generated are to be stored

	-h --help  : Display this help message and exit

_HELP_
	exit 0;
}

# How to group words in the text
# 1: Group words one by one, makes nonsenses
# 2: List every word following each preceding couple of words
# 3: List every word following each preceding triplet of words
#    (make the text really like the original)
my $group = $opt_g || 2;

# Number of words to spew
my $words = $opt_w || 1000;

# Stored chains file
my $in_file = $opt_i;

# File to store the generated chains (can't be used with -i though)
my $out_file = $opt_i ? undef : $opt_s;

# Frequency list creation
my ( %tuple, @remains );

if ($in_file){
	print STDERR "Retrievieng the chains from $in_file:" if $opt_v;
	%tuple = %{ retrieve($in_file) };
	print STDERR " OK\n" if $opt_v;
}
else {
	print STDERR "Creating the chains. This may take a while\n" if $opt_v;
	while (&lt;&gt;) {

		# Split the current line for every 'word', including punctuation
		my @w = split ( /[^\w\n,.!"?;’'-]+/, $_ );
		# If there was some words left of the previous line, add them
		@w = ( @remains, @w );

		while (@w) {
		
			# If we have enough words to fill the group
			if ( scalar @w &gt;= $group + 1 ) {
				
				# Shove the next word in the word array
				push @{ $tuple{ join ( ' ', @w[ 0 .. $group - 1 ] ) } }, $w[$group];
				
				# And move to the next word
				shift (@w);
			}
			else {
				# At the end of the line, we keep the remaining words.
				@remains = @w;
				undef @w;
			}
		}
	}
	print STDERR "Saving chains to $out_file:" if $opt_v;
	store \%tuple, $out_file;
	print STDERR " OK\n" if $opt_v;
}

# Used to pick a random key in the hash
my $lh = scalar keys %tuple;
print STDERR "Done constucting the tuples. $lh tuples\n";

# Select a random key to begin
my $key = ( keys %tuple )[ int rand($lh) ];

while ( $words-- ) {

    my @first = split (/ /, $key );
    shift @first;

	# Pick a word at random in the possible following words
    my $last = @{ $tuple{$key} }[ int rand( @{ $tuple{$key} } ) ];

    print " $last";

	# And see if we can continue with the sentence or
    if ( defined $tuple{ join ( ' ', @first, $last ) } ) {
        $key = join ( ' ', @first, $last );
    }
	# Start a new one by picking a new starting group.
    else {
        $key = ( keys %tuple )[ int rand($lh) ];
    }
}

print STDERR "\nTranscrambling done.\n\n" if $opt_v;

&lt;/code&gt;
&lt;kbd&gt;-- &lt;br&gt;
my $&lt;a href="/index.pl?node=OeufMayo&amp;lastnode_id=1072"&gt;OeufMayo&lt;/a&gt; = new PerlMonger::Paris({http =&gt; '&lt;a href="http://paris.mongueurs.net"&gt;paris.mongueurs.net&lt;/a&gt;'});&lt;/kbd&gt;</field>
</data>
</node>
