# Example session: each line starting with "$ " is a command typed at the prompt.

# Download the reference sequence for human chromosome 20 (warning: ~100MB).
$ wget ftp://ftp.ncbi.nih.gov/genomes/Homo_sapiens/CHR_20/hs_ref_GRCh38.p12_chr20.fa.gz

# The download is gzip-compressed; decompress it so that the .fa file named
# in the next command actually exists.
$ gunzip hs_ref_GRCh38.p12_chr20.fa.gz

# This will build the 3-dimensional probability distribution of the input DNA
# sequence and serialise it to the state file.
$ analyse_DNA_sequence.pl --input-fasta hs_ref_GRCh38.p12_chr20.fa --ngram-length 3 --output-state hs_ref_GRCh38.p12_chr20.fa.3.state --output-stats stats.txt

# Now work with some text, e.g. http://www.gutenberg.org/files/84/84-0.txt
# (go easy on the Gutenberg servers: download once and reuse the local copy).
$ wget -O ShelleyFrankenstein.txt http://www.gutenberg.org/files/84/84-0.txt

# Analyse the text corpus with n-gram length 2 and save the state file.
$ analyse_text.pl --input-corpus ShelleyFrankenstein.txt --ngram-length 2 --output-state shelley.state

# Run the text predictor on the state file saved above.
$ predict_text.pl --input-state shelley.state