<?xml version="1.0" encoding="windows-1252"?>
<node id="932185" title="Re: stupid/simple mistake" created="2011-10-18 11:55:25" updated="2011-10-18 11:55:25">
<type id="11">
note</type>
<author id="144696">
TomDLux</author>
<data>
<field name="doctext">
&lt;code&gt;
#!/usr/bin/perl
use warnings;
use strict;
use autodie;
use List::Util;

# Hard-coded input: the file to read and the substring to look for.
my ( $file, $goal ) = ( 'sequence.txt', 'GATC' );

# Run the whole pipeline; process_file() hands back its collected results.
my $stats = process_file( $file, $goal );

# do something with stats ... 

# ----------------------------------------
# Subroutines
#
# process_file( $file, $goal )
#
# Opens $file for reading, passes the handle and $goal on to
# process_lines(), closes the handle again and returns whatever
# process_lines() produced.
#
# No explicit error checks on open/close: the script enables autodie,
# which makes those built-ins throw on failure.
sub process_file {
    my ( $path, $wanted ) = @_;

    open my $in, '&lt;', $path;
    my $result = process_lines( $in, $wanted );
    close $in;

    return $result;
}

# process_lines( $infh, $goal )
#
# Reads the open filehandle $infh line by line, strips the trailing
# newline, and runs process_one_line() on each line with $goal.
#
# Returns a reference to an array with one entry per input line: the
# value returned by process_one_line(), or 0 when that value is false
# (including undefined, i.e. no result for that line).
sub process_lines {
    my ( $infh, $goal ) = @_;

    my @stats;

    # Assignment (not subtraction) so each line actually lands in $line;
    # the while condition tests definedness, ending the loop at EOF.
    while ( my $line = &lt;$infh&gt; ) {
        chomp $line;
        my $linestats = process_one_line( $line, $goal );
        push @stats, $linestats || 0;
    }

    return \@stats;
}

# process_one_line( $line, $goal )
#
# Finds the start offsets of every non-overlapping occurrence of $goal
# in $line (left to right, using index) and passes them, together with
# the length of $goal, to calc_avg_distance().
#
# Returns whatever calc_avg_distance() returns for those offsets.
# Returns nothing when $goal is undefined or empty, because an empty
# search string matches at every offset and the scan would never end.
sub process_one_line {
    my ( $line, $goal ) = @_;

    return if !defined $goal || $goal eq '';

    my $goal_len = length $goal;
    my @occurrences;
    my $offset = 0;

  SEEK:
    while ( 1 ) {
        my $idx = index( $line, $goal, $offset );
        last SEEK if $idx == -1;    # no more occurrences
        push @occurrences, $idx;

        # Resume the search just past this match; restarting at $idx
        # itself would find the same match again, forever.
        $offset = $idx + $goal_len;
    }

    return calc_avg_distance( \@occurrences, $goal_len );
}

# calc_avg_distance( $occurrences, $len )
#
# $occurrences - reference to an array of match start offsets, in
#                ascending order
# $len         - length of the matched string
#
# For each pair of neighbouring offsets the gap is
# ( later - earlier ) - $len, i.e. the number of characters between the
# end of one match and the start of the next.
#
# Returns the arithmetic mean of those gaps. Returns nothing (undef in
# scalar context) when there are fewer than two offsets, since there is
# then no gap to average. The caller's array is left untouched.
sub calc_avg_distance {
    my ( $occurrences, $len ) = @_;

    return unless $occurrences and scalar @$occurrences;

    # Work on a copy so the caller's list is not consumed.
    my @positions = @$occurrences;
    my $start     = shift @positions;

    my @distances;
    for my $end (@positions) {
        push @distances, ( $end - $start ) - $len;
        $start = $end;
    }

    # A single occurrence yields no gaps; bail out instead of dividing by 0.
    return unless @distances;

    # Fully qualified: the script loads List::Util without importing names.
    return List::Util::sum(@distances) / scalar @distances;
}
&lt;/code&gt;
&lt;!-- Node text goes above. Div tags should contain sig only --&gt;
&lt;div class="pmsig"&gt;&lt;div class="pmsig-144696"&gt;
&lt;p&gt;
As Occam said: &lt;em&gt;Entia non sunt multiplicanda praeter necessitatem.&lt;/em&gt;
&lt;/p&gt;
&lt;/div&gt;&lt;/div&gt;</field>
<field name="root_node">
932126</field>
<field name="parent_node">
932126</field>
</data>
</node>
