#!/usr/bin/perl
use warnings;
use strict;
use autodie;
use List::Util qw(sum);

# Computes, for every line of a sequence file, the average gap (in
# characters) between consecutive occurrences of a goal substring.

# Run the driver only when this file is executed as a script, so the
# subroutines below can be loaded without touching the input file.
main() unless caller;

# ----------------------------------------
# Subroutines
#

# main()
#
# Script entry point: analyses 'sequence.txt' for the pattern 'GATC'.
# Returns the array reference produced by process_file().
sub main {
    my $file = 'sequence.txt';
    my $goal = 'GATC';

    my $stats = process_file( $file, $goal );

    # do something with stats ...

    return $stats;
}

# process_file( $file, $goal )
#
# Opens $file for reading and hands the handle to process_lines().
# Returns whatever process_lines() returns (an array reference with one
# entry per line). Open/close failures throw, courtesy of autodie.
sub process_file {
    my ( $file, $goal ) = @_;

    open my $infh, '<', $file;
    my $stats = process_lines( $infh, $goal );
    close $infh;

    return $stats;
}

# process_lines( $infh, $goal )
#
# Reads $infh line by line and computes the per-line statistic with
# process_one_line(). Returns a reference to an array holding one number
# per input line; lines for which no average can be computed get 0.
sub process_lines {
    my ( $infh, $goal ) = @_;

    my @stats;
    while ( my $line = <$infh> ) {
        chomp $line;
        my $linestats = process_one_line( $line, $goal );
        push @stats, $linestats // 0;
    }

    return \@stats;
}

# process_one_line( $line, $goal )
#
# Collects the start offsets of all non-overlapping occurrences of $goal
# in $line (scanning left to right) and returns their average distance as
# computed by calc_avg_distance(). Returns nothing (undef in scalar
# context) when $goal is empty/undefined or occurs fewer than two times.
sub process_one_line {
    my ( $line, $goal ) = @_;

    # An empty pattern "matches" at every offset without ever advancing,
    # so there is nothing meaningful to measure.
    my $goal_len = length $goal;
    return unless $goal_len;

    my @occurrences;
    my $offset = 0;

    SEEK:
    while (1) {
        my $idx = index( $line, $goal, $offset );
        last SEEK if $idx == -1;    # no more occurrences
        push @occurrences, $idx;

        # Resume the search after the match just found; restarting at
        # $idx itself would find the same match forever.
        $offset = $idx + $goal_len;
    }

    return calc_avg_distance( \@occurrences, $goal_len );
}

# calc_avg_distance( $occurences, $len )
#
# $occurences is a reference to an array of ascending start offsets and
# $len is the length of the matched pattern. The distance between two
# consecutive occurrences is the number of characters between the end of
# the first and the start of the second. Returns the arithmetic mean of
# those distances, or nothing when there are fewer than two occurrences
# (no distance exists). The caller's array is left unmodified.
sub calc_avg_distance {
    my ( $occurences, $len ) = @_;

    return unless $occurences and @$occurences >= 2;

    my @distances;
    for my $i ( 1 .. $#{$occurences} ) {
        push @distances,
            $occurences->[$i] - $occurences->[ $i - 1 ] - $len;
    }

    return sum(@distances) / scalar @distances;
}