use strict;
use warnings;

# Epoch seconds at startup. Not read anywhere in this section.
my $start_time = time;

# The first two command-line arguments are the paths of input file #1 and
# input file #2, in that order. Fail early with a usage message instead of
# reaching open() with an undefined path.
die "Usage: $0 <input file #1> <input file #2>\n" if @ARGV < 2;
my ($input1, $input2) = @ARGV;

# Input file #1: keep every line that does not contain '>' (header lines are
# dropped); the remaining lines are treated as numeric lengths below.
# NOTE(review): blank lines also pass this filter and would be counted as a
# length of 0 (with a "not numeric" warning) -- confirm the input never
# contains them.
open my $in, '<', $input1 or die "Can't read source file $input1 : $!\n";
my @lengths = grep { !m/\>/ } <$in>;
close $in;
chomp @lengths;

# Input file #2: read every line as-is (minus the trailing newline).
open $in, '<', $input2 or die "Can't read source file $input2 : $!\n";
my @source = <$in>;
close $in;
chomp @source;

#********************#
# CALCULATE LENGTH DISTRIBUTION FROM INPUT FILE #1
#********************#

# Lengths in ascending numeric order.
my @sorted = sort { $a <=> $b } @lengths;

# Distinct length strings, in the order they first appear in @sorted.
my %seen;
my @uniques = grep { !$seen{$_}++ } @sorted;

# Tally the lengths in a single pass. Keys are normalised with "+ 0" so that
# entries are grouped by numeric value, matching a numeric (==) comparison
# rather than a string comparison.
my %count_of_value;
$count_of_value{ $_ + 0 }++ for @sorted;

# hash of predicted sORF length (key) and number of times (value) that size is
# observed in the multifasta input file #1
my %dstrbtn_hash;
for my $len (@uniques) {
    $dstrbtn_hash{$len} = $count_of_value{ $len + 0 };
}