Re: find index of specific array value that occurs multiple times

in reply to find index of specific array value that occurs multiple times

Your problem is complicated by the use of spaces in the consensus lines. If we simply split on whitespace, then we would get an incorrect index/offset.

Working, tested code:

use strict;
use warnings;

# Reports which alignment columns are conserved ('*' in the consensus rows)
# and which are not, for a space-padded CLUSTAL-style alignment file.
#
# Input : file name(s) on the command line; falls back to a default file.
# Output: a debugging dump of the joined consensus string, followed by the
#         0-based positions of conserved and non-conserved columns.

push @ARGV, 'pm_962355_01.dat' if not @ARGV;

my @consensus_lines;
my $seq_start_column = 0;
while (<>) {
    chomp;
    # Column arithmetic below counts characters, so a tab would shift offsets.
    die "This algorithm relies on spaces - no tabs allowed!" if /\t/;

    # The first line shaped like "<digits>.<name><spaces><sequence>" tells us
    # at which column the sequence data begins; consensus rows are padded
    # with spaces up to that same column.
    if ( !$seq_start_column and /^(\d+\.\S+\s+)\S/ ) {
        $seq_start_column = length $1;
    }
    next if  /^CLUSTAL/; # Header row
    next if !/\S/;       # Blank rows

    # Any remaining row containing '*' is taken to be a consensus row.
    if ( /\*/ ) {
        push @consensus_lines, $_;
    }
}
if (!$seq_start_column) {
    die "Failed to calculate start column for sequences";
}

# Strip the left padding from every consensus row and concatenate the rows
# into a single string covering the whole alignment.
my $consensus = join '', map { substr $_, $seq_start_column }
                @consensus_lines;

# Just for debugging
use Data::Dumper; $Data::Dumper::Useqq = 1;
print Dumper $consensus; 

# Valid 0-based offsets run from 0 to length-1.  Iterating up to length()
# itself would test one position past the end of the string; substr()
# returns '' there, which is "ne '*'", so a nonexistent column would be
# reported as not conserved.  An empty consensus yields an empty list.
my @indexes = 0 .. length($consensus) - 1;
my @index_c = grep { substr($consensus,$_,1) eq '*' } @indexes;
my @index_n = grep { substr($consensus,$_,1) ne '*' } @indexes;

# If positions are 0-based:
print "    Conserved: ", join(',', @index_c), "\n";
print "Not conserved: ", join(',', @index_n), "\n";

# If positions are 1-based:
#my @offset_c = map { $_ + 1 } @index_c;
#my @offset_n = map { $_ + 1 } @index_n;
#print "    Conserved: ", join(',', @offset_c), "\n";
#print "Not conserved: ", join(',', @offset_n), "\n";
[download]

Output:

$VAR1 = "        :***  *   .** *::***** **:::::**: ::*:*** :*****:*.**
+*******: .:* *   **:.*.. *****.***:***:: ..*: **.* ****:*************
+";
    Conserved: 9,10,11,14,19,20,22,25,26,27,28,29,31,32,38,39,44,46,47
+,48,51,52,53,54,55,57,59,60,61,62,63,64,65,66,67,72,74,78,79,82,86,87
+,88,89,90,92,93,94,96,97,98,104,107,108,110,112,113,114,115,117,118,1
+19,120,121,122,123,124,125,126,127,128,129
Not conserved: 0,1,2,3,4,5,6,7,8,12,13,15,16,17,18,21,23,24,30,33,34,3
+5,36,37,40,41,42,43,45,49,50,56,58,68,69,70,71,73,75,76,77,80,81,83,8
+4,85,91,95,99,100,101,102,103,105,106,109,111,116,130
[download]

In Section Seekers of Perl Wisdom