<?xml version="1.0" encoding="windows-1252"?>
<node id="962403" title="Re: find index of specific array value that occurs multiple times" created="2012-03-29 09:46:05" updated="2012-03-29 09:46:05">
<type id="11">
note</type>
<author id="154438">
Util</author>
<data>
<field name="doctext">
&lt;p&gt;
Your problem is complicated by the use of spaces in the consensus lines. If we simply split on whitespace, then we would get an incorrect index/offset.
&lt;/p&gt;
&lt;p&gt;
Working, tested code:&lt;code&gt;
use strict;
use warnings;

push @ARGV, 'pm_962355_01.dat' if not @ARGV;

my @consensus_lines;
my $seq_start_column = 0;
while (&lt;&gt;) {
    chomp;
    die "This algorithm relies on spaces - no tabs allowed!" if /\t/;
    if ( !$seq_start_column and /^(\d+\.\S+\s+)\S/ ) {
        $seq_start_column = length $1;
    }
    next if  /^CLUSTAL/; # Header row
    next if !/\S/;       # Blank rows
    if ( /\*/ ) {
        push @consensus_lines, $_;
    }
}
if (!$seq_start_column) {
    die "Failed to calculate start column for sequences";
}
my $consensus = join '', map { substr $_, $seq_start_column }
                @consensus_lines;

# Just for debugging
use Data::Dumper; $Data::Dumper::Useqq = 1;
print Dumper $consensus; 

my @indexes = 0..length($consensus);
my @index_c = grep { substr($consensus,$_,1) eq '*' } @indexes;
my @index_n = grep { substr($consensus,$_,1) ne '*' } @indexes;

# If positions are 0-based:
print "    Conserved: ", join(',', @index_c), "\n";
print "Not conserved: ", join(',', @index_n), "\n";

# If positions are 1-based:
#my @offset_c = map { $_ + 1 } @index_c;
#my @offset_n = map { $_ + 1 } @index_n;
#print "    Conserved: ", join(',', @offset_c), "\n";
#print "Not conserved: ", join(',', @offset_n), "\n";
&lt;/code&gt;
Output:&lt;code&gt;
$VAR1 = "        :***  *   .** *::***** **:::::**: ::*:*** :*****:*.*********: .:* *   **:.*.. *****.***:***:: ..*: **.* ****:*************";
    Conserved: 9,10,11,14,19,20,22,25,26,27,28,29,31,32,38,39,44,46,47,48,51,52,53,54,55,57,59,60,61,62,63,64,65,66,67,72,74,78,79,82,86,87,88,89,90,92,93,94,96,97,98,104,107,108,110,112,113,114,115,117,118,119,120,121,122,123,124,125,126,127,128,129
Not conserved: 0,1,2,3,4,5,6,7,8,12,13,15,16,17,18,21,23,24,30,33,34,35,36,37,40,41,42,43,45,49,50,56,58,68,69,70,71,73,75,76,77,80,81,83,84,85,91,95,99,100,101,102,103,105,106,109,111,116,130
&lt;/code&gt;
&lt;/p&gt;
</field>
<field name="root_node">
962355</field>
<field name="parent_node">
962355</field>
</data>
</node>
