use strict; use warnings; push @ARGV, 'pm_962355_01.dat' if not @ARGV; my @consensus_lines; my $seq_start_column = 0; while (<>) { chomp; die "This algorithm relies on spaces - no tabs allowed!" if /\t/; if ( !$seq_start_column and /^(\d+\.\S+\s+)\S/ ) { $seq_start_column = length $1; } next if /^CLUSTAL/; # Header row next if !/\S/; # Blank rows if ( /\*/ ) { push @consensus_lines, $_; } } if (!$seq_start_column) { die "Failed to calculate start column for sequences"; } my $consensus = join '', map { substr $_, $seq_start_column } @consensus_lines; # Just for debugging use Data::Dumper; $Data::Dumper::Useqq = 1; print Dumper $consensus; my @indexes = 0..length($consensus); my @index_c = grep { substr($consensus,$_,1) eq '*' } @indexes; my @index_n = grep { substr($consensus,$_,1) ne '*' } @indexes; # If positions are 0-based: print " Conserved: ", join(',', @index_c), "\n"; print "Not conserved: ", join(',', @index_n), "\n"; # If positions are 1-based: #my @offset_c = map { $_ + 1 } @index_c; #my @offset_n = map { $_ + 1 } @index_n; #print " Conserved: ", join(',', @offset_c), "\n"; #print "Not conserved: ", join(',', @offset_n), "\n"; #### $VAR1 = " :*** * .** *::***** **:::::**: ::*:*** :*****:*.*********: .:* * **:.*.. *****.***:***:: ..*: **.* ****:*************"; Conserved: 9,10,11,14,19,20,22,25,26,27,28,29,31,32,38,39,44,46,47,48,51,52,53,54,55,57,59,60,61,62,63,64,65,66,67,72,74,78,79,82,86,87,88,89,90,92,93,94,96,97,98,104,107,108,110,112,113,114,115,117,118,119,120,121,122,123,124,125,126,127,128,129 Not conserved: 0,1,2,3,4,5,6,7,8,12,13,15,16,17,18,21,23,24,30,33,34,35,36,37,40,41,42,43,45,49,50,56,58,68,69,70,71,73,75,76,77,80,81,83,84,85,91,95,99,100,101,102,103,105,106,109,111,116,130