use strict;
use warnings;
use Data::Dumper;

# Report which columns of a CLUSTAL-style alignment are marked conserved.
#
# Input:  the files named on the command line; when none are given,
#         'pm_962355_01.dat' is read.
# Output: a debug dump of the concatenated consensus string, then the
#         0-based indexes of the '*' columns and of all other columns.
# Dies:   when a line contains a tab, or when no line matching
#         "<digits>.<name><spaces><residues>" is found (that line is what
#         the sequence start column is measured from).
#
# NOTE(review): only rows containing at least one '*' are collected, and
# rows are concatenated without padding. A block whose marker row has no
# '*' (or whose marker row is shorter than the block) would shift every
# later position - confirm the input never contains such blocks.

push @ARGV, 'pm_962355_01.dat' if not @ARGV;

my @consensus_lines;
my $seq_start_column = 0;

LINE:
while ( my $line = <> ) {
    chomp $line;
    $line =~ s/\r\z//;    # CRLF input must not add a bogus trailing column

    die "This algorithm relies on spaces - no tabs allowed!"
        if $line =~ /\t/;

    # The first row that looks like "<digits>.<name>   <residues>" fixes the
    # width of the name column for the whole file.
    if ( !$seq_start_column and $line =~ /^(\d+\.\S+\s+)\S/ ) {
        $seq_start_column = length $1;
    }

    next LINE if $line =~ /^CLUSTAL/;    # Header row
    next LINE if $line !~ /\S/;          # Blank rows

    push @consensus_lines, $line if $line =~ /\*/;
}

if ( !$seq_start_column ) {
    die "Failed to calculate start column for sequences";
}

# Drop the name column from every marker row and glue the rows together so
# that string offsets correspond to alignment columns.
my $consensus = join '', map { substr $_, $seq_start_column } @consensus_lines;

# Just for debugging
$Data::Dumper::Useqq = 1;
print Dumper $consensus;

my ( $conserved_ref, $not_conserved_ref ) = classify_positions($consensus);
my @index_c = @{$conserved_ref};
my @index_n = @{$not_conserved_ref};

# If positions are 0-based:
print " Conserved: ", join( ',', @index_c ), "\n";
print "Not conserved: ", join( ',', @index_n ), "\n";

# If positions are 1-based:
#my @offset_c = map { $_ + 1 } @index_c;
#my @offset_n = map { $_ + 1 } @index_n;
#print " Conserved: ", join(',', @offset_c), "\n";
#print "Not conserved: ", join(',', @offset_n), "\n";

# Split the 0-based offsets of a consensus string into two lists.
#
# Takes:   the consensus string.
# Returns: two array references - offsets holding '*', and offsets holding
#          anything else. Both are empty for an empty string.
#
# The range stops at length - 1: offset == length is not a character, and
# substr() yields '' there, which would otherwise be counted as a
# non-conserved position.
sub classify_positions {
    my ($marker_string) = @_;

    my ( @conserved, @not_conserved );
    for my $offset ( 0 .. length($marker_string) - 1 ) {
        if ( substr( $marker_string, $offset, 1 ) eq '*' ) {
            push @conserved, $offset;
        }
        else {
            push @not_conserved, $offset;
        }
    }

    return ( \@conserved, \@not_conserved );
}