#!/usr/bin/perl
use strict;
use warnings;

# Amino-acid composition report.
#
# Input is read one paragraph (blank-line separated record) at a time.
# Each record may start with a FASTA header line (">name") and may
# contain "#" comment lines; everything else is scanned for the 20
# standard one-letter amino-acid codes (upper case only).
#
# For every record the script prints the per-residue counts, the
# sequence length and the size of the amino-acid alphabet used.  It then
# prints the record(s) using the fewest distinct amino acids, all
# records in ascending alphabet size, and how many records lack each
# amino acid.
#
# Usage:
#   script.pl file [file ...]   analyse the named files
#   script.pl                   analyse the embedded __DATA__ section

# The 20 standard amino-acid one-letter codes.  Only these are tallied as
# "absent"; the remaining letters of the alphabet can never be counted
# as present, so reporting them as excluded would be meaningless.
my @AMINO_ACIDS = qw(A C D E F G H I K L M N P Q R S T V W Y);

# analyse_record($para)
#
# Parse one paragraph of input.
#
# Parameters:
#   $para - text of a single record (header, comments and sequence).
#
# Returns a three-element list:
#   $name   - text after ">" on the first header line, or undef when the
#             paragraph has no header line
#   \%count - hash ref mapping each amino-acid letter found to the
#             number of times it occurs
#   $length - total number of amino-acid letters found
sub analyse_record {
    my ($para) = @_;

    # Remove the fasta header line, remembering the name.
    my $name;
    if ( $para =~ s/^>(.*)//m ) {
        $name = $1;
    }

    # Remove comment line(s).
    $para =~ s/^\s*#.*//mg;

    # Count residues by matching only.  The text must not be rewritten
    # while counting, otherwise the length would be measured on the
    # replacement text rather than on the sequence.
    my %count;
    my $length = 0;
    while ( $para =~ /([ACDEFGHIKLMNPQRSTVWY])/g ) {
        ++$count{$1};
        ++$length;
    }

    return ( $name, \%count, $length );
}

# main()
#
# Read all records, print the per-record details and the summary
# sections to STDOUT.  Returns nothing.
sub main {

    # Files on the command line take precedence; with no arguments the
    # embedded __DATA__ section is analysed.
    my $in = @ARGV ? \*ARGV : \*DATA;

    local $/ = '';    # paragraph mode, restored when main() returns

    my @count;                   # [ alphabet size, name ] per record
    my %absent;                  # amino acid => number of records lacking it
    my $name = '(unnamed)';      # used until the first header is seen

    while ( my $para = <$in> ) {
        my ( $header, $prot, $len ) = analyse_record($para);

        # A paragraph with neither a header nor any residue (for example
        # one holding only comments) is not a sequence entry.
        next if !defined $header && !$len;

        # A headerless paragraph keeps the name of the preceding header.
        $name = $header if defined $header;

        my $num = scalar keys %{$prot};
        push @count, [ $num, $name ];

        printf "Counted %d for %s ..\n", $num, substr( $name, 0, 50 );
        print "$name\n";
        print join( ' ', map { $_ . '=' . $prot->{$_} } sort keys %{$prot} ),
            "\n";
        print "Sequence length = $len\n";
        printf "Amino acid alphabet = %d\n\n", $num;

        # count absent
        for my $aa (@AMINO_ACIDS) {
            ++$absent{$aa} unless exists $prot->{$aa};
        }
    }

    if ( !@count ) {
        print "No sequence entries found\n";
        return;
    }

    # sort names by count in ascending order to get lowest
    my @sorted = sort { $a->[0] <=> $b->[0] } @count;
    my $lowest = $sorted[0]->[0];

    # maybe more than 1 lowest
    printf "Least number of amino acids is %d in these entries\n", $lowest;
    my @lowest = grep { $_->[0] == $lowest } @sorted;
    print "$_->[1]\n" for @lowest;

    # show all results
    print "\nAll results in ascending count\n";
    for my $entry (@sorted) {
        printf "%d %s\n", @{$entry};
    }

    print "\nExclusion of various amino acids is as follows\n";
    for my $aa ( sort keys %absent ) {
        printf "%s=%d\n", $aa, $absent{$aa};
    }

    return;
}

# Run only when executed as a script, so the subs above can be loaded
# (e.g. with require) without triggering the report.
main() unless caller;

# NOTE(review): the text below __DATA__ looks like parked code (a
# filename prompt and report-file handling) rather than sequence data;
# it is what gets analysed when no file is given - confirm intent.
__DATA__
print 'Please enter protein sequence filename: ';
chomp( my $prot_filename = );
open my $PROTFILE, '<', $prot_filename or die "Cannot open '$prot_filename' because: $!";
my $report_name = $prot_filename.'_report';
open my $out_file, '>', $report_name or die "Cannot open '$report_name' because: $!";
close $out_file;
print "Results are printed in $report_name\n";
# print absent counts
print "\nExclusion of various amino acids in $prot_filename is as follows\n";