>sp|P09153|TFAE_ECOLI Prophage tail fiber assembly protein homolog TfaE OS=Escherichia coli (strain K12) OX=83333 GN=tfaE PE=1 SV=2
MHKAILNSDLIATKAGDVTVYNYDGETREYISTSNEYLAVGVGIPACSCLDAPGTHKAGY
AICRSADFNSWEYVPDHRGETVYSTKTGESKEIKAPGDYPENTTTIAPLSPYDKWDGEKW
VTDTEAQHSAAVDAAEAQRQSLIDAAMASISLIQLKLQAGRKLTQAETTRLNAVLDYIDA
VTATDTSTAPDVIWPELPEA
>sp|P20605|FIC_ECOLI Probable protein adenylyltransferase Fic OS=Escherichia coli (strain K12) OX=83333 GN=fic PE=1 SV=1
MSDKFGEGRDPYLYPGLDIMRNRLNIRQQQRLEQAAYEMTALRAATIELGPLVRGLPHLR
TIHRQLYQDIFDWAGQLREVDIYQGDTPFCHFAYIEKEGNALMQDLEEEGYLVGLEKAKF
VERLAHYYCEINVLHPFRVGSGLAQRIFFEQLAIHAGYQLSWQGIEKEAWNQANQSGAMG
DLTALQMIFSKVVSEAGESE
>sp|P0ADH5|FIMB_ECOLI Type 1 fimbriae regulatory protein FimB OS=Escherichia coli (strain K12) OX=83333 GN=fimB PE=3 SV=1
MKNKADNKKRNFLTHSEIESLLKAANTGPHAARNYCLTLLCFIHGFRASEICRLRISDID
LKAKCIYIHRLKKGFSTTHPLLNKEVQALKNWLSIRTSYPHAESEWVFLSRKGNPLSRQQ
FYHIISTSGGNAGLSLEIHPHMLRHSCGFALANMGIDTRLIQDYLGHRNIRHTVWYTASN
AGRFYGIWDRARGRQRHAVL
####
use strict;
use warnings;

# Count how often each of the 20 standard amino acids occurs in a protein
# FASTA file and write the totals to aa_report.txt.
#
# Flow: prompt on STDOUT for the FASTA filename, read it from STDIN, load the
# file, drop blank lines, '#' comment lines and '>' header lines, join the
# remaining lines into one sequence string (all whitespace removed), echo
# that string to STDOUT, tally the residues case-insensitively, and write a
# single "A=n C=n ... Y=n " line to the report file.
#
# Characters that are not one of the 20 one-letter codes listed below are
# ignored. All records in a multi-record file are pooled into a single tally.

# One-letter residue codes, in the order they appear in the report.
my @AMINO_ACIDS = qw(A C D E F G H I K L M N P Q R S T V W Y);

# Name of the report file created (or overwritten) in the current directory.
my $REPORT_FILENAME = 'aa_report.txt';

# Read the FASTA file at $path and return its sequence data as one string
# with every whitespace character removed. Returns '' when the file holds no
# sequence lines. Dies if the file cannot be opened.
sub read_sequence {
    my ($path) = @_;

    # Three-argument open: the user-supplied name is always treated as a
    # plain file to read, never as a mode prefix or a pipe.
    open(my $prot_fh, '<', $path)
        or die "unable to open the file '$path': $!";

    my $sequence = '';
    while (my $line = <$prot_fh>) {
        next if $line =~ /^\s*$/;     # discard blank line
        next if $line =~ /^\s*#/;     # discard comment line
        next if $line =~ /^>/;        # discard fasta header line
        $sequence .= $line;           # keep line, add to sequence string
    }
    close($prot_fh);

    # remove non-sequence data (in this case, whitespace)
    $sequence =~ s/\s//g;

    return $sequence;
}

# Tally the residues in $sequence. Returns a hash reference keyed by the
# upper-case one-letter code; every code in @AMINO_ACIDS is present, with 0
# for residues that never occur. Matching is case-insensitive.
sub count_residues {
    my ($sequence) = @_;

    my %count_of = map { $_ => 0 } @AMINO_ACIDS;
    for my $residue (split //, uc $sequence) {
        # Only the 20 known codes are counted; anything else is skipped.
        $count_of{$residue}++ if exists $count_of{$residue};
    }

    return \%count_of;
}

print "This script will count the number of amino acids\n\n";
print "PLEASE ENTER THE FILENAME OF THE PROTEIN SEQUENCE: ";

my $prot_filename = <STDIN>;
die "no filename was entered\n" unless defined $prot_filename;   # EOF on STDIN
chomp($prot_filename);

my $sequence = read_sequence($prot_filename);
print " \nThe original PROTEIN file is:\n$sequence \n";

my $count_of = count_residues($sequence);

# The report is opened only after the input was read successfully, so a bad
# input filename does not truncate an existing report.
open(my $out_file, '>', $REPORT_FILENAME)
    or die "unable to write '$REPORT_FILENAME': $!";
print "\n";
print {$out_file}
    join(' ', map { $_ . '=' . $count_of->{$_} } @AMINO_ACIDS), ' ';
# Buffered write errors only surface at close, so check it.
close($out_file)
    or die "unable to finish writing '$REPORT_FILENAME': $!";

print "\n";
print "done";
##
##
A=61 C=10 D=31 E=40 F=18 G=41 H=23 I=39 K=28 L=57 M=11 N=24 P=21 Q=27 R=37 S=36 T=35 V=23 W=11 Y=27