#!/usr/bin/perl
#
# Counts how often each of the 20 standard amino acids occurs in a FASTA
# protein file and writes the tallies to aa_report.txt.
#
# The script prompts on STDIN for the name of the input file.  Blank lines,
# lines whose first non-blank character is '#', and FASTA header lines
# (starting with '>') are skipped; every remaining line is treated as
# sequence data.  Letters are counted case-insensitively and any character
# that is not one of the 20 standard one-letter codes is ignored.
#
# Example input:
#
#   >sp|P09153|TFAE_ECOLI Prophage tail fiber assembly protein homolog TfaE OS=Escherichia coli (strain K12) OX=83333 GN=tfaE PE=1 SV=2
#   MHKAILNSDLIATKAGDVTVYNYDGETREYISTSNEYLAVGVGIPACSCLDAPGTHKAGY
#   AICRSADFNSWEYVPDHRGETVYSTKTGESKEIKAPGDYPENTTTIAPLSPYDKWDGEKW
#   VTDTEAQHSAAVDAAEAQRQSLIDAAMASISLIQLKLQAGRKLTQAETTRLNAVLDYIDA
#   VTATDTSTAPDVIWPELPEA
#   >sp|P20605|FIC_ECOLI Probable protein adenylyltransferase Fic OS=Escherichia coli (strain K12) OX=83333 GN=fic PE=1 SV=1
#   MSDKFGEGRDPYLYPGLDIMRNRLNIRQQQRLEQAAYEMTALRAATIELGPLVRGLPHLR
#   TIHRQLYQDIFDWAGQLREVDIYQGDTPFCHFAYIEKEGNALMQDLEEEGYLVGLEKAKF
#   VERLAHYYCEINVLHPFRVGSGLAQRIFFEQLAIHAGYQLSWQGIEKEAWNQANQSGAMG
#   DLTALQMIFSKVVSEAGESE
#   >sp|P0ADH5|FIMB_ECOLI Type 1 fimbriae regulatory protein FimB OS=Escherichia coli (strain K12) OX=83333 GN=fimB PE=3 SV=1
#   MKNKADNKKRNFLTHSEIESLLKAANTGPHAARNYCLTLLCFIHGFRASEICRLRISDID
#   LKAKCIYIHRLKKGFSTTHPLLNKEVQALKNWLSIRTSYPHAESEWVFLSRKGNPLSRQQ
#   FYHIISTSGGNAGLSLEIHPHMLRHSCGFALANMGIDTRLIQDYLGHRNIRHTVWYTASN
#   AGRFYGIWDRARGRQRHAVL
#
# Report written to aa_report.txt for that input:
#
#   A=61 C=10 D=31 E=40 F=18 G=41 H=23 I=39 K=28 L=57 M=11 N=24 P=21 Q=27 R=37 S=36 T=35 V=23 W=11 Y=27

use strict;
use warnings;

# One-letter codes of the 20 standard amino acids, in report order.
my @RESIDUES = qw(A C D E F G H I K L M N P Q R S T V W Y);

# Name of the report file written by main().
my $REPORT_FILENAME = 'aa_report.txt';

# Reduce the lines of a FASTA file to a single sequence string.
#
# Params:  @lines - raw lines of the file, line endings included or not.
# Returns: all sequence lines concatenated with every whitespace character
#          removed; the empty string when there is no sequence data.
sub extract_sequence {
    my @lines = @_;

    # Start from '' so an input without sequence lines yields an empty
    # string instead of undef.
    my $sequence = '';

    LINE:
    for my $line (@lines) {
        next LINE if $line =~ /^\s*$/;     # blank line
        next LINE if $line =~ /^\s*#/;     # comment line
        next LINE if $line =~ /^>/;        # FASTA header line
        $sequence .= $line;
    }

    # Remove non-sequence data (line endings and any other whitespace).
    $sequence =~ s/\s//g;

    return $sequence;
}

# Read a FASTA file and return its sequence data as one string.
#
# Params:  $path - name of the file to read.
# Returns: see extract_sequence().
# Dies:    when the file cannot be opened or closed.
sub read_fasta_sequence {
    my ($path) = @_;

    # Three-argument open: the user-supplied name is only ever treated as
    # a path to read, never as an open mode or a pipe.
    open my $prot_fh, '<', $path
        or die "unable to open the file '$path': $!";
    my @lines = <$prot_fh>;
    close $prot_fh
        or die "unable to close the file '$path': $!";

    return extract_sequence(@lines);
}

# Tally the standard amino acids in a sequence.
#
# Params:  $sequence - string of one-letter residue codes, any case.
# Returns: hash reference mapping each of the 20 codes in @RESIDUES to its
#          number of occurrences (0 when absent).  Characters outside
#          @RESIDUES are not counted.
sub count_residues {
    my ($sequence) = @_;

    my %count_of = map { $_ => 0 } @RESIDUES;

    for my $residue (split //, uc $sequence) {
        $count_of{$residue}++ if exists $count_of{$residue};
    }

    return \%count_of;
}

# Render the tallies as the one-line report text.
#
# Params:  $count_of - hash reference as returned by count_residues().
# Returns: "A=<n> C=<n> ... Y=<n> " (space separated, trailing space).
sub format_report {
    my ($count_of) = @_;

    return join( ' ', map { $_ . '=' . $count_of->{$_} } @RESIDUES ) . ' ';
}

# Interactive entry point: prompt for the input file, echo the sequence,
# and write the residue counts to the report file.
sub main {
    print "This script will count the number of amino acids\n\n";

    print "PLEASE ENTER THE FILENAME OF THE PROTEIN SEQUENCE: ";
    my $prot_filename = <STDIN>;
    die "no filename given\n" unless defined $prot_filename;
    chomp $prot_filename;

    my $sequence = read_fasta_sequence($prot_filename);
    print " \nThe original PROTEIN file \nis:\n$sequence \n";

    my $count_of = count_residues($sequence);

    print "\n";

    # The report is opened only after the input was read successfully, so
    # a mistyped input filename does not truncate an existing report.
    open my $out_file, '>', $REPORT_FILENAME
        or die "unable to open '$REPORT_FILENAME' for writing: $!";
    print {$out_file} format_report($count_of)
        or die "unable to write to '$REPORT_FILENAME': $!";
    # Buffered write errors only surface at close, so check it.
    close $out_file
        or die "unable to close '$REPORT_FILENAME': $!";

    print "\n";
    print "done";

    return;
}

# Run only when executed as a script, so the subs above can be loaded and
# exercised without triggering the interactive prompt.
main() unless caller;

1;