use strict;
use warnings;

open my $pdb_fh, '<', "6U9D.pdb" or die "unable to open 6U9D.pdb for reading $!";

my %amino_acid_conversion = (
    ALA => 'A',
    ARG => 'R',
    ASN => 'N',
    ASP => 'D',
    CYS => 'C',
    GLN => 'Q',
    GLU => 'E',
    GLY => 'G',
    HIS => 'H',
    ILE => 'I',
    LEU => 'L',
    LYS => 'K',
    MET => 'M',
    PHE => 'F',
    PRO => 'P',
    SER => 'S',
    THR => 'T',
    TRP => 'W',
    TYR => 'Y',
    VAL => 'V');
    
my @atoms;  #2D array of x,y,z of all atoms (many thousands)
my @seqs;   #raw sequences with the 3 letter designations
            #convert later to FASTA format

my $cur_seq = '';
my $cur_ltr = '';

# parse out data of interest from file
#
while (<$pdb_fh>)
{   
   if (my ($ltr, $seq) = (/^SEQRES\s+\d+\s+(\w+)\s+\d+\s+([A-Z ]+)$/) )
   {
      $seq =~ s/\s+$//;
      
      if ($ltr eq $cur_ltr)
      {
          $cur_seq .= " $seq";
      }
      else
      {
          push @seqs, $cur_seq if $cur_seq ne ''; # end of current sequence
          $cur_seq = $seq;                        # begin of the next sequence
          $cur_ltr = $ltr;
      }
   }   
   elsif (my ($x,$y,$z) = (/^ATOM\s+.*?([\d.-]+)\s+([\d.-]+)\s+([\d.-]+)/) )
   {
      push @atoms, [$x,$y,$z];
   }
}
push @seqs, $cur_seq;  # don't forget to finish the last seq!

#### output collected data ###

#make a fasta sequence segments
foreach my $seq (@seqs)
{  
   # my $fasta = join '',map{$amino_acid_conversion{$_}}split ' ',$seq;
  
   # without using a map:
   #
   my $fasta ='';
   foreach my $char3 (split ' ',$seq)
   {
      $fasta.= $amino_acid_conversion{$char3}
   }
   
   print ">Some Fasta Description Line\n";  #use 60 char lines
   while ($fasta)                           #fasta suggested max is 80
   {
      print substr($fasta,0,60,''),"\n";
   }
}

#print the data points
# I am not sure what needs to be done with them
# average of each coordinate?

#foreach my $row_ref (@atoms)  #uncomment to print
#{
#   print @$row_ref,"\n";
#}

my $xsum; my $ysum; my $zsum;
foreach my $row_ref (@atoms)  # @atoms is a 2D array
{
   my ($x, $y , $z ) = @$row_ref;
   $xsum+=$x;
   $ysum+=$y;
   $zsum+=$z;
}

print "avg x = ",$xsum/@atoms,"\n";
print "avg y = ",$ysum/@atoms,"\n";
print "avg z = ",$zsum/@atoms,"\n";

__END__

These are 2 examples:
You will have to figure out what goes in the FASTA description line
And perhaps not all of these sequences are relevant?  Looks like
a lot are duplicates.

>Some Fasta Description Line
MHHHHHHENLYFQGAPSFNVDPLEQPAEPSKLAKKLRAEPDMDTSFVGLTGGQIFNEMMS
RQNVDTVFGYPGGAILPVYDAIHNSDKFNFVLPKHEQGAGHMAEGYARASGKPGVVLVTS
GPGATNVVTPMADAFADGIPMVVFTGQVPTSAIGTDAFQEADVVGISRSCTKWNVMVKSV
EELPLRINEAFEIATSGRPGPVLVDLPKDVTAAILRNPIPTKTTLPSNALNQLTSRAQDE
FVMQSINKAADLINLAKKPVLYVGAGILNHADGPRLLKELSDRAQIPVTTTLQGLGSFDQ
EDPKSLDMLGMHGCATANLAVQNADLIIAVGARFDDRVTGNISKFAPEARRAAAEGRGGI
IHFEVSPKNINKVVQTQIAVEGDATTNLGKMMSKIFPVKERSEWFAQINKWKKEYPYAYM
EETPGSKIKPQTVIKKLSKVANDTGRHVIVTTGVGQHQMWAAQHWTWRNPHTFITSGGLG
TMGYGLPAAIGAQVAKPESLVIDIDGDASFNMTLTELSSAVQAGTPVKILILNNEEQGMV
TQWQSLFYEHRYSHTHQLNPDFIKLAEAMGLKGLRVKKQEELDAKLKEFVSTKGPVLLEV
EVDKKVPVLPMVAGGSGLDEFINFDPEVERQQTELRHKRTGGKH
>Some Fasta Description Line
MGSSHHHHHHSSGLVPRGSHMENLYFQGATRPPLPTLDTPSWNANSAVSSIIYETPAPSR
QPRKQHVLNCLVQNEPGVLSRVSGTLAARGFNIDSLVVCNTEVKDLSRMTIVLQGQDGVI
EQARRQIEDLVPVYAVLDYTNSEIIKRELVMARISLLGTEYFEDLLLHHHTSTNAGAADS
QELVAEIREKQFHPANLPASEVLRLKHEHLNDITNLTNNFGGRVVDISETSCIVELSAKP
TRISAFLKLVEPFGVLECARSGMMALPRTPLKTSTEEAADEDEKISEIVDISQLPPG

I have no idea what these numbers would mean?

avg x = 321.013155298296
avg y = 290.744642162734
avg z = 69.196842162731