# Projects the topology labels of a protein with a known 3-D structure onto
# the other members of a multiple alignment and reports transmembrane (TM)
# segments that end up shorter than MIN_TM_LENGTH residues.
#
# Usage:  SCRIPT STRUCTURES_FILE SEQUENCES_FILE
#
#   STRUCTURES_FILE  records terminated by a line "//"; each record holds a
#                    header line ">ACCESS|...<TAB>FAMILY", a "SEQ:" line and
#                    a "CON:" line (per-residue topology labels).
#   SEQUENCES_FILE   only its name is used: the alignment is read from
#                    "SEQUENCES_FILE.align.final_align", one row per line in
#                    the form "ACCESSION|START-END<TAB>ALIGNED_SEQUENCE".
#
# Gap columns are '-' and '.'.  Label 'M' marks a TM residue; the other
# labels (the original handled 'I' and 'O', presumably inside/outside) are
# only used to keep track of how many gap columns precede a TM segment.
use strict;
use warnings;

# Minimum number of residues a projected TM segment must keep.
use constant MIN_TM_LENGTH => 7;

# read_structures($path) -> \%consensus_of
# Returns the CON: label string of every record whose header matches,
# keyed by the six-character accession.  Records without a matching header
# are skipped (the original stored them under an empty key).
sub read_structures {
    my ($path) = @_;

    open my $fh, '<', $path
        or die "Cannot open structure file '$path': $!\n";

    # Records are separated by "//"; keep the change local to this sub.
    local $/ = "//\n";

    my %consensus_of;
    while (my $record = <$fh>) {
        next unless $record =~ /^>(\w{6})\|.*?\t\w+/m;
        my $accession = $1;
        my ($consensus) = $record =~ /^CON:(.*)/m;
        $consensus_of{$accession} = defined $consensus ? $consensus : '';
    }
    close $fh;

    return \%consensus_of;
}

# read_alignment($path) -> (\%aligned_seq_of, \%start_offset_of)
# The aligned rows cover only part of each protein (per the original notes,
# the part that hit the Pfam family), so the zero-based offset of the first
# aligned residue within the full protein is stored next to the row.
sub read_alignment {
    my ($path) = @_;

    open my $fh, '<', $path
        or die "Cannot open alignment file '$path': $!\n";

    my (%aligned_seq_of, %start_offset_of);
    while (my $line = <$fh>) {
        next unless $line =~ /^(.*?)\|(\d+)-\d+\t(.*)/;
        my ($accession, $start, $aligned) = ($1, $2, $3);
        $aligned =~ s/\s+\z//;    # drop a trailing CR or stray blanks
        $aligned_seq_of{$accession}  = $aligned;
        $start_offset_of{$accession} = $start - 1;
    }
    close $fh;

    return (\%aligned_seq_of, \%start_offset_of);
}

# project_labels($consensus, $aligned_seq, $start_offset) -> $label_row
# Builds a label row with one character per alignment column: gap columns
# keep their gap character, residue columns receive the label of the
# corresponding residue of the full-length protein.  A residue without a
# label becomes '?' so that later columns keep their positions (the
# original silently dropped such columns, shifting everything after them).
sub project_labels {
    my ($consensus, $aligned_seq, $start_offset) = @_;

    my @labels        = split //, $consensus;
    my $residue_index = $start_offset;
    my $unlabelled    = 0;
    my $label_row     = '';

    for my $column (split //, $aligned_seq) {
        if ($column eq '-' || $column eq '.') {
            $label_row .= $column;
            next;
        }
        my $label = $residue_index >= 0 ? $labels[$residue_index] : undef;
        $residue_index++;
        if (defined $label) {
            $label_row .= $label;
        }
        else {
            $label_row .= '?';
            $unlabelled++;
        }
    }
    warn "$unlabelled aligned residue(s) have no topology label\n"
        if $unlabelled;

    return $label_row;
}

# label_segments($label_row) -> list of [ $type, $first_column, $last_column ]
# Splits a label row into runs of one label.  Gap columns are attached to
# the run before them (leading gaps go to the first run), so the runs cover
# the row without holes.  A row consisting only of gaps yields no segment.
sub label_segments {
    my ($label_row) = @_;

    my @segments;
    while ($label_row =~ /[.-]*([^.-])(?:\1|[.-])*/g) {
        push @segments, [ $1, $-[0], $+[0] - 1 ];
    }
    return @segments;
}

# Columns $first..$last of $aligned_seq, or '' when they lie past its end.
sub _columns {
    my ($aligned_seq, $first, $last) = @_;
    return '' if $first >= length $aligned_seq;
    return substr $aligned_seq, $first, $last - $first + 1;
}

# Residues (gaps removed) that $aligned_seq has inside $segment; '' when
# there is no such segment.
sub _segment_residues {
    my ($aligned_seq, $segment) = @_;
    return '' unless $segment;
    my (undef, $first, $last) = @{$segment};
    my $text = _columns($aligned_seq, $first, $last);
    $text =~ tr/-.//d;
    return $text;
}

# tm_report($aligned_seq, $start_offset, \@segments) -> $text
# Walks the template segments over one aligned sequence and returns the
# diagnostic text for the FIRST 'M' segment that
#   * keeps fewer than MIN_TM_LENGTH residues in this sequence, and
#   * has the shape "---..XX" (first column a gap, last column a residue).
# Returns '' when no such segment exists.  As in the original, TM segments
# that are long enough, or short ones of any other shape, produce no output.
#
# Positions shown in the text are zero-based positions in the full protein
# ($start_offset + position inside the aligned fragment).
sub tm_report {
    my ($aligned_seq, $start_offset, $segments) = @_;

    (my $residues = $aligned_seq) =~ tr/-.//d;
    my $gaps_seen = 0;    # gap columns before the current segment

    for my $i (0 .. $#{$segments}) {
        my ($type, $first_col, $last_col) = @{ $segments->[$i] };
        my $fragment = _columns($aligned_seq, $first_col, $last_col);

        # Gaps are counted for every segment type so that the conversion
        # from column to residue index stays correct whatever labels the
        # template uses (the original only counted inside I, O and M).
        my $gaps_before = $gaps_seen;
        my $gaps_inside = ($fragment =~ tr/-.//);
        my $gaps_after  = $gaps_before + $gaps_inside;
        $gaps_seen = $gaps_after;

        next if $type ne 'M';

        (my $tm_residues = $fragment) =~ tr/-.//d;
        my $tm_length = length $tm_residues;
        next if $tm_length >= MIN_TM_LENGTH;

        # Only the "gap first, residue last" shape is handled.
        next unless $fragment =~ /\A[-.].*[^-.]\z/s;

        my $missing = MIN_TM_LENGTH - $tm_length;

        # Neighbouring segments may be long in columns yet consist mostly
        # of gaps, so measure them in residues.  The first segment has no
        # predecessor and the last one no successor.
        my $previous = _segment_residues($aligned_seq,
            $i > 0 ? $segments->[ $i - 1 ] : undef);
        my $next = _segment_residues($aligned_seq,
            $i < $#{$segments} ? $segments->[ $i + 1 ] : undef);

        # Residue positions inside the ungapped fragment ...
        my $rel_start = $first_col - $gaps_before;
        my $rel_end   = $last_col - $gaps_after;
        # ... and inside the full protein.
        my $abs_start = $start_offset + $rel_start;
        my $abs_end   = $start_offset + $rel_end;

        my $text = join("\t",
            $tm_residues, $tm_length, $first_col, $last_col,
            'PROIGOUMENO:', $previous, 'EPOMENO:', $next) . "\n";

        my $counters = join("\t",
            'BIKA ME: ',      $gaps_before,
            'VGIKA ME: ',     $gaps_after,
            'AXRISTA MESA: ', $gaps_inside);

        my $can_extend_left  = length($previous) - $missing >= 1;
        my $can_extend_right = length($next) - $missing >= 1;

        if (!$can_extend_left && !$can_extend_right) {
            # Neither neighbour can donate residues: leave the TM as it is.
            $text .= join("\t", 'EXEI ALLAGI:', $tm_residues, $tm_length,
                $abs_start, $abs_end, $counters) . ':: DE MPORW';
        }
        elsif ($can_extend_left) {
            # Extend to the left by the missing number of residues.
            $text .= join("\t", 'EXEI ALLAGI:', $tm_residues, $tm_length,
                'ARXIKA' . $abs_start . '-' . $abs_end,
                'TELIKA: ' . ($abs_start - $missing) . '-' . $abs_end,
                $counters) . '::';
            $rel_start -= $missing;
        }
        else {
            # The previous segment is too short: extend to the right.
            # TODO: the original author sketched a symmetric extension in
            # both directions for this case; it was never implemented.
            $text .= join("\t", 'EXEI ALLAGI:', $tm_residues, $tm_length,
                $abs_start, $abs_end + $missing, $counters) . '::';
            $rel_end += $missing;
        }

        # The ungapped string holds only the aligned fragment, so it must
        # be indexed with fragment-relative positions.
        $text .= substr $residues, $rel_start, $rel_end - $rel_start + 1;

        return $text;    # the original stops at the first reported TM
    }

    return '';
}

# main($structures_path, $sequences_path)
# Prints, for every aligned protein that has a structure record, its
# projected label row; then, for every other aligned protein, a ">ACCESSION"
# header, its aligned sequence, a blank line, the tm_report() text and a
# terminating newline.  Proteins are processed in accession order so that
# the output is reproducible.
sub main {
    my ($structures_path, $sequences_path) = @_;
    my $alignment_path = $sequences_path . '.align.final_align';

    my $consensus_of = read_structures($structures_path);
    my ($aligned_seq_of, $start_offset_of) = read_alignment($alignment_path);

    my @template;       # segments of the structure used as template
    my $template_ac;
    my @without_structure;

    for my $accession (sort keys %{$aligned_seq_of}) {
        if (!exists $consensus_of->{$accession}) {
            push @without_structure, $accession;
            next;
        }

        my $label_row = project_labels(
            $consensus_of->{$accession},
            $aligned_seq_of->{$accession},
            $start_offset_of->{$accession},
        );
        print $label_row, "\n";

        # Segment coordinates are alignment columns, so one template is
        # enough; mixing several (as the original did by accident) would
        # corrupt the coordinates.
        if (defined $template_ac) {
            warn "Structure '$accession' ignored: "
                . "'$template_ac' is already used as template\n";
            next;
        }
        $template_ac = $accession;
        @template    = label_segments($label_row);
    }

    for my $accession (@without_structure) {
        my $aligned_seq = $aligned_seq_of->{$accession};
        print '>', $accession, "\n";
        print $aligned_seq, "\n\n";
        print tm_report($aligned_seq, $start_offset_of->{$accession},
            \@template);
        print "\n";
    }

    return;
}

# Run only when executed as a script, so the helpers can be loaded alone.
unless (caller) {
    if (@ARGV >= 2) {
        main(@ARGV[ 0, 1 ]);
    }
    else {
        print STDERR "Usage: $0 STRUCTURES_FILE SEQUENCES_FILE\n",
            "  (the alignment is read from ",
            "SEQUENCES_FILE.align.final_align)\n";
    }
}