use strict;
use warnings;
# Print one line per motif record found in all_motifs.txt: the motif name,
# a space, and the consensus sequence (the most frequent base at every
# position of the record's frequency matrix).
#
# A record consists of "Label value" header lines, one of which is labelled
# "Motif", followed by matrix rows of the form "<position> <A> <C> <G> <T>".
# Records are read until end of file, so neither the number of records nor
# the number of header lines per record has to be known in advance.

# Order of the frequency columns in a matrix row (see the "Pos A C G T"
# header line of each record).
my @bases = qw(A C G T);

my $motifs_file = 'all_motifs.txt';
open(my $motifs_fh, '<', $motifs_file)
    or die("Unable to open file '$motifs_file': $!");

my $motif_name;         # Name of the record being read; undef before the first.
my $consensus = '';     # Consensus letters collected so far for that record.

while (my $line = <$motifs_fh>)
{
    chomp $line;
    $line =~ s/\r\z//;  # Tolerate files with CRLF line endings.

    # Split into the leading label and the rest of the line.  The special
    # pattern ' ' splits on runs of whitespace and ignores leading whitespace.
    my ($label, $rest) = split ' ', $line, 2;
    next if !defined $label;    # Blank line.

    if ($label eq 'Motif')
    {
        # A "Motif" line always precedes the matrix rows of its own record,
        # so everything collected so far belongs to the previous record.
        print_record($motif_name, $consensus) if defined $motif_name;
        $motif_name = defined $rest ? $rest : '';
        $motif_name =~ s/\s+\z//;
        $consensus  = '';
        next;
    }

    # Only matrix rows matter from here on: an integer position followed by
    # one frequency per base.  Every other header line is ignored.
    next if !defined $motif_name;
    next if $label !~ /\A\d+\z/ || !defined $rest;

    my @freqs = split ' ', $rest;
    next if @freqs < @bases;

    $consensus .= consensus_base(@freqs);
}

# The last record is not followed by another "Motif" line, so emit it here.
print_record($motif_name, $consensus) if defined $motif_name;

close($motifs_fh);

# Return the letter of the base with the highest frequency.
# Takes the frequencies of one matrix row in @bases order.  When two or more
# values are equal, the base that comes first in @bases wins, which keeps the
# output deterministic.
sub consensus_base
{
    my @freqs = @_;

    my $best = 0;
    for my $i (1 .. $#bases)
    {
        # Strictly greater: an equal value never displaces an earlier base.
        $best = $i if $freqs[$i] > $freqs[$best];
    }
    return $bases[$best];
}

# Print one finished record as "<motif name> <consensus sequence>" followed
# by an end of line character.
sub print_record
{
    my ($name, $sequence) = @_;

    print "$name $sequence\n";
    return;
}
And this is what a few records look like:
TF Unknown
TF Name Unknown
Gene ENSG00000113916
Motif ENSG00000113916___1|1x3
Family C2H2 ZF
Species Homo_sapiens
Pos A C G T
1 0.664794 0.13099 0.0810125 0.123203
2 0.675621 0.0396475 0.144967 0.139764
3 0.0913393 0.0396819 0.847004 0.0219745
4 0.850414 0.0522149 0.0519174 0.0454536
5 0.89157 0.00962148 0.0845269 0.0142814
6 0.122389 0.0875591 0.0734604 0.716591
7 0.226696 0.00745549 0.745549 0.0202999
8 0.156228 0.151994 0.128767 0.563011
9 0.22083 0.561173 0.12007 0.0979266
10 0.507656 0.0711684 0.0652815 0.355894
TF Unknown
TF Name Unknown
Gene ENSG00000113916
Motif ENSG00000113916___1|2x3
Family C2H2 ZF
Species Homo_sapiens
Pos A C G T
1 0.538498 0.157305 0.157633 0.146564
2 0.0728444 0.00877167 0.877166 0.0412175
3 0.959269 0.0131077 0.0159611 0.0116621
4 0.852439 0.0238831 0.0168134 0.106864
5 0.57332 0.0688014 0.181385 0.176494
6 0.139513 0.0747988 0.737607 0.0480813
7 0.735484 0.0912993 0.09091 0.0823067
8 0.79932 0.0270417 0.137306 0.0363319
9 0.16103 0.12536 0.109938 0.603672
10 0.622356 0.06782 0.115463 0.194361
TF Unknown
TF Name Unknown
Gene ENSG00000113916
Motif ENSG00000113916___1|3x3
Family C2H2 ZF
Species Homo_sapiens
Pos A C G T
1 0.616484 0.0886488 0.24602 0.0488468
2 0.0971289 0.591289 0.134781 0.176801
3 0.0715039 0.0237142 0.0432674 0.861514
4 0.73769 0.117011 0.059703 0.0855963
5 0.0728444 0.00877167 0.877166 0.0412175
6 0.959269 0.0131077 0.0159611 0.0116621
7 0.852439 0.0238831 0.0168134 0.106864
8 0.57332 0.0688014 0.181385 0.176494
9 0.139513 0.0747988 0.737607 0.0480813
10 0.615257 0.189034 0.125514 0.0701943
TF Unknown
TF Name Unknown
Gene ENSG00000113916
Massive thanks!
|