#!/usr/bin/perl use strict; use Benchmark; use Data::Dumper 'Dumper'; my %methrange; my $t0 = Benchmark->new; open(IN1,"Methylation.gtf"); while () { chomp; if ( /^\s*Gm10/ ) { my ( $bgn, $end, $methcount ) = (split)[3,4,10]; $methrange{$bgn}{$end}{$_} = $methcount; } } my @sorted_meth_starts = sort {$a<=>$b} keys %methrange; my %methhash; open(INPUT, "mc_11268_10.txt"); while() { next unless /^(\d+)\t(\d+)\t\S\t(\w+)/; # skip header line and any blank lines chomp; my ( $meth, $position, $class ) = ( "Gm$1", $2, $3 ); my $indx = $#sorted_meth_starts; while ( $indx >= 0 ) { if ( $position >= $sorted_meth_starts[$indx] ) { my $bgn = $sorted_meth_starts[$indx]; for my $end ( keys %{$methrange{$bgn}} ) { if ( $position <= $end ) { for my $match ( keys %{$methrange{$bgn}{$end}} ) { $methhash{$match}{methcount} = $methrange{$bgn}{$end}{$match}; $methhash{$match}{$class}++; } } } last; } $indx--; } } open(FILEIT,">res_11268_10.txt"); print FILEIT join("\t",qw(Chromosome Gene Feature Start END CG_count_by_C CHG_count_by_C CHH_count_by_C)),"\n"; for my $ge (sort keys %methhash) { my ( $chh, $chg, $cg, $total_count ); if ( $methhash{$ge}{methcount} == 0 ) { $chh = $chg = $cg = "INF"; } else { my $denom = $methhash{$ge}{methcount}; $chh = sprintf( "%.3f", $methhash{$ge}{CHH} / $denom ); $chg = sprintf( "%.3f", $methhash{$ge}{CHG} / $denom ); $cg = sprintf( "%.3f", $methhash{$ge}{CG} / $denom ); $total_count = sprintf( "%.3f", ( $methhash{$ge}{CHH}+$methhash{$ge}{CHG}+$methhash{$ge}{CG}) / $denom ); } print FILEIT join( "\t", ( split /\t/, $ge )[0..4], $cg, $chg, $chh ),"\n"; } my $t1 = Benchmark->new; my $td = timediff($t1, $t0); print "the code took:",timestr($td),"\n";