#! perl -sw use strict; <>; #discard header line my %table; my %lengths; while( <> ) { my( $gene, $id, undef, $site, $len ) = split; my( $pos ) = $site =~ m[(\d+)]; ## extract the digits from the site undef $table{ $gene }{ $pos }{ $id }; ## adds the id as a key with no value (saves space!) $lengths{ $gene } = $len; ## Save the gene lengths for later } #print 'output header line here if required'; for my $gene ( sort keys %table ) { print "$gene"; my $p = 1; for my $pos ( sort{ $a <=> $b } keys %{ $table{ $gene } } ) { print "\t0" x ( $pos - $p ), "\t", scalar keys %{ $table{ $gene }{ $pos } }; $p = $pos + 1; } print "\t0" x ( $lengths{ $gene } - $p ), "\n"; }