You can also make your code more robust by using the existence of the gene in the hash instead of keeping "old" copies of the variables. This will allow your code to do the right thing even when the lines for a gene are not contiguous.
++tangent, who I see beat me to it.
#!perl
use strict;
use warnings;
use autodie;
# Annotate every "<freq> <gene>" input line with the highest frequency seen
# for that gene anywhere in the input. Each output row is
# "<freq>\t<gene>\t<max_freq>" and is written both to $out_path and STDOUT.
# open/close failures are fatal via the file-level `use autodie`.
my $out_path = 'output.txt';

# A gene's maximum is only known once all input has been read, so the parsed
# records are kept in memory and walked a second time for output. Parsing
# happens exactly once per line; blank/malformed lines are dropped here.
my @records      = map { parse_record($_) } <>;
my $max_freq_for = max_freq_by_gene(@records);

open my $out_fh, '>', $out_path;
for my $record (@records) {
    my ( $freq, $gene ) = @{$record};
    my $line = join "\t", $freq, $gene, $max_freq_for->{$gene};
    print {$out_fh} "$line\n";
    print "$line\n";
}
close $out_fh;

# Split one input line into [ $freq, $gene ].
# Returns an empty list (so it vanishes inside map) for a blank line, and
# warns before skipping a non-blank line that has no gene field.
sub parse_record {
    my ($line) = @_;

    # split ' ' ignores leading whitespace and the trailing newline, so no
    # chomp is needed; any fields beyond the second are ignored.
    my ( $freq, $gene ) = split ' ', $line;
    if ( !defined $gene ) {
        warn "Skipping malformed line: $line" if defined $freq;
        return;
    }
    return [ $freq, $gene ];
}

# Build a hash ref mapping each gene to the largest frequency among the
# given [ $freq, $gene ] records. Records for a gene need not be adjacent.
sub max_freq_by_gene {
    my (@gene_records) = @_;

    my %max_for;
    for my $record (@gene_records) {
        my ( $freq, $gene ) = @{$record};
        my $best = $max_for{$gene};

        # Test definedness rather than truth so a maximum of 0 is kept.
        if ( !defined $best or $best < $freq ) {
            $max_for{$gene} = $freq;
        }
    }
    return \%max_for;
}