use strict;
use warnings;
use Data::Dumper;

# Summarise the "NN"-tagged records found in whitespace-separated tagged text
# and print the summary with Data::Dumper as `%hash = ( ... );`.
#
# Each input line is split on whitespace.  Field 0 is recorded as a variant,
# field 1 is the tag, and field 2 is the key the record is filed under
# (presumably word form / part-of-speech tag / lemma -- confirm against the
# data format).
#
# NOTE(review): the original script read its records from an in-memory
# filehandle, but the text it was opened on was lost (the third argument to
# `open` was corrupted and the script no longer compiled).  Input is now read
# from the files named on the command line, or from STDIN when none are given.

# collect_nouns(@handles)
#
# Reads every line from each filehandle in @handles and returns a hash
# reference of the form
#
#     { $key => { frequency => $count, variants => [ @unique_field0 ] } }
#
# Only lines with at least three fields whose second field is exactly "NN"
# are counted.  `variants` holds each distinct field-0 value once, in the
# order it was first seen.
sub collect_nouns {
    my @handles = @_;

    my %hash;
    my %seen;    # $seen{$key}{$variant} is true once the variant is stored

    for my $in_fh (@handles) {
        while ( my $line = <$in_fh> ) {

            # A single-space pattern splits on runs of whitespace and drops
            # leading whitespace, exactly like a bare `split` on $_.
            my ( $variant, $tag, $key ) = split q{ }, $line;

            # Blank or short lines have no key to file under; skipping them
            # avoids "uninitialized value" warnings and a bogus undef key.
            next unless defined $key and $tag eq q{NN};

            my $entry = $hash{$key} ||= {};
            $entry->{frequency}++;

            # De-duplicate at insertion time rather than in a second pass.
            push @{ $entry->{variants} }, $variant
                unless $seen{$key}{$variant}++;
        }
    }

    return \%hash;
}

# main(@files)
#
# Opens each named file for reading (or falls back to STDIN), collects the
# summary and prints it.  Dies with the system error if a file cannot be
# opened or closed.
sub main {
    my @files = @_;

    my @handles;
    for my $file (@files) {
        open my $in_fh, q{<}, $file
            or die "Cannot open '$file' for reading: $!\n";
        push @handles, $in_fh;
    }
    @handles = ( \*STDIN ) unless @files;

    my $hash_ref = collect_nouns(@handles);

    if (@files) {
        for my $in_fh (@handles) {
            close $in_fh or die "Cannot close input file: $!\n";
        }
    }

    # Sorted keys make the output reproducible from run to run.
    local $Data::Dumper::Sortkeys = 1;

    # Dump() selects the XS or pure-Perl implementation itself, so this also
    # works where the XS-only Dumpxs() is unavailable.
    print Data::Dumper->Dump( [$hash_ref], [ qw{ *hash } ] );

    return;
}

# Run only when executed as a script, not when loaded by another file.
main(@ARGV) unless caller;