If tag1 and tag3 are always equal, this is more complicated than it needs to be.
Is this line:
conferences NN conference
possibly a mistake? Its first and third columns differ.
#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper qw(Dumper);
# Count, for every line whose second column equals a given tag, how many
# times each third-column value occurs.
#
# Parameters:
#   $fh  - readable filehandle; each line is expected to hold three
#          whitespace-separated columns (tag1, tag2, tag3).
#   $tag - value the second column must equal for the line to be counted.
#
# Returns a hash reference mapping each third-column value to the number of
# matching lines it appeared on.
sub count_third_column_by_tag
{
    my ($fh, $tag) = @_;
    my %count_of;
    while (my $line = <$fh>)
    {
        # split ' ' (awk mode) discards leading whitespace, so an indented
        # line cannot shift every column one place to the right the way
        # split /\s+/ would (that form yields an empty leading field).
        my ($tag1, $tag2, $tag3) = split ' ', $line;

        # Blank lines and lines with fewer than three columns leave $tag3
        # undefined; skip them instead of comparing or counting undef.
        next unless defined $tag3;
        next unless $tag2 eq $tag;
        $count_of{$tag3}++;
    }
    return \%count_of;
}

# Tally the third column of every 'NN' line in the __DATA__ section.
my %hash = %{ count_third_column_by_tag(\*DATA, 'NN') };
print Dumper \%hash;
=prints
$VAR1 = {
'well' => 1,
'conference' => 3,
'International' => 1,
'preparation' => 2
};
=cut
__DATA__
The DT the
International NN International
for IN for
well NN well
preparation NN preparation
preparation NN preparation
in IN in
conference NN conference
conference NN conference
conferences NN conference
good VVG good