# Print the feature list of every grapheme cluster found in a word list.
#
# Inputs (both read as UTF-8, paths are fixed):
#   test_hash.txt   one entry per line: a key, a TAB, then the entry's
#                   features separated by whitespace.
#   test_input.txt  one entry per line.
#
# Output (UTF-8 on STDOUT): each input entry on its own line, followed by
# one line per extended grapheme cluster (\X) of that entry holding the
# space-joined features stored for that cluster.
#
# Table keys and looked-up clusters are both normalised to NFD so that
# precomposed and decomposed spellings of the same character match.
use strict;
use warnings;
use Unicode::Normalize;

binmode(STDOUT, ':encoding(UTF-8)');
binmode(STDERR, ':encoding(UTF-8)');    # diagnostics may quote non-ASCII text

my $table_path = 'test_hash.txt';
my $input_path = 'test_input.txt';

# --- Load the key -> features table ---------------------------------------

open my $table_fh, '<:encoding(UTF-8)', $table_path
    or die "Cannot open $table_path: $!\n";

my %features_for;
while (my $line = <$table_fh>) {
    chomp $line;

    # Split at the FIRST tab only: everything before it is the key,
    # everything after it is the whitespace-separated feature list.
    my ($raw_key, $feature_text) = split /\t/, $line, 2;

    if (!defined $feature_text) {
        # No tab on this line, so there is no key to extract.  Blank lines
        # are skipped quietly; anything else is reported.
        warn "Skipping malformed line $. of $table_path (no tab)\n"
            if $line =~ /\S/;
        next;
    }

    $features_for{ NFD($raw_key) } = [ split /\s+/, $feature_text ];
}
close $table_fh;

# Debug aid: dump the loaded table.
# foreach my $phoneme (keys %features_for) {
#     print $phoneme . ":";
#     print join ",", @{ $features_for{$phoneme} };
#     print "\n";
# }

# --- Look up every grapheme cluster of every input entry -------------------

open my $input_fh, '<:encoding(UTF-8)', $input_path
    or die "Cannot open $input_path: $!\n";

while (my $entry = <$input_fh>) {
    chomp $entry;
    print $entry . "\n";

    # \X matches one extended grapheme cluster, so a base character and
    # its combining marks are looked up together as a single unit.
    for my $letter ($entry =~ /(\X)/g) {
        my $features = $features_for{ NFD($letter) };

        if (!defined $features) {
            # Unknown cluster: still emit an (empty) line so the output
            # stays aligned one-line-per-cluster, and report it on STDERR.
            warn "No features for '$letter' (line $. of $input_path)\n";
            print "\n";
            next;
        }

        print join(" ", @{$features}), "\n";
    }
}
close $input_fh;