If your input contains none of `<`, `&`, or `]]>`, you do not need any escaping or an XML module; plain `print` statements are enough:
#!/usr/bin/perl
use warnings;
use strict;
# Field indices into each [ word, type ] annotation record.
use constant {
    WORD => 0,
    TYPE => 1,
};

# Remove a leading one-character chunk prefix ("B-", "I-", ...) from a tag.
#
# The pattern is anchored and requires the hyphen, so a tag that carries no
# prefix (for example a bare "O" or "NP") is returned unchanged instead of
# losing its first character.
#
# Takes:   the raw tag string, e.g. "B-NP".
# Returns: the tag without its prefix, e.g. "NP".
sub strip_chunk_prefix {
    my ($tag) = @_;
    (my $type = $tag) =~ s/\A.-//;
    return $type;
}

# Read whitespace-separated "WORD TAG" lines from a filehandle.
#
# Blank lines are skipped silently.  Lines with a word but no tag are
# skipped with a warning, because they cannot yield a meaningful <type>.
# Fields after the second one are ignored.
#
# Takes:   a readable filehandle (or glob reference such as \*DATA).
# Returns: a list of [ word, type ] array references, in input order.
sub parse_annotations {
    my ($fh) = @_;
    my @annotations;
    while (my $line = <$fh>) {
        my ($word, $tag) = split ' ', $line;
        next unless defined $word;    # blank / whitespace-only line
        unless (defined $tag) {
            warn "Skipping line $.: expected 'WORD TAG', got '$word'\n";
            next;
        }
        push @annotations, [ $word, strip_chunk_prefix($tag) ];
    }
    return @annotations;
}

# Build the output markup: one <text> element holding all words joined by
# single spaces, followed by one <annotation> element per record.
#
# Values are interpolated verbatim -- no XML escaping is performed -- so the
# input must not contain '<', '&' or ']]>'.
#
# Takes:   a list of [ word, type ] array references.
# Returns: the markup as a single string (no trailing newline).
sub render_xml {
    my @annotations = @_;
    my $xml = '<text>' . join(' ', map { $_->[WORD] } @annotations) . '</text>';
    for my $annotation (@annotations) {
        $xml .= '<annotation>'
              . '<type>' . $annotation->[TYPE] . '</type>'
              . '<text>' . $annotation->[WORD] . '</text>'
              . '</annotation>';
    }
    return $xml;
}

print render_xml(parse_annotations(\*DATA));
__DATA__
how B-NP
are I-NP
you I-NP
|