I am working on final year project, flunked it first time os all help is appreciated.
I have a corpus of dialogues in XML format. I am trying to extract the utterance, speaker and da from the XML doc. I have written a program to extract this info and put it in a perticular format so i can use the data.
Necessary Format reqd:
s(1,g).
da(1,acknowledge).
da(acknowledge,ready,1).
u(1,['Right'
]).
snippet of XML doc=
- <dialogues>
- <dialogue name="T:/amities/Data/SwitchBoard/ws97.tar/ws97\sw00utt\sw
+_0001_4325.utt" no="1">
- <turn no="1" speaker="A">
- <utt da="bc" id="utt1
- <![CDATA[ Okay.
]]>
</utt>
- <utt da="qw" id="utt2">
- <![CDATA[ {D So, }
]]>
</utt>
</turn>
- <turn no="2" speaker="B">
- <utt da="qy^d" id="utt1">
- <![CDATA[ [ [ I guess, +
]]>
</utt>
</turn>
My Program.pl
use XML::TreeBuilder;
my $file= 'swbd_50k_42tags.xml';
my $tree = XML::TreeBuilder->new();
$tree->parse_file($file);
foreach my $dialogue ($tree->find_by_tag_name ('dialogue')){
$dialogue_name = $dialogue->attr_get_i('name');
foreach my $turn ($dialogue->find_by_tag_name('turn')){
$turn_no = $turn->attr_get_i('no');
$turn_speaker = $turn->attr_get_i('speaker');
@speaker = ( ++$turn_no, $turn_speaker );
foreach my $utt ($dialogue->find_by_tag_name('utt')){
$da = $utt->attr_get_i('da');
# $id = $utt->attr_get_i('id');
@statement = ('sb','sb'); #array of classified DA
@Question = ( 'qy', 'qw', 'qy\^d' ); #array of classified DA
@Acknowledge = ( 'b', 'bh', 'bk'); #array of classified DA
@Answer = ( 'nn', 'ny'); #array of classified DA
@Agreement = ('aa'); #array of classified DA
if ($da eq "$statement[0]" or $da eq "$statement[1]") { # chec
+king for DA class of utterance
$da = Astatement;
} elsif ($da eq "$Question[0]" or $da eq "$Question[1]" or $
+da eq "$Question[2]") { # checking for DA class of utterance
$da = Question;
} elsif ($da eq "$Acknowledge[0]" or $da eq "$Acknowledge[1]
+" or $da eq "$Acknowledge[2]") { # checking for DA class of utterance
$da = Acknowledgement;
} elsif ($da eq "$Answer[0]" or $da eq "$Answer[1]") { # c
+hecking for DA class of utterance
$da = Answer;
} elsif ($da eq "$Agreement[0]") { # checkin
+g for DA class of utterance
$da = Agreement;
}
$line = $utt->as_text;
@line = split(/\s+ /, $line);
$gram = substr($line, 0, 3); #split utterance into n-gr
+ams of 1-4
if ( $gram =~ /(I think|I believe|It seems|It\'s my opinion that|I
+ mean|Suppose|Of course|we|they|they say)/gi {
$gram = $myda;
$myda = Dstatement;
} elsif ( $gram =~ /(Do you|Do you have|Do you know|Is that|Hav
+e you|what|who\'s your|Does he|Does she|Are they|did you|how about|wh
+at|isn\'t|wasn\'t it|hasn\'t it|how)/gi {
$gram = $myda;
$myda = DQuestion;
} elsif ( $gram =~ /(exactly|definitely|yes|that\'s a fact|that
+\'s true|true)/gi {
$gram = $myda;
$myda = DAgreement;
} elsif ( $gram =~ /(\{F oh \} really|Really|Is that right\?|\{
+F oh\} yeah|Is it|\{F oh\} do you|No \?|Did you|\{F oh} are you\?|was
+ it|Have you\?|\{F oh} is it \?|\{F oh} do you|uh\-huh yeah right|oh
+yes|oh yeah|huh|sure|um|huh-huhokay|\{F oh} okay|oh|\{F oh}|i see|uh\
+-huh|all right|yeah|)/gi {
$gram = $myda;
$myda = DAcknowledgement;
} else ( $gram =~ /(yes|yeah|yep|uh\-huh|yes actually|i do|no|u
+m no|nope|uh actually no|probably not|but uh no)/gi {
$gram = $myda;
$myda = DAnswer;
}
@u = ( ++$turn_no, \[$line\] );
@da = (++$turn_no, '$da' ) ;
@myda = (++$turn_no, $da, $myda)
print 'speaker (@speaker)\n'; # reqd format
print 'u (@u)\n'; # reqd format
print 'da (@da)\n'; #reqd format
print 'da (@myda)\n'; #reqd format
}
}
}
Error message=
syntax error at try2.pl line 67, near "/(I think|I believe|It seems|It
+\'s my opi
nion that|I mean|Suppose|Of course|we|they|they say)/gi {"<br>
syntax error at try2.pl line 74, near "} elsif"<br>
syntax error at try2.pl line 79, near "} elsif"<br>
syntax error at try2.pl line 84, near "} elsif"<br>
syntax error at try2.pl line 92, near "}"<br>
Execution of try2.pl aborted due to compilation errors.
I dont know what it is, maybe i am just too much of a novice but i cant seem to get it right
20050318 Janitored by Corion: Added code tags