# Reads a dialogue corpus in XML (dialogue > turn > utt elements) and, for
# every utterance, prints four lines:
#
#   speaker (<turn> <speaker>)
#   u (<turn> [<utterance text>])
#   da (<turn> <class of the annotated dialogue act>)
#   da (<turn> <class of the annotated dialogue act> <class guessed from cues>)
#
# The "class of the annotated dialogue act" comes from the utt element's "da"
# attribute; the guessed class comes from cue phrases found in the first few
# words of the utterance text.
use strict;
use warnings;

use XML::TreeBuilder;

my $file = 'swbd_50k_42tags.xml';

# Number of leading words of an utterance that are searched for cue phrases
# (the original comment asked for "n-grams of 1-4").
my $MAX_GRAM_WORDS = 4;

# Label reported when no cue phrase matches.  The original left the label of
# the previous utterance in place in that case, which silently mislabelled
# the utterance.  NOTE(review): pick whatever placeholder downstream expects.
my $NO_CUE_LABEL = 'DUnknown';

# The original pre-incremented the turn number on every use, so the value
# drifted between the four output lines.  The offset is now applied exactly
# once per turn.  NOTE(review): this assumes the "no" attribute is zero-based;
# set to 0 if the corpus already numbers turns from 1.
my $TURN_NUMBER_OFFSET = 1;

# Builds a case-insensitive regex matching any of the given literal phrases.
# Phrases are quoted with quotemeta, so characters such as "{", "?" and "-"
# need no manual escaping.  The look-arounds stop a short cue such as "we"
# or "no" from matching inside a longer word ("well", "know").  Empty
# phrases are dropped because an empty alternative would match every input.
sub build_cue_regex {
    my @phrases = grep { defined && length } @_;
    return qr/(?!)/ if !@phrases;    # never matches

    my $alternation = join '|', map { quotemeta } @phrases;
    return qr/(?<!\w)(?:$alternation)(?!\w)/i;
}

# Annotated dialogue-act tag => coarse class.  Tags are compared literally,
# so "qy^d" must not carry a backslash (the original 'qy\^d' could never
# equal the tag).
# NOTE(review): the original listed 'sb' twice for statements; that looks
# like a typo for two different statement tags -- confirm against the corpus.
my %class_of_da = (
    'sb'   => 'Astatement',
    'qy'   => 'Question',
    'qw'   => 'Question',
    'qy^d' => 'Question',
    'b'    => 'Acknowledgement',
    'bh'   => 'Acknowledgement',
    'bk'   => 'Acknowledgement',
    'nn'   => 'Answer',
    'ny'   => 'Answer',
    'aa'   => 'Agreement',
);

# Cue-phrase rules, tried in this order; the first rule that matches wins.
# Several cues ("yes", "yeah", "uh-huh", "did you") appear in more than one
# rule, so the order is significant.
my @cue_rules = (
    [
        'Dstatement',
        build_cue_regex(
            'I think', 'I believe', 'It seems', q{It's my opinion that},
            'I mean',  'Suppose',   'Of course', 'we', 'they', 'they say',
        ),
    ],
    [
        'DQuestion',
        build_cue_regex(
            'Do you',   'Do you have', 'Do you know', 'Is that',
            'Have you', 'what',        q{who's your}, 'Does he',
            'Does she', 'Are they',    'did you',     'how about',
            q{isn't},   q{wasn't it},  q{hasn't it},  'how',
        ),
    ],
    [
        'DAgreement',
        build_cue_regex(
            'exactly', 'definitely', 'yes', q{that's a fact},
            q{that's true}, 'true',
        ),
    ],
    [
        'DAcknowledgement',
        build_cue_regex(
            '{F oh } really', 'Really',         'Is that right?',
            '{F oh} yeah',    'Is it',          '{F oh} do you',
            'No ?',           'Did you',        '{F oh} are you?',
            'was it',         'Have you?',      '{F oh} is it ?',
            'uh-huh yeah right', 'oh yes',      'oh yeah',
            'huh',            'sure',           'um',
            # The original had the single entry "huh-huhokay", which looks
            # like two cues with the separator missing.
            'huh-huh',        'okay',
            '{F oh} okay',    'oh',             '{F oh}',
            'i see',          'uh-huh',         'all right',
            'yeah',
        ),
    ],
    [
        'DAnswer',
        build_cue_regex(
            'yes',   'yeah',  'yep',            'uh-huh',
            'yes actually',   'i do',           'no',
            'um no', 'nope',  'uh actually no', 'probably not',
            'but uh no',
        ),
    ],
);

# Returns the coarse class for an annotated DA tag, or the tag itself when it
# is not one of the classified tags.
sub class_of_annotated_da {
    my ($da) = @_;
    return exists $class_of_da{$da} ? $class_of_da{$da} : $da;
}

# Returns the first (up to) $MAX_GRAM_WORDS whitespace-separated words of
# $text, joined by single spaces.
sub leading_gram {
    my ($text) = @_;
    my @words = split ' ', $text;
    splice @words, $MAX_GRAM_WORDS if @words > $MAX_GRAM_WORDS;
    return join ' ', @words;
}

# Returns the label of the first cue rule matching the leading words of
# $text, or $NO_CUE_LABEL when none matches.
sub class_from_cues {
    my ($text) = @_;
    my $gram = leading_gram($text);

    for my $rule (@cue_rules) {
        my ( $label, $regex ) = @{$rule};
        return $label if $gram =~ $regex;
    }
    return $NO_CUE_LABEL;
}

-e $file or die "Corpus file '$file' not found\n";

my $tree = XML::TreeBuilder->new();
$tree->parse_file($file);

for my $dialogue ( $tree->find_by_tag_name('dialogue') ) {
    for my $turn ( $dialogue->find_by_tag_name('turn') ) {
        my $raw_no  = $turn->attr_get_i('no');
        my $speaker = $turn->attr_get_i('speaker');
        $raw_no  = '' if !defined $raw_no;
        $speaker = '' if !defined $speaker;

        # Apply the numbering offset only to plain integers; anything else
        # is reported as found.
        my $turn_no =
            $raw_no =~ /\A-?\d+\z/
            ? $raw_no + $TURN_NUMBER_OFFSET
            : $raw_no;

        # Only the utterances of this turn: the original searched the whole
        # dialogue here, repeating every utterance once per turn.
        # NOTE(review): assumes utt elements are nested inside turn.
        for my $utt ( $turn->find_by_tag_name('utt') ) {
            my $tag = $utt->attr_get_i('da');
            $tag = '' if !defined $tag;

            my $text = $utt->as_text;
            $text = '' if !defined $text;

            my $da   = class_of_annotated_da($tag);
            my $myda = class_from_cues($text);

            # Required output format; printf keeps "[" next to a variable
            # from being read as an array subscript.
            printf "speaker (%s %s)\n", $turn_no, $speaker;
            printf "u (%s [%s])\n",     $turn_no, $text;
            printf "da (%s %s)\n",      $turn_no, $da;
            printf "da (%s %s %s)\n",   $turn_no, $da, $myda;
        }
    }
}

# Release the element tree explicitly.
$tree->delete;