A customer wanted me to use SAPI, and also wanted SAPI's events to fire. According to Google, nobody has succeeded in getting SAPI's events to work in Perl. The reason is bug rt://43574 in Win32::OLE, which has been open for three years and counting even though a patch has been provided.
Once that bug is fixed, the example below works.
use strict;
use Win32::OLE qw(EVENTS);
use Win32::OLE::Const;
use Data::Dumper;
# Load the SAPI type library's constants into a hash reference and create
# the voice object that the rest of the script drives.
#
# Win32::OLE::Const is terrible, the tree like interface
# ( Win32::OLE::TypeInfo class ) is explicitly undocumented
my $constHash = Win32::OLE::Const->Load('Microsoft Speech Object Library')
    or die "Unable to load the 'Microsoft Speech Object Library' constants\n";
#print Dumper($constHash);
my $vox = Win32::OLE->new('SAPI.SpVoice')
    or die "Unable to create SAPI object\n";
# Event - callback handed to Win32::OLE->WithEvents for the SAPI voice.
#
# Arguments, as read from @_:
#   $_[0]    not used by this handler (presumably the OLE object that
#            raised the event -- confirm against the Win32::OLE docs)
#   $_[1]    the event name, e.g. 'Word' or 'EndStream'
#   $_[2..]  event-specific arguments; their names per event are listed in
#            %fields_for below
#
# Builds a hash describing the event, prints it with Data::Dumper, and asks
# Win32::OLE to leave its message loop once 'EndStream' has been seen.
# Dies with "unknown event <name>" for an event name it does not know.
sub Event {
    my (undef, $event_name, @event_args) = @_;

    #foreach (@_) {
    #    print " ".$_." ";
    #}
    #print "\n";
    #system("pause");
    #stack trace from the system pause above
    #  ntdll.dll!_KiFastSystemCallRet@0()
    #  ntdll.dll!_ZwWaitForMultipleObjects@20() + 0xc
    #  kernel32.dll!_WaitForMultipleObjectsEx@20() - 0x48
    #  user32.dll!_RealMsgWaitForMultipleObjectsEx@20() + 0xd9
    #  user32.dll!_MsgWaitForMultipleObjects@20() + 0x1f
    #> perl512.dll!win32_msgwait(interpreter * my_perl=0x003940dc, unsigned long count=0x00000001, void * * handles=0x0006f500, unsigned long timeout=0xffffffff, unsigned long * resultp=0x00000000) Line 2181 + 0x19 C
    #  perl512.dll!win32_spawnvp(int mode=0x00000000, const char * cmdname=0x00a11604, const char * const * argv=0x00a1abfc) Line 4249 + 0x19 C
    #  perl512.dll!do_spawn2(interpreter * my_perl=0x003940dc, const char * cmd=0x0083a3f4, int exectype=0x00000002) Line 784 + 0x11 C
    #  perl512.dll!Perl_do_spawn(interpreter * my_perl=0x003940dc, char * cmd=0x0083a3f4) Line 820 + 0xf C
    #  perl512.dll!Perl_pp_system(interpreter * my_perl=0x003940dc) Line 4277 + 0x5e C
    #  perl512.dll!Perl_runops_debug(interpreter * my_perl=0x003940dc) Line 2049 + 0xd C
    #  perl512.dll!Perl_call_sv(interpreter * my_perl=0x003940dc, sv * sv=0x00a042ec, volatile long flags=0x00000006) Line 2590 + 0x36 C
    #  OLE.dll!EventSink::Invoke(long dispidMember=0x00000003, const _GUID & riid={...}, unsigned long lcid=0x00000400, unsigned short wFlags=0x0001, tagDISPPARAMS * pdispparams=0x0006f7d8, tagVARIANT * pvarResult=0x00000000, tagEXCEPINFO * pexcepinfo=0x00000000, unsigned int * puArgErr=0x00000000) Line 2161 + 0xf C++
    #  sapi.dll!CProxy_ISpeechVoiceEvents<CSpVoice>::Fire_VoiceChange() + 0xb8
    #  sapi.dll!CSpVoice::NotifyCallback() + 0x11d
    #  sapi.dll!CSpNotify::WndProc() + 0x60
    #  user32.dll!_InternalCallWinProc@20() + 0x28
    #  user32.dll!_UserCallWinProcCheckWow@32() + 0xb7
    #  user32.dll!_DispatchMessageWorker@8() + 0xdc
    #  user32.dll!_DispatchMessageA@4() + 0xf
    #  OLE.dll!XS_Win32__OLE_Initialize(interpreter * my_perl=0x003940dc, cv * cv=0x008d0634) Line 3549 C++
    #  perl512.dll!Perl_pp_entersub(interpreter * my_perl=0x003940dc) Line 2882 + 0x10 C
    #  perl512.dll!Perl_runops_debug(interpreter * my_perl=0x003940dc) Line 2049 + 0xd C
    #  perl512.dll!S_run_body(interpreter * my_perl=0x003940dc, long oldscope=0x00000001) Line 2308 + 0xd C
    #  perl512.dll!perl_run(interpreter * my_perl=0x003940dc) Line 2233 + 0xd C
    #  perl512.dll!RunPerl(int argc=0x00000002, char * * argv=0x00282510, char * * env=0x002828f0) Line 270 + 0x9 C++
    #  perl.exe!main(int argc=0x00000002, char * * argv=0x00282510, char * * env=0x00282d40) Line 23 + 0x12 C
    #  perl.exe!mainCRTStartup() Line 398 + 0xe C
    #  kernel32.dll!_BaseProcessStart@4() + 0x23

    # Names of the positional arguments that follow the event name, in the
    # order they arrive, for every event this handler understands.
    my %fields_for = (
        AudioLevel  => [qw(StreamNumber StreamPosition AudioLevel)],
        Phoneme     => [qw(StreamNumber StreamPosition Duration
                           NextPhoneId Feature CurrentPhoneId)],
        Word        => [qw(StreamNumber StreamPosition
                           CharacterPosition Length)],
        StartStream => [qw(StreamNumber StreamPosition)],
        VoiceChange => [qw(StreamNumber StreamPosition
                           VoiceObjectTokenDesc)],
        Sentence    => [qw(StreamNumber StreamPosition
                           CharacterPosition Length)],
        Viseme      => [qw(StreamNumber StreamPosition Duration
                           NextVisemeId Feature CurrentVisemeId)],
        EndStream   => [qw(StreamNumber StreamPosition)],
    );

    my $field_names = $fields_for{$event_name}
        or die "unknown event $event_name";

    # Pair each positional argument with its name; arguments that were not
    # supplied simply come out as undef.
    my %hash = (eventName => $event_name);
    @hash{@$field_names} = @event_args[0 .. $#$field_names];

    if ($event_name eq 'VoiceChange') {
        #Dumping the obj is pagefuls of info
        $hash{'VoiceObjectTokenDesc'}
            = $hash{'VoiceObjectTokenDesc'}->GetDescription();
    }

    # Replace numeric codes with readable text where a decoder exists.
    for my $field (qw(NextVisemeId CurrentVisemeId)) {
        next unless exists $hash{$field};
        $hash{$field} = DecodeSpeechVisemeType($hash{$field});
    }
    if (exists $hash{'Feature'}) {
        $hash{'Feature'} = DecodeSpeechVisemeFeature($hash{'Feature'});
    }

    print Dumper(\%hash);

    # The end of the stream is the last event of interest, so stop pumping
    # messages and let the script continue past MessageLoop().
    if ($event_name eq 'EndStream') {
        Win32::OLE->QuitMessageLoop();
    }
}
# Route the voice's events to Event().
my $res = Win32::OLE->WithEvents(
    $vox, \&Event
    #'_' bug in Win32 OLE, must use UUID if not fixed
    #https://rt.cpan.org/Public/Bug/Display.html?id=43574
    #, '{A372ACD1-3BEF-4BBD-8FFB-CB3E2B416AF8}'
    , '_ISpeechVoiceEvents'
);
print " OLE last error ".Win32::OLE->LastError()."\n";
#print Dumper($res);

# Ask for every event type; the constant is sanity-checked against the
# value this script was written for.
die "bad const" if $constHash->{'SVEAllEvents'} != 33790;
$vox->{'EventInterests'} = $constHash->{'SVEAllEvents'};
$res = $vox->{'EventInterests'};
print "EventInterests res ".Dumper($res);

# Start speaking with the async flag, so events are delivered while the
# message loop below is running.
my $text = "I Love Perl";
die "bad const" if $constHash->{'SVSFlagsAsync'} != 1;
$res = $vox->Speak($text, $constHash->{'SVSFlagsAsync'});
print Dumper($res);
print " ".Win32::OLE->LastError()."\n";
print "WFNE ".Dumper($res);

#potential ideas, investigate using Perl's alarm win msg loop
# Pump messages until Event() calls QuitMessageLoop() on 'EndStream'.
Win32::OLE->MessageLoop();
# DecodeSpeechVisemeType - translate a numeric viseme id into the phoneme
# names stored for it in the table below.
#
#   $_[0]    viseme id, used as an index into the table
#   returns  the table entry for that index, or undef when the table has no
#            entry there
#
# The table is built inside BEGIN so that it is filled in at compile time.
# As a plain run-time block placed here, after the MessageLoop() call, it was
# still empty while events were being handled, and every lookup returned
# undef.
BEGIN {
    my @table = (
        'silence',        #  0
        'ae ax ah',       #  1
        'aa',             #  2
        'ao',             #  3
        'ey eh uh',       #  4
        'er',             #  5
        'y iy ih ix',     #  6
        'w uw',           #  7
        'ow',             #  8
        'aw',             #  9
        'oy',             # 10
        'ay',             # 11
        'h',              # 12
        'r',              # 13
        'l',              # 14
        's z',            # 15
        'sh ch jh zh',    # 16
        'th dh',          # 17
        'f v',            # 18
        'd t n',          # 19
        'k g ng',         # 20
        'p b m',          # 21
    );

    sub DecodeSpeechVisemeType {
        my ($viseme_id) = @_;
        return $table[$viseme_id];
    }
}
# DecodeSpeechVisemeFeature - turn a numeric feature code into its name.
#
#   $_[0]    feature code, compared numerically
#   returns  'None' for 0, 'Stressed' for 1, 'Emphasis' for 2
#   dies     with "unknown SpeechVisemeFeature" for any other value
sub DecodeSpeechVisemeFeature {
    my ($feature) = @_;

    # The position in this list is the numeric code for the name.
    my @feature_names = ('None', 'Stressed', 'Emphasis');

    for my $code (0 .. $#feature_names) {
        return $feature_names[$code] if $feature == $code;
    }

    die "unknown SpeechVisemeFeature";
}
The output of the script.
OLE last error 0
EventInterests res $VAR1 = 33790;
$VAR1 = 1;
0
WFNE $VAR1 = 1;
$VAR1 = {
"StreamNumber" => 1,
"StreamPosition" => 0,
"eventName" => "StartStream"
};
$VAR1 = {
"StreamNumber" => 1,
"StreamPosition" => 0,
"VoiceObjectTokenDesc" => "LH Michael",
"eventName" => "VoiceChange"
};
$VAR1 = {
"CharacterPosition" => 0,
"Length" => 22,
"StreamNumber" => 1,
"StreamPosition" => 0,
"eventName" => "Sentence"
};
$VAR1 = {
"CharacterPosition" => 0,
"Length" => 1,
"StreamNumber" => 1,
"StreamPosition" => 0,
"eventName" => "Word"
};
$VAR1 = {
"CurrentPhoneId" => 0,
"Duration" => 15,
"Feature" => "Emphasis",
"NextPhoneId" => 0,
"StreamNumber" => 1,
"StreamPosition" => 0,
"eventName" => "Phoneme"
};
$VAR1 = {
"CurrentVisemeId" => undef,
"Duration" => 15,
"Feature" => "Emphasis",
"NextVisemeId" => undef,
"StreamNumber" => 1,
"StreamPosition" => 0,
"eventName" => "Viseme"
};
$VAR1 = {
"CurrentPhoneId" => 1,
"Duration" => 15,
"Feature" => "Emphasis",
"NextPhoneId" => 0,
"StreamNumber" => 1,
"StreamPosition" => 330,
"eventName" => "Phoneme"
};
$VAR1 = {
"CurrentVisemeId" => undef,
"Duration" => 15,
"Feature" => "Emphasis",
"NextVisemeId" => undef,
"StreamNumber" => 1,
"StreamPosition" => 330,
"eventName" => "Viseme"
};
$VAR1 = {
"CurrentPhoneId" => 11,
"Duration" => 145,
"Feature" => "Emphasis",
"NextPhoneId" => 0,
"StreamNumber" => 1,
"StreamPosition" => 660,
"eventName" => "Phoneme"
};
$VAR1 = {
"CurrentVisemeId" => undef,
"Duration" => 145,
"Feature" => "Emphasis",
"NextVisemeId" => undef,
"StreamNumber" => 1,
"StreamPosition" => 660,
"eventName" => "Viseme"
};
$VAR1 = {
"AudioLevel" => 31,
"StreamNumber" => 1,
"StreamPosition" => 2261,
"eventName" => "AudioLevel"
};
$VAR1 = {
"CharacterPosition" => 2,
"Length" => 4,
"StreamNumber" => 1,
"StreamPosition" => 3850,
"eventName" => "Word"
};
$VAR1 = {
"CurrentPhoneId" => 14,
"Duration" => 90,
"Feature" => "Emphasis",
"NextPhoneId" => 0,
"StreamNumber" => 1,
"StreamPosition" => 3850,
"eventName" => "Phoneme"
};
$VAR1 = {
"CurrentVisemeId" => undef,
"Duration" => 90,
"Feature" => "Emphasis",
"NextVisemeId" => undef,
"StreamNumber" => 1,
"StreamPosition" => 3850,
"eventName" => "Viseme"
};
$VAR1 = {
"AudioLevel" => 28,
"StreamNumber" => 1,
"StreamPosition" => 4649,
"eventName" => "AudioLevel"
};
$VAR1 = {
"CurrentPhoneId" => 1,
"Duration" => 94,
"Feature" => "Emphasis",
"NextPhoneId" => 0,
"StreamNumber" => 1,
"StreamPosition" => 5830,
"eventName" => "Phoneme"
};
$VAR1 = {
"CurrentVisemeId" => undef,
"Duration" => 94,
"Feature" => "Emphasis",
"NextVisemeId" => undef,
"StreamNumber" => 1,
"StreamPosition" => 5830,
"eventName" => "Viseme"
};
$VAR1 = {
"CurrentPhoneId" => 18,
"Duration" => 63,
"Feature" => "Emphasis",
"NextPhoneId" => 0,
"StreamNumber" => 1,
"StreamPosition" => 7898,
"eventName" => "Phoneme"
};
$VAR1 = {
"CurrentVisemeId" => undef,
"Duration" => 63,
"Feature" => "Emphasis",
"NextVisemeId" => undef,
"StreamNumber" => 1,
"StreamPosition" => 7898,
"eventName" => "Viseme"
};
$VAR1 = {
"AudioLevel" => 17,
"StreamNumber" => 1,
"StreamPosition" => 8007,
"eventName" => "AudioLevel"
};
$VAR1 = {
"CharacterPosition" => 7,
"Length" => 4,
"StreamNumber" => 1,
"StreamPosition" => 9284,
"eventName" => "Word"
};
$VAR1 = {
"CurrentPhoneId" => 21,
"Duration" => 121,
"Feature" => "Emphasis",
"NextPhoneId" => 0,
"StreamNumber" => 1,
"StreamPosition" => 9284,
"eventName" => "Phoneme"
};
$VAR1 = {
"CurrentVisemeId" => undef,
"Duration" => 121,
"Feature" => "Emphasis",
"NextVisemeId" => undef,
"StreamNumber" => 1,
"StreamPosition" => 9284,
"eventName" => "Viseme"
};
$VAR1 = {
"AudioLevel" => 5,
"StreamNumber" => 1,
"StreamPosition" => 11111,
"eventName" => "AudioLevel"
};
$VAR1 = {
"CurrentPhoneId" => 4,
"Duration" => 155,
"Feature" => "Emphasis",
"NextPhoneId" => 0,
"StreamNumber" => 1,
"StreamPosition" => 11946,
"eventName" => "Phoneme"
};
$VAR1 = {
"CurrentVisemeId" => undef,
"Duration" => 155,
"Feature" => "Emphasis",
"NextVisemeId" => undef,
"StreamNumber" => 1,
"StreamPosition" => 11946,
"eventName" => "Viseme"
};
$VAR1 = {
"AudioLevel" => 28,
"StreamNumber" => 1,
"StreamPosition" => 13493,
"eventName" => "AudioLevel"
};
$VAR1 = {
"CurrentPhoneId" => 13,
"Duration" => 90,
"Feature" => "Emphasis",
"NextPhoneId" => 0,
"StreamNumber" => 1,
"StreamPosition" => 15356,
"eventName" => "Phoneme"
};
$VAR1 = {
"CurrentVisemeId" => undef,
"Duration" => 90,
"Feature" => "Emphasis",
"NextVisemeId" => undef,
"StreamNumber" => 1,
"StreamPosition" => 15356,
"eventName" => "Viseme"
};
$VAR1 = {
"AudioLevel" => 27,
"StreamNumber" => 1,
"StreamPosition" => 16835,
"eventName" => "AudioLevel"
};
$VAR1 = {
"CurrentPhoneId" => 14,
"Duration" => 101,
"Feature" => "Emphasis",
"NextPhoneId" => 0,
"StreamNumber" => 1,
"StreamPosition" => 17336,
"eventName" => "Phoneme"
};
$VAR1 = {
"CurrentVisemeId" => undef,
"Duration" => 101,
"Feature" => "Emphasis",
"NextVisemeId" => undef,
"StreamNumber" => 1,
"StreamPosition" => 17336,
"eventName" => "Viseme"
};
$VAR1 = {
"CurrentPhoneId" => 0,
"Duration" => 421,
"Feature" => "Emphasis",
"NextPhoneId" => 0,
"StreamNumber" => 1,
"StreamPosition" => 19558,
"eventName" => "Phoneme"
};
$VAR1 = {
"CurrentVisemeId" => undef,
"Duration" => 421,
"Feature" => "Emphasis",
"NextVisemeId" => undef,
"StreamNumber" => 1,
"StreamPosition" => 19558,
"eventName" => "Viseme"
};
$VAR1 = {
"AudioLevel" => 0,
"StreamNumber" => 1,
"StreamPosition" => 20169,
"eventName" => "AudioLevel"
};
$VAR1 = {
"AudioLevel" => 0,
"StreamNumber" => 1,
"StreamPosition" => 23507,
"eventName" => "AudioLevel"
};
$VAR1 = {
"AudioLevel" => 0,
"StreamNumber" => 1,
"StreamPosition" => 26847,
"eventName" => "AudioLevel"
};
$VAR1 = {
"StreamNumber" => 1,
"StreamPosition" => 28820,
"eventName" => "EndStream"
};