#!/usr/local/bin/perl -w # # Invoke with './monkyack.pl' # # This script delivers the chatterbox as MP3 encoded streaming audio. It # uses the festival/MBROLA text-to-speech (TTS) system to create the audio # on the fly. # # The audio quality is a little funky, since there's a lot of special symbols # and 'odd words' (like user names) that the TTS has to contend with. On top # of that, I encode at 8K bits per second because of my limited bandwidth. # Nonetheless, it's kind of amusing to listen to the chatterbox. # # Requires that the festival/MBROLA server be installed, preferrably on a # local machine. Needs lame 3.88beta or higher (8K bits requires MPEG III # Layer 2.5) for the MP3 encoding. Also needs libshout and Shout.pm. Last, # but not least, you'll need a ShoutCast/IcyCast server to connect to. I # I suggest the ShoutCast server. # # festival/MBROLA - http://www.cstr.ed.ac.uk/projects/festival/ # lame - http://www.lame.org # ShoutCast server - http://www.shoutcast.com/download/ # IcyCast server - http://www.icecast.org/download.html # libshout - http://developer.icecast.org/libshout/ # Shout.pm - http://developer.icecast.org/libshout/ # LWP::Simple - http://cpan.org # XML::Twig - http://cpan.org (yay, mirod! Great stuff!) # HTML::Entities - http://cpan.org # # Notes and aimless meanderings through my thoughts: # # I started out using mp3stream from http://www.prilnari.com/mp3stream/ I # wrote a complete Inline wrapper set for it, only to find that while it # would often work, it would frequently disconnect from the ShoutCast # server for no apparent reason. Found the Shout.pm reference in the # [Internet Radio] node, switched to that. Much better success. # # I've been using festival/MBROLA for a good while (I use it to TTS my # weather station), and just kept reusing my old socket code. Works, but # it's not good enough to publish. So I decided that I would move into # the 90's, and use Speech::Synthesizer. It's not very good, and I'm being # really nice. It didn't have a couple of necessary methods, and the way # it's written, there was no decent way to subclass them. The main problem # was that give input with 2 or 3 sentences in it, it would only speak the # last sentence. What the heck good is that? Wasn't this ever tested? # So, instead I just call festival-client, and pass a temp file with the # text (supposed to be able to read from STDIN, but when kicking it off # with an open statement, it hangs. # # Now I decided to look for that cute bit of code someone write a while # back for embedding images in the __DATA__ section of a script, and # decoding them on the fly. I find the reference to it at [Hex Embedded # Images], but the code is on his no-longer-up webserver. In passing, I # find that [httptech] has done something similiar, about 10 months ago. # It speaks the chatterbox, but only to the local machine. It took the # shininess off a bit, because I didn't think this had been done before. # It's at [MonkTalk], which, oddly enough, this script was called when I # first started. I changed the name, of course. # # I've been using WinAmp 2.74 and XMMS 1.2.4. WinAmp sounds far better # but periodically disconnects from the server, for no apparent reason # (gotta love error messages. Or lack thereof...) XMMS, on the other, # stays connect through hell and high water, but sounds like crap on a # 8K stream. # # Copyright 2000,2001(c) J.C.Wren jcwren@jcwren.com # A production of Twitching Monk Software # No rights reserved, use as you see fit. # I'd like to know about it, though, just for kicks. # # Version 1.00.00 - 2001/04/11 - Initial incarnation # Version 1.00.10 - 2001/04/12 - Fix for XML with high bit set # use strict; use LWP::Simple; use XML::Twig; use HTML::Entities; use POSIX ":sys_wait_h"; use Shout; # # This to change to tailor the system # use constant cSCIP => 'localhost'; use constant cSCPort => 8001; use constant cSCPassword => 'montyhall'; use constant cSCBitRate => 8; use constant cSCName => 'Chatterbox'; use constant cSCGenre => 'Talk Radio'; use constant cSCURL => 'http://www.tinymicros.com:8000'; use constant cSCDescription => 'Perlmonks Chatterbox encoded by Festival/MBROLA'; use constant cFestivalSever => 'localhost'; use constant cTTS => '/usr/bin/festival_client --prolog mt.parms --output _temp.wav --otype wav --ttw _temp.txt'; use constant cMP3Encoder => '/usr/local/bin/lame --silent -a -m m -b 8 -s 16 _temp.wav _temp.mp3 >/dev/null'; use constant cSilence => 'silence8m.mp3'; use constant cPerlMonks => 'http://www.perlmonks.org/index.pl?node=chatterbox+xml+ticker'; # # Pretty simplistic main... # { createParmsFile () or die "Can't create Festival parameter file: $!"; my $result = fork (); die ("Can't fork off a child") if !defined ($result); $result ? playTalkies ($result) : makeTalkies (); } # # The parent. Scans the directory every 3 or more seconds (depending on the # length of the playlist), and sends the files to the server to be played. # sub playTalkies { @_ >= 1 || die "Incorrect number of arguments"; my $kidpid = shift; my $conn = new Shout; $conn->ip (cSCIP); $conn->port (cSCPort); $conn->password (cSCPassword); $conn->name (cSCName); $conn->bitrate (cSCBitRate); $conn->genre (cSCGenre); $conn->description (cSCDescription); $conn->url (cSCURL); $conn->icy_compat (1); $conn->ispublic (0); die "Can't connect to server" if (!$conn->connect); while (1) { opendir (DIR, ".") or die "Can't read directory: $!"; my @playlist = sort grep { /\d{14}\.mp3/ && -f "./$_" } readdir (DIR); close (DIR); if (!scalar (@playlist)) { playFile ($conn, cSilence) or die "Error while playing: " . $conn->error . "\n"; } else { foreach (@playlist) { playFile ($conn, $_) or die "Error while playing $_: " . $conn->error . "\n"; unlink $_ or die "Can't unlink file: $!"; } } die "Child died!\n" if (waitpid ($kidpid, &WNOHANG) == -1); } } # # Send a file to the Shoutcast/Icycast server # sub playFile { @_ >= 2 || die "Incorrect number of arguments"; my ($conn, $file) = @_; my ($buff, $len); print scalar localtime, " Playing $file\n"; open (MP3, "< $file") or return 0; while (($len = sysread (MP3, $buff, 1024)) > 0) { if (!$conn->sendData ($buff, $len)) { close MP3; return 0; } $conn->sleep; } close MP3; return 1; } # # Child thread that looks for new XML, and runs the text through festival # sub makeTalkies { my %hcbxml = (); while (1) { getChatterXML (\%hcbxml) or die; chatterXmlToSpeech (\%hcbxml); sleep (10); } } # # Run through the list return by getChatterXML(), and create MP3 files with the authors # name and text. The file name is the timestamp of the xml node. If all the comments # are already encoded, simply return (avoid overhead of opening unnecessary festival # connections) # sub chatterXmlToSpeech { @_ >= 1 || die "Incorrect number of arguments"; my $hcbxml = shift; my $encoded = 1; $encoded &= $hcbxml->{$_}->{encoded} foreach keys (%$hcbxml); return if $encoded; foreach my $line (sort keys %$hcbxml) { if (!$hcbxml->{$line}->{encoded}) { my $text; # # Some minor hacking to get rid of the more problematic characters. # Some rule based logic would be nice, but too much work. # $text = $hcbxml->{$line}->{text}; $text =~ s/[\[\]\(\)\{\}"]//g; $text =~ s/[:;\|\/]/ /g; $text .= '.'; print scalar localtime, " Encoding $line\n"; open (TEMPFILE, ">_temp.txt") or die "Can't open _temp.txt: $!"; print TEMPFILE $hcbxml->{$line}->{author} . ", " . $text; close TEMPFILE; system (cTTS) and die "Can't run TTS encoder: $!"; system (cMP3Encoder) and die "Can't run MP3 encoder: $!"; rename ("_temp.mp3", "$line.mp3") or die; unlink ("_temp.txt"); $hcbxml->{$line}->{encoded} = 1; } } } # # Fetch the chatterbox XML page, and return a hash, keyed by time, with the # author, user_id, and text fields from the XML as an anon-hash. # sub getChatterXML { @_ >= 1 || die "Incorrect number of arguments"; my $rhcbxml = shift; my $xml; $LWP::Simple::FULL_LWP = 1; if ((my $xml = get (cPerlMonks))) { # # Survive an [epoptai] DoS attack # $xml =~ s/[\r\n\t]//g; $xml =~ tr/\x80-\xff/_/; $xml =~ tr/\x00-\x1f/_/; my $twig = new XML::Twig (TwigRoots => { message => sub { my ($t, $node) = @_; my $text = decode_entities ($node->text ()); $text =~ s/'/'/g; $text =~ tr/[\r\n]//d; $rhcbxml->{$node->att ('time')} = {'author' => $node->att ('author'), 'user_id' => $node->att ('user_id'), 'text' => $text, 'encoded' => (defined ($rhcbxml->{$node->att ('time')}) ? $rhcbxml->{$node->att ('time')}->{encoded} : 0) }; $t->purge; } }); $twig->parse ($xml); return (1); } return 0; } # # festival_clients won't take the parms as a string, so we have to # create a temp file. Rather than lose the data somewhere, it's # at the end of the program. # sub createParmsFile { local $/ = undef; my $prolog = ; open (TEMPFILE, ">mt.parms") or return 0; print TEMPFILE $prolog; close TEMPFILE; return 1; } __DATA__ (voice_kal_diphone) (Parameter.set 'Duration_Stretch 1.1) #### begin 644 silence8m.mp3 M_^,8Q``,TA6$`8H0`/\31T+P%'^+8\=?\G&Y`W_XSD(I___'),46.W__^+.0 M@A[E%____C@G3__^\WA]W__RYI75]T,6U*B,_^,8Q`<,."HL`8\0`#HUH+`1 M'F13KI4A?DZM!]7SCS+59,E__1_U@3IPT97'6]: M\[:7L'.<_:6;Q56%S>+O2A=MC(E4#-:A'^U@`4"*A M.L\X-O,*$]M5CNI96A]M(LZ9>5I0Q8V=E(9T_^,8Q"4,Z!8D`,#&`#\>MD_I MS6F]G]&ZVE?N2@8CPP<1;*)@.J-+8/)YLX+./M>2KJX/AI!!B7(G4,?=M,1=QO<-4]B_ M.V[&.BZ-M:W]%PSZMZKX&=W<_^,8Q#8-L"XD`,"&`%D9K8Q;@A(5(!-^9FA= M9"XVQ2%U^AZEDMJRS]J6*)%;[D]KYI]WO+V6.6Y*E4M>Q$U5UK>E$!>>6+@$ M_^,8Q#H,\!8D`'@```8+,6*34&YE"SE]=(UBT8X]#L<&GH[R"SFMB5M6BD_Y M(LK51<>6Y"+D[-2R7,T);*H'LR*1JDBFE89=_^,8Q$$-J!H@`'@``%A,CVD# M[J7+94\J*"K$,RE0H[G\OQ,E6O*KV"$ZE^HRE9]:R"ND#/5?=>A%^1H2)8KU M[_7D%B&=X@YP_^,8Q$4-X!X@`'A$`&&9?)41YJ?T4MC74#[$H>8D"^8<^O^O]/3JL2!:`Z1_L41CL$8)C0*009+%PX(C9<5_^,8Q$@-`$8D M`,"$`'T%LA88*!1B[6(DV,R>?DR3J[_`*3!:Q9I+5`12%(>[8X7):GMJWCZU MWN15$2&G3\6KU6<8U-W8=EE"_^,8Q$\/R#HD`'A&`&"#BCE-9.E:'B)WD'J6 M$6;!W@8#'+X5)GZ&)MH(*1=H4941<`?]TJ*=E5T4&^NI#M/?IA3*#=8T6,"- M_^,8Q$H.P$8@`,#$``\T.QEKGWI:EA M`S6[MBNM2@5F[NH>960T]CE*%J(8V=RKVZKT@SI&E`Z!,0XHP=47%$.4E:DTL>J MMR6#-*WKC#XN_^,8Q$\-`"XD`,"&`+6]AH@D705.@CWL*.<$D(N1UL9UW)7< ME*QP21[4-W[5*@1S1`E@QCBH:&*."LJ]6O"F/;5M497=40[IA)\`10^ M,Z1>^NEVR\N@Y=L:+AXFYZP$7%T-F`,C8C2RT_I0L;IF4 M_^,8Q%0,^"(D`'C&`%!T^Y3MZ71A584`R"*9U[W\Q9JUO',D+$)GZ6J1*&/K M+]V;!`T*&Q9Q@XXTP7FSRQZP)I'IL^AA3RC>_^,8Q%L/.#(@`,#``,J2D7IJ M1:?GFNJJ[JM]CWU=VO12(BO]24V\50U4,.IHZ9&4+/BQ++C2-5SQQN]3'#5& M*FJ8`*L>6>JH_^,8Q%D,>"8D`,#&`&T%106OUH62N2=WF#H?]69)NRY=FCIJ M&*SS1WG@Y@)`\E`OI.ATBT/$23[*T*IH8]!1)-+(HYYH2+9[_^,8Q&(/4#H@ M`,#&`$JF*G#.N)EZ)W<\J@B;K5UV93I9.EH0@N#;=FAJA,'"Z'E%#%%T+K;+ M*-F3@NR0M4E34O[4%]3V)/#Q_^,8Q%\.<"(D`'C``,ZS8NI_,!E>YGNM)J=_ M[+'6(@52Z``9!("K$@:(I$9IRQ(?N&J%C"WZ"+%Y^C=-T[W)]2`I:E>?NW64 M_^,8Q&`-L"X@`'C``+NSO_O1+[-BW?.$AV(#(XJI]9IZ%'XH$8R MID][6NAPX,J$(/--C2`$H%%%,Y1#[7+[[U10-44'7'(J[74A@N_ M#//H%7N1><4A+_^,8Q&P.T"(D M`,"&`(>]G,#P^>6%NJNQZ$$6-0=N%TVWS28\$`L$1@N74>"3F'9G,MN=H4E+6_XQUO>QZ=ROWM M4],:B0N='+EK_^,8Q&0-X!8@`'A&`";JW]E>I0)1ZXTAY67'$$J'PLX6Y5QP M>KDH^^[8*L-$]UL67%QM#1B%!IC'L/SB#]/IO0VA6ZMAY_WZ_^,8Q&<,8!(D M`'@``)4,:]4]A*NHA)T).CZVF#"A8L\\!14*EC5<6;(#Q8.AQ%J^Z*'&15W, MK2\43?<]1V1:RQOL&,JG%N8[_^,8Q'`-4!HD`'B``&_YH7K5#PWZ8^PTX-!Y MZ4-)=(LUS7J$4W#@^I6A*M;"#N?8V M_^,8Q'4/D%(@`,&&`+V%E0\H:YVF>-!\N<%VM/TECR<@FXNSN8!5ET+VZ4%Q M=G"Q;W.'>GKE152Z9_^,8Q'$.$`XD`'@``#4J`XN! M_7CP6`!1L1$@&4(DPZ^O4ME8:;L%5'SRT':$)7IS[C1J*F""TK MNA+MO=5&L!H+F]Q0RJ@I")#N8_XNV\V/+'*<%T%;67D9_\_ MZ=)["6.0^0_T)@9%(NFK(PG,_^,8Q'(-R#8D`,#"`#"`9'))&2=ZA6'!(ZFI M19=3Z&.IFB`_((#:T-:0*I%1NM9^FE+&RKG,A1^38*V4H"RGR\UTKA>(JK/K M_^,8Q'4,CI6 MXX?>8Y1&Q7R-9QY>+C^M#TT(S,3Q]8U04?U1[I)@ M&%!81DUR1ZH\7"R2#6ZT'$_TL#YE3':.*_.11T6K!=VS0OKW_YQE:OGEJ@/B2>5A`8-G`2$:R`(6_^,8Q(`/0"X@`,#``%"#5;D5 M,O56_E;2R4/:A'>NVACF*ZGS2AE]%+"],@BV[WT,3_AN)JO?+;,SH`5#K,08 M($6H;%33B#$(_^,8Q'X-""XD`,#&`*"#&+LE6S&2WWF&\GO1[GK=T5$4J)\5A*1 M'[S<6^CUU0H&+4,3`1C%"IRHB56UZ1(TS4VJI-CG\8Z*.(O8 M_^,8Q(T/<"X@`'B$`*6&R#"S)AB"0]8O/W3CS3TM0YK.Z]Y'VV(HYER$JB+, MAA=-264P16N><(69#RR(J8.`05"8N\Y>7M2U_^,8Q(H,2!(D`'A$``R9N!-S M4"G552DV5':J%JSISO732/>66P^Y"F:?J.-5Z`B&YS2D\["`3"TJA M2BRK^M@AK$WW2S%U(HYN1'?NC&0MC=(D!%@^7L;>P,D#V&E9_^,8Q),/@$(@ M`,&&`"+'14E8Y!HZUK=;>%&E[#]^U:MHLZ@;I&UE=;'J32QCK]W4`<(C(I&J M`80#6Z`,#6L>('!`0BA%$P:>_^,8Q)`/&"H@`'B$`)=9V8:>+/OT]@TE0@@] MPW%4')_E?BBU;RS:6,ZJWL8A5NH2;`T,TN5A'``@$1*E99"`H*C*'D$1SG'E M_^,8Q(X.N$(@`,#$`*5V.6@!,/%!9K!H0=W.9[4/N##5\>%J=^%YZO:IFBMJ M#&L4#:T5#B5Y^U3Y@`QV!PX4"-SQ=%EXG64&_^,8Q(X.@#(@`'F&`"0LY#%" MV:`PX5N[20XK%RUUZ6DYG"+]%E[,\>?B)IQIAB)AVG\2:0#$UE3:%S"QH.BH M3#YTZXBXL+#!_^,8Q(\/F"8D`,#&`$1&6WG&4.RI2%3=$V\@\X+,%:'DV_/O MI9>J4(DRR>V/J6ON43?WO075$DPN!<,ZP6#.?*:0$Z4>M`-=_^,8Q(L/`#(D M`,"``&:NG9N]#\,['N<)'GW(+9BH=IJ?C_]=T[^6:VS574H:(_DLKY8CJE<@ M*8<FFE+_^,8Q(H/$"H@`'B&`(D*YZXBYVPC0SL1,;9R MJY]SNY$WOF%5D@.FC#KRQ0A0,$D`05;'98B#;KJP$A%ZX\JD;0UB'8H7`[5. M_^,8Q(@,&"HD`,"``$*H+=A$6M-)*I79W7IK/TO*(H!IG2V^ZBHB907JENE= M11#2AP8ZV4!`HQ36HLN>]4`O$5L0W4`A8M%E_^,8Q)(-`"(D`,#$`*X*G2E[ MUHB,H0A]%0TO:UQ^+/>4HV*3LLC0-RT2B\N5^YH$)"'14I!:T.MBPN=2DA8C M=0;*0.BNW&7"_^,8Q)D.T"(@`&#&`+<:1QAJ;A_B@Q[WAP7,A`_IG`U)K&O< MUZR*H"56E.IE"QT/VV(]V)4)!Q0XV9:`"A@4J.7N6=',%JMZ_^,8Q)@/>"H@ M`,#$`%,V++(F`(FZMD@D5O\.)/6K=7"+,TNJFHGM0Y]AB._8M03KV%+;F%02 M"0E`2#H4'AMC7.&M(LY!46UW_^,8Q)40`$8D`,#&``"[?2.N+,H)4L8U@K;J M=)RIE?1-CF-M9I0@54-Q"G;:"9\C1/%;VGCS'BCW5NYE+`*1$-7E4I(+?>YE`+ M%]73>T>.J?2FJXD7E6_'.$K\?U%44E6 M=-;[:LK5`<46^;K@H@H@5:%0&D")$QQ+%XM6Q^F+$8&]Z:6&_^,8Q)(-L!8D M`'@``%"_*56L[EZ+=_;6]K!;0ZBS%M&M!S*)NLR.Y6&P=*)/)"@I,,'$VSFU M[K';T[:S682@>I(SJ>@\O//>_^,8Q)8/""(@`,B``%2B3IE4:FY_9K6F MA(YET0.^Q2Y?2BC;M%OI:L\[[.U.G:GI%@2\_^,8Q)T-2#HD`'B&`--D7)`D M;.0EPQ>TJ4KHOFTD%(=V-`I!VQ#GH3[$RQC9B_K0A>FE.@]3J0.* M%*N%1^3*#'GQ_^,8Q*(,F"XD`,"``%0D/I:P^3NY1D?]B0QJ75KCJQ6\TZI: M7/>ZP7>NDLZY9AKA?Z+BHWUZE1*CQVK%!3@"+%JE--PU:.VR_^,8Q*H,0#XD M`,C&``=:Q:GAG8MMMRB#@HQAA&>EX47&OJN*[FZ*W,4A!FB93`I&TEZ]*@(@ MN",HQQ__JE(E`"I"@>=`!&F7_^,8Q+0,L!8D`'C``+D+Q'A_8Y`9:(Q4(/5* MK(=EJ\H]R&'11#BY(S'U)&-1$U[5[6JT7V1?4BI,04U%,RXX."`H8F5T82FJ M_^,8Q+P-$"HD`'C``*JJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJ MJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJ_^,8Q,(-V"H@`$C&`*JJJJJJ MJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJ MJJJJJJJJJJJJ_^,8Q,4/D#H@`4\``*JJJJJJJJJJJJJJJJJJJJJJJJJJJJJJ DJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJ ` end