#!perl -w
use strict;
use warnings;

###########################################################################
# jtoeng.pl
#
# A Japanese to English file translator
# by Brett T. Warden
# NEC Eluminant Technologies, Inc.
# Created 21 February 2001
# Lastmod 15 October 2001
#
# Usage:
#   jtoeng.pl FILE [FILE ...]   writes FILE's translation next to it, with
#                               '.english' inserted before the suffix
#   jtoeng.pl < in > out        filters STDIN to STDOUT
#
# Translations are cached in 'jtoeng.dict' (a Storable image in the current
# directory) so each distinct chunk is sent to Babelfish at most once.
###########################################################################

use Jcode;
use WWW::Babelfish;
use Storable qw(nfreeze thaw);
use File::Basename;
#use Data::Dumper;

my $DEBUG = 0;

# Location of the persistent translation cache, relative to the cwd.
my $DICT_FILE = 'jtoeng.dict';

print "\nConnecting to translator... please wait.\n\n" if $DEBUG;
my $babel = WWW::Babelfish->new();
die( "Babelfish server unavailable\n" ) unless defined($babel);

# chunk => translation. A key that exists with an undef value records a
# chunk for which no usable translation was obtained, so it is not retried.
my %dict = load_dictionary();

if (@ARGV) {
    ARG: for my $infile (@ARGV) {
        print "Trying to read $infile\n";

        # Three-arg open: the file name can never be taken as a mode or pipe.
        my $in_fh;
        unless (open($in_fh, '<', $infile)) {
            warn "Unable to read $infile: $!\n";
            next ARG;
        }
        binmode($in_fh);

        my ($name, $path, $suffix) = fileparse($infile, '\..*');
        my $outfile = $path . $name . '.english' . $suffix;
        print "Preparing $outfile\n";

        my $out_fh;
        open($out_fh, '>', $outfile)
            or die "Unable to write $outfile: $!\n";
        binmode($out_fh);

        # Unbuffer the output file so progress is visible while translating.
        my $previous_fh = select($out_fh);
        $| = 1;
        select($previous_fh);

        my $TRANSLATIONS  = 0;
        my $BABELFISHINGS = 0;

        # Translate
        print "Translating $infile\n";
        translate($in_fh, $out_fh, \$TRANSLATIONS, \$BABELFISHINGS);
        close($out_fh);

        print "Performed $TRANSLATIONS translations, of which $BABELFISHINGS were directly requested from Babelfish.\n";
        print "Translation complete\n\n";

        close($in_fh);
    }
} else {
    my $TRANSLATIONS  = 0;
    my $BABELFISHINGS = 0;
    translate(\*STDIN, \*STDOUT, \$TRANSLATIONS, \$BABELFISHINGS);
    print "Performed $TRANSLATIONS translations, of which $BABELFISHINGS were directly requested from Babelfish.\n" if $DEBUG;
}

# load_dictionary()
#
# Reads the translation cache from $DICT_FILE.
# Returns the cache as a flat key/value list; the list is empty when the
# file is missing, empty, or cannot be thawed into a hash.
sub load_dictionary {
    my $dict_fh;
    open($dict_fh, '<', $DICT_FILE) or return;
    binmode($dict_fh);

    my $frozen = do { local $/; <$dict_fh> };
    close($dict_fh);
    return unless defined($frozen) and length($frozen);

    # thaw can die on a damaged image; treat that as "no cache" instead of
    # aborting the whole run.
    my $ref = eval { thaw($frozen) };
    return unless ref($ref) eq 'HASH';

    # Yeah this is inefficient.  Ideally I'd use a DB anyway.
    return %{$ref};
}

# save_dictionary()
#
# Writes the whole of %dict to $DICT_FILE. Failures are reported with a
# warning and otherwise ignored: the cache is only an optimisation.
sub save_dictionary {
    my $frozen = nfreeze(\%dict) or return;

    my $dict_fh;
    unless (open($dict_fh, '>', $DICT_FILE)) {
        warn "Unable to write $DICT_FILE: $!\n";
        return;
    }
    binmode($dict_fh);
    print $dict_fh $frozen;
    close($dict_fh) or warn "Unable to write $DICT_FILE: $!\n";
    return;
}

# translate($IFH, $OFH, \$TRANSLATIONS, \$BABELFISHINGS)
#
# Copies $IFH to $OFH line by line. In every line that Jcode does not
# classify as ascii, each whitespace-delimited non-ascii chunk is replaced
# by its English translation, taken from %dict when cached and from
# Babelfish otherwise.
#
#   $IFH, $OFH      input and output filehandles (required; the sub returns
#                   immediately if either is missing)
#   $TRANSLATIONS   optional scalar ref, incremented once per chunk for
#                   which a translation was available
#   $BABELFISHINGS  optional scalar ref, incremented once per non-blank
#                   answer received from Babelfish
#
# Returns nothing.
sub translate {
    my $IFH           = shift or return;
    my $OFH           = shift or return;
    my $TRANSLATIONS  = shift;
    my $BABELFISHINGS = shift;

    LINE: while (my $text = <$IFH>) {
        # If it's ascii, then it doesn't need to be translated?
        my $code = getcode($text) || '';
        print "Line coding: $code\n" if ($code and $DEBUG);

        unless ($code eq 'ascii') {
            if ($code) {
                # Not ascii, run through Jcode.
                $text = Jcode->new($text)->utf8;
            }

            my @chunks = $text =~ m!(\S+)!g;
            CHUNK: for my $word (@chunks) {
                my $chunk      = $word;
                my $chunk_code = getcode($chunk) || '';
                next CHUNK if ($chunk_code and ($chunk_code eq 'ascii'));

                # Drop comment markers glued to the text so only the text
                # itself is looked up and replaced.
                $chunk =~ s!^//!!;
                $chunk =~ s!^#+!!;
                $chunk =~ s!^/\*+!!;
                $chunk =~ s!\*/$!!;
                print "Chunk: $chunk\n" if $DEBUG;

                my $trans;
                if (exists($dict{$chunk})) {
                    if (defined($dict{$chunk})) {
                        $trans = $dict{$chunk};
                        print "Dictionary: $chunk = $trans\n" if $DEBUG;
                        $text =~ s!\Q$chunk\E!$trans!;
                        $$TRANSLATIONS++ if $TRANSLATIONS;
                    } else {
                        print "Skipping $chunk -- translation failed previously.\n" if $DEBUG;
                    }
                    next CHUNK;
                }

                print "\n" if $DEBUG;
                print "Translating: $chunk\n" if $DEBUG;
                $trans = $babel->translate(
                    source      => 'Japanese',
                    destination => 'English',
                    text        => $chunk,
                    delimiter   => "\n",
                );

                unless (defined($trans)) {
                    warn "Lookup on $chunk failed.\n";
                    next CHUNK;
                }

                # Replace those annoying &nbsp;s that Babelfish loves.
                # NOTE(review): the pattern was a bare space in the copy
                # under review (a no-op); the entity is the presumed
                # original -- confirm against real Babelfish output.
                $trans =~ s!&nbsp;! !g;
                chomp $trans;

                if ($trans =~ m!^\s*$!) {
                    # Babelfish returned nothing.
                    print "No useful translation returned.\n" if $DEBUG;
                    sleep 2 if $DEBUG;
                    # Make an entry in the dict in case somebody
                    # wants to try to translate it later.
                    $dict{$chunk} = undef;
                } else {
                    $$TRANSLATIONS++  if $TRANSLATIONS;
                    $$BABELFISHINGS++ if $BABELFISHINGS;
                    if ($chunk ne $trans) {
                        # Answer looks useful.  Use it and keep it.
                        $text =~ s!\Q$chunk\E!$trans!;
                        $dict{$chunk} = $trans;
                        print "Translated\n\t$chunk\nto\n\t$trans\n" if $DEBUG;
                    } else {
                        # Store a placeholder in the dict
                        # so we don't waste time sending it to
                        # Babelfish again
                        $dict{$chunk} = undef;
                        print "Babelfish returned what we sent it.\n" if $DEBUG;
                    }
                }

                # Persist after every change so an interrupted run keeps
                # whatever has been learned so far.
                save_dictionary();
            }
        }

        print "\n\n<" . '-' x 79 . "\n" if $DEBUG;
        print $text if $DEBUG;
        print '-' x 79 . ">\n\n" if $DEBUG;
        print $OFH $text;
    }
    return;
}