#!/usr/bin/perl -wT

# CGI script: reads form input, builds a regex of Bible book names from a
# data file, tags Bible references found in the submitted text, and prints
# the result as an HTML page.
#
# NOTE(review): the available copy of this file had been run through an
# HTML-tag stripper, which removed everything between "<" and ">". The
# places where content was lost are marked FIXME below.

#DEFINE EXTERNAL MODULE USES
use CGI;
use CGI::Carp qw(fatalsToBrowser);
use strict;
use warnings;
use Regexp::Assemble;
use Encode;
use Encode qw(encode decode);
use open qw( :std :encoding(UTF-8) );

print "Content-type: text/html; charset=utf-8\n\n";
#print CGI::header();
binmode STDOUT, ":utf8";

our $booksfile   = 'BibleBooks_Abbreviations_PlusApoc_FULL.txt'; # Filename says it all!
our $thisprogram = "text-thml.pl"; #MAY NEED TO BE FULLY QUALIFIED URL
our $book        = ""; # For Bible book names assembled in regex via Regexp::Assemble
our $text        = ""; # Holds text from HTML form to be processed
our %input;            # For holding name/value key pairs from HTML form input

######################
### BEGIN PROGRAM ###
# Called with explicit parentheses: the "&name;" form would pass the
# caller's current @_ through to each sub.
parseinputs();
assemblebooks();
parsetext();
printHTML();
exit;
### FINISH PROGRAM ###
######################

# Fills %input with the decoded name/value pairs of the request and copies
# the "terms" field into $text.
#
# The raw data comes from STDIN when CONTENT_LENGTH is set, otherwise from
# QUERY_STRING. Each name and value is decoded in this order: "+" to space,
# %XX escapes to bytes, bytes to characters (UTF-8), backticks to spaces.
sub parseinputs {
    my $buffer = "";

    if ($ENV{CONTENT_LENGTH}) {
        read(STDIN, $buffer, $ENV{CONTENT_LENGTH});
        $buffer = "" unless defined $buffer;
    }
    else {
        # QUERY_STRING is absent when the script is not run by a web server.
        $buffer = defined $ENV{QUERY_STRING} ? $ENV{QUERY_STRING} : "";
    }

    foreach my $pair (split /&/, $buffer) {
        # Limit of 2 keeps any literal "=" inside the value intact.
        my ($name, $value) = split /=/, $pair, 2;
        next unless defined $name;          # empty pair, e.g. "a=1&&b=2"
        $value = "" unless defined $value;  # "name" given without "=value"

        foreach my $field ($name, $value) { # aliases: edits happen in place
            $field =~ tr/+/ /;
            $field =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg;
            $field = decode("utf-8", $field);
            #REMOVE BACKTICKS AS A SECURITY AND FUNCTIONALITY MEASURE TO PROTECT REMAINDER OF SCRIPT
            # Done after percent-decoding so that an encoded backtick (%60)
            # is removed as well as a literal one.
            $field =~ s/`/ /g;
        }

        #$input{translate}='';
        $input{$name} = $value;
    }

    # Keep $text defined even when the form was not submitted.
    $text = defined $input{terms} ? $input{terms} : "";
} # END SUB

# Builds $book, a Regexp::Assemble object holding every book-name token
# found in $booksfile. Each line of the file is split on a tab or on a
# comma followed by one whitespace character; every resulting token is
# added to the assembly. Dies if the file cannot be opened.
sub assemblebooks {
    # Lexical handle: the bareword DATA used previously is also the name of
    # Perl's special __DATA__/__END__ handle.
    open(my $fh, '<:encoding(utf8)', $booksfile)
        or die "Cannot open the Bible book names file! \n$!\n";

    $book = Regexp::Assemble->new;

    while (my $line = <$fh>) {
        $line =~ s/[\r\n]+\z//;    # strip the line ending, including a CR

        foreach my $piece (split /\t|,\s/, $line) {
            # Skip empty tokens (e.g. from a leading tab) so that no empty
            # alternative is added to the assembled pattern.
            next if $piece eq "";
            $book->add($piece);
        }
    }

    close $fh;
} # END SUB

# Rewrites $text in place, wrapping each match of the pattern below in
# [TAGGED_START] ... [TAGGED_END] markers.
#
# FIXME: the head of this pattern is missing from the available copy of the
# file. Everything between the opening "(?" and the words "LOOK FOR" was
# lost, including the opening parentheses that the "CLOSE CAPTURE" comments
# refer to. The surviving text is kept verbatim and has NOT been
# reconstructed; as it stands the pattern is rejected when it is compiled
# at run time. Restore the missing part from an intact copy of the script.
#
# NOTE(review): "(?>!...)" near the end is an atomic group that matches a
# literal "!" followed by "[TAGGED_END]"; a negative lookahead would be
# written "(?!...)". Left unchanged until the full pattern is available.
sub parsetext {
    $text =~ s%
        (?LOOK FOR OPTIONAL VERSION NOTATION=$5
            (?:\(\w+\)*)            # >MATCH WORD IN PARENTHESES
            |                       # > -OR-
            (?:[, ]+\w+)*           # >MATCH COMMA OR SPACE FOLLOWED BY WORD
        )*                          # >CLOSE CAPTURE FOR $5
        \)*                         # MATCH OPTIONAL PARENTHESIS
        )                           # CLOSE CAPTURE OF $1 ***
        (?>!(?:\[TAGGED_END\]))     # WE CAN REMOVE THESE TAGS LATER
    %\[TAGGED_START\]$1\[TAGGED_END\]%gx;
    #$text =~ s/(<\!--TAGGED_START-->)|(<\!--TAGGED_END-->)//g;
} #END SUB parsetext

# Prints the page body, interpolating $text and $book.
#
# FIXME: all HTML markup of this page (including whatever form it
# contained) is missing from the available copy of the file; only the text
# between the tags survives and is reproduced here unchanged.
#
# NOTE(review): $text comes from the request and is written to the page
# without HTML escaping. Whether it must be escaped depends on the element
# it was originally placed in - confirm against an intact copy.
sub printHTML {
    print <<HTML;
 Text/HTML --> ThML Parser

Enter your text in the box below, then submit for ThML.


$text
$book
HTML
} # END SUB