This is what I have come up with so far for a ThML parser...
#!/usr/bin/perl -wT
#DEFINE EXTERNAL MODULE USES
use CGI;
use CGI::Carp qw(fatalsToBrowser);
use strict;
use warnings;
use Regexp::Assemble;
use Encode;
use Encode qw(encode decode);
use open qw( :std :encoding(UTF-8) );
print "Content-type: text/html; charset=utf-8\n\n"; #print CGI::heade
+r();
binmode STDOUT, ":utf8";
our $booksfile = 'BibleBooks_Abbreviations_PlusApoc_FULL.txt'; # Filen
+ame says it all!
our $thisprogram = "text-thml.pl"; #MAY NEED TO BE FULLY QUALIFIED
+URL
our $book = ""; # For Bible book names assemble
+d in regex via Regexp::Assemble
our $text = ""; # Holds text from HTML form to
+be processed
our %input; # For holding name/value key pa
+irs from HTML form input
######################
### BEGIN PROGRAM ###
&parseinputs;
&assemblebooks;
&parsetext;
&printHTML;
exit;
### FINISH PROGRAM ###
######################
sub parseinputs {
my $buffer="";
my $pair="";
my @pairs=();
if ($ENV{CONTENT_LENGTH}) {
read(STDIN, $buffer, $ENV{CONTENT_LENGTH});
@pairs = split(/&/,$buffer);
} else {
$buffer = $ENV{QUERY_STRING};
@pairs = split(/\&/,$buffer);
}
foreach $pair (@pairs) {
$pair=~s/`/ /g; #REMOVE BACKTICKS AS A SECURITY AND FUNCTIONA
+LITY MEASURE TO PROTECT REMAINDER OF SCRIPT
#$input{translate}='';
my ($name, $value) = split(/=/,$pair);
$value =~ tr/+/ /;
$name =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg;
$value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg;
$name = decode("utf-8", $name); $value = decode("utf-8", $valu
+e);
$input{$name} = $value;
}
$text=$input{terms};
} # END SUB
sub assemblebooks {
my @data = (); # To hold file source data
my @books = (); # To hold book tokens for entire fi
+le
my @temp = (); # To hold single book's tokens (one
+line in file)
my $line = ""; # For each line of the file
my $piece = ""; # For each record in line
open (DATA, '<:encoding(utf8)', $booksfile) or die "Cannot open the Bi
+ble book names file! $!\n";
@data = <DATA>;
close DATA;
foreach $line (@data) {
chomp $line;
@temp = split/\t|,\s/, $line;
foreach $piece (@temp) {
push @books, "$piece\n";
}
}
$book = Regexp::Assemble->new;
foreach $line (@books) {
chomp $line;
$book->add( "$line" );
}
} # END SUB
sub parsetext {
$text =~ s%
(?<!(?:\[TAGGED_START\])) # TO BE REMOVED AFTER MATCHING ALL REFS
( # BEGIN CAPTURE OF $1 ***
\(* # MATCH OPTIONAL PARENTHESIS
($book) # MATCH BIBLE BOOK NAMES FROM ASSEM
+BLED REGEX $2
\s # MATCH A SPACE FOLLOWING BOOK NAME
(\d+(?:[:\-, ]\d+)*) # MATCH A NUMBER (USUALLY A CHAPT.
+)=$3
# OPTIONALLY FOLLOWED BY VERSE NUMBERS
# WHICH MAY BE DELIMITED BY THE [:-, ]
+
(\; # .MATCH A SEMI-COLON...ETC. (BELOW
+)=$4
(?:\s*\d+ # .MATCH AN OPTIONAL SPACE, THEN A NU
+MBER
(?:[:\-,]\d+)* ) # .MATCH ZERO+ VERSE DELIMITERS/NU
+MBERS
)* # .MAKE CLAUSE OPTIONAL & REPEATABLE
( # >LOOK FOR OPTIONAL VERSION NOTATION
+=$5
(?:\(\w+\)*) # >MATCH WORD IN PARENTHESES
| # > -OR-
(?:[, ]+\w+)* # >MATCH COMMA OR SPACE FOLLOWED BY W
+ORD
)* # >CLOSE CAPTURE FOR $5
\)* # MATCH OPTIONAL PARENTHESIS
) # CLOSE CAPTURE OF $1 ***
(?>!(?:\[TAGGED_END\])) # WE CAN REMOVE THESE TAGS LATER
%\[TAGGED_START\]$1\[TAGGED_END\]%gx;
#$text =~ s/(<\!--TAGGED_START-->)|(<\!--TAGGED_END-->)//g;
} #END SUB parsetext
sub printHTML {
print <<HTML;
<html lang="utf8">
<head>
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf8">
<title>Text/HTML --> ThML Parser</title>
</HEAD>
<BODY>
<h3 align="center">Enter your text in the box below, then submit for T
+hML.</h3>
<form name="ff" method="POST" accept-encoding="UTF-8" accept-charset="
+utf-8" action="$thisprogram">
<textarea rows="5" cols="65" name="terms" style="background-color:#999
+999;" value="$input{terms}">$input{terms}</textarea>
<input type="submit" value="Submit" name="submit"></input>
</form>
<hr>
$text
<hr>
$book
<hr>
</body>
</html>
HTML
} # END SUB
And here's contents of the external file referenced above:
Book Name Abbreviations
Genesis Gen, Ge, Gn
Exodus Exo, Ex, Exod
Leviticus Lev, Le, Lv
Numbers Num, Nu, Nm, Nb
Deuteronomy Deut, Dt
Joshua Josh, Jos, Jsh
Judges Judg, Jdg, Jg, Jdgs
Ruth Rth, Ru
1 Samuel 1 Sam, 1 Sa, 1Samuel, 1S, I Sa, 1 Sm, 1Sa, I Sam, 1Sam, I
+ Samuel, 1st Samuel, First Samuel
2 Samuel 2 Sam, 2 Sa, 2Samuel, 2S, II Sa, 2 Sm, 2Sa, II Sam, 2Sam,
+ II Samuel, 2Samuel, 2nd Samuel, Second Samuel
1 Kings 1 Kgs, 1 Ki, 1K, I Kgs, 1Kgs, I Ki, 1Ki, I Kings, 1Kings,
+1st Kgs, 1st Kings, First Kings, First Kgs, 1Kin
2 Kings 2 Kgs, 2 Ki, 2K, II Kgs, 2Kgs, II Ki, 2Ki, II Kings, 2King
+s, 2nd Kgs, 2nd Kings, Second Kings, Second Kgs, 2Kin
1 Chronicles 1 Chron, 1 Ch, I Ch, 1Ch, 1 Chr, I Chr, 1Chr, I Chron
+, 1Chron, I Chronicles, 1Chronicles, 1st Chronicles, First Chronicles
+, 3 Kgs, 3 Ki, 3K, III Kgs, 3Kgs, III Ki, 3Ki, III Kings, 3Kings, 3rd
+ Kgs, 3rd Kings, Third Kings, Third Kgs, 3Kin
2 Chronicles 2 Chron, 2 Ch, II Ch, 2Ch, 2 Chr, II Chr, 2Chr, II Ch
+ron, 2Chron, II Chronicles, 2Chronicles, 2nd Chronicles, Second Chron
+icles, 4 Kgs, 4 Ki, 4K, IV Kgs, 4Kgs, IV Ki, 4Ki, IV Kings, 4Kings, 4
+th Kgs, 4th Kings, Fourth Kings, Fourth Kgs, 4Kin
Ezra Ezra, Ezr
Nehemiah Neh, Ne
Esther Esth, Es
Job Job, Job, Jb
Psalm Pslm, Ps, Psalms, Psa, Psm, Pss
Proverbs Prov, Pr, Prv
Ecclesiastes Eccles, Ec, Qoh, Qoheleth
Song of Solomon Song, So, Canticle of Canticles, Canticles, Song o
+f Songs, SOS
Isaiah Isa, Is
Jeremiah Jer, Je, Jr
Lamentations Lam, La
Ezekiel Ezek, Eze, Ezk
Daniel Dan, Da, Dn
Hosea Hos, Ho
Joel Joel, Joe, Jl
Amos Amos, Am
Obadiah Obad, Ob
Jonah Jnh, Jon
Micah Micah, Mic
Nahum Nah, Na
Habakkuk Hab, Hab
Zephaniah Zeph, Zep, Zp
Haggai Haggai, Hag, Hg
Zechariah Zech, Zec, Zc
Malachi Mal, Mal, Ml
Tobit Tobit, Tob, Tb
Judith Jdth, Jdt, Jth
Additions to Esther Add Esth, Add Es, Rest of Esther, The Rest of
+Esther, AEs, AddEsth
Wisdom of Solomon Wisd of Sol, Wis, Ws, Wisdom
Sirach Sirach, Sir, Ecclesiasticus, Ecclus
Baruch Baruch, Bar
Letter of Jeremiah Let Jer, Let Jer, LJe, Ltr Jer
Song of Three Youths Song of Three, Song Thr, The Song of Three Yo
+uths, Pr Az, Prayer of Azariah, Azariah, The Song of the Three Holy C
+hildren, The Song of Three Jews, Song of the Three Holy Children, Son
+g of Thr, Song of Three Children, Song of Three Jews
Susanna Susanna, Sus
Bel and the Dragon Bel, Bel
1 Maccabees 1 Macc, 1 Mac, 1M, I Ma, 1Ma, I Mac, 1Mac, I Macc, 1Ma
+cc, I Maccabees, 1Maccabees, 1st Maccabees, First Maccabees
2 Maccabees 2 Macc, 2 Mac, 2M, II Ma, 2Ma, II Mac, 2Mac, II Macc,
+2Macc, II Maccabees, 2Maccabees, 2nd Maccabees, Second Maccabees
1 Esdras 1 Esdr, 1 Esd, I Es, 1Es, I Esd, 1Esd, I Esdr, 1Esdr, I E
+sdras, 1Esdras, 1st Esdras, First Esdras
Prayer of Manasseh Pr of Man, Pr Man, PMa, Prayer of Manasses
Additional Psalm Add Psalm, Add Ps
3 Maccabees 3 Macc, 3 Mac, III Ma, 3Ma, III Mac, 3Mac, III Macc, 3
+Macc, III Maccabees, 3rd Maccabees, Third Maccabees
2 Esdras 2 Esdr, 2 Esd, II Es, 2Es, II Esd, 2Esd, II Esdr, 2Esdr,
+II Esdras, 2Esdras, 2nd Esdras, Second Esdras
4 Maccabees 4 Macc, 4 Mac, IV Ma, 4Ma, IV Mac, 4Mac, IV Macc, 4Mac
+c, IV Maccabees, IIII Maccabees, 4Maccabees, 4th Maccabees, Fourth Ma
+ccabees
Ode Ode, Ode
Psalms of Solomon Ps Solomon, Ps Sol, Psalms Solomon, PsSol
Epistle to the Laodiceans Laodiceans, Laod, Ep Laod, Epist Laodice
+ans, Epistle Laodiceans, Epistle to Laodiceans
3 Esdras 3 Esd, 3Esd
4 Esdras 4 Esd, 4Esd
5 Esdras 5 Esd, 5Esd
Matthew Matt, Mt
Mark Mrk, Mk, Mr
Luke Luk, Lk
John John, Jn, Jhn
Acts Acts, Ac
Romans Rom, Ro, Rm
1 Corinthians 1 Cor, 1 Co, I Co, 1Co, I Cor, 1Cor, I Corinthians,
+1Corinthians, 1st Corinthians, First Corinthians
2 Corinthians 2 Cor, 2 Co, II Co, 2Co, II Cor, 2Cor, II Corinthian
+s, 2Corinthians, 2nd Corinthians, Second Corinthians
Galatians Gal, Ga
Ephesians Ephes, Eph
Philippians Phil, Php
Colossians Col, Col
1 Thessalonians 1 Thess, 1 Th, I Th, 1Th, I Thes, 1Thes, I Thess,
+1Thess, I Thessalonians, 1Thessalonians, 1st Thessalonians, First The
+ssalonians
2 Thessalonians 2 Thess, 2 Th, II Th, 2Th, II Thes, 2Thes, II Thes
+s, 2Thess, II Thessalonians, 2Thessalonians, 2nd Thessalonians, Secon
+d Thessalonians
1 Timothy 1 Tim, 1 Ti, I Ti, 1Ti, I Tim, 1Tim, I Timothy, 1Timothy
+, 1st Timothy, First Timothy
2 Timothy 2 Tim, 2 Ti, II Ti, 2Ti, II Tim, 2Tim, II Timothy, 2Timo
+thy, 2nd Timothy, Second Timothy
Titus Titus, Tit
Philemon Philem, Phm
Hebrews Hebrews, Heb
James James, Jas, Jm
1 Peter 1 Pet, 1 Pe, I Pe, 1Pe, I Pet, 1Pet, I Pt, 1 Pt, 1Pt, I Pe
+ter, 1Peter, 1st Peter, First Peter
2 Peter 2 Pet, 2 Pe, II Pe, 2Pe, II Pet, 2Pet, II Pt, 2 Pt, 2Pt, I
+I Peter, 2Peter, 2nd Peter, Second Peter
1 John 1 John, 1 Jn, I Jn, 1Jn, I Jo, 1Jo, I Joh, 1Joh, I Jhn, 1 J
+hn, 1Jhn, I John, 1John, 1st John, First John
2 John 2 John, 2 Jn, II Jn, 2Jn, II Jo, 2Jo, II Joh, 2Joh, II Jhn,
+ 2 Jhn, 2Jhn, II John, 2John, 2nd John, Second John
3 John 3 John, 3 Jn, III Jn, 3Jn, III Jo, 3Jo, III Joh, 3Joh, III
+Jhn, 3 Jhn, 3Jhn, III John, 3John, 3rd John, Third John
Jude Jude, Jud
Revelation Rev, Re, The Revelation, Apocalypse, Apoc
|