# Strange Markup Language processor 16mar14waw # Strange Markup Language (anyway, strange to me) processor. # refer to perlmonks node 1078356. package SML; use warnings FATAL => 'all'; use strict; my $nobrackets = qr{ [^{}]+ }xms; # maybe use * quantifier?? use constant XLT => ( # basic translation mapping # captured tagged substrings represented by $ below # tag translates as '\textsuperscript' => ' startsuperscript $ endsuperscript ', '\textsubscript' => ' startsubscript $ endsubscript' , '\textit' => ' startitalic $ enditalic' , '\textcolor' => '' , '' => '($)' , ); my %xlate1 = XLT; # tag will be in separate scalar. # tagged string will be returned by anon. subroutine. # convert strings (and placeholders) of %xlate1 to anon. subs. for (values %xlate1) { # convert: # placeholder to function parameter; s{ \$ }'$_[0]'xms; # value to anon. sub returning string w/interpolated param $_ = eval qq{ sub { qq{$_} } }; } sub process_iter_1 { # works -- iterative, 1 capture, uses /e my ($passes, # number of processing passes to make $string, # string to be processed @tags, # tags to process in order of processing ) = @_; while ($passes-- >= 1) { # count the passes for ($string) { # aliases $string to $_ for s/// for my $tag (@tags) { # processes tags in order exists $xlate1{$tag} or die qq{unknown tag '$tag'}; s{ \Q$tag\E \{ ($nobrackets) \} } { $xlate1{$tag}->($1) }xmsge; } } } return $string; } my %xlate2 = XLT; # tag will be in separate scalar. # tagged string will be returned by anon. subroutine. # convert strings (and placeholders) of %xlate2 to anon. subs. for (values %xlate2) { # convert: # placeholder to function parameter; s{ \$ }'$_[0]'xms; # value to anon. sub returning ref. to string w/interpolated param $_ = eval qq{ sub { \\ qq{$_} } }; } sub process_iter_2 { # works -- iterative, 1 capture, no /e my ($passes, # number of processing passes to make $string, # string to be processed @tags, # tags to process in order of processing ) = @_; while ($passes-- >= 1) { # count the passes for ($string) { # aliases $string to $_ for s/// for my $tag (@tags) { # processes tags in order exists $xlate2{$tag} or die qq{unknown tag '$tag'}; s{ \Q$tag\E \{ ($nobrackets) \} } {${ $xlate2{$tag}->($1) }}xmsg; } } } return $string; } 1; #### # SML.pm unit tests 16mar14waw use warnings; use strict; use Test::More # tests => ?? + 1 # Test::NoWarnings adds 1 test 'no_plan' ; use Test::NoWarnings; BEGIN { use_ok 'SML'; } use constant RAW_SML => <<'EOT'; {\selectlanguage{english} \textcolor{black}{\ \ 10.\ \ Three resistors connected in series each carry currents labeled }\textit{\textcolor{black}{I}}\textcolor{black}{\textsubscript{1}}\textcolor{black}{, }\textit{\textcolor{black}{I}}\textcolor{black}{\textsubscript{2}}\textcolor{black}{and}\textit{\textcolor{black}{I}}\textcolor{black}{\textsubscript{3}}\textcolor{black}{. Which of the following expresses the value of the total current }\textit{\textcolor{black}{I}}\textit{\textcolor{black}{\textsubscript{T}}}\textcolor{black}{in the system made up of the three resistors in series?}}. EOT use constant PASS_1 => <<'EOT'; {\selectlanguage(english) (\ \ 10.\ \ Three resistors connected in series each carry currents labeled )\textit{(I)}( startsubscript 1 endsubscript)(, )\textit{(I)}( startsubscript 2 endsubscript)(and)\textit{(I)}( startsubscript 3 endsubscript)(. Which of the following expresses the value of the total current )\textit{(I)}\textit{( startsubscript T endsubscript)}(in the system made up of the three resistors in series?)}. EOT use constant PASS_2 => <<'EOT'; (\selectlanguage(english) (\ \ 10.\ \ Three resistors connected in series each carry currents labeled ) startitalic (I) enditalic( startsubscript 1 endsubscript)(, ) startitalic (I) enditalic( startsubscript 2 endsubscript)(and) startitalic (I) enditalic( startsubscript 3 endsubscript)(. Which of the following expresses the value of the total current ) startitalic (I) enditalic startitalic ( startsubscript T endsubscript) enditalic(in the system made up of the three resistors in series?)). EOT note "\n===== JDoolin code =====\n\n"; sub JDoolin_process { my $nobrackets = qr/[^\{}]+/; s/\\textsuperscript\{($nobrackets)\}/ startsuperscript $1 endsuperscript /g; s/\\textsubscript\{($nobrackets)\}/ startsubscript $1 endsubscript/g; s/\\textit\{($nobrackets)\}/ startitalic $1 enditalic/g; s/\\textcolor\{$nobrackets\}//g; s/\{($nobrackets)\}/($1)/g; } $_ = RAW_SML; # JDoolin_process() runs against $_ JDoolin_process(); ok $_ eq PASS_1, qq{pass 1}; JDoolin_process(); ok $_ eq PASS_2, qq{pass 2}; note 'try 3rd pass against JDoolin: any change from 2nd pass?'; JDoolin_process(); ok $_ eq PASS_2, qq{3rd pass: no change from pass 2}; note "\n===== SML code =====\n\n"; use constant TAG_PROCESS_ORDER => ( qw( \textsuperscript \textsubscript \textit \textcolor ), '' ); FUNT: # functions under test: fully qualified functions from module for my $funt (map qq{SML::$_}, qw( process_iter_1 process_iter_2 )) { note "\n----- $funt() -----\n\n"; *process = do { no strict 'refs'; *$funt; }; my $text = process(1, RAW_SML, TAG_PROCESS_ORDER); ok $text eq PASS_1, qq{1st pass, raw -> pass1}; $text = process(1, $text, TAG_PROCESS_ORDER); ok $text eq PASS_2, qq{2nd pass, pass1 -> pass2}; ok PASS_2 eq process(2, RAW_SML, TAG_PROCESS_ORDER), qq{2 passes, raw -> pass2}; ok PASS_2 eq process(3, RAW_SML, TAG_PROCESS_ORDER), qq{3 passes: no change from 2 passes}; ok PASS_2 eq process(4, RAW_SML, TAG_PROCESS_ORDER), qq{4 passes: no change from 2 passes}; note "degenerate cases"; ok RAW_SML eq process(0, RAW_SML, TAG_PROCESS_ORDER), qq{degenerate: < 1 pass}; ok RAW_SML eq process(0.99999, RAW_SML, TAG_PROCESS_ORDER), qq{degenerate: < 1 pass}; ok RAW_SML eq process(-1, RAW_SML, TAG_PROCESS_ORDER), qq{degenerate: < 1 pass}; ok RAW_SML eq process(1, RAW_SML), qq{degenerate: no tags to process}; ok RAW_SML eq process(9, RAW_SML), qq{degenerate: no tags to process}; ok '' eq process(1, '', TAG_PROCESS_ORDER), qq{degenerate: empty string to process}; ok '' eq process(9, '', TAG_PROCESS_ORDER), qq{degenerate: empty string to process}; } # end for FUNT