To answer your original question, here's a couple of approaches that encapsulate markup processing in a function and fully parameterize it: number of passes, the string to be processed, and the tags to be processed in the order of processing are all passed. (BTW: I've never seen this type of markup before; can you provide any info on it?) If the strings to be processed are lengthy, passing them by reference rather than by value might speed things up, and these functions could easily be modified to pass the strings by reference. The two different methods of macro expansion and replacement probably run at different speeds, but I've done no Benchmark-ing and I doubt the difference is great. My guess is the method that uses no /e evaluation in the s/// is faster. There are no new features in the code; it can run under Perl 5.8.9.
# Strange Markup Language processor 16mar14waw
# Strange Markup Language (anyway, strange to me) processor.
# refer to perlmonks node 1078356.
package SML;
use warnings FATAL => 'all';
use strict;
my $nobrackets = qr{ [^{}]+ }xms; # maybe use * quantifier??
use constant XLT => ( # basic translation mapping
# captured tagged substrings represented by $ below
# tag translates as
'\textsuperscript' => ' startsuperscript $ endsuperscript ',
'\textsubscript' => ' startsubscript $ endsubscript' ,
'\textit' => ' startitalic $ enditalic' ,
'\textcolor' => '' ,
'' => '($)' ,
);
my %xlate1 = XLT;
# tag will be in separate scalar.
# tagged string will be returned by anon. subroutine.
# convert strings (and placeholders) of %xlate1 to anon. subs.
for (values %xlate1) { # convert:
# placeholder to function parameter;
s{ \$ }'$_[0]'xms;
# value to anon. sub returning string w/interpolated param
$_ = eval qq{ sub { qq{$_} } };
}
sub process_iter_1 { # works -- iterative, 1 capture, uses /e
my ($passes, # number of processing passes to make
$string, # string to be processed
@tags, # tags to process in order of processing
) = @_;
while ($passes-- >= 1) { # count the passes
for ($string) { # aliases $string to $_ for s///
for my $tag (@tags) { # processes tags in order
exists $xlate1{$tag} or die qq{unknown tag '$tag'};
s{ \Q$tag\E \{ ($nobrackets) \} }
{ $xlate1{$tag}->($1) }xmsge;
}
}
}
return $string;
}
my %xlate2 = XLT;
# tag will be in separate scalar.
# tagged string will be returned by anon. subroutine.
# convert strings (and placeholders) of %xlate2 to anon. subs.
for (values %xlate2) { # convert:
# placeholder to function parameter;
s{ \$ }'$_[0]'xms;
# value to anon. sub returning ref. to string w/interpolated param
$_ = eval qq{ sub { \\ qq{$_} } };
}
sub process_iter_2 { # works -- iterative, 1 capture, no /e
my ($passes, # number of processing passes to make
$string, # string to be processed
@tags, # tags to process in order of processing
) = @_;
while ($passes-- >= 1) { # count the passes
for ($string) { # aliases $string to $_ for s///
for my $tag (@tags) { # processes tags in order
exists $xlate2{$tag} or die qq{unknown tag '$tag'};
s{ \Q$tag\E \{ ($nobrackets) \} }
{${ $xlate2{$tag}->($1) }}xmsg;
}
}
}
return $string;
}
1;
# SML.pm unit tests 16mar14waw
use warnings;
use strict;
use Test::More
# tests => ?? + 1 # Test::NoWarnings adds 1 test
'no_plan'
;
use Test::NoWarnings;
BEGIN { use_ok 'SML'; }
use constant RAW_SML => <<'EOT';
{\selectlanguage{english}
\textcolor{black}{\ \ 10.\ \ Three resistors connected in series each
carry currents labeled }\textit{\textcolor{black}{I}}\textcolor{black}
+{\textsubscript{1}}\textcolor{black}{,
}\textit{\textcolor{black}{I}}\textcolor{black}{\textsubscript{2}}\tex
+tcolor{black}{and}\textit{\textcolor{black}{I}}\textcolor{black}{\tex
+tsubscript{3}}\textcolor{black}{.
Which of the following expresses the value of the total current
}\textit{\textcolor{black}{I}}\textit{\textcolor{black}{\textsubscript
+{T}}}\textcolor{black}{in the system made up of the three resistors i
+n series?}}.
EOT
use constant PASS_1 => <<'EOT';
{\selectlanguage(english)
(\ \ 10.\ \ Three resistors connected in series each
carry currents labeled )\textit{(I)}( startsubscript 1 endsubscript)(,
)\textit{(I)}( startsubscript 2 endsubscript)(and)\textit{(I)}( starts
+ubscript 3 endsubscript)(.
Which of the following expresses the value of the total current
)\textit{(I)}\textit{( startsubscript T endsubscript)}(in the system m
+ade up of the three resistors in series?)}.
EOT
use constant PASS_2 => <<'EOT';
(\selectlanguage(english)
(\ \ 10.\ \ Three resistors connected in series each
carry currents labeled ) startitalic (I) enditalic( startsubscript 1 e
+ndsubscript)(,
) startitalic (I) enditalic( startsubscript 2 endsubscript)(and) start
+italic (I) enditalic( startsubscript 3 endsubscript)(.
Which of the following expresses the value of the total current
) startitalic (I) enditalic startitalic ( startsubscript T endsubscrip
+t) enditalic(in the system made up of the three resistors in series?)
+).
EOT
note "\n===== JDoolin code =====\n\n";
sub JDoolin_process {
my $nobrackets = qr/[^\{}]+/;
s/\\textsuperscript\{($nobrackets)\}/ startsuperscript $1 endsuper
+script /g;
s/\\textsubscript\{($nobrackets)\}/ startsubscript $1 endsubscript
+/g;
s/\\textit\{($nobrackets)\}/ startitalic $1 enditalic/g;
s/\\textcolor\{$nobrackets\}//g;
s/\{($nobrackets)\}/($1)/g;
}
$_ = RAW_SML; # JDoolin_process() runs against $_
JDoolin_process();
ok $_ eq PASS_1, qq{pass 1};
JDoolin_process();
ok $_ eq PASS_2, qq{pass 2};
note 'try 3rd pass against JDoolin: any change from 2nd pass?';
JDoolin_process();
ok $_ eq PASS_2, qq{3rd pass: no change from pass 2};
note "\n===== SML code =====\n\n";
use constant TAG_PROCESS_ORDER => (
qw( \textsuperscript \textsubscript \textit \textcolor ), ''
);
FUNT: # functions under test: fully qualified functions from module
for my $funt (map qq{SML::$_}, qw(
process_iter_1 process_iter_2
)) {
note "\n----- $funt() -----\n\n";
*process = do { no strict 'refs'; *$funt; };
my $text = process(1, RAW_SML, TAG_PROCESS_ORDER);
ok $text eq PASS_1, qq{1st pass, raw -> pass1};
$text = process(1, $text, TAG_PROCESS_ORDER);
ok $text eq PASS_2, qq{2nd pass, pass1 -> pass2};
ok PASS_2 eq process(2, RAW_SML, TAG_PROCESS_ORDER),
qq{2 passes, raw -> pass2};
ok PASS_2 eq process(3, RAW_SML, TAG_PROCESS_ORDER),
qq{3 passes: no change from 2 passes};
ok PASS_2 eq process(4, RAW_SML, TAG_PROCESS_ORDER),
qq{4 passes: no change from 2 passes};
note "degenerate cases";
ok RAW_SML eq process(0, RAW_SML, TAG_PROCESS_ORDER),
qq{degenerate: < 1 pass};
ok RAW_SML eq process(0.99999, RAW_SML, TAG_PROCESS_ORDER),
qq{degenerate: < 1 pass};
ok RAW_SML eq process(-1, RAW_SML, TAG_PROCESS_ORDER),
qq{degenerate: < 1 pass};
ok RAW_SML eq process(1, RAW_SML),
qq{degenerate: no tags to process};
ok RAW_SML eq process(9, RAW_SML),
qq{degenerate: no tags to process};
ok '' eq process(1, '', TAG_PROCESS_ORDER),
qq{degenerate: empty string to process};
ok '' eq process(9, '', TAG_PROCESS_ORDER),
qq{degenerate: empty string to process};
} # end for FUNT