As I kept refining and refactoring my code, I discovered that I really needed a lexical analyzer that could perform actions. Not quite Parse::RecDescent, nor as simple as HOP::Lexer. I'm sure that were I in an environment where I could install CPAN modules at will, I would use those; however, as it is I wound up writing my own wee module to suit my other coding tasks.
I would welcome any comments from my esteemed bretheren and sisters, including whether I should continue work on this, if I should CPAN it, or whether I'm simply reinventing the wheel for the nth time.
Behold the actual code, with a packaged example:
package SimpleLexer;
use strict;
use warnings;
our $VERSION = 1.0;
# Generate a new lexical analyzer from the factory.
sub get_engine {
my ( $this, $lexer, $init_state ) = @_;
my $class = ref $this || $this;
my $self = {
STATE => ['DEFAULT'],
LEXER => $lexer,
};
$self->{STATE} = [$init_state] if $init_state;
return bless $self, $class;
}
# Lex the input.
sub lex {
my ( $self, $text ) = @_;
my $lexer = $self->{LEXER};
FOUND_LEX_AGAIN: {
for my $lex ( @{ $lexer->{ $self->{STATE}[-1] } } ) {
my ( $regex, $action ) = @$lex;
if ( $text =~ /\G$regex/gc ) {
$action->($1||$text, $self);
redo FOUND_LEX_AGAIN;
}
}
}
}
sub begin_state {
my ( $self, $state ) = @_;
push @{$self->{STATE}}, $state;
}
sub end_state {
my $self = shift;
pop @{$self->{STATE}};
}
### Standalone ###
sub bold_begin {
my ( $arg, $lexer ) = @_;
print "BOLD ";
$lexer->begin_state('bold');
}
sub bold_end {
my ( $arg, $lexer ) = @_;
print " NO BOLD";
$lexer->end_state;
}
sub main {
# Our lexer
my $lexer = {
DEFAULT => [
[ qr/<b>/, \&bold_begin ],
[ qr/<uc>/, sub { $_[1]->begin_state('uppercase') } ],
[ qr/(.)/s, sub { print $1 } ], # echo
#[ qr/./s, sub { } ], # no echo
],
bold => [
[ qr{</b>}, \&bold_end ],
[ qr/<uc>/, sub { $_[1]->begin_state('uppercase') } ],
[ qr/(.)/s, sub { print $1 } ], # echo
],
uppercase => [
[ qr{</uc>}, sub { $_[1]->end_state } ],
[ qr/(.)/s, sub { print "\U$1" } ], # echo
],
};
# Usage
if ( scalar @ARGV < 0 ) {
print "Usage: $0\n";
exit(1);
}
my $engine = get_engine( __PACKAGE__, $lexer );
$engine->lex("This is a nifty <b><uc>uppercase</uc> test</b> to se
+e what <uc>this</uc> thing can do.\n");
}
main(@ARGV) unless caller;
my $package = __PACKAGE__;
Thank you!
-v.
"Perl. There is no substitute."