Template for machine definitions
#Empty.parser
my ($error); # local variables
my $error_fun = sub {output("Error: $1\n\tat line $_[0]")}; # shared e
+rror function
{
'!INIT' => ['START', sub {$error = 0;}], # reset local variables
# [ the initial state, the initialization ]
START => [
[ qr'^ServiceName \d+\.\d+.\d+$' => 'START'],
# [ regexp, state to go to, function to run (first paramet
+er is the line number, second the line itself) ]
[ qr'^---- Ticking: \d{4}/\d\d/\d\d \d\d:\d\d:\d\d - \d\d:\d\d
+:\d\d$' => 'START'],
[ qr'^Blah ' => 'OTHER_STATE', sub { }],
[ qr'^ERROR: (.*)$' => ERROR_TIME, $error_fun],
],
START_EOF => sub {output("OK") unless $error},
# specifies the function to execute if EOF is encountered in S
+TART state
ERROR_TIME => [
[ qr'\tat ' => 'OTHER_STATE'],
],
'!EOF' => sub {output("Unexpected EOF!")},
# specifies the function to execute if EOF is encountered in a
+ state that doesn't define its own handler
'!UNEXPECTED' => ['START', sub {output("Unexpected text at line $_
+[0]"); $error++}],
# [ the state to move to if no regexp of the current state mat
+ched the line, function to call ]
}
An actual machine (one of several)
#FileCreate.parser
my ($sites, $files, $jobs, $site_start, $error);
my $error_fun = sub {output("Error: $1\n\tat line $_[0]")};
{
'!INIT' => ['START', sub {$sites = 0; $files = 0; $jobs = 0;$error
+ = 0;}],
START => [
[ qr'^FileCreate \d+\.\d+.\d+$' => 'START'],
[ qr'^---- Ticking: \d{4}/\d\d/\d\d \d\d:\d\d:\d\d - \d\d:\d\d
+:\d\d$' => 'START'],
[ qr'^Creating files for site ' => 'FILES', sub {$sites++; $si
+te_start=$_[0];}],
[ qr'^Going down at ' => 'START'],
[ qr'^ERROR: (.*)$' => ERROR_TIME, $error_fun],
],
START_EOF => sub {output("OK ($sites Sites, $files Files, $jobs Jo
+bs)") unless $error},
FILES => [
[ qr'^\tCreating file ' => 'FILE', sub {$files++}],
[ qr'^File generation succeeded for site ' => 'START'],
[ qr'^File generation FAILED for site (.*)' => 'START', sub {o
+utput("Failed for $1 at line $site_start"); $error=1}],
[ qr'^Jobs for site \d+ with parameter type "\w+" are to be pr
+ocessed by HTTPPost or something.' => 'FILES'],
[ qr'^Site \d+ has posting parameters either only for single o
+r for package jobs!!!' => 'FILES'],
[ qr'^Sending delete request for job' => 'FILES', sub {$jobs++
+}],
[ qr'^ERROR: (.*)$' => ERROR_TIME, $error_fun],
],
FILE => [
[ qr'^\t\tThere are \d+ jobs.$' => 'FILE_OPT'],
[ qr'^ERROR: (.*)$' => ERROR_TIME, $error_fun],
],
FILE_OPT => [
[ qr'^\t\tFlat File$' => 'JOBS'],
[ qr'^\t\tXML File$' => 'JOBS_XML'],
[ qr'^\t\t((Comma|Tab) Delimited|Delimited by .*)$' => 'JOBS_D
+ELIM'],
[ qr'^ERROR: (.*)$' => ERROR_TIME, $error_fun],
],
JOBS_DELIM => [
[ qr'^\t\tCreating 2 delimited files: ' => 'JOBS'],
[ qr'^\t\t\tJob ID:' => 'JOBS', sub {$jobs++}],
[ qr'^ERROR: (.*)$' => ERROR_TIME, $error_fun],
],
JOBS_XML => [
[ qr'^\t\t(using DTD: .*|without DTD)' => 'JOBS'],
[ qr'^\t\t\tJob ID:' => 'JOBS', sub {$jobs++}],
[ qr'^ERROR: (.*)$' => ERROR_TIME, $error_fun],
],
JOBS => [
[ qr'^\t\t\tJob ID:' => 'JOBS', sub {$jobs++}],
[ qr'^\t\tZipped' => 'JOBS_END'],
[ qr'^\t\tDONE' => 'FILES'],
[ qr'^ERROR: (.*)$' => ERROR_TIME, $error_fun],
],
JOBS_END => [
[ qr'^\t\tDONE' => 'FILES'],
[ qr'^ERROR: (.*)$' => ERROR_TIME, $error_fun],
],
ERROR_TIME => [
[ qr'\tat ' => 'FILES'],
],
'!EOF' => sub {output("Unexpected EOF!")},
'!UNEXPECTED' => ['START', sub {output("Unexpected text at line $_
+[0]"); $error++}],
}
A script I use to debug them
#parse.pl
use strict;
BEGIN {
# dirty hack. If I define the DEBUG as a constant, not as a variab
+le the debug prints are optimized out during compilation
if ($ARGV[0] =~ m{^[/-]d}) {
shift(@ARGV);
eval "sub DEBUG () {1}"
} elsif ($ARGV[0] =~ m{^[/-]D}) {
shift(@ARGV);
eval "sub DEBUG () {2}"
} else {
eval "sub DEBUG () {0}"
}
}
my $machines = readParsers('c:\dir\with\machines');
my $type = shift(@ARGV) or die "usage: parse type [files...]";
if (! @ARGV) {
@ARGV = glob "logs/*.$type.log";
}
foreach my $file (@ARGV) {
do_file( $file, $machines->{lc $type});
}
sub output {print @_,"\n"}
# funcs
sub readParsers {
my %machines;
my $dir = shift();
opendir my $DIR, $dir or die "Can't open directory $dir : $!\n";
while (defined(my $file = readdir $DIR)) {
next unless $file =~ /^(.*)\.parser$/i;
my $type = $1;
$machines{lc $type} = do "$dir/$type.parser"
or die "Failed to read the state machine $dir/$type.parser
+:\n $@\n";
}
return \%machines;
}
sub do_file {
my ($file, $machine) = @_;
die "Second parameter to do_file must be a state machine!\n"
unless defined $machine and ref $machine eq 'HASH' and exists
+$machine->{'!INIT'};
open IN, '< ' . $file or return print "Can't open $file : $!\n";
print "$file\n";
my $state_name = $machine->{'!INIT'}[0];
my $state = $machine->{$state_name};
$machine->{'!INIT'}[1]->() if (ref $machine->{'!INIT'}[1] eq 'CODE
+');
my $line_no = 0;
LINE: while (defined(my $line = <IN>)) {
chomp($line);
$line_no++;
next if $line eq '';
print "\nSTATE: $state_name\nLINE: $line\n" if DEBUG;
foreach my $transition (@{$state}) {
print "RE: ".$transition->[0]."\n" if DEBUG > 1;
next unless $line =~ $transition->[0];
$transition->[2]->($line_no, $line)
if (ref $transition->[2] eq 'CODE');
$state_name = $transition->[1];
$state = $machine->{$state_name};
die qq{Unknown state "$state_name" !!!\n}
unless $state;
next LINE;
}
# no regexp matched!
if (ref $machine->{'!UNEXPECTED'} and ref $machine->{'!UNEXPEC
+TED'}->[1] eq 'CODE') {
$machine->{'!UNEXPECTED'}->[1]->( $line_no, $line);
} else {
output("Unexpected input at line $line_no:\n\t$line");
}
if (ref $machine->{'!UNEXPECTED'} and $machine->{'!UNEXPECTED'
+}->[0]) {
$state_name = $machine->{'!UNEXPECTED'}->[0];
$state = $machine->{$state_name};
} else {
$state_name = $machine->{'!INIT'}[0];
$state = $machine->{$state_name};
}
die qq{Unknown state "$state_name" !!!\n}
unless $state;
}
print "STATE at EOF: $state_name\n" if DEBUG;
if (exists $machine->{$state_name . "_EOF"}) {
$machine->{$state_name . "_EOF"}->();
} else {
if (ref $machine->{'!EOF'} and ref $machine->{'!EOF'} eq 'CODE
+') {
$machine->{'!EOF'}->();
} else {
output("Unexpected EOF");
}
}
}
Please take this just as an example! Comments appreciated (though ... it does what I need so ... ;-)
Jenda