# See also: "Re: Count Quoted Words"; "Re: Help required in find command" (read, parse file, tokenize with m//gc); "Re^2: Help with regular expression" ( m/\G/gc ); perlfaq6#What good is \G in a regular expression?; "Re^2: POD style regex for inline HTML elements"
#!/usr/bin/perl --
use strict;
use warnings;
use Data::Dump qw/ dd /;
# Predeclare both loggers so code compiled later in the file can call
# them list-operator style (without parentheses).
sub DEBUG;
sub TRACE;

# DEBUG and TRACE share one implementation: print the arguments followed
# by a newline to STDERR.
*DEBUG = sub { print STDERR @_,"\n" };
*TRACE = *DEBUG;

# Sample input for the tokenizer: a comment line, three "key = value;"
# lines, and one line that matches neither form.
my $sample =<<'END';
# yo
x = 10;
y = 12;
z = 100;
junk
END

# Tokenize the sample, then dump the token list as a whole.
my $tokens = dadata( \$sample );
dd( $tokens );

# Dump each token on its own; splice with no offset removes every element,
# so the list is left empty afterwards.
for my $token ( splice @$tokens ) {
    dd( $token );
}

exit( 0 );
# dadata( \$text )
#
# Tokenizes the string behind the given scalar reference with a \G-anchored
# m//gc scanner and returns a reference to an array of tokens, in input
# order. Each token is an array reference whose first element is its type:
#
#   [ "COMMENT", $text ]      - '#' at a line start, through the end of line
#   [ "KV", $key, $value ]    - word '=' digits ';' between a line start and
#                               a line end (the \s* parts may span newlines)
#   [ "SPACE", $whitespace ]  - a run of whitespace
#   [ "INCH", $char ]         - any single other non-whitespace character
#
# Side effects: pos() of the referenced string is reset to 0 before scanning
# and is left at the end of the string afterwards. COMMENT, KV and INCH
# tokens are also reported through TRACE.
sub dadata {
    my( $dataref ) = @_;
    my @matches;

    # The string is never modified while scanning, so its length is fixed.
    my $end = length( $$dataref );
    pos( $$dataref ) = 0;

    # Rules are tried in priority order at the current position. The /c flag
    # keeps pos() where it was when a rule fails, so the next rule starts
    # from the same spot. Every character is either \s or \S, so one of the
    # last two rules always advances the scan.
    TOKEN:
    until( pos( $$dataref ) >= $end ){
        if( $$dataref =~ m{\G^(#.*$)}gcm ){
            push @matches, [ "COMMENT", $1 ];
            TRACE( "# COMMENT $1" );
        }
        elsif( $$dataref =~ m{\G^\s*(\w+)\s*=\s*(\d+)\s*;\s*$}gcmx ){
            push @matches, [ "KV", $1 , $2 ];
            TRACE( "# K($1)=V($2)" );
        }
        elsif( $$dataref =~ m{\G(\s+)}gcxs ){
            push @matches, [ "SPACE", $1 ];
        }
        elsif( $$dataref =~ m{\G(\S)}gcxs ){
            push @matches, [ "INCH", $1 ];
            TRACE( "# INCH($1)" );
        }
    }

    return \@matches;
}
__END__
# COMMENT # yo
# K(x)=V(10)
# K(y)=V(12)
# K(z)=V(100)
# INCH(j)
# INCH(u)
# INCH(n)
# INCH(k)
[
["COMMENT", "# yo"],
["SPACE", "\n"],
["KV", "x", 10],
["SPACE", "\n"],
["KV", "y", 12],
["SPACE", "\n"],
["KV", "z", 100],
["SPACE", "\n"],
["INCH", "j"],
["INCH", "u"],
["INCH", "n"],
["INCH", "k"],
["SPACE", "\n"],
]
["COMMENT", "# yo"]
["SPACE", "\n"]
["KV", "x", 10]
["SPACE", "\n"]
["KV", "y", 12]
["SPACE", "\n"]
["KV", "z", 100]
["SPACE", "\n"]
["INCH", "j"]
["INCH", "u"]
["INCH", "n"]
["INCH", "k"]
["SPACE", "\n"]