One problem is that you discard whitespace, but whitespace includes \n, so \n is discarded before you can recognize it -- this might be a bug in Marpa (or maybe some kind of configurable option ... it is worth asking on the mailing list :)
One workaround is to redefine what you mean by whitespace, like
ws ~ [ \f\r\t]+
Basically, I need to figure out how to say "if the line ends in "\n", stop processing this line; if it ends in "\\n", then the next line is part of the expression".
I kind of thought you had already figured this out, with something like
blahblah ::= stuff continuationcharacter blahblah ## recurse to self
| stuff endoflinestop
unicharproptoregexrange.pl - program to expand PosixPunct '[:punct:]' => ['punctuation characters',, '\p{PosixPunct}', ],
Example output of which includes "\\\n" as part of a line ... gee, I should probably just discard it
#!/usr/bin/perl --
use strict; use warnings; use Data::Dump qw/ dd pp /;
use Marpa::R2;
# Script entry point: run the demo, then leave with a success status.
Main(@ARGV);
exit 0;
# Main - parse the sample text with the SLIF grammar and dump the result.
#
# Builds a Marpa::R2::Scanless::G from the string returned by get_grammar(),
# feeds the string returned by test_input() to a Scanless::R recognizer, and
# prints whatever value() returns using Data::Dump's dd().
#
# Arguments: none are read (the caller passes @ARGV, but it is ignored).
sub Main {
    my $grammar_spec = get_grammar();
    my $test_input   = test_input();

    # The pasted listing had a stray "+" line-wrap marker in front of
    # ", source", which made this hash constructor a syntax error.
    my $grammar = Marpa::R2::Scanless::G->new({
        bless_package => 'Ast',
        source        => \$grammar_spec,
    });

    my $recce = Marpa::R2::Scanless::R->new({ grammar => $grammar });
    $recce->read(\$test_input);

    # The sample output after __END__ starts with "\[", i.e. value() hands
    # back a reference to the parse result.
    my $val = $recce->value;
    dd($val);
}
## If ws is given no definition, Marpa complains: "Unproductive lexical symbols: <ws>"
# get_grammar - return the Marpa::R2 SLIF source used by Main().
#
# Shape of the grammar:
#   * Lines is zero or more Line.
#   * A Line is Words followed by a continuation and another Line (the rule
#     recurses into itself), or Words followed by <new line>, or a bare
#     <new line>.
#   * A Word is wordchars, optionally followed by punctuation and more
#     word characters (e.g. "can" + "'" + "t").
#   * The discarded ws class is [ \f\r\t] -- it deliberately leaves out \n so
#     that newlines survive to be matched by <new line> and continuation.
#
# Inside q{} the doubled backslash in the continuation rule collapses to a
# single one, so the SLIF source sees a one-character '\' string.
#
# Fix over the pasted listing: the PosixPunct character class had been split
# by a "+" line-wrap marker in the middle of \N{U+005B}; it is rejoined here
# onto a single line.
#
# Returns: the grammar as a plain string.
sub get_grammar {
    return q{
:default ::= action => [values]
:start ::= Lines
Lines ::= Line*
Line ::= Words <continuation> <Line>
| Words <new line>
| <new line>
Words ::= <Word>*
Word ::= <wordchars><punctuationwordchars>
| <wordchars>
punctuationwordchars ::= <PosixPunct><wordchars>
| <PosixPunct>
continuation ~ '\\' [\n]
PosixPunct ~ [\N{U+0021}-\N{U+002F}\N{U+003A}-\N{U+0040}\N{U+005B}-\N{U+0060}\N{U+007B}-\N{U+007E}]+
wordchars ~ [\w]+
<new line> ~ [\n]+
:discard ~ ws
ws ~ [ \f\r\t]+
};
}
# test_input - return the sample text that Main() parses.
#
# The text opens with an empty line and contains two lines that end in a
# backslash, which exercise the continuation handling of the grammar.
sub test_input {
    #~ I can't seem to face up to the facts
    #~ I'm tense and nervous and I\
    #~ Can't relax

    # Single-quoted heredoc: nothing is interpolated and the trailing
    # backslashes stay literal.  The blank first line is part of the data.
    my $lyrics = <<'END_OF_LYRICS';

I can't sleep 'cause my bed's on fire
Don't touch me I'm a real live wire
a b c\
1 2 3\
do re mi
you and me girl
END_OF_LYRICS

    return $lyrics;
}
__END__
\[
["\n"],
[
[
["I"],
["can", ["'", "t"]],
["sleep", ["'", "cause"]],
["my"],
["bed", ["'", "s"]],
["on"],
["fire"],
],
"\n",
],
[
[
["Don", ["'", "t"]],
["touch"],
["me"],
["I", ["'", "m"]],
["a"],
["real"],
["live"],
["wire"],
],
"\n",
],
[
[["a"], ["b"], ["c"]],
"\\\n",
[[[1], [2], [3]], "\\\n", [[["do"], ["re"], ["mi"]], "\n"]],
],
[[["you"], ["and"], ["me"], ["girl"]], "\n"],
]
Discarded it -- the same program with the continuation discarded:
#!/usr/bin/perl --
use strict; use warnings; use Data::Dump qw/ dd pp /;
use Marpa::R2;
# Script entry point: run the demo, then leave with a success status.
Main(@ARGV);
exit 0;
# Main - parse the sample text with the SLIF grammar and dump the result.
#
# Builds a Marpa::R2::Scanless::G from the string returned by get_grammar(),
# feeds the string returned by test_input() to a Scanless::R recognizer, and
# prints whatever value() returns using Data::Dump's dd().
#
# Arguments: none are read (the caller passes @ARGV, but it is ignored).
sub Main {
    my $grammar_spec = get_grammar();
    my $test_input   = test_input();

    # The pasted listing had a stray "+" line-wrap marker in front of
    # ", source", which made this hash constructor a syntax error.
    my $grammar = Marpa::R2::Scanless::G->new({
        bless_package => 'Ast',
        source        => \$grammar_spec,
    });

    my $recce = Marpa::R2::Scanless::R->new({ grammar => $grammar });
    $recce->read(\$test_input);

    # The sample output after __END__ starts with "\[", i.e. value() hands
    # back a reference to the parse result.
    my $val = $recce->value;
    dd($val);
}
## If ws is given no definition, Marpa complains: "Unproductive lexical symbols: <ws>"
# get_grammar - return the Marpa::R2 SLIF source used by Main().
#
# Shape of the grammar:
#   * Lines is zero or more Line.
#   * A Line is Words followed by <new line>, or a bare <new line>.
#   * A Word is wordchars, optionally followed by punctuation and more
#     word characters (e.g. "can" + "'" + "t").
#   * Two lexemes are discarded: ws ([ \f\r\t], which deliberately leaves out
#     \n) and continuation (a backslash followed by a newline).  Discarding
#     the continuation is what lets the words on both sides of it land in
#     one Words list, as the sample output after __END__ shows.
#
# Inside q{} the doubled backslash in the continuation rule collapses to a
# single one, so the SLIF source sees a one-character '\' string.
#
# Fix over the pasted listing: the PosixPunct character class had been split
# by a "+" line-wrap marker in the middle of \N{U+005B}; it is rejoined here
# onto a single line.
#
# Returns: the grammar as a plain string.
sub get_grammar {
    return q{
:default ::= action => [values]
:start ::= Lines
Lines ::= Line*
Line ::= Words <new line>
| <new line>
Words ::= <Word>*
Word ::= <wordchars><punctuationwordchars>
| <wordchars>
punctuationwordchars ::= <PosixPunct><wordchars>
| <PosixPunct>
continuation ~ '\\' [\n]
PosixPunct ~ [\N{U+0021}-\N{U+002F}\N{U+003A}-\N{U+0040}\N{U+005B}-\N{U+0060}\N{U+007B}-\N{U+007E}]+
wordchars ~ [\w]+
<new line> ~ [\n]+
:discard ~ ws
:discard ~ continuation
ws ~ [ \f\r\t]+
};
}
# test_input - return the sample text that Main() parses.
#
# The text opens with an empty line and contains two lines that end in a
# backslash, which exercise the continuation handling of the grammar.
sub test_input {
    #~ I can't seem to face up to the facts
    #~ I'm tense and nervous and I\
    #~ Can't relax

    # Single-quoted heredoc: nothing is interpolated and the trailing
    # backslashes stay literal.  The blank first line is part of the data.
    my $lyrics = <<'END_OF_LYRICS';

I can't sleep 'cause my bed's on fire
Don't touch me I'm a real live wire
a b c\
1 2 3\
do re mi
you and me girl
END_OF_LYRICS

    return $lyrics;
}
__END__
\[
["\n"],
[
[
["I"],
["can", ["'", "t"]],
["sleep", ["'", "cause"]],
["my"],
["bed", ["'", "s"]],
["on"],
["fire"],
],
"\n",
],
[
[
["Don", ["'", "t"]],
["touch"],
["me"],
["I", ["'", "m"]],
["a"],
["real"],
["live"],
["wire"],
],
"\n",
],
[
[["a"], ["b"], ["c"], [1], [2], [3], ["do"], ["re"], ["mi"]],
"\n",
],
[[["you"], ["and"], ["me"], ["girl"]], "\n"],
]