I am trying to use Parse::RecDescent to match what are known as tuples in the
REBOL programming language. Besides its roots in Lisp and Forth, REBOL is sometimes preferable to Perl because of its ability to handle most internet tasks (SMTP, POP, HTTP, FTP) in 1-2 lines.
A tuple is, at the least, an alternating integer-dot sequence with at minimum two repetitions of the integer-dot sequence. For example, 2343.23423. is the shortest form a tuple may take.
I tried to use the following productions to match this fact:
tuple : (number dot)(2) tuple_rest(s?) { \@item }
tuple_rest : number dot { \@item }
| number { \@item }
number : /\d+/ { \@item }
digit : /[0-9]/ { \@item }
dot : '.' { \@item }
But for some reason, the action is not returning every aspect of the match. That is, for the initial two integer-dot repetitions, it matches, but only returns the dot aspect to @item. Then for the tuple_rest part of the production, it matches, but only returns the number part of the production. I was hoping to return the data for all the subrules.
Entire grammar included for reference, but the relevant part above should be sufficient for an answer.
#! /usr/local/bin/perl -ws
use Data::Dumper;
use Parse::RecDescent;
# Turn on Parse::RecDescent's hint output by making $::RD_HINT true.
$::RD_HINT++;
# Select Data::Dumper indentation style 2 for the dumps produced by dump_item.
$Data::Dumper::Indent = 2;
# Render a data structure as a Data::Dumper string under a given variable name.
#
# Defined in the Parse::RecDescent package so that grammar actions can call it
# unqualified.
#
# Arguments:
#   $label   - name Data::Dumper gives the dumped value (appears as "$label = ...")
#   $payload - the value to dump (the grammar passes \@item)
# Returns the dumped text; nothing is printed here, callers decide whether to print.
sub Parse::RecDescent::dump_item {
    my ($label, $payload) = @_;
    my $dumper = Data::Dumper->new([$payload], [$label]);
    return $dumper->Dump;
}
# Concatenate the elements of an array reference into one string, no separator.
#
# Defined in the Parse::RecDescent package so that grammar actions can call it
# unqualified.
#
# Arguments:
#   $pieces - reference to the array whose elements are joined
# Returns the joined string (empty string for an empty array).
sub Parse::RecDescent::join_aref {
    my ($pieces) = @_;
    return join q{}, @{$pieces};
}
# Build the parser for REBOL datatypes. The start rule is "value", which
# matches one or more "datatype" items.
#
# Conventions used by the actions:
#   * Most rules return \@item (the rule name followed by the matched item
#     values) so the whole match can be inspected with dump_item.
#   * dump_item and join_aref are the helpers defined in the
#     Parse::RecDescent package, so actions call them unqualified.
#
# Changes relative to the earlier version of this grammar:
#   * Actions that had been split across two lines by a "+" line-wrap marker
#     are rejoined; the split left invalid Perl inside the action blocks.
#   * The first two number/dot pairs of a tuple are matched by the named rule
#     number_dot instead of the implicit subrule "(number dot)". An implicit
#     subrule has no action, so its value is the value of its last item (the
#     dot) and the number was lost. number_dot returns \@item, keeping both.
#   * digit returns the matched text ($item[1]) instead of \@item, so that
#     join_aref in the time rules joins digit characters rather than
#     stringified array references.
#
# NOTE(review): "datatype" refers to a rule named "series" that this grammar
# does not define.
my $parse = Parse::RecDescent->new(<<'EndGrammar');
value : datatype(s)
datatype : scalar | series
scalar : time
| <skip:''> '%' file { print dump_item('file!', \@item) }
| <skip:''> '#' issue { print dump_item('issue!', \@item) }
| <skip:''> tuple { print dump_item('tuple!', \@item) }
| binary
| decimal
| integer
| money
| char
| string { print dump_item('string!', \@item) }
| email { print dump_item('email!' , \@item) }
binary : base16 { dump_item('base16!', \@item) }
| base2 { dump_item('base2!', \@item) }
| base64 { dump_item('base64!', \@item) }
base16 : '#' '{' /(\s|[0-9A-F])+/ '}' { dump_item('B16', \@item) }
base2 : '#' '{' /[01]+/ '}' { dump_item('BA2', \@item) }
base64 : '#' '{' /[0-9A-Z]+/ '}' { dump_item('B64', \@item) }
issue : issuedat (dash issuedat)(s?) dash(?) { \@item }
| dash(?) { \@item }
dash : /[-]/ { \@item }
issuedat: alphanumeric(s) { \@item }
alphanumeric: /[A-Za-z0-9]/ { \@item }
file : directory { \@item }
| filename { dump_item('filename!', \@item) }
| directory filename { dump_item('directory filename', \@item) }
filename : /([A-Za-z0-9_]+)/ { \@item }
directory : slash filename_and_slash(s) { \@item }
| filename_and_slash(s) { \@item }
slash: '/' { \@item }
filename_and_slash: filename slash { \@item }
email : /\w+[@]\w+(\.\w+)+/ { $item[1] }
string : { extract_bracketed($text, '{') }
| { extract_delimited($text, '"') }
char : par_pre ctrl_char '"' { dump_item('char!', \@item) }
| '#' '"' /[^"]/ '"' { dump_item('char!', \@item) }
ctrl_char : null { $item[1] }
| eol { $item[1] }
| tab { $item[1] }
| newpage { $item[1] }
| esc { $item[1] }
| backspace { $item[1] }
| delete { $item[1] }
| caret { $item[1] }
| quote { $item[1] }
| hex { $item[1] }
par_pre : '#"^' { $item[1] }
null : '(null)' { $item[1] }
| '@' { $item[1] }
eol : '(line)' { $item[1] }
| '/' { $item[1] }
| '#"."' { $item[1] }
tab : '(tab)' { $item[1] }
| '-' { $item[1] }
newpage : '(page)' { $item[1] }
esc : '(esc)' { $item[1] }
backspace : '(back)' { $item[1] }
delete : '(del)' { $item[1] }
caret : '^' { $item[1] }
quote : '"' { $item[1] }
hex : /[0-9A-F]{2}/i { $item[1] }
money : sign(?) dollar cash(s?) '.' cash(2) { dump_item('money!', \@item) }
| sign(?) dollar cash(s?) { dump_item('money!', \@item) }
sign : /[-+]/ { $item[1] }
dollar : /\$/ { $item[1] }
cash : /\d/ { $item[1] }
decimal : /[-+]?(\d+)?\.\d+/ { warn "decimal! $item[1]" }
integer : /[-+]?\d+/ { warn "integer! $item[1]" }
time : HMS am_pm { warn "time! $item[1]$item[2]" }
| HMS { warn "time! $item[1]" }
am_pm : /(A|P)m/i { $item[1] }
HMS : hours colon minutes colon seconds { dump_item('hms',\@item); }
| hours colon minutes { join_aref($item[1]). ':' . join_aref($item[3]) }
| colon seconds { join '', @item[1,2] }
colon : ':' { $item[1] }
hours : digit(s) { $item[1] }
minutes : digit(s) { $item[1] }
seconds : digit(s) '.' digit(s) {join_aref($item[1]).'.'.join_aref($item[3])}
| '.' digit(s) {'.' . join('',join_aref($item[2])) }
| digit(s) { join_aref($item[1]) }
# A tuple needs two leading number-dot pairs; number_dot keeps both parts.
tuple : number_dot(2) tuple_rest(s?) { \@item }
number_dot : number dot { \@item }
tuple_rest : number dot { \@item }
| number { \@item }
number : /\d+/ { \@item }
# digit yields the matched character so join_aref can concatenate digits.
digit : /[0-9]/ { $item[1] }
dot : '.' { \@item }
EndGrammar
# Driver: read the sample input from the DATA section, run it through the
# grammar's start rule, and report progress with warn.

# Make the currently selected output handle autoflush, so text printed by the
# grammar actions is not held back in a buffer.
++$|;

# Marker printed before and after the run.
my $delim = "\n\n>";
warn $delim;

# Slurp all of DATA in one read. $/ is localised inside the do-block so the
# input record separator is restored for any code that runs afterwards.
my $input = do { local $/; <DATA> };
warn "attempting $input\n\n";

# The start rule returns undef when the input does not match; report that
# explicitly instead of handing undef to warn.
my $result = $parse->value($input);
warn "parsed: ", (defined $result ? $result : '(no match)'), $delim;
=head1 todo
#"^(tab)"
#"^(null)"
"hi how's it going?"
#{3A18427F 899AEFD8}
2#{10010110110010101001011011001011}
64#{LmNvbSA8yw9CB0aGvXmgUkVCu2Uz934b}
=cut
=head1 successful:
=item email
princepawn@yahoo.com
=item string
{
adsfasdf ad
asdfa sfasdf
asdfadsf
}
=item string
"asdfasdfadsf"
=item file
%fasdfasdf
%/fasdfasdf/
%fasdfasdf/
%/fasdfasdf/adsfasdf/asasfasdf/asdfasdf
%/AAAAAAA/BBBBBBB/CCCCCCC/DDDDDD/
=item issue
#asadasdf
#123-456-789-
#-
=cut
__DATA__
1.2.3.4.5