in reply to converting context free grammar to BNF
I switched to Marpa::R2 instead of Parse::RecDescent. I had to change the grammar a bit to make it work:
#!/usr/bin/perl
# Parse a bracketed, tagged sentence with Marpa::R2 and dump the parse tree.
use warnings;
use strict;
use Marpa::R2;
use Data::Dumper;

# Scanless (SLIF) grammar.  The ":default" action [values] makes every rule
# return an array ref of its children, which is why the dumped result is a
# tree of nested arrays.  Repetition is spelled out recursively in "seqstrs".
my $dsl = << '__DSL__';
:default ::= action => [values]
lexeme default = latm => 1
seq ::= '(' seqstrs ')'
seqstrs ::= seqstr seqstrs | seqstr
seqstr ::= seq | tagstr
tagstr ::= OZN seq | OZN rijec
OZN ~ [A-Z.,?'*:`]+
rijec ~ [\w\-?,:<*`*]+
space ~ [\s]+
:discard ~ space
__DSL__

my $grammar = 'Marpa::R2::Scanless::G'->new({ source => \$dsl });

# The sentence must be one unbroken string: a line break in the middle of a
# token (or a stray "+" wrap marker) is not something the grammar accepts.
# It is concatenated from pieces here only to keep the source lines short.
my $input
    = "(SBARQ (WHNP (WP Who))(SQ (VP (VBZ says)(, ,)(S (SBAR (`` ``)(IN If)"
    . "(S (NP (PRP you))(VP (VBP do)(RB n<t)(VP (VB look)(ADJP (JJ good)))))"
    . "(, ,)(S (NP (PRP we))(VP (VBP do)(RB n<t)(VP (VB look)"
    . "(ADJP (JJ good)('' <<)))))))))(? ?))";

# parse() takes a reference to the input and returns a reference to the
# value built by the rule actions.
my $result = $grammar->parse(\$input);
print Dumper($result);
It seems you confused ( and '('. Marpa doesn't know the (s), so you have to enumerate the alternatives.
Output:
$VAR1 = \[
'(',
[
[
[
'SBARQ',
[
'(',
[
[
[
'WHNP',
[
'(',
[
[
[
'WP',
'Who'
]
]
],
')'
]
]
]
],
')'
]
]
],
[
[
[
'(',
[
[
[
'SQ',
[
'(',
[
[
[
'VP',
[
'(',
[
[
[
'VBZ',
'says'
]
]
],
')'
]
]
],
[
[
[
'(',
[
[
[
',',
','
]
]
],
')'
]
],
[
[
[
'(',
[
[
[
'S',
[
'(',
[
[
[
'SBAR',
[
'(',
[
[
[
'``',
'``'
]
]
],
')'
]
]
],
[
[
[
'(',
[
[
[
'IN',
'If'
]
]
],
')'
]
],
[
[
[
'(',
[
[
[
'S',
[
'(',
[
[
[
+'NP',
+[
+ '(',
+ [
+ [
+ [
+ 'PRP',
+ 'you'
+ ]
+ ]
+ ],
+ ')'
+]
]
]
],
')'
]
]
],
[
[
[
'(',
[
[
[
+'VP',
+[
+ '(',
+ [
+ [
+ [
+ 'VBP',
+ 'do'
+ ]
+ ]
+ ],
+ ')'
+]
]
],
[
[
+[
+ '(',
+ [
+ [
+ [
+ 'RB',
+ 'n<t'
+ ]
+ ]
+ ],
+ ')'
+]
],
[
+[
+ [
+ '(',
+ [
+ [
+ [
+ 'VP',
+ [
+ '(',
+ [
+ [
+ [
+ 'VB',
+ 'look'
+ ]
+ ]
+ ],
+ ')'
+ ]
+ ]
+ ],
+ [
+ [
+ [
+ '(',
+ [
+ [
+ [
+ 'ADJP',
+ [
+ '(',
+ [
+ [
+ [
+ 'JJ',
+ 'good'
+ ]
+ ]
+ ],
+ ')'
+ ]
+ ]
+ ]
+ ],
+ ')'
+ ]
+ ]
+ ]
+ ],
+ ')'
+ ]
+]
]
]
],
')'
]
]
]
],
')'
]
],
[
[
[
'(',
[
[
[
',',
','
]
]
],
')'
]
],
[
[
[
'(',
[
[
[
'S',
[
'(
+',
[
+[
+ [
+ 'NP',
+ [
+ '(',
+ [
+ [
+ [
+ 'PRP',
+ 'we'
+ ]
+ ]
+ ],
+ ')'
+ ]
+ ]
+]
],
')
+'
]
]
],
[
[
[
'(
+',
[
+[
+ [
+ 'VP',
+ [
+ '(',
+ [
+ [
+ [
+ 'VBP',
+ 'do'
+ ]
+ ]
+ ],
+ ')'
+ ]
+ ]
+],
+[
+ [
+ [
+ '(',
+ [
+ [
+ [
+ 'RB',
+ 'n<t'
+ ]
+ ]
+ ],
+ ')'
+ ]
+ ],
+ [
+ [
+ [
+ '(',
+ [
+ [
+ [
+ 'VP',
+ [
+ '(',
+ [
+ [
+ [
+ 'VB',
+ 'look'
+ ]
+ ]
+ ],
+ ')'
+ ]
+ ]
+ ],
+ [
+ [
+ [
+ '(',
+ [
+ [
+ [
+ 'ADJP',
+ [
+ '(',
+ [
+ [
+ [
+ 'JJ',
+ 'good'
+ ]
+ ]
+ ],
+ ')'
+ ]
+ ]
+ ],
+ [
+ [
+ [
+ '(',
+ [
+ [
+ [
+ '\'\'',
+ '<<'
+ ]
+ ]
+ ],
+ ')'
+ ]
+ ]
+ ]
+ ],
+ ')'
+ ]
+ ]
+ ]
+ ],
+ ')'
+ ]
+ ]
+ ]
+]
],
')
+'
]
]
]
],
')'
]
]
]
]
]
]
],
')'
]
]
]
],
')'
]
]
]
]
],
')'
]
]
]
],
')'
]
],
[
[
[
'(',
[
[
[
'?',
'?'
]
]
],
')'
]
]
]
]
],
')'
];
($q=q:Sq=~/;[c](.)(.)/;chr(-||-|5+lengthSq)`"S|oS2"`map{chr |+ord
}map{substrSq`S_+|`|}3E|-|`7**2-3:)=~y+S|`+$1,++print+eval$q,q,a,
Re^2: converting context free grammar to BNF
by nido203 (Novice) on May 31, 2016 at 18:07 UTC
|
Thank you so much, guys. I learned some things that were confusing me, and it turns out that it is OK to just use Parse::RecDescent without BNF. This really helps. I have just one question: as I must use Parse::RecDescent and not Marpa, I need to parse the abstract syntax tree, copy the data from the Dumper output into an array, and then export it to JSON format. The code is this:
# Parse a bracketed, tagged sentence with Parse::RecDescent and dump the
# resulting tree of nested arrays.
use strict;
use warnings;
use Parse::RecDescent;
use Data::Dumper;

# Default action for every rule: return [ rule name, matched items... ].
$::RD_AUTOACTION = q { [ @item ] };

# NOTE: the trailing space inside the OZN pattern is part of the match, so
# every captured tag keeps it (e.g. "SBARQ ").
my $grammar = q{
start: seq
seq: '(' seqstr(s) ')'
seqstr: seq | tagstr
tagstr: OZN ( seq | rijec )
OZN: /[A-Z.,?'*:`*]+ /
rijec: /[\w-?,:<*`*]+/
};

# new() returns undef when the grammar itself cannot be compiled.
my $parser = Parse::RecDescent->new($grammar)
    or die "Bad grammar\n";

# The sentence must be one unbroken string; it is concatenated from two
# pieces here only to keep the source lines short.
my $input = "(SBARQ (WHADVP (WRB Where))(SQ (VBZ is)(NP (NNP Inoco))"
          . "(VP (VBN based)))(. ?))";

# start() returns undef when the text does not match the grammar.
my $result = $parser->start($input);
defined $result
    or die "Input did not match the grammar\n";

print Dumper($result);
| [reply] [d/l] |
|
use JSON::MaybeXS;
# Serialise the parse tree held in $result to JSON text, then print it.
my $json_text = encode_json($result);
print $json_text;
Hope this helps, -- Hauke D | [reply] [d/l] |
|
["start",["seq","(",[["seqstr",["tagstr",["OZN","SBARQ "],["_alternati
+on_1_of_production_1_of_rule_tagstr",["seq","(",[["seqstr",["tagstr",
+["OZN","WHADVP "],["_alternation_1_of_production_1_of_rule_tagstr",["
+seq","(",[["seqstr",["tagstr",["OZN","WRB "],["_alternation_1_of_prod
+uction_1_of_rule_tagstr",["rijec","Where"]]]]],")"]]]]],")"]]]],["seq
+str",["seq","(",[["seqstr",["tagstr",["OZN","SQ "],["_alternation_1_o
+f_production_1_of_rule_tagstr",["seq","(",[["seqstr",["tagstr",["OZN"
+,"VBZ "],["_alternation_1_of_production_1_of_rule_tagstr",["rijec","i
+s"]]]]],")"]]]],["seqstr",["seq","(",[["seqstr",["tagstr",["OZN","NP
+"],["_alternation_1_of_production_1_of_rule_tagstr",["seq","(",[["seq
+str",["tagstr",["OZN","NNP "],["_alternation_1_of_production_1_of_rul
+e_tagstr",["rijec","Inoco"]]]]],")"]]]]],")"]],["seqstr",["seq","(",[
+["seqstr",["tagstr",["OZN","VP "],["_alternation_1_of_production_1_of
+_rule_tagstr",["seq","(",[["seqstr",["tagstr",["OZN","VBN "],["_alter
+nation_1_of_production_1_of_rule_tagstr",["rijec","based"]]]]],")"]]]
+]],")"]]],")"]],["seqstr",["seq","(",[["seqstr",["tagstr",["OZN",". "
+],["_alternation_1_of_production_1_of_rule_tagstr",["rijec","?"]]]]],
+")"]]],")"]]
It should actually look like the example below, I mean with curly brackets (not square) and colons instead of commas:
{"firstName": "John",
"lastName" : "Smith",
"age" : 25,
"address" :
{"streetAdr" : "21 2nd Street",
"city" : "New York",
"state" : "NY",
"zip" : "10021"},
"phoneNumber":
[{"type" : "home",
"number": "212 555-1234"},
{"type" : "fax",
"number" : "646 555-4567"}]
}
I tried using the <autotree> directive, but then it won't even export to JSON (it shows the message: "encountered object 'start=HASH(0x89d0d08)', but neither allow_blessed nor convert_blessed settings are enabled at gramatika.txt line 31.").
Is there another way to get desired shape?
Thanks! | [reply] [d/l] [select] |
|
|
|
|