#!/usr/bin/perl -- use strict; use warnings; use Data::Dump; my $jp = do { use Regexp::Grammars; qr{ # Keep the big stick handy, just in case... #~ # Match this... ^ | $ | (?: \x5B ) (?: \x7B ) (?: \x5D ) (?: \x7D ) (?: \x3A ) (?: \x2C ) (?: \x20 | \x09 | \x0A | \x0D )* | | | | | | \x66\x61\x6c\x73\x65 \x6e\x75\x6c\x6c \x74\x72\x75\x65 (?: (?: (?: (?: (?: ) )* ) )? ) (?: ) (?: (?: (?: (?: (?: ) )* ) )? ) (?: (?: )? (?: )? (?: )? ) \x2E [\x{31}-\x{39}] \x65 | \x45 (?: (?: | )? (?: )+ ) (?: (?: )+ ) | (?: (?: )* ) \x2D \x2B \x30 (?: (?: )* ) | (?: \x22 | \x5C | \x2F | \x62 | \x66 | \x6E | \x72 | \x74 | (?: \x75 (?: ){4} ) ) \x5C \x22 [\x{20}-\x{21}] | [\x{23}-\x{5B}] | [\x{5D}-\x{10FFFF}] | A | B | C | D | E | F [\x{30}-\x{39}] }ixs}; for my $str( q/[false,true,null]/ , ){ print "#<<@{[time-$^T]}# $str \n"; if( $str =~ $jp ){ dd(\%/) ; #/ } else { print "## fail to match \n"; } print "#>>@{[time-$^T]}#\n"; } #### #<<1# [false,true,null] { "" => "[false,true,null]", "array" => { "" => "[false,true,null]", "begin_array" => { "" => "[", "ws" => "" }, "end_array" => { "" => "]", "ws" => "" }, "value" => { "" => "null", "null" => "null" }, "value_separator" => { "" => ",", "ws" => "" }, }, } #>>1# #### --- reg +++ reg @@ -49,3 +49,3 @@ - (?: (?: (?: (?: (?: ) )* ) )? ) + (?: <.begin_object> (?: (?: (?: (?: <.value_separator> ) )* ) )? <.end_object> ) @@ -55,3 +55,3 @@ - (?: (?: (?: (?: (?: ) )* ) )? ) + (?: <.begin_array> (?: (?: (?: (?: <.value_separator> ) )* ) )? <.end_array> ) @@ -88,3 +88,3 @@ - (?: (?: )* ) + (?: <.quotation_mark> (?: )* <.quotation_mark> ) #### #<<0# [false,true,null] { "" => "[false,true,null]", "array" => { "" => "[false,true,null]", "value" => { "" => "null", "null" => "null" }, }, } #>>0# #### --- reg +++ reg @@ -55,3 +55,3 @@ - (?: <.begin_array> (?: (?: (?: (?: <.value_separator> ) )* ) )? <.end_array> ) + (?: <.begin_array> (?: (?: <[value]> (?: (?: <.value_separator><[value]> ) )* ) )? <.end_array> ) #### #<<0# [false,true,null] { "" => "[false,true,null]", "array" => { "" => "[false,true,null]", "value" => [ { "" => "false", "false" => "false" }, { "" => "true", "true" => "true" }, { "" => "null", "null" => "null" }, ], }, } #>>0# #### --- reg +++ reg @@ -7,2 +7,3 @@ #~ + # Switch off context substring retention #### #<<0# [false,true,null] { array => { value => [{ false => "false" }, { true => "true" }, { null => "null" }], }, } #>>0# #### --- reg +++ reg @@ -40,12 +40,15 @@ \x66\x61\x6c\x73\x65 + (?{ $MATCH = ::FALSE(); }) \x6e\x75\x6c\x6c + (?{ $MATCH = undef; }) \x74\x72\x75\x65 + (?{ $MATCH = ::TRUE(); }) (?: <.begin_object> (?: (?: (?: (?: <.value_separator> ) )* ) )? <.end_object> ) @@ -117,3 +120,6 @@ } print "#>>@{[time-$^T]}#\n"; } +sub TRUE { bless \(my$s=!!1), 'TRUE' } +sub FALSE { bless \(my$s=!!0), 'FALSE' } + #### #<<1# [false,true,null] { array => { value => [ { false => bless(do{\(my $o = "")}, "FALSE") }, { true => bless(do{\(my $o = 1)}, "TRUE") }, { null => undef }, ], }, } #>>1# #### --- reg +++ reg @@ -38,3 +38,3 @@ - | | | | | | + | | | | | | #### #<<0# [false,true,null] { array => { value => [ bless(do{\(my $o = "")}, "FALSE"), bless(do{\(my $o = 1)}, "TRUE"), undef, ], }, } #>>0# #### --- reg +++ reg @@ -58,6 +58,9 @@ (?: <.begin_array> (?: (?: <[value]> (?: (?: <.value_separator><[value]> ) )* ) )? <.end_array> ) + (?{ + $MATCH = $MATCH{value} ; + }) (?: (?: )? (?: )? (?: )? ) #### #<<1# [false,true,null] { array => [ bless(do{\(my $o = "")}, "FALSE"), bless(do{\(my $o = 1)}, "TRUE"), undef, ], } #>>1# #### --- reg +++ reg @@ -8 +8 @@ - # Switch off context substring retention +#~ # Switch off context substring retention @@ -117 +117 @@ -for my $str( q/[false,true,null]/ , ){ +for my $str( q/[ -2.0, 4.333e333, 600 ]/ , ){ #### #<<0# [ -2.0, 4.333e333, 600 ] { "" => "[ -2.0, 4.333e333, 600 ]", "array" => [ { "" => "-2.0", "frac" => { "" => ".0", "decimal_point" => ".", "DIGIT" => 0 }, "int" => { "" => 2, "digit1_9" => 2 }, "minus" => "-", }, { "" => "4.333e333", "exp" => { "" => "e333", "DIGIT" => 3, "e" => "e" }, "frac" => { "" => ".333", "decimal_point" => ".", "DIGIT" => 3 }, "int" => { "" => 4, "digit1_9" => 4 }, }, { "" => 600, "int" => { "" => 600, "DIGIT" => 0, "digit1_9" => 6 } }, ], } #>>0# #### --- reg +++ reg @@ -7,3 +7,3 @@ #~ -#~ # Switch off context substring retention + # Switch off context substring retention @@ -77,9 +77,9 @@ - (?: (?: | )? (?: )+ ) + (?: (?: | )? (?: <[DIGIT]> )+ ) - (?: (?: )+ ) + (?: (?: <[DIGIT]> )+ ) - | (?: (?: )* ) + | (?: (?: <[DIGIT]> )* ) #### #<<1# [ -2.0, 4.333e333, 600 ] { array => [ { frac => { decimal_point => ".", DIGIT => [0] }, int => { digit1_9 => 2 }, minus => "-", }, { exp => { DIGIT => [3, 3, 3], e => "e" }, frac => { decimal_point => ".", DIGIT => [3, 3, 3] }, int => { digit1_9 => 4 }, }, { int => { DIGIT => [0, 0], digit1_9 => 6 } }, ], } #>>1# #### --- reg +++ reg @@ -76,12 +76,21 @@ (?: (?: | )? (?: <[DIGIT]> )+ ) + (?{ + $MATCH = join '', grep defined, $MATCH{e}, $MATCH{minus}, $MATCH{plus}, @{$MATCH{DIGIT}}; + }) (?: (?: <[DIGIT]> )+ ) + (?{ + $MATCH = join '', grep defined, $MATCH{decimal_point}, @{$MATCH{DIGIT}}; + }) - | (?: (?: <[DIGIT]> )* ) + | (?: (?: <[DIGIT]> )* ) + (?{ + $MATCH = join '', grep defined, $MATCH{digit1_9}, @{$MATCH{DIGIT}}; + }) \x2D @@ -114,7 +123,7 @@ [\x{30}-\x{39}] }ixs}; -for my $str( q/[ -2.0, 4.333e333, 600 ]/ , ){ +for my $str( q/[ -2.0, 4.333e333, 600, 0, 9 ]/ , ){ print "#<<@{[time-$^T]}# $str \n"; if( $str =~ $jp ){ dd(\%/) ; #/ #### #<<0# [ -2.0, 4.333e333, 600, 0, 9 ] { array => [ { frac => ".0", int => 2, minus => "-" }, { exp => "e333", frac => ".333", int => 4 }, { int => 600 }, { int => 0 }, { int => 9 }, ], } #>>0# #### --- reg +++ reg @@ -65,4 +65,7 @@ (?: (?: )? (?: )? (?: )? ) + (?{ + $MATCH = join'', grep defined, map{$MATCH{$_}} qw{ minus int frac exp }; + }) #### #<<1# [ -2.0, 4.333e333, 600, 0, 9 ] { array => ["-2.0", "4.333e333", 600, 0, 9] } #>>1# #### --- reg +++ reg @@ -8 +8 @@ - # Switch off context substring retention +#~ # Switch off context substring retention @@ -129 +129 @@ -for my $str( q/[ -2.0, 4.333e333, 600, 0, 9 ]/ , ){ +for my $str( q{[ "\\" quote \\t tab" ]} , ){ #### #<<0# [ "\" quote \t tab" ] { "" => "[ \"\\\" quote \\t tab\" ]", "array" => [ { "" => "\"\\\" quote \\t tab\"", "char" => { "" => "b", "unescaped" => "b" }, }, ], } #>>0# #### --- reg +++ reg @@ -7,3 +7,3 @@ #~ -#~ # Switch off context substring retention + # Switch off context substring retention @@ -107,3 +107,3 @@ - (?: <.quotation_mark> (?: )* <.quotation_mark> ) + (?: <.quotation_mark> (?: <[char]> )* <.quotation_mark> ) #### #<<0# [ "\" quote \t tab" ] { array => [ { char => [ { escape => "\\" }, { unescaped => " " }, { unescaped => "q" }, { unescaped => "u" }, { unescaped => "o" }, { unescaped => "t" }, { unescaped => "e" }, { unescaped => " " }, "\\", { unescaped => "t" }, { unescaped => " " }, { unescaped => "t" }, { unescaped => "a" }, { unescaped => "b" }, ], }, ], } #>>0# #### --- reg +++ reg @@ -108,0 +109,3 @@ + (?{ + $MATCH = token_string( @{ $MATCH{char} } ); + }) @@ -111 +114,4 @@ - | (?: \x22 | \x5C | \x2F | \x62 | \x66 | \x6E | \x72 | \x74 | (?: \x75 (?: ){4} ) ) + | (?: <.escape> ) + + + \x22 | \x5C | \x2F | \x62 | \x66 | \x6E | \x72 | \x74 | \x75[1-9A-F]{4} @@ -129 +135 @@ -for my $str( q{[ "\\" quote \\t tab" ]} , ){ +for my $str( q{[ "\\" quote \\t tab \\uD83D\\uDC2A U+1F42A DROMEDARY CAMEL" ]}, ){ @@ -140,0 +147,23 @@ +BEGIN { + my %rep = ( + "\"" => '"', + "/" => '/', + "\\" => '\\', + b => "\b", + f => "\f", + n => "\n", + r => "\r", + t => "\t" + ); + sub token_string { + return join '', + map { + my $ret = $_; + if( ref $_ ){ + $ret = $rep{ $_->{escaped} }; + $ret ||= pack('U', hex substr $_->{escaped}, 1, 4 ); + } + $ret; + } @_; + } +} #### #<<0# [ "\" quote \t tab \uD83D\uDC2A U+1F42A DROMEDARY CAMEL" ] { array => ["\" quote \t tab \x{D83D}\x{DC2A} U+1F42A DROMEDARY CAMEL"], } #>>0# #### --- reg +++ reg @@ -135 +135 @@ -for my $str( q{[ "\\" quote \\t tab \\uD83D\\uDC2A U+1F42A DROMEDARY CAMEL" ]}, ){ +for my $str( q{[ -90.0, [" \\t tab",[7,11]],3e9,"\\r return", null ]}, ){ #### #<<1# [ -90.0, [" \t tab",[7,11]],3e9,"\r return", null ] { array => ["-90.0", [" \t tab", [7, 11]], "3e9", "\r return", undef], } #>>1# #### --- reg +++ reg @@ -8 +8 @@ - # Switch off context substring retention +#~ # Switch off context substring retention @@ -135 +135 @@ -for my $str( q{[ -90.0, [" \\t tab",[7,11]],3e9,"\\r return", null ]}, ){ +for my $str( q{{ "key" : "lime", "blue": "berry" }}, ){ #### #<<0# { "key" : "lime", "blue": "berry" } { "" => "{ \"key\" : \"lime\", \"blue\": \"berry\" }", "object" => { "" => "{ \"key\" : \"lime\", \"blue\": \"berry\" }", "member" => { "" => "\"blue\": \"berry\"", "name_separator" => { "" => ": ", "ws" => " " }, "string" => "blue", "value" => "berry", }, }, } #>>0# #### --- reg +++ reg @@ -7,3 +7,3 @@ #~ -#~ # Switch off context substring retention + # Switch off context substring retention @@ -53,6 +53,9 @@ - (?: <.begin_object> (?: (?: (?: (?: <.value_separator> ) )* ) )? <.end_object> ) + (?: <.begin_object> (?: (?: <[member]> (?: (?: <.value_separator><[member]> ) )* ) )? <.end_object> ) - (?: ) + (?: <.name_separator> ) + (?{ + $MATCH = [ $MATCH{string}, $MATCH{value} ]; + }) #### #<<0# { "key" : "lime", "blue": "berry" } { object => { member => [["key", "lime"], ["blue", "berry"]] } } #>>0# #### --- reg +++ reg @@ -53,4 +53,7 @@ (?: <.begin_object> (?: (?: <[member]> (?: (?: <.value_separator><[member]> ) )* ) )? <.end_object> ) + (?{ + $MATCH = { map { @$_ } @{$MATCH{member}} }; + }) #### #<<1# { "key" : "lime", "blue": "berry" } { object => { blue => "berry", key => "lime" } } #>>1# #### --- reg +++ reg @@ -141 +141 @@ -for my $str( q{{ "key" : "lime", "blue": "berry" }}, ){ +for my $str( q{[{"k":"v"},{"v":"k"}] }, q{{"ro":["sham","bo"],"t":{"i":{"c":{"t":{"o":"c"}}}}}}){ #### #<<1# [{"k":"v"},{"v":"k"}] { array => [{ k => "v" }, { v => "k" }] } #>>1# #<<1# {"ro":["sham","bo"],"t":{"i":{"c":{"t":{"o":"c"}}}}} { object => { ro => ["sham", "bo"], t => { i => { c => { t => { o => "c" } } } } }, } #>>1# #### --- reg +++ reg @@ -141 +141 @@ -for my $str( q{[{"k":"v"},{"v":"k"}] }, q{{"ro":["sham","bo"],"t":{"i":{"c":{"t":{"o":"c"}}}}}}){ +for my $str( q{[1,[2,[3],[]]]}, q{["double extra comma",,]} , ){ #### #<<0# [1,[2,[3],[]]] { array => [1, [2, [3], undef]] } #>>0# #<<0# ["double extra comma",,] ## fail to match #>>43#