What started as an exercise in practicing regexen became an exercise in simplification.
The result is the following script. It works well and seems to be forgiving of malformed json files.
I hope that you might be able to give me either answers or ideas for the following questions that I have:
#!/usr/bin/perl
# issues:
# false and null strings do not get requoted in wrap() while true does
+.
#why?
use strict;
use warnings;
# simple json interpreter and writer.
# module.
package Jsonrw;
require Exporter;
our @ISA =qw(Exporter);
our @EXPORT =qw(J_write J_read);
our $VERSION = 0.04;
sub J_read {
# in which we grab a json file and spit out a reference (% or @)
my $file = shift;
open my $in, "$file" or die
"Can't open $file, $!";
my $json; #flattened json file
my $struct; #resultant datastructure
while (<$in>){
$json.=$_; # to lazy to check how to slurp. :)
}
my @rawarray = destruct_json($json);
# the first element of @rawarray indicates the base of the datastruc
+ture.
$struct = build_struct(\@rawarray, shift @rawarray);
return $struct;
}
sub J_write{
#in which we turn a datastructure into a string in json format.
my $input = shift;
my $pretty = shift; #do we make the json pretty?
my $json;
my @jarray = destruct_struct($input);
($pretty)?
($json = build_json_pretty(\@jarray,$pretty)):
($json = build_json(\@jarray));
$json
}
sub destruct_json{
# in which we take a json string and split it up into an array:
#
# Tokens like commas and colons are ommitted; commas since each elemen
+t of the array is delimited by virtue of
# itself; colons since key:value pairs remain ajacent.
# Braces and quotes are the only tokens which remain. Braces are need
+ed when building the datastructure.
# Quotes are needed in case we are dealing with braces as data instead
+ of metadata.
my $j = shift;
my @array;
#take string and seperate elements into an array.
(@array) = $j =~
/
(\{|\}|\[|\]) #a naked brace is a sure sign of a reference delim
+iter
|
(?:\s*:\s*) #note, but don't capture hash pair token
|
(?:\s*,*\s*) #note, but don't capture delimiter- even takes jso
+n without commas.
|
(null) #one of those "special" words
|
(false) #special . . .
|
(true) #again...
|
("") #ooh- let's accept an empty string :)
|
(\d+) #or a number
|
# this is how we deal with quoting:
(".*? # lot's of characters followed by . . .
(?:[^\\] # one character is NOT a \
(?:\\\\)*) # zero or more pairs of \
") # finish off with a close quote.
# an escaped quote will not match the end
# of the string.
/gx;
return clean(\@array);
}
sub build_struct{
#where a flat array is turned into a deep structure. This
#works recursively.
my $json = shift;
my $type = shift; # hash or array reference marker
my $struct;
($type eq '{')?($struct = {}):($struct = []);
while (@$json){
((shift @$json) and (return $struct))
if # We shift() in order to get rid of the
# closing reference delimiter
($json->[0] =~ /^(\}|\])$/); #End of level
if ($type eq '{'){ #this level is a hash
my $key = shift @$json;
$key = unwrap($key);
my $val = shift @$json;
# IF the value in the hash is a reference
# delimiter, then we call build_struct()
# again to build the new structure.
# $val is now a reference instead of a scaler
($val =~ /^(\{|\[)$/)?
($val = build_struct($json,$val)):
($val = unwrap($val));
#unwrap() get's rid of quotes in a string and
#the \ used in escaping quotes and other \'s
$struct->{$key} = $val
}
if ($type eq '['){ #this level is an array
my $val = shift @$json;
# IF $val is a reference
# delimiter, then we call build_struct()
# again to build the new structure.
# $val is now a reference instead of a scaler
($val =~ /^(\{|\[)$/)?
($val = build_struct($json,$val)):
($val = unwrap($val));
push (@{$struct}, $val);
}
}
#if we are missing reference delimiter:
my %err = ("ERR" => 'No closing ] or }');
return \%err
#this return statement will happen if the above return statements neve
+r happen.
#A cheap way to detect bad input. Not very informative.
}
sub destruct_struct{
# In which we pull a datastructure apart and build an array out of it.
# The resulting array should be identical to the results of destruct_j
+son() from
#the equivalent json file.
#This is a recursive routine.
my $struct = shift;
my @array;
if (ref($struct) eq 'HASH'){
push @array, '{';
for my $key (keys(%$struct)){
my $val = $struct->{$key};
push @array, wrap($key);
#wrap() put's quotes around the string and
#adds escapes to any quotes or \ already there.
#IF $val is a reference, then we call destruct_struct
#again and push the result (an array) onto this array.
(ref($val))?
(push @array, destruct_struct ($val)):
(push @array, wrap($val));
}
push @array, '}';
}
# Same comments as above.
if (ref($struct) eq 'ARRAY'){
push @array, '[';
for my $val (@$struct){
(ref($val))?
(push @array, destruct_struct ($val)):
(push @array, wrap($val));
}
push @array, ']';
}
@array
}
sub build_json_pretty{
#In which build_jason() is duplicated with
#extra prettifying padding.
# It's easier to debug both routines seperately than
#to combine them.
my $jarray = shift;
my $pretty = shift;
my $indent;
my $string;
my $typeflag;
#$pretty is a string passed as an arguement to the exported functio
+n J_write().
#Currently, you may mix and match tabs and spaces for indenting eac
+h reference
#level. You may add extra spaces for the values within the referen
+ces.
# Eg: T1S4VS4 means that for each level, indent an extra Tab + fo
+ur spaces.
# Indent four more spaces again for the values within the level.
(my $tabset) = $pretty =~ /T(\d+)/;
(my $spaceset) = $pretty=~ /S(\d+)/;
my( $valindentype, $valindentval) = $pretty =~ /V(\w)(\d+)/;
my $tabs;
my $spaces;
my $valindent;
# These FOR loops are here because I have no idea how
# to s/// with quatifiers on the right hand side.
for (1..$valindentval){
($valindentype eq 'T')?
($valindent .= "\t"):
($valindent .= " ");
}
for (1..$tabset){
$tabs.="\t";
}
for (1..$spaceset){
$spaces.=" ";
}
my $indentset = $tabs.$spaces;
while (@$jarray){
my $jelem = $jarray->[0];
if ($jelem =~/^([[{]$)/){
$typeflag .=$1;
($indent .= $indentset) if ($string);
$string .= $indent;
$string .= shift @$jarray;
$string .= "\n";
next
}
if ($jelem =~/^[\]}]$/){
$typeflag =~ s/.$//;
$string .= $indent;
$string .= shift @$jarray;
$string .= ",\n";
$indent =~ s/$indentset//;
next
}
if ($typeflag =~/\[$/){
my $val = shift @$jarray;
$string .= $indent.$valindent;
$string .= $val;
$string .= ",\n";
next
}
if ($typeflag =~ /\{$/){
my $key = shift @$jarray;
my $val;
($val = shift @$jarray)unless($jarray->[0]=~/(^[[{]$)/);
$string .= $indent.$valindent;
$string .= "$key:$val";
$string .= ",\n";
next
}
}
$string
}
sub build_json{
#In which we take an array and turn it into a json file
# See destruct_struct() and destruct_json() to see how
# the array is built.
my $jarray = shift;
my $typeflag; #indicate if writing out a hash or an array
my $string;
#$typeflag has reference open delimiters appended to
#it. The last is removed upon finding a closing reference
#delimiter. A side effect is that ] and } are interchangable.
while (@$jarray){
my $jelem = $jarray->[0];
if ($jelem =~/(^[[{]$)/){
$typeflag .= "$1";
$string .= shift @$jarray;
next
}
if ($jelem =~/^[\]}]$/){
$typeflag =~ s/.$//;
$string .= shift @$jarray;
next
}
if ($typeflag =~ /\[$/){
$string .= shift @$jarray;
$string .=',';
next
}
if ($typeflag =~ /\{$/){
my $key = shift @$jarray;
my $val;
#IF $val would be a reference delimiter, we leave it blank.
# This way, earlier logic takes care of the next
#reference level.
($val = shift @$jarray)unless($jarray->[0]=~/(^[[{]$)/);
$string .= "$key:$val,";
next
}
}
$string
}
sub unwrap{
#In which we take a string and remove json formatting
my $l = shift;
unless ($l =~/(^\d$)/){
$l=~ s/^"(.*)"$/$1/; #first, get rid of the qoutes.
$l=~ s/([^\\])\\([^\\])/$1$2/g; #then get rid of single escapes
$l=~ s/\\\\/\\/g #then get rid of paired escapes
}
$l
}
sub wrap{
#In which we add escapes and quotes to strings.
my $l = shift;
unless ($l =~/(^false$)|(^null$)|(^\d$)/){
$l =~ s/(\\)/$1$1/g; # escape '\'
$l =~ s/"/\\"/g; # escape quotes
$l =~ s/^(.*)$/"$1"/; # wrap in quotes
}
$l
}
sub clean{
#In which empty elements are removed
#from an array
my $array = shift;
my @cleaned;
for (@{$array}){
(push @cleaned, $_) if ($_);
}
return @cleaned;
}
1
I would love to hear of good ideas to make this code better. I feel like I did an OK job of it though.