http://www.perlmonks.org?node_id=918123


in reply to Re^4: rough start of an axml compiler
in thread rough start of an axml compiler

I just hacked this together. It only knows about standard named tags and doesn't yet handle tag attributes, but it is working.

#!/usr/bin/perl
# Proof-of-concept aXML "compiler".
#
# Rewrites markup containing known aXML tags into Perl source text: a
# comma-separated list of single-quoted string literals and Node->new(...)
# calls. Only plain named tags are understood; tag attributes are not.
#
# Only core pragmas are used (strict, warnings, say), so no CPAN module is
# required to run it.
use strict;
use warnings;
use feature 'say';

# Alternation of the recognised tag names. The capturing parentheses are part
# of the string on purpose: every pattern below depends on them for its
# $1/$2/$3 captures and for the \1 back-reference.
my $knownCmds = '(use|conf|qd)';

# axml_to_node_source($markup)
#
# Takes the markup as a string and returns the generated Perl list body
# (without the surrounding "my @nodes = (" and ");").
#
# Known limitations of this regex pipeline:
#   * a literal "<caret>" in the input comes back out as a backtick;
#   * the final clean-up also strips any ", ]" sequence inside literal text.
sub axml_to_node_source {
    my ($code) = @_;

    # Escape backslashes and single quotes first, so text that ends up inside
    # a generated '...' literal cannot terminate or corrupt that literal.
    $code =~ s{([\\'])}{\\$1}g;

    # Tokenise real backticks; the backtick is used as a temporary marker.
    $code =~ s{`}{<caret>}gs;

    # Any text before the first known tag becomes a string literal.
    $code =~ s{(.*?)<$knownCmds>}{'$1',\n\n<$2>}s;

    # Put a marker right after every known open tag and right before every
    # known close tag. The [^`] classes below use the markers as barriers so
    # a match never spans across another known tag.
    $code =~ s{<$knownCmds>}{<$1>`}gs;
    $code =~ s{</$knownCmds>}{`</$1>}gs;

    # Any text between two consecutive opens.
    $code =~ s{<$knownCmds>`([^`]*?)<$knownCmds>`}{<$1>`\n'$2',\n\n<$3>`}gs;

    # Any text between two consecutive closes.
    $code =~ s{`</$knownCmds>([^`]*?)`</$knownCmds>}{`</$1>\n'$2',\n\n`</$3>}gs;

    # Any text between a close and the next open.
    $code =~ s{</$knownCmds>([^`]*?)<$knownCmds>}{</$1>\n\n'$2',\n\n<$3>}gs;

    # Any text after the last known close (the greedy .* finds the last one).
    $code =~ s{(.*)</$knownCmds>(.*)}{$1</$2>\n'$3'}s;

    # Remove the remaining markers, then restore the real backticks.
    $code =~ s{`}{}gs;
    $code =~ s{<caret>}{`}gs;

    # Tags whose content holds no other tag become "name => 'text'" nodes.
    $code =~ s{<$knownCmds>([^<>]*?)</\1>}{Node->new ( $1 => '$2' ),}gs;

    # Everything else (tags with nested content) becomes an array-ref node.
    $code =~ s{<$knownCmds>(.*?)</\1>}{Node->new ( \[ $2 \] ),}gs;

    # Drop the comma left dangling before the end of a generated array.
    $code =~ s{,(\s+?)\]}{]}gs;

    return $code;
}

# Sample document used to demonstrate the compiler.
my $axml = qq@ <html lang="<qd>action</qd>"> <head> <title><conf>site_title</conf></title> </head> <body> some orphan text <use>actions/<qd>action</qd>/somefile.aXML</use> <b>orphan</b> <conf>a</conf> </body> </html> @;

say 'my @nodes = (';
say axml_to_node_source($axml);
say ');';
OUTPUT :
my @nodes = ( ' <html lang="', Node->new ( qd => 'action' ), '"> <head> <title>', Node->new ( conf => 'site_title' ), '</title> </head> <body> some orphan text ', Node->new ( [ 'actions/', Node->new ( qd => 'action' ), '/somefile.aXML', ] ), ' <b>orphan</b> ', Node->new ( conf => 'a' ), ' </body> </html> ' );