http://www.perlmonks.org?node_id=944731

wrinkles has asked for the wisdom of the Perl Monks concerning the following question:

Good Evening Monks,

I have written a simple Movable Type plugin that creates a MT tag modifier which takes the text output from an MT tag, finds and parses a CSV string, then returns the data as a simple (no nested data) hash or array or string which is then captured as an MT template variable. All within an ordinary template tag.

It is on GitHub

The data fields are extracted from the surrounding content and from neighboring data with index and substr functions. I use the split function to parse the CSV into arrays and hashes, which is certainly lightweight, but of course brittle.

I'd appreciate any tips to make the code more robust. Any dependencies should be pure perl so I can just drop them in a extlib directory.

Most of the Text:CSV type modules read from files, so I was unable to make them work with a string variable.

The plugin code follows. Any advice appreciated.

package MoreData::Plugin; use MT 4.2; use warnings; use strict; use MT::Util qw(decode_html); sub moredata { my ($str, $val, $ctx) = @_; # val is the data name and optional form +at (or default format) $str = decode_html($str); my $dataname; # retrieved from $val my $content; # extracted from $str my $datastring; # extracted from $str my $result; # processed from $datastring # retrieve configuration defaults my $config = _moredata_config($ctx); # open tag is required my $opentag = $config->{'moredata_opentag'} ? $config->{'moredata_op +entag'} : ''; die "MoreData requires an open tag: $!" unless ($opentag); my $closetag = $config->{'moredata_closetag'} ? $config->{'moredata_ +closetag'} : ''; # not required # retrieve format and data separator from tag parameters my $format_cfg = $config->{'moredata_format'}; my $format = ''; if (ref($val) eq 'ARRAY') { $dataname = defined($val->[0]) ? $val->[0] : '' ; #dataname can be + empty $format = $val->[1] ? ($val->[1]) : $format_cfg ; } else { $dataname = $val ? $val : ''; $format = $format_cfg } die "No format specified in args or configuration: $!" unless $forma +t; # extract datastrings my $substrings_aref = _retrieve_strings($str, $opentag, $closetag, $ +dataname); unless (scalar($substrings_aref)) { # no stringrefs returned die "cannot extract substrings from string: $!"; } $content = _trim($$substrings_aref[0]); return $content if $dataname eq "__content__"; $datastring = _trim($$substrings_aref[1]); return $datastring if ($dataname eq '__data__'); # return all data +as string return $datastring unless (length($datastring)); # no sense in proce +ssing an empty string my $datasep_cfg = $config->{'moredata_datasep'}; length($datasep_cfg) or die "The data separation string must be conf +igured: $!"; my $hashsep_cfg = $config->{'moredata_hashsep'} || ''; length($hashsep_cfg) or die "The hash separation string must be conf +igured: $!"; # send data and parameters to desired format for result if ($format eq 'array') { $result = _moredata_array($datastring, $datasep_cfg); } elsif ($format eq 'hash') { $result = _moredata_hash($datastring, $datasep_cfg, $hashsep_cfg); } elsif ($format eq 'string') { $result = $datastring; } else { die "no format : $!"; } return $result; } sub _moredata_array { my ($datastring, $datasep_cfg) = @_; my @list = split /\s*$datasep_cfg\s*/, $datastring; scalar @list ? \@list : undef; } sub _moredata_hash { my ($datastring, $datasep_cfg, $hashsep_cfg) = @_; my $datasep = _escape($datasep_cfg); my $hashsep = _escape($hashsep_cfg); my %hash; my @list = split /\s*$datasep\s*/, $datastring; foreach my $item (@list) { my $count = my @array = split(/\s*$hashsep\s*/, $item); die "Count of $count is not a pair of values in hash assignment of + values @array : $!" if ($count != (2 || 0)); $hash{$array[0]} = $array[1]; } scalar %hash ? \%hash : undef; } sub _moredata_config { my $ctx = shift; my $plugin = MT->component("MoreData"); my $blog = $ctx->stash('blog'); if ( !$blog ) { my $blog_id = $ctx->var('blog_id'); $blog = MT->model('blog')->load($blog_id); } my $blog_id = $blog->id; my $scope = "blog:" . $blog_id; my $config = $plugin->get_config_hash($scope); return $config; } # return content and data strings from string and tags sub _retrieve_strings { my ($str, $opentag, $closetag, $dataname) = @_; my $content = $str; my $datastring = ''; # content is all non-data, datastring all data my @substrings = ($content, $datastring); die "MoreData requires an open tag in plugin configuration: $!" unle +ss (length($opentag)); # check that we have a string with length. my $stringlength = length($str); return \@substrings unless ($stringlength); # return unless opentag is found in string my $openposition = index($str,$opentag); # start of _all_ the data return \@substrings if ($openposition == -1); # close tag cannot be subset of open tag die "MoreData close tag cannot be substring of the open tag: $!" unless (index($opentag,$closetag) == -1); # extract the content and data strings my $closeposition; # end of _all_ the data if ((length($closetag)) && (rindex($str,$closetag) != -1)) { # close +tag found $closeposition = index($str,$closetag); } else { $closeposition = $stringlength; } my $datalength = $closeposition - $openposition; # length of _all_ d +ata die "MoreData open tag must precede the close tag: $!" if ($dataleng +th < 0); #tags in wrong order; $content = substr($str, 0, $openposition) . substr($str, ($closeposi +tion + length($closetag)), $stringlength); #length argument can be be +yond the end # data includes open tag but not close tag $datastring = substr($str, $openposition, $datalength); # _all_ data @substrings = ($content, $datastring); return \@substrings if ($dataname eq '__data__'); # return content a +nd data as strings return \@substrings if ($dataname eq '__content__'); # return conten +t and data as strings return \@substrings unless length($datastring); # no reason to proce +ss empty datastring # search for named daatastring my $nameopentag = $opentag . $dataname . '='; # selected data requir +es the equal sign appended # locate the desired selected data within the datastring my $nameopenposition = index($datastring, $nameopentag); if ($nameopenposition == -1) { # if named data can't be found, retur +n substrings with empty datastring @substrings = ($content, ''); return \@substrings; } my $length_tagname = length($nameopentag); my $namecloseposition; if (index($datastring, $opentag, $nameopenposition + 1) != -1 ) { # + if we find another opentag following current one $namecloseposition = index($datastring, $opentag, $nameopenpositio +n + 1); # find next opentag } elsif (index($datastring, $closetag, $nameopenposition + 1) != -1 +) { # otherwise use closing tag $namecloseposition = index($datastring, $closetag, $nameopenpositi +on + 1); } else { # else just use end of file $namecloseposition = $datalength; } $datalength = $namecloseposition - $nameopenposition - $length_tagna +me; my $datastring = substr($datastring, $nameopenposition + $length_tag +name, $datalength); @substrings = ($content, $datastring); return \@substrings; } sub _escape { my $separator = shift; my $regex = '\\' . join('\\', split(/''/, $separator) ); return $regex; } # Perl trim function to remove whitespace from the start and end of th +e string sub _trim { my $string = shift; $string =~ s/^\s+//; $string =~ s/\s+$//; return $string; } 1; # Copyright 2011 Rick Bychowski # This program is free software; you can redistribute it and/or modify + it # under the terms of either: the GNU General Public License as publish +ed # by the Free Software Foundation; or the Artistic License.