perlquestion
ajt
object
More Learned Monks I seek wisdom!
<p>I've recently been playing about with RSS News Feeds ([id://161895]). Overall I've found that they are easy and fun to deal with, but I found that the code I was working with could be "modulised".
<p>I've only ever designed one module before, [id://113685] - now much improved, and never a OO-module. I've got [TheDamian]'s exellect [isbn://1884777791]/<a href="http://www.manning.com/conway/">Manning Link</a>, which I've read several times.
<p>When I read the book, and other OO Tutorials, it all seems to make perfect sense, and I have no problem with using OO-modules, I'm just not convinced I know enough to make one.
<p>I've got all sorts of questions, and I'm sure that there is guidance that can help me resolve my problems. Here are some of the problems I see with my design:
<ul>
<li>Should my module's methods croak, or return error codes when they fail?</li>
<li>How much internal error checking is sensible? Some external calls will fail anyway if I pass duff data</li>
<li>Should I use generic methods that can both get and set, or is a single one each, e.g. <code>$o->debug</code> or <code>$o->set_debug(1); print $o->get_debug</code>?</li>
<li>How many interfaces are useful, e.g. a parse_string, parse_file, parse_uri, parse_filehandle. Where should I draw the line between sensible part of the module and unnessesary bloat?</li>
<li>If I want to submit it to CPAN, how do I go about it? I know CPAN has instructions, but USENET seems a scary place, and I don't know if the code is worth anything to anyone else?</li>
<li>What works best, lots of methods to set things, or via the constructor in one go?</li>
<li>Where can I look for a good tutorial for OO module design? I've seen lots of OO tutorials, but I know how to use an OO-module, and I know what OO is, I want to know how to make OO-modules.</li>
<li>I've included my prototype below, it's a very rough 0.01 version, but it does pass my tests and it actually works. I'd love constructive comments, as OO is still new to me.</li>
</ul>
<p>As ever humble thanks in adavnce...
<readmore>
<p>Here is my first stab. I have example files, and I'm working of test stuff, naturally. <strong>SCARY prototype code to follow:</strong>
<code>
# --------------------------------------------------
#
# XML::RSS::Tools
# Version 0.01
# April 2002
# Copyright iredale Consulting, all rights reserved
# http://www.iredale.net/
#
# --------------------------------------------------
# --------------------------------------------------
# Module starts here...
# --------------------------------------------------
package XML::RSS::Tools;
use 5.006;
use strict;
use warnings;
use Diagnostics;
use Carp;
#use HTTP::GHTTP; # Delayed loading
#use LWP::UserAgent; # In case you don't have GHTTP
use XML::RSS; # Handle the RSS/RDF files
use XML::LibXML; # Hand the XML file for XSL-T
use XML::LibXSLT; # Hand the XSL file and do the XSL-T
use HTML::Entities; # Try and fix entities
require Exporter;
our $VERSION = '0.01';
our @ISA = qw(Exporter);
# Preloaded methods go here.
#{
# Class Data
#}
#
# Tools Constructor
#
sub new {
my $self = shift;
my %args = @_;
bless {
_rss_version => $args{version} || 0.91,
_debug => $args{debug} || 4,
_xml_string => "",
_xsl_string => "",
_output_string => "",
_transformed => 0,
_auto_wash => $args{auto_wash} || 1,
}, $self;
}
#
# Output what we have as a string
#
sub as_string {
my $self = shift;
my $mode = shift;
if ($mode) {
if ($mode =~ /rss/i) {
croak "No RSS File to output" unless $self->{_rss_string};
return $self->{_rss_string};
} elsif ($mode =~ /xsl/i) {
croak "No XSL Template to output" unless $self->{_xsl_string};
return $self->{_xsl_string};
} else {
croak "Unknown mode: $mode";
}
} else {
croak "Nothing to output..." unless $self->{_transformed};
return $self->{_output_string};
}
}
#
# Set/Read the debug level
#
sub debug {
my $self = shift;
my $debug = shift;
$self->{_debug} = $debug if defined $debug;
return $self->{_debug};
}
#
# Set/Read the auto_wash level
#
sub auto_wash {
my $self = shift;
my $wash = shift;
$self->{_auto_wash} = $wash if defined $wash;
return $self->{_auto_wash};
}
#
# Get the RSS Version
#
sub get_version {
my $self = shift;
return $self->{_rss_version};
}
#
# Set the RSS Version
#
sub set_version {
my $self = shift;
my $version = shift;
$self->{_rss_version} = $version if defined $version;
return $self->{_rss_version};
}
#
# Load an RSS file, and call RSS conversion to standard RSS format
#
sub rss_file {
my $self = shift;
my $file_name = shift;
_check_file($file_name);
open SOURCE_FILE, "<$file_name" or croak "Unable to open $file_name for reading";
$self->{_rss_string} = _load_filehandle(\*SOURCE_FILE);
close SOURCE_FILE;
_parse_rss_string($self);
$self->{_transformed} = 0;
}
#
# Load an XSL file
#
sub xsl_file {
my $self = shift;
my $file_name = shift;
_check_file($file_name);
open SOURCE_FILE, "<$file_name" or croak "Unable to open $file_name for reading";
$self->{_xsl_string} = _load_filehandle(\*SOURCE_FILE);
close SOURCE_FILE;
$self->{_transformed} = 0;
}
#
# Load an RSS file via HTTP and call RSS conversion to standard RSS format
#
sub rss_uri {
my $self = shift;
my $uri = shift;
$self->{_rss_string} = _http_get($uri);
_parse_rss_string($self);
$self->{_transformed} = 0;
}
#
# Load an XSL file via HTTP
#
sub xsl_uri {
my $self = shift;
my $uri = shift;
$self->{_xsl_string} = _http_get($uri);
$self->{_transformed} = 0;
}
#
# Parse a string and convert to standard RSS
#
sub rss_string {
my $self = shift;
my $xml = shift;
croak "Nothing to parse" unless $xml;
$self->{_rss_string} = $xml;
_parse_rss_string($self);
$self->{_transformed} = 0;
}
#
# Import an XSL from string
#
sub xsl_string {
my $self = shift;
my $xml = shift;
croak "No XSL-T" unless $xml;
_$self->{_xsl_string} = $xml;
$self->{_transformed} = 0;
}
#
# Do the transformatoin
#
sub transform {
my $self = shift;
croak "No XSL-T loaded" unless $self->{_xsl_string};
croak "No RSS loaded" unless $self->{_rss_string};
croak "Can't transform twice without a change" if $self->{_transformed};
my $xslt = XML::LibXSLT->new;
my $xml_parser = XML::LibXML->new;
$xml_parser->keep_blanks(0);
my $source_xml = $xml_parser->parse_string($self->{_rss_string}); # Parse the source XML
my $style_xsl = $xml_parser->parse_string($self->{_xsl_string}); # and Template XSL files
my $stylesheet = $xslt->parse_stylesheet($style_xsl); # Load the parsed XSL into XSLT
my $result_xml = $stylesheet->transform($source_xml); # Transform the source XML
$self->{_output_string} = $stylesheet->output_string($result_xml); # Store the result
$self->{_transformed} = 1;
}
# ---------------
# Private Methods
# ---------------
#
# Parse the RSS string
#
sub _parse_rss_string {
my $self = shift;
my $xml = $self->{_rss_string};
$xml = _wash_xml($xml) if $self->{_auto_wash};
my $rss = XML::RSS->new;
$rss->parse($xml);
if ($rss->{version} != $self->{_rss_version}) {
$rss->{output} = $self->{_rss_version};
$xml = $rss->as_string;
}
$self->{_rss_string} = $xml;
}
#
# Load file from File Handle
#
sub _load_filehandle {
my $handle = shift;
my $content;
while (<$handle>) {
$content .= $_
}
return $content;
}
#
# Wash the XML File of known nasties
#
sub _wash_xml {
my $xml = shift;
my %entity = (
trade => "™",
);
my $entities = join('|', keys %entity);
decode_entities($xml); # Try and deal with known entities
$xml =~ s/&($entities);/$entity{$1}/g; # Deal with odd entities
$xml =~ s/&(?!(#[0-9]+|#x[0-9a-fA-F]+|\w+);)/&/g; # Matt's ampersand entity fixer
$xml =~ s/\s+/ /gs;
$xml =~ s/> />/g;
return $xml
}
#
# Check that the requested file is there and readable
#
sub _check_file {
my $file_name = shift;
croak "No file name supplied" unless $file_name;
croak "Cannot find $file_name" unless -e $file_name;
croak "$file_name isn't a real file" unless -f _;
croak "Cannot read $file_name" unless -r _;
croak "$file_name is zero bytes long" if -z _;
}
#
# Grab something via HTTP
#
sub _http_get {
my $uri = shift;
my $xml;
eval { # Try and use Gnome HTTP, it's faster
require HTTP::GHTTP;
};
if ($@) { # Otherwise use LWP
require LWP::UserAgent;
my $ua = LWP::UserAgent->new;
$ua->agent("iC-XML::RSS::Tools/$VERSION " . $ua->agent . " ($^O)");
my $response = $ua->request(HTTP::Request->new('GET', $uri));
croak "HTTP error: " . $response->status_line if $response->is_error;
$xml = $response->content();
} else {
my $r = HTTP::GHTTP->new($uri);
$r->process_request;
$xml = $r->get_body;
croak "HTTP error: Unable to connect to server.\n" unless $xml;
my ($status, $message) = $r->get_status;
croak "HTTP error: $status, $message\n" unless $status == 200;
}
return $xml;
}
1;
__END__
=head1 NAME
XML::RSS::Tools - Perl extension for very high level RSS Feed manipulation
=head1 SYNOPSIS
use XML::RSS::Tools;
my $rss_feed = XML::RSS::Tools->new;
$rss_feed->rss_uri('http:://foo/bar.rdf');
$rss_feed->xsl_file('/my/rss_transformation.xsl');
$rss_feed->transform;
print $rss_feed->as_string;
=head1 DESCRIPTION
RSS/RDF feeds are commonly available ways of distributing the latest news about a given web site for news syndication. This module provides a VERY high level way of manipulating them. You can easily use LWP, the XML::RSS and XML::LibXSLT do to this yourself.
=head2 EXPORT
None by default.
=head1 AUTHOR
Adam Trickett, E<lt>adam@iredale.net<gt>
=head1 SEE ALSO
C<perl>, C<XML::RSS>, C<XML::LibXSLT>, C<LWP> and C<HTTP::GHTTP>.
=head1 COPYRIGHT
XML::RSS::Tools, Copyright iredale Consulting 2002
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
=cut
</code>