1: #!/usr/bin/perl -w
2: ##############################################################################
3: #
4: # XML2PDF
5: #
6: # Author : HolyGrail
7: # Version: 0.7
8: #
9: # Usage: xml2pdf {<xmlfile>}
10: #
11: # This conversion program translates XML into PDF. You can specify more than one
12: # XML file on the command line and they will all be processed. Each PDF file name
13: # replaces ".xml" with ".pdf", or appends ".pdf" if the source file does not end with
14: # ".xml". It does no XML validation at all. The XML file should conform to the
15: # following DTD:
16: #
17: # <!ELEMENT document (section1)+>
18: # <!ATTLIST document
19: # title CDATA #REQUIRED
20: # >
21: #
22: # <!ELEMENT section1 (section2 | p | img)+>
23: # <!ATTLIST section1
24: # title CDATA #REQUIRED
25: # >
26: #
27: # <!ELEMENT section2 (p | img)+>
28: # <!ATTLIST section2
29: # title CDATA #REQUIRED
30: # >
31: #
32: # <!ELEMENT p (img | #PCDATA)>
33: #
34: # <!ELEMENT img>
35: # <!ATTLIST img
36: # src CDATA #REQUIRED
37: # >
38: #
39: # Known issues:
40: # * 'img'-elements are processed after a complete 'p' element is processed,
41: # so the images that are meant to come 'in' the text, come 'after' the
42: # text. The workaround for this should be to split the text up in multiple
43: # p-elements and put the img-element between two p-elements.
44: # * Images that are wider than the page fall off the page on the right side
45: #
46: # Bugs:
47: # * Lots of them, no doubt
48: #
49: # Changes:
50: #
51: # 0.7: 'img' tags can now also be url's and no longer just local files
52: # 0.6: It now also accepts images (JPG, GIF and PNG), changed the DTD
53: # accordingly.
54: # 0.5: Changed the program call, so you can process multiple files in one call
55: # 0.4: It now can handle sections that don't fit on one page. It starts new
56: # pages if needed.
57: # 0.3: Adapted it to conform to OeufMayo's DTD
58: # 0.2: Minor change (0.1 re-declared the global vars as 'my', thanks to davorg
59: # 0.1: First release
60: #
61: ##############################################################################
62: use strict;
63: use PDFLib;
64: use XML::XPath;
65: use LWP::Simple;
66: use vars qw ( $TOPMARGIN
67: $BOTTOMMARGIN
68: $LEFTMARGIN
69: $RIGHTMARGIN
70: $PAGEHEIGHTPOINTS
71: $PAGEWIDTHPOINTS
72: $MAXYPOS
73: %FONTDEFINITION
74: %IMAGES
75: );
# Define the fonts for each element. Every value is a flat key/value list
# (held in an array ref) that is expanded into $pdf->set_font(...).
%FONTDEFINITION = (
    section1 => [ face => "Helvetica-Bold", size => "16.0" ],
    section2 => [ face => "Helvetica-Bold", size => "14.0" ],
    p        => [ face => "Helvetica",      size => "12.0" ],
);

# Pagesetup
my $pagename = "a4";
$PAGEHEIGHTPOINTS = 842;    # See PDFLib's documentation for
$PAGEWIDTHPOINTS  = 595;    # the size of your page
($TOPMARGIN, $BOTTOMMARGIN, $LEFTMARGIN, $RIGHTMARGIN) = (10, 10, 15, 15);

# Convert every XML file named on the command line into its own PDF.
# A named loop variable is used so that @ARGV is not modified in place.
foreach my $xmlfile (@ARGV)
{
    # Documentsetup
    my $xml     = XML::XPath->new(filename => $xmlfile);
    my $pdffile = pdf_name_for($xmlfile);

    my ($document) = $xml->findnodes('document');
    die "$xmlfile: no <document> root element found\n" unless $document;

    # The DTD declares the attribute as 'title'; 'name' is still honoured as
    # a fallback for files written against the old (mistaken) lookup.
    my $title = $document->getAttribute('title');
    $title = $document->getAttribute('name')
        unless defined $title && length $title;
    $title = '' unless defined $title;

    my $pdf = PDFLib->new( filename  => $pdffile,
                           papersize => $pagename,
                           creator   => "XML2PDF",
                           title     => $title
                         );
    print "Converting $xmlfile to $pdffile \n";

    # Image handles belong to one PDF object; forget those of the previous file.
    %IMAGES = ();

    # pre-process the images:
    foreach my $img ($xml->findnodes('//img'))
    {
        my $src = $img->getAttribute('src');
        unless (defined $src && length $src)
        {
            warn "$xmlfile: <img> element without a 'src' attribute, skipped\n";
            next;
        }
        next if exists $IMAGES{$src};    # load each distinct image only once

        my $filename = $src;
        if ($src =~ m{^https?://}i)
        {
            # Remote image: store it in the current directory first. The
            # name is copied into a lexical before calling getstore(), as
            # passing $1 directly aliases a variable the callee can clobber.
            $filename = local_name_for_url($src);
            unless (defined $filename)
            {
                warn "$xmlfile: cannot derive a local file name from '$src', skipped\n";
                next;
            }
            print $filename."\n";
            my $status = getstore($src, $filename);
            unless (is_success($status))
            {
                warn "$xmlfile: download of '$src' failed (HTTP status $status), skipped\n";
                next;
            }
        }

        $IMAGES{$src} = $pdf->load_image(filetype => image_type_for($src),
                                         filename => $filename );
    }

    # Process the document
    foreach my $section ($xml->findnodes('document/section1'))
    {
        #start every section on a new page
        $pdf->start_page();

        # Calculate the starting Y-axis value
        $MAXYPOS = $PAGEHEIGHTPOINTS - $TOPMARGIN - $BOTTOMMARGIN;

        # Print the header
        my $heading = $section->getAttribute('title');
        $heading = '' unless defined $heading;
        printpdf($pdf, $FONTDEFINITION{section1},
                 "\n".$heading."\n\n", $MAXYPOS);

        processsection($pdf, $section);
    }
}

# Derive the output file name: a trailing ".xml" (any case) is replaced by
# ".pdf"; every other name gets ".pdf" appended, so that a source file that
# itself ends in ".pdf" is never overwritten by its own output.
sub pdf_name_for
{
    my ($xmlfile) = @_;
    my $pdffile = $xmlfile;
    $pdffile .= ".pdf" unless $pdffile =~ s/\.xml$/.pdf/i;
    return $pdffile;
}

# Guess the image type handed to load_image() from the end of the source
# name (a query string or fragment is ignored). Anything that is not
# recognisably JPEG or PNG is treated as GIF, the script's historic default.
sub image_type_for
{
    my ($src) = @_;
    my $path = defined $src ? $src : '';
    $path =~ s/[?#].*$//s;
    return 'jpeg' if $path =~ /(?:jpe?g|\.jpe)$/i;
    return 'png'  if $path =~ /png$/i;
    return 'gif';
}

# Return the last path segment of a URL (it must contain a dot) for use as
# the local download name, or undef when the URL has no such segment.
sub local_name_for_url
{
    my ($url) = @_;
    my $path = defined $url ? $url : '';
    $path =~ s/[?#].*$//s;
    if ($path =~ m{^[a-z][a-z0-9+.\-]*://[^/]+/(?:.*/)?([^/]+\.[^/]+)$}i)
    {
        return $1;
    }
    return undef;
}
140:
# printpdf($pdf, $font, $what, $ypos)
#
# Prints the text $what in the given font, flowing it over as many pages as
# needed.
#
#   $pdf  - the PDF object (set_font, print_boxed, get_value and start_page
#           are called on it)
#   $font - array ref with the key/value list handed to set_font
#   $what - the text to print
#   $ypos - optional height of the first text box; when false the current
#           "texty" value of $pdf is used instead
#
# Each pass asks print_boxed to place the text in a box spanning the page
# width between the margins; its return value is treated as the number of
# characters that did not fit. Whatever is left is carried over to a new
# page, where a full-height box is used.
sub printpdf
{
    my ($pdf, $font, $what, $ypos) = @_;
    $what = '' unless defined $what;
    $pdf->set_font( @{$font} );

    my $fullheight = $PAGEHEIGHTPOINTS - $TOPMARGIN - $BOTTOMMARGIN;
    my $freshpage  = 0;    # true once this call has started a page itself

    while (length $what)
    {
        my $before    = length $what;
        my $remaining = $pdf->print_boxed($what,
                                          mode => "left",
                                          x    => $LEFTMARGIN,
                                          y    => $BOTTOMMARGIN,
                                          w    => $PAGEWIDTHPOINTS - $RIGHTMARGIN -
                                                  $LEFTMARGIN,
                                          h    => $ypos || $pdf->get_value("texty")
                                         );
        last unless $remaining;                          # everything fitted
        $remaining = $before if $remaining > $before;

        # Nothing fitted even in a full-height box on a brand-new page:
        # retrying can never succeed, so stop instead of emitting empty
        # pages forever.
        if ($remaining == $before && $freshpage)
        {
            warn "printpdf: $remaining character(s) do not fit on an empty page, text dropped\n";
            last;
        }

        # Keep only the tail that was not printed and continue on a new page
        $what = substr($what, $before - $remaining);
        $pdf->start_page();
        $freshpage = 1;
        $ypos      = $fullheight;
    }
}
162:
# processsection($pdf, $section)
#
# Renders every child element of $section onto $pdf, in document order:
#
#   section2 - its title is printed in the section2 font, then its children
#              are processed recursively
#   p        - its text is printed in the p font, then its children are
#              processed recursively (so images inside a paragraph end up
#              after the paragraph's text)
#   img      - the image pre-loaded into %IMAGES under the element's 'src'
#              is placed at the left margin below the current text position,
#              on a new page when it would reach into the bottom margin
#
# Elements with any other name are ignored.
sub processsection
{
    my ($pdf, $section) = @_;

    foreach my $node ($section->findnodes('*'))
    {
        my $name = $node->getName();
        $name = '' unless defined $name;

        if ($name eq "section2")
        {
            my $title = $node->getAttribute('title');
            $title = '' unless defined $title;
            printpdf($pdf, $FONTDEFINITION{$name}, "\n".$title."\n");
            processsection($pdf, $node); #recurse!
        }
        elsif ($name eq "p")
        {
            printpdf($pdf, $FONTDEFINITION{$name},
                     "\n".$node->string_value()."\n");
            processsection($pdf, $node); #recurse!
        }
        elsif ($name eq "img")
        {
            my $src   = $node->getAttribute('src');
            my $image = defined $src ? $IMAGES{$src} : undef;

            # An image that was never loaded must not abort the whole run.
            unless ($image)
            {
                warn "processsection: image '"
                   . (defined $src ? $src : '')
                   . "' was not loaded, skipped\n";
                next;
            }

            my $height = $image->height();

            # Move to a new page when the image would cross the bottom margin
            if ($pdf->get_value('texty') - $height < $BOTTOMMARGIN)
            {
                $pdf->start_page();
                $pdf->set_text_pos($pdf->get_value('textx'),
                                   $PAGEHEIGHTPOINTS - $TOPMARGIN);
            }

            # Place the image and continue the text just below it
            my $ypos = $pdf->get_value('texty') - $height;
            $pdf->add_image(img => $image,
                            x   => $LEFTMARGIN,
                            y   => $ypos);
            $pdf->set_text_pos($pdf->get_value('textx'), $ypos);
        }
    }
}
Re: XML2PDF by davorg (Chancellor) on Jul 02, 2001 at 18:05 UTC |
That looks very interesting. I'd never heard of
PDFLib.pm (or, indeed, pdflib). I'll be taking a closer look
at them.
One question tho'. You declare a number of package
variables with this code:
use vars qw ( $TOPMARGIN
$BOTTOMMARGIN
$LEFTMARGIN
$RIGHTMARGIN
$PAGEHEIGHTPOINTS
$PAGEWIDTHPOINTS
);
And then later you declare a number of lexical variables
with the same names with this code:
my $PAGEHEIGHTPOINTS = 842;
my $PAGEWIDTHPOINTS = 595; # the size of your page
my ($TOPMARGIN, $BOTTOMMARGIN, $LEFTMARGIN,
$RIGHTMARGIN) = (10, 10, 15, 15);
You then go on to use the lexical variables, but never
the package variables. Is this left over from an earlier
version of the code?
--
<http://www.dave.org.uk>
Perl Training in the UK <http://www.iterative-software.com> | [reply] [d/l] [select] |
|
Absolutely! And thank you, I've updated it now!
--HolyGrail
| [reply] |
Re: XML2PDF by the_0ne (Pilgrim) on Jul 03, 2001 at 08:42 UTC |
I finally find somebody that is using pdflib also. We've used this module for over 2 1/2 years on a pretty large shipping company's site. Back then there was no real pdf perl module, so pdflib was what we used. It was pretty easy and very stable. In fact, so stable, that we're still using version 2.0 and they're now up to 4.x. No sense changing what isn't broken.
Thanks for the node.
Update
Forgot to mention the use of XML is great. Starting to make me rethink our site design now. :) | [reply] |
|
I've been using it as well and while it has really saved us tons in development time it doesn't have or do everything we need.
Anyone interested in PDF document dispatch might also wish to check out HTMLDOC
It can be called in a Perl script using system. I use a homegrown templating routine to generate an RG authorization complete with tabular layout, store it temporarily as HTML from a MySQL table, and then pass the temp file name to HTMLDOC and poof! instant PDF file ready for faxing.
It's even easier than PDFLib. Here's a fragment from an actual app
# Fragment: render the RGA form as HTML and let HTMLDOC turn it into a PDF.
# $RGA_num and the variables named by the #...# placeholders are expected to
# be set by the surrounding application.
our $template;
template_fun();    # fills $template with the raw HTML form

# Replace every #name# placeholder with the value of the package variable
# $name (a symbolic lookup, so lexical 'my' variables are not seen). The
# name must be matched as a whole word: a single-character class would leave
# placeholders such as #companyName# untouched. Unset variables become ''.
{
    no strict 'refs';
    $template =~ s/\#(\w+)\#/defined ${$1} ? ${$1} : ''/ge;
}
# see update note

# Build a temp file name from the current time; ' ', ':' and '-' become '_'
my $tmstamp = localtime;
$tmstamp =~ tr/ :\-/_/;
my $tmpfile = "D:\\pdfs\\" . $tmstamp . ".html";

open(my $htmlfile, '>', $tmpfile) || die "cannot do it: $!";
print {$htmlfile} $template;
close($htmlfile) || die "cannot finish writing $tmpfile: $!";

## here's where we make the actual system call
chdir("D:\\HTMLDOC\\") || die "cannot change to D:\\HTMLDOC\\: $!";

# List form: the arguments reach htmldoc as-is, without shell quoting issues
my $pdfpath = "D:\\pdfs\\" . $RGA_num . ".pdf";
my $status  = system("htmldoc", "-t", "pdf", "--webpage", "-f", $pdfpath, $tmpfile);
warn "htmldoc did not succeed for $tmpfile (status $status)\n" if $status != 0;

unlink($tmpfile) || warn "cannot remove $tmpfile: $!";
# template_fun()
#
# Stores the HTML form for a "Returned Goods Authorization" in the package
# variable $template and returns it. The #name# markers in the text are
# placeholders for the caller to fill in. The text is single-quoted, so the
# backslashes in the image path are kept literally.
sub template_fun {
$template = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN">
<HTML>
<HEAD>
<TITLE></TITLE>
</HEAD>
<BODY>
<center>Returned Goods Authorization</center>
<TABLE border=0 width=595>
<TR>
<TD width=85 valign=top>
<P>
<IMG SRC="D:\inetpub\wwwroot\images\D2.jpg" ALIGN=left><BR clear=all>
</TD>
<TD width=450 valign=top><H2>#companyName#<BR>
<font size=-2>#ourphone# #ourfax#</font></H2>
</TD>
</TR>
<TR>
<TD width=85> </TD>
<TD width=450>
<TABLE BORDER=0 width=350>
<TR>
<TD valign=top>To: #attn#<BR>
Company: #cust_name#<BR>
Phone: #phone#<BR>
#dmethod#:
#fax#
</TD>
<TD valign=top>From: #tech#<BR>
Fax back: #our_fax#<BR>
Reason for return: #code#<BR>
Date: #date#<BR>
Order number: #ord_num#
</TD>
</TR>
</TABLE><P>
<font size=+1>RGA number: #RGA_num#</font> <font size=2>(Write this number on the outside of your package)</font>
#raddress#
</TD>
</TR>
</TABLE>
</BODY>
</HTML>';
return $template;
}
# Update: just read Death to Dot Star! | [reply] [d/l] |
|
|