Beefy Boxes and Bandwidth Generously Provided by pair Networks
go ahead... be a heretic
 
PerlMonks  

Comment on

( #3333=superdoc: print w/ replies, xml ) Need Help??
    0: #!/usr/bin/perl -w
    1: ##############################################################################
    2: #
    3: # XML2PDF
    4: #
    5: # Author : HolyGrail
    6: # Version: 0.7
    7: #
    8: # Usage: xml2pdf {<xmlfile>}
    9: #
    10: # This conversion program translates XML into PDF. You can specify more than one
    11: # XML file on the command line and they will all be processed. Each PDF file name
    12: # replaces ".xml" with ".pdf", or appends ".pdf" if the source file does not end with
    13: # ".xml". It does no XML validation at all. The XML file should conform to the 
    14: # following DTD:
    15: #
    16: # <!ELEMENT document (section1)+>
    17: # <!ATTLIST document
    18: #     title CDATA #REQUIRED
    19: # >
    20: # 
    21: # <!ELEMENT section1 (section2 | p | img)+>
    22: # <!ATTLIST section1
    23: #     title CDATA #REQUIRED
    24: # >
    25: #
    26: # <!ELEMENT section2 (p | img)+>
    27: # <!ATTLIST section2
    28: #     title CDATA #REQUIRED
    29: # >
    30: #
    31: # <!ELEMENT p (#PCDATA | img)*>
    32: #
    33: # <!ELEMENT img EMPTY>
    34: # <!ATTLIST img
    35: #     src CDATA #REQUIRED
    36: # >
    37: # 
    38: # Known issues:
    39: # * 'img'-elements are processed after a complete 'p' element is processed, 
    40: #   so the images that are meant to come 'in' the text, come 'after' the 
    41: #   text. The workaround for this should be to split the text up in multiple 
    42: #   p-elements and put the img-element between two p-elements.
    43: # * Images that are wider than the page fall off the page on the right side
    44: #
    45: # Bugs:
    46: # * Lots of them, no doubt
    47: #
    48: # Changes:
    49: #
    50: # 0.7: The 'src' of an 'img' tag can now also be a URL, no longer just a local file
    51: # 0.6: It now also accepts images (JPG, GIF and PNG), changed the DTD 
    52: #      accordingly.
    53: # 0.5: Changed the program call, so you can process multiple files in one call
    54: # 0.4: It now can handle sections that don't fit on one page. It starts new 
    55: #      pages if needed.
    56: # 0.3: Adapted it to conform to OeufMayo's DTD
    57: # 0.2: Minor change (0.1 re-declared the global vars as 'my', thanks to davorg)
    58: # 0.1: First release
    59: #
    60: ##############################################################################
    61: use strict;
    62: use PDFLib;
    63: use XML::XPath;
    64: use LWP::Simple;
    65: use vars qw ( $TOPMARGIN 
    66:               $BOTTOMMARGIN 
    67:               $LEFTMARGIN 
    68:               $RIGHTMARGIN 
    69:               $PAGEHEIGHTPOINTS 
    70:               $PAGEWIDTHPOINTS
    71: 	      $MAXYPOS
    72: 	      %FONTDEFINITION
    73: 	      %IMAGES
    74:              );
    # Font used for each kind of element. Every entry is a flat key/value list
    # that printpdf() hands straight to $pdf->set_font().
    %FONTDEFINITION = (
      section1 => [ face => "Helvetica-Bold", size => "16.0" ],
      section2 => [ face => "Helvetica-Bold", size => "14.0" ],
      p        => [ face => "Helvetica",      size => "12.0" ],
    );

    # Page setup. The point dimensions have to agree with the paper size named
    # in $pagename; see PDFLib's documentation for the size of your page.
    my $pagename = "a4";
    ($PAGEWIDTHPOINTS, $PAGEHEIGHTPOINTS) = (595, 842);

    # Margins, in points.
    $TOPMARGIN  = $BOTTOMMARGIN = 10;
    $LEFTMARGIN = $RIGHTMARGIN  = 15;
    86: 
    # Convert every XML file named on the command line into its own PDF file.
    foreach my $xmlfile (@ARGV)
    {
      # Documentsetup
      my $xml = XML::XPath->new(filename => $xmlfile);

      # Build the output name on a copy, so @ARGV is left untouched, and always
      # append ".pdf" when the source does not end in ".xml". A source that
      # happens to be called "foo.pdf" must not be overwritten by its own output.
      my $pdffile = $xmlfile;
      $pdffile .= ".pdf" unless $pdffile =~ s/\.xml$/.pdf/i;

      my ($document) = $xml->findnodes('document');
      unless ($document)
      {
        warn "Skipping $xmlfile: no 'document' root element found\n";
        next;
      }

      # The DTD in the header declares the attribute as 'title'; 'name' is still
      # accepted as a fallback for files written against the old behaviour.
      my $title = $document->getAttribute('title');
      $title = $document->getAttribute('name')
        unless defined $title && length $title;

      my $pdf = PDFLib->new( filename => $pdffile,
                             papersize=> $pagename,
                             creator  => "XML2PDF",
                             title    => $title
                            );
      print "Converting $xmlfile to $pdffile \n";

      # pre-process the images: load each one once, keyed by its 'src' value,
      # so processsection() can look it up later.
      foreach my $img ($xml->findnodes('//img'))
      {
        my $src = $img->getAttribute('src');
        die "$xmlfile: 'img' element without a 'src' attribute\n"
          unless defined $src && length $src;

        my $filename = $src;

        if ($src =~ m{^https?://}i)
        {
          # Remote image: store it in the current directory under the last
          # path component of the URL (any query string or fragment dropped).
          ($filename) = $src =~ m{/([^/?#]+)[^/]*\z}
            or die "$xmlfile: cannot derive a local file name from $src\n";
          print $filename."\n";

          my $status = getstore($src, $filename);
          die "$xmlfile: cannot download $src (HTTP status $status)\n"
            unless is_success($status);
        }

        # Pick the image type from the file name; anything unrecognised is
        # treated as GIF, which was the original fallback.
        my $filetype = $filename =~ /(?:jpg|jpeg)$/i ? 'jpeg'
                     : $filename =~ /png$/i          ? 'png'
                     :                                 'gif';

        $IMAGES{$src} =
           $pdf->load_image(filetype => $filetype,
                            filename => $filename );
      }

      # Process the document
      foreach my $section ($xml->findnodes('document/section1'))
      {
        #start every section on a new page
        $pdf->start_page();

        # Calculate the starting Y-axis value
        $MAXYPOS = $PAGEHEIGHTPOINTS - $TOPMARGIN - $BOTTOMMARGIN;

        # Print the header
        printpdf($pdf, $FONTDEFINITION{section1},
                 "\n".$section->getAttribute('title')."\n\n", $MAXYPOS);

        processsection($pdf, $section);
      }
    }
    139: 
    # printpdf($pdf, $font, $what, $ypos)
    #
    # Prints the text $what in the given font, continuing on new pages for as
    # long as text is left over.
    #
    #   $pdf   - the PDF object to print on
    #   $font  - array ref of key/value pairs, passed on to $pdf->set_font()
    #   $what  - the text to print
    #   $ypos  - optional height of the first text box; when false, the current
    #            'texty' value of $pdf is used instead
    #
    # The value returned by print_boxed() is treated as the number of trailing
    # characters of the text that did not fit into the box.
    sub printpdf
    {
        my ($pdf, $font, $what, $ypos) = @_;
        $pdf->set_font( @{$font} );

        my $fullheight = $PAGEHEIGHTPOINTS - $TOPMARGIN - $BOTTOMMARGIN;
        my $freshpage  = 0;  # true once the box covers a whole, new page

        while (length $what)
        {
          my $charsnotprinted = $pdf->print_boxed($what,
                              mode => "left",
                              x    => $LEFTMARGIN,
                              y    => $BOTTOMMARGIN,
                              w    => $PAGEWIDTHPOINTS - $RIGHTMARGIN -
                                      $LEFTMARGIN,
                              h    => $ypos || $pdf->get_value("texty")
                           );
          last unless $charsnotprinted;

          # Never trust a leftover count larger than the text itself.
          $charsnotprinted = length($what) if $charsnotprinted > length($what);

          # Nothing fitted even though a whole empty page was available:
          # another page cannot help, so stop instead of adding pages forever.
          if ($freshpage && $charsnotprinted == length($what))
          {
            warn "printpdf: text does not fit on an empty page, skipping it\n";
            last;
          }

          # Keep only the unprinted tail and continue on a new, full-height page.
          $what = substr($what, length($what) - $charsnotprinted);
          $pdf->start_page();
          $ypos      = $fullheight;
          $freshpage = 1;
        }
    }
    161: 
    # processsection($pdf, $section)
    #
    # Renders the element children of $section onto $pdf:
    #   section2 - prints its title, then processes its own children
    #   p        - prints its text, then processes its children (which places
    #              any 'img' inside the paragraph after the paragraph's text)
    #   img      - places the image pre-loaded into %IMAGES under its 'src',
    #              starting a new page first if it does not fit below the
    #              current text position
    # Any other element is ignored.
    sub processsection
    {
      my ($pdf, $section) = @_;
      foreach my $node ($section->findnodes('*'))
      {
        my $name = $node->getName();

        if ($name eq "section2")
        {
          printpdf($pdf, $FONTDEFINITION{$name},
                   "\n".$node->getAttribute('title')."\n");
          processsection($pdf, $node); #recurse!
        }
        elsif ($name eq "p")
        {
          printpdf($pdf, $FONTDEFINITION{$name},
                   "\n".$node->string_value()."\n");
          processsection($pdf, $node); #recurse!
        }
        elsif ($name eq "img")
        {
          my $src   = $node->getAttribute('src');
          my $image = defined $src ? $IMAGES{$src} : undef;

          # An image that was never loaded cannot be placed; skip it instead
          # of dying on a method call on an undefined value.
          unless ($image)
          {
            warn "processsection: no loaded image for src '"
               . (defined $src ? $src : '') . "', skipping it\n";
            next;
          }

          my $height = $image->height();

          # Not enough room left below the text: continue at the top of a
          # new page.
          if ($pdf->get_value('texty') - $height < 0)
          {
            $pdf->start_page();
            $pdf->set_text_pos($pdf->get_value('textx'), $PAGEHEIGHTPOINTS - $TOPMARGIN);
          }

          $pdf->add_image(img => $image,
                          x   => $LEFTMARGIN,
                          y   => $pdf->get_value('texty') - $height);

          # Move the text position below the image so following text does
          # not overlap it.
          $pdf->set_text_pos($pdf->get_value('textx'),
                             $pdf->get_value('texty') - $height);
        }
      }
    }
    

In reply to XML2PDF by holygrail

Title:
Use:  <p> text here (a paragraph) </p>
and:  <code> code here </code>
to format your post; it's "PerlMonks-approved HTML":



  • Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!
  • Read Where should I post X? if you're not absolutely sure you're posting in the right place.
  • Please read these before you post! —
  • Posts may use any of the Perl Monks Approved HTML tags:
    a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr
  • You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)
            For:     Use:
    & &amp;
    < &lt;
    > &gt;
    [ &#91;
    ] &#93;
  • Link using PerlMonks shortcuts! What shortcuts can I use for linking?
  • See Writeup Formatting Tips and other pages linked from there for more info.
  • Log In?
    Username:
    Password:

    What's my password?
    Create A New User
    Chatterbox?
    and the web crawler heard nothing...

    How do I use this? | Other CB clients
    Other Users?
    Others contemplating the Monastery: (7)
    As of 2015-07-28 08:54 GMT
    Sections?
    Information?
    Find Nodes?
    Leftovers?
      Voting Booth?

      The top three priorities of my open tasks are (in descending order of likelihood to be worked on) ...









      Results (254 votes), past polls