Beefy Boxes and Bandwidth Generously Provided by pair Networks
XP is just a number
 
PerlMonks  

Comment on

( #3333=superdoc: print w/ replies, xml ) Need Help??
The code of pdfmerger.pl
=pod =head1 NAME pdfmerger.pl =head1 SYNOPSIS $perl pdfmerger.pl -outfile <file> [-inpath <directory>] [-inmask <regex>] [-logfile <file>] [-engine <pdf-engine>] [-order <file 1>] [-order <file 2>] [-order <file n> +] [-bookmark] [-help] [@optionsfile] =head1 ARGUMENTS -outfile : req. : none : file to create -inpath : opt. : ./ : path to source-files -inmask : opt. : /./ : regex to match files (no glob! +) -logfile : opt. : pdfmerger.log : name of the logfile -bookmark : opt. : false : add bookmarks -help : opt. : no : show help -@optionsfile : opt. : none : file with options -order : opt. : none : sort merged files -engine : opt. : reuse : choose between api2|acro|reuse -backup : opt. : false : backup existing -outfile =head1 DESCRIPTION: pdfmerger.pl merges multiple Adobe PDF-files to a single document. it supports multiple "engines" to produce pdf, namly =over 3 =item PDF::API2 due to a bug in PDF::Api2, it seems to be impossible to merge files that were merged before. =item PDF::Reuse I encountered a bug with Reuse and documents that were produced by MS Word/Acrobat PDF::Writer. =item Win32 Acrobat OLE-Automation No bookmarks, due to the limitations of the Acrobat-Object-Model. =back When the -order option is used only the mentioned files are merged. @optionsfile is a file that contains options for the script, one per l +ine. This is especially convenient for storing long -order - lists. =head1 AUTOR <a href="http://holli.perlmonk.org/">holli</a> =head1 PREREQUISITIES: Getopt::ArgvFile Getopt::Attribute POD::Usage File::Log::Shortcut File::Copy File::Spec File::Basename PDF::API2; PDF::Reuse; Win32::OLE =head1 HISTORY: version 0.1 =cut # version our $VERSION = 0.1; # modules use strict; use warnings; use Getopt::ArgvFile; use Getopt::Attribute; use Pod::Usage; use File::Log::Shortcut; use File::Spec qw(rel2abs); use File::Copy; use File::Basename qw (fileparse); use PDF::API2; use PDF::Reuse; # arguments our $inpath : Getopt(inpath=s ./); our $inmask : Getopt(inmask=s .); our $outfile : Getopt(outfile=s); our $logfile : Getopt(logfile=s pdfmerger.rpt); our $bookmark : Getopt(bookmark); our $help : Getopt(help); our $engine : Getopt(engine=s api2); our @order : Getopt(order=s); our $backup : Getopt(backup); # Help? pod2usage(-exitval=>0, -verbose=>1) if $help; # inpath? pod2usage(-message => "'$inpath' is no valid directory!\n", -exitval=> +1, -verbose=>1) unless -e $inpath && -d $inpath; $inpath .= "/" unless $inpath =~ m:/$:; # outfile? pod2usage(-message => "-outfile missing!\n", -exitval=>1, -verbose=>1) unless $outfile; pod2usage(-message => "-outfile is a directory!\n", -exitval=>1, -verb +ose=>1) if -e $outfile && -f $outfile; # build file list my $i=0; my %files = map { $_ => ++$i } grep { /$inmask/i } glob ( "$inpath*.pd +f" ); $i=0; my %order = @order ? map { $inpath.$_ => ++$i } @order : %files; my @dateien = grep { defined $files{$_} } sort { $order{$a} <=> $order +{$b} } keys %order; die "no files to merge!\n" unless @dateien; # create log my $log = File::Log::Shortcut->new ( { storeExpText => 1, # Store internally all + exp text authorName => "Holli", logFileName => $logfile, versionFrom => "20050206", } ); $log->pProgramHeader(2); # create pdf eval { $log->msg(2, "using Engine: $engine\n"); write_pdfs ($outfile, @dateien); }; # catch errors if ( $@ ) { $log->msg(2,"\n!**ABORT**!\n$@\n$!\n"); exit 1; } $log->pProgramFooter(2); #engine-dispatcher sub write_pdfs { my ($outputpdf, $outdir) = fileparse ($_[0]); if ( $backup ) { if ( -e "$outdir$outputpdf") { # backup existing file my $i=0; while ( -e "$outdir$outputpdf.$i" ) { $i++ }; move ("$outdir$outputpdf", "$outdir$outputpdf.$i") or die "File '$outputpdf' cannot be renamed\n"; $log->msg(2, "'$outputpdf' renamed to '$outputpdf.$i'\n"); } } else { unlink "$outdir$outputpdf" if -e "$outdir$outputpdf"; } if ( $engine eq "acrobat" ) { write_pdf_acro (@_); } elsif ( $engine eq "api2" ) { write_pdf_api2 (@_); } elsif ( $engine eq "reuse" ) { write_pdf_reuse (@_); } else { die "Unknow PDF-Engine!\n"; } -e "$outdir$outputpdf" ? $log->pFileInfo(2, "$outdir$outputpdf", "File written $outdir$ +outputpdf : ") : die "File '$outputpdf' not written!\n"; } sub write_pdf_api2 { my ($outputpdf, $outdir) = fileparse (shift @_); my @pdfFiles = @_; my ( $file, $pdf, $root, ); $pdf = PDF::API2->new( -file => $outdir."\\".$outputpdf ); #default mediabox to A4 $pdf->mediabox (0,0,594.9,841.3597); $root = $pdf->outlines; my $import_page = 0; my $document_page = 0; foreach $file ( @pdfFiles ) { my ($inputpdf, $inputdir) = fileparse (shift @_); my $input = PDF::API2->open( $file ); my @pages = 1 .. $input->pages; if ( scalar @pages > 0 ) { my $outline; $outline = $root->outline if $bookmark; foreach ( @pages ) { ++$import_page; ++$document_page; my $page = $pdf->importpage($input, $_, $import_page); if ( $bookmark ) { # create bookmark my ($bmtext) = ($inputpdf =~ /([^\.]+)/ ); $outline->title($bmtext); my $bm = $outline->outline; $bm->title("page $document_page"); $bm->dest($page); $outline->dest($page) if $document_page == 1; $outline->closed; } } } $log->pFileInfo(2, "$file", "Processsing $file : "); } $pdf->preferences( -outlines => 1 ) if $bookmark; $pdf->update; $pdf->end; } sub write_pdf_acro { eval "use Win32::OLE;"; my ($outputpdf, $outdir) = fileparse (shift @_); my @pdfFiles = map { File::Spec->rel2abs($_) } @_; my ( $_empty_pdf, $file, $bigpdf, $singlepdf, $fillpdf, $acro, ); my $pages=0; $singlepdf = Win32::OLE->new('AcroExch.AVDoc'); $bigpdf = Win32::OLE->new('AcroExch.AVDoc'); prFile ($outdir."_empty_tmp_.pdf"); prPage; prEnd; $bigpdf->open (File::Spec->rel2abs($outdir."_empty_tmp_.pdf"), "") +; foreach $file (@pdfFiles) { $log->pFileInfo(2, $file, "Processsing $file : "); $singlepdf->open($file, ""); $bigpdf->GetPDDoc->InsertPages ( $bigpdf->GetPDDoc->GetNumPages-1, $singlepdf->GetPDDoc(), 0, $singlepdf->GetPDDoc->GetNumPages, 1 ); $pages += $singlepdf->GetPDDoc->GetNumPages; $singlepdf->close(1); } $bigpdf->GetPDDoc->DeletePages ( 0, 0 ); unless ( $bigpdf->GetPDDoc->GetNumPages == $pages ) { my $p = $bigpdf->GetPDDoc->GetNumPages; $bigpdf->close(1), #bug in acrobat? some files just donīt get merged die "not all pages inserted. shoud be: $pages, is: $p!\n"; } $bigpdf->GetPDDoc->Save(1, File::Spec->rel2abs("$outdir$outputpdf" +)); $bigpdf->close(1); unlink $outdir."_empty_.pdf"; } sub write_pdf_reuse { my ($outputpdf, $outdir) = fileparse (shift @_); my @pdfFiles = @_; prFile ("$outdir$outputpdf"); my $pages=0; for ( @pdfFiles ) { my $page=prDoc($_); if ( $bookmark ) { my ($inputpdf, $indir) = fileparse ($_, /\.pdf$/i); prBookmark({ text => $inputpdf, act => "this.pageNum = +$pages;"}); } $log->pFileInfo(2, $_, "Processsing $_ : "); $pages += $page; } prEnd; } __END__
The code of File::Log::Shortcut
package File::Log::Shortcut; use strict; use warnings; use POSIX qw(strftime); our $VERSION = "0.1"; use base qw(File::Log); sub new { my $proto = shift; my $class = ref($proto) || $proto; my %args = %{$_[0]}; %args = map { lc($_) => $args{$_} } keys %args ; my $self = $class->SUPER::new(@_); my %defaults = ( appname => ($main::0 =~ /(?:^|[\/\\])([^\/\\]+)$/)[-1] +, appversion => $main::VERSION || "", authorname => "", _startedepoc => time, versionfrom => strftime( "%Y%m%d-%X", localtime( (stat($m +ain::0))[9] ) ) , startedat => strftime( "%Y%m%d-%X", localtime() ), ); for ( keys %defaults ) { $self->{$_} = defined $args{$_} ? $args{$_} : $defaults{$_}; } return $self; } sub pProgramHeader { my $self = shift; $self->msg(shift, $self->programHeader); } sub programHeader { my $self = shift; my $msg = ""; $msg .= $self->{appname}; $msg .= " Version $self->{appversion}" if $self->{appversion}; $msg .= " from $self->{versionfrom}" if $self->{versionfrom}; $msg .= "\n"; $msg .= "Author: $self->{authorname}\n" if $self->{authorname}; $msg .= "Started at $self->{startedat}\n" if $self->{startedat}; return $msg; } sub pProgramFooter { my $self = shift; my $debug = shift; $self->msg($debug, $self->programFooter); } sub programFooter { my $self = shift; return "Ended at " . strftime( "%Y%m%d-%X", localtime() ) . " afte +r " . (time-$self->{_startedepoc}) . "\n"; } sub pFileInfo { my $self = shift; my $debug = shift; my $file = shift; my $msg = shift || ""; $self->msg($debug, $self->fileInfo ($file, $msg)); } sub fileInfo { my $self = shift; my $file = shift; my $msg = shift || ""; return $msg . strftime( "%Y%m%d-%X", localtime( (stat($file))[9] + ) ) . "\n"; } 1; __END__ =head1 NAME File::Log::Shortcut =head1 SYNOPSIS my $log = File::Log::Shortcut->new ( { # the author of the program, defaults to "" authorName => "Holli", # date of release, defaults to "last changed" of the script-f +ile versionFrom => "someday", # the scripts version, defaults to $main::VERSION or "" appVersion => "0.1", } ); $log->pProgramHeader(2); sleep(2); open OUT , ">", "testout"; close OUT; sleep(2); $log->pFileInfo(2, "testout","File created: "); $log->pProgramFooter(2); $log->close(); =head1 DESCRIPTION This is a subclass of File::Log, that offers some convenience. So ever +ything in the File::Log-documentation is true for File::Log::Shortcut =head2 methods =over 6 =item pProgramHeader ($debug); calls msg() of the parent class with $debug and a string like the foll +owing: test.pl Version 0.1 from someday Author: Holli Started at 20050206-04:57:49 =item programHeader returns the input for pProgramHeader() =item pProgramFooter ($debug); calls msg() of the parent class with $debug and a string like the foll +owing: Ended at 20050206-04:57:53 after 4 =item programFooter returns the input for pProgramFooter() =item pFileInfo ($debug, $filename, $message); calls msg() of the parent class with $debug and a string like the foll +owing: File created: 20050205-21:47:16 =item fileInfo ($filename, $message) returns the input for pFileInfo =back So, $log->pFileInfo (2, "file", "message"); is equivalent to $log->msg(2, $log->fileInfo ("file", "message")); =head1 AUTHOR holli

In reply to merging pdf-files: pdf-merger.pl by holli

Title:
Use:  <p> text here (a paragraph) </p>
and:  <code> code here </code>
to format your post; it's "PerlMonks-approved HTML":



  • Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!
  • Read Where should I post X? if you're not absolutely sure you're posting in the right place.
  • Please read these before you post! —
  • Posts may use any of the Perl Monks Approved HTML tags:
    a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr
  • Outside of code tags, you may need to use entities for some characters:
            For:     Use:
    & &amp;
    < &lt;
    > &gt;
    [ &#91;
    ] &#93;
  • Link using PerlMonks shortcuts! What shortcuts can I use for linking?
  • See Writeup Formatting Tips and other pages linked from there for more info.
  • Log In?
    Username:
    Password:

    What's my password?
    Create A New User
    Chatterbox?
    and the web crawler heard nothing...

    How do I use this? | Other CB clients
    Other Users?
    Others wandering the Monastery: (7)
    As of 2014-09-23 23:21 GMT
    Sections?
    Information?
    Find Nodes?
    Leftovers?
      Voting Booth?

      How do you remember the number of days in each month?











      Results (241 votes), past polls