<?xml version="1.0" encoding="windows-1252"?>
<node id="428482" title="merging pdf-files: pdf-merger.pl" created="2005-02-06 10:07:12" updated="2005-08-10 15:20:18">
<type id="1748">
sourcecode</type>
<author id="241598">
holli</author>
<data>
<field name="doctext">
The code of pdfmerger.pl
&lt;code&gt;
=pod

=head1 NAME

pdfmerger.pl

=head1 SYNOPSIS

 $perl pdfmerger.pl -outfile &lt;file&gt;
                   [-inpath &lt;directory&gt;]
                   [-inmask &lt;regex&gt;]
                   [-logfile &lt;file&gt;]
                   [-engine &lt;pdf-engine&gt;]
                   [-order &lt;file 1&gt;] [-order &lt;file 2&gt;] [-order &lt;file n&gt;]
                   [-bookmark]
                   [-backup]
                   [-help]
                   [@optionsfile]

=head1 ARGUMENTS

 -outfile      : req. : none          : file to create
 -inpath       : opt. : ./            : path to source-files
 -inmask       : opt. : /./           : regex to match files (no glob!)
 -logfile      : opt. : pdfmerger.rpt : name of the logfile
 -bookmark     : opt. : false         : add bookmarks
 -help         : opt. : no            : show help
 @optionsfile  : opt. : none          : file with options
 -order        : opt. : none          : merge only these files, in the given order
 -engine       : opt. : api2          : choose between api2|acrobat|reuse
 -backup       : opt. : false         : backup existing -outfile

=head1 DESCRIPTION:

pdfmerger.pl merges multiple Adobe PDF-files into a single document.
It supports multiple "engines" to produce the pdf, namely

=over 3

=item PDF::API2

 Due to a bug in PDF::API2, it seems to be impossible to merge files
 that were merged before.

=item PDF::Reuse

  I encountered a bug with Reuse and documents that were produced by
  MS Word/Acrobat PDF::Writer.

=item Win32 Acrobat OLE-Automation

  No bookmarks, due to the limitations of the Acrobat-Object-Model.

=back

When the -order option is used only the mentioned files are merged.

@optionsfile is a file that contains options for the script, one per line. This is especially
convenient for storing long -order lists.


=head1 AUTHOR

&lt;a href="http://holli.perlmonk.org/"&gt;holli&lt;/a&gt;


=head1 PREREQUISITES:

 Getopt::ArgvFile
 Getopt::Attribute
 Pod::Usage
 File::Log::Shortcut
 File::Copy
 File::Spec
 File::Basename
 PDF::API2
 PDF::Reuse
 Win32::OLE

=head1 HISTORY:

version 0.1

=cut
# version
our $VERSION = 0.1;

# modules
use strict;
use warnings;

use Getopt::ArgvFile;
use Getopt::Attribute;

use Pod::Usage;

use File::Log::Shortcut;
use File::Spec qw(rel2abs);

use File::Copy;
use File::Basename qw (fileparse);

use PDF::API2;
use PDF::Reuse;


# arguments
# Every option is bound to a package variable by Getopt::Attribute. The text
# following the Getopt::Long-style spec is taken as the option's default
# (per the Getopt::Attribute documentation).
our $inpath   : Getopt(inpath=s ./);
our $inmask   : Getopt(inmask=s .);
our $outfile  : Getopt(outfile=s);
our $logfile  : Getopt(logfile=s pdfmerger.rpt);
our $bookmark : Getopt(bookmark);
our $help     : Getopt(help);
our $engine   : Getopt(engine=s api2);
our @order    : Getopt(order=s);
our $backup   : Getopt(backup);

# Help?
pod2usage(-exitval=&gt;0, -verbose=&gt;1)
    if $help;

# inpath? (must be an existing directory; a trailing "/" is enforced so that
# file names can simply be appended)
pod2usage(-message =&gt; "'$inpath' is no valid directory!\n", -exitval=&gt;1, -verbose=&gt;1)
    unless -e $inpath &amp;&amp; -d $inpath;
$inpath .= "/" unless $inpath =~ m:/$:;

# outfile?
pod2usage(-message =&gt; "-outfile missing!\n", -exitval=&gt;1, -verbose=&gt;1)
    unless $outfile;

# The test must be -d, not -f: with -f every existing regular file was
# rejected (so -backup and overwriting could never happen) while a real
# directory slipped through.
pod2usage(-message =&gt; "-outfile is a directory!\n", -exitval=&gt;1, -verbose=&gt;1)
    if -d $outfile;

# build file list
# %files maps every *.pdf in $inpath that matches -inmask (case-insensitive)
# to its position in the glob result.
my $i=0;
my %files = map { $_ =&gt; ++$i } grep { /$inmask/i } glob ( "$inpath*.pdf" );

# %order maps file names to their merge position: the -order arguments
# (prefixed with $inpath) when given, otherwise the glob order from %files.
$i=0;
my %order = @order ? map { $inpath.$_ =&gt; ++$i } @order : %files;

# Files to merge, in merge order. Names from -order that are not in %files
# are dropped silently.
my @dateien = grep { defined $files{$_} } sort { $order{$a} &lt;=&gt; $order{$b} } keys %order;

die "no files to merge!\n" unless @dateien;

# set up the log file and write the program header before any pdf work starts
my $log = File::Log::Shortcut-&gt;new
(
    {
        versionFrom     =&gt; "20050206",
        logFileName     =&gt; $logfile,
        authorName      =&gt; "Holli",
        storeExpText    =&gt; 1,                   # Store internally all exp text
    }
);

$log-&gt;pProgramHeader(2);

# run the merge; write_pdfs() and the engines signal failure via die()
eval
{
    $log-&gt;msg(2, "using Engine: $engine\n");
    write_pdfs ($outfile, @dateien);
};

# keep a private copy of the error, $@ is a global that is easily clobbered
my $failure = $@;

# a failed merge is logged and reported through the exit status
if ( $failure )
{
    $log-&gt;msg(2,"\n!**ABORT**!\n$failure\n$!\n");
    exit 1;
}

# only reached after a successful merge
$log-&gt;pProgramFooter(2);


#engine-dispatcher
#
# write_pdfs ($outfile, @inputfiles)
#
# Clears the way for the output file (rename to "<name>.<n>" with -backup,
# delete otherwise), hands all arguments to the engine selected with -engine
# and verifies afterwards that the output file exists.
# Dies with a message if the old file cannot be moved or removed, if the
# engine is unknown, or if the engine did not produce the output file.
sub write_pdfs
{
    my ($outputpdf, $outdir) = fileparse ($_[0]);
    my $target = "$outdir$outputpdf";

    if ( -e $target )
    {
        if ( $backup )
        {
            # backup existing file under the first free numeric suffix
            my $i=0;
            $i++ while -e "$target.$i";

            move ($target, "$target.$i")
                or die "File '$outputpdf' cannot be renamed\n";

            $log-&gt;msg(2, "'$outputpdf' renamed to '$outputpdf.$i'\n");
        }
        else
        {
            # A leftover file would make the existence check at the end of
            # this sub succeed even if the engine wrote nothing, so a failed
            # delete must not go unnoticed.
            unlink $target
                or die "File '$outputpdf' cannot be removed: $!\n";
        }
    }

    # "acro" is accepted as an alias because the documentation names the
    # engine that way; "acrobat" keeps working as before.
    my %writer_for =
    (
        acrobat =&gt; \&amp;write_pdf_acro,
        acro    =&gt; \&amp;write_pdf_acro,
        api2    =&gt; \&amp;write_pdf_api2,
        reuse   =&gt; \&amp;write_pdf_reuse,
    );

    my $writer = $writer_for{$engine}
        or die "Unknow PDF-Engine!\n";

    $writer-&gt;(@_);

    -e $target
        or die "File '$outputpdf' not written!\n";

    $log-&gt;pFileInfo(2, $target, "File written $target : ");
}


# Engine "api2": merges the input files with PDF::API2.
#
# write_pdf_api2 ($outfile, @inputfiles)
#
# All pages of every input file are imported in order into a new document
# that is saved as $outfile. With -bookmark every input file gets a top-level
# outline entry (titled with the file name up to its first dot) that points
# to the file's first page and has one child entry per page.
# Errors raised by PDF::API2 propagate to the caller.
sub write_pdf_api2
{
    my ($outputpdf, $outdir) = fileparse (shift @_);
    my @pdfFiles = @_;

    # fileparse() leaves the trailing separator on $outdir, so plain
    # concatenation gives exactly the path write_pdfs() checks afterwards.
    # An extra hard-coded "\\" here only works on Windows.
    my $pdf = PDF::API2-&gt;new( -file =&gt; "$outdir$outputpdf" );

    #default mediabox to A4
    $pdf-&gt;mediabox (0,0,594.9,841.3597);
    my $root = $pdf-&gt;outlines;

    # position of the next page in the merged document
    my $import_page = 0;

    foreach my $file ( @pdfFiles )
    {
        my ($inputpdf) = fileparse ($file);
        my $input      = PDF::API2-&gt;open( $file );
        my @pages      = 1 .. $input-&gt;pages;

        # Page number inside the current input file. It has to restart for
        # every file: it labels the per-page bookmarks and decides which page
        # the file's own outline entry points to.
        my $document_page = 0;

        # one outline entry per non-empty input file, only with -bookmark
        my $outline;
        if ( $bookmark &amp;&amp; @pages )
        {
            my ($bmtext) = ($inputpdf =~ /([^\.]+)/ );
            $bmtext = $inputpdf unless defined $bmtext;   # name starts with "."

            $outline = $root-&gt;outline;
            $outline-&gt;title($bmtext);
        }

        foreach my $source_page ( @pages )
        {
            ++$import_page;
            ++$document_page;

            my $page = $pdf-&gt;importpage($input, $source_page, $import_page);

            next unless defined $outline;

            # create bookmark
            my $bm = $outline-&gt;outline;
            $bm-&gt;title("page $document_page");
            $bm-&gt;dest($page);

            $outline-&gt;dest($page) if $document_page == 1;
        }

        $outline-&gt;closed if defined $outline;

        $log-&gt;pFileInfo(2, "$file", "Processsing $file : ");
    }

    $pdf-&gt;preferences( -outlines =&gt; 1 )
        if $bookmark;

    $pdf-&gt;update;
    $pdf-&gt;end;
}


# Engine "acrobat": merges the input files through Acrobat's OLE automation
# interface (Windows only, needs an installed Acrobat).
#
# write_pdf_acro ($outfile, @inputfiles)
#
# A one-page placeholder document is created with PDF::Reuse, all input
# files are inserted into it, the placeholder page is deleted again and the
# result is saved as $outfile. Dies if Win32::OLE or Acrobat is not
# available, if an input file cannot be opened, or if the merged page count
# does not match the sum of the input page counts.
sub write_pdf_acro
{
    # Win32::OLE exists only on Windows, so it is loaded on demand; a load
    # failure is reported instead of surfacing later as a missing method.
    eval { require Win32::OLE; Win32::OLE-&gt;import; 1 }
        or die "Engine 'acrobat' needs Win32::OLE: $@\n";

    my ($outputpdf, $outdir) = fileparse (shift @_);
    my @pdfFiles = map { File::Spec-&gt;rel2abs($_) } @_;

    # placeholder document, removed again before this sub returns or dies
    my $emptypdf = $outdir."_empty_tmp_.pdf";

    my $pages=0;

    my $singlepdf = Win32::OLE-&gt;new('AcroExch.AVDoc')
        or die "Cannot create AcroExch.AVDoc: " . Win32::OLE-&gt;LastError . "\n";
    my $bigpdf    = Win32::OLE-&gt;new('AcroExch.AVDoc')
        or die "Cannot create AcroExch.AVDoc: " . Win32::OLE-&gt;LastError . "\n";

    prFile ($emptypdf);
    prPage();
    prEnd();

    $bigpdf-&gt;open (File::Spec-&gt;rel2abs($emptypdf), "");

    foreach my $file (@pdfFiles)
    {
        $log-&gt;pFileInfo(2, $file, "Processsing $file : ");

        $singlepdf-&gt;open($file, "")
            or die "Acrobat cannot open '$file'\n";

        # append after the current last page of the merged document
        $bigpdf-&gt;GetPDDoc-&gt;InsertPages
        (
            $bigpdf-&gt;GetPDDoc-&gt;GetNumPages-1,
            $singlepdf-&gt;GetPDDoc(),
            0,
            $singlepdf-&gt;GetPDDoc-&gt;GetNumPages,
            1
        );

        $pages += $singlepdf-&gt;GetPDDoc-&gt;GetNumPages;

        $singlepdf-&gt;close(1);
    }

    # drop the placeholder page again
    $bigpdf-&gt;GetPDDoc-&gt;DeletePages ( 0, 0 );

    unless ( $bigpdf-&gt;GetPDDoc-&gt;GetNumPages == $pages )
    {
        my $p = $bigpdf-&gt;GetPDDoc-&gt;GetNumPages;

        $bigpdf-&gt;close(1);
        unlink $emptypdf;

        #bug in acrobat? some files just don't get merged
        die "not all pages inserted. shoud be: $pages, is: $p!\n";
    }

    $bigpdf-&gt;GetPDDoc-&gt;Save(1, File::Spec-&gt;rel2abs("$outdir$outputpdf"));

    $bigpdf-&gt;close(1);

    # same name as created above, so the placeholder really is removed
    unlink $emptypdf;
}


# Engine "reuse": merges the input files with PDF::Reuse.
#
# write_pdf_reuse ($outfile, @inputfiles)
#
# Every input file is appended with prDoc(). With -bookmark one bookmark per
# input file is added, titled with the file name without its ".pdf" suffix;
# its action sets this.pageNum to the number of pages merged before the file.
sub write_pdf_reuse
{
    my ($outputpdf, $outdir) = fileparse (shift @_);
    my @pdfFiles = @_;

    prFile ("$outdir$outputpdf");

    # pages merged so far
    my $pages=0;

    for my $file ( @pdfFiles )
    {
        my $page = prDoc($file);

        if ( $bookmark )
        {
            # The suffix pattern must be passed as a qr// object. A bare
            # /.../ is executed as a match right here and fileparse() only
            # receives its result, so ".pdf" would stay in the title.
            my ($inputpdf) = fileparse ($file, qr/\.pdf$/i);
            prBookmark({ text  =&gt; $inputpdf, act   =&gt; "this.pageNum = $pages;"});
        }

        $log-&gt;pFileInfo(2, $file, "Processsing $file : ");
        $pages += $page;
    }

    prEnd();
}


__END__

&lt;/code&gt;
The code of File::Log::Shortcut
&lt;code&gt;
package File::Log::Shortcut;

use strict;
use warnings;

use POSIX qw(strftime);

our $VERSION = "0.1";

use base qw(File::Log);

# Formats an epoch time as "YYYYMMDD-<locale time>".
# Returns "unknown" when no time is available (for example because stat()
# failed), so a missing file is not reported with a bogus 1970 date and an
# "uninitialized value" warning.
sub _timestamp
{
    my $epoch = shift;

    return "unknown" unless defined $epoch;
    return strftime( "%Y%m%d-%X", localtime($epoch) );
}

# Constructor. Takes the same (optional) options hash reference as
# File::Log-&gt;new and passes it on unchanged. In addition the keys appName,
# appVersion, authorName, versionFrom and startedAt are recognised
# case-insensitively and stored lower-cased in the object; missing ones get
# defaults derived from $0, $main::VERSION and the current time.
sub new
{
    my $proto  = shift;
    my $class  = ref($proto) || $proto;

    # the options hash is optional; never dereference a missing argument
    my $options = ref($_[0]) eq 'HASH' ? $_[0] : {};
    my %args    = map { lc($_) =&gt; $options-&gt;{$_} } keys %$options;

    my $self = $class-&gt;SUPER::new(@_);

    # Capture into a scalar: a failed match must yield undef, not an empty
    # list that would shift every key/value pair in %defaults.
    my ($appname) = $0 =~ /(?:^|[\/\\])([^\/\\]+)$/;

    my %defaults =
    (
        appname         =&gt; defined $appname ? $appname : $0,
        appversion      =&gt; $main::VERSION || "",
        authorname      =&gt; "",
        _startedepoc    =&gt; time,
        versionfrom     =&gt; _timestamp( (stat($0))[9] ),   # mtime of the script
        startedat       =&gt; _timestamp( time ),
    );

    for ( keys %defaults )
    {
        $self-&gt;{$_} = defined $args{$_} ? $args{$_} : $defaults{$_};
    }

    return $self;
}

# pProgramHeader ($debug): logs programHeader() via msg() at level $debug.
sub pProgramHeader
{
    my $self  = shift;
    my $debug = shift;
    $self-&gt;msg($debug, $self-&gt;programHeader);
}

# Returns the header text: application name with optional version and
# release date on the first line, then optional author and start time lines.
sub programHeader
{
    my $self  = shift;

    my $msg = "";
    $msg .= $self-&gt;{appname};
    $msg .= " Version $self-&gt;{appversion}"    if $self-&gt;{appversion};
    $msg .= " from $self-&gt;{versionfrom}"      if $self-&gt;{versionfrom};
    $msg .= "\n";
    $msg .= "Author: $self-&gt;{authorname}\n"   if $self-&gt;{authorname};
    $msg .= "Started at $self-&gt;{startedat}\n" if $self-&gt;{startedat};

    return $msg;
}

# pProgramFooter ($debug): logs programFooter() via msg() at level $debug.
sub pProgramFooter
{
    my $self = shift;
    my $debug = shift;
    $self-&gt;msg($debug, $self-&gt;programFooter);
}

# Returns the footer text: current time and the seconds elapsed since the
# object was created.
sub programFooter
{
    my $self = shift;

    return "Ended at " . _timestamp( time ) . " after " . (time-$self-&gt;{_startedepoc}) . "\n";
}

# pFileInfo ($debug, $file, $msg): logs fileInfo($file, $msg) via msg() at
# level $debug.
sub pFileInfo
{
    my $self = shift;
    my $debug = shift;
    my $file  = shift;
    my $msg   = shift || "";
    $self-&gt;msg($debug, $self-&gt;fileInfo ($file, $msg));
}

# Returns $msg followed by the modification time of $file and a newline;
# the time reads "unknown" if $file cannot be stat()ed.
sub fileInfo
{
    my $self  = shift;
    my $file  = shift;
    my $msg   = shift || "";

    return $msg . _timestamp( (stat($file))[9] ) . "\n";
}

1;

__END__

=head1 NAME

File::Log::Shortcut

=head1 SYNOPSIS

 my $log = File::Log::Shortcut-&gt;new
 (
     {
         # the author of the program, defaults to ""
         authorName      =&gt; "Holli",

         # date of release, defaults to "last changed" of the script-file
         versionFrom      =&gt; "someday",

         # the scripts version, defaults to $main::VERSION or ""
         appVersion       =&gt; "0.1",
     }
 );


 $log-&gt;pProgramHeader(2);

 sleep(2);

 open OUT , "&gt;", "testout";
 close OUT;

 sleep(2);

 $log-&gt;pFileInfo(2, "testout","File created: ");

 $log-&gt;pProgramFooter(2);

 $log-&gt;close();

=head1 DESCRIPTION

This is a subclass of File::Log that offers some convenience methods. Everything in the
File::Log documentation also applies to File::Log::Shortcut.

=head2 methods

=over 6

=item pProgramHeader ($debug);

calls msg() of the parent class with $debug and a string like the following:

 test.pl Version 0.1 from someday
 Author: Holli
 Started at 20050206-04:57:49

=item programHeader

returns the input for pProgramHeader()

=item pProgramFooter ($debug);

calls msg() of the parent class with $debug and a string like the following:

 Ended at 20050206-04:57:53 after 4

=item programFooter

returns the input for pProgramFooter()

=item pFileInfo ($debug, $filename, $message);

calls msg() of the parent class with $debug and a string like the following:

 File created: 20050205-21:47:16

=item fileInfo ($filename, $message)

 returns the input for pFileInfo

=back

So,
 $log-&gt;pFileInfo (2, "file", "message");
is equivalent to
 $log-&gt;msg(2, $log-&gt;fileInfo ("file", "message"));

=head1 AUTHOR

holli

&lt;/code&gt;</field>
<field name="codedescription">
This script merges multiple pdf-files into a new pdf-file. Apart from some CPAN-modules it needs &lt;code&gt;File::Log::Shortcut&lt;/code&gt;, which has not made it to CPAN yet. Thus it is included here.&lt;br&gt;&lt;br&gt;
&lt;readmore&gt;
&lt;hr /&gt;
&lt;h1&gt;&lt;a name="name"&gt;NAME&lt;/a&gt;&lt;/h1&gt;
&lt;p&gt;pdfmerger.pl&lt;/p&gt;
&lt;p&gt;
&lt;/p&gt;
&lt;hr /&gt;
&lt;h1&gt;&lt;a name="description_"&gt;DESCRIPTION:&lt;/a&gt;&lt;/h1&gt;
&lt;p&gt;pdfmerger.pl merges multiple Adobe PDF-files into a single document.
It supports multiple ``engines'' to produce the pdf, namely&lt;/p&gt;
&lt;dl&gt;
&lt;dt&gt;&lt;strong&gt;&lt;a name="item_pdf_3a_3aapi2"&gt;PDF::API2&lt;/a&gt;&lt;/strong&gt;&lt;br /&gt;
&lt;/dt&gt;
&lt;dd&gt;
&lt;pre&gt;
 due to a bug in PDF::Api2, it seems to be impossible to merge files
 that were merged before.&lt;/pre&gt;
&lt;/dd&gt;
&lt;dt&gt;&lt;strong&gt;&lt;a name="item_pdf_3a_3areuse"&gt;PDF::Reuse&lt;/a&gt;&lt;/strong&gt;&lt;br /&gt;
&lt;/dt&gt;
&lt;dd&gt;
&lt;pre&gt;
  I encountered a bug with Reuse and documents that were produced by
  MS Word/Acrobat PDF::Writer.&lt;/pre&gt;
&lt;/dd&gt;
&lt;dt&gt;&lt;strong&gt;&lt;a name="item_win32_acrobat_ole_2dautomation"&gt;Win32 Acrobat OLE-Automation&lt;/a&gt;&lt;/strong&gt;&lt;br /&gt;
&lt;/dt&gt;
&lt;dd&gt;
&lt;pre&gt;
  No bookmarks, due to the limitations of the Acrobat-Object-Model.&lt;/pre&gt;
&lt;/dd&gt;
&lt;/dl&gt;
&lt;p&gt;When the -order option is used only the mentioned files are merged.&lt;/p&gt;
&lt;p&gt;@optionsfile is a file that contains options for the script, one per line. This is especially
convenient for storing long -order - lists.&lt;/p&gt;
&lt;p&gt;
&lt;/p&gt;
&lt;hr /&gt;
&lt;h1&gt;&lt;a name="synopsis"&gt;SYNOPSIS&lt;/a&gt;&lt;/h1&gt;
&lt;code&gt;
 $perl pdfmerger.pl -outfile &lt;file&gt;
                   [-inpath &lt;directory&gt;]
                   [-inmask &lt;regex&gt;]
                   [-logfile &lt;file&gt;]
                   [-engine &lt;pdf-engine&gt;]
                   [-order &lt;file 1&gt;] [-order &lt;file 2&gt;] [-order &lt;file n&gt;]
                   [-bookmark]
                   [-backup]
                   [-help]
                   [@optionsfile]&lt;/code&gt;
&lt;p&gt;
&lt;/p&gt;
&lt;hr /&gt;
&lt;h1&gt;&lt;a name="arguments"&gt;ARGUMENTS&lt;/a&gt;&lt;/h1&gt;
&lt;code&gt;
 -outfile      : req. : none          : file to create
 -inpath       : opt. : ./            : path to source-files
 -inmask       : opt. : /./           : regex to match files (no glob!)
 -logfile      : opt. : pdfmerger.rpt : name of the logfile
 -bookmark     : opt. : false         : add bookmarks
 -help         : opt. : no            : show help
 @optionsfile  : opt. : none          : file with options
 -order        : opt. : none          : merge only these files, in the given order
 -engine       : opt. : api2          : choose between api2|acrobat|reuse
 -backup       : opt. : false         : backup existing -outfile&lt;/code&gt;
&lt;p&gt;
&lt;/p&gt;
&lt;hr /&gt;
&lt;h1&gt;&lt;a name="prerequisities_"&gt;PREREQUISITIES:&lt;/a&gt;&lt;/h1&gt;
&lt;pre&gt;
 Getopt::ArgvFile
 Getopt::Attribute
 POD::Usage
 File::Copy
 File::Basename
 PDF::API2;
 PDF::Reuse;
 Win32::OLE&lt;/pre&gt;
&lt;pre&gt;
 File::Log::Shortcut&lt;/pre&gt;
&lt;p&gt;
&lt;/p&gt;
&lt;hr /&gt;
&lt;h1&gt;&lt;a name="history_"&gt;HISTORY:&lt;/a&gt;&lt;/h1&gt;
&lt;p&gt;version 0.1&lt;/p&gt;
&lt;p&gt;
&lt;/p&gt;
&lt;hr /&gt;
&lt;h1&gt;&lt;a name="autor"&gt;AUTOR&lt;/a&gt;&lt;/h1&gt;
&lt;p&gt;&lt;a href="http://holli.perlmonk.org/"&gt;holli&lt;/a&gt;&lt;/p&gt;
&lt;/readmore&gt;</field>
<field name="codecategory">
PDF</field>
<field name="codeauthor">
holli</field>
</data>
</node>
