comment on

The code of pdfmerger.pl

=pod

=head1 NAME

pdfmerger.pl

=head1 SYNOPSIS

 $perl pdfmerger.pl -outfile <file>
                  [-inpath <directory>]
                  [-inmask <regex>]
                  [-logfile <file>]
                  [-engine <pdf-engine>]
                  [-order <file 1>] [-order <file 2>] [-order <file n>
+]
                  [-bookmark]
                  [-help]
                  [@optionsfile]

=head1 ARGUMENTS

 -outfile      : req. : none          : file to create
 -inpath       : opt. : ./            : path to source-files
 -inmask       : opt. : /./           : regex to match files (no glob!
+)
 -logfile      : opt. : pdfmerger.log : name of the logfile
 -bookmark     : opt. : false         : add bookmarks
 -help         : opt. : no            : show help
 -@optionsfile : opt. : none          : file with options
 -order        : opt. : none          : sort merged files
 -engine       : opt. : reuse         : choose between api2|acro|reuse
 -backup       : opt. : false         : backup existing -outfile

=head1 DESCRIPTION:

pdfmerger.pl merges multiple Adobe PDF-files to a single document.
it supports multiple "engines" to produce pdf, namly

=over 3

=item PDF::API2

 due to a bug in PDF::Api2, it seems to be impossible to merge files
 that were merged before.

=item PDF::Reuse

  I encountered a bug with Reuse and documents that were produced by
  MS Word/Acrobat PDF::Writer.

=item Win32 Acrobat OLE-Automation

  No bookmarks, due to the limitations of the Acrobat-Object-Model.

=back

When the -order option is used only the mentioned files are merged.

@optionsfile is a file that contains options for the script, one per l
+ine. This is especially
convenient for storing long -order - lists.


=head1 AUTOR

<a href="http://holli.perlmonk.org/">holli</a>


=head1 PREREQUISITIES:

 Getopt::ArgvFile
 Getopt::Attribute
 POD::Usage
 File::Log::Shortcut
 File::Copy
 File::Spec
 File::Basename
 PDF::API2;
 PDF::Reuse;
 Win32::OLE

=head1 HISTORY:

version 0.1

=cut
# version
our $VERSION = 0.1;

# modules
use strict;
use warnings;

use Getopt::ArgvFile;
use Getopt::Attribute;

use Pod::Usage;

use File::Log::Shortcut;
use File::Spec qw(rel2abs);

use File::Copy;
use File::Basename qw (fileparse);

use PDF::API2;
use PDF::Reuse;


# arguments
our $inpath   : Getopt(inpath=s ./);
our $inmask   : Getopt(inmask=s .);
our $outfile  : Getopt(outfile=s);
our $logfile  : Getopt(logfile=s pdfmerger.rpt);
our $bookmark : Getopt(bookmark);
our $help     : Getopt(help);
our $engine   : Getopt(engine=s api2);
our @order    : Getopt(order=s);
our $backup   : Getopt(backup);

# Help?
pod2usage(-exitval=>0, -verbose=>1)
    if $help;

# inpath?
pod2usage(-message => "'$inpath' is no valid directory!\n", -exitval=>
+1, -verbose=>1)
    unless -e $inpath && -d $inpath;
$inpath .= "/" unless $inpath =~ m:/$:;

# outfile?
pod2usage(-message => "-outfile missing!\n", -exitval=>1, -verbose=>1)
    unless $outfile;

pod2usage(-message => "-outfile is a directory!\n", -exitval=>1, -verb
+ose=>1)
    if -e $outfile && -f $outfile;

# build file list
my $i=0;
my %files = map { $_ => ++$i } grep { /$inmask/i } glob ( "$inpath*.pd
+f" );

$i=0;
my %order = @order ? map { $inpath.$_ => ++$i } @order : %files;

my @dateien = grep { defined $files{$_} } sort { $order{$a} <=> $order
+{$b} } keys %order;

die "no files to merge!\n" unless @dateien;

# create log
my $log = File::Log::Shortcut->new
(
    {
        storeExpText    => 1,                   # Store internally all
+ exp text
        authorName      => "Holli",
        logFileName     => $logfile,
        versionFrom     => "20050206",
    }
);

$log->pProgramHeader(2);

# create pdf
eval
{
    $log->msg(2, "using Engine: $engine\n");
    write_pdfs ($outfile, @dateien);
};

# catch errors
if ( $@ )
{
    $log->msg(2,"\n!**ABORT**!\n$@\n$!\n");
    exit 1;
}

$log->pProgramFooter(2);


#engine-dispatcher
sub write_pdfs
{
    my ($outputpdf, $outdir) = fileparse ($_[0]);


    if ( $backup )
    {
        if ( -e "$outdir$outputpdf")
        {
            # backup existing file
            my $i=0;
            while ( -e "$outdir$outputpdf.$i" ) { $i++ };

            move ("$outdir$outputpdf", "$outdir$outputpdf.$i")
                or die "File '$outputpdf' cannot be renamed\n";

            $log->msg(2, "'$outputpdf' renamed to '$outputpdf.$i'\n");
        }
    }
    else
    {
        unlink "$outdir$outputpdf" if -e "$outdir$outputpdf";
    }



    if ( $engine eq "acrobat" )
    {
        write_pdf_acro (@_);
    }
    elsif ( $engine eq "api2" )
    {
        write_pdf_api2 (@_);
    }
    elsif ( $engine eq "reuse" )
    {
        write_pdf_reuse (@_);
    }
    else
    {
        die "Unknow PDF-Engine!\n";
    }

    -e "$outdir$outputpdf" ?
        $log->pFileInfo(2, "$outdir$outputpdf", "File written $outdir$
+outputpdf : ") :
        die "File '$outputpdf' not written!\n";
}


sub write_pdf_api2
{

    my ($outputpdf, $outdir) = fileparse (shift @_);

    my @pdfFiles = @_;

    my
    (
        $file,
        $pdf,
        $root,
    );

    $pdf = PDF::API2->new( -file => $outdir."\\".$outputpdf );

    #default mediabox to A4
    $pdf->mediabox (0,0,594.9,841.3597);
    $root = $pdf->outlines;

    my $import_page = 0;
    my $document_page = 0;

    foreach $file ( @pdfFiles )
    {
        my ($inputpdf, $inputdir) = fileparse (shift @_);

        my $input = PDF::API2->open( $file );

        my @pages = 1 .. $input->pages;

        if ( scalar @pages > 0 )
        {
            my $outline;
            $outline = $root->outline
                if $bookmark;

            foreach ( @pages )
            {
                ++$import_page;
                ++$document_page;

                my $page = $pdf->importpage($input, $_, $import_page);

                if ( $bookmark )
                {
                    # create bookmark
                    my ($bmtext) = ($inputpdf =~ /([^\.]+)/ );
                    $outline->title($bmtext);

                    my $bm = $outline->outline;
                    $bm->title("page $document_page");

                    $bm->dest($page);
                    $outline->dest($page) if $document_page == 1;
                    $outline->closed;
                }
            }

        }
        $log->pFileInfo(2, "$file", "Processsing $file : ");

    }


    $pdf->preferences( -outlines => 1 )
        if $bookmark;

    $pdf->update;
    $pdf->end;
}


sub write_pdf_acro
{

    eval "use Win32::OLE;";

    my ($outputpdf, $outdir) = fileparse (shift @_);
    my @pdfFiles = map { File::Spec->rel2abs($_) } @_;

    my (
        $_empty_pdf,
        $file,
        $bigpdf,
        $singlepdf,
        $fillpdf,
        $acro,
    );

    my $pages=0;

    $singlepdf  = Win32::OLE->new('AcroExch.AVDoc');
    $bigpdf     = Win32::OLE->new('AcroExch.AVDoc');

    prFile ($outdir."_empty_tmp_.pdf");
    prPage;
    prEnd;

    $bigpdf->open (File::Spec->rel2abs($outdir."_empty_tmp_.pdf"), "")
+;

    foreach $file (@pdfFiles)
    {
        $log->pFileInfo(2, $file, "Processsing $file : ");

        $singlepdf->open($file, "");

        $bigpdf->GetPDDoc->InsertPages
        (
            $bigpdf->GetPDDoc->GetNumPages-1,
            $singlepdf->GetPDDoc(),
            0,
            $singlepdf->GetPDDoc->GetNumPages,
            1
        );

        $pages += $singlepdf->GetPDDoc->GetNumPages;

        $singlepdf->close(1);
    }

    $bigpdf->GetPDDoc->DeletePages ( 0, 0 );

    unless ( $bigpdf->GetPDDoc->GetNumPages == $pages )
    {
        my $p = $bigpdf->GetPDDoc->GetNumPages;

        $bigpdf->close(1),
        #bug in acrobat? some files just don´t get merged
        die "not all pages inserted. shoud be: $pages, is: $p!\n";
    }

    $bigpdf->GetPDDoc->Save(1, File::Spec->rel2abs("$outdir$outputpdf"
+));

    $bigpdf->close(1);

    unlink $outdir."_empty_.pdf";
}


sub write_pdf_reuse
{
    my ($outputpdf, $outdir) = fileparse (shift @_);
    my @pdfFiles = @_;

    prFile ("$outdir$outputpdf");

    my $pages=0;

    for ( @pdfFiles )
    {
        my $page=prDoc($_);
        if ( $bookmark )
        {
            my ($inputpdf, $indir) = fileparse ($_, /\.pdf$/i);
            prBookmark({ text  => $inputpdf, act   => "this.pageNum = 
+$pages;"});

        }
        $log->pFileInfo(2, $_, "Processsing $_ : ");
        $pages += $page;
    }

    prEnd;
}


__END__
[download]

The code of File::Log::Shortcut

package File::Log::Shortcut;

use strict;
use warnings;

use POSIX qw(strftime);

our $VERSION = "0.1";

use base qw(File::Log);

sub new
{
    my $proto  = shift;
    my $class  = ref($proto) || $proto;
    my %args   =  %{$_[0]};
    %args = map { lc($_) => $args{$_} } keys %args ;



    my $self = $class->SUPER::new(@_);

    my %defaults =
    (
        appname         => ($main::0 =~ /(?:^|[\/\\])([^\/\\]+)$/)[-1]
+,
        appversion      => $main::VERSION || "",
        authorname      => "",
        _startedepoc    => time,
        versionfrom     => strftime( "%Y%m%d-%X", localtime(  (stat($m
+ain::0))[9]  )   ) ,
        startedat       => strftime( "%Y%m%d-%X", localtime() ),
    );

    for ( keys %defaults )
    {
        $self->{$_} = defined $args{$_} ? $args{$_} : $defaults{$_};
    }

    return $self;
}

sub pProgramHeader
{
    my $self = shift;
    $self->msg(shift, $self->programHeader);
}

sub programHeader
{
    my $self  = shift;

    my $msg = "";
    $msg .= $self->{appname};
    $msg .= " Version $self->{appversion}"    if $self->{appversion};
    $msg .= " from $self->{versionfrom}"      if $self->{versionfrom};
    $msg .= "\n";
    $msg .= "Author: $self->{authorname}\n"   if $self->{authorname};
    $msg .= "Started at $self->{startedat}\n" if $self->{startedat};

    return $msg;
}

sub pProgramFooter
{
    my $self = shift;
    my $debug = shift;
    $self->msg($debug, $self->programFooter);
}

sub programFooter
{
    my $self = shift;

    return "Ended at " . strftime( "%Y%m%d-%X", localtime() ) . " afte
+r " . (time-$self->{_startedepoc}) . "\n";
}

sub pFileInfo
{
    my $self = shift;
    my $debug = shift;
    my $file  = shift;
    my $msg   = shift || "";
    $self->msg($debug, $self->fileInfo ($file, $msg));
}

sub fileInfo
{
    my $self  = shift;
    my $file  = shift;
    my $msg   = shift || "";

    return $msg . strftime( "%Y%m%d-%X", localtime(  (stat($file))[9] 
+ )  ) . "\n";
}

1;

__END__

=head1 NAME

File::Log::Shortcut

=head1 SYNOPSIS

 my $log = File::Log::Shortcut->new
 (
     {
         # the author of the program, defaults to ""
         authorName      => "Holli",

         # date of release, defaults to "last changed" of the script-f
+ile
         versionFrom      => "someday",

         # the scripts version, defaults to $main::VERSION or ""
         appVersion       => "0.1",
     }
 );


 $log->pProgramHeader(2);

 sleep(2);

 open OUT , ">", "testout";
 close OUT;

 sleep(2);

 $log->pFileInfo(2, "testout","File created: ");

 $log->pProgramFooter(2);

 $log->close();

=head1 DESCRIPTION

This is a subclass of File::Log, that offers some convenience. So ever
+ything in the File::Log-documentation
is true for File::Log::Shortcut

=head2 methods

=over 6

=item pProgramHeader ($debug);

calls msg() of the parent class with $debug and a string like the foll
+owing:

 test.pl Version 0.1 from someday
 Author: Holli
 Started at 20050206-04:57:49

=item programHeader

returns the input for pProgramHeader()

=item pProgramFooter ($debug);

calls msg() of the parent class with $debug and a string like the foll
+owing:

 Ended at 20050206-04:57:53 after 4

=item programFooter

returns the input for pProgramFooter()

=item pFileInfo ($debug, $filename, $message);

calls msg() of the parent class with $debug and a string like the foll
+owing:

 File created: 20050205-21:47:16

=item fileInfo ($filename, $message)

 returns the input for pFileInfo

=back

So,
 $log->pFileInfo (2, "file", "message");
is equivalent to
 $log->msg(2, $log->fileInfo ("file", "message"));

=head1 AUTHOR

holli
[download]

In reply to merging pdf-files: pdf-merger.pl by holli

Are you posting in the right place? Check out Where do I post X? to know for sure.
Posts may use any of the Perl Monks Approved HTML tags. Currently these include the following:
<code> <a> <b> <big> <blockquote> <br /> <dd> <dl> <dt> <em> <font> <h1> <h2> <h3> <h4> <h5> <h6> <hr /> <i> <li> <nbsp> <ol> <p> <small> <strike> <strong> <sub> <sup> <table> <td> <th> <tr> <tt> <u> <ul>
Snippets of code should be wrapped in <code> tags not <pre> tags. In fact, <pre> tags should generally be avoided. If they must be used, extreme care should be taken to ensure that their contents do not have long lines (<70 chars), in order to prevent horizontal scrolling (and possible janitor intervention).
Want more info? How to link or How to display code and escape characters are good places to start.


Come for the quick hacks, stay for the epiphanies.
	PerlMonks