The code of pdfmerger.pl
=pod
=head1 NAME
pdfmerger.pl
=head1 SYNOPSIS
$perl pdfmerger.pl -outfile <file>
[-inpath <directory>]
[-inmask <regex>]
[-logfile <file>]
[-engine <pdf-engine>]
[-order <file 1>] [-order <file 2>] [-order <file n>
+]
[-bookmark]
[-help]
[@optionsfile]
=head1 ARGUMENTS
-outfile : req. : none : file to create
-inpath : opt. : ./ : path to source-files
-inmask : opt. : /./ : regex to match files (no glob!
+)
-logfile : opt. : pdfmerger.log : name of the logfile
-bookmark : opt. : false : add bookmarks
-help : opt. : no : show help
-@optionsfile : opt. : none : file with options
-order : opt. : none : sort merged files
-engine : opt. : reuse : choose between api2|acro|reuse
-backup : opt. : false : backup existing -outfile
=head1 DESCRIPTION:
pdfmerger.pl merges multiple Adobe PDF-files to a single document.
it supports multiple "engines" to produce pdf, namly
=over 3
=item PDF::API2
due to a bug in PDF::Api2, it seems to be impossible to merge files
that were merged before.
=item PDF::Reuse
I encountered a bug with Reuse and documents that were produced by
MS Word/Acrobat PDF::Writer.
=item Win32 Acrobat OLE-Automation
No bookmarks, due to the limitations of the Acrobat-Object-Model.
=back
When the -order option is used only the mentioned files are merged.
@optionsfile is a file that contains options for the script, one per l
+ine. This is especially
convenient for storing long -order - lists.
=head1 AUTOR
<a href="http://holli.perlmonk.org/">holli</a>
=head1 PREREQUISITIES:
Getopt::ArgvFile
Getopt::Attribute
POD::Usage
File::Log::Shortcut
File::Copy
File::Spec
File::Basename
PDF::API2;
PDF::Reuse;
Win32::OLE
=head1 HISTORY:
version 0.1
=cut
# version
our $VERSION = 0.1;
# modules
use strict;
use warnings;
use Getopt::ArgvFile;
use Getopt::Attribute;
use Pod::Usage;
use File::Log::Shortcut;
use File::Spec qw(rel2abs);
use File::Copy;
use File::Basename qw (fileparse);
use PDF::API2;
use PDF::Reuse;
# arguments
our $inpath : Getopt(inpath=s ./);
our $inmask : Getopt(inmask=s .);
our $outfile : Getopt(outfile=s);
our $logfile : Getopt(logfile=s pdfmerger.rpt);
our $bookmark : Getopt(bookmark);
our $help : Getopt(help);
our $engine : Getopt(engine=s api2);
our @order : Getopt(order=s);
our $backup : Getopt(backup);
# Help?
pod2usage(-exitval=>0, -verbose=>1)
if $help;
# inpath?
pod2usage(-message => "'$inpath' is no valid directory!\n", -exitval=>
+1, -verbose=>1)
unless -e $inpath && -d $inpath;
$inpath .= "/" unless $inpath =~ m:/$:;
# outfile?
pod2usage(-message => "-outfile missing!\n", -exitval=>1, -verbose=>1)
unless $outfile;
pod2usage(-message => "-outfile is a directory!\n", -exitval=>1, -verb
+ose=>1)
if -e $outfile && -f $outfile;
# build file list
my $i=0;
my %files = map { $_ => ++$i } grep { /$inmask/i } glob ( "$inpath*.pd
+f" );
$i=0;
my %order = @order ? map { $inpath.$_ => ++$i } @order : %files;
my @dateien = grep { defined $files{$_} } sort { $order{$a} <=> $order
+{$b} } keys %order;
die "no files to merge!\n" unless @dateien;
# create log
my $log = File::Log::Shortcut->new
(
{
storeExpText => 1, # Store internally all
+ exp text
authorName => "Holli",
logFileName => $logfile,
versionFrom => "20050206",
}
);
$log->pProgramHeader(2);
# create pdf
eval
{
$log->msg(2, "using Engine: $engine\n");
write_pdfs ($outfile, @dateien);
};
# catch errors
if ( $@ )
{
$log->msg(2,"\n!**ABORT**!\n$@\n$!\n");
exit 1;
}
$log->pProgramFooter(2);
#engine-dispatcher
sub write_pdfs
{
my ($outputpdf, $outdir) = fileparse ($_[0]);
if ( $backup )
{
if ( -e "$outdir$outputpdf")
{
# backup existing file
my $i=0;
while ( -e "$outdir$outputpdf.$i" ) { $i++ };
move ("$outdir$outputpdf", "$outdir$outputpdf.$i")
or die "File '$outputpdf' cannot be renamed\n";
$log->msg(2, "'$outputpdf' renamed to '$outputpdf.$i'\n");
}
}
else
{
unlink "$outdir$outputpdf" if -e "$outdir$outputpdf";
}
if ( $engine eq "acrobat" )
{
write_pdf_acro (@_);
}
elsif ( $engine eq "api2" )
{
write_pdf_api2 (@_);
}
elsif ( $engine eq "reuse" )
{
write_pdf_reuse (@_);
}
else
{
die "Unknow PDF-Engine!\n";
}
-e "$outdir$outputpdf" ?
$log->pFileInfo(2, "$outdir$outputpdf", "File written $outdir$
+outputpdf : ") :
die "File '$outputpdf' not written!\n";
}
sub write_pdf_api2
{
my ($outputpdf, $outdir) = fileparse (shift @_);
my @pdfFiles = @_;
my
(
$file,
$pdf,
$root,
);
$pdf = PDF::API2->new( -file => $outdir."\\".$outputpdf );
#default mediabox to A4
$pdf->mediabox (0,0,594.9,841.3597);
$root = $pdf->outlines;
my $import_page = 0;
my $document_page = 0;
foreach $file ( @pdfFiles )
{
my ($inputpdf, $inputdir) = fileparse (shift @_);
my $input = PDF::API2->open( $file );
my @pages = 1 .. $input->pages;
if ( scalar @pages > 0 )
{
my $outline;
$outline = $root->outline
if $bookmark;
foreach ( @pages )
{
++$import_page;
++$document_page;
my $page = $pdf->importpage($input, $_, $import_page);
if ( $bookmark )
{
# create bookmark
my ($bmtext) = ($inputpdf =~ /([^\.]+)/ );
$outline->title($bmtext);
my $bm = $outline->outline;
$bm->title("page $document_page");
$bm->dest($page);
$outline->dest($page) if $document_page == 1;
$outline->closed;
}
}
}
$log->pFileInfo(2, "$file", "Processsing $file : ");
}
$pdf->preferences( -outlines => 1 )
if $bookmark;
$pdf->update;
$pdf->end;
}
sub write_pdf_acro
{
eval "use Win32::OLE;";
my ($outputpdf, $outdir) = fileparse (shift @_);
my @pdfFiles = map { File::Spec->rel2abs($_) } @_;
my (
$_empty_pdf,
$file,
$bigpdf,
$singlepdf,
$fillpdf,
$acro,
);
my $pages=0;
$singlepdf = Win32::OLE->new('AcroExch.AVDoc');
$bigpdf = Win32::OLE->new('AcroExch.AVDoc');
prFile ($outdir."_empty_tmp_.pdf");
prPage;
prEnd;
$bigpdf->open (File::Spec->rel2abs($outdir."_empty_tmp_.pdf"), "")
+;
foreach $file (@pdfFiles)
{
$log->pFileInfo(2, $file, "Processsing $file : ");
$singlepdf->open($file, "");
$bigpdf->GetPDDoc->InsertPages
(
$bigpdf->GetPDDoc->GetNumPages-1,
$singlepdf->GetPDDoc(),
0,
$singlepdf->GetPDDoc->GetNumPages,
1
);
$pages += $singlepdf->GetPDDoc->GetNumPages;
$singlepdf->close(1);
}
$bigpdf->GetPDDoc->DeletePages ( 0, 0 );
unless ( $bigpdf->GetPDDoc->GetNumPages == $pages )
{
my $p = $bigpdf->GetPDDoc->GetNumPages;
$bigpdf->close(1),
#bug in acrobat? some files just donīt get merged
die "not all pages inserted. shoud be: $pages, is: $p!\n";
}
$bigpdf->GetPDDoc->Save(1, File::Spec->rel2abs("$outdir$outputpdf"
+));
$bigpdf->close(1);
unlink $outdir."_empty_.pdf";
}
sub write_pdf_reuse
{
my ($outputpdf, $outdir) = fileparse (shift @_);
my @pdfFiles = @_;
prFile ("$outdir$outputpdf");
my $pages=0;
for ( @pdfFiles )
{
my $page=prDoc($_);
if ( $bookmark )
{
my ($inputpdf, $indir) = fileparse ($_, /\.pdf$/i);
prBookmark({ text => $inputpdf, act => "this.pageNum =
+$pages;"});
}
$log->pFileInfo(2, $_, "Processsing $_ : ");
$pages += $page;
}
prEnd;
}
__END__
The code of File::Log::Shortcut
package File::Log::Shortcut;
use strict;
use warnings;
use POSIX qw(strftime);
our $VERSION = "0.1";
use base qw(File::Log);
sub new
{
my $proto = shift;
my $class = ref($proto) || $proto;
my %args = %{$_[0]};
%args = map { lc($_) => $args{$_} } keys %args ;
my $self = $class->SUPER::new(@_);
my %defaults =
(
appname => ($main::0 =~ /(?:^|[\/\\])([^\/\\]+)$/)[-1]
+,
appversion => $main::VERSION || "",
authorname => "",
_startedepoc => time,
versionfrom => strftime( "%Y%m%d-%X", localtime( (stat($m
+ain::0))[9] ) ) ,
startedat => strftime( "%Y%m%d-%X", localtime() ),
);
for ( keys %defaults )
{
$self->{$_} = defined $args{$_} ? $args{$_} : $defaults{$_};
}
return $self;
}
sub pProgramHeader
{
my $self = shift;
$self->msg(shift, $self->programHeader);
}
sub programHeader
{
my $self = shift;
my $msg = "";
$msg .= $self->{appname};
$msg .= " Version $self->{appversion}" if $self->{appversion};
$msg .= " from $self->{versionfrom}" if $self->{versionfrom};
$msg .= "\n";
$msg .= "Author: $self->{authorname}\n" if $self->{authorname};
$msg .= "Started at $self->{startedat}\n" if $self->{startedat};
return $msg;
}
sub pProgramFooter
{
my $self = shift;
my $debug = shift;
$self->msg($debug, $self->programFooter);
}
sub programFooter
{
my $self = shift;
return "Ended at " . strftime( "%Y%m%d-%X", localtime() ) . " afte
+r " . (time-$self->{_startedepoc}) . "\n";
}
sub pFileInfo
{
my $self = shift;
my $debug = shift;
my $file = shift;
my $msg = shift || "";
$self->msg($debug, $self->fileInfo ($file, $msg));
}
sub fileInfo
{
my $self = shift;
my $file = shift;
my $msg = shift || "";
return $msg . strftime( "%Y%m%d-%X", localtime( (stat($file))[9]
+ ) ) . "\n";
}
1;
__END__
=head1 NAME
File::Log::Shortcut
=head1 SYNOPSIS
my $log = File::Log::Shortcut->new
(
{
# the author of the program, defaults to ""
authorName => "Holli",
# date of release, defaults to "last changed" of the script-f
+ile
versionFrom => "someday",
# the scripts version, defaults to $main::VERSION or ""
appVersion => "0.1",
}
);
$log->pProgramHeader(2);
sleep(2);
open OUT , ">", "testout";
close OUT;
sleep(2);
$log->pFileInfo(2, "testout","File created: ");
$log->pProgramFooter(2);
$log->close();
=head1 DESCRIPTION
This is a subclass of File::Log, that offers some convenience. So ever
+ything in the File::Log-documentation
is true for File::Log::Shortcut
=head2 methods
=over 6
=item pProgramHeader ($debug);
calls msg() of the parent class with $debug and a string like the foll
+owing:
test.pl Version 0.1 from someday
Author: Holli
Started at 20050206-04:57:49
=item programHeader
returns the input for pProgramHeader()
=item pProgramFooter ($debug);
calls msg() of the parent class with $debug and a string like the foll
+owing:
Ended at 20050206-04:57:53 after 4
=item programFooter
returns the input for pProgramFooter()
=item pFileInfo ($debug, $filename, $message);
calls msg() of the parent class with $debug and a string like the foll
+owing:
File created: 20050205-21:47:16
=item fileInfo ($filename, $message)
returns the input for pFileInfo
=back
So,
$log->pFileInfo (2, "file", "message");
is equivalent to
$log->msg(2, $log->fileInfo ("file", "message"));
=head1 AUTHOR
holli
-
Are you posting in the right place? Check out Where do I post X? to know for sure.
-
Posts may use any of the Perl Monks Approved HTML tags. Currently these include the following:
<code> <a> <b> <big>
<blockquote> <br /> <dd>
<dl> <dt> <em> <font>
<h1> <h2> <h3> <h4>
<h5> <h6> <hr /> <i>
<li> <nbsp> <ol> <p>
<small> <strike> <strong>
<sub> <sup> <table>
<td> <th> <tr> <tt>
<u> <ul>
-
Snippets of code should be wrapped in
<code> tags not
<pre> tags. In fact, <pre>
tags should generally be avoided. If they must
be used, extreme care should be
taken to ensure that their contents do not
have long lines (<70 chars), in order to prevent
horizontal scrolling (and possible janitor
intervention).
-
Want more info? How to link
or How to display code and escape characters
are good places to start.