=pod =head1 NAME pdfmerger.pl =head1 SYNOPSIS $perl pdfmerger.pl -outfile [-inpath ] [-inmask ] [-logfile ] [-engine ] [-order ] [-order ] [-order ] [-bookmark] [-help] [@optionsfile] =head1 ARGUMENTS -outfile : req. : none : file to create -inpath : opt. : ./ : path to source-files -inmask : opt. : /./ : regex to match files (no glob!) -logfile : opt. : pdfmerger.log : name of the logfile -bookmark : opt. : false : add bookmarks -help : opt. : no : show help -@optionsfile : opt. : none : file with options -order : opt. : none : sort merged files -engine : opt. : reuse : choose between api2|acro|reuse -backup : opt. : false : backup existing -outfile =head1 DESCRIPTION: pdfmerger.pl merges multiple Adobe PDF-files to a single document. it supports multiple "engines" to produce pdf, namly =over 3 =item PDF::API2 due to a bug in PDF::Api2, it seems to be impossible to merge files that were merged before. =item PDF::Reuse I encountered a bug with Reuse and documents that were produced by MS Word/Acrobat PDF::Writer. =item Win32 Acrobat OLE-Automation No bookmarks, due to the limitations of the Acrobat-Object-Model. =back When the -order option is used only the mentioned files are merged. @optionsfile is a file that contains options for the script, one per line. This is especially convenient for storing long -order - lists. =head1 AUTOR holli =head1 PREREQUISITIES: Getopt::ArgvFile Getopt::Attribute POD::Usage File::Log::Shortcut File::Copy File::Spec File::Basename PDF::API2; PDF::Reuse; Win32::OLE =head1 HISTORY: version 0.1 =cut # version our $VERSION = 0.1; # modules use strict; use warnings; use Getopt::ArgvFile; use Getopt::Attribute; use Pod::Usage; use File::Log::Shortcut; use File::Spec qw(rel2abs); use File::Copy; use File::Basename qw (fileparse); use PDF::API2; use PDF::Reuse; # arguments our $inpath : Getopt(inpath=s ./); our $inmask : Getopt(inmask=s .); our $outfile : Getopt(outfile=s); our $logfile : Getopt(logfile=s pdfmerger.rpt); our $bookmark : Getopt(bookmark); our $help : Getopt(help); our $engine : Getopt(engine=s api2); our @order : Getopt(order=s); our $backup : Getopt(backup); # Help? pod2usage(-exitval=>0, -verbose=>1) if $help; # inpath? pod2usage(-message => "'$inpath' is no valid directory!\n", -exitval=>1, -verbose=>1) unless -e $inpath && -d $inpath; $inpath .= "/" unless $inpath =~ m:/$:; # outfile? pod2usage(-message => "-outfile missing!\n", -exitval=>1, -verbose=>1) unless $outfile; pod2usage(-message => "-outfile is a directory!\n", -exitval=>1, -verbose=>1) if -e $outfile && -f $outfile; # build file list my $i=0; my %files = map { $_ => ++$i } grep { /$inmask/i } glob ( "$inpath*.pdf" ); $i=0; my %order = @order ? map { $inpath.$_ => ++$i } @order : %files; my @dateien = grep { defined $files{$_} } sort { $order{$a} <=> $order{$b} } keys %order; die "no files to merge!\n" unless @dateien; # create log my $log = File::Log::Shortcut->new ( { storeExpText => 1, # Store internally all exp text authorName => "Holli", logFileName => $logfile, versionFrom => "20050206", } ); $log->pProgramHeader(2); # create pdf eval { $log->msg(2, "using Engine: $engine\n"); write_pdfs ($outfile, @dateien); }; # catch errors if ( $@ ) { $log->msg(2,"\n!**ABORT**!\n$@\n$!\n"); exit 1; } $log->pProgramFooter(2); #engine-dispatcher sub write_pdfs { my ($outputpdf, $outdir) = fileparse ($_[0]); if ( $backup ) { if ( -e "$outdir$outputpdf") { # backup existing file my $i=0; while ( -e "$outdir$outputpdf.$i" ) { $i++ }; move ("$outdir$outputpdf", "$outdir$outputpdf.$i") or die "File '$outputpdf' cannot be renamed\n"; $log->msg(2, "'$outputpdf' renamed to '$outputpdf.$i'\n"); } } else { unlink "$outdir$outputpdf" if -e "$outdir$outputpdf"; } if ( $engine eq "acrobat" ) { write_pdf_acro (@_); } elsif ( $engine eq "api2" ) { write_pdf_api2 (@_); } elsif ( $engine eq "reuse" ) { write_pdf_reuse (@_); } else { die "Unknow PDF-Engine!\n"; } -e "$outdir$outputpdf" ? $log->pFileInfo(2, "$outdir$outputpdf", "File written $outdir$outputpdf : ") : die "File '$outputpdf' not written!\n"; } sub write_pdf_api2 { my ($outputpdf, $outdir) = fileparse (shift @_); my @pdfFiles = @_; my ( $file, $pdf, $root, ); $pdf = PDF::API2->new( -file => $outdir."\\".$outputpdf ); #default mediabox to A4 $pdf->mediabox (0,0,594.9,841.3597); $root = $pdf->outlines; my $import_page = 0; my $document_page = 0; foreach $file ( @pdfFiles ) { my ($inputpdf, $inputdir) = fileparse (shift @_); my $input = PDF::API2->open( $file ); my @pages = 1 .. $input->pages; if ( scalar @pages > 0 ) { my $outline; $outline = $root->outline if $bookmark; foreach ( @pages ) { ++$import_page; ++$document_page; my $page = $pdf->importpage($input, $_, $import_page); if ( $bookmark ) { # create bookmark my ($bmtext) = ($inputpdf =~ /([^\.]+)/ ); $outline->title($bmtext); my $bm = $outline->outline; $bm->title("page $document_page"); $bm->dest($page); $outline->dest($page) if $document_page == 1; $outline->closed; } } } $log->pFileInfo(2, "$file", "Processsing $file : "); } $pdf->preferences( -outlines => 1 ) if $bookmark; $pdf->update; $pdf->end; } sub write_pdf_acro { eval "use Win32::OLE;"; my ($outputpdf, $outdir) = fileparse (shift @_); my @pdfFiles = map { File::Spec->rel2abs($_) } @_; my ( $_empty_pdf, $file, $bigpdf, $singlepdf, $fillpdf, $acro, ); my $pages=0; $singlepdf = Win32::OLE->new('AcroExch.AVDoc'); $bigpdf = Win32::OLE->new('AcroExch.AVDoc'); prFile ($outdir."_empty_tmp_.pdf"); prPage; prEnd; $bigpdf->open (File::Spec->rel2abs($outdir."_empty_tmp_.pdf"), ""); foreach $file (@pdfFiles) { $log->pFileInfo(2, $file, "Processsing $file : "); $singlepdf->open($file, ""); $bigpdf->GetPDDoc->InsertPages ( $bigpdf->GetPDDoc->GetNumPages-1, $singlepdf->GetPDDoc(), 0, $singlepdf->GetPDDoc->GetNumPages, 1 ); $pages += $singlepdf->GetPDDoc->GetNumPages; $singlepdf->close(1); } $bigpdf->GetPDDoc->DeletePages ( 0, 0 ); unless ( $bigpdf->GetPDDoc->GetNumPages == $pages ) { my $p = $bigpdf->GetPDDoc->GetNumPages; $bigpdf->close(1), #bug in acrobat? some files just donīt get merged die "not all pages inserted. shoud be: $pages, is: $p!\n"; } $bigpdf->GetPDDoc->Save(1, File::Spec->rel2abs("$outdir$outputpdf")); $bigpdf->close(1); unlink $outdir."_empty_.pdf"; } sub write_pdf_reuse { my ($outputpdf, $outdir) = fileparse (shift @_); my @pdfFiles = @_; prFile ("$outdir$outputpdf"); my $pages=0; for ( @pdfFiles ) { my $page=prDoc($_); if ( $bookmark ) { my ($inputpdf, $indir) = fileparse ($_, /\.pdf$/i); prBookmark({ text => $inputpdf, act => "this.pageNum = $pages;"}); } $log->pFileInfo(2, $_, "Processsing $_ : "); $pages += $page; } prEnd; } __END__ #### package File::Log::Shortcut; use strict; use warnings; use POSIX qw(strftime); our $VERSION = "0.1"; use base qw(File::Log); sub new { my $proto = shift; my $class = ref($proto) || $proto; my %args = %{$_[0]}; %args = map { lc($_) => $args{$_} } keys %args ; my $self = $class->SUPER::new(@_); my %defaults = ( appname => ($main::0 =~ /(?:^|[\/\\])([^\/\\]+)$/)[-1], appversion => $main::VERSION || "", authorname => "", _startedepoc => time, versionfrom => strftime( "%Y%m%d-%X", localtime( (stat($main::0))[9] ) ) , startedat => strftime( "%Y%m%d-%X", localtime() ), ); for ( keys %defaults ) { $self->{$_} = defined $args{$_} ? $args{$_} : $defaults{$_}; } return $self; } sub pProgramHeader { my $self = shift; $self->msg(shift, $self->programHeader); } sub programHeader { my $self = shift; my $msg = ""; $msg .= $self->{appname}; $msg .= " Version $self->{appversion}" if $self->{appversion}; $msg .= " from $self->{versionfrom}" if $self->{versionfrom}; $msg .= "\n"; $msg .= "Author: $self->{authorname}\n" if $self->{authorname}; $msg .= "Started at $self->{startedat}\n" if $self->{startedat}; return $msg; } sub pProgramFooter { my $self = shift; my $debug = shift; $self->msg($debug, $self->programFooter); } sub programFooter { my $self = shift; return "Ended at " . strftime( "%Y%m%d-%X", localtime() ) . " after " . (time-$self->{_startedepoc}) . "\n"; } sub pFileInfo { my $self = shift; my $debug = shift; my $file = shift; my $msg = shift || ""; $self->msg($debug, $self->fileInfo ($file, $msg)); } sub fileInfo { my $self = shift; my $file = shift; my $msg = shift || ""; return $msg . strftime( "%Y%m%d-%X", localtime( (stat($file))[9] ) ) . "\n"; } 1; __END__ =head1 NAME File::Log::Shortcut =head1 SYNOPSIS my $log = File::Log::Shortcut->new ( { # the author of the program, defaults to "" authorName => "Holli", # date of release, defaults to "last changed" of the script-file versionFrom => "someday", # the scripts version, defaults to $main::VERSION or "" appVersion => "0.1", } ); $log->pProgramHeader(2); sleep(2); open OUT , ">", "testout"; close OUT; sleep(2); $log->pFileInfo(2, "testout","File created: "); $log->pProgramFooter(2); $log->close(); =head1 DESCRIPTION This is a subclass of File::Log, that offers some convenience. So everything in the File::Log-documentation is true for File::Log::Shortcut =head2 methods =over 6 =item pProgramHeader ($debug); calls msg() of the parent class with $debug and a string like the following: test.pl Version 0.1 from someday Author: Holli Started at 20050206-04:57:49 =item programHeader returns the input for pProgramHeader() =item pProgramFooter ($debug); calls msg() of the parent class with $debug and a string like the following: Ended at 20050206-04:57:53 after 4 =item programFooter returns the input for pProgramFooter() =item pFileInfo ($debug, $filename, $message); calls msg() of the parent class with $debug and a string like the following: File created: 20050205-21:47:16 =item fileInfo ($filename, $message) returns the input for pFileInfo =back So, $log->pFileInfo (2, "file", "message"); is equivalent to $log->msg(2, $log->fileInfo ("file", "message")); =head1 AUTHOR holli