Hello fellow monks,
I get the following error when I try to convert pdf to text using CAM::PDF
Expected string closing 4810 8.01.88 Td^M1.394.9zio...
Can someone help me either fix the error or make the script ignore the error and keep running?
Some pdfs are converted while some fail maybe it's a code that I get for pdf that only have pictures?
Here is the code
#!/usr/bin/perl -w
use strict;
use CAM::PDF;
use CAM::PDF::PageText;
use File::Spec::Functions;
use File::Find::Rule;
#1. Specify directory
my $dirPath= 'F:/project_italia/firm_files';
my $src_fmt='*.*';
#2 retrieve all sub-dirs from the directory
my @allDir = File::Find::Rule->maxdepth(1)->directory->in($dirPath);
open(LIST, ">>", 'F:/project_italia/firmList_pdf.txt' ) or die "Can't
+open destination.fil $!";
foreach (@allDir)
{
my $src_path = catfile($_, $src_fmt);
my @filings = glob($src_path);
foreach (@filings)
{
my $fileName=$_;
my $pdf = CAM::PDF->new($_);
#split path name
my @textName = split(/\\/, $_);
#Create new text file
open(DEST, ">>", 'F:/project_italia/firm_text/'.$textName[4].'.
+txt' ) or die "Can't open destination.fil $!";
#keeps track on whether the pdf to text conversion was successf
+ul
my $conv=0;
my $count=0;
#Get total page number of the pdf
my $pageNum=$pdf->numPages();
for (1..$pageNum)
{
my $pageConv = $pdf->getPageContentTree($_);
#if conversion fails move to the next file
print DEST CAM::PDF::PageText->render($pageConv) or do
{
warn "PDF->new failed on $_\n";
next;
};
$count++;
#conversion is successful only when all pages have been co
+nverted
$conv=1 if ($count==$pageNum);
}
#close text file
close DEST;
#keep track of which files converted successfully
print LIST "$textName[4] | $conv \n";
}
}
close LIST;