#!/usr/bin/env perl use strict; use warnings; use constant { AMOUNT => 3, ADDL_RATE_PER => 4, DISCOUNT_PRICE => 6, }; use CAM::PDF; use Data::Dumper; my $jacket_id = $ARGV[0]; my $pdf_file = "pm_11113472_$jacket_id.pdf"; my $pdf = CAM::PDF::->new($pdf_file) or die $CAM::PDF::errstr; my $re = qr{(?x: \A \s*? ((?:A|)) # Awd \s+ (\d+-\d+) # Contractor Code \s+ ([^\$]+?) # Name \s+ (\$\s[0-9,.]+) # Amount \s+ (\$\s[0-9,.]+\s[A-Z]) # Add'l Rate/PER \s+ ([0-9.]+\s+\d+) # Discount % Days \s+ (\$\s[0-9,.]+) # Discount Price \s+ ([\D]+?) # Bidders Name \s+ (\S+) # Date Received \s+ (\(\d+\)\s\d+-\d+) # Phone Number )}; for my $page_num (1 .. $pdf->numPages) { my $text = $pdf->getPageText($page_num); my @lines; my $wanted_line = 0; for my $line (split /$jacket_id/, $text) { next unless $wanted_line++; my @fields = $line =~ $re; $fields[AMOUNT] =~ y/ //d; $fields[ADDL_RATE_PER] =~ s/ //; $fields[DISCOUNT_PRICE] =~ y/ //d; push @lines, [ $jacket_id, @fields ]; } print Dumper(\@lines); }