Added: flags -begin and -end to be easily able to execute code before and after the loop
excelPerl 0.12:
#! /usr/bin/perl
use warnings;
use strict;
our $VERSION = 0.12;
use Win32; # automatically loaded with Activestate Perl
use File::Basename; # for PrintUsage
use Getopt::Long; # for parsing program arguments
use FindBin;
use lib $FindBin::Bin;
use ExcelPerl;
use Win32::OLE::Const 'Microsoft Excel';
$|++; # no suffering from buffering
my( $file, $perlCode, $beginCode, $endCode );
my $worksheetNumber = 1; # use first worksheet as default
my $headline = 0; # don't skip a headline as default
my $visible = 0; # is of no use in this version
my $colSeparator = "\t"; # standard col separator for $_
GetOptions( 'file=s' => \$file,
'sheet=i' => \$worksheetNumber,
'headline=i' => \$headline,
'ane=s' => \$perlCode,
'begin=s' => \$beginCode,
'end=s' => \$endCode,
# 'visible' => \$visible,
'colseparator' => \$colSeparator,
)
or die &PrintUsage();
# check file and convert it to absolute path if necessary
defined $file or die &PrintUsage();
$file = Win32::GetFullPathName( $file );
my $xlsPerl = ExcelPerl->new( excelFile => $file,
worksheetNumber => $worksheetNumber,
)
->open;
#if( $visible ) {
$xlsPerl->excel->{Visible} = 1;
#} # if
# skip all lines until -headline if available
$xlsPerl->currentRow( $headline ) if $headline;
$xlsPerl->colSeparator( $colSeparator );
my @F;
# try to evaluate begin block
if( $beginCode ) {
eval <<"EOH";
no strict;
$beginCode;
EOH
;
die "Error in begin: $@\n" if $@;
} # if
# evaluate the perl code from argument -ane ( thanks @pKai for this id
+ea )
my $aneCode = eval "sub {
my \$xlsPerl = shift;
my \$C = shift;
my \@C = \@\$C;
# prepare some other helpful variables
local \$. = \$xlsPerl->currentRow();
no warnings \'uninitialized\';
local \$_ = join( \$xlsPerl->colSeparator, \@F ) . \"\\n\";
no strict;
$perlCode;
}";
die $@ if $@;
while( my @cells = $xlsPerl->getNextRow( \@F ) ) {
$aneCode->($xlsPerl, \@cells);
} # while
# try to evaluate begin block
if( $endCode ) {
eval <<"EOH";
no strict;
$endCode;
EOH
;
die "Error in begin: $@\n" if $@;
} # if
# ------------------------------------------------------------
sub PrintUsage {
my $bin = File::Basename::basename($0);
die <<"EOH";
Usage:
$bin -sheet 1 -headline 1 -ane "print qq~\$.: \$F[0], \$F[1]\\n~" -f
+ile excelFile
try perldoc $bin for more details
EOH
} # PrintUsage
# ------------------------------------------------------------
=pod
=head1 excelPerl
You know about the perl parameters -ane? This script is a way to try
to do the same thing to Excel files.
=head1 Prerequisites
=over 1
=item Win32::OLE
=item MS Excel needs to be installed
=back
=head1 Description
Like perl -ane with a plaintext file, excelPerl.pl loops over an excel
file and automatically (-a) splits up the columns of one row into an
array with the name @F. Since @F is a ied array, by changing one
element of @F you change the content of the excel cells.
If you need one whole line with a tailing \n, you can use the variable
$_ (like perl -ne) which joins @F by the value given in the parameter
-colseparator (default: \t)
If you need to know the current line number (like $. for while), you
can use the special variable $. (big surprise).
=head1 Params:
-file String: Name of excel file (full path!)
-sheetnumber Integer: which sheet shell I read (1..n, default: 1
+)
-headline Integer: line number of headline (used for skipping
+ headline)
-ane String: Perl-Code to execute
-begin String: Perl-Code to be executed before the loop (
+BEGIN)
-end String: Perl-Code to be executed after the loop (E
+ND)
-colseparator String: how to join columns in $_ (default: \t)
Abbreviations of params are allowed, as long as they are unique, e.g
excelPerl.pl -s 2 -h 1 -ane "print qq~$.: $F[0] $F[5]\n~" -f test.xls
excelPerl.pl -ane "$F[0] =~ s/abc/def/" -f test.xls
excelPerl.pl -ane "$C[0]->{Font}->{Name} = 'Courier New'" -f test.xls
excelPerl.pl -ane "$x{$F[0]}++" ^
-end "use Data::Dumper; print Dumper \%x" ^
-f test.xls
excelPerl.pl ^
-begin "use Text::CSV_XS; $csv = Text::CSV_XS->new( { binary => 1}
+ )" ^
-ane "$csv->combine(@F); print $csv->string, $/" ^
-f file.xls
(The ^ is the windows continuation line char; I used it in these examp
+les
to avoid line wrappings; but if you write the whole parameters into on
+e
line, don't use the dashes)
=head1 Special Variables:
$. Row number
@F Array containing the values of one complete row
$_ Row joined by -colseparator, with \n at the end
@C Advanced: List of cells (e.g. to change a format). You may eve
+n
use excel constants.
=head2 Some examples for $C[$i]-> ($i is index of column):
{Font}->{Name} name of font: 'Courier New', 'Arial', ...
{Font}->{FontStyle} style of font: 'Bold Italic', ...
{Font}->{Size} font size: ... 10 11 12 ...
{Font}->{Strikethrough} strike through: 0 or 1
{HorizontalAlignment} align: xlCenter, xlRight, xlLeft, xlJustify
{VerticalAlignment} align vertically: xlCenter, xlTop, xlBottom
{WrapText} wrap text in line: 0 or 1
{FormulaR1C1} formula: "=CONCATENATE(RC[-1],$R[-1]C[-1])"
{FormulaLocal} formula: "=ANZAHL2(A2:G2)"
=head2 Object model of $C->[$i]->{...} (not complete, may differ):
# try something like the following (as one line) to find out about
# more properties:
excelPerl.pl -ane "$cell = $C->[0]; for $k (sort keys %$cell ) {
eval { print qq~$k => $cell->{$k}\n~} }; exit" -f excelfile.xls
Application => {
# lots of stuff, enhance the previous example with:
excelPerl.pl -ane "my $cell = $C->[0]->{Application}; for $k ...
},
Font => {
Background => '' ???
Bold => 0 or 1,
Color => integer ???
ColorIndex => integer ???
FontStyle => 'Standard', ?????
Italic => 0 or 1,
Name => 'Arial',
OutlineFont => 0 ???
Shadow => 0 ???
Size => 10, # Font Size
Strikethrough => 0 or 1,
Subscript => 0 ???
Superscript => 0 ???
Underline => 2 ???
},
=head2 Further but not yet tested properties for $C->[$i] with Excel 2
+003
AddIndent => 0
Address => $G$1
AddressLocal => $G$1
AllowEdit => 1
Areas => Obj
Borders => Obj
Cells => Obj
Characters => Obj
Column => 7
ColumnWith => 19.43
Comment => ''
Count => int
Creator => ???
CurrentRegion => Object
Errors => Object
FormatConditions => Win32::OLE=HASH(0x1d54b64)
Formula => string
FormulaArray => string
FormulaHidden => 0
FormulaLabel => -4142
FormulaLocal => string
FormulaR1C1 => string
FormulaR1C1Local => string
HasArray => 0
HasFormula => 0
Height => 12.75
Hyperlinks => Object
ID =>
IndentLevel => 0
Interior => Object
Left => 539.25
ListHeaderRows => 0
ListObject =>
Locked => 1
MergeArea => Object
MergeCells => 0
Next => Object
NumberFormat => Standard
NumberFormatLocal => Standard
Offset => Object
Orientation => -4128
Parent => Object
Phonetic => Object
Phonetics =>
PrefixCharacter =>
Previous => Object
ReadingOrder => -5002
Resize => Object
Row => 1
RowHeight => 12.75
ShrinkToFit => 0
SmartTags => Object
SoundNote => Object
Style => Object
Text => Url
Top => 0
UseStandardHeight => 1
UseStandardWidth => 0
Validation => Object
Value => Url
Value2 => Url
Width => 105.75
Worksheet => Object
XPath => Object
=head1 Bugs/...
This code is experimental and a lot of properties are not yet tested!
If you do changes, you better save the workbook manually, that's the
reason why the parameter -visible always is on and auto-save is off.
=head1 SEE ALSO
I heard about XLSperl from John Allen which is a great tool. But it
only allows to read excel files and not to change them on the fly.
Since I often need this feature, I decided to write it with
Win32::OLE, and it was not difficult. But unlike XLSperl, excelPerl
only runs under Windows and needs an installed version of Excel. But
starting with v0.10, it supports @C which is a list containing the
cells of the actual row. You can query or modify elements of @C to
get or set cell properties like format, alignment, ...
Starting with v0.12, it supports the args -begin and -end
You can find XLSPerl at L<http://perl.jonallen.info/projects/xlstools>
=head1 Author
Martin Fabiani L<http://www.fabiani.net/>
=cut
Module ExcelPerl.pm
package ExcelPerl;
use warnings;
use strict;
use Carp qw(croak);
our $VERSION = 0.12;
# use Readonly; # better not, since Readonly is no standard module yet
use Win32::OLE;
use Win32::OLE::Const 'Microsoft Excel';
use Win32::OLE::Variant;
Win32::OLE->Option( Warn => 3);
use vars qw( @ObjInterfaceMethods @F );
# which object interface methods shell be available as standard
#Readonly::Array: Readonly is not yet a standard module :-(
@ObjInterfaceMethods =
qw( excelFile excel workBook worksheet colSeparator
worksheetNumber currentRow lastRow maxCol changeCount
);
# install object interface methods at startup
foreach my $method ( @ObjInterfaceMethods ) {
no strict 'refs'; # Sub::Install is no standard module :-(
*{ $method } = sub {
my( $self, @values ) = @_;
$self->{ $method } = $values[0] if scalar @values;
return $self->{ $method };
};
} # foreach
# ============================================================
sub new {
my( $class, %params ) = @_;
my $self = bless {}, $class;
foreach my $param (keys %params) {
$self->$param( $params{$param} );
} # foreach
return $self;
} # new
# ------------------------------------------------------------
sub increaseCurrentRow {
my $self = shift;
return $self->currentRow( 1 + $self->currentRow );
} # increaseCurrentRow
# ------------------------------------------------------------
sub increaseChangeCount {
my $self = shift;
if( defined $self->changeCount ) {
$self->changeCount( 1 + $self->changeCount );
} # if
else {
$self->changeCount( 1 );
} # else
return;
} # increaseChangeCount
# ------------------------------------------------------------
sub open {
my( $self ) = @_;
my $filename = $self->excelFile;
defined $filename or croak "Error: no filename given";
-f $filename or croak( "Error: filename '$filename' doesn't exist"
+);
my $excel; # try to re-use running instance of Excel
eval { $excel = Win32::OLE->GetActiveObject( 'Excel.Application' )
+ };
die "Error: no Excel installed\n" if $@;
unless( defined $excel ) { # if not running, start excel
$excel = Win32::OLE->new( 'Excel.Application', sub { $_[0]->Qu
+it } )
or die "Error: can't start Excel\n";
} # unless
$self->excel( $excel );
$self->workBook( $excel->Workbooks->Open( $filename ) );
$self->worksheet( $self->workBook->Worksheets( $self->worksheetNum
+ber ) );
my $range = $self->worksheet->UsedRange->{Value};
$self->lastRow( $#{$range} + 1 );
$self->currentRow( 0 );
return $self;
} # open
# ------------------------------------------------------------
sub getNextRow {
my( $self, $F ) = @_;
$self->increaseCurrentRow;
my $row = $self->currentRow;
my $sheet = $self->worksheet;
return if $row > $self->lastRow;
# thanks to pKai for this idea:
my $bottomRight =
( split( /:/, $self->worksheet->UsedRange->address(0,0) ) )[-1
+];
my( $colNameMax, $rowNumMax ) = $bottomRight =~ /^([A-Z]+)(\d+)$/;
$self->lastRow( $rowNumMax );
$self->maxCol ( $colNameMax );
my $rowData = $sheet->Range("A$row:" . $self->maxCol . $row)->{Val
+ue};
# prepare ole objects for return representing array of cells
my @cells = ();
for my $cell ( 'A' .. $self->maxCol ) {
push( @cells, $sheet->Range( "$cell$row" ) );
} # for
tie( @$F, 'ExcelPerl::RowArray', $self );
foreach my $value ( ref $rowData ? @{ $rowData->[0] } : $rowData )
+ {
push( @$F, $value );
} # foreach
return @cells;
} # getNextRow
# ------------------------------------------------------------
# ============================================================
package ExcelPerl::RowArray;
# ------------------------------------------------------------
use Tie::Array;
use vars qw( @ISA );
@ISA = qw( Tie::Array );
# ------------------------------------------------------------
sub TIEARRAY {
my( $class, $excelObj ) = @_;
my $self = bless( { data => [] }, $class );
$self->{excelObj} = $excelObj;
$self->{row} = $excelObj->currentRow;
return $self;
} # TIEARRAY
# ------------------------------------------------------------
sub FETCH {
my( $self, $index) = @_;
return $self->{data}->[ $index ];
} # FETCH
# ------------------------------------------------------------
sub STORE {
my( $self, $index, $newValue ) = @_;
my $excelObj = $self->{excelObj};
my $sheet = $excelObj->worksheet;
my $row = $excelObj->currentRow;
my $col = $index + 1;
my $oldValue = $self->{data}->[$index];
if( ( not defined $oldValue and defined $newValue ) or
( defined $oldValue and not defined $newValue ) or
( $oldValue ne $newValue )
) {
$excelObj->increaseChangeCount;
$sheet ->Cells($row, $col)->{'Value' } = $newValue;
$self ->{'data'} ->[ $index ] = $newValue;
} # if
return;
} # STORE
# ------------------------------------------------------------
sub FETCHSIZE {
my $self = shift;
return $#{ $self->{data} };
} # FETCHSIZE
# ------------------------------------------------------------
sub STORESIZE {
my( $self, $newLength ) = @_;
return $#{ $self->{data} } = $newLength;
} # STORESIZE
# ------------------------------------------------------------
sub PUSH {
my $self = shift;
return push( @{ $self->{data} }, @_ );
} # PUSH
# ------------------------------------------------------------
sub CLEAR {
my( $self ) = @_;
my $data = $self->{data};
for my $i ( 0..$#{$data} ) {
$self->STORE( $i, '' );
} # for
return;
} # CLEAR
# ------------------------------------------------------------
1; # modules have to return a true value
Best regards,
perl -e "s>>*F>e=>y)\*martinF)stronat)=>print,print v8.8.8.32.11.32"