<?xml version="1.0" encoding="windows-1252"?>
<node id="636590" title="excelPerl V0.12 released" created="2007-09-02 06:05:24" updated="2007-09-02 02:05:24">
<type id="11">
note</type>
<author id="123953">
strat</author>
<data>
<field name="doctext">
&lt;p&gt;Version 0.12 of excelPerl released&lt;/p&gt;
&lt;p&gt;Added: flags -begin and -end to be easily able to execute code before and after the loop&lt;/p&gt;
&lt;readmore&gt;

&lt;p&gt;excelPerl 0.12:&lt;/p&gt;
&lt;code&gt;
#! /usr/bin/perl
use warnings;
use strict;

our $VERSION = 0.12;

use Win32;          # automatically loaded with Activestate Perl
use File::Basename; # for PrintUsage
use Getopt::Long;   # for parsing program arguments
use FindBin;
use lib $FindBin::Bin;
use ExcelPerl;
use Win32::OLE::Const 'Microsoft Excel';

$|++; # no suffering from buffering

my( $file, $perlCode, $beginCode, $endCode );
my $worksheetNumber = 1;    # use first worksheet as default
my $headline        = 0;    # don't skip a headline as default
my $visible         = 0;    # is of no use in this version
my $colSeparator    = "\t"; # standard col separator for $_

GetOptions( 'file=s'       =&gt; \$file,
            'sheet=i'      =&gt; \$worksheetNumber,
            'headline=i'   =&gt; \$headline,
            'ane=s'        =&gt; \$perlCode,
            'begin=s'      =&gt; \$beginCode,
            'end=s'        =&gt; \$endCode,
#            'visible'      =&gt; \$visible,
            'colseparator' =&gt; \$colSeparator,
       )
    or die &amp;PrintUsage();

# check file and convert it to absolute path if necessary
defined $file or die &amp;PrintUsage();
$file = Win32::GetFullPathName( $file );

my $xlsPerl = ExcelPerl-&gt;new( excelFile       =&gt; $file,
                              worksheetNumber =&gt; $worksheetNumber,
                       )
    -&gt;open;

#if( $visible ) {
$xlsPerl-&gt;excel-&gt;{Visible} = 1;
#} # if

# skip all lines until -headline if available
$xlsPerl-&gt;currentRow( $headline ) if $headline;
$xlsPerl-&gt;colSeparator( $colSeparator );

my @F;

# try to evaluate begin block
if( $beginCode ) {
    eval &lt;&lt;"EOH";
no strict;
$beginCode;
EOH
    ;
    die "Error in begin: $@\n" if $@;
} # if

# evaluate the perl code from argument -ane ( thanks @pKai for this idea )
my $aneCode = eval "sub {
    my \$xlsPerl = shift;
    my \$C = shift;
    my \@C = \@\$C;

    # prepare some other helpful variables
    local \$. = \$xlsPerl-&gt;currentRow();
    no warnings \'uninitialized\';
    local \$_ = join( \$xlsPerl-&gt;colSeparator, \@F ) . \"\\n\";
    no strict;
    $perlCode;
}";
die $@ if $@;

while( my @cells = $xlsPerl-&gt;getNextRow( \@F ) ) {
    $aneCode-&gt;($xlsPerl, \@cells);
} # while

# try to evaluate begin block
if( $endCode ) {
    eval &lt;&lt;"EOH";
no strict;
$endCode;
EOH
    ;
    die "Error in begin: $@\n" if $@;
} # if

# ------------------------------------------------------------
sub PrintUsage {

    my $bin = File::Basename::basename($0);
    die &lt;&lt;"EOH";
Usage:
  $bin -sheet 1 -headline 1 -ane "print qq~\$.: \$F[0], \$F[1]\\n~" -file excelFile

try perldoc $bin for more details

EOH

} # PrintUsage
# ------------------------------------------------------------

=pod

=head1 excelPerl

You know about the perl parameters -ane? This script is a way to try
to do the same thing to Excel files.

=head1 Prerequisites

=over 1

=item Win32::OLE

=item MS Excel needs to be installed

=back

=head1 Description

Like perl -ane with a plaintext file, excelPerl.pl loops over an excel
file and automatically (-a) splits up the columns of one row into an
array with the name @F. Since @F is a ied array, by changing one
element of @F you change the content of the excel cells.

If you need one whole line with a tailing \n, you can use the variable
$_ (like perl -ne) which joins @F by the value given in the parameter
-colseparator (default: \t)

If you need to know the current line number (like $. for while), you
can use the special variable $. (big surprise).

=head1 Params:

  -file            String:  Name of excel file (full path!)
  -sheetnumber     Integer: which sheet shell I read (1..n, default: 1)
  -headline        Integer: line number of headline (used for skipping headline)
  -ane             String:  Perl-Code to execute
  -begin           String:  Perl-Code to be executed before the loop (BEGIN)
  -end             String:  Perl-Code to be executed after the loop (END)
  -colseparator    String:  how to join columns in $_ (default: \t)

Abbreviations of params are allowed, as long as they are unique, e.g

 excelPerl.pl -s 2 -h 1 -ane "print qq~$.: $F[0] $F[5]\n~" -f test.xls

 excelPerl.pl -ane "$F[0] =~ s/abc/def/" -f test.xls

 excelPerl.pl -ane "$C[0]-&gt;{Font}-&gt;{Name} = 'Courier New'" -f test.xls

 excelPerl.pl -ane "$x{$F[0]}++" ^
    -end "use Data::Dumper; print Dumper \%x" ^
    -f test.xls

 excelPerl.pl ^
    -begin "use Text::CSV_XS; $csv = Text::CSV_XS-&gt;new( { binary =&gt; 1} )" ^
    -ane "$csv-&gt;combine(@F); print $csv-&gt;string, $/" ^
    -f file.xls

(The ^ is the windows continuation line char; I used it in these examples
to avoid line wrappings; but if you write the whole parameters into one
line, don't use the dashes)

=head1 Special Variables:

  $.    Row number
  @F    Array containing the values of one complete row
  $_    Row joined by -colseparator, with \n at the end
  @C    Advanced: List of cells (e.g. to change a format). You may even
        use excel constants.

=head2 Some examples for $C[$i]-&gt; ($i is index of column):

  {Font}-&gt;{Name}           name of font: 'Courier New', 'Arial', ...
  {Font}-&gt;{FontStyle}      style of font: 'Bold Italic', ...
  {Font}-&gt;{Size}           font size: ... 10 11 12 ...
  {Font}-&gt;{Strikethrough}  strike through: 0 or 1
  {HorizontalAlignment}    align: xlCenter, xlRight, xlLeft, xlJustify
  {VerticalAlignment}      align vertically: xlCenter, xlTop, xlBottom
  {WrapText}               wrap text in line: 0 or 1
  {FormulaR1C1}            formula: "=CONCATENATE(RC[-1],$R[-1]C[-1])"
  {FormulaLocal}           formula: "=ANZAHL2(A2:G2)"


=head2 Object model of $C-&gt;[$i]-&gt;{...} (not complete, may differ):

  # try something like the following (as one line) to find out about
  # more properties:
  excelPerl.pl -ane "$cell = $C-&gt;[0]; for $k (sort keys %$cell ) {
   eval { print qq~$k =&gt; $cell-&gt;{$k}\n~} }; exit" -f excelfile.xls

  Application =&gt; {
    # lots of stuff, enhance the previous example with:
    excelPerl.pl -ane "my $cell = $C-&gt;[0]-&gt;{Application}; for $k ...
  },

  Font =&gt; {
    Background  =&gt; '' ???
    Bold        =&gt; 0 or 1,
    Color       =&gt; integer ???
    ColorIndex  =&gt; integer ???
    FontStyle   =&gt; 'Standard', ?????
    Italic      =&gt; 0 or 1,
    Name        =&gt; 'Arial',
    OutlineFont =&gt; 0 ???
    Shadow      =&gt; 0 ???
    Size        =&gt; 10, # Font Size
    Strikethrough =&gt; 0 or 1,
    Subscript     =&gt; 0 ???
    Superscript   =&gt; 0 ???
    Underline     =&gt; 2 ???
  },

=head2 Further but not yet tested properties for $C-&gt;[$i] with Excel 2003

  AddIndent =&gt; 0
  Address =&gt; $G$1
  AddressLocal =&gt; $G$1
  AllowEdit =&gt; 1
  Areas =&gt; Obj
  Borders =&gt; Obj
  Cells =&gt; Obj
  Characters =&gt; Obj
  Column =&gt; 7
  ColumnWith =&gt; 19.43
  Comment =&gt; ''
  Count =&gt; int
  Creator =&gt; ???
  CurrentRegion =&gt; Object
  Errors =&gt; Object
  FormatConditions =&gt; Win32::OLE=HASH(0x1d54b64)
  Formula =&gt; string
  FormulaArray =&gt; string
  FormulaHidden =&gt; 0
  FormulaLabel =&gt; -4142
  FormulaLocal =&gt; string
  FormulaR1C1 =&gt; string
  FormulaR1C1Local =&gt; string
  HasArray =&gt; 0
  HasFormula =&gt; 0
  Height =&gt; 12.75
  Hyperlinks =&gt; Object
  ID =&gt;
  IndentLevel =&gt; 0
  Interior =&gt; Object
  Left =&gt; 539.25
  ListHeaderRows =&gt; 0
  ListObject =&gt;
  Locked =&gt; 1
  MergeArea =&gt; Object
  MergeCells =&gt; 0
  Next =&gt; Object
  NumberFormat =&gt; Standard
  NumberFormatLocal =&gt; Standard
  Offset =&gt; Object
  Orientation =&gt; -4128
  Parent =&gt; Object
  Phonetic =&gt; Object
  Phonetics =&gt;
  PrefixCharacter =&gt;
  Previous =&gt; Object
  ReadingOrder =&gt; -5002
  Resize =&gt; Object
  Row =&gt; 1
  RowHeight =&gt; 12.75
  ShrinkToFit =&gt; 0
  SmartTags =&gt; Object
  SoundNote =&gt; Object
  Style =&gt; Object
  Text =&gt; Url
  Top =&gt; 0
  UseStandardHeight =&gt; 1
  UseStandardWidth =&gt; 0
  Validation =&gt; Object
  Value =&gt; Url
  Value2 =&gt; Url
  Width =&gt; 105.75
  Worksheet =&gt; Object
  XPath =&gt; Object

=head1 Bugs/...

This code is experimental and a lot of properties are not yet tested!

If you do changes, you better save the workbook manually, that's the
reason why the parameter -visible always is on and auto-save is off.

=head1 SEE ALSO

I heard about XLSperl from John Allen which is a great tool. But it
only allows to read excel files and not to change them on the fly.
Since I often need this feature, I decided to write it with
Win32::OLE, and it was not difficult. But unlike XLSperl, excelPerl
only runs under Windows and needs an installed version of Excel. But
starting with v0.10, it supports @C which is a list containing the
cells of the actual row. You can query or modify elements of @C to
get or set cell properties like format, alignment, ...
Starting with v0.12, it supports the args -begin and -end

You can find XLSPerl at L&lt;http://perl.jonallen.info/projects/xlstools&gt;

=head1 Author

Martin Fabiani L&lt;http://www.fabiani.net/&gt;

=cut

&lt;/code&gt;

&lt;p&gt;Module ExcelPerl.pm&lt;/p&gt;
&lt;code&gt;
package ExcelPerl;
use warnings;
use strict;
use Carp qw(croak);

our $VERSION = 0.12;

# use Readonly; # better not, since Readonly is no standard module yet
use Win32::OLE;
use Win32::OLE::Const 'Microsoft Excel';
use Win32::OLE::Variant;
Win32::OLE-&gt;Option( Warn =&gt; 3);

use vars qw( @ObjInterfaceMethods @F );

# which object interface methods shell be available as standard
#Readonly::Array: Readonly is not yet a standard module :-(
@ObjInterfaceMethods =
    qw( excelFile excel workBook worksheet colSeparator
        worksheetNumber currentRow lastRow maxCol changeCount
       );

# install object interface methods at startup
foreach my $method ( @ObjInterfaceMethods ) {
    no strict 'refs'; # Sub::Install is no standard module :-(
    *{ $method } = sub {
        my( $self, @values ) = @_;
        $self-&gt;{ $method } = $values[0] if scalar @values;
        return $self-&gt;{ $method };
    };
} # foreach

# ============================================================
sub new {
    my( $class, %params ) = @_;

    my $self = bless {}, $class;
    foreach my $param (keys %params) {
        $self-&gt;$param( $params{$param} );
    } # foreach

    return $self;
} # new
# ------------------------------------------------------------
sub increaseCurrentRow {
    my $self = shift;
    return $self-&gt;currentRow( 1 + $self-&gt;currentRow );
} # increaseCurrentRow
# ------------------------------------------------------------
sub increaseChangeCount {
    my $self = shift;

    if( defined $self-&gt;changeCount ) {
        $self-&gt;changeCount( 1 + $self-&gt;changeCount );
    } # if
    else {
        $self-&gt;changeCount( 1 );
    } # else

    return;
} # increaseChangeCount
# ------------------------------------------------------------
sub open {
    my( $self ) = @_;

    my $filename = $self-&gt;excelFile;
    defined $filename or croak "Error: no filename given";
    -f $filename or croak( "Error: filename '$filename' doesn't exist");

    my $excel; # try to re-use running instance of Excel
    eval { $excel = Win32::OLE-&gt;GetActiveObject( 'Excel.Application' ) };
    die "Error: no Excel installed\n" if $@;

    unless( defined $excel ) { # if not running, start excel
        $excel = Win32::OLE-&gt;new( 'Excel.Application', sub { $_[0]-&gt;Quit } )
            or die "Error: can't start Excel\n";
    } # unless
    $self-&gt;excel( $excel );

    $self-&gt;workBook( $excel-&gt;Workbooks-&gt;Open( $filename ) );

    $self-&gt;worksheet( $self-&gt;workBook-&gt;Worksheets( $self-&gt;worksheetNumber ) );

    my $range = $self-&gt;worksheet-&gt;UsedRange-&gt;{Value};
    $self-&gt;lastRow( $#{$range} + 1 );
    $self-&gt;currentRow( 0 );

    return $self;
} # open
# ------------------------------------------------------------
sub getNextRow {
    my( $self, $F ) = @_;

    $self-&gt;increaseCurrentRow;

    my $row   = $self-&gt;currentRow;
    my $sheet = $self-&gt;worksheet;
    return if $row &gt; $self-&gt;lastRow;

    # thanks to pKai for this idea:
    my $bottomRight =
        ( split( /:/, $self-&gt;worksheet-&gt;UsedRange-&gt;address(0,0) ) )[-1];
    my( $colNameMax, $rowNumMax ) = $bottomRight =~ /^([A-Z]+)(\d+)$/;
    $self-&gt;lastRow( $rowNumMax  );
    $self-&gt;maxCol ( $colNameMax );
    my $rowData = $sheet-&gt;Range("A$row:" . $self-&gt;maxCol . $row)-&gt;{Value};

    # prepare ole objects for return representing array of cells
    my @cells = ();
    for my $cell ( 'A' .. $self-&gt;maxCol ) {
        push( @cells, $sheet-&gt;Range( "$cell$row" ) );
    } # for

    tie( @$F, 'ExcelPerl::RowArray', $self );

    foreach my $value ( ref $rowData ? @{ $rowData-&gt;[0] } : $rowData ) {
        push( @$F, $value );
    } # foreach

    return @cells;
} # getNextRow
# ------------------------------------------------------------

# ============================================================
package ExcelPerl::RowArray;
# ------------------------------------------------------------
use Tie::Array;
use vars qw( @ISA );
@ISA   = qw( Tie::Array );
# ------------------------------------------------------------
sub TIEARRAY {
    my( $class, $excelObj ) = @_;

    my $self = bless( { data =&gt; [] }, $class );
    $self-&gt;{excelObj} = $excelObj;
    $self-&gt;{row} = $excelObj-&gt;currentRow;

    return $self;
} # TIEARRAY
# ------------------------------------------------------------
sub FETCH {
    my( $self, $index) = @_;
    return $self-&gt;{data}-&gt;[ $index ];
} # FETCH
# ------------------------------------------------------------
sub STORE {
    my( $self, $index, $newValue ) = @_;

    my $excelObj = $self-&gt;{excelObj};
    my $sheet    = $excelObj-&gt;worksheet;

    my $row      = $excelObj-&gt;currentRow;
    my $col      = $index + 1;
    my $oldValue = $self-&gt;{data}-&gt;[$index];

    if( ( not defined $oldValue and defined $newValue ) or
        ( defined $oldValue and not defined $newValue ) or
        ( $oldValue ne $newValue )
   ) {
        $excelObj-&gt;increaseChangeCount;
        $sheet   -&gt;Cells($row, $col)-&gt;{'Value' } = $newValue;
        $self    -&gt;{'data'}         -&gt;[ $index ] = $newValue;
    } # if

    return;
} # STORE
# ------------------------------------------------------------
sub FETCHSIZE {
    my $self = shift;
    return $#{ $self-&gt;{data} };
} # FETCHSIZE
# ------------------------------------------------------------
sub STORESIZE {
    my( $self, $newLength ) = @_;
    return $#{ $self-&gt;{data} } = $newLength;
} # STORESIZE
# ------------------------------------------------------------
sub PUSH {
    my $self = shift;
    return push( @{ $self-&gt;{data} }, @_ );
} # PUSH
# ------------------------------------------------------------
sub CLEAR {
    my( $self ) = @_;

    my $data = $self-&gt;{data};
    for my $i ( 0..$#{$data} ) {
        $self-&gt;STORE( $i, '' );
    } # for

    return;
} # CLEAR
# ------------------------------------------------------------
1; # modules have to return a true value
&lt;/code&gt;

&lt;!-- Node text goes above. Div tags should contain sig only --&gt;
&lt;div class="pmsig"&gt;&lt;div class="pmsig-123953"&gt;
&lt;P&gt;
Best regards,&lt;BR&gt;
perl -e "s&gt;&gt;*F&gt;e=&gt;y)\*martinF)stronat)=&gt;print,print v8.8.8.32.11.32"
&lt;/div&gt;&lt;/div&gt;</field>
<field name="root_node">
594230</field>
<field name="parent_node">
594230</field>
</data>
</node>
