Hi,
I'm using Spreadsheet::XLSX to parse an xlsx file. For around 20000 rows it takes around 800 MB of memory, even though the file is only 5 MB. Can anybody help me optimize it?
The following is the code:
# Body of a routine (the log message below names it xlsx_to_csv) that loads
# a workbook with Spreadsheet::XLSX and writes each worksheet to its own CSV
# file using Text::CSV_XS. The enclosing `sub` line and the closing braces
# of the loops are not part of this excerpt.
#
# Arguments: $self, $in_file (workbook to read), $out_file (prefix used to
# build the per-sheet CSV file names).
# Returns $FAILURE when the argument count is wrong; the success return is
# not visible here.
my ($self, $in_file, $out_file) = @_;
# $count is the number of arguments excluding $self; exactly two are required.
# NOTE(review): $count is not declared with `my` in this excerpt.
$count = @_;
$count -= 1;
if(2 != $count) {
# NOTE(review): the leading "+" on the next line looks like a line-wrap
# marker from the posting rather than intended message text - confirm.
util->logit("[$PARENT_PROC_NAME]: xlsx_to_csv:invalid args [@_
+] [$count]");
return $FAILURE;
}
# The workbook object is built here, before any row is written. The loops
# below read cells from $sheet->{Cells}[$row][$col] as objects.
# NOTE(review): presumably the whole workbook is held in memory at this
# point, which would explain the memory footprint - confirm against the
# Spreadsheet::XLSX documentation.
my $excel = Spreadsheet::XLSX -> new ($in_file);
my $line;
my $out_csv_name = $out_file;
my $orig_out_file = $out_csv_name;
#binmode CSV;
# NOTE(review): binmode is called on CSV before the handle is opened (the
# open happens inside the sheet loop), so the :utf8 layer is likely not in
# effect for the files written below - confirm.
binmode CSV, ":utf8";
# CSV writer: double-quote quoting, quotes escaped by doubling, comma
# separator, binary mode enabled.
my $Csv = Text::CSV_XS->new({
'quote_char' => '"',
'escape_char' => '"',
'sep_char' => ',',
'binary' => 1,
});
# $i starts at 1 and is incremented before use, so the first slot written
# in @ret_array is index 2.
my $i = 1;
foreach my $sheet (@{$excel -> {Worksheet}}) {
$i++;
# Output name is <out_file><$underscore><sheet name>.csv; $underscore is
# defined outside this excerpt.
$out_csv_name = "$orig_out_file$underscore$sheet->{Name}.csv";
#$out_csv_name =~ s/\s+//g;
# NOTE(review): @ret_array[$i] is a one-element array slice; $ret_array[$i]
# is the scalar element form.
@ret_array[$i] = $out_csv_name;
# NOTE(review): `||` binds tighter than the list operator `open`, so the
# `die` is attached to the (always true) file-name string and a failed open
# is not detected. The handle is a bareword opened with two-argument open.
# The "+" on the continuation line looks like a posting wrap marker.
open CSV, "> $out_csv_name" || die "Cannot create csv file: $!
+" ;
# When MaxRow is false (undef or 0), fall back to MinRow.
$sheet -> {MaxRow} ||= $sheet -> {MinRow};
foreach my $row ($sheet -> {MinRow} .. $sheet -> {MaxRow}) {
my @Row;
# Same fallback for the column bound; it is re-evaluated on every row.
$sheet -> {MaxCol} ||= $sheet -> {MinCol};
foreach my $col ($sheet -> {MinCol} .. $sheet -> {MaxCol}
+) {
my $cell = $sheet -> {Cells} [$row] [$col];
# Missing cells are emitted as empty strings.
my $Value = "";
if ($cell) {
#$Value = $cell->Value; #rounds off
#$Value = $cell->{_Value}; #rounds off
#$Value = $cell->Value(); #rounds off
#$Value = $cell->{Val};
# Use the unformatted value; the commented-out alternatives above are
# marked by the author as rounding the value.
$Value = $cell->unformatted();
#my $avl = $sheet->{Cells}[$row][$col]{Val};
#print "avl:[$avl]\n";
#$Value = $cell->value();
# If the unformatted value is the literal string 'GENERAL', fall back to
# the cell's {Val} field.
if ($Value eq 'GENERAL') {
$Value = $cell->{Val};
}
}
# Abandon the row when its first column is false. NOTE(review): this is a
# truth test, so a first-column value of 0 or "0" also ends the row, and
# the `next unless @Row` below then skips it entirely.
last if $col == $sheet->{MinCol} and !$Value;
push(@Row, $Value);
}
next unless @Row;
# Join the collected fields into one CSV record.
my $Status = $Csv->combine(@Row);
# NOTE(review): failure is detected with `defined`; verify that combine()
# returns undef rather than a defined false value on error.
if (!defined $Status) {
my $Error = $Csv->error_input();
warn "ERROR FOUND!: $Error";
}
if (defined $Status) {
my $Line = $Csv->string();
print CSV "$Line\n";
}
I don't know how to do it... I'm new to Perl. Kindly guide
me.
|