https://github.com/YordanGeorgiev/issue-tracker/blob/master/src/perl/issue_tracker/lib/IssueTracker/App/IO/In/ReaderXls.pm
# ------------------------------------------------------ # convert an excel file into a hash ref of hash ref of hash refs # ------------------------------------------------------ sub doReadXlsFileToHsr2 { my $self = shift; my $xls_file = shift; my $ret = 1; my $msg = "open the xls_file: $xls_file"; $objLogger->doLogDebugMsg($msg); my $formatter = Spreadsheet::ParseExcel::FmtJapan->new(); my $objXlsParser = 'Spreadsheet::ParseExcel'->new(); # my $objWorkbook = $objXlsParser->Parse( $xls_file , $formatte +r ); my $objWorkbook = $objXlsParser->Parse($xls_file); my $hsr2 = {}; # this is the data hash ref of hash refs # check if we are using Excel2007 open xml format if (!defined $objWorkbook) { # works too my $objConverter = () ; my $objConverter = Text::Iconv->new("utf-8", "utf-8"); # my $objConverter = (); $objWorkbook = Spreadsheet::XLSX->new($xls_file, $objConverter); # exit the whole application if there is no excel defined if (!defined $objWorkbook) { $msg = "cannot parse \$xls_file $xls_file $! $objXlsParser->erro +r()"; $objLogger->doLogErrorMsg("$msg"); return ($ret, $msg, {}); } } #eof if not $objWorkbook foreach my $worksheet (@{$objWorkbook->{Worksheet}}) { my $hsWorkSheet = {}; my $WorkSheetName = $worksheet->{'Name'}; next unless $WorkSheetName =~ m/^.*_issues$/g; $objLogger->doLogDebugMsg("foreach my worksheet: " . $WorkSheetNam +e) if ($module_trace == 1); my $RowMin = $worksheet->{'MinRow'}; my $RowMax = $worksheet->{'MaxRow'}; # my ( $RowMin, $RowMax) = $worksheet->row_range(); # my ( $MinCol, $MaxCold ) = $worksheet->col_range(); my $row_num = 0; for my $row ($RowMin .. $RowMax) { my $hsRow = {}; my $MinCol = $worksheet->{'MinCol'}; my $MaxCol = $worksheet->{'MaxCol'}; #debug print "MinCol::$MinCol , MaxCol::$MaxCol \n" ; my $col_num = 0; #print "row_num:: $row_num \n" ; for my $col ($MinCol .. $MaxCol) { # debug print "col_num:: $col_num \n" ; my $cell = $worksheet->{'Cells'}[$row][$col]; my $obj_header = $worksheet->{'Cells'}[0][$col]; my $header = $obj_header->unformatted(); my $token = ''; # to represent NULL in the sql unless (defined($cell)) { $token = 'NULL'; } else { # this one seems to return the value ONLY if # it is formateed properly with Ctrl + 1 # $token = $cell->Value(); # this one seems to return the value as it has been typed in +to ... $token = $cell->unformatted(); # this is must have !!! $token = decode('utf8', $token); # $token = $cell->{'Val'} ; my $encoding = $cell->encoding(); # debug print "token is :: " . $token . "\n" ; # debug print "encoding is :: " . $encoding . "\n" ; # debug print "is_utf8 " . is_utf8 ( $token ) ; # p($token); # and this one of those wtf moments ?! $token =~ s/\&gt;/\>/g; $token =~ s/\&lt;/\</g; $token =~ s/\&amp;/\&/g; } $hsRow->{$header} = $token; $col_num++; } #eof for col $hsWorkSheet->{"$row_num"} = $hsRow; $row_num++; # debug sleep 3 ; } #eof foreach row $hsr2->{"$WorkSheetName"} = $hsWorkSheet; # p($hsWorkSheet ); } $ret = 0; $msg = 'xls file parse OK'; return ($ret, $msg, $hsr2); }

In reply to Re^2: What to do when converting Excel-supplied data to Unicode by YordanGeorgiev
in thread What to do when converting Excel-supplied data to Unicode by davis

Title:
Use:  <p> text here (a paragraph) </p>
and:  <code> code here </code>
to format your post; it's "PerlMonks-approved HTML":



  • Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!
  • Titles consisting of a single word are discouraged, and in most cases are disallowed outright.
  • Read Where should I post X? if you're not absolutely sure you're posting in the right place.
  • Please read these before you post! —
  • Posts may use any of the Perl Monks Approved HTML tags:
    a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr
  • You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)
            For:     Use:
    & &amp;
    < &lt;
    > &gt;
    [ &#91;
    ] &#93;
  • Link using PerlMonks shortcuts! What shortcuts can I use for linking?
  • See Writeup Formatting Tips and other pages linked from there for more info.