Here is a simple timing script that reproduces the issue.
I couldn't find any large tables on public websites, but I found one on Wikipedia with 162 rows that illustrates the problem. If you find one with 400+ rows, you'll see that it takes 3-4 seconds to obtain the TDs of a single TR.
#!/usr/bin/env perl
# Timing harness for WWW::Mechanize::Chrome's xpath() when it is called with a
# context node. It loads a page, selects one table, and then times one xpath()
# call for the table's rows, one for the header cells, and one per data row for
# that row's TD cells. Each measurement is printed as it is taken.
use strict;
use warnings;
use feature qw(say);
no warnings qw(experimental);
use Data::Dumper;
use Log::Log4perl qw(:easy);
use WWW::Mechanize::Chrome;
use Time::HiRes qw( gettimeofday tv_interval );

# Set to a true value to also print every cell and the final data structure.
my $debug = 0;

# Start timestamp of the measurement currently in progress.
my $t0;

Log::Log4perl->easy_init($ERROR);

my $mech = WWW::Mechanize::Chrome->new(
    headless  => 0,
    autodie   => 0,
    autoclose => 0
);

$mech->get('https://meta.wikimedia.org/wiki/Wikipedia_article_depth');
sleep(2);    # presumably gives the page time to finish loading -- TODO confirm

my @nodes = $mech->xpath('//table');

# The measurements below use the fourth table on the page. autodie is off, so
# a failed page load would otherwise hand an undefined context node to xpath().
die 'Expected at least 4 tables on the page, found ' . scalar(@nodes) . "\n"
    unless @nodes > 3;

$t0 = [gettimeofday];
my @rows = $mech->xpath('.//tr', node => $nodes[3]);
say 'xpath for TR tooK:' . tv_interval($t0);

my @cell_keys  = ();    # column names, taken from the first row
my @table_data = ();    # one hash reference per data row, keyed by column name

# $#rows is the last row index; since row 0 is treated as the header row, it
# is also the number of data rows that get timed below.
say "Timing for $#rows rows.";

for my $row_index (0 .. $#rows) {
    my $row = $rows[$row_index];

    # column names
    if ($row_index == 0) {
        $t0 = [gettimeofday];
        my @cells = $mech->xpath('.//th', node => $row);
        say 'xpath for TH tooK:' . tv_interval($t0);

        for my $cell_index (0 .. $#cells) {
            my $text = $cells[$cell_index]->get_text();
            say "HEADER CELL: $cell_index, VALUE:" . $text if $debug;
            push @cell_keys, $text;
        }

        if ($debug) {
            say 'Column Names:';
            say $_ foreach @cell_keys;
        }
    }
    # data row
    else {
        my %row_data = ();

        $t0 = [gettimeofday];
        my @cells = $mech->xpath('.//td', node => $row);
        say 'xpath for TD tooK:' . tv_interval($t0);

        say "DATA ROW: $row_index" if $debug;
        for my $cell_index (0 .. $#cells) {
            my $text = $cells[$cell_index]->get_text();
            say "DATA CELL: $cell_index, VALUE:" . $text if $debug;
            $row_data{ $cell_keys[$cell_index] } = $text;
        }
        push @table_data, \%row_data;

        if ($debug) {
            say 'Column Data:';
            say $row_data{$_} foreach @cell_keys;
        }
    }
}

# Pass a reference so the whole table is dumped as a single structure.
say Dumper(\@table_data) if $debug;
Here are the results:
xpath for TR tooK:0.14171
Timing for 162 rows.
xpath for TH tooK:0.021609
xpath for TD tooK:0.878599
xpath for TD tooK:0.913126
xpath for TD tooK:0.9246
xpath for TD tooK:0.949696
xpath for TD tooK:0.97479
xpath for TD tooK:0.969646
xpath for TD tooK:0.980076
xpath for TD tooK:1.009137
xpath for TD tooK:1.001323
xpath for TD tooK:0.991992
xpath for TD tooK:1.023424
xpath for TD tooK:1.007205
xpath for TD tooK:1.015443
xpath for TD tooK:1.014373
xpath for TD tooK:1.022451
xpath for TD tooK:1.000317
xpath for TD tooK:1.009301
xpath for TD tooK:1.002823
xpath for TD tooK:1.018567
xpath for TD tooK:0.998177
xpath for TD tooK:1.003273
xpath for TD tooK:1.017177
xpath for TD tooK:1.00484
xpath for TD tooK:1.025769
xpath for TD tooK:1.019259
xpath for TD tooK:1.004221
xpath for TD tooK:0.978498
xpath for TD tooK:0.993305
xpath for TD tooK:0.981095
xpath for TD tooK:0.981023
xpath for TD tooK:1.007667
xpath for TD tooK:0.996014
xpath for TD tooK:0.989607
xpath for TD tooK:0.981084
xpath for TD tooK:0.998068
xpath for TD tooK:1.008075
xpath for TD tooK:0.995706
xpath for TD tooK:0.982913
xpath for TD tooK:0.994211
xpath for TD tooK:0.9854
xpath for TD tooK:0.986041
xpath for TD tooK:1.00003
xpath for TD tooK:1.000264
xpath for TD tooK:0.993049
xpath for TD tooK:0.990954
xpath for TD tooK:1.032756
xpath for TD tooK:1.009093
xpath for TD tooK:1.014658
xpath for TD tooK:1.017741
xpath for TD tooK:0.981517
xpath for TD tooK:1.055033
xpath for TD tooK:1.012932
xpath for TD tooK:1.00662
xpath for TD tooK:0.997766
xpath for TD tooK:0.976339
xpath for TD tooK:1.005549
xpath for TD tooK:0.994226
xpath for TD tooK:1.009729
xpath for TD tooK:1.016273
xpath for TD tooK:1.01258
xpath for TD tooK:0.991508
xpath for TD tooK:1.003135
xpath for TD tooK:0.996127
xpath for TD tooK:0.994465
xpath for TD tooK:0.984879
xpath for TD tooK:1.000486
xpath for TD tooK:1.013313
xpath for TD tooK:1.003019
xpath for TD tooK:1.010758
xpath for TD tooK:1.003538
xpath for TD tooK:1.005913
xpath for TD tooK:0.998934
xpath for TD tooK:1.002246
xpath for TD tooK:1.015106
xpath for TD tooK:1.030986
xpath for TD tooK:1.015739
xpath for TD tooK:1.007505
xpath for TD tooK:1.031035
xpath for TD tooK:1.022296
xpath for TD tooK:1.012498
xpath for TD tooK:1.012772
xpath for TD tooK:1.016398
xpath for TD tooK:0.998467
xpath for TD tooK:1.009135
xpath for TD tooK:1.023647
xpath for TD tooK:1.019091
xpath for TD tooK:1.011917
xpath for TD tooK:0.993233
xpath for TD tooK:1.00045
xpath for TD tooK:1.011704
xpath for TD tooK:0.999245
xpath for TD tooK:1.000345
xpath for TD tooK:1.002763
xpath for TD tooK:1.006262
xpath for TD tooK:1.003934
xpath for TD tooK:1.024795
xpath for TD tooK:1.017474
xpath for TD tooK:1.004507
xpath for TD tooK:0.986803
xpath for TD tooK:0.983318
xpath for TD tooK:0.97499
xpath for TD tooK:0.995466
xpath for TD tooK:0.974658
xpath for TD tooK:0.990834
xpath for TD tooK:1.006221
xpath for TD tooK:1.000403
xpath for TD tooK:0.975936
xpath for TD tooK:0.989006
xpath for TD tooK:1.001537
xpath for TD tooK:0.973566
xpath for TD tooK:0.990409
xpath for TD tooK:0.981671
xpath for TD tooK:0.997336
xpath for TD tooK:0.993645
xpath for TD tooK:0.996437
xpath for TD tooK:1.014808
xpath for TD tooK:0.999135
xpath for TD tooK:0.987786
xpath for TD tooK:1.012054
xpath for TD tooK:1.02549
xpath for TD tooK:1.010458
xpath for TD tooK:1.00814
xpath for TD tooK:1.015967
xpath for TD tooK:1.025007
xpath for TD tooK:1.013029
xpath for TD tooK:1.00664
xpath for TD tooK:0.997569
xpath for TD tooK:1.030964
xpath for TD tooK:0.996797
xpath for TD tooK:1.006234
xpath for TD tooK:0.982486
xpath for TD tooK:1.009541
xpath for TD tooK:0.990757
xpath for TD tooK:0.984965
xpath for TD tooK:0.993506
xpath for TD tooK:1.007426
xpath for TD tooK:0.997715
xpath for TD tooK:1.014092
xpath for TD tooK:0.980298
xpath for TD tooK:0.997222
xpath for TD tooK:0.982583
xpath for TD tooK:0.993896
xpath for TD tooK:1.000831
xpath for TD tooK:1.041912
xpath for TD tooK:1.012431
xpath for TD tooK:1.026563
xpath for TD tooK:1.046388
xpath for TD tooK:1.02357
xpath for TD tooK:0.990898
xpath for TD tooK:1.023132
xpath for TD tooK:0.998428
xpath for TD tooK:1.014679
xpath for TD tooK:1.00892
xpath for TD tooK:1.024412
xpath for TD tooK:1.008872
xpath for TD tooK:1.026012
xpath for TD tooK:1.024926
xpath for TD tooK:1.017763
xpath for TD tooK:0.995255
xpath for TD tooK:1.02212
xpath for TD tooK:1.017319
xpath for TD tooK:1.00299
Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!
Titles consisting of a single word are discouraged, and in most cases are disallowed outright.
Read Where should I post X? if you're not absolutely sure you're posting in the right place.
Please read these before you post! —
Posts may use any of the Perl Monks Approved HTML tags:
- a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr
You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)
| For: | Use: |
| & | &amp;amp; |
| < | &amp;lt; |
| > | &amp;gt; |
| [ | &amp;#91; |
| ] | &amp;#93; |
Link using PerlMonks shortcuts! What shortcuts can I use for linking?
See Writeup Formatting Tips and other pages linked from there for more info.
|
|