It is possible to modify the code to get an Out of Memory error on MSYS2 Perl 5.36 and on Strawberry Perl 5.20 and 5.38. This does not occur with a perlbrewed 5.36 on Ubuntu via WSL, nor with Strawberry Perl 5.18.
Collating an array of lines from the in-memory file handle is sufficient. Commenting out the regex in the in-memory file handle loop makes the OOM go away, as does modifying the string before adding it to the array.
(I also modified the code to use a separate variable for the in-memory file handle. It has no effect but is arguably cleaner.)
#!/usr/bin/env perl
use strict;
use warnings;
use feature 'say';
use Time::HiRes qw( time );
use Devel::Peek;
# Input file to scan; taken from the command line when supplied.
my $file = shift @ARGV;

# State shared by the two read passes further down the script.
my ($fh, $time);
my (@arr1, @arr2);

# Set to a true value to Devel::Peek::Dump the first few " Query" lines.
my $use_dump = 0;

# No usable file name given: generate a reproducible test file.
# Tested with defined/eq rather than plain truthiness so that a file
# literally named "0" is still accepted as input.
if (!defined $file || $file eq '') {
    # TODO: should use Path::Tiny::tempfile
    $file = 'tempfile.txt';
    open my $ofh, '>', $file
        or die "Cannot open $file for writing, $!";

    # Fixed seed so every run produces the same content.  The order of
    # the three rand() calls per line must not change, or the generated
    # file will differ.
    srand(1234567);
    for (0 .. 200_000) {
        my $string = 'some random text ' . rand();
        $string = $string x (1 + int(rand() * 10));
        if (rand() < 0.163) {
            $string = " Query${string}";
        }
        say {$ofh} $string;
    }

    # Buffered write errors only surface at close, so check it.
    close $ofh or die "Cannot close $file, $!";
    printf "%s is size %i Mb\n", $file, (-s $file) / (1024**2);
}
# Pass 1: read the file from disk line by line, count regex matches and
# collect every line in @arr1.
open $fh, '<', $file or die "Cannot open $file for reading, $!";

# Slurp the whole file into $s first; the in-memory file handle opened
# later in the script reads from this scalar.
my $s = do { local $/ = undef; <$fh> };

# Rewind so the line-by-line loop below starts at the top of the file.
seek($fh, 0, 0) or die "Cannot rewind $file, $!";
print "\n\n";

$time = time();
my $match_count1 = 0;
my $i1 = 0;

# Deliberately iterates via $_ (no named lexical) so that this loop is
# statement-for-statement comparable with the in-memory pass.
while (<$fh>) {
    /^ ?Query/ && $match_count1++;
    push @arr1, $_;
    if ($use_dump and /^ Query/) {
        Dump($_);
        $i1++;
        last if $i1 > 5;    # only dump the first few matching lines
    }
}

printf "%f read lines from disk and do RE (%d matches).\n",
    time() - $time, $match_count1;
close $fh or die "Cannot close $file, $!";
# Pass 2: the same scan, but reading from an in-memory file handle opened
# on the slurped contents in $s.  This is the loop reported to run out of
# memory on some Perl builds.
open my $mfh, '<', \$s or die "Cannot open in-memory file handle, $!";

$time = time();
my $match_count2 = 0;
my $i2 = 0;

# NOTE: the regex match on $_ followed by pushing the unmodified $_ is
# the combination under investigation.  Keep both statements exactly as
# written; do not introduce a named loop variable here.
while (<$mfh>) {
    # comment this out to avoid the OOM
    /^ ?Query/ && $match_count2++;
    #push @arr2, ($_ . ""); # avoids OOM
    push @arr2, $_; # OOM!
    if ($use_dump and /^ Query/) {
        Dump($_);
        $i2++;
        last if $i2 > 5;    # only dump the first few matching lines
    }
}

printf "%f read lines from in-memory file and do RE (%d matches).\n",
    time() - $time, $match_count2;
close $mfh or die "Cannot close in-memory file handle, $!";