The upcoming MCE update 1.828 allows a hash_ref as input_data. MCE workers may be spawned early to prevent Perl from making extra copies. I made this for a fellow Monk who pinged me to look at this thread. The OP's case does not require parallelization, though. However, I improved MCE in case one needs to process a big hash later on involving complex operations. MCE 1.828 will be released soon. Among other things, signal handling is improved. Also, a 14% reduction in memory consumption was made possible by loading Symbol, Fcntl, and File::Path on demand.
#!/usr/bin/env perl
# Re^2: search for particular elements of hash with multiple values
# http://www.perlmonks.org/?node_id=1188593
use strict;
use warnings;
use feature qw ( say );
use MCE;
# Positions of the fields inside each record (an array ref stored as a
# hash value). The names presumably stand for forward primer, reverse
# primer, barcode pair and barcode-pair counter -- confirm with the data
# owner.
use constant _FOWPRIM => 0;
use constant _REVPRIM => 1;
use constant _BC_PAIR => 2;
use constant _BC_PNUM => 3;

# Value of the _BC_PAIR field that the search looks for.
our $barcode_pair_35 = 'TTTT_AAAA';
# Build the MCE instance and spawn its workers immediately, i.e. before
# the large hash further down is created.
# The core MCE API defaults to 1 for chunk_size, hence the explicit value.
my $mce = MCE->new(
    max_workers => 4,
    chunk_size  => 8000,

    # Called once per chunk. The chunk is dereferenced as a hash whose
    # values are record array refs; every key whose _BC_PAIR field equals
    # the wanted barcode pair is announced and gathered.
    # The first and third arguments are not used here.
    user_func => sub {
        my ($self, $records, $chunk_no) = @_;

        for my $id ( keys %{ $records } ) {
            next unless $records->{$id}[_BC_PAIR] eq $barcode_pair_35;

            MCE->say("Found at $id");
            MCE->gather($id);
        }
    }
)->spawn;
# Synthetic data set with keys 1 .. $max.
my $max = 1000000;

# Filler record whose _BC_PAIR field does not match the search value.
# Every filler key below points at this one array ref (the record is
# shared, not copied).
my $data =
    [ 'AGCTCGTTGTTCGATCCA', 'GAGAGATAGATGATAGTG', 'TTTT_CCCC', 0 ];

our %barcode_hash;
$barcode_hash{$_} = $data for 1 .. $max - 2;

# The last two keys each get their own record carrying the matching
# barcode pair; a fresh array ref is built on every iteration.
for my $key ( $max - 1, $max ) {
    $barcode_hash{$key} =
        [ 'AGCTCGTTGTTCGATCCA', 'GAGAGATAGATGATAGTG', 'TTTT_AAAA', 0 ];
}
# Run the search over the whole hash; keys gathered by the workers are
# collected in @found. The workers are shut down afterwards.
my @found;
$mce->process( { gather => \@found }, \%barcode_hash );
$mce->shutdown;

# increment bc_pair_num field of every record that was found
for my $key (@found) {
    $barcode_hash{$key}[_BC_PNUM]++;
}

# display summary and found keys (in ascending numeric order)
my $total   = scalar @found;
my @ordered = sort { $a <=> $b } @found;
printf "Found %d keys\n", $total;
say join ', ', @ordered;