If you're really only going to be doing one column, but want it to be chosen by the variable $i,
I'd suggest substr: $respective_read_letter = substr $read_seq, $i, 1;. If finding an optimum solution
is important to you (i.e., if you'll use this script many times for the foreseeable future, rather than just once or twice,
where "fast enough" is fast enough), then I'd recommend benchmarking substr vs. unpack vs.
LanX's regex (and any others that are suggested). But whatever you do, make sure to use ++LanX's hash %count.
use warnings;
use strict;
use Benchmark qw/cmpthese/;
use Test::More tests => 1;
# Synthetic input: 1000 random reads, each 30 bases drawn from A/C/G/T.
# (rand 4 is truncated to an integer index 0..3 by the list slice.)
my @dataset = map {
    join '', map { (qw/A C G T/)[rand 4] } 1 .. 30
} 1 .. 1000;

# Zero-based column to extract; first command-line argument, default 10.
my $i = $ARGV[0] // 10;

# The column is later interpolated into a regex quantifier, so insist on a
# plain non-negative integer instead of trusting the command line.
die "Usage: $0 [column]  (column must be a non-negative integer)\n"
    unless $i =~ /\A[0-9]+\z/;
# Tally the letter that an extractor finds in every read of @dataset.
#
# Takes:   a code reference called as $fnref->($read_seq, $i); it should
#          return the single character found at column $i of $read_seq.
# Returns: a hash reference mapping each returned letter to the number of
#          reads that produced it. An empty dataset yields an empty hash
#          reference (never undef).
sub test {
    my $fnref = shift;

    # Start from a real hashref so callers get a hash even when the loop
    # body never runs and nothing is autovivified.
    my $count = {};

    for my $read_seq (@dataset) {
        my $letter = $fnref->($read_seq, $i);
        $count->{$letter}++;
    }

    return $count;
}
# Benchmark candidate: pull the column out with a regular expression.
# Returns the letter-count hash reference produced by test().
sub rfn {
    return test( sub {
        my $skip = $_[1];

        # Anchor at the start of the string and let '.' match newlines (/s),
        # so the capture is always the character at offset $skip, just like
        # substr. Matching in list context yields undef on failure instead
        # of whatever an earlier successful match left behind in $1.
        my ($letter) = $_[0] =~ /\A.{$skip}(.)/s;

        return $letter;
    });
}
# Benchmark candidate: pull the column out with substr.
# Returns the letter-count hash reference produced by test().
sub sfn {
    # Works directly on @_ so the read is not copied before extraction.
    my $pick_by_substr = sub {
        return substr( $_[0], $_[1], 1 );
    };

    return test($pick_by_substr);
}
# Benchmark candidate: pull the column out with unpack.
# Returns the letter-count hash reference produced by test().
sub ufn {
    return test( sub {
        # Template "x<N> a1": skip N positions, then read one character.
        # NOTE(review): unpack dies ("'x' outside of string") when the read
        # is shorter than N, whereas substr only warns -- confirm that is
        # acceptable for real input before enabling this in the benchmark.
        my $template = 'x' . $_[1] . ' a1';

        # Parenthesized assignment forces list context; take the one field.
        my ($letter) = unpack $template, $_[0];

        return $letter;
    });
}
# Implementations to race against each other, keyed by the label that
# appears in the comparison table.
my %contenders = (
    regex  => \&rfn,
    substr => \&sfn,
    #unpack => \&ufn,
);

# A count of 0 lets Benchmark run each candidate for its default minimum
# CPU time rather than a fixed number of iterations.
cmpthese( 0, \%contenders );

# Sanity check: a speed comparison is only meaningful if both candidates
# produce the same tallies.
is_deeply( rfn(), sfn(), 'same results' );