#!/usr/bin/perl
# Benchmark: count overlapping two-character substrings of a long string,
# once with an Inline C routine and once with a pure-Perl hash.
use strict;
use warnings;

use Inline 'C';    # quoted: a bareword language name is rejected under strict subs
use Benchmark;

# "atgcgc" repeated 500,000 times: 3 million characters.
my $gen = "atgcgc" x 500000;
my $h_ref;

# Benchmark candidates, keyed by the label timethese() reports.
my %tests;
$tests{"inline"}      = sub { $h_ref = string_inline_c($gen, length($gen)) };
$tests{"hash_string"} = sub { hash_string($gen) };

# Run every candidate twice and print the timings.
timethese(2, \%tests);

# hash_string($genome)
#
# Counts every overlapping two-character substring of $genome.
#
# Returns a reference to a hash mapping each two-character substring to the
# number of times it occurs. A string shorter than two characters yields an
# empty hash.
sub hash_string {
    my ($genome) = @_;

    my %count;

    # The last full two-character window starts at length - 2.
    for my $offset (0 .. length($genome) - 2) {
        $count{ substr($genome, $offset, 2) }++;
    }

    return \%count;
}

__END__
__C__
SV* string_inline_c(char *genome, int len) {
    int i;
    int hash[96];
    HV* perl_hash=newHV();

    /* The hashing function is simply 4*(first char - 'a') + second char - 'a' */
    /* i.e. the bucket for gg is 4*('g'-'a')+'g'-'a' = 30 */

    /*initialize our 'C' hash buckets which will get used*/
    /*aa*/     /*ac*/     /*ag*/     /*at*/
    hash[ 0] = hash[ 2] = hash[ 6] = hash[19] = 0;
    /*ca*/     /*cc*/     /*cg*/     /*ct*/
    hash[ 8] = hash[10] = hash[14] = hash[27] = 0;
    /*ga*/     /*gc*/     /*gg*/     /*gt*/
    hash[24] = hash[26] = hash[30] = hash[43] = 0;
    /*ta*/     /*tc*/     /*tg*/     /*tt*/
    hash[76] = hash[78] = hash[82] = hash[95] = 0;

    for(i=0;i