Just thought I'd finish off the code by actually returning the hash back to Perl...
#!/usr/bin/perl
use Inline C;
use Benchmark;
use strict;
use warnings;

# Benchmark input: "atgcgc" repeated 500000 times (3 million characters).
my $gen = "atgcgc" x 500000;

# Receives the hash reference handed back by the Inline C counter.
my $h_ref;

# Candidate implementations, keyed by the label Benchmark prints.
my %tests = (
    inline      => sub { $h_ref = string_inline_c($gen, length($gen)) },
    hash_string => sub { hash_string($gen) },
);

# Time two runs of each candidate and print the results.
timethese(2, \%tests);
# Count every overlapping two-character substring of a string.
#
# Arguments:
#   $genome - the string to scan.
# Returns:
#   A reference to a hash mapping each two-character substring to the
#   number of times it occurs. The hash is empty when $genome has fewer
#   than two characters.
sub hash_string {
    my ($genome) = @_;
    my %count;

    # The last pair starts at offset length - 2, so the range stops there;
    # for strings shorter than two characters the range is empty.
    $count{ substr($genome, $_, 2) }++ for 0 .. length($genome) - 2;

    return \%count;
}
__END__
__C__
SV* string_inline_c(char *genome, int len)
{
int i;
int hash[96];
HV* perl_hash=newHV();
/* The hashing function is simply 4*(first char - 'a') + second ch
+ar - 'a' */
/* i.e. the bucket for gg is 4*('g'-'a')+'g'-'a' = 30 */
/*initialize our 'C' hash buckets which will get used*/
/*aa*/ /*ac*/ /*ag*/ /*at*/
hash[ 0] = hash[ 2] = hash[ 6] = hash[19] = 0;
/*ca*/ /*cc*/ /*cg*/ /*ct*/
hash[ 8] = hash[10] = hash[14] = hash[27] = 0;
/*ga*/ /*gc*/ /*gg*/ /*gt*/
hash[24] = hash[26] = hash[30] = hash[43] = 0;
/*ta*/ /*tc*/ /*tg*/ /*tt*/
hash[76] = hash[78] = hash[82] = hash[95] = 0;
for(i=0;i<len-1;i++)
{
hash[4*(genome[i]-'a')+(genome[i+1]-'a')]++;
}
/*move our values over from the 'C' hash to the perl hash*/
#define h(c,i) (hv_store(perl_hash, (c), sizeof((c))-1, newSViv(hash[(
+i)]), 0))
h("aa", 0); h("ac", 2); h("ag", 6); h("at",19);
h("ca", 8); h("cc",10); h("cg",14); h("ct",27);
h("ga",24); h("gc",26); h("gg",30); h("gt",43);
h("ta",76); h("tc",78); h("tg",82); h("tt",95);
return newRV_noinc((SV*) perl_hash); /*return a ref to a hash*/
}
-
Are you posting in the right place? Check out Where do I post X? to know for sure.
-
Posts may use any of the Perl Monks Approved HTML tags. Currently these include the following:
<code> <a> <b> <big>
<blockquote> <br /> <dd>
<dl> <dt> <em> <font>
<h1> <h2> <h3> <h4>
<h5> <h6> <hr /> <i>
<li> <nbsp> <ol> <p>
<small> <strike> <strong>
<sub> <sup> <table>
<td> <th> <tr> <tt>
<u> <ul>
-
Snippets of code should be wrapped in
<code> tags not
<pre> tags. In fact, <pre>
tags should generally be avoided. If they must
be used, extreme care should be
taken to ensure that their contents do not
have long lines (<70 chars), in order to prevent
horizontal scrolling (and possible janitor
intervention).
-
Want more info? How to link
or How to display code and escape characters
are good places to start.