When, in order to call this:
/*
 * _24to32 -- unpack the string buffer of `packed` into 32 unsigned values
 * and push them onto the Perl stack.
 *
 * The loop walks the buffer in 3-byte steps (i = 0, 3, ..., 21), i.e. 8
 * iterations, and pushes four fields (a, b, c, d) per iteration: 8 * 4 = 32.
 *
 * The IS_* macros are defined outside this snippet; going by the
 * preprocessed form shown later in this post, IS_VARS declares
 * sp/ax/mark/items, IS_RESET sets sp = mark, IS_PUSHUV pushes a mortal
 * newSVuv() of its argument, and IS_DONE stores sp back to the interpreter.
 *
 * _4BY6 is also defined elsewhere -- presumably a union overlaying `packed`
 * with four 6-bit bitfields a..d; TODO confirm against its definition.
 *
 * NOTE(review): the load below dereferences an `unsigned long *` formed from
 * a char pointer at offsets that are multiples of 3, so it is generally
 * unaligned. If unsigned long is 4 bytes, the final read at i == 21 covers
 * bytes 21..24, one byte beyond the 24 the loop bound implies -- confirm the
 * buffer always has at least 25 readable bytes.
 */
void _24to32( SV* packed ) {
IS_VARS;
char *pp = SvPVX( packed );   /* raw string buffer; no length or type check is made here */
_4BY6 up;
int i;
IS_RESET;
for( i=0; i<24; i+=3 ) {
/* Load one unsigned long starting at byte i and reverse its byte order. */
up.packed = _byteswap_ulong( *(unsigned long*)&pp[ i ] );
/* Push the four fields of the swapped word, in a, b, c, d order. */
IS_PUSHUV( up.u.a );
IS_PUSHUV( up.u.b );
IS_PUSHUV( up.u.c );
IS_PUSHUV( up.u.d );
}
IS_DONE;
return;
}
You have to go through this lot:
/*
 * XS_main__24to32 -- preprocessed XS glue that wraps the call to _24to32().
 *
 * As shown below it: sets up sp/ax/mark/items from the interpreter's stack
 * and mark-stack pointers, croaks with a usage message unless exactly one
 * argument was passed, fetches that argument as `packed`, and calls
 * _24to32(packed). It saves the mark-stack pointer in `temp` before the call;
 * if the pointer differs afterwards, it is restored and the stack pointer is
 * set to base + ax - 1 before returning.
 *
 * Lines beginning with '+' appear to be display-wrap continuations of the
 * preceding line rather than C tokens; they are left exactly as posted.
 */
void XS_main__24to32(register PerlInterpreter* my_perl , CV* cv);
void XS_main__24to32(register PerlInterpreter* my_perl , CV* cv)
{
extern int Perl___notused ; SV **sp = (*Perl_Istack_sp_ptr(((PerlI
+nterpreter *)Perl_get_context()))); I32 ax = (*(*Perl_Imarkstack_ptr_
+ptr(((PerlInterpreter *)Perl_get_context())))--); register SV **mark
+= (*Perl_Istack_base_ptr(((PerlInterpreter *)Perl_get_context()))) +
+ax++; I32 items = (I32)(sp - mark);
#line 179 "_24to32.c"
/* Argument-count check: exactly one argument ("packed") is required. */
if (items != 1)
Perl_croak_xs_usage(((PerlInterpreter *)Perl_get_context()), cv
+,"packed");
((void)ax);
sp -= items;
{
SV * packed = (*Perl_Istack_base_ptr(((PerlInterpreter *)Perl_g
+et_context())))[ax + (0)];
#line 117 "_24to32.xs"
I32* temp;
#line 188 "_24to32.c"
#line 119 "_24to32.xs"
/* Remember the mark-stack pointer (post-incrementing it) around the call. */
temp = (*Perl_Imarkstack_ptr_ptr(((PerlInterpreter *)Perl_get_cont
+ext())))++;
_24to32(packed);
if ((*Perl_Imarkstack_ptr_ptr(((PerlInterpreter *)Perl_get_context
+()))) != temp) {
(*Perl_Imarkstack_ptr_ptr(((PerlInterpreter *)Perl_get_context()
+))) = temp;
do { do { const IV tmpXSoff = (0); (*Perl_Istack_sp_ptr(((PerlIn
+terpreter *)Perl_get_context()))) = (*Perl_Istack_base_ptr(((PerlInte
+rpreter *)Perl_get_context()))) + ax + (tmpXSoff - 1); return; } whil
+e (0); } while (0);
}
return;
#line 199 "_24to32.c"
/* Not reached: the unconditional return above precedes these statements. */
(*Perl_Istack_sp_ptr(((PerlInterpreter *)Perl_get_context()))) = s
+p;
return;
}
}
and this lot:
/*
 * _24to32 after preprocessing -- the same function as the short source form
 * earlier in this post, with its macros expanded.
 *
 * Visible steps: derive sp/ax/mark/items from the interpreter's stack and
 * mark-stack pointers, reset sp to mark, then for i = 0, 3, ..., 21 load an
 * unsigned long from &pp[i], byte-swap it into `up`, and push up.u.a, up.u.b,
 * up.u.c and up.u.d. Each push checks for one free stack slot, calls
 * Perl_stack_grow() if there is none, and stores a mortalised
 * Perl_newSVuv() value at *++sp. Finally sp is written back to the
 * interpreter.
 *
 * Every access to interpreter state goes through a fresh call to
 * Perl_get_context().
 *
 * Lines beginning with '+' appear to be display-wrap continuations of the
 * preceding line rather than C tokens; they are left exactly as posted.
 */
void _24to32( SV* packed ) {
SV **sp = (*Perl_Istack_sp_ptr(((PerlInterpreter *)Perl_get_contex
+t()))); I32 ax = (*(*Perl_Imarkstack_ptr_ptr(((PerlInterpreter *)Perl
+_get_context())))--); register SV **mark = (*Perl_Istack_base_ptr(((P
+erlInterpreter *)Perl_get_context()))) + ax++; I32 items = (I32)(sp -
+ mark);
char *pp = ((packed)->sv_u.svu_pv);
_4BY6 up;
int i;
sp = mark;
for( i=0; i<24; i+=3 ) {
up.packed = _byteswap_ulong( *(unsigned long*)&pp[ i ] );
/* Four structurally identical pushes follow, one per field (a, b, c, d). */
do { do { if ((*Perl_Istack_max_ptr(((PerlInterpreter *)Perl_g
+et_context()))) - sp < (int)(1)) { sp = Perl_stack_grow(((PerlInterpr
+eter *)Perl_get_context()), sp,sp,(int) (1)); } } while (0); (*++sp =
+ (Perl_sv_2mortal(((PerlInterpreter *)Perl_get_context()), Perl_newSV
+uv(((PerlInterpreter *)Perl_get_context()), up.u.a)))); } while (0);
do { do { if ((*Perl_Istack_max_ptr(((PerlInterpreter *)Perl_g
+et_context()))) - sp < (int)(1)) { sp = Perl_stack_grow(((PerlInterpr
+eter *)Perl_get_context()), sp,sp,(int) (1)); } } while (0); (*++sp =
+ (Perl_sv_2mortal(((PerlInterpreter *)Perl_get_context()), Perl_newSV
+uv(((PerlInterpreter *)Perl_get_context()), up.u.b)))); } while (0);
do { do { if ((*Perl_Istack_max_ptr(((PerlInterpreter *)Perl_g
+et_context()))) - sp < (int)(1)) { sp = Perl_stack_grow(((PerlInterpr
+eter *)Perl_get_context()), sp,sp,(int) (1)); } } while (0); (*++sp =
+ (Perl_sv_2mortal(((PerlInterpreter *)Perl_get_context()), Perl_newSV
+uv(((PerlInterpreter *)Perl_get_context()), up.u.c)))); } while (0);
do { do { if ((*Perl_Istack_max_ptr(((PerlInterpreter *)Perl_g
+et_context()))) - sp < (int)(1)) { sp = Perl_stack_grow(((PerlInterpr
+eter *)Perl_get_context()), sp,sp,(int) (1)); } } while (0); (*++sp =
+ (Perl_sv_2mortal(((PerlInterpreter *)Perl_get_context()), Perl_newSV
+uv(((PerlInterpreter *)Perl_get_context()), up.u.d)))); } while (0);
}
/* Publish the final stack pointer back to the interpreter. */
(*Perl_Istack_sp_ptr(((PerlInterpreter *)Perl_get_context()))) = s
+p;
return;
}
You really don't see any opportunities for some radical optimisations?
And remember, that is positively lightweight compared to the unoptimised C code produced for all of Perl's internal opcodes. Existing C compilers may be able to optimise some of that lot away on a function-by-function basis, but how much?
And now consider the possibilities of allowing the optimiser to look at *all* the internal functions and look for really radical optimisations?
Consider the possibilities of LTO and whole-program analysis lifting whole chunks of the boilerplate above up to the runloop level on a per-interpreter basis.
Then consider the possibilities of JIT noticing that the context doesn't (cannot) change across a whole swath of runtime code and reserving a register, say one of the unused segment registers, for it and using register relative addressing for each interpreter?
And then consider that LLVM doesn't have to follow C rules. It can invent weird stuff that C compilers (and C programmers) wouldn't even think of -- see my earlier example of it converting (at the IR level) an array of 52 shorts into a single 832-bit integer.
What might it do with the whole SVt_* type hierarchy? Imagine (for sake of totally speculative example) that it could reduce *all* the SV flag tests & sets to a single, simple bit manipulation at some calculated bit offset into a huge integer. Is that possible? Would it result in substantial savings if used uniformly throughout the code base?
Does any of this intrigue you? Even if only so that you can say: I told you so?
With the rise and rise of 'Social' network sites: 'Computers are making people easier to use every day'
Examine what is said, not who speaks -- Silence betokens consent -- Love the truth but pardon error.
"Science is about questioning the status quo. Questioning authority".
In the absence of evidence, opinion is indistinguishable from prejudice.
RIP Neil Armstrong
/div/div