Beefy Boxes and Bandwidth Generously Provided by pair Networks
Welcome to the Monastery
 
PerlMonks  

Exploring IEEE754 floating point bit patterns.

by BrowserUk (Pope)
on Jul 28, 2012 at 05:05 UTC ( #984141=CUFP: print w/ replies, xml ) Need Help??

Of the 2**64 possible bit patterns a double can take on only 99.90234375% of them are used to store normal (normalised) floating point values (including +0 & -0).

Of the rest, some 18,014,398,509,481,984 bit patterns:

  • two are used to represent +infinity and -infinity.

    These stand for values whose magnitude is too large to be represented: too big (positive overflow) and too small (negative overflow) respectively.

  • 9,007,199,254,740,990 are used to represent denormalised real values (2**52 - 1 non-zero mantissas for each sign).

    In decimal, 3.14159e0 is normalised. Whereas 0.314159e1 & 31.4159e-1 represent the same number, but are denormalised.

  • 4,503,599,627,370,494 are used to represent signalling NANs. Half positive; half negative.

    When produced by a mathematical operation, signalling NaNs cause an immediate trap. They represent invalid operations.

  • 4,503,599,627,370,495 are used to represent quiet NANs. Half positive; half-1 negative.

    When produced, quiet NaNs don't immediately trap, but rather propagate through subsequent operations they are involved in. They represent indeterminate results.

  • And the remaining 1 is assigned (by the MSC compiler at least) to "indeterminate" -1.#IND.

    There doesn't appear to be a positive 1.#IND (on MSC).

    Numerically, this sits in the negative qNaN range -- which are all "indeterminate".

    Quite why this one value -- from the 18 quintillion -- is singled out for special treatment I have yet to discover.

#! perl -slw
use strict;
no warnings 'portable';    # the binary literals below are wider than 32 bits

# Print the first and last bit pattern of each class of IEEE 754 double
# (zero, denormal, normal, infinity, signalling NaN, quiet NaN), for both
# signs, next to the value the C runtime renders for that pattern.
#
# Needs a perl built with 64-bit integer support: the patterns are 64-bit
# integers and are converted with pack 'Q'.
#
# Each literal is laid out as sign(1) _ exponent(11) _ mantissa(52).
use constant {
    POS_ZERO       => 0b0_00000000000_0000_00000000_00000000_00000000_00000000_00000000_00000000,
    POS_DENORM_1ST => 0b0_00000000000_0000_00000000_00000000_00000000_00000000_00000000_00000001,
    POS_DENORM_LST => 0b0_00000000000_1111_11111111_11111111_11111111_11111111_11111111_11111111,
    POS_NORM_1ST   => 0b0_00000000001_0000_00000000_00000000_00000000_00000000_00000000_00000000,
    POS_NORM_LST   => 0b0_11111111110_1111_11111111_11111111_11111111_11111111_11111111_11111111,
    POS_INF        => 0b0_11111111111_0000_00000000_00000000_00000000_00000000_00000000_00000000,
    POS_SNAN_1ST   => 0b0_11111111111_0000_00000000_00000000_00000000_00000000_00000000_00000001,
    POS_SNAN_LST   => 0b0_11111111111_0111_11111111_11111111_11111111_11111111_11111111_11111111,
    POS_QNAN_1ST   => 0b0_11111111111_1000_00000000_00000000_00000000_00000000_00000000_00000000,
    POS_QNAN_LST   => 0b0_11111111111_1111_11111111_11111111_11111111_11111111_11111111_11111111,

    NEG_ZERO       => 0b1_00000000000_0000_00000000_00000000_00000000_00000000_00000000_00000000,
    NEG_DENORM_1ST => 0b1_00000000000_0000_00000000_00000000_00000000_00000000_00000000_00000001,
    NEG_DENORM_LST => 0b1_00000000000_1111_11111111_11111111_11111111_11111111_11111111_11111111,
    NEG_NORM_1ST   => 0b1_00000000001_0000_00000000_00000000_00000000_00000000_00000000_00000000,
    NEG_NORM_LST   => 0b1_11111111110_1111_11111111_11111111_11111111_11111111_11111111_11111111,
    NEG_INF        => 0b1_11111111111_0000_00000000_00000000_00000000_00000000_00000000_00000000,
    NEG_SNAN_1ST   => 0b1_11111111111_0000_00000000_00000000_00000000_00000000_00000000_00000001,
    NEG_SNAN_LST   => 0b1_11111111111_0111_11111111_11111111_11111111_11111111_11111111_11111111,
    NEG_IND        => 0b1_11111111111_1000_00000000_00000000_00000000_00000000_00000000_00000000,
    NEG_QNAN_1ST   => 0b1_11111111111_1000_00000000_00000000_00000000_00000000_00000000_00000001,
    NEG_QNAN_LST   => 0b1_11111111111_1111_11111111_11111111_11111111_11111111_11111111_11111111,
};

# pack 'Q' stores the integer as 8 native-order bytes; unpack 'd' reads
# those same bytes back as a native double. The second column is the raw
# pattern in hex.
printf "%23.16g : %016x\n", unpack( 'd', pack 'Q', $_ ), $_ for
    POS_ZERO, POS_DENORM_1ST, POS_DENORM_LST, POS_NORM_1ST, POS_NORM_LST,
    POS_INF,  POS_SNAN_1ST,   POS_SNAN_LST,   POS_QNAN_1ST, POS_QNAN_LST,
    NEG_ZERO, NEG_DENORM_1ST, NEG_DENORM_LST, NEG_NORM_1ST, NEG_NORM_LST,
    NEG_INF,  NEG_SNAN_1ST,   NEG_SNAN_LST,   NEG_IND,
    NEG_QNAN_1ST, NEG_QNAN_LST;

Produces:

C:\test>ieee.pl
                      0 : 0000000000000000
 4.940656458412465e-324 : 0000000000000001
 2.225073858507201e-308 : 000fffffffffffff
 2.225073858507201e-308 : 0010000000000000
 1.797693134862316e+308 : 7fefffffffffffff
                 1.#INF : 7ff0000000000000
                1.#SNAN : 7ff0000000000001
                1.#SNAN : 7ff7ffffffffffff
                1.#QNAN : 7ff8000000000000
                1.#QNAN : 7fffffffffffffff
                     -0 : 8000000000000000
-4.940656458412465e-324 : 8000000000000001
-2.225073858507201e-308 : 800fffffffffffff
-2.225073858507201e-308 : 8010000000000000
-1.797693134862316e+308 : ffefffffffffffff
                -1.#INF : fff0000000000000
               -1.#SNAN : fff0000000000001
               -1.#SNAN : fff7ffffffffffff
                -1.#IND : fff8000000000000
               -1.#QNAN : fff8000000000001
               -1.#QNAN : ffffffffffffffff

For completeness, here is the equivalent code in C:

#include <stdio.h> #include <float.h> typedef unsigned __int64 U64; U64 dblPatns[] = { 0x0000000000000000, // +zero 0x0000000000000001, // +denorm lo 0x000fffffffffffff, // +denorm hi 0x0010000000000000, // +norm lo 0x7fefffffffffffff, // +norm hi 0x7ff0000000000000, // +infinity 0x7ff0000000000001, // +sNAN lo 0x7ff7ffffffffffff, // +sNAN hi 0x7ff8000000000000, // +qNAN lo 0x7fffffffffffffff, // +qNAN hi 0x8000000000000000, // -zero 0x8000000000000001, // -denorm lo 0x800fffffffffffff, // -denorm hi 0x8010000000000000, // -norm lo 0xffefffffffffffff, // -norm hi 0xfff0000000000000, // -infinity 0xfff0000000000001, // -sNAN lo 0xfff7ffffffffffff, // -sNAN hi 0xfff8000000000000, // -IND ?? 0xfff8000000000001, // -qNAN lo 0xffffffffffffffff // -qNAN hi }; double asDouble( U64 u ) { return *(double*)&u; } char *FPClassAsText( int class ) { switch( class ) { case _FPCLASS_SNAN: return "Signaling NaN"; break; case _FPCLASS_QNAN: return "Quiet NaN"; break; case _FPCLASS_NINF: return "Negative infinity (–INF)"; break; case _FPCLASS_NN : return "Negative normalized non-zero"; bre +ak; case _FPCLASS_ND : return "Negative denormalized"; break; case _FPCLASS_NZ : return "Negative zero (-0)"; break; case _FPCLASS_PZ : return "Positive zero (+0)"; break; case _FPCLASS_PD : return "Positive denormalized"; break; case _FPCLASS_PN : return "Positive normalized non-zero"; bre +ak; case _FPCLASS_PINF: return "Positive infinity (+INF)"; break; default: return "Never happen; but silence the warn +ing"; } } int main( int argc, char **argv ) { int i; for( i = 0; i < sizeof( dblPatns ) / sizeof( U64 ); ++i ) printf( "%016I64x : % 23.16g %s\n", dblPatns[ i ], asDouble( dblPatns[ i ] ), FPClassAsText( _fpclass( asDouble( dblPatns[ i ] ) ) ) ); }

Which produces:

C:\test>qnan
0000000000000000 :                       0 Positive zero (+0)
0000000000000001 :  4.940656458412465e-324 Positive denormalized
000fffffffffffff :  2.225073858507201e-308 Positive denormalized
0010000000000000 :  2.225073858507201e-308 Positive normalized non-zero
7fefffffffffffff :  1.797693134862316e+308 Positive normalized non-zero
7ff0000000000000 :                  1.#INF Positive infinity (+INF)
7ff0000000000001 :                 1.#SNAN Signaling NaN
7ff7ffffffffffff :                 1.#SNAN Signaling NaN
7ff8000000000000 :                 1.#QNAN Quiet NaN
7fffffffffffffff :                 1.#QNAN Quiet NaN
8000000000000000 :                      -0 Negative zero (-0)
8000000000000001 : -4.940656458412465e-324 Negative denormalized
800fffffffffffff : -2.225073858507201e-308 Negative denormalized
8010000000000000 : -2.225073858507201e-308 Negative normalized non-zero
ffefffffffffffff : -1.797693134862316e+308 Negative normalized non-zero
fff0000000000000 :                 -1.#INF Negative infinity (-INF)
fff0000000000001 :                -1.#SNAN Signaling NaN
fff7ffffffffffff :                -1.#SNAN Signaling NaN
fff8000000000000 :                 -1.#IND Quiet NaN
fff8000000000001 :                -1.#QNAN Quiet NaN
ffffffffffffffff :                -1.#QNAN Quiet NaN

Note how the oddball -1.#IND is labeled as "Quiet NaN" by the runtime library _fpclass() routine.


With the rise and rise of 'Social' network sites: 'Computers are making people easier to use everyday'
Examine what is said, not who speaks -- Silence betokens consent -- Love the truth but pardon error.
"Science is about questioning the status quo. Questioning authority".
In the absence of evidence, opinion is indistinguishable from prejudice.

The start of some sanity?

Comment on Exploring IEEE754 floating point bit patterns.
Select or Download Code
Re: Exploring IEEE754 floating point bit patterns.
by roboticus (Canon) on Jul 28, 2012 at 12:05 UTC

    4.120300081267965e+103:

    Pretty nifty!

    2.914708259813678e+257

    When your only tool is a hammer, all problems look like your thumb.

    P.S. Too bad my perl isn't compiled for 64 bits. I had to use the hack below, as my (un)pack-fu isn't strong. (If I were going to redo any of Perl's features, it would probably be to make a comprehensible pack/unpack facility.)

    $ cat t.cpp
    #include <stdio.h>
    #include <string.h>

    // Overlays a double on a character buffer so that the leading bytes of
    // a string can be read back as the double sharing those bytes.
    union f {
        char s[32];
        double d;
    } g;    // file-scope object, so it starts out zero-filled

    // Usage: <program> <string>
    // Prints the double whose bytes are the leading bytes of the first
    // command-line argument.
    int main(int argc, char **s) {
        if (argc < 2) {
            fprintf(stderr, "usage: %s <string>\n", s[0]);
            return 1;
        }

        // Copy only the bytes the argument really has, capped at the
        // buffer size. An unconditional 32-byte copy reads past the end
        // of a shorter argument.
        size_t len = strlen(s[1]);
        if (len > sizeof(g.s))
            len = sizeof(g.s);
        memcpy(g.s, s[1], len);

        printf("%23.16g\n", g.d);
        return 0;
    }

    I think I'll play around with it to make it accept bitstrings.

      Too bad my perl isn't compiled for 64 bits.

      Try this version:

      #! perl -slw
      use strict;

      # Variant of the bit-pattern explorer for perls without 64-bit
      # integers: every pattern is kept as a 64-character string of '0'/'1'
      # (most significant bit first) instead of as a number.
      #
      # Each string is built as sign(1) . exponent(11) . mantissa(52).
      use constant {
          POS_ZERO       => '0'.'00000000000'.'0000'.'00000000'.'00000000'.'00000000'.'00000000'.'00000000'.'00000000',
          POS_DENORM_1ST => '0'.'00000000000'.'0000'.'00000000'.'00000000'.'00000000'.'00000000'.'00000000'.'00000001',
          POS_DENORM_LST => '0'.'00000000000'.'1111'.'11111111'.'11111111'.'11111111'.'11111111'.'11111111'.'11111111',
          POS_NORM_1ST   => '0'.'00000000001'.'0000'.'00000000'.'00000000'.'00000000'.'00000000'.'00000000'.'00000000',
          POS_NORM_LST   => '0'.'11111111110'.'1111'.'11111111'.'11111111'.'11111111'.'11111111'.'11111111'.'11111111',
          POS_INF        => '0'.'11111111111'.'0000'.'00000000'.'00000000'.'00000000'.'00000000'.'00000000'.'00000000',
          POS_SNAN_1ST   => '0'.'11111111111'.'0000'.'00000000'.'00000000'.'00000000'.'00000000'.'00000000'.'00000001',
          POS_SNAN_LST   => '0'.'11111111111'.'0111'.'11111111'.'11111111'.'11111111'.'11111111'.'11111111'.'11111111',
          POS_QNAN_1ST   => '0'.'11111111111'.'1000'.'00000000'.'00000000'.'00000000'.'00000000'.'00000000'.'00000000',
          POS_QNAN_LST   => '0'.'11111111111'.'1111'.'11111111'.'11111111'.'11111111'.'11111111'.'11111111'.'11111111',

          NEG_ZERO       => '1'.'00000000000'.'0000'.'00000000'.'00000000'.'00000000'.'00000000'.'00000000'.'00000000',
          NEG_DENORM_1ST => '1'.'00000000000'.'0000'.'00000000'.'00000000'.'00000000'.'00000000'.'00000000'.'00000001',
          NEG_DENORM_LST => '1'.'00000000000'.'1111'.'11111111'.'11111111'.'11111111'.'11111111'.'11111111'.'11111111',
          NEG_NORM_1ST   => '1'.'00000000001'.'0000'.'00000000'.'00000000'.'00000000'.'00000000'.'00000000'.'00000000',
          NEG_NORM_LST   => '1'.'11111111110'.'1111'.'11111111'.'11111111'.'11111111'.'11111111'.'11111111'.'11111111',
          NEG_INF        => '1'.'11111111111'.'0000'.'00000000'.'00000000'.'00000000'.'00000000'.'00000000'.'00000000',
          NEG_SNAN_1ST   => '1'.'11111111111'.'0000'.'00000000'.'00000000'.'00000000'.'00000000'.'00000000'.'00000001',
          NEG_SNAN_LST   => '1'.'11111111111'.'0111'.'11111111'.'11111111'.'11111111'.'11111111'.'11111111'.'11111111',
          NEG_IND        => '1'.'11111111111'.'1000'.'00000000'.'00000000'.'00000000'.'00000000'.'00000000'.'00000000',
          NEG_QNAN_1ST   => '1'.'11111111111'.'1000'.'00000000'.'00000000'.'00000000'.'00000000'.'00000000'.'00000001',
          NEG_QNAN_LST   => '1'.'11111111111'.'1111'.'11111111'.'11111111'.'11111111'.'11111111'.'11111111'.'11111111',
      };

      # Convert a 64-character bit string (most significant bit first) to
      # the double with that bit pattern. Reversing the string puts the
      # least significant bit first, which is the order pack 'b64' expects;
      # the resulting 8 bytes are least-significant-byte first.
      # NOTE(review): unpack 'd' reads native byte order, so this assumes a
      # little-endian host.
      sub bitsToDouble{ unpack 'd', pack 'b64', scalar reverse $_[0] }

      # Convert the same kind of bit string to two 32-bit integers,
      # returned as ( high word, low word ). 'VV' reads the low word first,
      # hence the outer reverse.
      sub bitsToInts{ reverse unpack 'VV', pack 'b64', scalar reverse $_[0] }

      printf "%23.16g : %08x%08x\n", bitsToDouble( $_ ), bitsToInts( $_ ) for
          POS_ZERO, POS_DENORM_1ST, POS_DENORM_LST, POS_NORM_1ST, POS_NORM_LST,
          POS_INF,  POS_SNAN_1ST,   POS_SNAN_LST,   POS_QNAN_1ST, POS_QNAN_LST,
          NEG_ZERO, NEG_DENORM_1ST, NEG_DENORM_LST, NEG_NORM_1ST, NEG_NORM_LST,
          NEG_INF,  NEG_SNAN_1ST,   NEG_SNAN_LST,   NEG_IND,
          NEG_QNAN_1ST, NEG_QNAN_LST;

      Outputs:

      C:\test>\perl32\bin\perl ieee-32b.pl
                            0 : 0000000000000000
       4.940656458412465e-324 : 0000000000000001
       2.225073858507201e-308 : 000fffffffffffff
       2.225073858507201e-308 : 0010000000000000
       1.797693134862316e+308 : 7fefffffffffffff
                       1.#INF : 7ff0000000000000
                      1.#QNAN : 7ff0000000000001
                      1.#QNAN : 7ff7ffffffffffff
                      1.#QNAN : 7ff8000000000000
                      1.#QNAN : 7fffffffffffffff
                           -0 : 8000000000000000
      -4.940656458412465e-324 : 8000000000000001
      -2.225073858507201e-308 : 800fffffffffffff
      -2.225073858507201e-308 : 8010000000000000
      -1.797693134862316e+308 : ffefffffffffffff
                      -1.#INF : fff0000000000000
                     -1.#QNAN : fff0000000000001
                     -1.#QNAN : fff7ffffffffffff
                      -1.#IND : fff8000000000000
                     -1.#QNAN : fff8000000000001
                     -1.#QNAN : ffffffffffffffff

      Seems that the compiler used to build AS 5.8.9 didn't distinguish between SNaN & QNaN. But the oddball -1.#IND was there. Perhaps its presence in the current compiler is legacy code.

      The C version above should compile fine for 32-bit.


      With the rise and rise of 'Social' network sites: 'Computers are making people easier to use everyday'
      Examine what is said, not who speaks -- Silence betokens consent -- Love the truth but pardon error.
      "Science is about questioning the status quo. Questioning authority".
      In the absence of evidence, opinion is indistinguishable from prejudice.

      The start of some sanity?

        BrowserUk:

        That one worked nicely:

        marco@Boink:~ $ perl 984255_bitvec_floats_2.pl
                              0 : 0000000000000000
         4.940656458412465e-324 : 0000000000000001
         2.225073858507201e-308 : 000fffffffffffff
         2.225073858507201e-308 : 0010000000000000
         1.797693134862316e+308 : 7fefffffffffffff
                            inf : 7ff0000000000000
                            nan : 7ff0000000000001
                            nan : 7ff7ffffffffffff
                            nan : 7ff8000000000000
                            nan : 7fffffffffffffff
                             -0 : 8000000000000000
        -4.940656458412465e-324 : 8000000000000001
        -2.225073858507201e-308 : 800fffffffffffff
        -2.225073858507201e-308 : 8010000000000000
        -1.797693134862316e+308 : ffefffffffffffff
                           -inf : fff0000000000000
                           -nan : fff0000000000001
                           -nan : fff7ffffffffffff
                           -nan : fff8000000000000
                           -nan : fff8000000000001
                           -nan : ffffffffffffffff

        ...roboticus

        When your only tool is a hammer, all problems look like your thumb.

Re: Exploring IEEE754 floating point bit patterns.
by syphilis (Canon) on Jul 29, 2012 at 04:35 UTC
    Very interesting stuff.
    They now provide 5 separate symbols for a nan - negative and positive versions of both the signalling and non-signalling nan, plus an extra one (-1.#IND) just in case you are not already sufficiently confused.
    Actually, make that 10 separate symbols, as the 5 nan symbols that appear in the output of BrowserUk's qnan.c, under some circumstances, will be suffixed with '0' ('00' in the case of -1.#IND).

    I wonder why so many symbols are needed ?
    Just one nan symbol would be enough for me - as it is (e.g.) for the mpfr library.

    It's also interesting that, of my Microsoft compilers (6.0, 7.0 and 8.0), none of them produce the same output as BrowserUk's 9.0 compiler.
    My 8.0 comes closest - the only discrepancy being that it outputs '0' for negative zero instead of '-0'.
    My 6.0 and 7.0 compilers also display negative zero as '0'. Additionally they don't provide the 1.#SNAN symbols - choosing to show them as 1.#QNAN instead, and designating all of the nans as "quiet" nans.

    My gcc (MinGW) compilers (versions 3.4.5, 4.5.2 and 4.7.0) all do the right thing with negative zero but it's only 4.7.0 that goes to the trouble of providing the 1.#SNAN signalling nans.

    It's a pity that the consistency is lacking - but the real puzzle for me is how to predict which of the various 'nans' will be produced by a specific operation.

    An example:
    #include <stdio.h>
    #include <math.h>

    /* Print the result of inf / inf and of its negation, to show which
       NaN text a given compiler/runtime produces for each. */
    int main(void) {
        double x = 1.0;
        double y = 0.0;
        double inf = x /y;    /* 1.0 / 0.0 */
        printf("%f %f\n", inf / inf, -(inf / inf));
        return 0;
    }
    For that program, my 8.0 MS compiler outputs:
    -1.#IND00 -1.#IND00

    but my gcc-4.5.2 produces:
    -1.#IND00 1.#QNAN0

    Which one is doing it correctly ?

    What operations will result in a signalling nan ?

    Cheers
    Rob
      Actually, make that 10 separate symbols, as the 5 nan symbols that appear in the output of BrowserUk's qnan.c, under some circumstances, will be suffixed with '0' ('00' in the case of -1.#IND). I wonder why so many symbols are needed ?

      The suffixing of zeros to the symbols is just an artifact of using "%f" printf format, which implicitly is equivalent to "%.6f" and therefore requires that the fractional part be padded with trailing zeros to 6 places.

      If you adjust the template, you can see this. (The printf from your code above + output):

      /* The same two values printed with three precisions; the trailing
         comment on each line is the output observed for it. */
      printf("%f %f\n", inf / inf, -(inf / inf));       // -1.#IND00 -1.#IND00
      printf("%.4f %.4f\n", inf / inf, -(inf / inf));   // -1.#IND -1.#IND
      printf("%.10f %.10f\n", inf / inf, -(inf / inf)); // -1.#IND000000 -1.#IND000000

      I've found %g more useful:

      printf("%g %g\n", inf / inf, -(inf / inf)); // -1.#IND -1.#IND
      It's also interesting that, of my Microsoft compilers (6.0, 7.0 and 8.0), none of them produce the same output as BrowserUk's 9.0 compiler.

      The changes are (roughly) in line with the timing of the compiler releases relative to the requirements of the various ANSI C standards C89/C90/C99/C11; and as affected by the updates to the IEEE 754 (1985) standard and its revision in 2008. (Roughly:)

      For that program, my 8.0 MS compiler outputs: -1.#IND00 -1.#IND00 but my gcc-4.5.2 produces: -1.#IND00 1.#QNAN0 Which one is doing it correctly ?

      You'll hate this answer. Effectively, they both are!

      The problem is, the computer scientists and the mathematicians can't agree on a single Function definition with respect to FP math.

      The problem is two-fold:

      1. FP math isn't like real math (Real in the mathematical sense).

        Eg. FP is not always associative:

        # Same three operands, grouped differently, give different results.
        # The unary plus stops perl parsing the parentheses as print's
        # argument list.
        print +( 10.0**20 + 1 ) - 10.0**20;    # prints 0
        print +( 10.0**20 - 10.0**20 ) + 1;    # prints 1
      2. Mathematicians have conventions for the results of some operations that don't quite work arithmetically.

        Eg. infinity**0 == 1!

        These allow their operations on the continuous Real domain to avoid singularity points

      You pays your money and makes your choice.

      What operations will result in a signalling nan ?

      From what I've been able to discover, none of them! (At least as far as MSC v9 is concerned.)

      The problem here is that the IEEE-754 allows for traps, but doesn't require them. See Signaling NaNs

      My take on what I see coming out of my attempts to produce NaNs with MSC(v9), is that they settled on using -1.#IND as the result of any math operation that is (for want of a better phrase) 'not well defined'. eg. sqrt( -1 ), 0/0; fmod( 1, 0 ) etc.


      With the rise and rise of 'Social' network sites: 'Computers are making people easier to use everyday'
      Examine what is said, not who speaks -- Silence betokens consent -- Love the truth but pardon error.
      "Science is about questioning the status quo. Questioning authority".
      In the absence of evidence, opinion is indistinguishable from prejudice.

      The start of some sanity?

        BrowserUk, many thanks for this thread.
        I'm currently trying to make sense of what happens wrt a PDL test script (t/pdl_from_string.t) on MS Windows, and this has helped enormously.

        I now have a clear view of what needs to be done.

        Cheers,
        Rob

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: CUFP [id://984141]
Approved by marto
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others rifling through the Monastery: (7)
As of 2014-10-01 23:56 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    What is your favourite meta-syntactic variable name?














    Results (41 votes), past polls