Beefy Boxes and Bandwidth Generously Provided by pair Networks
Pathologically Eclectic Rubbish Lister
 
PerlMonks  

ISONUM Entities

by Sixtease (Friar)
on Nov 17, 2007 at 20:27 UTC ( [id://651446]=CUFP: print w/replies, xml ) Need Help??

Some texts contain weird SGML entities: ] \ etc. I found out that these are ISONUM entities. Here is a hash that maps the names to the unicode characters, extracted from the table linked above. It is ready to be used in the HTML::Entities::_decode_entities function.
# ISONUM entity names mapped to the characters they stand for.
# Every entity appears under two keys: the bare name ('amp') and the name
# with a trailing semicolon ('amp;'). Intended to be passed as the entity
# table to HTML::Entities::_decode_entities.
{
    'amp'    => chr(0x0026), 'amp;'    => chr(0x0026),
    'apos'   => chr(0x0027), 'apos;'   => chr(0x0027),
    'ast'    => chr(0x002A), 'ast;'    => chr(0x002A),
    'brvbar' => chr(0x00A6), 'brvbar;' => chr(0x00A6),
    'bsol'   => chr(0x005C), 'bsol;'   => chr(0x005C),
    'cent'   => chr(0x00A2), 'cent;'   => chr(0x00A2),
    'colon'  => chr(0x003A), 'colon;'  => chr(0x003A),
    'comma'  => chr(0x002C), 'comma;'  => chr(0x002C),
    'commat' => chr(0x0040), 'commat;' => chr(0x0040),
    'copy'   => chr(0x00A9), 'copy;'   => chr(0x00A9),
    'curren' => chr(0x00A4), 'curren;' => chr(0x00A4),
    'darr'   => chr(0x2193), 'darr;'   => chr(0x2193),
    'deg'    => chr(0x00B0), 'deg;'    => chr(0x00B0),
    'divide' => chr(0x00F7), 'divide;' => chr(0x00F7),
    'dollar' => chr(0x0024), 'dollar;' => chr(0x0024),
    'equals' => chr(0x003D), 'equals;' => chr(0x003D),
    'excl'   => chr(0x0021), 'excl;'   => chr(0x0021),
    'frac12' => chr(0x00BD), 'frac12;' => chr(0x00BD),
    'frac14' => chr(0x00BC), 'frac14;' => chr(0x00BC),
    'frac18' => chr(0x215B), 'frac18;' => chr(0x215B),
    'frac34' => chr(0x00BE), 'frac34;' => chr(0x00BE),
    'frac38' => chr(0x215C), 'frac38;' => chr(0x215C),
    'frac58' => chr(0x215D), 'frac58;' => chr(0x215D),
    'frac78' => chr(0x215E), 'frac78;' => chr(0x215E),
    'gt'     => chr(0x003E), 'gt;'     => chr(0x003E),
    'half'   => chr(0x00BD), 'half;'   => chr(0x00BD),
    'horbar' => chr(0x2015), 'horbar;' => chr(0x2015),
    'hyphen' => chr(0x2010), 'hyphen;' => chr(0x2010),
    'iexcl'  => chr(0x00A1), 'iexcl;'  => chr(0x00A1),
    'iquest' => chr(0x00BF), 'iquest;' => chr(0x00BF),
    'laquo'  => chr(0x00AB), 'laquo;'  => chr(0x00AB),
    'larr'   => chr(0x2190), 'larr;'   => chr(0x2190),
    'lcub'   => chr(0x007B), 'lcub;'   => chr(0x007B),
    'ldquo'  => chr(0x201C), 'ldquo;'  => chr(0x201C),
    'lowbar' => chr(0x005F), 'lowbar;' => chr(0x005F),
    'lpar'   => chr(0x0028), 'lpar;'   => chr(0x0028),
    'lsqb'   => chr(0x005B), 'lsqb;'   => chr(0x005B),
    'lsquo'  => chr(0x2018), 'lsquo;'  => chr(0x2018),
    'lt'     => chr(0x003C), 'lt;'     => chr(0x003C),
    'micro'  => chr(0x00B5), 'micro;'  => chr(0x00B5),
    'middot' => chr(0x00B7), 'middot;' => chr(0x00B7),
    'nbsp'   => chr(0x00A0), 'nbsp;'   => chr(0x00A0),
    'not'    => chr(0x00AC), 'not;'    => chr(0x00AC),
    'num'    => chr(0x0023), 'num;'    => chr(0x0023),
    'ohm'    => chr(0x2126), 'ohm;'    => chr(0x2126),
    'ordf'   => chr(0x00AA), 'ordf;'   => chr(0x00AA),
    'ordm'   => chr(0x00BA), 'ordm;'   => chr(0x00BA),
    'para'   => chr(0x00B6), 'para;'   => chr(0x00B6),
    'percnt' => chr(0x0025), 'percnt;' => chr(0x0025),
    'period' => chr(0x002E), 'period;' => chr(0x002E),
    'plus'   => chr(0x002B), 'plus;'   => chr(0x002B),
    'plusmn' => chr(0x00B1), 'plusmn;' => chr(0x00B1),
    'pound'  => chr(0x00A3), 'pound;'  => chr(0x00A3),
    'quest'  => chr(0x003F), 'quest;'  => chr(0x003F),
    'quot'   => chr(0x0022), 'quot;'   => chr(0x0022),
    'raquo'  => chr(0x00BB), 'raquo;'  => chr(0x00BB),
    'rarr'   => chr(0x2192), 'rarr;'   => chr(0x2192),
    'rcub'   => chr(0x007D), 'rcub;'   => chr(0x007D),
    'rdquo'  => chr(0x201D), 'rdquo;'  => chr(0x201D),
    'reg'    => chr(0x00AE), 'reg;'    => chr(0x00AE),
    'rpar'   => chr(0x0029), 'rpar;'   => chr(0x0029),
    'rsqb'   => chr(0x005D), 'rsqb;'   => chr(0x005D),
    'rsquo'  => chr(0x2019), 'rsquo;'  => chr(0x2019),
    'sect'   => chr(0x00A7), 'sect;'   => chr(0x00A7),
    'semi'   => chr(0x003B), 'semi;'   => chr(0x003B),
    'shy'    => chr(0x00AD), 'shy;'    => chr(0x00AD),
    'sol'    => chr(0x002F), 'sol;'    => chr(0x002F),
    'sung'   => chr(0x266A), 'sung;'   => chr(0x266A),
    'sup1'   => chr(0x00B9), 'sup1;'   => chr(0x00B9),
    'sup2'   => chr(0x00B2), 'sup2;'   => chr(0x00B2),
    'sup3'   => chr(0x00B3), 'sup3;'   => chr(0x00B3),
    'times'  => chr(0x00D7), 'times;'  => chr(0x00D7),
    'trade'  => chr(0x2122), 'trade;'  => chr(0x2122),
    'uarr'   => chr(0x2191), 'uarr;'   => chr(0x2191),
    'verbar' => chr(0x007C), 'verbar;' => chr(0x007C),
    'yen'    => chr(0x00A5), 'yen;'    => chr(0x00A5),
}

Replies are listed 'Best First'.
Re: ISONUM Entities
by jdporter (Paladin) on Nov 17, 2007 at 21:27 UTC

    That seems about twice as verbose as it needs to be.

    # Builds the ISONUM entity table from a compact name => code point list.
    # Each name is registered under two keys: the bare name ("amp") and the
    # name with a trailing semicolon ("amp;"). The "do" block keeps the whole
    # thing a single expression that evaluates to a hash reference.
    do {
        my %code_point_of = (
            amp    => 0x00026, apos   => 0x00027, ast    => 0x0002A,
            brvbar => 0x000A6, bsol   => 0x0005C, cent   => 0x000A2,
            colon  => 0x0003A, comma  => 0x0002C, commat => 0x00040,
            copy   => 0x000A9, curren => 0x000A4, darr   => 0x02193,
            deg    => 0x000B0, divide => 0x000F7, dollar => 0x00024,
            equals => 0x0003D, excl   => 0x00021, frac12 => 0x000BD,
            frac14 => 0x000BC, frac18 => 0x0215B, frac34 => 0x000BE,
            frac38 => 0x0215C, frac58 => 0x0215D, frac78 => 0x0215E,
            gt     => 0x0003E, half   => 0x000BD, horbar => 0x02015,
            hyphen => 0x02010, iexcl  => 0x000A1, iquest => 0x000BF,
            laquo  => 0x000AB, larr   => 0x02190, lcub   => 0x0007B,
            ldquo  => 0x0201C, lowbar => 0x0005F, lpar   => 0x00028,
            lsqb   => 0x0005B, lsquo  => 0x02018, lt     => 0x0003C,
            micro  => 0x000B5, middot => 0x000B7, nbsp   => 0x000A0,
            not    => 0x000AC, num    => 0x00023, ohm    => 0x02126,
            ordf   => 0x000AA, ordm   => 0x000BA, para   => 0x000B6,
            percnt => 0x00025, period => 0x0002E, plus   => 0x0002B,
            plusmn => 0x000B1, pound  => 0x000A3, quest  => 0x0003F,
            quot   => 0x00022, raquo  => 0x000BB, rarr   => 0x02192,
            rcub   => 0x0007D, rdquo  => 0x0201D, reg    => 0x000AE,
            rpar   => 0x00029, rsqb   => 0x0005D, rsquo  => 0x02019,
            sect   => 0x000A7, semi   => 0x0003B, shy    => 0x000AD,
            sol    => 0x0002F, sung   => 0x0266A, sup1   => 0x000B9,
            sup2   => 0x000B2, sup3   => 0x000B3, times  => 0x000D7,
            trade  => 0x02122, uarr   => 0x02191, verbar => 0x0007C,
            yen    => 0x000A5,
        );

        # The leading "+" forces Perl to parse the braces as an anonymous
        # hash constructor rather than as a bare block.
        +{
            map {
                my $char = chr $code_point_of{$_};
                ( $_ => $char, "$_;" => $char );
            } keys %code_point_of
        };
    }
    A word spoken in Mind will reach its own level, in the objective world, by its own weight

Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Node Status?
node history
Node Type: CUFP [id://651446]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this?Last hourOther CB clients
Other Users?
Others surveying the Monastery: (5)
As of 2024-04-24 22:17 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found