# If using a Perl version prior to v5.16, comment out the "use feature" line, # and uncomment the BEGIN{...} block. use feature ':5.16'; #BEGIN { # die "Must install Unicode::CaseFold." if ! eval "use Unicode::CaseFold; 1;"; #} use strict; use warnings FATAL => 'utf8'; use utf8; use charnames ':full'; use Unicode::Normalize qw(NFD NFC); binmode STDOUT, ':encoding(UTF-8)'; my $string = "λαάὰὸς"; while ( $string =~ m/(?\X)/g ) { my $grapheme = $+{grapheme}; print explain( $+{grapheme} ), "\n"; } sub explain { my $grapheme = shift; my %pri = decompose( $grapheme ); my %base = decompose( $pri{base} ); my $output = <<"END_OUTPUT"; Grapheme:($grapheme) Dec, Hex, Name: [$pri{cp}], [$pri{hex_str}], '$pri{name}' Case: (Fold,Lower,Upper): ($pri{fc}), ($pri{lc}), ($pri{uc}) Grapheme Base: ($pri{base}), [$base{hex_str}], '$base{name}' END_OUTPUT foreach my $extend ( @{$pri{comb}} ) { my %ext = decompose( $extend ); my $grapheme = fc $ext{grapheme}; $output .= <<"END_OUTPUT"; Combining Mark: ($grapheme ) Dec, Hex, Name: [$ext{cp}], [$ext{hex_str}], '$ext{name}' END_OUTPUT } return $output; } sub decompose { my $grapheme = shift; my $decomp = NFD( $grapheme ); my $cp = ord $grapheme; my ( $base ) = substr($decomp, 0, 1 ); my ( @comb ) = map { substr $decomp, $_, 1 } 1 .. length($decomp)-1; return ( grapheme => $grapheme, cp => $cp, hex_str => sprintf( "%#0.4x", $cp ), name => charnames::viacode( $cp ), lc => lc $grapheme, uc => uc $grapheme, fc => fc $grapheme, base => $base, comb => [ @comb ], ); }