# Character-class body (the text that goes between [ and ]) listing the code
# point ranges accepted as XML "BaseChar" characters. Each entry is a
# "first-last" range; gaps between ranges are deliberate exclusions and are
# called out in the comments below.
#
# NOTE(review): only the ranges listed here are accepted. This looks like a
# partial list (e.g. lowercase a-z is absent) -- confirm that is intentional.
my $valid_XML_BaseChars = join('',
    "\x{0041}-\x{005A}",    # Uppercase A-Z
    "\x{0100}-\x{0131}",    # Extended Latin A subset
                            # Skipping ligatures 0132, 0133
    "\x{0134}-\x{013E}",    # Continuing Ext. Latin A
                            # Skipping middle dots 013F, 0140
    "\x{0141}-\x{0148}",    # Finishing Ext. Latin A
    "\x{01FA}-\x{0217}",    # Extended Latin B subset
    "\x{0250}-\x{02A8}",    # IPA Extensions
);

# Precompiled pattern matching exactly one character from the ranges above.
# The interpolated variable must be spelled exactly as declared: Perl
# identifiers are case-sensitive, and a misspelt name either fails to compile
# under strict or interpolates as empty and yields the invalid pattern "[]".
# No /o flag is needed: qr// is compiled when this statement runs and the
# interpolated string never changes afterwards.
my $XML_BaseChar = qr/[$valid_XML_BaseChars]/;