<?xml version="1.0" encoding="windows-1252"?>
<node id="1006825" title="How to handle UTF-8 content with Win32::IE::Mechanize" created="2012-12-03 04:15:26" updated="2012-12-03 04:15:26">
<type id="115">
perlquestion</type>
<author id="961">
Anonymous Monk</author>
<data>
<field name="doctext">
&lt;p&gt;I am using Win32::IE::Mechanize to access a web page that is encoded in UTF-8.&lt;/p&gt;&lt;br&gt;

&lt;p&gt;However, when I try to read data from the DOM that includes Unicode (non-ASCII) characters, those characters are returned as question marks (hex 3F).&lt;/p&gt;&lt;br&gt;

&lt;p&gt;Any help would be very much appreciated. Sample code is below:&lt;/p&gt;&lt;br&gt;

&lt;code&gt;
# Demonstration script: open a page in Internet Explorer through
# Win32::IE::Mechanize, read the document title from the DOM, and write
# the title, its length, and a hex dump of it to debug.txt.
use strict;
use warnings;
# File::BOM supplies the :via(File::BOM) layer used when opening debug.txt.
use File::BOM;
use Win32::IE::Mechanize;
# None of the Time::HiRes functions imported here are called in this script.
use Time::HiRes qw( usleep gettimeofday tv_interval stat );
# Declares that this source file is UTF-8. It affects literals in the source
# only (there are no non-ASCII literals here), not data returned by IE.
use utf8;

# create Win32::IE::Mechanize object
# (visible is presumably what makes the IE window show on screen - confirm)
my $mech = Win32::IE::Mechanize-&gt;new(visible =&gt; 1);

# open the URL
$mech-&gt;get('http://kr.yahoo.com/'); 

# fixed 10 second pause; presumably gives the page time to finish loading
sleep (10);

# get the DOM document
# This reaches directly into the object hash ('agent' key); presumably that
# is the underlying IE automation object - verify against the module.
my $doc = $mech-&gt;{agent}-&gt;Document;

# get the webpage title
my $title = $doc-&gt;title;

# create a utf-8 text file
# Bareword handle, opened for writing through the File::BOM and UTF-8
# encoding layers. It is never closed explicitly in this script.
open DEBUGFILE, "&gt;:via(File::BOM):encoding(UTF-8)", "debug.txt" or die $!;

# write the title to file
print DEBUGFILE "Title:" . $title . "\n";

# write the title length to the file
print DEBUGFILE "Title Length:" . length ($title) . "\n";

# write the hex byte string of the title to the file
# H48 emits at most 48 hex digits, i.e. the first 24 bytes of the string.
# NOTE(review): the reported dump is all 3f for the non-ASCII characters, so
# the substitution has already happened inside $title before anything is
# written to the file. Presumably the OLE layer converts strings to the
# ANSI code page by default - the CP option of Win32::OLE (CP_UTF8) looks
# like the setting to check; TODO confirm.
print DEBUGFILE "Title Hex Byte String:" . unpack("H48", $title) . "\n";
&lt;/code&gt;

&lt;p&gt;Code output is:&lt;/p&gt;&lt;br&gt;

&lt;code&gt;
Title:??! ???
Title Length:7
Title Hex Byte String:3f3f21203f3f3f
&lt;/code&gt;</field>
<field name="reputation">
15</field>
</data>
</node>
