#!/usr/bin/perl
#
# Takes a sample article record whose teaser (HTML text) and title are held
# as raw octets, converts both into Perl character strings, and prints them.

use strict;
use warnings;

# ':locale' selects the I/O encoding from the locale environment; according
# to the open pragma's documentation it also covers STDIN/STDOUT/STDERR.
# This matters for the print at the end of the script -- see 'perldoc open'.
use open ':locale';

use HTML::Entities;       # supplies decode_entities() for HTML &...; entities
use Encode qw(decode);    # supplies decode() to turn octets into characters

# Sample record. This file has no 'use utf8', so non-ASCII characters typed
# into the teaser literal are kept as raw octets (assuming the file is saved
# as UTF-8 -- confirm), which is what decode() below expects.
my %article = (
    href   => 'http://www.accountancyage.com/accountancyage/news/2159769/kpmg-sets-retail-think-tank',
    teaser => '

AccountancyAge.com, Accountancy Age, Thursday 6 July 2006 at 00:00:00

Firm forms partnership with retail research group

KPMG has launched the ‘Retail Think Tank’ (RTT) aimed at establishing ‘the true health and status\' of the retail sector. The Big Four firm has joined forces with retail research group...

Read the full article

',
    # The \x{e2}\x{80}\x{98} / \x{e2}\x{80}\x{99} escapes spell out UTF-8
    # octets one by one, so this value is undecoded as well.
    title  => "KPMG sets up retail \x{e2}\x{80}\x{98}think tank\x{e2}\x{80}\x{99}",
);

# Teaser: it is HTML, so entities are resolved first; the result is then
# interpreted as UTF-8. Called in non-void context, decode_entities()
# returns a decoded copy and leaves the stored teaser untouched.
my $teaser_text = decode('utf8', decode_entities($article{teaser}));

# Title: contains no HTML, so interpreting the octets as UTF-8 is enough.
my $headline = decode('utf8', $article{title});

# Emit the headline line followed by the teaser.
print "** ${headline}:\n", "${teaser_text}\n";