use 5.008000;

use strict;
use warnings;

# The test data below contains non-ASCII literals (U+00DC, U+2019) written as
# UTF-8.  Without this pragma Perl would read them as individual bytes, and
# chars_to_bytes() would be handed bytes instead of characters.
use utf8;

use Test::More 'no_plan';

use Encode      qw( encode );
use HTML::Strip qw( );

# Encoding the stripped text is converted to.  U+2019 lies outside of it, so
# the cases containing that character expect a '?' in its place.
use constant ENCODING => 'iso-latin-1';

# Test names are taken from the HTML input and therefore contain non-ASCII
# characters; give the Test::Builder handles an encoding layer so printing
# them does not trigger "Wide character in print" warnings.
{
    my $builder = Test::More->builder;
    binmode $builder->output,         ':encoding(UTF-8)';
    binmode $builder->failure_output, ':encoding(UTF-8)';
    binmode $builder->todo_output,    ':encoding(UTF-8)';
}

# Runs $html through a fresh HTML::Strip parser and returns the parser's
# result.
sub html_to_text {
    my ($html) = @_;
    my $stripper = HTML::Strip->new();
    return $stripper->parse($html);
}

# Encodes the character string $text into bytes of $encoding.  encode() is
# called without a CHECK argument, so a character that cannot be represented
# in $encoding is replaced by a substitution character instead of dying.
sub chars_to_bytes {
    my ($encoding, $text) = @_;
    return encode($encoding, $text);
}

{
    # Each case is [ HTML input, expected text ]; the input doubles as the
    # test name.
    my @cases = (
        [ 'blah',              'blah' ],
        [ 'Ü --',              'Ü --' ],
        [ 'blah -- ’ -- blah', 'blah -- ? -- blah' ],
        [ 'Ü -- ’ -- blah',    'Ü -- ? -- blah' ],
    );

    # A named lexical is used instead of $_ so that nothing called inside the
    # loop body can clobber the current case.
    for my $case (@cases) {
        my ($html, $expect) = @{$case};

        my $text = chars_to_bytes(ENCODING, html_to_text($html));

        # is() compares with eq, i.e. code point by code point, so the
        # character 'Ü' (U+00DC) in $expect matches the byte 0xDC in $text.
        is($text, $expect, $html);
    }
}