# readbackwards_utf8($fn, $window)
#
# Opens the file $fn in raw mode and returns an iterator (a code reference)
# that walks the file from its end towards its start, at most $window bytes
# per call.
#
# Each call to the iterator returns one chunk, decoded from UTF-8 into a Perl
# character string.  Chunks arrive in reverse file order, but the characters
# inside a chunk are in their normal (forward) order.  When the start of the
# file has been reached the handle is closed and the iterator returns
# nothing (bare return).
#
# A window never starts in the middle of a multi-byte character: leading
# continuation bytes (10xxxxxx) are dropped from the chunk and left in the
# file to be picked up, together with their lead byte, by the next call.
# $window must be at least 4 so that the next window is guaranteed to reach
# that lead byte (a UTF-8 sequence has at most 3 continuation bytes).
#
# Dies if $window is smaller than 4, if the file cannot be opened, on seek or
# read failure, and if the data is not valid UTF-8.
sub readbackwards_utf8 {
    my ($fn, $window) = @_;
    die "Bad window $window" unless $window>=4;
    open my $fh, '<:raw', $fn or die "open $fn: $!";

    # -s yields a false value for an empty file; normalise that to 0.
    my $curpos = (-s $fh) || 0;

    return sub {
        if ( $curpos < 1 ) {
            close $fh if $fh;
            $fh = undef;
            return;
        }

        my $bytes = $curpos < $window ? $curpos : $window;
        $curpos -= $bytes;
        seek($fh, $curpos, 0) or die "seek $curpos $fn: $!";

        my $got = read($fh, my $buf, $bytes);
        die "read $bytes bytes at $curpos from $fn: $!"
            unless defined $got && $got == $bytes;

        # Count leading continuation bytes.  At file offset 0 nothing
        # precedes the chunk, so there is no earlier window to hand them to:
        # leave them in place and let the decode step reject them.
        my $skip = 0;
        if ( $curpos > 0 ) {
            $skip++
                while $skip < $bytes
                && ( ord( substr $buf, $skip, 1 ) & 0b11000000 ) == 0b10000000;

            # Here $bytes == $window >= 4, so a window made up solely of
            # continuation bytes cannot occur in valid UTF-8.  Giving all of
            # them back would leave the position unchanged and the iterator
            # would return empty strings forever.
            die "malformed UTF-8 at byte $curpos in $fn"
                if $skip == $bytes;
        }

        if ($skip) {
            substr( $buf, 0, $skip, '' );
            $curpos += $skip;    # next window ends just after these bytes
        }

        # utf8::decode returns false and leaves the string untouched when the
        # bytes are not valid UTF-8; do not hand raw bytes back silently.
        utf8::decode($buf)
            or die "malformed UTF-8 at byte $curpos in $fn";

        return $buf;
    };
}