moritz basically implemented a similar idea (which, by the way, I formulated incorrectly in my message). I think it would be better to use the Unicode collation module recommended above.
#!/usr/bin/env perl
use warnings;
use strict;
use Encode qw(decode);
use Sort::External;
use Unicode::Collate::Locale;
# Sort the lines of a text file in Vietnamese collation order.
#
# Usage: <script> [input-file [output-file]]
#   input-file   defaults to 'large-unsorted.txt'
#   output-file  defaults to 'sorted.txt'
#
# Sorting is delegated to Sort::External; ordering is decided by a
# Unicode::Collate::Locale collator tailored for the 'vi' locale.
my $in  = shift // 'large-unsorted.txt';
my $out = shift // 'sorted.txt';

my $comparator = Unicode::Collate::Locale->new(locale => 'vi');

my $sorter = Sort::External->new(
    sortsub => sub {
        # Sort::External passes the two items in its own package variables
        # rather than in the usual $a / $b.
        #
        # Lines are fed (and later written out) as raw bytes, but the
        # collator compares character strings, so decode just for the
        # comparison.  Assumes the input file is UTF-8 encoded.
        my $left  = decode('UTF-8', $Sort::External::a);
        my $right = decode('UTF-8', $Sort::External::b);
        return $comparator->cmp($left, $right);
    },
);

open my $unsorted, '<', $in
    or die "Cannot open '$in' for reading: $!";

# Feed one line at a time so the whole file is never held in a Perl list.
while (my $line = <$unsorted>) {
    $sorter->feed($line);
}

close $unsorted
    or die "Cannot close '$in': $!";

# Write the sorted lines to the output file.
$sorter->finish(outfile => $out);
|