http://www.perlmonks.org?node_id=979542


in reply to Pointers needed to tweak a sort

#!/usr/bin/perl use warnings; use strict; # First, build the data structure. # I took the liberty of making this a standalone test-case so I can ru +n it. our $shows_str = shows_str(); our @showtab; for (split /^/, $shows_str) { /\S/ or next; /\A\s*(\S.*?)\s*\(([^()]+)\)/ or die "error parseing input line ($_)"; my %s = ( name => $1, user => $2, ); push @showtab, \%s; }
# Write a function to precompute all those boolean keys you want to # sort by. This should be an improvement because now the code has # each condition only once, not twice, so we reduced code duplication. # The key is just a string of "0" and "1" characters. For simplicity, # we store the key right into the record. our @networks = qw(ABC CBS FOX NBC SyFy TNT USA); our $network_string = join('|',@networks); sub show_sortflags { my($s) = @_; my @k; push @k, ( !($s->{user} =~ /LadyAleena_($network_string|TV)/ && $s->{name +} !~ /TV/), !($s->{user} =~ /LadyAleena_($network_string)/ && $s->{name} = +~ /TV shows$/), !($s->{user} eq 'LadyAleena_TV' && $s->{name} eq 'Premium TV s +hows'), !($s->{user} eq 'LadyAleena_TV' && $s->{name} eq 'TV shows'), !($s->{user} eq 'LadyAleena_TV' && $s->{name} eq 'TV networks' +), !($s->{user} eq 'Lady_Aleena' && $s->{name} =~ /(Comedians|Mus +icians)/), !($s->{user} eq 'Lady_Aleena' && $s->{name} =~ /(Horror|Scienc +e fiction)/), !($s->{user} eq 'Lady_Aleena' && $s->{name} eq 'Ripley\'s & Gu +inness'), !($s->{user} eq 'LadyAleena_home'), !($s->{user} eq 'Lady_Aleena' && $s->{name} =~ /(Followers' bu +sinesses|List subscribers)/), ); my $k = join "", map { $_ ? "1" : "0" } @k; $$s{sortflag} = $k; } for my $s (@showtab) { show_sortflags($s); }
# Depending on what my_sort does, you might also want to transform it # in a similar way, to generate a key you can compare easily later, # such as storing the show title with the article stripped off, # casefolded. As I don't know what it does, I'll just use this stub # and leave that as a homework to the reader. sub my_sort { my($a, $b) = @_; $a cmp $b; }
# Now do the sort relying on the key we computed. Note that as # the compare function is still non-trivial, I don't write its # body in the same line as the sort, because that's ugly. sub show_compare { my($a, $b) = @_; return $$a{sortflag} cmp $$b{sortflag} || my_sort($$a{name}, $$b{name}, "article"); } our @showtab_sorted = sort { show_compare($a, $b) } @showtab;
# Print the results. I print the keys I've generated for # debugging, you normally wouldn't print those. print q{ ,-----------A !($s->{user} =~ /LadyAleena_($network_string|TV)/ && $s +->{name} !~ /TV/), | B !($s->{user} =~ /LadyAleena_($network_string)/ && $s->{ +name} =~ /TV shows$/), | C !($s->{user} eq 'LadyAleena_TV' && $s->{name} eq 'Premi +um TV shows'), | ,--------D !($s->{user} eq 'LadyAleena_TV' && $s->{name} eq 'TV sh +ows'), | | E !($s->{user} eq 'LadyAleena_TV' && $s->{name} eq 'TV ne +tworks'), | | F !($s->{user} eq 'Lady_Aleena' && $s->{name} =~ /(Comedi +ans|Musicians)/), | | ,-----G !($s->{user} eq 'Lady_Aleena' && $s->{name} =~ /(Horror +|Science fiction)/), | | | H !($s->{user} eq 'Lady_Aleena' && $s->{name} eq 'Ripley\ +'s & Guinness'), | | | I !($s->{user} eq 'LadyAleena_home'), | | | ,--J !($s->{user} eq 'Lady_Aleena' && $s->{name} =~ /(Follow +ers' ...)/), v v v v ABCDEFGHIJ }; for my $s (@showtab_sorted) { print $$s{sortflag}, " ", $$s{name}, " (", $$s{user}, ")\n"; } # The sample input sub shows_str { q{ Doctor Who (LadyAleena_TV) Hercules & Xena (LadyAleena_TV) Highlander (LadyAleena_TV) Star Trek (LadyAleena_TV) Leverage (LadyAleena_TNT) Body of Proof (LadyAleena_ABC) In Plain Sight (LadyAleena_USA) Necessary Roughness (LadyAleena_USA) seaQuest (LadyAleena_NBC) Sanctuary (LadyAleena_SyFy) Fairly Legal (LadyAleena_USA) White Collar (LadyAleena_USA) MythBusters (LadyAleena_TV) Painkiller Jane (LadyAleena_SyFy) Jericho (LadyAleena_CBS) Harry's Law (LadyAleena_NBC) Chuck (LadyAleena_NBC) Grimm (LadyAleena_NBC) Once Upon a Time (LadyAleena_ABC) Primeval (LadyAleena_TV) Stargate (LadyAleena_SyFy) Eureka Warehouse13 Alphas (LadyAleena_SyFy) No Ordinary Family (LadyAleena_ABC) Buffy & Angel (LadyAleena_TV) Covert Affairs (LadyAleena_USA) Studio 60 (LadyAleena_NBC) Buck Rogers 25th Century (LadyAleena_NBC) Lipstick Jungle (LadyAleena_NBC) Crossing Jordan (LadyAleena_NBC) Law & Order (LadyAleena_NBC) Firefly (LadyAleena_FOX) Advtr of Brisco County Jr (LadyAleena_FOX) Babylon 5 (LadyAleena_TV) Haven (LadyAleena_SyFy) Falling Skies (LadyAleena_TNT) Bones & The Finder (LadyAleena_FOX) The Mercury Men (LadyAleena_SyFy) Andromeda (LadyAleena_SyFy) Burn Notice (LadyAleena_USA) Castle (LadyAleena_ABC) Numb3rs (LadyAleena_CBS) Rizzoli & Isles (LadyAleena_TNT) ER Third Watch Med Invgtn (LadyAleena_NBC) NBC TV shows (LadyAleena_NBC) USA TV shows (LadyAleena_USA) ABC TV shows (LadyAleena_ABC) TNT TV shows (LadyAleena_TNT) CBS TV shows (LadyAleena_CBS) FOX TV shows (LadyAleena_FOX) Premium TV shows (LadyAleena_TV) TV shows (LadyAleena_TV) TV networks (LadyAleena_TV) Musicians (Lady_Aleena) Comedians (Lady_Aleena) Science fiction (Lady_Aleena) Horror (Lady_Aleena) Ripley's & Guinness (Lady_Aleena) Utilities (LadyAleena_home) Groceries (LadyAleena_home) Software (LadyAleena_home) Stores (LadyAleena_home) List subscribers (Lady_Aleena) Followers' businesses (Lady_Aleena) } }; __END__

Output:

,-----------A !($s->{user} =~ /LadyAleena_($network_string|TV)/ && $s +->{name} !~ /TV/), | B !($s->{user} =~ /LadyAleena_($network_string)/ && $s->{ +name} =~ /TV shows$/), | C !($s->{user} eq 'LadyAleena_TV' && $s->{name} eq 'Premi +um TV shows'), | ,--------D !($s->{user} eq 'LadyAleena_TV' && $s->{name} eq 'TV sh +ows'), | | E !($s->{user} eq 'LadyAleena_TV' && $s->{name} eq 'TV ne +tworks'), | | F !($s->{user} eq 'Lady_Aleena' && $s->{name} =~ /(Comedi +ans|Musicians)/), | | ,-----G !($s->{user} eq 'Lady_Aleena' && $s->{name} =~ /(Horror +|Science fiction)/), | | | H !($s->{user} eq 'Lady_Aleena' && $s->{name} eq 'Ripley\ +'s & Guinness'), | | | I !($s->{user} eq 'LadyAleena_home'), | | | ,--J !($s->{user} eq 'Lady_Aleena' && $s->{name} =~ /(Follow +ers' ...)/), v v v v ABCDEFGHIJ 0111111111 Advtr of Brisco County Jr (LadyAleena_FOX) 0111111111 Andromeda (LadyAleena_SyFy) 0111111111 Babylon 5 (LadyAleena_TV) 0111111111 Body of Proof (LadyAleena_ABC) 0111111111 Bones & The Finder (LadyAleena_FOX) 0111111111 Buck Rogers 25th Century (LadyAleena_NBC) 0111111111 Buffy & Angel (LadyAleena_TV) 0111111111 Burn Notice (LadyAleena_USA) 0111111111 Castle (LadyAleena_ABC) 0111111111 Chuck (LadyAleena_NBC) 0111111111 Covert Affairs (LadyAleena_USA) 0111111111 Crossing Jordan (LadyAleena_NBC) 0111111111 Doctor Who (LadyAleena_TV) 0111111111 ER Third Watch Med Invgtn (LadyAleena_NBC) 0111111111 Eureka Warehouse13 Alphas (LadyAleena_SyFy) 0111111111 Fairly Legal (LadyAleena_USA) 0111111111 Falling Skies (LadyAleena_TNT) 0111111111 Firefly (LadyAleena_FOX) 0111111111 Grimm (LadyAleena_NBC) 0111111111 Harry's Law (LadyAleena_NBC) 0111111111 Haven (LadyAleena_SyFy) 0111111111 Hercules & Xena (LadyAleena_TV) 0111111111 Highlander (LadyAleena_TV) 0111111111 In Plain Sight (LadyAleena_USA) 0111111111 Jericho (LadyAleena_CBS) 0111111111 Law & Order (LadyAleena_NBC) 0111111111 Leverage (LadyAleena_TNT) 0111111111 Lipstick Jungle (LadyAleena_NBC) 0111111111 MythBusters (LadyAleena_TV) 0111111111 Necessary Roughness (LadyAleena_USA) 0111111111 No Ordinary Family (LadyAleena_ABC) 0111111111 Numb3rs (LadyAleena_CBS) 0111111111 Once Upon a Time (LadyAleena_ABC) 0111111111 Painkiller Jane (LadyAleena_SyFy) 0111111111 Primeval (LadyAleena_TV) 0111111111 Rizzoli & Isles (LadyAleena_TNT) 0111111111 Sanctuary (LadyAleena_SyFy) 0111111111 Star Trek (LadyAleena_TV) 0111111111 Stargate (LadyAleena_SyFy) 0111111111 Studio 60 (LadyAleena_NBC) 0111111111 The Mercury Men (LadyAleena_SyFy) 0111111111 White Collar (LadyAleena_USA) 0111111111 seaQuest (LadyAleena_NBC) 1011111111 ABC TV shows (LadyAleena_ABC) 1011111111 CBS TV shows (LadyAleena_CBS) 1011111111 FOX TV shows (LadyAleena_FOX) 1011111111 NBC TV shows (LadyAleena_NBC) 1011111111 TNT TV shows (LadyAleena_TNT) 1011111111 USA TV shows (LadyAleena_USA) 1101111111 Premium TV shows (LadyAleena_TV) 1110111111 TV shows (LadyAleena_TV) 1111011111 TV networks (LadyAleena_TV) 1111101111 Comedians (Lady_Aleena) 1111101111 Musicians (Lady_Aleena) 1111110111 Horror (Lady_Aleena) 1111110111 Science fiction (Lady_Aleena) 1111111011 Ripley's & Guinness (Lady_Aleena) 1111111101 Groceries (LadyAleena_home) 1111111101 Software (LadyAleena_home) 1111111101 Stores (LadyAleena_home) 1111111101 Utilities (LadyAleena_home) 1111111110 Followers' businesses (Lady_Aleena) 1111111110 List subscribers (Lady_Aleena)

Update: some random nodes with sorts (in case you want to find keywords to search for) are Schwartzian Transform, Schwartzian Transform, Benchmark, -s versus schwartzian, Sorting an array of arrays by field, Re: Spiraling integers. I think none of these really tell about sorting using multiple keys, some boolean and some textual though.

Update: However, the Sort::Key CPAN module does seem to offer a way to sort using multiple keys of different types, so check that out. It might be the easiest solution.

Update: removed use 5.014; – I think all of that should work fine in older perls too. But people, come on, can't you upgrade yet?

Replies are listed 'Best First'.
Re^2: Pointers needed to tweak a sort
by Lady_Aleena (Priest) on Jul 02, 2012 at 21:55 UTC
    sub my_sort { my ($c,$d,$type) = @_; if ($c =~ /^index/) { return -1; } elsif ($d =~ /^index/) { return 1; } else { if ($c =~ /^ssi/) { return -1; } elsif ($d =~ /^ssi/) { return 1; } else { if ($type =~ /article/) { #written mostly by kent/n in #perl on freenode. for ($c, $d) { s/<.+?>//g; s/\s*\b(A|a|An|an|The|the)(_|\s)//xi; decode_entities($_); } $c = ucfirst($c) if $c =~ /^\l./; $d = ucfirst($d) if $d =~ /^\l./; $c =~ s/\B([A-Z])/\L$1/g; $d =~ s/\B([A-Z])/\L$1/g; if ($c =~ /^\d+$/ and $d =~ /^\d+$/) { return $c <=> $d; } else { return $c cmp $d; } } elsif ($type =~ /name/) { for ($c,$d) { s/\|.+$//; $_ = join(' ', (reverse split(/(?:_|\s)(?=[^_\s]+$)/, $_,2)) +) if $_ !~ /^_/; s/^_//; s/^(A|a|An|an|The|the)(_|\s)//; $_ = lc; } return $c cmp $d; } else { die(qq($type is not valid in the sort.)); } } } }

    my_sort strips off articles and alphabetizes numbers as numbers (2100 AFTER 22). The index and ssi sorting are there until I finally wipe out all .ssi files from my site, which may never happen since I'm low on imagination for coding those pages. It also alphabetizes names last name then first name. This was written a long time ago with help.

    Have a cookie and a very nice day!
    Lady Aleena
Re^2: Pointers needed to tweak a sort
by Lady_Aleena (Priest) on Jul 04, 2012 at 05:40 UTC

    ambrus, I hope I'm using this as you intended. I separated out the subroutines into a module, and use them where ever I need them. I use hashes of hashes instead of arrays of hashes, but I did not have to rewrite your code to make it work, though I renamed the subroutines and moved @networks and $network_string inside the list_sortflags subroutine.

    package Twitter::ListSort; use strict; use warnings FATAL => qw( all ); use base 'Exporter'; our @EXPORT_OK = qw(list_sortflags list_compare); # Written by ambrus on PerlMonks. use Base::Nifty qw(my_sort); # Write a function to precompute all those boolean keys you want to # sort by. This should be an improvement because now the code has # each condition only once, not twice, so we reduced code duplication. # The key is just a string of "0" and "1" characters. For simplicity, # we store the key right into the record. sub list_sortflags { my($s) = @_; my @networks = qw(ABC CBS FOX NBC SyFy TNT USA); my $network_string = join('|',@networks); my @k; push @k, ( !($s->{user} =~ /LadyAleena_($network_string|TV)/ && $s->{name} !~ + /TV/), !($s->{user} =~ /LadyAleena_($network_string)/ && $s->{name} =~ /T +V shows$/), !($s->{user} eq 'LadyAleena_TV' && $s->{name} eq 'Premium TV shows +'), !($s->{user} eq 'LadyAleena_TV' && $s->{name} eq 'TV shows'), !($s->{user} eq 'LadyAleena_TV' && $s->{name} eq 'TV networks'), !($s->{user} eq 'LadyAleena_TV' && $s->{name} eq 'TV authors'), !($s->{user} eq 'Lady_Aleena' && $s->{name} =~ /(Comedians|Musicia +ns)/), !($s->{user} eq 'Lady_Aleena' && $s->{name} =~ /(Horror|Science fi +ction)/), !($s->{user} eq 'Lady_Aleena' && $s->{name} eq 'Ripley\'s & Guinne +ss'), !($s->{user} eq 'LadyAleena_home'), !($s->{user} eq 'Lady_Aleena' && $s->{name} =~ /(Followers' busine +sses|List subscribers)/), ); my $k = join "", map { $_ ? "1" : "0" } @k; $$s{sortflag} = $k; } # Now do the sort relying on the key we computed. Note that as # the compare function is still non-trivial, I don't write its # body in the same line as the sort, because that's ugly. sub list_compare { my($a, $b) = @_; return $$a{sortflag} cmp $$b{sortflag} || my_sort($$a{name}, $$b{name}, "article"); } 1;

    This is how it is used in a script.

    #!/usr/bin/perl use strict; use warnings FATAL => qw( all ); use lib '../files/perl/lib'; use Base::Data qw(data_file get_hash); use Twitter qw(twitter_accounts); use Twitter::ListSort qw(list_sortflags list_compare); my %lists; for my $account (twitter_accounts) { my $account_list_file = data_file("Twitter/users/$account","lists.tx +t"); if (-s $account_list_file) { my %sublists = get_hash( file => $account_list_file, headings => [qw(id slug name user members mem_change subscribers + sub_change status)], ); for (keys %sublists) { next if $sublists{$_}{status} ne 'owner'; $lists{$_} = $sublists{$_}; } } } for my $list (values %lists) { list_sortflags($list); } for my $list (sort { list_compare($a,$b) } values %lists) { local $\ = "\n"; my $name = $list->{name}; my $user = $list->{user}; print "$name ($user)"; }
    Have a cookie and a very nice day!
    Lady Aleena

      By the way, if you wanted only up to one of those conditions to match, then you don't need all this complication with a string of zeros and ones. Just have the key be a single number you just compare with spaceship. Something like this, untested:

      ... sub list_sortflags { my($s) = @_; my @networks = ...; my $network_string = ...; my $k; if ($s->{user} =~ /LadyAleena_($network_string|TV)/ && $s->{name} !~ + /TV/) { $k = 1; } elsif ($s->{user} =~ /LadyAleena_($network_string)/ && $s->{name} =~ + /TV shows$/) { $k = 2; } elsif ($s->{user} eq 'LadyAleena_TV' && $s->{name} eq 'Premium TV sh +ows') { $k = 3; } elsif ($s->{user} eq 'LadyAleena_TV' && $s->{name} eq 'TV shows') { $k = 4; } ... elsif ($s->{user} eq 'Lady_Aleena' && $s->{name} =~ /(Followers' bus +inesses|List subscribers)/) { $k = 11; } else { $k = 12; } $$s{sortflag} = $k; } ... sub list_compare { my($a, $b) = @_; return $$a{sortflag} <=> $$b{sortflag} || my_sort($$a{name}, $$b{name}, "article"); } ...