#!/usr/bin/perl
use strict;
use warnings;
use Benchmark qw(:all);
use Data::Dumper;

# Dump counts? 1=yes, 0=no
# (In the code shown in this file, only the disabled debugging chunk at the
# end of the script reads this flag.)
my $showall = 1;

# Alphabet that make_word() draws random letters from: A-Z followed by a-z.
my $letters = join("", "A" .. "Z", "a" .. "z");
# Input word list shared by all three counting routines; filled by the
# benchmark loop via make_word().
my @words;
# Tally built by do_hash(): lowercased word => number of occurrences.
my %counth;
# Tally built by do_array(): list of [ word, count ] pairs.
my @counta;
# Tally built by do_greparray(): list of [ word, count ] pairs.
my @countg;

# Tally word frequencies into %counth, keyed by the lowercased word.
# Any previous tally is discarded first. The loop variable aliases the
# elements of @words, so every word in @words is lowercased in place.
sub do_hash {
    %counth = ();
    foreach my $entry (@words) {
        $entry = lc $entry;
        $counth{$entry} += 1;
    }
}

# Tally word frequencies into @counta, a list of [ word, count ] pairs,
# locating the pair for each word with a linear scan. Any previous tally is
# discarded first. The loop variable aliases the elements of @words, so
# every word in @words is lowercased in place.
sub do_array {
    @counta = ();
    foreach my $entry (@words) {
        $entry = lc $entry;

        # Walk the pairs recorded so far and stop at the first one that
        # already holds this word.
        my $seen = 0;
        foreach my $pair (@counta) {
            next unless $pair->[0] eq $entry;
            $pair->[1] += 1;
            $seen = 1;
            last;
        }

        # First occurrence of this word: start a new pair.
        push @counta, [ $entry, 1 ] unless $seen;
    }
}

# Tally word frequencies into @countg, a list of [ word, count ] pairs,
# using grep to locate the existing pair for each word. Any previous tally
# is discarded first. The loop variable aliases the elements of @words, so
# every word in @words is lowercased in place.
sub do_greparray {
    @countg = ();
    for my $word (@words) {
        $word = lc $word;

        # Match on the stored word (element 0). Comparing against element 1
        # (the count) would never match a word, so every occurrence would be
        # appended as a fresh [ word, 1 ] pair.
        my @entries = grep { $word eq $_->[0] } @countg;
        if (@entries) {
            # grep hands back the very array refs held in @countg, so this
            # bumps the stored count.
            $entries[0][1]++;
        }
        else {
            push @countg, [ $word, 1 ];
        }
    }
}

# Return one word for the benchmark input: either a word already present in
# @words or a freshly generated string of random letters from $letters.
sub make_word {
    # When @words is non-empty, about half of the calls reuse an existing
    # word picked at random. rand() is only consulted if there is something
    # to reuse.
    my $reuse = @words && rand() < 0.5;
    return $words[ int rand(scalar @words) ] if $reuse;

    # Length is 1 + int(rand(3 * rand(3))), i.e. somewhere in 1 .. 9.
    # rand is a named unary operator, so the unparenthesised form
    # "rand 3 * rand 3" takes the whole product as its argument; the
    # parentheses here only make that explicit.
    # NOTE(review): if rand(3) * rand(3) was intended, the distribution
    # differs - confirm with the author.
    my $upper = 3 * rand(3);
    my $len   = 1 + int rand($upper);

    # Draw $len letters, each chosen uniformly from $letters.
    my $alphabet_size = length $letters;
    return join '',
        map { substr($letters, int rand($alphabet_size), 1) } 1 .. $len;
}

# Benchmark the three counting routines against progressively larger word
# lists. The iteration count shrinks as the list grows, so every round
# pushes roughly one million words through each routine.
for my $num_words (100, 1000, 10000, 100000, 1000000) {
    # Top the list up to exactly $num_words entries. Words left over from
    # the previous round are kept, so only the shortfall is generated.
    # Pushing $num_words new words every round would grow the list to
    # 1100, 11100, ... entries and contradict the size reported below.
    push @words, make_word() while @words < $num_words;
    my $num_iterations = 1000000 / $num_words;

    print "\n***** Comparing a list of $num_words words "
    .     "$num_iterations times *****\n\n";
    cmpthese($num_iterations, {
        'hash'=>\&do_hash,
        'array'=>\&do_array,
        'greparray'=>\&do_greparray,
    });
}

=begin comment

# For debugging, enable this chunk (by removing the =begin comment,
# =end comment and =cut lines).
# It walks the array-based tally from the final benchmark run and prints one
# row per word: the word, then its hash, array and grep-array counts.
# With $showall set, every word is printed; otherwise only words whose three
# counts disagree are printed. A count of -1 means the word is missing from
# that tally.

for my $ar (@counta) {
    my ($word, $cnta) = @$ar;
    my $cnth = $counth{$word} // -1;
    my @ag = grep { $word eq $$_[0] } @countg;
    my $cntg = @ag ? $ag[0][1] : -1;
    # Skip a row only when it is unremarkable: we are not dumping everything
    # and all three methods agree on the count.
    next unless $showall or $cnta != $cnth or $cnta != $cntg;
    # Signed format so that the -1 "missing" marker prints as -1.
    printf "%-10.10s % 6d % 6d % 6d\n",
           $word, $cnth, $cnta, $cntg;
}

=end comment

=cut