# Sketch of a single pass over the words of an abstract. Each distinct word
# is given a small integer id, and a sliding window of the most recent ids is
# maintained so that keys for N..M word phrases can be built from it.
#
# NOTE(review): $n and $m are not declared in this section. They are assumed
# to be the N (shortest phrase) and M (longest phrase) lengths referred to in
# the comments below and to be defined elsewhere -- confirm.

my @aWords;                   # words from the abstract
my %hWords;                   # maps words to their id
my $iUniqueWordsSoFar = 0;    # cheap way to assign ids: last id handed out
my @aIds;                     # sliding window with ids from the last M words

for my $iPos (0 .. ($#aWords - $n)) {

    # Look up the word's id, assigning the next free one on first sight.
    my $sWord = $aWords[$iPos];
    if (!exists $hWords{$sWord}) {
        $hWords{$sWord} = ++$iUniqueWordsSoFar;
    }
    my $iWord = $hWords{$sWord};

    # Update the sliding window of ids for the last M words. The oldest id is
    # dropped only once the window is already full; shifting whenever the
    # window is merely non-empty would keep it at a single id forever.
    # If $m is undefined this comparison is always true and the window
    # degenerates to one id.
    shift(@aIds) if scalar(@aIds) >= $m;
    push @aIds, $iWord;

    # TODO: add key to hash for N..M length phrases by taking the
    # first X elements of the sliding window to construct the key.
}

# TODO: final pass: convert what is left in @aIds to keys and
# update the appropriate phrase hashes.