#!/usr/bin/perl
#
# Build an inverted index of the text files in a directory.
#
# For every word found in /home/maverick/tmp/*.txt, the Berkeley DB B-tree
# file /home/maverick/tmp/index_dbm stores a '~'-separated list of the files
# that contain the word. Each file is listed at most once per word. Matching
# is case-sensitive: words are indexed exactly as they appear.
use strict;
use warnings;

# Berkeley DBMs are my fav.
use DB_File;

my $index_path = "/home/maverick/tmp/index_dbm";
my $text_glob  = "/home/maverick/tmp/*.txt";

my %Index;

# Remove the old index and start fresh. A missing index is fine, but an
# existing one that cannot be removed would leave stale entries behind and
# every file name would be appended a second time, so treat that as fatal.
if (-e $index_path) {
    unlink($index_path) or die "Can't remove $index_path: $!";
}

tie(%Index, 'DB_File', $index_path, O_RDWR|O_CREAT, 0640, $DB_BTREE)
    or die "Tie Failed: $!";

foreach my $file (glob($text_glob)) {
    # Three-argument open with a lexical handle: the file name can never be
    # misread as an open mode, and the handle is scoped to this iteration.
    open(my $fh, '<', $file) or die "Can't open $file: $!";

    # Words already recorded for this file, so it is listed once per word.
    my %seen;

    # Read the file line by line and break each line into words.
    while (my $line = <$fh>) {
        foreach my $word (split(/\W+/, $line)) {
            # split() produces a leading empty field when the line starts
            # with a non-word character; that is not a word.
            next if $word eq '';
            next if $seen{$word}++;

            # Add this file to the list of matches for this word.
            # NOTE: '~' is assumed to be safe as a separator, i.e. it must
            # not occur in any indexed file name.
            my $files = $Index{$word};
            $Index{$word} = defined $files ? "$files~$file" : $file;
        }
    }

    close($fh);
}

untie %Index;