use strict;
use warnings;
use Data::Dump 'pp';    # Or Data::Dumper, but I prefer the pp output.

# Demonstration: build a hash from one "file" (set one), then merge the
# replacement values listed in a second "file" (set two) into it.
#
# Note that I'm using scalars here to represent the files: Perl can open a
# reference-to-scalar as if it were a file.
#
# NOTE(review): both heredocs were damaged in the reviewed copy of this
# script. The contents of set one were lost completely and the first new
# value of set two was missing, so the sample data below is a reconstruction
# that merely fits the parsing code -- TODO confirm against the real input.

# Set one: "<value> <key>" pairs, one per line.
my $setOne = <<'EOF';
abc 123
abc 456
def 456
abc 789
EOF

# Set two: "<new value> ==> <key>-><old value>", one per line.
my $setTwo = <<'EOF';
xxx ==> 456->abc
xxx ==> 789->abc
yyy ==> 456->abc
yyy ==> 456->abc
zzz ==> 123->abc
EOF

my %hash;

# See? Here I'm opening a file handle to read from $setOne.
# Typically, you'd put "somefilename.txt" in place of \$setOne,
# but this code is for demonstration purposes only.
open my $fileOne, "<", \$setOne or die "Couldn't read set one: $!";
_read_set_one($fileOne, \%hash);
close $fileOne;

# Let's see what we've got so far.
pp \%hash;
# print Dumper \%hash;    # Again, I prefer pp, but there are always multiple options.

# Fine then. Let's read that other file.
open my $fileTwo, "<", \$setTwo or die "Couldn't read set two: $!";
_merge_set_two($fileTwo, \%hash);
close $fileTwo;

# Final results:
pp \%hash;

# Read "<value> <key>" lines from $fh into the hash referenced by $hash.
#
# A key seen once maps to its plain value ($hash->{key} = 'value').  As soon
# as a key is seen with a second value, the entry is promoted to a hash ref
# whose keys are the values seen so far, each mapped to undef.
#
# Parameters: $fh   - readable file handle
#             $hash - hash ref that is filled in place
# Returns nothing.
sub _read_set_one {
    my ($fh, $hash) = @_;

    while (my $line = <$fh>) {
        chomp $line;    # Goodbye, newline characters.

        # The order of values and keys is turned around in set one: the value
        # comes first.  split ' ' ignores leading whitespace and yields an
        # empty list for a blank line.
        my ($value, $key) = split ' ', $line;

        # Skip blank or incomplete lines instead of storing an undef key.
        next if !defined $key;

        if (ref $hash->{$key}) {
            # Already a hash ref, so just add the new value as another key.
            # (The name '$value' is a little misleading here, since it is
            # used as a key... Well, such is life.)
            $hash->{$key}{$value} = undef;
        }
        elsif (exists $hash->{$key}) {
            # A plain value is already there and we got another one, so the
            # entry has to become a hash ref holding both.
            $hash->{$key} = { $hash->{$key} => undef, $value => undef };
        }
        else {
            # First time we see this key: store the value rather plainly.
            $hash->{$key} = $value;
        }
    }

    return;
}

# Merge "<new value> ==> <key>-><old value>" lines from $fh into $hash.
#
# For every line, the new value is attached to $hash->{key}{old value}:
#   * undef slot            -> replaced by the new value
#   * slot holding a value  -> promoted to an array ref [old, new]
#   * slot holding an array -> new value appended
# A new value that is already recorded for that slot is never added twice.
# Lines that do not match the expected format are skipped.
#
# Parameters: $fh   - readable file handle
#             $hash - hash ref as built by _read_set_one, updated in place
# Returns nothing.
sub _merge_set_two {
    my ($fh, $hash) = @_;

    LINE:
    while (my $line = <$fh>) {
        chomp $line;

        my ($newvalue, $key, $oldvalue)
            = $line =~ m/^(.+)\s+==>\s+(.+)->(.+)$/;

        # A failed match leaves all three undef; skip such lines rather than
        # polluting the hash with an empty key (and a pile of warnings).
        next LINE if !defined $key;

        my $entry = $hash->{$key};

        if (!ref $entry) {
            # The entry is missing or still a plain value from set one.
            # Turn it into a hash ref, keeping a plain value that differs
            # from $oldvalue instead of silently throwing it away.
            my %values = ($oldvalue => $newvalue);
            if (defined $entry && $entry ne $oldvalue) {
                $values{$entry} = undef;
            }
            $hash->{$key} = \%values;
            next LINE;
        }

        # The key is known, but set one never listed this old value for it.
        # As in the original version of this script, such lines are ignored.
        next LINE if !exists $entry->{$oldvalue};

        my $current = $entry->{$oldvalue};

        if (ref $current) {
            # Already an array ref of new values: append unless it is a
            # duplicate.
            push @{$current}, $newvalue
                unless grep { $_ eq $newvalue } @{$current};
        }
        elsif (!defined $current) {
            # Still undef, so we can simply replace it with the new value.
            $entry->{$oldvalue} = $newvalue;
        }
        elsif ($current ne $newvalue) {
            # A different value is already stored: convert the slot into an
            # array ref so that it can hold multiple values.  (An identical
            # value is a duplicate and is left alone.)
            $entry->{$oldvalue} = [ $current, $newvalue ];
        }
    }

    return;
}