This subroutine accepts a list of numbers and returns a list of hashrefs, one for each number that occurs more than once. Each hashref looks like { $number => [ $idx1, $idx2, $idx_n, ... ] }, where the indices are the positions at which that number is found in the original list.
use strict;
use warnings;
use Data::Dump 'dump';
# Sample input: 2, 7 and 42 each occur more than once.
my @numbers = ( 1, 2, 2, 4, 8, 42, 7, 2, 6, 7, 9, 42 );
# Each element of the result is a single-pair hashref: { value => [ indices ] }.
my @duplicate_locations = find_duplicates( @numbers );
dump @duplicate_locations;
# find_duplicates( @list )
#
# Locates every value that occurs more than once in @list.
#
# Returns a list of single-pair hashrefs, { $value => [ $idx, ... ] }, one per
# duplicated value, where the array ref holds every zero-based position at
# which that value occurs (in ascending order).  Values that occur only once
# are omitted, so an empty or duplicate-free list yields an empty list.
#
# The hashrefs are returned in the order in which each duplicated value first
# appears in @list, so the result is the same on every run rather than
# depending on Perl's internal hash ordering.
sub find_duplicates {
    my @list = @_;

    my %positions_of;    # value => [ every index at which it occurs ]
    my @first_seen;      # distinct values, in order of first appearance

    for my $idx ( 0 .. $#list ) {
        my $item = $list[$idx];
        push @first_seen, $item if !exists $positions_of{$item};
        push @{ $positions_of{$item} }, $idx;
    }

    my @rv;
    for my $item (@first_seen) {
        my $positions = $positions_of{$item};
        next if @{$positions} < 2;    # keep only values seen more than once
        push @rv, { $item => $positions };
    }
    return @rv;
}
The output (the order in which the hashrefs are listed is not significant)...
({ 42 => [5, 11] }, { 7 => [6, 9] }, { 2 => [1, 2, 7] })
Update: Simplifying the data structure returned also simplifies the subroutine that produces it:
use strict;
use warnings;
use Data::Dump 'dump';
# Sample input: 2, 7 and 42 each occur more than once.
my @numbers = ( 1, 2, 2, 4, 8, 42, 7, 2, 6, 7, 9, 42 );
# find_duplicates returns a flat key/value list; the braces collect it into a
# single hashref for dump.
dump { find_duplicates( @numbers ) };
# find_duplicates( @list )
#
# Maps each value that occurs more than once in @list to an array ref of the
# zero-based positions at which it occurs.  Values that occur only once are
# dropped.  The result is returned as a hash (a flat key/value list in list
# context), so callers normally assign it to a hash or wrap it in { }.
sub find_duplicates {
    my @list = @_;

    # Group the positions of every element under its value.
    my %positions_of;
    for my $pos ( 0 .. $#list ) {
        push @{ $positions_of{ $list[$pos] } }, $pos;
    }

    # Discard every value that was seen fewer than two times.
    for my $value ( keys %positions_of ) {
        next if @{ $positions_of{$value} } >= 2;
        delete $positions_of{$value};
    }

    return %positions_of;
}
Now the output is...
{ 2 => [1, 2, 7], 7 => [6, 9], 42 => [5, 11] }
...so the sub now returns a hash (as a flat key/value list that the caller collects into a hash) whose keys are the values from your original list, and whose values are array refs listing the positions at which the corresponding elements are found. Any values that weren't duplicated have been stripped away.
|