The parameters are not well isolated in this code. They are:

`
use Carp;
use List::Util qw( shuffle );
use strict;
use warnings;
# an individual represents a distribution of points among clusters.
# that is, it is a specific allocation of points to clusters.
# in the initial population, in each individual, the points are random
+ly assigned to clusters.
# each individual is an array.
# each element represents a point in the data set, and its value
# is the number of the cluster to which it has been assigned.
my @datapoints;
# The subs in Point:: need to be customized for the type/representatio
+n of a "point".
sub Point::set_metric; # "distance" or "area" or something like that.
+small values mean "close"
sub Point::as_string;
sub Point::ScalarNumber::set_metric
{
my $set = shift;
my @set = @datapoints[@$set];
@set == 0 and return 1;
@set == 1 and return 2;
# RMS
my $total = 0;
my $n = 0;
for my $i ( 1 .. $#set )
{
for my $j ( $i .. $#set )
{
my $dist = abs( $set[$i-1] - $set[$j] );
$total += $dist ** 2;
$n++;
}
}
sqrt( $total / $n )
}
sub Point::ScalarNumber::as_string
{
$_[0]
}
sub Point::NumberPair::set_metric
{
my $set = shift;
my @set = @datapoints[@$set];
@set == 0 and return 1;
@set == 1 and return 2;
# RMS
my $total = 0;
my $n = 0;
for my $i ( 1 .. $#set )
{
for my $j ( $i .. $#set )
{
my $dist2 = ( ( $set[$i-1][0] - $set[$j][0] ) ** 2 )
+ ( ( $set[$i-1][1] - $set[$j][1] ) ** 2 );
$total += $dist2;
$n++;
}
}
sqrt( $total / $n )
}
sub Point::NumberPair::as_string
{
"[$_[0][0],$_[0][1]]"
}
######################################################################
+#
my @clusters;
sub Ind::new_randomized
{
#@datapoints <= 0 and croak "No datapoints defined!\n";
#@datapoints < 1 and croak "Only one cluster defined!\n";
#@clusters <= 0 and croak "No clusters defined!\n";
#@clusters < 1 and croak "Only one cluster defined!\n";
[ map { int( rand @clusters ) } @datapoints ]
}
sub Ind::clone
{
my $ind = shift;
[ @$ind ]
}
# optional arg: number of points to move
sub Ind::mutate
{
my( $ind, $n ) = @_;
for my $i ( 0 .. ($n||1) )
{
my $j = int( rand @datapoints );
$ind->[$j] = int( rand @clusters );
}
$ind
}
sub Ind::_crossover_points
{
my $l = @datapoints;
my $seglen = 1 + int rand( $l - 1 );
my $start = int rand( $l - $seglen );
( $start .. ($start+$seglen-1) )
}
sub Ind::crossover
{
my( $ind1, $ind2 ) = @_;
my @xo = Ind::_crossover_points();
for my $i ( @xo )
{
( $ind1->[$i], $ind2->[$i] ) =
( $ind2->[$i], $ind1->[$i] )
}
}
sub Ind::fitness
{
my $ind = shift;
my @cluster_points = map {
my $cl = $_;
[ grep { $ind->[$_] eq $cl } 0 .. $#{$ind} ]
} 0 .. $#clusters;
my $total_metric = 0;
for my $ci ( 0 .. $#cluster_points )
{
my $val = Point::set_metric( $cluster_points[$ci] );
$total_metric += $val;
}
1000/$total_metric # convert it to "large = good"
}
sub Ind::display
{
my $ind = shift;
my @cluster_points = map {
my $cl = $_;
[ grep { $ind->[$_] eq $cl } 0 .. $#{$ind} ]
} 0 .. $#clusters;
my $total_metric = 0;
for my $ci ( 0 .. $#cluster_points )
{
my $val = Point::set_metric( $cluster_points[$ci] );
$total_metric += $val;
printf "$ci: Cluster $clusters[$ci]: %5.2f ( ", $val;
print join ' ', map { Point::as_string($_) }
@datapoints[@{$cluster_points[$ci]}];
print " )\n";
}
printf "Total metric: %.2f\n", $total_metric;
$ind
}
######################################################################
+#
if(0)
{
@datapoints = shuffle( 11..14, 21..24, 31..34, 41..44 );
*Point::set_metric = \&Point::ScalarNumber::set_metric;
*Point::as_string = \&Point::ScalarNumber::as_string;
}
else
{
@datapoints = shuffle(
[ 1, 2], [ 2, 1], [ 2, 3], [ 3, 2],
[ 1,12], [ 2,11], [ 2,13], [ 3,12],
[11, 2], [12, 1], [12, 3], [13, 2],
[11,12], [12,11], [12,13], [13,12],
);
*Point::set_metric = \&Point::NumberPair::set_metric;
*Point::as_string = \&Point::NumberPair::as_string;
}
@clusters = ( 1 .. 4 );
my @pop =
sort { $b->[0] <=> $a->[0] }
map { [ Ind::fitness($_), $_ ] }
map { Ind::new_randomized }
1 .. 100;
#print "Before:"; printf " %.1f", $_->[0] for @pop; print "\n";
# this clones an element of @pop
sub clone { [ $_[0]->[0], Ind::clone($_[0]->[1]) ] }
for my $iter ( 1 .. 200 )
{
# kill the bottom 30:
splice @pop, @pop-30, 30;
# make 10 new ones:
push @pop,
map { [ Ind::fitness($_), $_ ] }
map { Ind::new_randomized }
1 .. 10;
# clone the top 20:
push @pop, map clone($_), @pop[0 .. 19];
# mutate the top 20:
for my $e ( @pop[0 .. 19] )
{
my $n = 1;
unless ( int(rand 2) )
{
$n++;
unless ( int(rand 3) )
{
$n++;
unless ( int(rand 4) )
{
$n++;
}
}
}
#warn "mut $n\n";
Ind::mutate( $e->[1], $n );
$e->[0] = Ind::fitness( $e->[1] );
}
# sort by fitness again:
@pop = sort { $b->[0] <=> $a->[0] } @pop;
# print "Iter $iter: $pop[0][0]\n";
}
# print "\nAfter:"; printf " %.1f", $_->[0] for @pop; print "\n";
Ind::display( $pop[0][1] );
`

Note that, as it stands, it's not doing any crossover, only mutation, so
probably it isn't a GA, technically.

I'm sure improvements could be made in this area.

Comment onRe: Making sense of data: Clustering OR A coding challengeDownloadCode