use Carp; use List::Util qw( shuffle ); use strict; use warnings; # an individual represents a distribution of points among clusters. # that is, it is a specific allocation of points to clusters. # in the initial population, in each individual, the points are randomly assigned to clusters. # each individual is an array. # each element represents a point in the data set, and its value # is the number of the cluster to which it has been assigned. my @datapoints; # The subs in Point:: need to be customized for the type/representation of a "point". sub Point::set_metric; # "distance" or "area" or something like that. small values mean "close" sub Point::as_string; sub Point::ScalarNumber::set_metric { my $set = shift; my @set = @datapoints[@$set]; @set == 0 and return 1; @set == 1 and return 2; # RMS my $total = 0; my $n = 0; for my $i ( 1 .. $#set ) { for my $j ( $i .. $#set ) { my $dist = abs( $set[$i-1] - $set[$j] ); $total += $dist ** 2; $n++; } } sqrt( $total / $n ) } sub Point::ScalarNumber::as_string { $_[0] } sub Point::NumberPair::set_metric { my $set = shift; my @set = @datapoints[@$set]; @set == 0 and return 1; @set == 1 and return 2; # RMS my $total = 0; my $n = 0; for my $i ( 1 .. $#set ) { for my $j ( $i .. $#set ) { my $dist2 = ( ( $set[$i-1][0] - $set[$j][0] ) ** 2 ) + ( ( $set[$i-1][1] - $set[$j][1] ) ** 2 ); $total += $dist2; $n++; } } sqrt( $total / $n ) } sub Point::NumberPair::as_string { "[$_[0][0],$_[0][1]]" } ####################################################################### my @clusters; sub Ind::new_randomized { #@datapoints <= 0 and croak "No datapoints defined!\n"; #@datapoints < 1 and croak "Only one cluster defined!\n"; #@clusters <= 0 and croak "No clusters defined!\n"; #@clusters < 1 and croak "Only one cluster defined!\n"; [ map { int( rand @clusters ) } @datapoints ] } sub Ind::clone { my $ind = shift; [ @$ind ] } # optional arg: number of points to move sub Ind::mutate { my( $ind, $n ) = @_; for my $i ( 0 .. ($n||1) ) { my $j = int( rand @datapoints ); $ind->[$j] = int( rand @clusters ); } $ind } sub Ind::_crossover_points { my $l = @datapoints; my $seglen = 1 + int rand( $l - 1 ); my $start = int rand( $l - $seglen ); ( $start .. ($start+$seglen-1) ) } sub Ind::crossover { my( $ind1, $ind2 ) = @_; my @xo = Ind::_crossover_points(); for my $i ( @xo ) { ( $ind1->[$i], $ind2->[$i] ) = ( $ind2->[$i], $ind1->[$i] ) } } sub Ind::fitness { my $ind = shift; my @cluster_points = map { my $cl = $_; [ grep { $ind->[$_] eq $cl } 0 .. $#{$ind} ] } 0 .. $#clusters; my $total_metric = 0; for my $ci ( 0 .. $#cluster_points ) { my $val = Point::set_metric( $cluster_points[$ci] ); $total_metric += $val; } 1000/$total_metric # convert it to "large = good" } sub Ind::display { my $ind = shift; my @cluster_points = map { my $cl = $_; [ grep { $ind->[$_] eq $cl } 0 .. $#{$ind} ] } 0 .. $#clusters; my $total_metric = 0; for my $ci ( 0 .. $#cluster_points ) { my $val = Point::set_metric( $cluster_points[$ci] ); $total_metric += $val; printf "$ci: Cluster $clusters[$ci]: %5.2f ( ", $val; print join ' ', map { Point::as_string($_) } @datapoints[@{$cluster_points[$ci]}]; print " )\n"; } printf "Total metric: %.2f\n", $total_metric; $ind } ####################################################################### if(0) { @datapoints = shuffle( 11..14, 21..24, 31..34, 41..44 ); *Point::set_metric = \&Point::ScalarNumber::set_metric; *Point::as_string = \&Point::ScalarNumber::as_string; } else { @datapoints = shuffle( [ 1, 2], [ 2, 1], [ 2, 3], [ 3, 2], [ 1,12], [ 2,11], [ 2,13], [ 3,12], [11, 2], [12, 1], [12, 3], [13, 2], [11,12], [12,11], [12,13], [13,12], ); *Point::set_metric = \&Point::NumberPair::set_metric; *Point::as_string = \&Point::NumberPair::as_string; } @clusters = ( 1 .. 4 ); my @pop = sort { $b->[0] <=> $a->[0] } map { [ Ind::fitness($_), $_ ] } map { Ind::new_randomized } 1 .. 100; #print "Before:"; printf " %.1f", $_->[0] for @pop; print "\n"; # this clones an element of @pop sub clone { [ $_[0]->[0], Ind::clone($_[0]->[1]) ] } for my $iter ( 1 .. 200 ) { # kill the bottom 30: splice @pop, @pop-30, 30; # make 10 new ones: push @pop, map { [ Ind::fitness($_), $_ ] } map { Ind::new_randomized } 1 .. 10; # clone the top 20: push @pop, map clone($_), @pop[0 .. 19]; # mutate the top 20: for my $e ( @pop[0 .. 19] ) { my $n = 1; unless ( int(rand 2) ) { $n++; unless ( int(rand 3) ) { $n++; unless ( int(rand 4) ) { $n++; } } } #warn "mut $n\n"; Ind::mutate( $e->[1], $n ); $e->[0] = Ind::fitness( $e->[1] ); } # sort by fitness again: @pop = sort { $b->[0] <=> $a->[0] } @pop; # print "Iter $iter: $pop[0][0]\n"; } # print "\nAfter:"; printf " %.1f", $_->[0] for @pop; print "\n"; Ind::display( $pop[0][1] );