use Carp;
use List::Util qw( shuffle );
use strict;
use warnings;

# an individual represents a distribution of points among clusters.
# that is, it is a specific allocation of points to clusters.
# in the initial population, in each individual, the points are randomly assigned to clusters.

# each individual is an array. 
# each element represents a point in the data set, and its value
# is the number of the cluster to which it has been assigned.

my @datapoints;

# The subs in Point:: need to be customized for the type/representation of a "point".

sub Point::set_metric; # "distance" or "area" or something like that. small values mean "close"
sub Point::as_string;

sub Point::ScalarNumber::set_metric
{
	my $set = shift;
	my @set = @datapoints[@$set];
	@set == 0 and return 1;
	@set == 1 and return 2;
	# RMS
	my $total = 0;
	my $n = 0;
	for my $i ( 1 .. $#set )
	{
		for my $j ( $i .. $#set )
		{
			my $dist = abs( $set[$i-1] - $set[$j] );
			$total += $dist ** 2;
			$n++;
		}
	}
	sqrt( $total / $n )
}
sub Point::ScalarNumber::as_string
{
	$_[0]
}

sub Point::NumberPair::set_metric
{
	my $set = shift;
	my @set = @datapoints[@$set];
	@set == 0 and return 1;
	@set == 1 and return 2;
	# RMS
	my $total = 0;
	my $n = 0;
	for my $i ( 1 .. $#set )
	{
		for my $j ( $i .. $#set )
		{
			my $dist2 = ( ( $set[$i-1][0] - $set[$j][0] ) ** 2 )
			          + ( ( $set[$i-1][1] - $set[$j][1] ) ** 2 );
			$total += $dist2;
			$n++;
		}
	}
	sqrt( $total / $n )
}
sub Point::NumberPair::as_string
{
	"[$_[0][0],$_[0][1]]"
}

#######################################################################

my @clusters;

sub Ind::new_randomized
{
	#@datapoints <= 0 and croak "No datapoints defined!\n";
	#@datapoints <  1 and croak "Only one cluster defined!\n";
	#@clusters   <= 0 and croak "No clusters defined!\n";
	#@clusters   <  1 and croak "Only one cluster defined!\n";
	[ map { int( rand @clusters ) } @datapoints ]
}

sub Ind::clone
{
	my $ind = shift;
	[ @$ind ]
}

# optional arg: number of points to move
sub Ind::mutate
{
	my( $ind, $n ) = @_;
	for my $i ( 0 .. ($n||1) )
	{
		my $j = int( rand @datapoints );
		$ind->[$j] = int( rand @clusters );
	}
	$ind
}

	sub Ind::_crossover_points
	{
		my $l = @datapoints;
		my $seglen = 1 + int rand( $l - 1 );
		my $start = int rand( $l - $seglen );
		( $start .. ($start+$seglen-1) )
	}

sub Ind::crossover
{
	my( $ind1, $ind2 ) = @_;
	my @xo = Ind::_crossover_points();
	for my $i ( @xo )
	{
		( $ind1->[$i], $ind2->[$i] ) =
		( $ind2->[$i], $ind1->[$i] )
	}
}

sub Ind::fitness
{
	my $ind = shift;
	my @cluster_points = map {
		my $cl = $_;
		[ grep { $ind->[$_] eq $cl } 0 .. $#{$ind} ]
	} 0 .. $#clusters;

	my $total_metric = 0;
	for my $ci ( 0 .. $#cluster_points )
	{
		my $val = Point::set_metric( $cluster_points[$ci] );
		$total_metric += $val;
	}

	1000/$total_metric # convert it to "large = good"
}

sub Ind::display
{
	my $ind = shift;
	my @cluster_points = map {
		my $cl = $_;
		[ grep { $ind->[$_] eq $cl } 0 .. $#{$ind} ]
	} 0 .. $#clusters;

	my $total_metric = 0;
	for my $ci ( 0 .. $#cluster_points )
	{
		my $val = Point::set_metric( $cluster_points[$ci] );
		$total_metric += $val;
		printf "$ci: Cluster $clusters[$ci]: %5.2f ( ", $val;
		print join ' ', map { Point::as_string($_) }
			@datapoints[@{$cluster_points[$ci]}];
		print " )\n";
	}
	printf "Total metric: %.2f\n", $total_metric;

	$ind
}

#######################################################################

if(0)
{
@datapoints = shuffle( 11..14, 21..24, 31..34, 41..44 );
*Point::set_metric = \&Point::ScalarNumber::set_metric;
*Point::as_string = \&Point::ScalarNumber::as_string;
}
else
{
@datapoints = shuffle(
	[ 1, 2], [ 2, 1], [ 2, 3], [ 3, 2],
	[ 1,12], [ 2,11], [ 2,13], [ 3,12],
	[11, 2], [12, 1], [12, 3], [13, 2],
	[11,12], [12,11], [12,13], [13,12],
);
*Point::set_metric = \&Point::NumberPair::set_metric;
*Point::as_string = \&Point::NumberPair::as_string;
}

@clusters = ( 1 .. 4 );

my @pop =
	sort { $b->[0] <=> $a->[0] }
	map { [ Ind::fitness($_), $_ ] }
	map { Ind::new_randomized }
	1 .. 100;

#print "Before:"; printf " %.1f", $_->[0] for @pop; print "\n";

	# this clones an element of @pop
	sub clone { [ $_[0]->[0], Ind::clone($_[0]->[1]) ] }

for my $iter ( 1 .. 200 )
{
	# kill the bottom 30:
	splice @pop, @pop-30, 30;

	# make 10 new ones:
	push @pop,
	map { [ Ind::fitness($_), $_ ] }
	map { Ind::new_randomized }
	1 .. 10;

	# clone the top 20:
	push @pop, map clone($_), @pop[0 .. 19];

	# mutate the top 20:
	for my $e ( @pop[0 .. 19] )
	{
		my $n = 1;
		unless ( int(rand 2) )
		{
			$n++;
			unless ( int(rand 3) )
			{
				$n++;
				unless ( int(rand 4) )
				{
					$n++;
				}
			}
		}
		#warn "mut $n\n";
		Ind::mutate( $e->[1], $n );
		$e->[0] = Ind::fitness( $e->[1] );
	}

	# sort by fitness again:
	@pop = sort { $b->[0] <=> $a->[0] } @pop;
	# print "Iter $iter: $pop[0][0]\n";
}

# print "\nAfter:"; printf " %.1f", $_->[0] for @pop; print "\n";

Ind::display( $pop[0][1] );