#!/usr/bin/perl use warnings; use strict; # fcm: fuzzy c-means implementation in Perl # usage: $fcm [number_of_clusters] [fuzzification_factor] # [max_iter] [tolerace] # returns: prototypes, partition_matrix # # # reading data # my ( @data, @tmp, $rows, $columns ); while (defined(my $line = )) { chomp ($line); @tmp = split /\s+/, $line; push @data, [ @tmp ]; } $columns = @tmp; $rows = @data; # # assigning other variables # my $number_of_clusters = shift @ARGV; my $fuzzification_factor = shift @ARGV; my $max_iter = shift @ARGV; my $tolerance = shift @ARGV; unless (defined($number_of_clusters)) { $number_of_clusters = 2; } unless (defined($fuzzification_factor)) { $fuzzification_factor = 2.0; } unless (defined($max_iter)) { $max_iter = 4000; } unless (defined($tolerance)) { $tolerance = 0.00001; } # # initializing partition matrices # my @previous_partition_matrix; my @current_partition_matrix = initialize_partition_matrix($number_of_clusters, $rows); # # fuzzy c means implementation # my ($iter, @dist, $sum_numerator, $sum_denominator, @prototypes); $iter = 0; while ( 1 ){ # computing each prototype for (my $i = 0; $i < $number_of_clusters; $i++) { for (my $k = 0; $k < $columns; $k++) { $sum_numerator = 0; $sum_denominator = 0; for (my $j = 0; $j < $rows; $j++) { $sum_numerator += $data[$j][$k] * ($current_partition_matrix[$i][$j] ** $fuzzification_factor); $sum_denominator += $current_partition_matrix[$i][$j] ** $fuzzification_factor; } $prototypes[$i][$k] = $sum_numerator / $sum_denominator; } } # copying partition matrix for (my $i = 0; $i < $number_of_clusters; $i++) { for (my $j = 0; $j < $rows; $j++) { $previous_partition_matrix[$i][$j] = $current_partition_matrix[$i][$j]; } } # updating the partition matrix my ($sum, @pattern_is_prototype); for (my $i = 0; $i < $number_of_clusters; $i++) { for (my $j = 0; $j < $rows; $j++) { $dist[$i][$j] = distance( $prototypes[$i], $data[$j] ); } } for (my $i = 0; $i < $number_of_clusters; $i++) { for (my $j = 0; $j < $rows; $j++) { $sum = 0.0; if ( $dist[$i][$j] == 0 ) { $current_partition_matrix[$i][$j] = 1.0; } else { for (my $l = 0; $l < $number_of_clusters; $l++) { $sum += ( $dist[$i][$j]/$dist[$l][$j] ) ** ( 2.0 / ($fuzzification_factor - 1.0) ) unless ($dist[$l][$j] == 0); } $current_partition_matrix[$i][$j] = 1.0 / $sum unless ($sum == 0); } } } # checking stop conditions last if ( ++$iter == $max_iter ); my ($dif, $max_dif); $max_dif = 0; CLUSTER: for (my $i = 0; $i < $number_of_clusters; $i++){ for (my $j = 0; $j < $rows; $j++){ $dif = abs( $current_partition_matrix[$i][$j] - $previous_partition_matrix[$i][$j] ); $max_dif = $dif if ($dif > $max_dif); last CLUSTER if ( $max_dif > $tolerance ); } } # print "max dif= $max_dif\n"; last if ($max_dif < $tolerance); } # # Performance Index calculation # my $performance_index; for (my $i = 0; $i < $number_of_clusters; $i++){ for (my $j = 0; $j < $rows; $j++){ my $dij = distance( $prototypes[$i], $data[$j] ); $performance_index += ($current_partition_matrix[$i][$j] ** $fuzzification_factor) * $dij; } } print "Clustering completed ...\n"; for (my $i = 0; $i < $number_of_clusters; $i++) { for (my $k = 0; $k < $columns; $k++) { print "Prototype[$i][$k]: $prototypes[$i][$k]\n"; } } print "performance index: $performance_index\n"; print "number of iterations: $iter\n"; for (my $i = 0; $i < $number_of_clusters; $i++) { for (my $j = 0; $j < $rows; $j++){ print "U[$i][$j] = $current_partition_matrix[$i][$j]\n"; } } # ================================ # initialize_partition_matrix # partition_matrix = # initialize_partition_matrix( # num_clusters, num_patterns) # ================================ sub initialize_partition_matrix { srand; my (@partition_matrix, @column_sum); for (my $i = 0; $i < $_[0]; $i++){ for (my $j = 0; $j < $_[1]; $j++){ $partition_matrix[$i][$j] = rand; $column_sum[$j] += $partition_matrix[$i][$j]; } } for (my $i = 0; $i < $_[0]; $i++){ for (my $j = 0; $j < $_[1]; $j++){ die "column [$j] sum is equal to zero\n" unless $column_sum[$j]; $partition_matrix[$i][$j] /= $column_sum[$j]; } } return @partition_matrix; } # ==================================== # compute distance between two vectors # dist = distance( vector1, vector2 ) # ==================================== sub distance { my $vector1 = shift; my $vector2 = shift; my $sum = 0; for ( my $i = 0; $i < scalar @{$vector1}; $i++ ){ my $difference = ${ $vector1 }[$i] - ${ $vector2 }[$i]; $sum += $difference * $difference; } return sqrt( $sum ); } __DATA__ 4.0 4.0 4.0 5.0 5.0 4.0 5.5 6.0 5.0 5.0 4.5 4.5 5.0 5.5 5.5 5.0 5.0 4.5 4.5 5.0 9.5 9.0 9.0 9.5 8.0 8.0 7.0 8.0 8.0 7.0 8.5 7.0 7.0 8.5 7.0 7.0 7.5 7.0 6.5 8.0 8.0 6.5 6.5 7.0 10.0 10.0 10.0 9.0 10.0 9.0 9.5 10.0 8.0 10.0 9.5 9.5 9.0 9.0 9.0 10.0