#!/usr/bin/perl use warnings; use strict; use PDL; use PDL::NiceSlice; # test_fcm: fuzzy c-means implementation in Perl # usage: test_fcm [number_of_clusters] [fuzzification_factor] # [max_iter] [tolerace] # returns: prototypes, partition_matrix # # ================================ # initialize_partition_matrix # partition_matrix = # initialize_partition_matrix( # num_clusters, num_patterns) # ================================ sub initialize_partition_matrix { my $partition_matrix = random($_[1],$_[0]); my $idx = which( $partition_matrix == 0 ); $partition_matrix( $idx ) .= 1e-10; $partition_matrix /= sumover($partition_matrix->xchg(1, 0)); return $partition_matrix; } # ================================ # fcm # ( $performance_index, $prototypes, $current_partition_matrix) = # fcm( $patterns, $partition_matrix, $fuzzification_factor, # $tolerance, $max_iter ) # ================================ sub fcm { # # fuzzy c means implementation # my ( $patterns, $current_partition_matrix , $fuzzification_factor, $tolerance , $max_iter ) = @_; my ( $number_of_patterns, $number_of_clusters ) = $current_partition_matrix->dims(); my ( $prototypes, $performance_index ); my $iter = 0; while (1) { # computing each prototype my $temporal_partition_matrix = $current_partition_matrix ** $fuzzification_factor; my $temp_prototypes = ($temporal_partition_matrix x $patterns)->xchg(1,0) / sumover($temporal_partition_matrix); $prototypes = $temp_prototypes->xchg(1,0); # copying partition matrix my $previous_partition_matrix = $current_partition_matrix->copy; # updating the partition matrix my $dist = zeroes($number_of_patterns , $number_of_clusters); for my $j (0..$number_of_clusters - 1){ my $diff = $patterns - $prototypes(:,$j)->dummy(1 , $number_of_patterns); $dist(:,$j) .= (sumover( $diff ** 2 )) ** 0.5; } my $temp_variable = $dist ** ( -2 / ( $fuzzification_factor - 1) ); $current_partition_matrix = $temp_variable / sumover($temp_variable->xchg(1,0)); # # Performance Index calculation # $temporal_partition_matrix = $current_partition_matrix ** $fuzzification_factor; $performance_index = sum($temporal_partition_matrix * ( $dist ** 2 )); # checking stop conditions my $diff_partition_matrix = $current_partition_matrix - $previous_partition_matrix; $iter++; if ( ($diff_partition_matrix->max < $tolerance) || ($iter > $max_iter) ) { last; } print "iter = $iter\n"; } return ( $performance_index, $prototypes , $current_partition_matrix ); } # ================================ # read_data # (@symbols, $data) = read_data( $file_name ) # ================================ sub read_data { open FILE, '<', $_[0] || die "Couldn't open file: $!"; my @symbols; my @numerical_data; while ( ) { my @tmp; chomp; my @fields = split; push( @symbols, shift(@fields) ); $_ = shift( @fields ); # Last Trade Date push( @tmp, shift( @fields ) ); # Last Trade (Price Only) $_ = shift( @fields ); # Change in Percent if (/(\-?\d+\.?\d*)%/) { push( @tmp, $1 ); } else { push( @tmp, 0 ); } push( @tmp, shift( @fields ) ); # Book Value push( @tmp, shift( @fields ) ); # EPS Est. Current Yr push( @tmp, shift( @fields ) ); # EPS Est. Next Year push( @tmp, shift( @fields ) ); # Average Daily Volume push( @tmp, shift( @fields ) ); # Day's min $_ = shift( @fields ); # - push( @tmp, shift( @fields ) ); # Day's max push( @tmp, shift( @fields ) ); # 52 weeks min $_ = shift( @fields ); # - push( @tmp, shift( @fields ) ); # 52 weeks max push( @tmp, shift( @fields ) ); # 50-day Moving Avg push( @tmp, shift( @fields ) ); # 200-day Moving Avg $_ = shift( @fields ); # Market Capitalization if (/(\d+\.?\d*)M/) { push( @tmp, $1 * 1000000 ); } elsif (/(\d+\.?\d*)B/) { push( @tmp, $1 * 1000000000 ); } else { push( @tmp, 0 ); } $_ = shift( @fields ); # Pct Chg From 50-day Moving Avg if (/(\-?\d+\.?\d*)%/) { push( @tmp, $1 ); } else { push( @tmp, 0 ); } $_ = shift( @fields ); # Pct Chg From 200-day Moving Avg if (/(\-?\d+\.?\d*)%/) { push( @tmp, $1 ); } else { push( @tmp, 0 ); } $_ = shift( @fields ); # Pct Chg From 52-wk Low if (/(\-?\d+\.?\d*)%/) { push( @tmp, $1 ); } else { push( @tmp, 0 ); } $_ = shift( @fields ); # Pct Chg From 52-wk High if (/(\-?\d+\.?\d*)%/) { push( @tmp, $1 ); } else { push( @tmp, 0 ); } push @numerical_data, [ @tmp ]; } close( FILE ); my $data = pdl ( @numerical_data ); return ($data, @symbols); } # ================================ # normalize # ( $output_data, $mean_of_input, $stdev_of_input) = # normalize( $input_data ) # # processess $input_data so that $output_data # has 0 mean and 1 stdev # # $output_data = ( $input_data - $mean_of_input ) / $stdev_of_input # ================================ sub normalize { my ( $input_data ) = @_; my ( $mean, $stdev, $median, $min, $max, $adev ) = $input_data->xchg(0,1)->statsover(); my $idx = which( $stdev == 0 ); $stdev( $idx ) .= 1e-10; my ( $number_of_dimensions, $number_of_patterns ) = $input_data->dims(); my $output_data = ( $input_data - $mean->dummy(1, $number_of_patterns) ) / $stdev->dummy(1, $number_of_patterns); return ( $output_data, $mean, $stdev ); } # # reading data # my $quotes_filename = shift @ARGV; unless ( defined( $quotes_filename ) ) { die "Error you must provide a quotes filename!\n"; } my ($data, @symbols) = read_data( $quotes_filename ); my $number_of_patterns = $data->getdim(1); my ( $patterns, $mean_of_input, $stdev_of_input) = normalize( $data ); # # assigning other variables # my $number_of_clusters = shift @ARGV; my $fuzzification_factor = shift @ARGV; my $max_iter = shift @ARGV; my $tolerance = shift @ARGV; unless (defined($number_of_clusters)) { $number_of_clusters ||= 3; } unless (defined($fuzzification_factor)) { $fuzzification_factor ||= 2.0; } unless (defined($max_iter)) { $max_iter ||= 2000; } unless (defined($tolerance)) { $tolerance ||= 0.00001; } $number_of_clusters = abs($number_of_clusters); $fuzzification_factor = abs($fuzzification_factor); $max_iter = abs($max_iter); $tolerance = abs($tolerance); # # initializing partition matrices # my $previous_partition_matrix; my $current_partition_matrix = initialize_partition_matrix($number_of_clusters, $number_of_patterns); # # output variables # my ( $prototypes, $performance_index, $partition_matrix ); ( $performance_index, $prototypes, $partition_matrix) = fcm( $patterns, $current_partition_matrix, $fuzzification_factor, $tolerance, $max_iter ); print "=======================================\n"; print "clustering completed\n"; print "performance index = $performance_index\n"; print "prototypes = \n"; print $prototypes; print "partition matrix = \n"; print transpose( $partition_matrix ); my $new_prototypes = ( $prototypes * $stdev_of_input->dummy(1, $prototypes->getdim(1)) ) + $mean_of_input->dummy(1, $prototypes->getdim(1)); print "new prototypes = \n"; print $new_prototypes; #print $data; use PDL::Graphics::PGPLOT; use POSIX( 'floor', 'ceil'); my $opt = {Device => '/xs', XTitle => "50-day Moving Avg", YTitle => "200-day Moving Avg"}; my $win = PDL::Graphics::PGPLOT::Window->new($opt); $win->points($data(13, :), $data(14, :), {SYMBOL=>4, COLOR=>'BLACK'} ); $win->hold(); my (@x_list, @y_list, $x, $y, $text); @x_list = list($new_prototypes(13, 0)); @y_list = list($new_prototypes(14, 0)); $x = floor( $x_list[0] * 10) / 10; $y = floor( $y_list[0] * 10) / 10; $win->points($new_prototypes(13, 0), $new_prototypes(14, 0), {SYMBOL=>3, COLOR=>'RED', CHARSIZE=>3}); $win->points($new_prototypes(13, 0), $new_prototypes(14, 0), {SYMBOL=>4, COLOR=>'RED', CHARSIZE=>25}); $text = "( $x %, $y % )"; $win->text($text, int( $x ), int( $y ) + 2, {CHARSIZE=>1, COLOR=>'RED', Justification => 0.5} ); @x_list = list($new_prototypes(13, 1)); @y_list = list($new_prototypes(14, 1)); $x = floor( $x_list[0] * 10) / 10; $y = floor( $y_list[0] * 10) / 10; $win->points($new_prototypes(13, 1), $new_prototypes(14, 1), {SYMBOL=>3, COLOR=>'GREEN', CHARSIZE=>3}); $win->points($new_prototypes(13, 1), $new_prototypes(14, 1), {SYMBOL=>4, COLOR=>'GREEN', CHARSIZE=>25}); $text = "( $x %, $y % )"; $win->text($text, int( $x ), int( $y ) + 2, {CHARSIZE=>1, COLOR=>'GREEN', Justification => 0.5} ); @x_list = list($new_prototypes(13, 2)); @y_list = list($new_prototypes(14, 2)); $x = floor( $x_list[0] * 10) / 10; $y = floor( $y_list[0] * 10) / 10; $win->points($new_prototypes(13, 2), $new_prototypes(14, 2), {SYMBOL=>3, COLOR=>'BLUE', CHARSIZE=>3}); $win->points($new_prototypes(13, 2), $new_prototypes(14, 2), {SYMBOL=>4, COLOR=>'BLUE', CHARSIZE=>25}); $text = "( $x %, $y % )"; $win->text($text, int( $x ), int( $y ) + 2, {CHARSIZE=>1, COLOR=>'BLUE', Justification => 0.5} );