http://www.perlmonks.org?node_id=81678
Category: Miscelleanous
Author/Contact Info Michael K. Neylon mneylon-pm@masemware.com
Description: Implements a framework for a genetic programming algorithm with arbitary data structures. This is version 0.01, so it currently needs serious commentary and development, but it does do the GA for some test cases (with more to be tested).

Right now, I'm missing things that would make setting up some routines easier, but I wanted to make sure that the basics were down and with a good interface before adding extra features.

#!/usr/bin/perl -w

######################
#
# Algorithm::Genetic
# v0.03
# Michael K. Neylon
# mneylon-pm@masemware.com
# May 22, 2001
#
# Perl module for setting up a genetic algorithm framework.  See
# the POD for more details.
#
# Suggestions/Comments/Ideas are highly desired and can be 
# sent to the eamil address above.
#
# Change History:
#
# v0.03 - May 22, 2001
#         Changed order of populating system: reap, breed, then
#             mutate.
#         Breeded and mutated organisms are automatically deleted;
#             further versions will have LIFETIME parameter to allo
#             organizes to live for more than one generation
#         Added get_generation_number()
#         Added MUTATE_FRACTION, BREED_FRACTION, and REAP_FRACTION
#             settings
#
# v0.02 - May 19, 2001
#         MUTATOR does not need to be defined
#         Added BREEDER (coderef), BREED_CRITERIA (coderef), and
#            BREED_AMOUNT (value)
#         Fixed problem with mutator operation
#         Added AGE parameter for mutator/breeder/reaper
#
######################

package Algorithm::Genetic;

use strict;
use UNIVERSAL qw(isa);

BEGIN {
    use Exporter ();
    use vars qw( @EXPORT_OK);
}
use vars @EXPORT_OK;

$Genetic::VERSION = '0.03';

sub new {
    my ( $class, $props ) = @_;
    my $self = {} ;

    # An array of hash references; the "data" member of each hash is 
    # the user supplied data, while "fitness" is the calculated fitnes
+s
    # of that data member

    $self->{ population } = [];

    # The user-supplied fitness function.  Passed by the $props hash
    # setting FITNESS, or we die.

    die "FITNESS must be defined and as a coderef"
 unless ( defined $props->{ 'FITNESS' } 
   && isa( $props->{ 'FITNESS' } , 'CODE' ) );
    
    $self->{ fitness } = $props->{ 'FITNESS' };

    # The user-supplied function to determine which organisms are 
    # candidates for the reaper.  The sub should return non-false for
    # organisms that should be reaped.
    
    die "REAP_CRITERIA must be a coderef"
 unless ( !defined $props->{ 'REAP_CRITERIA' } ||
   isa( $props->{ 'REAP_CRITERIA' }, 'CODE' ) );
    $self->{ reap_criteria } = $props->{ 'REAP_CRITERIA' };

    # The user-supplied value for the maximum number
    # of organisms to be reaped out of those that are able to 
    # be repead # (as per REAP_CRITERIA)

    $self->{ reap_amount } = $props->{ 'REAP_AMOUNT' } || 0;

    # The user-supplied value for the fraction (0 <= x <= 1) of
    # organisms that are reaped from the list of those that can
    # be reaped.
    
    $self->{ reap_fraction } = $props->{ 'REAP_FRACTION' } || 1;

    # The user-supplied mutator function.  Passed by $props hash
    # MUTATOR.

    die "MUTATOR must be defined as a coderef"
 unless ( !defined $props->{ 'MUTATOR' } ||  
   isa( $props->{ 'MUTATOR' } , 'CODE' ) );
    $self->{ mutator } = $props->{ 'MUTATOR' };

    # The user-supplied function to determine which organisms are
    # candidates for mutation.  The sub should return non-false for
    # organisms that should be mutated.

    die "MUTATE_CRITERIA must be a coderef"
 unless ( !defined $props->{ 'MUTATE_CRITERIA' } ||
   isa( $props->{ 'MUTATE_CRITERIA' }, 'CODE' ) );
    $self->{ mutate_criteria } = $props->{ 'MUTATE_CRITERIA' };

    # These two as per the same for Reaping

    $self->{ mutate_amount } = $props->{ 'MUTATE_AMOUNT' } || 0;
    $self->{ mutate_fraction } = $props->{ 'MUTATE_FRACTION' } || 1;

    # The user-supplied breeder function.  Passed by $props hash
    # BREEDER.

    die "BREEDER must be defined as a coderef"
 unless ( !defined $props->{ 'BREEDER' } ||  
   isa( $props->{ 'BREEDER' } , 'CODE' ) );
    $self->{ breeder } = $props->{ 'BREEDER' };

    # The user-supplied function to determine which organisms are
    # candidates for breeding.  The sub should return non-false for
    # organisms that should be breeded.

    die "BREED_CRITERIA must be a coderef"
 unless ( !defined $props->{ 'BREED_CRITERIA' } ||
   isa( $props->{ 'BREED_CRITERIA' }, 'CODE' ) );
    $self->{ breed_criteria } = $props->{ 'BREED_CRITERIA' };

    # As per the same as Reaping.  In the case of breed_fraction,
    # we assume that the breeding will produce 2 organisms.

    $self->{ breed_amount } = $props->{ 'BREED_AMOUNT' } || 0;
    $self->{ breed_fraction } = $props->{ 'BREED_FRACTION ' } || 1;

    # A counter
    $self->{ counter } = 0;

    bless $self, $class;
    return $self;
}

# Inits the population

sub init_population
{
    # For each item we take in, create a hash, calculate the fitness,
    # and push the new item on to the list

    my $self = shift;
    $self->{ counter } = 0;
    foreach ( @_ ) {
 $self->_add_organism( $_ );
    }    
}

# Does one iteration

sub process_generation
{
    # Real simple for right now, remove the worst one, and mutate
    # the best one

    my $self = shift;

    my @reaped = $self->{ reap_criteria } ? 
 $self->_reap_population() : ();
    my @breeded = $self->{ breed_criteria } ?
 $self->_breed_population() : ();

    # With all transformations done, add the new organisms to the syst
+em
    
    $self->{ population } = ();

    foreach ( @breeded ) {
 $self->_add_organism ( $_ );
    }

    my @mutants = $self->{ mutate_criteria } ? 
 $self->_mutate_population() : ();

    foreach( @mutants ) {
 $self->_add_organism( $_ );
    }
    
    $self->{ counter }++;
}

sub get_population
{
    # Returns the best answers
    
    my $self = shift;
    return map { $_->{ data } } 
           sort { $b->{ fitness } <=> $a->{ fitness } }
           @{ $self->{ population } };
}

sub get_generation_number
{
    my $self = shift;
    return $self->{ counter };
}

sub _add_organism
{
    # Given the user data for an organism, add it to the population
    my $self = shift;
    my $user_data = shift;

    my %hash = ( data => $user_data,
                 birth => $self->{ counter },
   fitness => &{ $self->{ fitness } }( { DATA=>$user_data } ) );
    push @{ $self->{ population } }, \%hash;
}

sub _get_matched_organisms
{
    # Given the user sub to determine which nodes match for reaping,
    # mutating or breeding, return an array of hashes, which includes 
+the
    # node numbers and probability information

    my $self = shift;
    my $code = shift;

    # Our results
    my @matched;
    
    # We'll be passing a hash to each one of these; save time by precr
+eating
    # it and only changing what is needed.
    
    my %user_hash;

    foreach my $i ( 0..@{ $self->{ population } } - 1 ) {
 $user_hash{ DATA } = $self->{ population }->[ $i ]->{ data };
 $user_hash{ FITNESS } = $self->{ population }->[ $i ]->{ fitness };
        $user_hash{ AGE } = $self->{ counter } - 
     $self->{ population }->[ $i ]->{ birth };
 my $probability = &{ $code }( \%user_hash );
 
 # only add to those cases where the probability is greater than
 # zero
 push @matched, { index=>$i, probability=>$probability } 
     if ( $probability > 0 );
    }
    return @matched;
}


sub _reap_population
{
    # Given a population, perform a reap

    my $self = shift;
    
    # Get the organism index of those organisms that can be reaped.
    my @reapable = $self->_get_matched_organisms( $self->{ reap_criter
+ia } );

    # Where reaped organisms go to die
    my @reaped;

    if ( @reapable ) {
 # How many shall we do? 
 my $organisms_to_reap = ( $self->{ reap_amount } < 1 ) 
     ? int( @reapable * $self->{ reap_fraction } )
     : $self->{ reap_amount };

 # Storage for those organisms actually reaped.
 my @reaped_organisms;
 
 # List of probabilities...
 my @probabilities = map { $_->{ probability } } @reapable;

 # While we still have organisms to reap, select a random 
 # organism from the probability list above (modified
 # appropriately), and target it for reaping by removing it 
 # from the list
 
 while ( @reapable && ( $organisms_to_reap > 0 ) ) {
     my $index = _probability_selection( @probabilities );
     splice ( @probabilities, $index, 1 );
     push @reaped_organisms, splice( @reapable, $index , 1 );
     $organisms_to_reap--;
 }
 # In order to remove the right organisms, we need to remove them
 # in reverse order so that we don't wreck the lower indexes and
 # pull the wrong ones
 @reaped_organisms = sort { $b->{ index } <=> $a->{ index } } 
                     @reaped_organisms;
 foreach ( @reaped_organisms ) {
     push @reaped, 
         splice( @{ $self->{ population } }, $_->{ index }, 1 );
 }
    }
    return @reaped;
}

sub _mutate_population
{
    # Given a population, perform a mutation on it

    my $self = shift;
    
    # Get the node index of those organisms that can be mutated.
    my @mutable = $self->_get_matched_organisms( $self->{ mutate_crite
+ria } );

    # Where mutated organisms live until needed
    my @mutations;

    if ( @mutable ) {
 my $organisms_to_mutate = ( $self->{ mutate_amount } < 1 ) 
            ? int( @mutable * $self->{ mutate_fraction } )
     : $self->{ mutate_amount };

 # Storage for those organisms actually mutated.
 my @mutated_organisms;
 
 # List of probabilities...
 my @probabilities = map { $_->{ probability } } @mutable;

 # While we still have organisms to reap, select a random node from
 # the probability list above (modified appropriately), and 
 # target it for reaping by removing it from the list
 
 while ( @mutable && ( $organisms_to_mutate > 0 ) ) {
     my $index = _probability_selection( @probabilities );
     splice ( @probabilities, $index, 1 );
     push @mutated_organisms, splice( @mutable, $index, 1 );
     $organisms_to_mutate--;
 }

 # Do the mutations...

 foreach ( @mutated_organisms ) {
     my $clone = &{ $self->{ mutator } } 
  ( { DATA =>
       $self->{ population }->[ $_->{ index } ]->{ data } } );
     push @mutations, $clone;
 }

 # Then remove the old items from the population...
 @mutated_organisms = sort { $b->{ index } <=> $a->{ index } } 
                     @mutated_organisms;
 foreach ( @mutated_organisms ) {
     splice( @{ $self->{ population } }, $_->{ index }, 1 );
 }
    }
    return @mutations;
}

sub _breed_population
{
    # Given a population, perform breeding on it

    my $self = shift;
    
    # Get the node index of those organisms that can be breeded.
    my @breedable = $self->_get_matched_organisms( $self->{ breed_crit
+eria } );

    # Where breeds organisms live until needed
    my @breedings;

    # Where organisms that have been breeded will live
    my @breeded;

    if ( @breedable ) {
 # How many shall we do?   Note that in the fractional case,
 # we divide by two (two items used on every breed, so if
 # 80 out of 100 items were to breed, we'd do 40 breedings
 my $organisms_to_breed = ( $self->{ breed_amount } < 1 ) 
            ? int ( @breedable * $self->{ breed_fraction } / 2 )
     : $self->{ breed_amount };

 # Storage for those organisms actually reaped.
 my @breeded_organisms;
 
 # List of probabilities...
 my @probabilities = map { $_->{ probability } } @breedable;

 # Try to select two random organisms...
 
 while ( (@breedable > 2) && ( $organisms_to_breed > 0 ) ) {
     my $index1 = _probability_selection( @probabilities );
     my $index2 = $index1;
     $index2 = _probability_selection( @probabilities ) while 
  $index2 == $index1;
     push @breeded_organisms, [
      $breedable[ $index1 ],
      $breedable[ $index2 ]
      ];
     $organisms_to_breed--;
 }

 # Here, we expect an array of children to be returned from
 # this operation

 foreach ( @breeded_organisms ) {
     my @breeds = &{ $self->{ breeder } } 
  ( { PARENT1 =>
       $self->{ population }->[ 
                         $_->[0]->{ index } ]->{ data },
      PARENT2 =>
                     $self->{ population }->[
                         $_->[1]->{ index } ]->{ data } } );
     push @breedings, @breeds;
 }
    }
    return @breedings;
}


sub _probability_selection { 
    
    # Given a list of numbers, returns a random index, with 
    # larger numbers having a better chance of being selected 
    # over smaller ones

    my $sum = 0;
    $sum += $_ foreach ( @_ );
    
    my $r = rand $sum;

    my $i = 0;
    my $total = 0;
    do { $total += $_[ $i++ ] } until $total > $r;

    return $i-1;
}

1;


__END__

=head1 NAME

Algorithm::Genetic - A framework for genetic programming

=head1 SYNPOSIS

  use Algorithm::Genetic;

  my $algo = new Algorithm::Genetic( {
      FITNESS => sub { 10000 - ( $_[0]->{ DATA } - 50 )**2 },
      MUTATOR => sub { $_[0]->{ DATA } - 5 + int rand 10 },
      REAP_CRITERIA => sub { 10000 - $_[ 0 ]->{ FITNESS } },
      MUTATE_CRITERIA => sub { $_[ 0 ]->{ FITNESS } } 
  } );
    
  $algo->init_population( map { int rand 100 } (1..10) );

  for (1..100) {
      print join ",", $algo->get_population();
      $algo->process_generation();
      print "\n";
  }

=head1 DESCRIPTION

This modules provides a basic framework for creating genetic programs
under perl.  While typical genetic programming works with a series
of 'genes' (typcially represented by a string of characters), this 
module allows any data structure to be used.  The programmer can 
create their own mutation, breeding, and fitness functions to match
their data.

=head1 USAGE

The algorithm is created by calling the new() function, which has a
hash as an argument which is used to direct the algorithm to the 
desired user modules and any other configuration data that is needed.
The algorithm is then feed with an array through init_population.
A generation is processed each time process_generation() is called.
At any time, get_population can be called, which will return the 
current population in the algorithm in decreasing fitness order.

=head1 C<new>

Creates a new instance of a genetic algorithm.  Multiple instances can
+ 
be created without harmful effects.  The configuration of the algorith
is defined by the hash that should be the sole argument to C<new>.  
The various keys and associated values for this hash are:

  FITNESS - a coderef to a subroutine to define the fitness of a
            individual of a population.  B<This must be defined>.
            The code, when called, will have a hash passed as it's
            sole argument; with the hash, the current data item will
            be stored in DATA.  This function should return the fitnes
+s
            that you want to assign to this item.  

  MUTATOR - a coderef to a subroutine to mutate a data item.
            Leaving this undefined will result in no mutation step
            from occuring.  The code will recieve a hash as it's 
            sole arguement, with the DATA value being the data item
            that is to be mutated.  You should create a new item based
            on this one and return that; B<Do not modify the original
            item>.

  BREEDER - a coderef to a subrouting to breed new data from
            two parents.  Leaving this undefined will result
            in no breeding step.  The code will recieve a hash as 
            it's sole arguement; the two parents will be in the
     values of keys PARENT1 and PARENT2.  The coderef should
     return an array (not a reference to an array) of any
     new children that should be added through breeding.
     B<Do not modify the original items>.

  REAP_CRITERIA - a coderef to a subroutine that determines if a given
                  node should be reaped.  The code will be passed a 
                  hash as it's sole argument; within this hash, the 
    following keys are set with the given values:
        DATA - the data item in question
        FITNESS - the fitness for the data item as 
    last calculated
        AGE - the number of generations that the
       data item has exists.
                  The code should return a number greater than 0 if 
    the data item can be reaped; the larger this 
    number (relative to other reaped organisms) the 
    better chance that it will be reaped.  If 0 or
                  a negative number is returned, that item will not be
                  considered for reaping.

  MUTATE_CRITERIA - a coderef, similar to REAP_CRITERIA, that will
                  determine which items are available for mutation.  
                  The code is called and used in a similar fashion as
                  REAP_CRITERIA.

  BREED_CRITERIA - a coderef, similar to REAP_CRITERIA, that will
            determine which items are available for breeding.
     The code is called and used in a similar fashion as 
     REAP_CRITERIA.

  REAP_AMOUNT, REAP_FRACTION - Scalars; if REAP_AMOUNT is set greater
                   than 0, then the reaping step will reap this exact 
                   number of data items.  If REAP_AMOUNT is 0 or less,
                   then the number of data items that will be reaped
                   will by REAP_FRACTION * total number of reapable da
+ta 
                   items in population as determined from REAP_CRITERI
+A.  
                   If neither is set, then REAP_AMOUNT will be 0, and 
                   REAP_FRACTION will be 1, meaning that any reapable
                   data item will be reaed.

  MUTATE_AMOUNT, MUTATE_FRACTION - The same as with REAP_AMOUNT 
                   and REAP_FRACTION, but for the mutable data items

  BREED_AMOUNT, BREED_FRACTION - The same as with REAP_AMOUNT and 
                 REAP_FRACTION, but for the breedable data items.  
                 Note that if entered as a fraction, this will be 
                 the fraction of data items that will be breeded.
   If 100 data items are available and BREED_AMOUNT is set
   to .8 (80%), then there will be 40 breeding operations
   on 80 different data items.  A typical breeding will
                 produce 2 offspring, so in this case, we'll get back
                 80 new data items.

=head1 C<init_population>

Initialized the population for the genetic algorithm with the passed
array of data.

=head1 C<process_generation>

Executes one generation for the current population in the algorithm. 
The processing is done by first finding and removing any reaped 
data items, then creating and adding new mutations.

=head1 C<get_population>

Returns the current population in order of decreasing fitness.

=head1 C<get_generation_number>

Returns the current generation number.

=over

=head1 BUGS

This is sub 1.0 version software?  And you're asking about bugs???  
If you do find any, please please please let me know at
mneylon-pm@masemware.com

=head1 AUTHOR AND COPYRIGHT

Copyright 2001, Michael K. Neylon

Licensed under the GPL.

=cut
Replies are listed 'Best First'.
Re: Algorithm::Genetic
by Masem (Monsignor) on May 19, 2001 at 07:17 UTC
    As an addition test case that I'm using, I've taken the concepts in Genetic Programming or breeding Perls, and used them to develop the following code:
    #!/usr/bin/perl -w use strict; use Algorithm::Genetic; use Data::Dumper; my @genes = qw{ $x+=1; $x=$y; $y=$x; $x|=$y; $x+=$y; }; my $target = 100; my $algo = new Algorithm::Genetic( { FITNESS => \&fitness, MUTATOR => \&mutate, REAP_CRITERIA => sub { $_[ 0 ]->{ FITNESS } }, MUTATE_CRITERIA => sub { (10000-$_[ 0 ]->{ FITNESS } )**2 } } ); my @initcode; foreach ( 0..10 ) { my @bits = map { int rand @genes } ( 0..10 ); $initcode[ $_ ] = \@bits; }; $algo->init_population( @initcode ); for (1..100) { print "GENERATION $_\n"; print "-------------\n"; print join "\n", map { eval_code( get_code( @$_ ) ).' : '.get_code +( @$_ ) } reverse $algo->get_population(); print "\n"; $algo->process_generation(); print "\n"; } sub mutate { my @clone = @{ $_[0]->{ DATA } }; if ( int( rand() + 0.5 ) ) { # mutate by switching a new op in my $pos = int rand @clone; my $newop = int rand @genes; while ( $newop == $clone[ $pos ] ) { $newop = int rand @genes; } $clone[ $pos ] = $newop; } else { # mutate by adding a new op in push @clone, $genes[ int rand @genes ]; } return \@clone; } sub fitness { my $code = $_[0]->{ DATA }; # Calculate the fitness; my $string = get_code( @$code ); my $calc = eval_code( $string ); return ( $calc - $target )**2; } sub get_code { my $string = 'my $x = 1; my $y = 1; '; $string = join '', $string, map { $genes[ $_ ] } @_; return $string; } sub eval_code { return eval( $_[0] ); }

    While probably not as robust as the original entry, the solutions I'm getting are converging to the target value even after 100 generations, so something is working right...


    Dr. Michael K. Neylon - mneylon-pm@masemware.com || "You've left the lens cap of your mind on again, Pinky" - The Brain
(ar0n: perl-ai list) Re: Algorithm::Genetic
by ar0n (Priest) on May 30, 2001 at 04:46 UTC