Beefy Boxes and Bandwidth Generously Provided by pair Networks
Do you know where your variables are?
 
PerlMonks  

Comment on

( #3333=superdoc: print w/ replies, xml ) Need Help??
#!/usr/bin/perl -w ###################### # # Algorithm::Genetic # v0.03 # Michael K. Neylon # mneylon-pm@masemware.com # May 22, 2001 # # Perl module for setting up a genetic algorithm framework. See # the POD for more details. # # Suggestions/Comments/Ideas are highly desired and can be # sent to the eamil address above. # # Change History: # # v0.03 - May 22, 2001 # Changed order of populating system: reap, breed, then # mutate. # Breeded and mutated organisms are automatically deleted; # further versions will have LIFETIME parameter to allo # organizes to live for more than one generation # Added get_generation_number() # Added MUTATE_FRACTION, BREED_FRACTION, and REAP_FRACTION # settings # # v0.02 - May 19, 2001 # MUTATOR does not need to be defined # Added BREEDER (coderef), BREED_CRITERIA (coderef), and # BREED_AMOUNT (value) # Fixed problem with mutator operation # Added AGE parameter for mutator/breeder/reaper # ###################### package Algorithm::Genetic; use strict; use UNIVERSAL qw(isa); BEGIN { use Exporter (); use vars qw( @EXPORT_OK); } use vars @EXPORT_OK; $Genetic::VERSION = '0.03'; sub new { my ( $class, $props ) = @_; my $self = {} ; # An array of hash references; the "data" member of each hash is # the user supplied data, while "fitness" is the calculated fitnes +s # of that data member $self->{ population } = []; # The user-supplied fitness function. Passed by the $props hash # setting FITNESS, or we die. die "FITNESS must be defined and as a coderef" unless ( defined $props->{ 'FITNESS' } && isa( $props->{ 'FITNESS' } , 'CODE' ) ); $self->{ fitness } = $props->{ 'FITNESS' }; # The user-supplied function to determine which organisms are # candidates for the reaper. The sub should return non-false for # organisms that should be reaped. die "REAP_CRITERIA must be a coderef" unless ( !defined $props->{ 'REAP_CRITERIA' } || isa( $props->{ 'REAP_CRITERIA' }, 'CODE' ) ); $self->{ reap_criteria } = $props->{ 'REAP_CRITERIA' }; # The user-supplied value for the maximum number # of organisms to be reaped out of those that are able to # be repead # (as per REAP_CRITERIA) $self->{ reap_amount } = $props->{ 'REAP_AMOUNT' } || 0; # The user-supplied value for the fraction (0 <= x <= 1) of # organisms that are reaped from the list of those that can # be reaped. $self->{ reap_fraction } = $props->{ 'REAP_FRACTION' } || 1; # The user-supplied mutator function. Passed by $props hash # MUTATOR. die "MUTATOR must be defined as a coderef" unless ( !defined $props->{ 'MUTATOR' } || isa( $props->{ 'MUTATOR' } , 'CODE' ) ); $self->{ mutator } = $props->{ 'MUTATOR' }; # The user-supplied function to determine which organisms are # candidates for mutation. The sub should return non-false for # organisms that should be mutated. die "MUTATE_CRITERIA must be a coderef" unless ( !defined $props->{ 'MUTATE_CRITERIA' } || isa( $props->{ 'MUTATE_CRITERIA' }, 'CODE' ) ); $self->{ mutate_criteria } = $props->{ 'MUTATE_CRITERIA' }; # These two as per the same for Reaping $self->{ mutate_amount } = $props->{ 'MUTATE_AMOUNT' } || 0; $self->{ mutate_fraction } = $props->{ 'MUTATE_FRACTION' } || 1; # The user-supplied breeder function. Passed by $props hash # BREEDER. die "BREEDER must be defined as a coderef" unless ( !defined $props->{ 'BREEDER' } || isa( $props->{ 'BREEDER' } , 'CODE' ) ); $self->{ breeder } = $props->{ 'BREEDER' }; # The user-supplied function to determine which organisms are # candidates for breeding. The sub should return non-false for # organisms that should be breeded. die "BREED_CRITERIA must be a coderef" unless ( !defined $props->{ 'BREED_CRITERIA' } || isa( $props->{ 'BREED_CRITERIA' }, 'CODE' ) ); $self->{ breed_criteria } = $props->{ 'BREED_CRITERIA' }; # As per the same as Reaping. In the case of breed_fraction, # we assume that the breeding will produce 2 organisms. $self->{ breed_amount } = $props->{ 'BREED_AMOUNT' } || 0; $self->{ breed_fraction } = $props->{ 'BREED_FRACTION ' } || 1; # A counter $self->{ counter } = 0; bless $self, $class; return $self; } # Inits the population sub init_population { # For each item we take in, create a hash, calculate the fitness, # and push the new item on to the list my $self = shift; $self->{ counter } = 0; foreach ( @_ ) { $self->_add_organism( $_ ); } } # Does one iteration sub process_generation { # Real simple for right now, remove the worst one, and mutate # the best one my $self = shift; my @reaped = $self->{ reap_criteria } ? $self->_reap_population() : (); my @breeded = $self->{ breed_criteria } ? $self->_breed_population() : (); # With all transformations done, add the new organisms to the syst +em $self->{ population } = (); foreach ( @breeded ) { $self->_add_organism ( $_ ); } my @mutants = $self->{ mutate_criteria } ? $self->_mutate_population() : (); foreach( @mutants ) { $self->_add_organism( $_ ); } $self->{ counter }++; } sub get_population { # Returns the best answers my $self = shift; return map { $_->{ data } } sort { $b->{ fitness } <=> $a->{ fitness } } @{ $self->{ population } }; } sub get_generation_number { my $self = shift; return $self->{ counter }; } sub _add_organism { # Given the user data for an organism, add it to the population my $self = shift; my $user_data = shift; my %hash = ( data => $user_data, birth => $self->{ counter }, fitness => &{ $self->{ fitness } }( { DATA=>$user_data } ) ); push @{ $self->{ population } }, \%hash; } sub _get_matched_organisms { # Given the user sub to determine which nodes match for reaping, # mutating or breeding, return an array of hashes, which includes +the # node numbers and probability information my $self = shift; my $code = shift; # Our results my @matched; # We'll be passing a hash to each one of these; save time by precr +eating # it and only changing what is needed. my %user_hash; foreach my $i ( 0..@{ $self->{ population } } - 1 ) { $user_hash{ DATA } = $self->{ population }->[ $i ]->{ data }; $user_hash{ FITNESS } = $self->{ population }->[ $i ]->{ fitness }; $user_hash{ AGE } = $self->{ counter } - $self->{ population }->[ $i ]->{ birth }; my $probability = &{ $code }( \%user_hash ); # only add to those cases where the probability is greater than # zero push @matched, { index=>$i, probability=>$probability } if ( $probability > 0 ); } return @matched; } sub _reap_population { # Given a population, perform a reap my $self = shift; # Get the organism index of those organisms that can be reaped. my @reapable = $self->_get_matched_organisms( $self->{ reap_criter +ia } ); # Where reaped organisms go to die my @reaped; if ( @reapable ) { # How many shall we do? my $organisms_to_reap = ( $self->{ reap_amount } < 1 ) ? int( @reapable * $self->{ reap_fraction } ) : $self->{ reap_amount }; # Storage for those organisms actually reaped. my @reaped_organisms; # List of probabilities... my @probabilities = map { $_->{ probability } } @reapable; # While we still have organisms to reap, select a random # organism from the probability list above (modified # appropriately), and target it for reaping by removing it # from the list while ( @reapable && ( $organisms_to_reap > 0 ) ) { my $index = _probability_selection( @probabilities ); splice ( @probabilities, $index, 1 ); push @reaped_organisms, splice( @reapable, $index , 1 ); $organisms_to_reap--; } # In order to remove the right organisms, we need to remove them # in reverse order so that we don't wreck the lower indexes and # pull the wrong ones @reaped_organisms = sort { $b->{ index } <=> $a->{ index } } @reaped_organisms; foreach ( @reaped_organisms ) { push @reaped, splice( @{ $self->{ population } }, $_->{ index }, 1 ); } } return @reaped; } sub _mutate_population { # Given a population, perform a mutation on it my $self = shift; # Get the node index of those organisms that can be mutated. my @mutable = $self->_get_matched_organisms( $self->{ mutate_crite +ria } ); # Where mutated organisms live until needed my @mutations; if ( @mutable ) { my $organisms_to_mutate = ( $self->{ mutate_amount } < 1 ) ? int( @mutable * $self->{ mutate_fraction } ) : $self->{ mutate_amount }; # Storage for those organisms actually mutated. my @mutated_organisms; # List of probabilities... my @probabilities = map { $_->{ probability } } @mutable; # While we still have organisms to reap, select a random node from # the probability list above (modified appropriately), and # target it for reaping by removing it from the list while ( @mutable && ( $organisms_to_mutate > 0 ) ) { my $index = _probability_selection( @probabilities ); splice ( @probabilities, $index, 1 ); push @mutated_organisms, splice( @mutable, $index, 1 ); $organisms_to_mutate--; } # Do the mutations... foreach ( @mutated_organisms ) { my $clone = &{ $self->{ mutator } } ( { DATA => $self->{ population }->[ $_->{ index } ]->{ data } } ); push @mutations, $clone; } # Then remove the old items from the population... @mutated_organisms = sort { $b->{ index } <=> $a->{ index } } @mutated_organisms; foreach ( @mutated_organisms ) { splice( @{ $self->{ population } }, $_->{ index }, 1 ); } } return @mutations; } sub _breed_population { # Given a population, perform breeding on it my $self = shift; # Get the node index of those organisms that can be breeded. my @breedable = $self->_get_matched_organisms( $self->{ breed_crit +eria } ); # Where breeds organisms live until needed my @breedings; # Where organisms that have been breeded will live my @breeded; if ( @breedable ) { # How many shall we do? Note that in the fractional case, # we divide by two (two items used on every breed, so if # 80 out of 100 items were to breed, we'd do 40 breedings my $organisms_to_breed = ( $self->{ breed_amount } < 1 ) ? int ( @breedable * $self->{ breed_fraction } / 2 ) : $self->{ breed_amount }; # Storage for those organisms actually reaped. my @breeded_organisms; # List of probabilities... my @probabilities = map { $_->{ probability } } @breedable; # Try to select two random organisms... while ( (@breedable > 2) && ( $organisms_to_breed > 0 ) ) { my $index1 = _probability_selection( @probabilities ); my $index2 = $index1; $index2 = _probability_selection( @probabilities ) while $index2 == $index1; push @breeded_organisms, [ $breedable[ $index1 ], $breedable[ $index2 ] ]; $organisms_to_breed--; } # Here, we expect an array of children to be returned from # this operation foreach ( @breeded_organisms ) { my @breeds = &{ $self->{ breeder } } ( { PARENT1 => $self->{ population }->[ $_->[0]->{ index } ]->{ data }, PARENT2 => $self->{ population }->[ $_->[1]->{ index } ]->{ data } } ); push @breedings, @breeds; } } return @breedings; } sub _probability_selection { # Given a list of numbers, returns a random index, with # larger numbers having a better chance of being selected # over smaller ones my $sum = 0; $sum += $_ foreach ( @_ ); my $r = rand $sum; my $i = 0; my $total = 0; do { $total += $_[ $i++ ] } until $total > $r; return $i-1; } 1; __END__ =head1 NAME Algorithm::Genetic - A framework for genetic programming =head1 SYNPOSIS use Algorithm::Genetic; my $algo = new Algorithm::Genetic( { FITNESS => sub { 10000 - ( $_[0]->{ DATA } - 50 )**2 }, MUTATOR => sub { $_[0]->{ DATA } - 5 + int rand 10 }, REAP_CRITERIA => sub { 10000 - $_[ 0 ]->{ FITNESS } }, MUTATE_CRITERIA => sub { $_[ 0 ]->{ FITNESS } } } ); $algo->init_population( map { int rand 100 } (1..10) ); for (1..100) { print join ",", $algo->get_population(); $algo->process_generation(); print "\n"; } =head1 DESCRIPTION This modules provides a basic framework for creating genetic programs under perl. While typical genetic programming works with a series of 'genes' (typcially represented by a string of characters), this module allows any data structure to be used. The programmer can create their own mutation, breeding, and fitness functions to match their data. =head1 USAGE The algorithm is created by calling the new() function, which has a hash as an argument which is used to direct the algorithm to the desired user modules and any other configuration data that is needed. The algorithm is then feed with an array through init_population. A generation is processed each time process_generation() is called. At any time, get_population can be called, which will return the current population in the algorithm in decreasing fitness order. =head1 C<new> Creates a new instance of a genetic algorithm. Multiple instances can + be created without harmful effects. The configuration of the algorith is defined by the hash that should be the sole argument to C<new>. The various keys and associated values for this hash are: FITNESS - a coderef to a subroutine to define the fitness of a individual of a population. B<This must be defined>. The code, when called, will have a hash passed as it's sole argument; with the hash, the current data item will be stored in DATA. This function should return the fitnes +s that you want to assign to this item. MUTATOR - a coderef to a subroutine to mutate a data item. Leaving this undefined will result in no mutation step from occuring. The code will recieve a hash as it's sole arguement, with the DATA value being the data item that is to be mutated. You should create a new item based on this one and return that; B<Do not modify the original item>. BREEDER - a coderef to a subrouting to breed new data from two parents. Leaving this undefined will result in no breeding step. The code will recieve a hash as it's sole arguement; the two parents will be in the values of keys PARENT1 and PARENT2. The coderef should return an array (not a reference to an array) of any new children that should be added through breeding. B<Do not modify the original items>. REAP_CRITERIA - a coderef to a subroutine that determines if a given node should be reaped. The code will be passed a hash as it's sole argument; within this hash, the following keys are set with the given values: DATA - the data item in question FITNESS - the fitness for the data item as last calculated AGE - the number of generations that the data item has exists. The code should return a number greater than 0 if the data item can be reaped; the larger this number (relative to other reaped organisms) the better chance that it will be reaped. If 0 or a negative number is returned, that item will not be considered for reaping. MUTATE_CRITERIA - a coderef, similar to REAP_CRITERIA, that will determine which items are available for mutation. The code is called and used in a similar fashion as REAP_CRITERIA. BREED_CRITERIA - a coderef, similar to REAP_CRITERIA, that will determine which items are available for breeding. The code is called and used in a similar fashion as REAP_CRITERIA. REAP_AMOUNT, REAP_FRACTION - Scalars; if REAP_AMOUNT is set greater than 0, then the reaping step will reap this exact number of data items. If REAP_AMOUNT is 0 or less, then the number of data items that will be reaped will by REAP_FRACTION * total number of reapable da +ta items in population as determined from REAP_CRITERI +A. If neither is set, then REAP_AMOUNT will be 0, and REAP_FRACTION will be 1, meaning that any reapable data item will be reaed. MUTATE_AMOUNT, MUTATE_FRACTION - The same as with REAP_AMOUNT and REAP_FRACTION, but for the mutable data items BREED_AMOUNT, BREED_FRACTION - The same as with REAP_AMOUNT and REAP_FRACTION, but for the breedable data items. Note that if entered as a fraction, this will be the fraction of data items that will be breeded. If 100 data items are available and BREED_AMOUNT is set to .8 (80%), then there will be 40 breeding operations on 80 different data items. A typical breeding will produce 2 offspring, so in this case, we'll get back 80 new data items. =head1 C<init_population> Initialized the population for the genetic algorithm with the passed array of data. =head1 C<process_generation> Executes one generation for the current population in the algorithm. The processing is done by first finding and removing any reaped data items, then creating and adding new mutations. =head1 C<get_population> Returns the current population in order of decreasing fitness. =head1 C<get_generation_number> Returns the current generation number. =over =head1 BUGS This is sub 1.0 version software? And you're asking about bugs??? If you do find any, please please please let me know at mneylon-pm@masemware.com =head1 AUTHOR AND COPYRIGHT Copyright 2001, Michael K. Neylon Licensed under the GPL. =cut

In reply to Algorithm::Genetic by Masem

Title:
Use:  <p> text here (a paragraph) </p>
and:  <code> code here </code>
to format your post; it's "PerlMonks-approved HTML":



  • Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!
  • Read Where should I post X? if you're not absolutely sure you're posting in the right place.
  • Please read these before you post! —
  • Posts may use any of the Perl Monks Approved HTML tags:
    a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr
  • Outside of code tags, you may need to use entities for some characters:
            For:     Use:
    & &amp;
    < &lt;
    > &gt;
    [ &#91;
    ] &#93;
  • Link using PerlMonks shortcuts! What shortcuts can I use for linking?
  • See Writeup Formatting Tips and other pages linked from there for more info.
  • Log In?
    Username:
    Password:

    What's my password?
    Create A New User
    Chatterbox?
    and the web crawler heard nothing...

    How do I use this? | Other CB clients
    Other Users?
    Others browsing the Monastery: (8)
    As of 2014-12-26 07:47 GMT
    Sections?
    Information?
    Find Nodes?
    Leftovers?
      Voting Booth?

      Is guessing a good strategy for surviving in the IT business?





      Results (168 votes), past polls