Clear[GPSRegress]; BeginPackage["GPSRegress`"]; (* This is a work-alike of GP symbolic regression (Koza's style). The local selection mechanism uses a 1-D ring topology and simple best-two-local-parents criteria. Also, the current best indiv. is always preserved. These defaults may not be best in many cases. Population size defaults at 50, about ok, IMHO. Seems like many different runs with small populations are better than very large population (IMHO 300 is already "large", but haven't tried larger.) *) (* To try a run with this GP symbolic regression as it is now, you just load the package, and type in RunGP[ "someString" ], then . The number of populations and the size of population have been set small so you can try short, quick runs. RunGP::usage = "RunGP[filesuffix] starts a run of genetic programming with predefined constructors, recombinors, evaluators, sampler-selectors and environments. Standard formats of filenames to save full generations and various summary information are built-in, with date-stamp. 'filesuffix' is a short string (possibly empty) that you want to append into all filename strings for a GP run." If you want to try GP on some raw data then you first modify the Environments data list in this package so that it will contain the list of variable(s) and input-output data you want to be fit (a quadratic equation with noise added is given here as a default data for people to try GP initially), then load the package and run. It's simple to modify the code so you can input your data outside the package, once you know how this GP toy works. Look at the structure of the data list Environments and how it's constructed, and everything should be clear on how to put in your own data. If you only want to try GP with "data" sample from some known function instead, then it's simpler: you just have to change the function IdealResponse to the function you want. I put a portion of the code here: ---- IdealResponse[x_] := (x - 10)^2 + 30; Environments = List[ SetVar[Variable, Table[x, {x, 0, VarRange}]], Table[ LaplacianNoise[Random[], MeanAbsErr] \ + IdealResponse[x], {x, 0, VarRange}] ]; Domain = Map[(Variable //. #)&, Environments[[1]]]; IdealBehavior = Transpose[Append[List[Domain], Environments[[2]] ]]; ---- Note: at this present form, this GP symbolic regression tends to be naturally biased in such a way that its programs are dominated by those with output behavior clamped to 0.0 at X = 0.0, because multiplication of any finite function foo[X] by X, a very "natural" operation in GP, will result in the aforementioned behaviors. We can fix this in some ways, for example, one can add a new type of "mutation" which is effectively a simple change-of-variable; like X --> plus[X, random-constant] applied to foo[X], for example. Anyhow, IMHO, the present GP is interesting more because of its particular Darwinian style of searching for symbolic regression solutions, and its capacity to accomodate arbitrary error metrics. But to effectively solve symbolic regression problems one will probably need to upgrade this GP some more. One can see that the sample data given in this GP package is not of the easiest kind of data for it to fit, but I share the view that GP can definitely help in many cases; the best domains for GP being problems that cannot be analyzed and solved analytically, for which one might do better with other methods. *) Generations::usage = "Generations is a list containing all generations (populations evolved) when RunGP is running, then it's saved into an archive file." BestBehaviors::usage = "BestBehaviors contains the list of all best the best individuals in their respective generations." GPbuildTree::usage = "GPbuildTree[maxdepth] gives a random algebraic expression tree built by a call to GPbuild, and with appropriate Eval substitutions." GPbuild::usage = "GPbuild[startdepth, maxdepth] returns a randomly constructed binary tree that is in fact an algebraic expression. The expression can be evaluated by simple substitution of the functors' names. These expression trees are useful for the genetic programming method of symbolic regression. Startdepth is normally 0." GPgenerate::usage = "GPgenerate[Num] gives a list consiting of Num expressions." rInteger::usage = "rInteger evaluates to a random integer within some range." Variable::usage = "Variable is the X variable" Atoms::usage = "Atoms[] gives a random atom, which is either a variable symbol, or a random number (in this case, random integer)" Functors::usage = "Functors[] gives a randomly chosen funtors from a set of given functors." Eval::usage = "Eval is the list of substitution names for the functors" RandomPick::usage = "RandomPick[list] gives a list element picked randomly from given list" Environments::usage = "Environments gives the list of input-output rule mappings for the IdealBehavior, plus noise." (* Fitness Evaluation *) Domain::usage = "Domain gives the list of independent variable's values." IdealBehavior::usage = "IdealBehavior is the noise-less ideal input-output function that the program can achieve (zero-error case)." Fitness::usage = "Fitness[ behavior, environments ] gives the evaluation of the error of fit between the given function/behavior against the given environments. This is just the sum of the absolute errors." (* Creating and processing generations of individuals *) CreatePopulation::usage = "CreatePopulation[wantedsize, seedprograms] gives a randomly generated population of valid algebraic expressions." Cross::usage = "Cross[A, Afrag, B, Bfrag, CrossType] gives a list of two new individuals resulting from 'crsossing' A and B, exchanging the 2 fragments between the two base expressions. CrossType indicates which type of crossover operation to be done, to keep the birth-record for the two new individuals." CrossSubFunctions::usage = "CrossSubFunctions[A, B] calls Cross with CrossType being string 'CrossSubs'." CrossAtAny::usage = "CrossAtAny[A, B] calls Cross with CrossType = 'CrossAtAny'." AtomCross::usage = "AtomCross[A, Afrag, B, CrossType] when B is just a single atom to cross with A, a tree. B exchages its place with Afrag, a subtree, then the two new expressions are returned." AllParts::usage = "AllParts[expression] gives all subtrees of the expression." AllSubFunctions::usage = "AllSubFunctions[expression] gives all subfunctions, that is, subtrees with depths greater than 1, of the expression." (********************** Local Sampler and Selector ************************) NeighborSampling::usage = "NeighborSampling[point, mean, selfchance, margin] gives a random step of mean length 'mean', within a neighborhood around 'point'. 'selfchance' is the probability of staying at the starting point 'point' after one stepping." AllSites::usage = "AllSites[PopLength] gives a list of all indexed sites from 1 to PopLength, each marked 'Available' for subsequent sampling without replacement by PickMates and PickSites." PickMates::usage = "PickMates[sites, quota, num1sidepicks, totalsites, OpName] gives a list of prepared sites sampled from a given list of available sites. Each prepared site is ready for some 'mating operation' by the method specified by OpName. 'Quota' is the maximum number of sites to be sampled, 'num1sidepicks' is the number of local pickings of each of the two list of parent sites (from which 2 best parents will later be selected by SelectBestMates.) 'totalsites' is the total number of sites in a full population." PickSites::usage = "PickSites[sites, quota, num1sidepicks, numtotal, OpName] behaves similarly to PickMates, except that there is only one list of parents consisting of 2*num1sidepicks picks. PickSites is used to sample sites prepared for pure reproduction (pure copy) or for mutation." SelectBestMates::usage = "SelectBestMates[site, pop] receives a list of site out from PickMates, it gives back a modified breedsite that now contains the indexes of the 2 best parents ready for mating." SelectBest::usage = "SelectBest[site, pop] gives the best indiv in the list of local sample indices. This is useful for Mutation or Reproduction sampling." BestIndexInPop::usage = "BestIndexInPop[population] gives the index of the best individual in population. If there are many best ones, only the smallest index is returned." BestIndexInSamples::usage = "BestIndexInSamples[sampleindices, pop] receives a list of indices and the population, and gives back the best index within that index list. This function is used to chose local parents." (********************** Local Breeder and Processor ***********************) RunGP::usage = "RunGP[filesuffix] starts a run of genetic programming with predefined constructors, recombinors, evaluators, sampler-selectors and environments. Standard formats of filenames to save full generations and various summary information are built-in, with date-stamp. 'filesuffix' is a short string to append into all filename strings." ExecuteGenerations::usage = "ExecuteGenerations[popul, numofgens, criterion] receives a starting population and iterates for 'numofgens' generations more, evaluating, selecting and breeding. 'criterion' is the fitness level at which the evolution cycles may stop before 'numofgens'." LocalSamplerBreeder::usage = "LocalSamplerBreeder[population, genNum] given the old population and the index for the new generation, it breeds the new population by local selection/mating." UniqueReIndex::usage = "UniqueReIndex[pop, generationNumber] receives a sorted list of new individuals (first indexed 1 to PopLength) and increments each index by generationNUmber*PopLength to make globally unique indices for all these new individuals." CrossOverAtSite::usage = "CrossOverAtSite[site, population, Method] receives a prepared breedsite with two best local parents. It gives the resulting site occupied by the better of the two offsprings from the crossover operation. 'Method' specifies the crossover method." ReproduceAtSite::usage = "ReproduceAtSite[site, population] gives back the site now occupied by the best indiv chosen from the local sampling process." MutateAtSite::usage = "MutateAtSite[site, population] gives back the site with either a mutated individual (if the mutation is viable), or the best local indiv in the same manner of ReproduceAtSite. We take the simplfying view that a bad mutation dies giving the place for a good indiv nearby to occupy." (******************** Fitness Reporting and Graphic Plots *****************) ReportOnGeneration::usage = "ReportOnGeneration[population] gives a report of the best indiv, its location (index) and expression, the mean and standard deviation of fitness in the population." ReportOnRun::usage = "ReportOnRun[] gives a summary list of all the best fitness and indices in all of the generations in a GP run." ReplayOldGenerations::usage = "ReplayOldGenerations[generations] receives a previous GP run's list of all generations, and reporting the generations progressing." ShowBestOfGeneration::usage = "ShowBestOfGeneration[generation] gives a graphic plot of the best individual function in the given population." DrawIndivFunction::usage = "DrawIndivFunction[individual] prepare a graphic plot of the individual's behavior funtion compared to the optimal funtion specified by the environments. This is a function used for many contexts so it's simplified for efficiency." ShowIndivFunction::usage = "ShowIndivFunction[individual] display the plot prepared by DrawIndivFunction." BehaviorFunction::usage = "BehaviorFunction[program] gives a numerical listing of the input-output function of the program." PopulationRawFitness::usage = "PopulationRawFitness[pop] gives a list of the fitness values of all individuals in population 'pop'." PopulationRawDistribution::usage = "PopulationRawDistribution[pop] gives a normalized cumulative distribution of raw fitness values in the population." ShowPopRawFitness::usage = "ShowPopRawFitness[population, quantile] receives a population and a quantile number (0-100%) and output a graphic plot of the cummulative distribution of raw fitness values, truncating the possibly long tail beyond the 'quantile' quantile." ShowFitnessOfAPop::usage = "ShowFitnessOfAPop[genindex, allgenerations, quantile] receives a list of many generations and an index of the generation to be graphed. It calls on ShowPopRawFitness to plot." (***********************************************************************) Begin["GPSRegress`Private`"]; (* Templates *) (* Individual = {expression, {fitness}, {birthrecord}}; expression = Individual[[1]]; fitness = Individual[[2,1]]; birthrecord = Individual[[3]]; {birthrecord} = {indiv-index, CrossType, {1st-parent-index, 1st-frag, 2nd-parent-index, 2nd-frag} || "Reproduced", {parent} || "LastBest", {parent} || "Mutated", {parent, fragment, new-frag} } Some note: the birth records contain enough information to recover the exact operations on the individual creation: what crossover operations, on which parents and where. This would help in exhaustive tracking of 'genetic' material flows thru all the generations, as well as complete forward- and backward tracking of descendants and ancestors of any individual At present, I have not done any 'tracking' study on GP runs, but it should be a straightforward extension, and could be valuable to monitor and detect 'stalled' conditions in GP, like premature convergence to a poor-fit, highly homogeneous population that frequently takes very long to progress further. *) Generations = {}; (* { {pop1...}, {pop2...}, ... {popN...}} *) SeedPrograms = {}; (* { program1, program2, ...} *) BestBehaviors = {}; (* { {gen[i], prog[i], behavior[i]}, ...} *) (* BestBehaviors contains the indexed behaviors of best programs *) (* Parameters *) NumberOfGenerations = 30; SizeOfPopulation = 50; NumOneSidePicks = 4; MaxDepthOfNewIndividual = 4; MaxDepthOfNewMutantSubTrees = 5; MaxDepthOfNewCrossOverIndividual = 8; CrossOverAtSubsFraction = 0.4; CrossOverAtAnyPointFraction = 0.45; MutationFraction = 0.05; FitnessGoal = 100; (* needs flexible samplings and range of independent variables *) VarRange = 30; (* variable X within [0, 30] interval *) IdealResponse[x_] := (x - 10)^2 + 30; MeanAbsErr = 10; (* Mean AbsErrors of Laplacian noise generator *) epsMargin = 0.005; (* margin for Log evaluation safety *) LaplacianNoise[point_, mean_] := Sign[point - 0.5]*Round[mean*Log[0.5/(Abs[point - 0.5] + epsMargin)]] \ /; (point >= 0.0)&&(point <= 1.0) Environments = List[ SetVar[Variable, Table[x, {x, 0, VarRange}]], Table[ LaplacianNoise[Random[], MeanAbsErr] \ + IdealResponse[x], {x, 0, VarRange}] ]; SetVar[variable_, values_List] := Map[Rule[variable, #]&, values]; Domain = Map[(Variable //. #)&, Environments[[1]]]; IdealBehavior = Transpose[Append[List[Domain], Environments[[2]] ]]; Fitness[behavior_, environments_] := Block[{inputs = environments[[1]], idealresponse = environments[[2]], functioning, response, function2nd, errterm1st}, functioning = behavior //. Eval; (* tries to catch infinite expressions, returning string "BadVal" *) response = Check[Map[(functioning //. #)& , inputs], "BadVal"]; Which[StringQ[response], errterm1st = response, True, errterm1st = Check[Apply[Plus, Map[Abs, response - idealresponse ]], "BadVal"] ]; Return[{behavior, {errterm1st//N}}]; ]; (****************** Expression Tree Constructor *****************************) GPbuildTree[ maxDepth_] := GPbuild[0, maxDepth]; GPbuild[depth_, MaxDepth_] := ( fIndex = Random[Integer, {1, Length[Functors[]]}]; functor = Functors[][[fIndex, 1]]; numofargs = Length[Functors[][[fIndex, 2]]]; Which[(depth < MaxDepth), If [ numofargs == 2, Return[functor[GPbuild[depth+1, MaxDepth], GPbuild[depth+1, MaxDepth]] ], Return[functor[GPbuild[depth+1, MaxDepth]] ]], True, RandomPick[Atoms[]]] ); rConstant := Random[Real, {-20, 20}]; Variable = Global`X; Atoms[] := {Variable, rConstant}; Functors[] := {{Global`times, {Real, Real}}, {Global`plus, {Real, Real}}, {Global`minus, {Real}}, {Global`divide, {Real, Real}}}; Eval = {Global`times->Times, Global`plus->Plus, Global`minus->Minus, Global`divide->Divide}; (***************** Creating and evolving generations ********************) CreatePopulation[wantedsize_, seedprograms_List] := Block[{population = {}, seednumber = Length[seedprograms], i, subpop = {}, subpoplength, needed, passcount = 0, count = 0, startTime = Take[Date[], -2], stopTime}, Which[(seednumber >= 0)&&(seednumber < wantedsize), subpop = Join[seedprograms, GPgenerate[wantedsize - seednumber]], (seednumber >= wantedsize), subpop = Take[seedprograms, wantedsize] ]; (* next, incrementally generate random programs until enough valid ones *) subpop = Select[Map[Fitness[#, Environments]&, subpop], NumberQ[#[[2,1]]]& ]; For[i = 1, i <= Length[subpop] , i++, AppendTo[population, AppendTo[subpop[[i]], List[i+count, "FirstGen"]]] ]; (* tagging the individual for later tracking *) count = count + Length[subpop]; While [ count < wantedsize, passcount++; Print[passcount, "th pass, ", count, " indivs so far"]; (* generate more than the needed number of new programs each pass to compensate for non-valid ones --- usually more than half of random progams are not valid, often containing division by zero, etc. *) subpop = Select[Map[Fitness[#, Environments]&, GPgenerate[Round[2*(wantedsize - count)]]], NumberQ[#[[2,1]]]&]; For[i = 1, i <= Length[subpop] , i++, AppendTo[population, AppendTo[subpop[[i]], List[i+count, "FirstGen"]]] ]; (* tagging the individual for later tracking *) count = count + Length[subpop]; ]; stopTime = Take[Date[], -2]; Print[""]; Print["___ time elasped: ", 60*(stopTime[[1]]-startTime[[1]]) \ + stopTime[[2]] - startTime[[2]], " seconds to create the 0th \ generation"]; Return[Take[population, wantedsize]]; ]; GPgenerate[quota_] := Table[GPbuildTree[ MaxDepthOfNewIndividual ], {quota}]; (********** CrossOver Recombinations between expressions *****************) AllPoints[expr_] := Block[{points, fragments}, points = Select[Position[expr, _], (# != {})&]; fragments = Apply[expr[[##]]&, points, 1]; Return[Transpose[Append[List[fragments], points]]] ]; AllParts[expr_] := Select[AllPoints[expr], (Last[#[[2]]] != 0)&]; AllLeaves[expr_] := Select[AllParts[expr], (Depth[#[[1]]] == 1)&]; AllSubFunctions[expr_] := Append[Select[AllParts[expr], (Depth[#[[1]]] > 1)&], List[expr, {}]]; Cross[Aparent_, Afrag_, Bparent_, Bfrag_, CrossType_String] := Block[{newA, newB}, newA = List[ReplacePart[Aparent[[1]], Bfrag[[1]], Afrag[[2]] ], {}, {"Index?", CrossType, {Aparent[[3, 1]], Afrag[[2]], Bparent[[3, 1]], Bfrag[[2]]}}]; newB = List[ReplacePart[Bparent[[1]], Afrag[[1]], Bfrag[[2]] ], {}, {"Index?", CrossType, {Bparent[[3, 1]], Bfrag[[2]], Aparent[[3, 1]], Afrag[[2]]}}]; Return[{newA, newB}] ]; CrossSubFunctions[Aparent_, Bparent_] := Block[{Adepth, Bdepth, Afragment={}, Bfragment={}}, (* Print["___inside CrossSubFunctions "]; *) Adepth = Depth[Aparent[[1]]]; Bdepth = Depth[Bparent[[1]]]; If [Adepth > 1, Afragment = RandomPick[AllSubFunctions[Aparent[[1]] ]] ]; If [Bdepth > 1, Bfragment = RandomPick[AllSubFunctions[Bparent[[1]] ]] ]; Which[ (Adepth > 1)&&(Bdepth > 1), Return[Cross[Aparent, Afragment, Bparent, Bfragment, "CrossSubs"]], (* splicing instead *) (Adepth > 1), Return[AtomCross[Aparent, Afragment, Bparent, "CrossSubs"]], (Bdepth > 1), Return[AtomCross[Bparent, Bfragment, Aparent, "CrossSubs"]], True, Return[{}] (* don't want crossing 2 atoms *) ] ]; CrossAtAny[Aparent_, Bparent_] := Block[{Adepth, Bdepth, Afragment, Bfragment}, (* Print["___inside CrossAtAny"]; *) Adepth = Depth[Aparent[[1]]]; Bdepth = Depth[Bparent[[1]]]; If[Adepth > 1, Afragment = RandomPick[AllParts[Aparent[[1]]] ]]; If[Bdepth > 1, Bfragment = RandomPick[AllParts[Bparent[[1]]] ]]; Which[ (Adepth > 1)&&(Bdepth > 1), Return[Cross[Aparent, Afragment, Bparent, Bfragment, "CrossAny"]], (* splicing instead *) (Adepth > 1), Return[AtomCross[Aparent, Afragment, Bparent, "CrossAny"]], (Bdepth > 1), Return[AtomCross[Bparent, Bfragment, Aparent, "CrossAny"]], True, Return[{}] (* don't want crossing atoms *) ]; ]; (* AtomCross: a single atom exchanged status with a subtree, both returned *) AtomCross[Aparent_, Afrag_, Bparent_, CrossType_String] := Block[{newA, newB}, newA = List[ReplacePart[Aparent[[1]], Bparent, Afrag[[2]] ], {}, {"Index?", CrossType, {Aparent[[3, 1]], Afrag[[2]], Bparent[[3, 1]], {0}} }]; newB = List[Afrag[[1]], {}, {"Index?", CrossType, {Bparent[[3, 1]], {0}, Aparent[[3, 1]], Afrag[[2]]}}]; Return[{newA, newB}] ]; (********************* Breeder - Population Processors ******************) (**** RunGP is the top level function to run GP, at present with all default settings in this package, for simplicity. You can change it to expect GPbuild, Fitness, Environments, and other low-level functions ******) RunGP[IDtag_String] := Block[{population, allpopulations, dataDir, recordS = "runText", envS = "envDat", genS = "runGens", date, conD = "_", , conP = "-", Dtag, Ptag, ParVarEnv, startTime = Take[Date[], -4], stopTime}, Ptag = StringJoin[ToString[SizeOfPopulation], "G", ToString[NumberOfGenerations]]; date = Map[ToString[#]&, Take[Date[], {2, 4}]]; Dtag = StringJoin[date[[1]], conD, date[[2]], conD, date[[3]]]; dataDir = "GPdata/"; recordFile = StringJoin[dataDir, recordS, Dtag, conP, Ptag, IDtag]; gensFile = StringJoin[dataDir, genS, Dtag, conP, Ptag, IDtag]; envFile = StringJoin[dataDir, envS, Dtag, conP, Ptag, IDtag]; PrependTo[$Echo, recordFile]; PrependTo[$Output, recordFile]; PrependTo[$Messages, recordFile]; Print[]; Print[" date stamp: ", Date[], " ", recordFile]; Print[]; Print[""]; Print["********* Creating the first generation ******"]; Print[" of ", SizeOfPopulation, " individuals"]; Print[""]; Print[" cross-at-any %: ", CrossOverAtAnyPointFraction]; Print[" cross-at-subs %: ", CrossOverAtSubsFraction]; Print[" ave. mutation %: ", MutationFraction]; Print[" fitness goal: ", FitnessGoal]; Print[" generations limit = ", NumberOfGenerations]; Print[]; Print[" NumOneSidePicks = ", NumOneSidePicks, "; MeanWander = ", MeanWander]; population = CreatePopulation[SizeOfPopulation, SeedPrograms]; ExecuteGenerations[population, NumberOfGenerations, FitnessGoal]; stopTime = Take[Date[], -4]; Print[""]; Print["___ Run Time = ", 60*(60*( 24*(stopTime[[1]] - startTime[[1]]) \ + (stopTime[[2]] - startTime[[2]])) + stopTime[[3]] - startTime[[3]]) \ + stopTime[[4]] - startTime[[4]], " Seconds"]; SaveTo[gensFile, Generations]; SaveTo[envFile, IdealBehavior]; Print["___ max memory used: ", N[MaxMemoryUsed[]/10^6], " MBytes"]; Print["___ memory in use: ", N[MemoryInUse[]/10^6], " MBytes"]; Print[""]; Print[ReportOnRun[]]; $Echo = Drop[$Echo, 1]; $Output = Drop[$Output, 1]; $Messages = Drop[$Messages, 1]; Return[BestBehaviors]; ]; SaveTo[file_String, data_] := {OpenWrite[file]; Write[file, data]; Close[file];} ExecuteGenerations[popul_, numofgens_, criterion_] := Block[{populations = List[popul], currentpop, n = 0, currentbestindex, currentbestfitness, oldbestfitness}, currentpop = popul; currentbestindex = BestIndexInPop[currentpop]; currentbestfitness = currentpop[[ currentbestindex, 2, 1]]; AppendTo[Generations, currentpop]; (* ? needs generation index here *) ReportOnGeneration[currentpop]; oldbestfitness = currentbestfitness; While[(n < numofgens)&&(currentbestfitness > criterion), n++; currentpop = LocalSamplerBreeder[currentpop, n]; currentbestindex = BestIndexInPop[currentpop]; currentbestfitness = currentpop[[ currentbestindex, 2, 1]]; AppendTo[Generations, currentpop]; (* ? needs generation index here *) ReportOnGeneration[currentpop]; If [currentbestfitness < oldbestfitness, {ShowBestOfGeneration[ currentpop ]; (* plot new improved indiv *) oldbestfitness = currentbestfitness}]; ]; ]; LocalSamplerBreeder[population_, genNum_] := Block[{newCrossAny={}, newCrossSubs={}, newMutants={}, newCopies={}, popcount, bestIndex, oldBest, Sites, temp1, temp2, temp3, temp4, NumCrossAtAny, NumCrossAtSubs, NumProportionate, NumMutation, newPop}, popcount = Length[population]; NumCrossAtAny = Floor[popcount*CrossOverAtAnyPointFraction]; NumCrossAtSubs = Floor[popcount*CrossOverAtSubsFraction]; NumMutation = Min[ Floor[popcount*(-Log[1.0 - Random[]])*MutationFraction], (popcount - 1 - (NumCrossAtAny + NumCrossAtSubs))]; NumProportionate = Max[0, (popcount - 1 - (NumCrossAtAny + NumCrossAtSubs + NumMutation))]; Print[""]; Print["inside BreedNewGeneration"]; Print[" crossAny: ", NumCrossAtAny, "; crossAtSubs: ", NumCrossAtSubs, "; mutations: ", NumMutation]; bestIndex = BestIndexInPop[population]; oldBest = population[[bestIndex]]; Sites = Delete[AllSites[popcount], bestIndex]; (* the current best indiv is saved to avoid being lost in the breeding process *) temp1 = PickMates[Sites, NumCrossAtAny, NumOneSidePicks, \ popcount, "CrossAny"]; newCrossAny = temp1[[1]]; temp2 = PickMates[temp1[[2]], NumCrossAtSubs, NumOneSidePicks, \ popcount, "CrossSubs"]; newCrossSubs = temp2[[1]]; temp3 = PickSites[temp2[[2]], NumProportionate, NumOneSidePicks, popcount, "Reproduced"]; newCopies = temp3[[1]]; temp4 = temp3[[2]]; If [ NumMutation > 0, newMutants = PickSites[temp4, NumMutation, \ NumOneSidePicks, popcount, "Mutated"][[1]] ]; newPop = Join[ \ {Join[Take[ population[[bestIndex]], 2], \ List[{bestIndex, "lastBest", {bestIndex}}]]}, \ Map[CrossOverAtSite[#, population, CrossAtAny]&, \ Map[SelectBestMates[#, population]&, newCrossAny] ], \ Map[CrossOverAtSite[#, population, CrossSubFunctions]&, \ Map[SelectBestMates[#, population]&, newCrossSubs] ], \ Map[ReproduceAtSite[#, population]&, \ Map[SelectBest[#, population]&, newCopies] ], \ Map[MutateAtSite[#, population]&, Map[SelectBest[#, population]&, newMutants] ]]; newPop = UniqueReIndex[Sort[newPop, (#1[[3,1]] < #2[[3,1]])&], genNum]; (* sort index to order, then increment by poplength*genNum to make unique individual indices *) Return[newPop] ]; UniqueReIndex[pop_, generation_] := Block[{newpop = {}, poplength = Length[pop], i}, For[i = 1, i <= poplength, i++, AppendTo[newpop, \ ReplacePart[pop[[i]], (generation*poplength + i), {3,1} ]] ]; Return[newpop]; ]; CrossOverAtSite[site_, population_, Method_Symbol] := Block[{parentM, parentF, temp, done = 0}, parentM = population[[ site[[3]] ]]; parentF = population[[ site[[4]] ]]; While[ done == 0, (* Method is either CrossAtAny or CrossSubFunctions *) temp = Select[ Method[parentM, parentF], (Depth[#[[1]]] <= MaxDepthOfNewCrossOverIndividual)& ]; (* don't accept too deep expressions *) (* discards invalid expressions and then sort by fitness, accepting the one with least error (smallest fitVal) *) temp = Sort[Select[ \ Map[ Join[Fitness[ #[[1]], Environments], Drop[#, 2]]&, temp], (NumberQ[#[[2,1]]])&], ( #1[[2, 1]] < #2[[2, 1]] )& ]; If [ Length[temp] > 0, done = 1]; (* if any acceptable indiv found *) ]; Return[temp[[1]]//. "Index?"->site[[1]]]; (* tagging the site index *) ]; ReproduceAtSite[site_, population_] := {} /; (site == {}); ReproduceAtSite[site_, population_] := Join[ Take[ population[[ site[[3]] ]], 2], List[ {site[[1]], site[[2]], {site[[3]]}} ] ]; MutateAtSite[site_, pop_] := {} /; (site == {}); MutateAtSite[site_, pop_] := Block[{mutant, individual, allpoints, oldfragment, newfragment}, individual = pop[[ site[[1]] ]]; program = individual[[1]]; oldfragment = RandomPick[AllParts[ program ]]; Which[ oldfragment != {}, (* normal case, some fragment obtained *) oldfragdepth = Depth[oldfragment[[1]]]; newfragment = GPbuildTree[ Random[Integer, {1, Max[1, Min[(MaxDepthOfNewCrossOverIndividual - oldfragdepth), MaxDepthOfNewMutantSubTrees]]}] ]; mutant = ReplacePart[program, newfragment, oldfragment[[2]]], oldfragment == {}, (* parent indiv is a single atom; creates new indiv expression *) mutant = GPbuildTree[ Random[Integer, {1, MaxDepthOfNewCrossOverIndividual}] ] ]; mutant = Fitness[ mutant, Environments]; Which [ NumberQ[mutant[[2, 1]]], (* case of successful mutation *) mutant = Join[mutant, { {site[[1]], "Mutated", \ {site[[1]], oldfragment[[2]]}} } ], True, (* mutated one dies; local best takes its place *) mutant = ReproduceAtSite[site, pop] ]; Return[mutant]; ]; (**************************** Sampler-Selector ***************************) SelfPickChance = 0.1; MeanWander = 1.5; epsMargin = 0.005; NeighborSampling[point_, mean_, selfchance_, margin_] := Ceiling[ mean*Log[(0.5 - selfchance/2.)/(1 - point + margin)] ] \ /; (point >= 0.5 + selfchance/2.)&&(point <= 1.0) NeighborSampling[point_, mean_, selfchance_, margin_] := Floor[ -mean*Log[(0.5 - selfchance/2.)/(point + margin)] ] \ /; (point >= 0.0)&&(point <= 0.5 + selfchance/2.) NeighborSampling[point_, mean_, selfchance_, margin_] := 0 \ /; (point > 0.5 - selfchance/2.0)&&(point < 0.5 + selfchance/2.0) RandomPick[list_List] := list[[ Random[Integer, {1, Length[list]}] ]] /; Length[list] > 0; RandomPick[list_List] := list /; Length[list] == 0; WrapPt[length_Integer, number_Integer] := length /; (number == length) WrapPt[length_Integer, number_Integer] := Mod[number-1, length] + 1 \ /; (number != length) AllSites[PopLength_] := Map[List[#, Available]&, Range[PopLength]]; PickMates[sites_, quota_, num1sidepicks_, numtotal_, OpName_String] := Block[{availsites, i, pick, results = {}, pickpoint, lgth}, lgth = numtotal; (* should be SizeOfPopulation in a GP run *) availsites = sites; (* assuming all given sites available *) (* availsites = Select[sites, (#[[2]] == Available)&]; *) Which[ Length[availsites] < quota, Return[results], True, For[ i = 1, i <= quota, i++, pick = RandomPick[availsites]; pickpoint = Position[availsites, pick]; availsites = Delete[availsites, pickpoint[[1, 1]] ]; AppendTo[results, List[pick[[1]], OpName, List[ Map[WrapPt[lgth, #]&, (pick[[1]] + Table[NeighborSampling[Random[], MeanWander, SelfPickChance, epsMargin], {num1sidepicks}]) ], Map[WrapPt[lgth, #]&, (pick[[1]] + Table[NeighborSampling[Random[], MeanWander, SelfPickChance, epsMargin], {num1sidepicks}])]]]] ] ]; Return[{results, availsites}] ]; PickSites[sites_, quota_, num1sidepicks_, numtotal_, OpName_String] := Block[{availsites, i, pick, results = {}, pickpoint, lgth}, lgth = numtotal; availsites = sites; (* assuming all given sites available *) Which[ Length[availsites] < quota, Return[results], True, For[ i = 1, i <= quota, i++, pick = RandomPick[availsites]; pickpoint = Position[availsites, pick]; availsites = Delete[availsites, pickpoint[[1, 1]] ]; AppendTo[results, List[pick[[1]], OpName, Map[WrapPt[lgth, #]&, (pick[[1]] + Table[NeighborSampling[Random[], MeanWander, SelfPickChance, epsMargin], {2*num1sidepicks}]) ] ]]; ] ]; Return[{results, availsites}] ]; SelectBestMates[site_, pop_] := Join[Take[site, 2], Map[BestIndexInSamples[#, pop]&, site[[3]]] ]; SelectBest[site_, pop_] := Join[Take[site, 2], List[BestIndexInSamples[site[[3]], pop]] ]; BestIndexInPop[population_] := Block[{minVal = Infinity, index, bestindex}, For[index = 1, index <= Length[population], index++, If [minVal > population[[index, 2, 1]], {minVal = population[[index, 2, 1]]; bestindex = index;}]; ]; Return[bestindex]; ]; (* returns the best among the given sample individuals in population *) BestIndexInSamples[sampleindex_, pop_] := Block[{minVal = Infinity, , fitVal, i, bestindex}, For[i = 1, i <= Length[sampleindex], i++, fitVal = pop[[sampleindex[[i]], 2, 1]]; If [minVal > fitVal, {minVal = fitVal; bestindex = sampleindex[[ i ]]} ]; ]; Return[bestindex]; ]; (************* Reporting on Individuals and Generations ********************) ReportOnGeneration[ population_] := Block[{bestindex = BestIndexInPop[population], bestindiv, bestprogram, poplength = Length[population], mean = 0.0, stddev = 0.0, generationnumber = Quotient[ population[[1,3,1]], poplength]}, Print[""]; Print["GENERATION: ", generationnumber]; bestindiv = population[[ bestindex ]]; bestprogram = Simplify[bestindiv[[1]] //. Eval]; Print[""]; Print[" bestindex: ", bestindex, "; birthtype: ", bestindiv[[3,2]]]; Print[" best program: ", bestprogram]; Print[" best raw fitness: ", bestindiv[[2, 1]]]; mean = (Apply[Plus, Map[#[[2,1]]&, population]]/poplength)//N; stddev = Sqrt[(Apply[Plus, Map[((#[[ 2,1 ]] - mean)^2)&, population]]/poplength)//N]; Print[" pop mean = ", mean]; Print[" pop std dev = ", stddev]; AppendTo[BestBehaviors, List[generationnumber, mean, stddev, bestindex, bestindiv[[2,1]], \ bestprogram]]; ]; ReportOnRun[] := Map[Take[#, -3]&, BestBehaviors]; ReplayOldGenerations[ generations_] := Block[{n = 0, currentbestindex, currentbestfitness, oldbestfitness, numofgens = Length[generations]}, oldbestfitness = Infinity; While[(n < numofgens), n++; currentpop = generations[[n]]; currentbestindex = BestIndexInPop[currentpop]; currentbestfitness = currentpop[[ currentbestindex, 2, 1]]; ReportOnGeneration[n, currentbestindex, currentpop]; If [currentbestfitness < oldbestfitness, {ShowBestOfGeneration[ currentpop ]; (* plot new improved indiv *) oldbestfitness = currentbestfitness}]; ]; ]; ShowBestOfGeneration[generation_] := ( bestofgenIndex = BestIndexInPop[generation]; Show[AppendTo[DrawIndivFunction[ generation[[bestofgenIndex]] ], Graphics[Text[StringJoin[ToString[ Quotient[generation[[1, 3, 1]], Length[generation]] ], "th generation BestIndiv"], Scaled[{0.5, 0.73}]] ]], Axes->True, AxesOrigin->{0,0}, PlotRange->All]; ) DrawIndivFunction[individual_] := List[Graphics[{Thickness[.001], \ Line[BehaviorFunction[ individual[[1]] ]] }], Graphics[Join[{PointSize[0.02]}, Map[Point, IdealBehavior]]] , Graphics[{Text[ToString[Simplify[ individual[[ 1 ]]//.Eval]], Scaled[{0.45, 0.85}]], Text["X", Scaled[{0.98, 0.1}] ], Text["f(x)", Scaled[{0.08, 0.97}]]} ] ]; ShowIndivFunction[indiv_] := Show[DrawIndivFunction[indiv], Axes->True, AxesOrigin->{0,0}, PlotRange->All]; BehaviorFunction[program_] := Transpose[Append[List[Map[#[[2]]&, Environments[[1]]]], N[Map[(program //.Eval//. #)&, Environments[[1]] ]] ]]; PopulationRawFitness[pop_] := Map[#[[ 2,1 ]]&, pop]; PopulationRawDistribution[pop_] := Block[{fitnesses, length}, fitnesses = PopulationRawFitness[pop]; length = Length[fitnesses]; Return[Transpose[Join[List[ Sort[fitnesses]], List[ N[Range[length]/length]]] ]] ]; ShowPopRawFitness[population_, quantile_] := Show[Graphics[{Thickness[.001], Line[Take[PopulationRawDistribution[ population ], Round[quantile*Length[ population ]/100] ]] } ], Graphics[{Text["raw fitness", Scaled[{0.86, 0.08}] ], Text["Cumulative ", Scaled[{0.12, 0.97}]], Text[StringJoin[ToString[ Quotient[population[[1, 3, 1]], Length[population]] ], "th population"], Scaled[{0.7, 0.8}]]} ], Axes->True, AxesOrigin->{0,0}]; ShowFitnessOfAPop[genindex_, allgenerations_, quantile_] := ShowPopRawFitness[allgenerations[[genindex]], quantile]; End[]; EndPackage[];