(* $Id: GeneticProgramming.m, v 2.1 1995/5/16 RBN $ *) (*:Version: Mathematica 2.2 *) (*:Name: GeneticProgramming *) (*:Title: GeneticProgramming *) (*:Summary: Functions for genetic programming. *) (*:Keywords: *) (*:Requirements: No special system requirements. *) (*:Warning: None. *) (*:Sources: John R. Koza, "Genetic Programming: On the Programming of Computers by Means of Natural Selection" MIT Press, 1992. Graham Spencer, "Automatic Generation of Programs for Crawling and Walking" in "Advances in Genetic Programming," Kenneth E. Kinnear, Jr., ed. MIT Press, 1994. *) Print["GeneticProgramming.m, version 2.1."] ; (* set up the package context, included any imports *) BeginPackage["GeneticProgramming`", "Utilities`FilterOptions`"] (* usage messages for the exported functions and the context itself *) GeneticProgramming::usage = "GeneticProgramming.m is a package for finding optimal computer programs." Size::usage = "Size[expr] returns the size (i.e., number of nodes) of the expression expr." PrintExpression::usage = "PrintExpression[expr] prints the expression expr heirarchically." RandomExpression::usage = "RandomExpression[depth,funcs,terms] returns a random expression with elements from funcs for internal nodes and from terms for external nodes. All paths from the root to each external node are of constant depth." VariableRandomExpression::usage = "VariableRandomExpression[maxdepth,funcs,terms] returns a random expression with elements from funcs for internal nodes and from terms for external nodes. Paths from the root to each external node are of variable depth." DrawExpression::usage = "DrawExpression[expr] draws the tree structure of an expression and labels the internal nodes with the corresponding functions and the external nodes with the terminals." Roulette::usage = "Roulette is a selection method that specifies that the probability of selection is proportional to the magnitude of the adjusted fitness." Rank::usage = "Rank is a selection method that specifies that the probability of selection is proportional to the rank of the adjusted fitness." Reproduce::usage = "Reproduce[expr] is a genetic operator that returns a copy of the expression expr." CrossOver::usage = "CrossOver[{mom,dad}] is a genetic operator that returns a list of two new expressions made by exchanging a subexpression from dad with a subexpression from mom." CrossOverKoza::usage = "CrossOverKoza[{mom,dad},pint] is a genetic operator that returns a list of two new expressions made by exchanging a subexpression from dad with a subexpression from mom with probability pint that the exchange takes place at internal nodes." Mutate::usage = "Mutate[expr,funcs,terms] is a genetic operator that returns a copy of the expression expr with one of the subexpressions replaced by a random expression built from funcs and terms." Perturb::usage = "Perturb[expr] is a genetic operator that returns a copy of the expression expr with one of the constants perturbed." GeneticProgram::usage = "GeneticProgram[cases, vars, funcs, terms, prepData, standardFit, successQ] optimizes a population of expressions for the data in cases ({...,{xi1,xi2,...,xin,yi},...}). vars is the list of symbols corresponding to the xi. funcs is the list of functions to use and their number of arguments (e.g., {{Plus,2}, {Sin,1}, ...}). terms is the list of terminals to use (e.g., {x, y, Pi, ...}). prepData[cases,vars] prepares the data in cases for use by standardFit. standardFit[data,vars,pop] is the function that computes the standardized fitness given the data in data, the variables in vars, and a set of expressions pop, which the algorithm strives to minimize. successQ[stdFit] is a predicate that tests for the presence of a successful standard fitness." (* usage messages for input options *) PopulationSize::usage = "PopuationSize is an input option for GeneticProgram that indicates the number of expression trees to use." MaxGenerations::usage = "MaxGenerations is an input option for GeneticProgram that indicates the maximum number of generations to run." GeneticOperators::usage = "GeneticOperators is an input option for GeneticProgram that indicates the the list of genetic operators to use. Each element of the list is a triple of then name of the fuction, the number of parents, and the relative probability." ProbInternal::usage = "ProbInternal is an input option for GeneticProgram that indicates the probability of crossover at internal point for CrossOverKoza." MaxDepthInitial::usage = "MaxDepthInitial is an input option for GeneticProgram that indicates the maximum depth of expression tree in initial population." MaxDepthCreated::usage = "MaxDepthCreated is an input option for GeneticProgram that indicates the maximum depth of expression tree in subsequent populations." Selection::usage = "Selection is an input option for GeneticProgram that indicates the method for selecting expression trees for genetic operations." CollectStatistics::usage = "CollectStatistics is an input option for GeneticProgram that indicates that statistics should be collected over the run." PrintInterval::usage = "PrintInterval is an input option for GeneticProgram that indicates how often to print fitness, cpu, & memory." InitialGeneration::usage = "InitialGeneration is an input option for GeneticProgram that indicates the number of the initial generation when continuing a run." InitialPopulation::usage = "InitialPopulation is an input option for GeneticProgram that indicates the initial population to use when continuing a run." InitialRandomState::usage = "InitialRandomState is an input option for GeneticProgram that indicates the initial random state to use when continuing or restarting a run. InitialRandomState is also an output option that points to the state of Random[] when the run began." (* usage messages for output options *) BestOfRun::usage = "BestOfRun is an output option for GeneticProgram that points to the best expression tree of the run." AdjustedFitnessOverRun::usage = "AdjustedFitnessOverRun is an output option for GeneticProgram that points to the list of {min,Q1,median,Q3,max} of the adjusted fitness over the run." ExpressionSizeOverRun::usage = "ExpressionSizeOverRun is an output option for GeneticProgram that points to the list of {min,Q1,median,Q3,max} of expression size/complexity over the run." ExpressionDepthOverRun::usage = "ExpressionDepthOverRun is an output option for GeneticProgram that points to the list of {min,Q1,median,Q3,max} of expression depth over the run." VarietyOverRun::usage = "VarietyOverRun is an output option for GeneticProgram that points to the list of {min,Q1,median,Q3,max} of the population variety over the run." LastGeneration::usage = "LastGeneration is an output option for GeneticProgram that points to the ordinal number of the last generation." FinalFit::usage = "FinalFit is an output option for GeneticProgram that points to the raw fitness of the final population of expressions." FinalPop::usage = "FinalPop is an output option for GeneticProgram that points to the final population of expressions." FinalRandomState::usage = "FinalRandomState is an output option for GeneticProgram that points to the state of Random[] when the run ended" Spacing::usage = "Spacing is an option for DrawExpression that specifies the horizontal and vertical spacing between nodes in the tree." Margin::usage = "Margin is an option for DrawExpresson that specifies the relative horizontal and vertical margins around the tree." ExpressionStyle::usage = "ExpressionStyle is an option for DrawExpression that specifies the graphics directives for sub expressions. A list of pairs of expression parts and graphics directives is expected: {..., {parti, gdiri}, ...}, where parti is a list of indices and gdiri is a list of directives." ExpressionLabel::usage = "ExpressionLabel is an option for DrawExpression that specifies the text for the tree label." (* options for the exported objects *) Options[DrawExpression]= {Spacing->Automatic, Margin->Automatic, ExpressionStyle->Automatic, ExpressionLabel->None} ; Options[GeneticProgram]= {PopulationSize->500 (* population size *) ,MaxGenerations->50 (* maximum number of generations *) ,GeneticOperators-> (* {operator, # parents, relative probability} *) {{CrossOver,2, 0.85} (* crossover *) ,{Reproduce,1, 0.01} (* reproduction *) ,{Mutate, 1, 0.01} (* mutation *) ,{Perturb, 1, 0.13} (* constant perturbation *) } ,ProbInternal->0.90 (* probability of crossover at internal point *) ,MaxDepthInitial->7 (* maximum depth of expression tree in initial population *) ,MaxDepthCreated->17 (* maximum depth of expression tree in subsequent populations *) ,Selection->Roulette (* selection method *) ,CollectStatistics->False (* collect statistics over the run *) ,PrintInterval->5 (* print fitness, cpu, & memory at every nth generation *) ,InitialGeneration->0 (* initial generation ordinal number *) ,InitialPopulation->Automatic (* initial population *) ,InitialRandomState->Automatic (* initial random state *) } Begin["`Private`"] (* begin the private context *) (* unprotect any system functions for which rules will be defined *) protected = Unprotect[ReplacePart] (* definition of auxiliary functions and local (static) variables *) getSpacing[Automatic]:=getSpacing[{Automatic,Automatic}] getSpacing[{Automatic,dy_}]:=getSpacing[{1,dy}] getSpacing[{dx_,Automatic}]:=getSpacing[{dx,2}] getSpacing[{dx_?NumberQ,dy_?NumberQ}]:={dx,dy} getSpacing[_]:=$Failed getMargin[Automatic]:=getMargin[{Automatic,Automatic}] getMargin[{Automatic,ym_}]:=getMargin[{0.05,ym}] getMargin[{xm_,Automatic}]:=getMargin[{xm,0.10}] getMargin[{xm_?NumberQ,ym_?NumberQ}]:={xm,ym} getMargin[_]:=$Failed randomElement[list_] := list[[ Random[Integer,{1,Length[list]}] ]] randomInt[n_]:=Random[Integer,{n,n}] randomInt[{min_,max_}]:=Random[Integer,{min,max}] makePop[funcs_, terms_, nPop_, maxDepth_, minDepth_:3]:= Module[{pop, d=maxDepth-minDepth+1, i, depth, r}, Table[depth=minDepth+Floor[(i-1)d/nPop] ; If[OddQ[i], While[Depth[r=RandomExpression[depth, funcs,terms]]=0 perturbConstant[n_Integer] := Random[Integer, {Min[n-3, Floor[1.25 n]], Max[Ceiling[0.80 n], n+3]}] /; n<0 toss[p_:0.5]:=Random[]<=p toRank[x_List]:= Module[{f=Union[x,SameTest->SameQ], c, r=Range[Length[x]], g, b, nr}, c=Count[x,#]& /@ f ; g=(b=Take[r,#]; r=Drop[r,#]; b)& /@ c ; nr=(Plus@@#)/Length[#]& /@ g ; nr[[Flatten[Position[f,#]& /@ x]]] ] selectOne[prob_]:= Module[{lo=1, hi=Length[prob], mid, r=Random[]*Last[prob]}, While[lo!=hi-1, mid=Round[(lo+hi)/2] ; If[rIf[m<10,":0",":"]<>ToString[m]<>If[s<10,":0",":"]<>ToString[s] ] (* error messages for the exported objects *) GeneticProgram::vnott = "Warning, the variables `1` are not present in the list of terminals." GeneticProgram::badvar = "`1` is not a valid variable for GeneticProgram." GeneticProgram::badopt = "`1` is not a valid option for GeneticProgram." GeneticProgram::badsel = "Selection method `1` must be one of `2`." DrawExpression::dspace = "Value of option Spacing -> `1` is not Automatic or an appropriate list of Spacing specifications." DrawExpression::dmargn = "Value of option Margin -> `1` is not Automatic or an appropriate list of margin specifications." DrawExpression::style = "ExpressionStyle `1` is not Automatic or a list of parts and graphics directives." DrawExpression::stprt = "Part `1` of ExpressionStyle (`2`) is invalid." (* definition of the exported functions *) RandomExpression[depth_?Positive, funcs_, terms_] := Module[{f, n}, {f, n} = randomElement[funcs] ; If[n > 0, f @@ Table[RandomExpression[depth - 1, funcs, terms], {n}], (* else *) f ] ] RandomExpression[1, funcs_, terms_] := randomElement[terms] Attributes[RandomExpression] = {HoldRest} ; VariableRandomExpression[depth_?Positive, funcs_, terms_] := Module[{f, range}, If[toss[1/depth^2], randomElement[terms], (* else *) {f,range} = randomElement[funcs] ; f @@ Table[VariableRandomExpression[depth - 1, funcs, terms], {randomInt[range]}] ] ] VariableRandomExpression[1, funcs_, terms_] := randomElement[terms] Attributes[VariableRandomExpression]={HoldRest} ; Size[_[args__]] := 1 + Plus @@ Map[Size,{args}] Size[_?AtomQ] := 1 PrintExpression[expr_, n_:4] := PrintExpression[expr, 0, n] PrintExpression[f_[o__], d_, n_] := (printIndented[f, n d]; Scan[PrintExpression[#, d+1, n]&, {o}]) PrintExpression[expr_?AtomQ, d_, n_] := printIndented[expr, n d] printIndented[p_, n_] := Print[StringJoin @@ Table[" ", {n}], p] DrawExpression[expr_, options___Rule]:= Module[{texpr=expr, g, x, y, dx, dy, xmin, xmax, ymin, ymax, optlist, spacing, delx, dely, margin, xm, ym, label, styles, p, st, op, showOptions}, optlist={options} // Flatten ; spacing=Spacing /. optlist /. Options[DrawExpression] ; If[({delx,dely}=getSpacing[spacing])===$Failed, Message[DrawExpression::dspace,spacing] ; Return[$Failed] ] ; margin=Margin /. optlist /. Options[DrawExpression] ; If[({xm,ym}=getMargin[margin])===$Failed, Message[DrawExpression::dmargn,margin] ; Return[$Failed] ] ; {xm,ym}=1+2 {xm,ym} ; styles=ExpressionStyle /. optlist /. Options[DrawExpression] ; If[styles=!=Automatic && Head[styles]=!=List, Message[DrawExpression::style,styles] ; Return[$Failed]] ; For[i=1, i<=Length[styles], i++, If[Length[styles[[i]]]!=2, Message[DrawExpression::stprt,i,styles[[i]]] ; Return[$Failed]] ; {p,st}=styles[[i]] ; p=If[Head[p]=!=List,{p},p] ; st=If[Head[st]=!=List,{st},st] ; op=texpr[[Sequence@@p]] ; If[Head[op]===$g, st=Append[First[op],st] ; op=Last[op]] ; texpr=ReplacePart[texpr, $g[st,op], p] ] ; label=ExpressionLabel /. optlist /. Options[DrawExpression] ; showOptions=FilterOptions[Graphics,Sequence@@optlist] ; g=Last@DrawExpression[texpr,0,0,delx,dely] ; If[label=!=None, g=Prepend[g,Text[label,{0,4}]]] ; {x,y}=(g[[Sequence@@#]]& /@ Position[g,{_?NumberQ,_?NumberQ}]) // Transpose ; dx=((xmax=Max[x])-(xmin=Min[x])) xm ; dy=((ymax=Max[y])-(ymin=Min[y])) ym ; xmin=(xmin+xmax-dx)/2 ; xmax=xmin+dx ; ymin=(ymin+ymax-dy)/2 ; ymax=ymin+dy ; Show[Graphics[g], showOptions, PlotRange->{{xmin,xmax},{ymin,ymax}}, AspectRatio->Automatic ] ] DrawExpression[$g[dir_,expr_],args__]:= Module[{w, g}, {w, g}=DrawExpression[expr,args] ; {w, If[Head[g]===List, Join[dir,g], Append[dir,g]]} ] DrawExpression[p_[c__],x_,y_,dx_,dy_]:= Module[{text=ToString[p], xw, tw, d, xc, xn, gc, xp, yp}, {xw,gc}=(DrawExpression[#,0,y-dy,dx,dy]& /@ {c}) // Transpose ; tw=Plus@@xw ; d=FoldList[Plus,0,Drop[xw,-1]]+xw/2 ; xc=x-tw/2 + d ; tw=Max[tw,Length@Characters[text]] ; {tw+1,Join[Prepend[Line[{{x,y-0.6},{#,y-dy+0.6}}]& /@ xc, Text[text,{x,y}]], MapThread[(#1 /. {{xp_?NumberQ,yp_?NumberQ}:>{xp+#2,yp}})&, {gc,xc}] ]} ] DrawExpression[p_?AtomQ,x_,y_,dx_,dy_]:= Module[{text=ToString[p]}, {Length@Characters[text]+1, Text[ToString[text],{x,y}]} ] CrossOver[parents_, maxDepth_:17] := Module[{ind, sub, children}, ind = randomElement[Position[#, _, Heads->False]]& /@ parents ; sub = MapThread[Part[#1, Sequence@@#2]&, {parents, ind}] ; children = MapThread[ReplacePart, {parents, Reverse[sub], ind}] ; MapThread[If[Depth[#1]<=maxDepth, #1, crossFailures++; #2]&, {children, parents}] ] CrossOverKoza[parents_, pInt_, maxDepth_:17]:= Module[{pint,pext, ind, sub, children}, pint=Position[#,_[__]]& /@ parents ; pext=Position[#,_?AtomQ]& /@ parents ; ind=MapThread[If[toss[pInt] && Length[#1]>0, randomElement[#1], randomElement[#2]]&, {pint,pext}] ; sub = MapThread[Part[#1, Sequence@@#2]&, {parents, ind}] ; children = MapThread[ReplacePart, {parents, Reverse[sub], ind}] ; MapThread[If[Depth[#1]<=maxDepth, #1, crossFailures++; #2]&, {children, parents}] ] Mutate[expr_, funcs_, terms_, maxDepth_:17] := Module[{ind, depth, newSub}, ind = randomElement[Position[expr, _, Heads->False]] ; depth = Random[Integer, (maxDepth - 1 - Length[ind])/4] + 1 ; newSub = VariableRandomExpression[depth, funcs, terms] ; ReplacePart[expr, newSub, ind] ] Attributes[Mutate]={HoldRest} ; Perturb[expr_] := Module[{constants, ind}, constants = Position[expr, _?NumberQ] ; If[Length[constants]>0, ind = randomElement[constants] ; ReplacePart[expr, perturbConstant[ expr[[Sequence@@ind]]], ind], (* else *) expr ] ] GeneticProgram[cases_, vars_, funcs_, terms_, prepData_, standardFit_, successQ_, options___]:= Module[{optlist, validOptions, ok, t0, popSize, maxGen, genOpers, probOper, pInternal, select, maxInitial, maxCreated, collect, pg, data, X, Y, varlist, VnotT, g0, g, pop, best, q1, median, q3, newPop, i, j, k, op, np, p, parents, children, stdFit, adjFit, probExpr, sortFit, sortSize, sortDepth, v, initialRS, fitness={}, size={}, depth={}, variety={}, bestOfRun, nDrop}, t0=TimeUsed[] ; varlist = If[Head[vars]===List, vars, {vars}] ; If[(data=prepData[cases, varlist])===$Failed, Return[$Failed]]; ok=True ; Scan[If[Head[#]=!=Symbol, ok=False ; Message[GeneticProgram::badvar,#]]&, varlist] ; If[!ok, Return[$Failed]] ; If[(VnotT=Complement[varlist,Intersection[varlist,terms]])!={}, Message[GeneticProgram::varterm,vnott] ] ; optlist={options} // Flatten ; validOptions=First /@ Options[GeneticProgram] ; ok=True ; Scan[If[FreeQ[validOptions,#], ok=False ; Message[GeneticProgram::badopt,#]]&, First /@ optlist] ; If[!ok,Return[$Failed]] ; popSize=PopulationSize /. optlist /. Options[GeneticProgram] ; g0=InitialGeneration /. optlist /. Options[GeneticProgram] ; maxGen=MaxGenerations /. optlist /. Options[GeneticProgram] ; maxInitial=MaxDepthInitial /. optlist /. Options[GeneticProgram] ; maxCreated=MaxDepthCreated /. optlist /. Options[GeneticProgram] ; pInternal=ProbInternal /. optlist /. Options[GeneticProgram] ; genOpers=GeneticOperators /. optlist /. Options[GeneticProgram] ; probOper=FoldList[Plus,0, Apply[#3/#2&,genOpers,{1}]] // Rest ; genOpers=Drop[#,-1]& /@ genOpers ; genOpers=genOpers /. { Reproduce->Identity, Mutate->({Mutate[Sequence@@#,funcs,terms,maxCreated]}&), CrossOver->(CrossOver[#,maxCreated]&), CrossOverKoza->(CrossOverKoza[#,pInternal,maxCreated]&), Perturb->({Perturb[Sequence@@#]}&) } ; select=Selection /. optlist /. Options[GeneticProgram] ; If[FreeQ[{Roulette,Rank},select], Message[GeneticProgram::badsel,select,{Roulette,Rank}] ; Return[$Failed] ] ; collect=CollectStatistics /. optlist /. Options[GeneticProgram] ; (* statistics collected for each generation are: best, 1st quartile, median, 3rd quartile, & worst values for adjusted fitness, expression depth, & expression size/complexity; population variety. *) pg=PrintInterval /. optlist /. Options[GeneticProgram] ; initialRS=InitialRandomState /. optlist /. Options[GeneticProgram] ; If[initialRS===Automatic, initialRS=$RandomState, $RandomState=initialRS] ; pop=InitialPopulation /. optlist /. Options[GeneticProgram] ; If[pop===Automatic, pop=makePop[funcs,terms,popSize,maxInitial], (* else *) popSize=Length[pop] ; Share[] ; ] ; g=g0 ; stdFit=standardFit[data,varlist,pop] ; adjFit=1/(1 + stdFit) ; best=Position[adjFit,Max[adjFit]][[1,1]] ; bestOfRun={adjFit[[best]],pop[[best]]} ; If[collect, fitness=Table[Null,{maxGen+1}] ; depth=Table[Null,{maxGen+1}] ; size=Table[Null,{maxGen+1}] ; variety=Table[Null,{maxGen+1}] ; q1=Floor[popSize/4] ; median=Round[popSize/2] ; q3=Ceiling[3popSize/4] ; sortFit=Sort[adjFit] ; sortDepth=Depth/@pop // Sort ; sortSize=Size/@pop // Sort ; v=Length[Union[pop]]/popSize // N ; fitness[[g+1]]=sortFit[[{1,q1,median,q3,popSize}]] ; depth[[g+1]]=sortDepth[[{1,q1,median,q3,popSize}]] ; size[[g+1]]=sortSize[[{1,q1,median,q3,popSize}]] ; variety[[g+1]]=v ] ; If[pg>0 && Mod[g,pg]==0, Print["g=",g,", run best = ",1/First[bestOfRun]-1, ", cpu = ",ts[TimeUsed[]-t0], ", mem = ",Ceiling[MemoryInUse[]/1024]," K"] ] ; While[gFirst[bestOfRun], bestOfRun={adjFit[[best]],pop[[best]]} ] ; If[collect, sortFit=Sort[adjFit] ; sortDepth=Depth/@pop // Sort ; sortSize=Size/@pop // Sort ; v=Length[Union[pop]]/popSize // N ; fitness[[g+1]]=sortFit[[{1,q1,median,q3,popSize}]] ; depth[[g+1]]=sortDepth[[{1,q1,median,q3,popSize}]] ; size[[g+1]]=sortSize[[{1,q1,median,q3,popSize}]] ; variety[[g+1]]=v ] ; If[pg>0 && Mod[g,pg]==0, Print["g=",g,", run best = ",1/First[bestOfRun]-1, ", cpu = ",ts[TimeUsed[]-t0], ", mem = ",Ceiling[MemoryInUse[]/1024]," K"] ] ; ] ; If[pg>0 && Mod[g,pg]!=0, Print["g=",g,", run best = ",1/First[bestOfRun]-1, ", cpu = ",ts[TimeUsed[]-t0], ", mem = ",Ceiling[MemoryInUse[]/1024]," K"] ] ; bestOfRun[[1]]=1/bestOfRun[[1]]-1 ; nDrop=maxGen-g ; {BestOfRun->bestOfRun, AdjustedFitnessOverRun-> If[collect,Drop[fitness,nDrop],fitness], ExpressionDepthOverRun-> If[collect,Drop[depth,nDrop],depth], ExpressionSizeOverRun-> If[collect,Drop[size,nDrop],size], VarietyOverRun-> If[collect,Drop[variety,nDrop],variety], LastGeneration->g, FinalFit->stdFit, FinalPop->pop, InitialRandomState->initialRS, FinalRandomState->$RandomState} ] Attributes[GeneticProgram]={HoldAll} ; (* rules for system functions *) ReplacePart[expr_, new_, {}] := new Protect[ Evaluate[protected] ] (* restore protection of system symbols *) End[] (* end the private context *) Protect[] (* protect exported symbols *) EndPackage[] (* end the package context *)