gp2lattice.awk

Unix script to convert genetic programs written in lisp style to gnuplot script and data file for displaying GP tree as a circular lattice [daida:2005:GPEM].
produced by: gawk -f gp2lattice.awk -v 'data=full127.dat' full127 > full127.gnu; gnuplot full127.gnu

Example

Program as circular lattice graph

Example of graph produced by gnuplot from data file
This example produced by
gawk -f gp2lattice.awk -v "data=gp.dat" example.txt > gp.gnu
gnuplot gp.gnu

example.txt (as lisp2dot)

#example 21 Jul 2004 http://www.cs.ucl.ac.uk/staff/W.Langdon

(EQ 
    D1 
    (NOT
	(IFLTE D1 D2 D3 D4)))


gp.dat

#Created by gp2lattice.awk Revision: 1.1 $ Mon Feb 22 11:03:37 GMT 2010
#from file example.txt
# (EQ  	     D1  	     (NOT (IFLTE D1 D2 D3 D4))) 

#5 D2
1.22461e-16, 2 1
1.14805, 2.77164 1

#2 NOT
0, 0 1
6.12303e-17, 1 1

#6 D3
1.22461e-16, 2 1
-1.14805, 2.77164 1

#1 D1
0, 0 1
6.12303e-17, -1 1

#4 D1
1.22461e-16, 2 1
2.77164, 1.14805 1

#7 D4
1.22461e-16, 2 1
-2.77164, 1.14805 1

#3 IFLTE
6.12303e-17, 1 1
1.22461e-16, 2 1

#0 EQ
0, 0 1
0, 0 1

gp.gnu

Assumes gnuplot 4.2 or later.

#Created by gp2lattice.awk Revision: 1.1 $ Mon Feb 22 11:03:37 GMT 2010
#from file example.txt
#size 8 depth 4
set label "EQ" at 0, 0 center front
set label "D1" at 6.12303e-17, -1 center front
set label "NOT" at 6.12303e-17, 1 center front
set label "IFLTE" at 1.22461e-16, 2 center front
set label "D1" at 2.77164, 1.14805 center front
set label "D2" at 1.14805, 2.77164 center front
set label "D3" at -1.14805, 2.77164 center front
set label "D4" at -2.77164, 1.14805 center front
#set nolabel
#Choose desired output type. Remember to disable pause -1
#set terminal postscript eps colour 22; set output 'gp.eps'
#set terminal png; set output 'gp.png'
set title " (EQ D1 (NOT (IFLTE D1 D2 D3 D4))) "
set nokey
set noxtics
set noytics
set noborder
#set pointsize 1.5
set size 0.8,1.0 #square
set xrange[-2.77164:2.77164]
set yrange[-1:4.54328]
plot "gp.dat" with linesp pt 7
pause -1

code gp2lattice.awk

#gawk script to create data file for gnuplot to display GP lisp s-expressions at circular lattice
#Jason M. Daida and Adam M. Hilss and David J. Ward and Stephen L. Long. Visualizing Tree Structures in Genetic Programming. Genetic Programming and Evolvable Machines, 6(1):79-110, 2005.
#http://www.cs.bham.ac.uk/~wbl/biblio/gp-html/daida_2005_GPEM.html
#WBL 17 Feb 2010 created $Revision: 1.2 $"
#based on RE_gp2dot.awk r1.12
#It self based on http://www.cs.ucl.ac.uk/staff/W.Langdon/lisp2dot.html lisp2dot.awk r1.3
#and Steven M Gustafson's awk/gnuplot code

#Inputs: file containing program as text

#examples
#gawk -f gp2lattice.awk -v "data=gp.dat" lisp2dot_example.txt > t.gnu
#gnuplot t.gnu

BEGIN { 
  datafile = (data!="")? data : "/tmp/points";

#Arity is given by function name via these four strings NOT brackets
#all non-terminals from GProc1.8 sym.cc r1.124
#You may need to edit this file or override by command line.

  if(arity1=="") arity1 =",NOT,SMNOT,XC,YC,ANN,Int,Frac,INT,FRAC,";
  if(arity2=="") arity2 =",ADD,SUB,MUL,DIV,AND,OR,NAND,NOR,XOR,EQ,APPROX,DIF,GT,GTEQ,LT,LTEQ,SMAND,SMOR,SMEQ,SMDIF,SMGE,SMLE,Max,Min,MaxA,MinA,";
  if(arity3=="") arity3 =",IF,";
  if(arity4=="") arity4 =",IFLTE,";
##else arity0, ie terminal

  twopi  = 8 * atan2(1,1);
  printf("#Created by gp2lattice.awk %s %s\n",
	 substr("$Revision: 1.2 $",2),strftime());
  printf("#Created by gp2lattice.awk %s %s\n",
	 substr("$Revision: 1.2 $",2),strftime()) > datafile
}

(FNR==1) {
  printf("#from file %s\n",FILENAME);
  printf("#from file %s\n",FILENAME) > datafile
}

(index($1,"(")==1 && code=="") { #start of program
  code = $0;
  next;
}

($1=="poly0" && (n1=index($0,"="))>1) { #GProc start of program
  code = substr($0,n1+1);
  next;
}

(index($0,"#")==0 && ($1!="poly0") ) {
  code = sprintf("%s %s",code,$0);
  next;
}

END{
    printf("#%s\n",code) > datafile;
    print_data(code); #print_dot(substr($0,n1));

    print_allpos();
    printf("#set nolabel\n");
    printf("#Choose desired output type. Remember to disable pause -1\n");
    printf("#set terminal postscript eps colour 22; set output '%s'\n",
	   gnuoutfile((data!="")? data : FILENAME,"eps"));
    printf("#set terminal png; set output '%s'\n",
	   gnuoutfile((data!="")? data : FILENAME,"png"));
    printf("set title \"%s\"\n",gensub(/[[:space:][:cntrl:]]+/," ","g",code));
    printf("set nokey\n");
    printf("set noxtics\n");
    printf("set noytics\n");
    printf("set noborder\n");
    printf("#set pointsize 1.5\n");
    printf("set size 0.8,1.0 #square\n");
    side = max(xmax-xmin,ymax-ymin);
    printf("set xrange[%s:%s]\n",xmin,xmin+side);
    printf("set yrange[%s:%s]\n",ymin,ymin+side);
    printf("plot \"%s\" with linesp pt 7\n",datafile);
    printf("pause -1\n");
}
function gnuoutfile(infile,type, n,t) {
  if(type=="") type="eps";
  if(infile=="") return sprintf("gp2lattice.%s",type);
  n=split(infile,t,".");
  if(n<2) return sprintf("%s.%s",infile,type);
  return sprintf("%s.%s",substr(infile,1,length(infile)-length(t[n])-1),type);
}

#shared node_name,node_dpth,node_back,node_nxt,node_arity,separation,
function print_data(code,  inst,l,size,pc,parent,args,i,
		          node_child,
		          j,max_depth,
		          p,q,n,t) {
  for(i in node_dpth)  delete node_dpth[i];
  for(i in node_name)  delete node_name[i];
  for(i in node_back)  delete node_back[i];
  for(i in node_arity) delete node_arity[i];
  for(i in node_nxt)   delete node_nxt[i];
  for(i in printed)    delete printed[i];

  l=split(code,inst,"[ \t()]"); #remove brackets and layout
# printf("#'%s'\n",code);
# printf("#%d4",l);for(i=1;i<=l;i++) printf("'%s'",inst[i]);printf("\n");
  size = 0;
  pc=0;
  parent=-1;
  args=1; #sanity check only
  for(i=1;i<=l;i++) {
    if(length(inst[i])>0) {
      node_name[pc]=inst[i];
      node_nxt[pc]=0;
      node_arity[pc]=arity(node_name[pc]);
      args += node_arity[pc]-1;
#      printf("//i=%3d pc=%3d %2d %d args=%2d `%s' %d %20s %3d\n",    \
#	     i,                                          \
#	     pc,                                         \
#	     arity(node_name[pc]),			 \
#	     node_arity[pc],                             \
#	     args,sym,n,				 \
#	     sprintf(",%s,",node_name[pc]),              \
#	     parent);

      assert(args>=0,
	     sprintf("badly formed tree. Check arity1-4? args %d i=%d pc=%d %s",
		     args,i,pc,inst[i]));
      node_back[pc]=parent;
      node_dpth[pc]=node_dpth[parent];
      size++;
      node_dpth[pc]++;
      if(node_dpth[pc]>max_depth) max_depth = node_dpth[pc];
      node_nxt[parent]++;
      node_child[parent,node_nxt[parent]]=pc;
      node_coord[pc]=sprintf("%s,%d",node_coord[parent],node_nxt[parent]);
      if(node_arity[pc]>0) parent=pc;
      else {
	for(j=parent; node_nxt[j]>=node_arity[j] && j>=0; ){
	  j=node_back[j];
	  parent=j;
#	  printf("//j=%3d parent=%3d\n",j,parent);
	}
      }
      pc++;
    }
  }
  printf("#size %d depth %d\n",size,max_depth);
  assert(args==0,
	 sprintf("ERROR2 badly formed tree2.Check arity1-4? args %d i=%d pc=%d %s",
		 args,i,pc,inst[i]));
  print_pos(0);
  for(p=0;p<pc;p++){
      for(i=1;i<=node_arity[p];i++) {
	  q=node_child[p,i];
	  print_pos(q);
      }
  }
}

function print_pos(pc,  n,t,i,A,B,B0,p,sibs,middle) {
  #if(node_nxt[pc]==0) return;
  #printf("#print_pos(%d) %s `%s' ",pc,node_name[pc],node_coord[pc]);
  n=split(node_coord[pc],t,",");
  p = pc;
  #printf("n=%d\n",n);
  for(i=n;i>2;i--) {#ignore leading comma separator and root
    assert(p in node_back,"opps p not in node_back");
    p = node_back[p];
    assert(node_arity[p]>0,"opps no arity");
    sibs[i]=node_arity[p];
    #printf("#sibs[%d] %d\n",i,sibs[i]);
  }
  A = 1;
  B0 = B = 0;
  print_2pos(0,B0,0,B,pc);
  for(i=3;i<=n;i++) {#ignore leading comma separator and root
    middle = (sibs[i]-1)/2;
    B += A*(t[i]-1-middle)/sibs[i];
    A /= sibs[i];
    #printf("#t[%d]=%d,sibs[%d]=%d A=%s B=%s\n",i,t[i],i,sibs[i],A,B);
    print_2pos(i-3,B0,i-2,B,pc);
    B0 = B;
  }
  #printf("#%s A=%s B=%s %s %s\n",node_name[pc],A,B,1.0/A,(B!=0)? 1.0/B : "inf");
  printf("set label \"%s\" at %s center front\n",node_name[pc],position(n-2,B));
}	 
function print_allpos(  i,t) {
  for(i in Print_2pos) {
    split(i,t,SUBSEP);	
    printf("\n#%s\n",Print_2pos_name[i]) > datafile; #gnuplot separator
    print_pos2(t[1],t[2],Print_2pos[i]);
    print_pos2(t[3],t[4],Print_2pos[i]);
  }
}

function print_2pos(i0,B0,i,B,pc) {
  if((i0,B0,i,B) in printed) return;
  printed[i0,B0,i,B]=1;
  #save pc and name of first occurrence of this limb of the tree
  if(!((i0,B0,i,B) in Print_2pos))
     Print_2pos_name[i0,B0,i,B] = sprintf("%d %s",pc,node_name[pc]);
  Print_2pos[i0,B0,i,B]++;
}

function position(i,B,  x,y) {
  x=i*cos(twopi*B);
  y=i*sin(twopi*B);
  if(x>xmax) xmax=x;
  if(x<xmin) xmin=x;
  if(y>ymax) ymax=y;
  if(y<ymin) ymin=y;
  return sprintf("%s, %s",x,y);
}
function print_pos2(i,B,count) {
  printf("%s %d\n",position(i,B),count) > datafile;
}

function arity(name, t_,s_) {
  last =name;
    t_ = sprintf(",%s,",name);
#   printf("#t_'%s'\n",t_);
    if(index(arity1,t_)>0) return 1;
    if(index(arity2,t_)>0) return 2;
    if(index(arity3,t_)>0) return 3;
    if(index(arity4,t_)>0) return 4;
    s_ = sprintf("(%s",name);
    if(index(code,s_)) suspect_leaf = s_;
    return 0;
}

function min(a,b) {
  return (a>b)? a : b;
}

#from syntax.awk r1.37
function max(a,b) {
  return (a>b)? a : b;
}
function assert(true,id) {
  if(!true) {
    printf("ERROR %s\n%s File %s\n",id,suspect_leaf,FILENAME) > "/dev/stderr";
    exit 1;
  }
}


Examples created using pre.awk

W.B.Langdon 22 February 2010 (last updated 2 Aug 2012)