#WBLangdon 13 Aug 2015 $Revision: 1.9 $

#gawk program to calculate Shepperd and MacDonell's
# Mean Absolute Error of Baseline Predictor exactly
# (MARP0, see their 2012 article in IST).
# MARP0 is not a real predictor but just a (random guessing)
# baseline to compare real predictors. They use it to normalise
# their standardised accuracy measure (SA).
#
#gawk script used to create data in
#"Exact Mean Absolute Error of Baseline Predictor, MARP0"
#William B. Langdon, Javier Dolado, Federica Sarro, Mark Harman
#Information and Software Technology, 2016 doi:10.1016/j.infsof.2016.01.003
#
#based on monte_carlo_marp0.awk r1.2
#for histogram.gnu

#WBL 13 Jan 2016 Add documentation, remove unneeded options
#WBL 19 Nov 2015 Add tex option for output (also suppresses calc of distribution)

#usage:
#eg gawk -f exact_marp0.awk atkinson.txt > atkinson.out
#   gawk -v col=3 -f exact_marp0.awk atkinson.txt > atkinson.out
#
#col is the (1-based) input field holding the actual values. It is read in
#BEGIN, so it must be set with -v; if it is not set, column 2 is used.
#
#Output (stdout):
#  one header line starting "#exact_marp0.awk", ending with "N=<count>"
#  one line with the exact MARP0 value, i.e. the mean of |data[j]-data[i]|
#    over all N*N ordered pairs (i,j), pairs with i==j included
#  one "#" line with the same sum divided by N*(N-1), i.e. i==j excluded
#Diagnostics go to /dev/stderr.

BEGIN {
  if(col=="") {
    col=2;
    print "Taking actual data from default col",col > "/dev/stderr";
  }
  #Strip the leading and trailing "$" of the RCS keyword for the header line
  v = "$Revision: 1.9 $";
  printf("#exact_marp0.awk %s col=%s %s ",
         substr(v,2,length(v)-2),col,strftime("%d %b %Y"));
  #NOTE(review): the text between "for(i=1;i" and the per-record rule's
  #"/dev/stderr" redirect was missing from the copy of this script that was
  #restored here. The rest of this loop and the diagnostic in the rule below
  #are a reconstruction - confirm against the published exact_marp0.awk r1.9.
  #The header line is deliberately left unterminated: END appends "N=...\n".
  for(i=1;i<ARGC;i++) printf("%s ",ARGV[i]);
}

#Every input record contributes one actual value, taken from field col.
{
  #An absent field is still stored (awk treats it as 0 in arithmetic),
  #so tell the user which record is suspect.
  if(NF < col)
    print "Warning:",FILENAME,"line",FNR,"has no column",col > "/dev/stderr";
  data[++N] = $col
}

END{
  printf("N=%d\n",N);

  #Without data both divisions below would be fatal division-by-zero errors
  if(N < 1) {
    print "No data read, cannot calculate MARP0" > "/dev/stderr";
    exit 1;
  }

  sum=0;
  for(i=1;i<=N;i++) {
  for(j=1;j<=N;j++) {
    #do not worry about exploiting symmetry of abs()
    #Allow i==j (error is zero)
    sum += abs(data[j] - data[i]);
  }
  }

  print sum/(N*N), "exact value of mean abs(error) base line predictor" #N*N

  #N*(N-1) is zero for a single data point, so only print when it is defined
  if(N > 1)
    print "#Average value of Shepperd over estimate", sum/(N*(N-1)) #N*(N-1);
  else
    print "Only one data point, N*(N-1) average not defined" > "/dev/stderr";

  #WBL 13 Jan 2016 removed code only used to create histogram
  #in our IST,2016 paper as not needed when just calculating MARP0
}

#Absolute value of x
function abs(x){ return (x>=0)? x : -x;}