#WBL 2 Sep 2012 convert ip-to-country.csv ip-to-country.txt style
#usage gawk -f ip-to-country_reformat.awk ip-to-country.csv
#
# Input : comma-separated lines with exactly 5 fields; lines with any other
#         field count are silently ignored. Fields 1-3 are used as
#         range-start, range-end and two-character upper-case code. Each field
#         is assumed to be wrapped in one quote character on either side
#         (trim() removes the first and last character unconditionally).
# Output: one line per boundary, the decimal address immediately followed by
#         the code (no separator). The code "**" marks the address just past
#         the end of a range that no later range starts at.
# Exit  : 0 on success, 1 bad code, 2 repeated start address, 3 start > end.
# Requires gawk (asort).

BEGIN {
#  printf("#ip-to-country_reformat.awk %s",substr("$Revision: 1.1 $",2));
#  printf(" by WBL http://www.cs.ucl.ac.uk/staff/W.Langdon/ ");
#  printf("%s\n",strftime());
}

# Main rule: record the start of each range and a "**" marker just past its end.
(split($0, t, ",") == 5) {
    # ntrim() zero-pads, so this string comparison orders like a numeric one.
    if (ntrim(t[1]) > ntrim(t[2])) {
        print "ERROR bad range",ntrim(t[1]),ntrim(t[2]),"line"FNR,$0;
        error = 3;
        exit error;
    }
    setip(trim(t[1]), trim(t[3]));
    setip(inc(trim(t[2])), "**");
}

# Emit the collected boundaries in ascending address order.
END {
    #exit;
    if (error) exit error;          # propagate a failure raised while reading

    # Prefix each entry with the zero-padded address so that asort()'s
    # string ordering coincides with numeric ordering.
    for (i in IP) xip[count++] = sprintf("%010d,%d,%s", i, i, IP[i]);
    n = asort(xip);

    for (i = 1; i <= n; i++) {
        split(xip[i], t, ",");
        last = i;
        # An entry is suppressed only when it lies exactly one address above
        # the previous entry AND carries the same code.
        if (old[2] + 1 != t[2] || old[3] != t[3]) {
            printf("%d%s\n", t[2], t[3]);
            printed = i;
        }
        old[2] = t[2];
        old[3] = t[3];
    }
    # The final entry is always written, even if the rule above suppressed it.
    if (last != printed) printf("%d%s\n", t[2], t[3]);
}

# setip(N, shortcode): record that address N begins a block with code shortcode.
#   N          decimal address (string)
#   shortcode  two-character code equal to its own upper-casing, or "**"
# Sets the global `error` and exits on a malformed code (1) or when two real
# codes are given for the same address (2).
function setip(N, shortcode) {
    #limited sanity checking:-(
    if (length(shortcode) != 2 || toupper(shortcode) != shortcode) {
        print "ERROR bad shortcode setip("N","shortcode")",((N in IP) ? IP[N] : ""),"line"FNR;
        error = 1;
        exit error;
    }
    # New address, or a real code replacing an earlier "**" placeholder.
    if (!(N in IP) || IP[N] == "**") {
        IP[N] = shortcode;
        return;
    }
    # A "**" end marker must never hide a real code already stored at N.
    if (shortcode == "**") return;

    print "ERROR repeated ip setip("N","shortcode")",IP[N],"line"FNR;
    error = 2;
    exit error;
    #printf("setip(%s,%s) IP[%s]=`%s'\n",N,shortcode,N,IP[N]);
}

# trim(text): strip quote - drops the first and the last character of text.
function trim(text) {
    return substr(text, 2, length(text) - 2);
}

# ntrim(text): trim() then zero-pad to 10 digits,
# so string comparison is as number comparison.
function ntrim(text) {
    return sprintf("%010d", trim(text));
}

# inc1(text): for text in "0".."10" return text+1 as a string ("1".."11");
# any other input yields the empty string.
function inc1(text) {
    if (text ~ /^([0-9]|10)$/) return (text + 1) "";
    return "";
}

# inc(text): add 1 even if string represents a big num.
# Works digit by digit on the decimal string, so it does not depend on
# numeric precision. L, i, n, a and carry are locals.
# Returns the incremented number as a string; "" for empty input.
function inc(text,    L, i, n, a, carry) {
    L = length(text);
    carry = 1;                      # the "+1" enters as a carry into the last digit
    for (i = L; i > 0; i--) {
        n = substr(text, i, 1);
        if (carry) n = inc1(n);
        carry = 0;
        if (length(n) > 1) {        # digit overflowed to "10": keep "0", carry 1
            carry = 1;
            n = substr(n, 2);
        }
        a = n a;
    }
    # Carry out of the most significant digit (e.g. "999" -> "1000").
    if (carry && L > 0) a = "1" a;
    #printf("inc(%s)%s\n",text,a);
    return a;
}