#WBL 28 Jan 2015
#
# Purpose:
#   Take the fastq file sizes calculated last year in ~/barracuda/1000/ by
#   t.bat (stored in wc_fastq_10-jun-2014) and randomly select 200000
#   sequences from ten files, five _1 and five _2.
#   NB do not immediately re-use ERR239463.
#
# Input (wc_fastq_10-jun-2014), as the gawk patterns below expect it:
#   - a line with one field   : a sample name (e.g. ERR239463)
#   - a line with three fields: "wc" output for one of that sample's files;
#     field 1 is the line count, which is divided by 4 to get the number of
#     sequences.
#
# Output (stdout): this script does NOT extract anything itself. It prints
#   - a "#WBL validation.bat seed ..." header line with the seed and time,
#   - one "head -<n> X_<k>.filt.fastq | tail -800000 > X_<k>.200k.fastq"
#     command per three-field input line, where the window start is a random
#     multiple of 4 lines; commands for ERR239463 are prefixed with "#" so
#     they are commented out,
#   - the word "exit" after the 12th generated command (presumably ten usable
#     commands plus the two commented-out ERR239463 ones -- TODO confirm).
#
# Portability note: every line of the gawk program ends in a backslash.
# csh/tcsh needs that to allow a newline inside quotes (and passes a plain
# newline to gawk); sh/bash pass the backslash through and gawk then treats
# it as a line continuation. Each statement is therefore terminated with an
# explicit ";" so the program parses the same way under either shell.
# For the same reason do not put "#" comments inside the gawk program.

gawk 'function rnd(d) { return int(d*rand()); } \
BEGIN{seed=083631; srand(seed); \
  print "#WBL validation.bat seed",seed,strftime(); \
} \
(NF==1){name=$1;used=0} \
(NF==3){ \
  size=$1/4; \
  start=4*rnd(size-200000); \
  if(name=="ERR239463") printf("#"); \
  printf("head %10d %s_%d.filt.fastq | tail -%d > %s_%d.200k.fastq\n", \
         -(start+(200000*4)),name,++used,(200000*4),name,used); \
  if(++out==12)print "exit"; \
}' \
wc_fastq_10-jun-2014

exit

# Everything below is unreachable because of the "exit" above. It is kept as
# a record of echo/wc commands whose output has the same shape as the input
# file read above (sample name, then "wc" of each of its two fastq files).

echo ERR239463
gunzip -c ERR239463_1.filt.fastq.gz | wc
gunzip -c ERR239463_2.filt.fastq.gz | wc
echo ERR242845
gunzip -c ERR242845_1.filt.fastq.gz | wc
gunzip -c ERR242845_2.filt.fastq.gz | wc
echo ERR239771
gunzip -c ERR239771_1.filt.fastq.gz | wc
gunzip -c ERR239771_2.filt.fastq.gz | wc
echo ERR241251
gunzip -c ERR241251_1.filt.fastq.gz | wc
gunzip -c ERR241251_2.filt.fastq.gz | wc
echo ERR239938
gunzip -c ERR239938_1.filt.fastq.gz | wc
gunzip -c ERR239938_2.filt.fastq.gz | wc

exit
######################################################################

echo ERR251575
gunzip -c ERR251575_1.filt.fastq.gz | wc
gunzip -c ERR251575_2.filt.fastq.gz | wc
echo ERR242120
gunzip -c ERR242120_1.filt.fastq.gz | wc
gunzip -c ERR242120_2.filt.fastq.gz | wc
echo ERR240195
gunzip -c ERR240195_1.filt.fastq.gz | wc
gunzip -c ERR240195_2.filt.fastq.gz | wc
echo ERR205172
gunzip -c ERR205172_1.filt.fastq.gz | wc
gunzip -c ERR205172_2.filt.fastq.gz | wc
echo ERR245879
gunzip -c ERR245879_1.filt.fastq.gz | wc
gunzip -c ERR245879_2.filt.fastq.gz | wc
#