Source code for fastqtools.fastqSplit.fastqSplit

from ..fastqReader.fastqReader import fastqReader
from ..fastqReader.fastqWriter import fastqWriter
from getfileNum import getfileNum
from collections import OrderedDict

def fastqSplit(fq1, fq2, splitNum, prefix):
    """Split a paired FASTQ input into up to ``splitNum`` smaller pairs.

    Reads are taken in order from ``fastqReader(fq1, fq2)`` and handed to
    ``fastqWriter`` together with a chunk name of the form
    ``<prefix>-<j>`` (``j`` starting at 1).  Each chunk receives
    ``reads // splitNum`` reads (at least one); any remainder goes to the
    last chunk so that no more than ``splitNum`` chunks are produced.

    Args:
        fq1: First FASTQ input, passed to ``getfileNum`` and ``fastqReader``.
        fq2: Second (mate) FASTQ input, handled the same way as ``fq1``.
        splitNum: Maximum number of chunks to produce; must be >= 1.
        prefix: String prepended to every chunk name.

    Returns:
        An ``OrderedDict`` mapping each chunk name to the list
        ``[name + "_R1.fastq", name + "_R2.fastq"]``, in creation order.
        ``None`` is returned when the two inputs report different counts
        from ``getfileNum`` or when that count is not a multiple of 4.
        NOTE(review): ``getfileNum`` presumably returns the line count of
        the file and ``fastqWriter`` presumably appends to the two
        ``_R1``/``_R2`` files -- confirm against their definitions.

    Raises:
        ValueError: If ``splitNum`` is smaller than 1.
    """
    lineNum1 = getfileNum(fq1)
    lineNum2 = getfileNum(fq2)
    # Both mates must have the same size, and a FASTQ record is 4 lines.
    if lineNum1 != lineNum2 or lineNum1 % 4:
        return None

    if splitNum < 1:
        raise ValueError(
            "splitNum must be a positive integer, got %r" % (splitNum,)
        )

    # Floor division keeps the batch size an integer on both Python 2 and 3;
    # clamping to 1 avoids a zero batch size when there are fewer reads than
    # requested chunks.
    batchSize = max(1, (lineNum1 // 4) // splitNum)

    j = 1
    fqs = OrderedDict()
    fqName = prefix + "-" + str(j)
    fqs[fqName] = [fqName + "_R1.fastq", fqName + "_R2.fastq"]

    for idx, read in enumerate(fastqReader(fq1, fq2)):
        # Chunk for this read; everything past the last full batch is
        # folded into the final chunk.
        target = min(idx // batchSize + 1, splitNum)
        if target != j:
            # Register a chunk only once a read is actually written to it,
            # so the result never lists an output that was not produced.
            j = target
            fqName = prefix + "-" + str(j)
            fqs[fqName] = [fqName + "_R1.fastq", fqName + "_R2.fastq"]
        fastqWriter(read, fqName)

    return fqs