diff --git a/README b/README index 0c36eca9643a33386885e673e949c2c36958bad2..be4d9f418d506a5dfa2521e96542d47c5f586da9 100644 --- a/README +++ b/README @@ -30,7 +30,6 @@ git pull origin master # Requirements # ################ -- Python 2 - Python 3 - BioPython for Python3 diff --git a/SVsim b/SVsim index 6aabb07415e9a439ba0a6f75877d358df29616e0..619f3aef3ad31c3eb6729e7a778919041edd7c5a 100755 --- a/SVsim +++ b/SVsim @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- Mode: Python -*- import sys @@ -30,8 +30,8 @@ def handleException(err): (foo, bar, tb) = sys.exc_info(); tbarr = traceback.extract_tb(tb); (filename, lineno, funcname, foo) = tbarr[-1]; - print >>sys.stderr, str(err); - print >>sys.stderr, "Error occurred in line", lineno, "of function", funcname, "of Python script", filename; + print(str(err), file=sys.stderr); + print("Error occurred in line", lineno, "of function", funcname, "of Python script", filename, file=sys.stderr); return; # Global Variables @@ -49,13 +49,13 @@ maxSeqNameLen = 0; def complement(base): try: inx = "ACGTacgt".index(base); - except StandardError as err: + except Exception as err: sys.stderr.write("Invalid base passed to complement (" + base + ")\n"); return base; return "TGCAtgca"[inx]; def ensureACGTRegion(instr, off, len): - for i in xrange(off, off+len): + for i in range(off, off+len): base = instr[i]; if ("ACGTacgt".find(base) == -1): return False; @@ -85,7 +85,7 @@ def makeEventID(lreg, rreg): if (len(lreg.events) == 0): eventID += lreg.seq.name; else: - for i in xrange(0, len(lreg.events)): + for i in range(0, len(lreg.events)): eventID += lreg.events[i].eventID(); if (i != len(lreg.events) - 1): eventID += ":"; @@ -93,7 +93,7 @@ def makeEventID(lreg, rreg): if (len(rreg.events) == 0): eventID += rreg.seq.name; else: - for i in xrange(0, len(rreg.events)): + for i in range(0, len(rreg.events)): eventID += rreg.events[i].eventID(); if (i != len(rreg.events) - 1): eventID += ":"; @@ -125,9 +125,9 @@ class 
sequence (object): def reportRandomLocationError(): if (GlobalOptions.WGM and GlobalOptions.distinct): - raise StandardError("Unable to find valid region for event. Overloading event density in WGM distinct mode."); + raise Exception("Unable to find valid region for event. Overloading event density in WGM distinct mode."); else: - raise StandardError("Unable to find valid region for event."); + raise Exception("Unable to find valid region for event."); # The library of all reference sequences class refLib (object): @@ -145,7 +145,7 @@ class refLib (object): seqlen = int(splitLine[1]); sstring = referenceFile.fetch(seqname); temp = [] - for i in xrange(1, ploidy+1): + for i in range(1, ploidy+1): seq = sequence(seqname, seqlen, sstring, i); # seq = sequence(seqname, seqlen, "", i); temp.append(seq); @@ -159,7 +159,7 @@ class refLib (object): def printDebug(self): for seqlist in self.seqs: for seq in seqlist: - print seq.name, seq.num, seq.length; + print(seq.name, seq.num, seq.length); def reset(self): for seqlist in self.seqs: @@ -174,7 +174,7 @@ class refLib (object): return self.getRandomOrigRegion(pad, length); def getRandomOrigRegion(self, pad, length): - for i in xrange(0, 1000): + for i in range(0, 1000): off = random.randint(0, self.totalOrigLength - 1); loc = self.findSeqLocation(off); soff = loc.offset - pad; @@ -192,7 +192,7 @@ class refLib (object): seqnum = random.randint(0, GlobalOptions.ploidy - 1); print seqnum; ''' - for i in xrange(0, 1000): + for i in range(0, 1000): off = random.randint(0, self.totalMutLength - 1) loc = self.findRLLocation(off, seqnum); soff = loc.offset - pad; @@ -249,7 +249,7 @@ class refLib (object): lbreak = sbreaksSeq[0]; ebreaksSeq = []; ebreaksSeq.append(lbreak); - for i in xrange(1, len(sbreaksSeq)): + for i in range(1, len(sbreaksSeq)): rbreak = sbreaksSeq[i]; if (abs(lbreak[1] - rbreak[1]) > 1): ebreaksSeq.append(rbreak); @@ -342,7 +342,7 @@ class refLib (object): # Now add up the states. 
count = 0; retval = [] - for i in xrange(0, maxstates): + for i in range(0, maxstates): if (states[i]): count += 1; retval.append(str(i)); @@ -389,7 +389,7 @@ class refLib (object): # Now add up the states. count = 0; retval = [] - for i in xrange(0, maxstates): + for i in range(0, maxstates): if (states[i]): count += 1; retval.append(str(i)); @@ -482,7 +482,7 @@ class region: totalBasesOutput += outlen; if (self.inverted): ERO = self.SRO(); - for i in xrange(soff, soff + outlen): + for i in range(soff, soff + outlen): fastaFile.write(complement(self.seq.seq[ERO - i])); else: fastaFile.write(self.seq.seq[self.rSRO + soff:self.rSRO + soff + outlen]); @@ -571,7 +571,7 @@ class regionList: def findAllRegLocs(self, seq, offset): retval = []; - for i in xrange(0, len(self.regList)): + for i in range(0, len(self.regList)): reg = self.regList[i]; if (reg.seq == seq and offset >= reg.rSRO and offset <= reg.rERO()): rlOffset = reg.MROFF(offset); @@ -584,7 +584,7 @@ class regionList: # TODO: This should be doing a binary search. # Maybe Bisect can be used, but how will it know what is the comparison function? def findOffset(self, offset, startinx = 0): - for i in xrange(startinx, len(self.regList)): + for i in range(startinx, len(self.regList)): reg = self.regList[i]; if (offset >= reg.rlSRO and offset < reg.rlSRO + reg.len): return (i, offset - reg.rlSRO); @@ -608,7 +608,7 @@ class regionList: if (not self.regList[rinx].ensureValid(0, roff)): return False; # Do whatever is left in the middle. - for i in xrange(linx+1, rinx): + for i in range(linx+1, rinx): if (not self.regList[i].ensureValid()): return False; return True; @@ -622,7 +622,7 @@ class regionList: (rinx, roff) = self.findOffset(offset + length, linx); # Copy the regions of interest into a new list. newRL = regionList(); - for i in xrange(linx, rinx+1): + for i in range(linx, rinx+1): newreg = (self.regList[i].copy()); newRL.append(newreg); # Do the cutting. 
@@ -683,7 +683,7 @@ class regionList: self.maybeMergeRegions(inx); # Now we need to fix up the rlSROs length = retregion.len; - for i in xrange(inx, len(self.regList)): + for i in range(inx, len(self.regList)): reg = self.regList[i]; reg.rlSRO -= length; self.totlen -= length; @@ -706,7 +706,7 @@ class regionList: del self.regList[delstart:rinx+1]; self.maybeMergeRegions(linx); # Fix up the trailing rlSROs - for i in xrange(delstart, len(self.regList)): + for i in range(delstart, len(self.regList)): reg = self.regList[i]; reg.rlSRO -= length; self.totlen -= length; @@ -756,7 +756,7 @@ class regionList: if (len(self.regList) < 2): return breakSet; lreg = self.regList[0]; - for i in xrange(1, len(self.regList)): + for i in range(1, len(self.regList)): rreg = self.regList[i]; if (self.checkMergeable(lreg, rreg)): lreg = rreg; @@ -791,7 +791,7 @@ class regionList: if (GlobalOptions.leftonly): endlen = 2; lreg = self.regList[0]; - for i in xrange(1, endlen): + for i in range(1, endlen): rreg = self.regList[i]; if (self.checkMergeable(lreg, rreg)): lreg = rreg; @@ -819,7 +819,7 @@ class regionList: else: (inx, off) = self.findOffset(soff); curlen = 0; - for i in xrange(inx, len(self.regList)): + for i in range(inx, len(self.regList)): reg = self.regList[i]; reglen = reg.len - off; # sys.stderr.write(str(reglen) + '\n'); @@ -853,16 +853,16 @@ class regionList: addlen = len(newRL.regList); selflen = len(self.regList); # Expand the list so we can copy up. - self.regList.extend(xrange(0,addlen)); # xrange gives bogus iterable as we don't care what the values are. + self.regList.extend(range(0,addlen)); # range gives bogus iterable as we don't care what the values are. # We need to go backwards to allow the copy up without smashing. 
- for i in xrange(selflen-1, inx-1, -1): + for i in range(selflen-1, inx-1, -1): reg = self.regList[i]; reg.rlSRO += addbases; self.regList[i+addlen] = reg; self.totlen += addbases; reflib.totalMutLength += addbases; # Now do the insertions with rlSRO fixups as we go. - for i in xrange(0, addlen): + for i in range(0, addlen): reg = newRL.regList[i]; reg.rlSRO = currlSRO; currlSRO += reg.len; @@ -1026,7 +1026,7 @@ class simpleLocationEvent (object): elocp = random.choice(EROlocs); slocp = self.findClosest(elocp, SROlocs); else: - raise StandardError("Invalid endpoint select mode: " + GlobalOptions.select); + raise Exception("Invalid endpoint select mode: " + GlobalOptions.select); (sloc, sinv) = slocp; (eloc, einv) = elocp; @@ -1082,7 +1082,7 @@ class simpleLocationEvent (object): return True; def performWGMEvent(self): - chromorder = range(0, len(self.seqlist)); + chromorder = list(range(0, len(self.seqlist))); if (GlobalOptions.select != "F"): random.shuffle(chromorder); for i in chromorder: @@ -1396,7 +1396,7 @@ def simulateSV(): if (GlobalOptions.reference is None): myError("Reference File (-r) argument is Missing", parser); reflib = refLib(GlobalOptions.reference, GlobalOptions.ploidy); - print >>sys.stderr, "Input", reflib.seqcount, "sequences totaling", reflib.totalOrigLength, "bases"; + print("Input", reflib.seqcount, "sequences totaling", reflib.totalOrigLength, "bases", file=sys.stderr); # Set up the random number generator random.seed(GlobalOptions.seed); @@ -1423,7 +1423,7 @@ def simulateSV(): continue; s = line.strip().split(); op = s[0]; - for i in xrange(0, GlobalOptions.repeat): + for i in range(0, GlobalOptions.repeat): # Delete, Duplicate, InsertRandom and Invert all have the same arguments. 
if (op == "DEL" or op == "DUP" or op == "INR" or op == "INV"): startlen = int(s[1]); @@ -1434,7 +1434,7 @@ def simulateSV(): if (len(s) >= 4): inc = int(s[3]); endlen = int(s[2]) + inc; - for oplen in xrange(startlen, endlen, inc): + for oplen in range(startlen, endlen, inc): if (op == "INR"): e = insertEvent(op, oplen); else: @@ -1549,7 +1549,7 @@ def simulateSV(): chrom = e.performWGMEvent(); if (chrom == 0): failCount += 1; - print count, e.seqlist[0].name, e.rSRO, e.rERO, e.strand1, e.strand2, "Failed", failCount; + print(count, e.seqlist[0].name, e.rSRO, e.rERO, e.strand1, e.strand2, "Failed", failCount); continue; chromRL = e.seqlist[chrom-1].RL; chromLen = chromRL.totlen; @@ -1558,9 +1558,9 @@ def simulateSV(): numbreaks = len(breaks); states = reflib.countCNstates(); numstates = len(states); - print count, e.seqlist[0].name, e.type, e.rSRO, e.rERO, e.strand1, e.strand2, "Succeeded", failCount, chromLen, maxChromLen, numbreaks, numstates, states + print(count, e.seqlist[0].name, e.type, e.rSRO, e.rERO, e.strand1, e.strand2, "Succeeded", failCount, chromLen, maxChromLen, numbreaks, numstates, states) chromRL.printDebug(); - print + print() elif (GlobalOptions.loop == "P"): # Loop for doing probability runs. @@ -1570,7 +1570,7 @@ def simulateSV(): countsperbin = GlobalOptions.repeat; binwidth = 1; breakbins = [0] * totbins; - for i in xrange(totbins-1,totbins): + for i in range(totbins-1,totbins): # Use following for no failed events at the criteria. while (totout < GlobalOptions.repeat): # Use the following for correct number of breakpoints as the criteria. 
@@ -1602,7 +1602,7 @@ def simulateSV(): # breakbins[binnum] = breakbins[binnum] + 1; totout += 1; states = reflib.countBigCNstates(GlobalOptions.minseg); - print "\t".join([str(totout), str(binnum), str(totiter), str(len(breaks)), str(len(states)), str(failCount)]); + print("\t".join([str(totout), str(binnum), str(totiter), str(len(breaks)), str(len(states)), str(failCount)])); sys.stdout.flush(); random.shuffle(eventList); reflib.reset(); @@ -1619,8 +1619,8 @@ def simulateSV(): totbins = int(((len(eventList))/binwidth) + 0.5); sys.stderr.write("Filling " + str(totbins) + " bins.\n"); breakbins = [0] * totbins; - binrange = range(1,binwidth*totbins + 1, binwidth); - for i in xrange(1,totbins): + binrange = list(range(1,binwidth*totbins + 1, binwidth)); + for i in range(1,totbins): while (breakbins[i] < countsperbin): totiter += 1; failCount = 0; @@ -1640,7 +1640,7 @@ def simulateSV(): breakbins[binnum] = breakbins[binnum] + 1; totout += 1; states = reflib.countBigCNstates(GlobalOptions.minseg); - print "\t".join([str(totout), str(binnum), str(totiter), str(numbreaks), str(len(states)), str(failCount), str(max(maxChromLen))]); + print("\t".join([str(totout), str(binnum), str(totiter), str(numbreaks), str(len(states)), str(failCount), str(max(maxChromLen))])); sys.stdout.flush(); break; if (max(maxChromLen) > GlobalOptions.maxchromlen): @@ -1649,7 +1649,7 @@ def simulateSV(): random.shuffle(eventList); reflib.reset(); else: - raise StandardError("Invalid loop type: " + GlobalOptions.loop); + raise Exception("Invalid loop type: " + GlobalOptions.loop); return; @@ -1660,7 +1660,7 @@ def main(): except IOError as err: sys.stderr.write("IOError " + str(err) + '\n'); return; - except StandardError as err: + except Exception as err: sys.stderr.write("StandardError\n"); handleException(err); sys.exit(1); diff --git a/svinterval.py b/svinterval.py index 7632d976c09af744c3a2d92f113dd3d86ec10f49..0078b6b183c820a6092fda930b1426307f3d4dc1 100644 --- a/svinterval.py +++ 
b/svinterval.py @@ -1,8 +1,7 @@ #! /usr/bin/env python -import argparse, sys -import os.path +import sys from math import fabs import numpy diff --git a/svreader/__init__.py b/svreader/__init__.py index a15e4ec073d0d9e76acd74825a341e822c295974..1bc14d6cf76d053edb71734edcc708528ce300ce 100644 --- a/svreader/__init__.py +++ b/svreader/__init__.py @@ -1,14 +1,11 @@ -import sys import os import re -from subprocess import call - import vcf from pybedtools import BedTool from pybedtools import create_interval_from_list -from pysam import VariantFile, tabix_compress, tabix_index +from pysam import tabix_compress, tabix_index from svreader.vcf_utils import get_template from svrunner_utils import eprint, vcf_to_pybed diff --git a/svreader/vcfwrapper.py b/svreader/vcfwrapper.py index d49db5fc7be7c69a3b1457931703241be6abf98f..ce5bc9ee5600435c42f8a5f28a4fc756ad533bbf 100644 --- a/svreader/vcfwrapper.py +++ b/svreader/vcfwrapper.py @@ -1,8 +1,4 @@ -import logging -import sys -import os - -from pysam import VariantFile, tabix_compress, tabix_index +from pysam import VariantFile from svreader import SVRecord, SVReader, SVWriter