Commit 69f46f0f authored by Floreal Cabanettes
Browse files

If more than 75% of contigs have length < 1% of the full fasta, do the sort...

If more than 75% of contigs have length < 1% of the full fasta, do the sort automatically. Implements #72.
parent 6a8bf528
......@@ -7,6 +7,7 @@ try:
from dgenies.bin.index import Index
except ImportError:
from index import Index
from pathlib import Path
from Bio import SeqIO
......@@ -49,12 +50,18 @@ class Filter:
n95_contig = None
n95_value = 0.95 * c_len
pos = -1
len_small_contigs = 0
len_1_pct = 0.01 * c_len
for contig in contigs_order:
pos += 1
sum_l += contigs[contig]
if contigs[contig] < len_1_pct:
len_small_contigs += contigs[contig]
if sum_l >= n95_value:
n95_contig = contig
break
if self.type_f == "query" and len_small_contigs >= 0.7 * 0.95 * c_len:
Path(os.path.join(os.path.dirname(self.fasta), ".do-sort")).touch()
# Min length of contigs
min_length = 0.05 * contigs[n95_contig]
......
......@@ -22,6 +22,7 @@ from dgenies.bin.index import index_file
from dgenies.bin.filter_contigs import Filter
from dgenies.bin.merge_splitted_chrms import Merger
from dgenies.bin.sort_paf import Sorter
from dgenies.lib.paf import Paf
import gzip
import io
import binascii
......@@ -705,6 +706,18 @@ class JobManager:
os.remove(self.paf_raw)
if self.target is not None and os.path.exists(self.target.get_path()):
os.remove(self.target.get_path())
if os.path.isfile(os.path.join(self.output_dir, ".do-sort")):
paf = Paf(paf=self.paf,
idx_q=self.idx_q,
idx_t=self.idx_t,
auto_parse=False)
paf.sort()
if not paf.parsed:
success = False
job = Job.get(Job.id_job == self.id_job)
job.status = "fail"
job.error = "Error while sorting query. Please contact us to report the bug"
if success:
job = Job.get(Job.id_job == self.id_job)
job.status = "success"
job.save()
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment