Commit b31ba169 authored by Penom Nom's avatar Penom Nom
Browse files

Rewriting of parameter handling; workflow cleanup

parent 1b1567a1
......@@ -17,6 +17,7 @@
import inspect
import os
import re
import sys
import pickle
import datetime
......@@ -28,8 +29,10 @@ from jflow.parameter import Parameter, date
from ng6.t3MySQLdb import t3MySQLdb
from ng6.project import Project
from ng6.run import Run
from ng6.sample import Sample
from ng6.utils import Utils
from workflows.types import adminlogin
from workflows.types import adminlogin, casava_dir
class BasicNG6Workflow (Workflow):
......@@ -43,7 +46,7 @@ class BasicNG6Workflow (Workflow):
def get_parameters_per_groups(self, parameters_section="parameters"):
name, description, parameters = self._get_from_config(parameters_section)
parameters.extend(self._required_parameters)
parameters.extend(self._get_required_parameters())
pgparameters, parameters_order = {}, []
for param in parameters:
if param.group not in parameters_order: parameters_order.append(param.group)
......@@ -53,6 +56,8 @@ class BasicNG6Workflow (Workflow):
pgparameters[param.group] = [param]
return [pgparameters, parameters_order]
def _get_required_parameters(self):
    """
    Return the workflow's required parameters.

    Accessor used by get_parameters_per_groups(); subclasses can override
    it to alter the required set without touching the attribute directly.
    """
    return self._required_parameters
def add_component(self, component_name, args=[], kwargs={}, component_prefix="default", parent=None, addto="run"):
# first build and check if this component is OK
......@@ -114,9 +119,16 @@ class NG6Workflow (BasicNG6Workflow):
def __init__(self, args={}, id=None, function= "process", parameters_section="parameters"):
    """
    Build a NG6 workflow: register the run/sample parameters, format the
    user arguments and initialise the sample bookkeeping lists.
    @param args : dict of raw user arguments
    @param id : workflow id
    @param function : name of the entry-point method
    @param parameters_section : config section holding the parameter declarations
    """
    BasicNG6Workflow.__init__(self, args, id, function, parameters_section)
    self.samples = []
    # NOTE(review): the builders are called twice (once into _required_parameters,
    # once into self.parameters), creating distinct Parameter instances — confirm
    # this duplication is intended and not a diff artifact.
    self._required_parameters.extend(self._add_required_parameters())
    self._required_parameters.extend(self._add_sample_parameters())
    self.parameters.extend(self._add_required_parameters())
    self.parameters.extend(self._add_sample_parameters())
    self.args = self._extend_and_format_args(self.parameters, args)
    # Aggregated across all samples by __preprocess_samples__ / casava parsing.
    self.__all_reads1__ = []
    self.__all_reads2__ = []
    self.__samples_name__ = []
def _add_required_parameters(self):
orequired = []
......@@ -130,6 +142,55 @@ class NG6Workflow (BasicNG6Workflow):
orequired.append(Parameter("run_type", "What type of data is it (1 lane, 1 region)", "--type", required=True, group="Run information", display_name="Type"))
return orequired
def _add_sample_parameters(self):
    """
    Return the --sample multi-parameter (with its sub-parameters) used to
    describe each sample on the command line.
    """
    sample_fields = [
        Parameter("sample_name", "The name of the sample, MID for 454 data", "sample_name"),
        Parameter("sample_description", "A brief description of the sample", "sample_description" ),
        Parameter("read1", "Read 1 data file path", "read1", required=True, type = "localfile", action= "append" ),
        Parameter("read2", "Read 2 data file path", "read2", type = "localfile", action="append" ),
    ]
    return [Parameter("sample", "Definition of a sample", "--sample", action = "append", type="multiple",
                      required = True, sub_parameters = sample_fields, group = "exclude-sample" )]
def __preprocess_samples__(self):
    """
    Build Sample objects from the --sample arguments, aggregate their read
    files and names, and register MID descriptions on the run when present.
    """
    if not self.args.get('sample'):
        sys.stderr.write( "--sample option is required\n" )
        sys.exit()
    descriptions = {}
    for definition in self.args['sample']:
        sample = Sample(definition['sample_name'], definition['sample_description'],
                        definition['read1'], definition['read2'])
        if sample.name:
            self.__samples_name__.append(sample.name)
            # only named samples can carry a MID description
            if sample.description:
                descriptions[sample.name] = sample.description
        self.__all_reads1__.extend(sample.reads1)
        self.__all_reads2__.extend(sample.reads2)
        self.samples.append(sample)
    if descriptions:
        self.runobj.add_mids_description(descriptions)
def get_samples(self):
    """Return the list of registered Sample objects."""
    return self.samples
def get_all_reads1(self):
    """Return every read-1 file path collected across all samples."""
    return self.__all_reads1__
def get_all_reads2(self):
    """Return every read-2 file path collected across all samples."""
    return self.__all_reads2__
def get_nospace_runname(self):
    """Return the run name with whitespace runs replaced by underscores."""
    return "_".join(self.runobj.name.split())
def get_samples_name(self):
    """Return the names of all registered samples (unnamed samples are skipped)."""
    return self.__samples_name__
def is_paired_end(self):
    """Return True when at least one read-2 file was registered."""
    return bool(self.get_all_reads2())
def pre_process(self):
# start from an existing project
self.project = Project.get_from_id(self.args["project_id"])
......@@ -138,26 +199,148 @@ class NG6Workflow (BasicNG6Workflow):
if self.project is not None and not self.project.is_admin(self.args["admin_login"]):
sys.stderr.write( "The user login '" + self.args["admin_login"] + "' is not allowed to add data on project '" + self.project.name + "'.\n" )
sys.exit()
# build the run
self.runobj = Run(self.args["run_name"], self.args["run_date"], self.args["species"], self.args["data_nature"],
self.args["run_type"], self.args["run_description"], self.args["sequencer"])
self.runobj.admin_login=self.args["admin_login"]
# then add the run to the project
self.project.add_run(self.runobj)
# if it's a run with some index, let's write down description
if self.args.has_key("sample") and self.args["sample"]:
try:
samples = {}
for sample in self.args["sample"]:
samples[sample["sample_name"]] = sample["sample_description"]
self.runobj.add_mids_description(samples)
except: pass
self.metadata.append("run_id="+str(self.runobj.id))
self.__preprocess_samples__()
if not self.samples :
sys.stderr.write( "Please define at least one sample.\n" )
sys.exit()
def post_process(self):
    """
    After processing, synchronise the run directory when a run exists,
    otherwise the project directory (no-op when neither is set).
    """
    target = self.runobj if self.runobj else self.project
    if target:
        target.sync()
class CasavaNG6Workflow(NG6Workflow):
def __init__(self, args={}, id=None, function= "process", parameters_section="parameters"):
    """
    NG6 workflow fed from a CASAVA output directory: adds the --casava
    parameter and the undetermined-reads bookkeeping.
    @param args : dict of raw user arguments
    @param id : workflow id
    @param function : name of the entry-point method
    @param parameters_section : config section holding the parameter declarations
    """
    NG6Workflow.__init__(self, args, id, function, parameters_section)
    # NOTE(review): _add_casava_parameters() is called twice, producing
    # distinct Parameter objects for the two lists — confirm intended.
    self._required_parameters.extend(self._add_casava_parameters())
    self.parameters.extend(self._add_casava_parameters())
    self.args = self._extend_and_format_args(self.parameters, args)
    # Flipped to True by __preprocess_casavadir when a CASAVA dir is parsed.
    self.__is_casava__ = False
    self.__group_prefix__ = None
    self.__undetermined_reads1__ = []
    self.__undetermined_reads2__ = []
def _add_casava_parameters(self):
    """
    Return the --casava multi-parameter (directory + lane sub-parameters)
    used to retrieve samples from a CASAVA output directory.
    """
    casava_fields = [
        Parameter("casava_directory", "Path to the CASAVA directory", "casava_directory", required=True, type= casava_dir ),
        Parameter("lane", "The lane number", 'lane',required=True , type=int ),
    ]
    return [Parameter("casava", "Definition of a casava directory for sample retrieving", "--casava",
                      required = True, sub_parameters = casava_fields, type = "multiple", group = "exclude-sample" )]
def __preprocess_samples__(self):
    """
    Build samples from the CASAVA directory when --casava was fully
    supplied; otherwise fall back to the plain --sample handling.
    """
    casava_args = self.args.get("casava")
    if casava_args and casava_args['casava_directory'] and casava_args['lane']:
        project_label = '_'.join( self.project.get_name().split() )
        self.__preprocess_casavadir(casava_args['casava_directory'], project_label, casava_args['lane'])
    else :
        NG6Workflow.__preprocess_samples__(self)
def get_all_undetermined_read1(self):
    """Return the undetermined-indices read-1 files collected from the CASAVA directory."""
    return self.__undetermined_reads1__
def get_all_undetermined_read2(self):
    """Return the undetermined-indices read-2 files collected from the CASAVA directory."""
    # Bug fix: previously returned self.__undetermined_reads1__ (copy/paste
    # error), so the read-2 accessor handed back read-1 files.
    return self.__undetermined_reads2__
def is_casava(self):
    """Return True when the samples were loaded from a CASAVA directory."""
    return self.__is_casava__
def get_group_prefix(self):
    """Return the read-group basename prefixes (None until CASAVA preprocessing has run)."""
    return self.__group_prefix__
def __preprocess_casavadir(self, casava_directory, project_name, lane_number):
    """
    Parse the SampleSheet.mk file from a CASAVA directory and register the
    samples it describes (barcodes, sample ids, fastq files of the lane).
    @param casava_directory : path to CASAVA output directory
    @param project_name : files in each sample are part of this project
    @param lane_number : files in each sample are sequenced on this lane
    """
    # NOTE(review): lane_number is concatenated with strings below, so it must
    # arrive as a str here even though the --casava 'lane' sub-parameter is
    # declared with type=int — confirm the caller's conversion.
    self.__is_casava__ = True
    samples = []
    mids_description = {}
    with open(os.path.join(casava_directory, "SampleSheet.mk")) as fh :
        for line in fh :
            # Lane barcodes, e.g. "l1_BARCODES:=ACGT GGCC ..." — one dict per barcode.
            if line.startswith("l" + lane_number + "_BARCODES"):
                parts = line.strip().split(":=")
                barcodes_list = parts[1].split(" ")
                for i in range(len(barcodes_list)):
                    samples.append({'barcode':barcodes_list[i]})
            # Retrieve samples ids (positionally aligned with the barcodes line).
            elif line.startswith("l" + lane_number + "_SAMPLEIDS" ):
                parts = line.strip().split(":=")
                sample_ids_list = parts[1].split(" ")
                for i in range(len(sample_ids_list)):
                    samples[i]['sample_id'] = sample_ids_list[i]
            # Retrieve folder (positionally aligned as well).
            elif line.startswith("l" + lane_number + "_SUBDIRS"):
                parts = line.strip().split(":=")
                subdirs_list = parts[1].split(" ")
                for i in range(len(subdirs_list)):
                    samples[i]['subdir'] = subdirs_list[i]
    # Filter on project name: keep this project's samples plus the
    # Undetermined_indices folders (reads that matched no barcode).
    aux_samples = []
    for sample in samples:
        if (re.match("Project_" + project_name + "/Sample_.+", sample['subdir']) is not None) or (sample['subdir'].startswith("Undetermined_indices")):
            aux_samples.append(sample)
    samples = aux_samples
    if len(samples) == 0:
        # Python 2 raise syntax (this module also relies on dict.has_key elsewhere).
        raise ValueError, "The project '" + project_name + "' in lane '" + lane_number + "' doesn't exist in CASAVA directory " + casava_directory
    for sample in samples:
        # Write line in the index description (skip un-barcoded/undetermined entries).
        if (sample['barcode'] != "NoIndex") and (not sample['subdir'].startswith("Undetermined_indices")) :
            mids_description[sample['barcode']] = sample['sample_id']
        read1_files = []
        read2_files = []
        undetermined_read1_files = []
        undetermined_read2_files = []
        # Write files lists: only this lane's gzipped fastq files, split R1/R2
        # and determined/undetermined.
        for file in os.listdir(casava_directory + "/" + sample['subdir']):
            if file.endswith(".fastq.gz") and re.search(".*_L00" + lane_number + "_.*", file):
                if re.search(".*_R1_.*", file):
                    if not sample['subdir'].startswith("Undetermined_indices"):
                        read1_files.append(casava_directory + "/" + sample['subdir'] + "/" + file)
                    else:
                        undetermined_read1_files.append(casava_directory + "/" + sample['subdir'] + "/" + file)
                if re.search(".*_R2_.*", file):
                    if not sample['subdir'].startswith("Undetermined_indices"):
                        read2_files.append(casava_directory + "/" + sample['subdir'] + "/" + file)
                    else:
                        undetermined_read2_files.append(casava_directory + "/" + sample['subdir'] + "/" + file)
        sp_object = Sample(sample['sample_id'], sample['sample_id'], read1_files, read2_files)
        sp_object.add_metadata('undetermined_read1_files', undetermined_read1_files)
        sp_object.add_metadata('undetermined_read2_files', undetermined_read2_files)
        sp_object.add_metadata('barcode', sample['barcode'])
        sp_object.add_metadata('is_casava', True)
        if sp_object.name :
            self.__samples_name__.append(sp_object.name)
        self.__all_reads1__.extend(read1_files)
        self.__all_reads2__.extend(read2_files)
        self.__undetermined_reads1__.extend(undetermined_read1_files)
        self.__undetermined_reads2__.extend(undetermined_read2_files)
        self.samples.append(sp_object)
    if mids_description :
        self.runobj.add_mids_description(mids_description)
    # NOTE(review): dict.keys() returns a list under Python 2 (this module's
    # target); under Python 3 this would be a view object — verify consumers.
    self.__group_prefix__ = (Utils.get_group_basenames(self.get_all_reads1()+self.get_all_reads2(), "read")).keys()
\ No newline at end of file
......@@ -286,6 +286,7 @@ class Run(object):
if self.__mids_description :
return self.__mids_description
else :
t3mysql = t3MySQLdb()
return t3mysql.select_run_samples(self.id)
def add_mids_description(self, mids_desc):
......
#
# Copyright (C) 2009 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
class Sample(object):
    """
    One sequencing sample: a name, a description, its read-1/read-2 file
    lists, plus arbitrary key/value metadata.
    """

    def __init__(self, name, description, reads1, reads2 = None):
        """
        @param name : sample name (MID for 454 data)
        @param description : free-text description of the sample
        @param reads1 : read-1 file path, or list of paths
        @param reads2 : optional read-2 file path, or list of paths
        """
        self.name = name
        self.description = description
        # Accept a single path as well as a list of paths.
        self.reads1 = [reads1] if isinstance(reads1, str) else reads1
        if reads2 is None:
            # Robustness: normalise to an empty list so callers can iterate or
            # extend reads2 unconditionally (single-end samples simply have no
            # read-2 files); previously this stayed None and crashed extend().
            self.reads2 = []
        elif isinstance(reads2, str):
            self.reads2 = [reads2]
        else:
            self.reads2 = reads2
        self.metadata = {}

    def add_metadata(self, key, value):
        """Attach (or overwrite) the metadata entry *key* with *value*."""
        self.metadata[key] = value

    def get_metadata(self, key):
        """Return the metadata value for *key*, or None when absent."""
        if self.has_metadata(key):
            return self.metadata[key]

    def has_metadata(self, key):
        """Return True when the metadata entry *key* exists."""
        # 'in' instead of dict.has_key(): identical behaviour in Python 2 and
        # keeps the class usable under Python 3, where has_key was removed.
        return key in self.metadata
......@@ -28,11 +28,6 @@ class AddRun (NG6Workflow):
Run the workflow
"""
# First check if files provided exists
files_to_save = []
for file in self.args["data_file"]:
if os.path.isfile(file):
files_to_save.append(file)
else:
sys.exit(2, "error: %s file does not exists\n" % (file))
files_to_save = self.get_all_reads1() + self.get_all_reads2()
# archive the files
addrawfiles = self.add_component("AddRawFiles", [self.runobj, files_to_save, self.args["compression"]])
\ No newline at end of file
......@@ -33,13 +33,6 @@ description = add a brand new run to a project
# .exclude [None]: will make sure that there is only one arguments provided
#
[parameters]
data_file.name = data_file
data_file.flag = --data-file
data_file.help = Which data files are linked to the run
data_file.required = True
data_file.action = append
data_file.type = localfile
compression.name = compression
compression.flag = --compression
compression.help = How should the data be compressed once archived (none|gz|bz2)
......
......@@ -109,7 +109,8 @@ class InsertsSizes (Analysis):
self._add_result_element(sample, "min_size", metrics[orientation]["MIN_INSERT_SIZE"], orientation)
self._add_result_element(sample, "max_size", metrics[orientation]["MAX_INSERT_SIZE"], orientation)
self._add_result_element(sample, "std_deviation", metrics[orientation]["STANDARD_DEVIATION"], orientation)
self._add_result_element(sample, "nb_inserts_sizes", metrics[orientation]["NB_INSERTS_SIZES"][:-1], orientation)
if metrics[orientation].has_key("NB_INSERTS_SIZES") :
self._add_result_element(sample, "nb_inserts_sizes", metrics[orientation]["NB_INSERTS_SIZES"][:-1], orientation)
# Save paired count
for pairs_count_file in self.pairs_count_files:
if os.path.splitext(os.path.basename(pairs_count_file))[0] == sample :
......
......@@ -15,7 +15,7 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import os, re
import os, re, sys
from ng6.ng6workflow import NG6Workflow
from ng6.utils import Utils
......@@ -23,7 +23,7 @@ from ng6.utils import Utils
class GeneDiversity (NG6Workflow):
def _get_clean_list( self, split_inputs, split_outputs, samples_names=None ):
def _get_clean_list( self, split_inputs, split_outputs, samples_names = [] ):
"""
@summary : Returns two lists. The sorted list of split component outputs.
The sorted and completed list of samples names which correspond to the sorted list of split component outputs.
......@@ -35,7 +35,7 @@ class GeneDiversity (NG6Workflow):
new_samples_names = list()
new_split_outputs = list()
for idx in range( len(split_inputs) ):
sample = samples_names[idx] if samples_names is not None else None
sample = samples_names[idx] if samples_names else None
basename = os.path.basename(split_inputs[idx])
basename_woext = basename.split(".")[0]
extensions = ".".join( basename.split(".")[1:] )
......@@ -44,7 +44,7 @@ class GeneDiversity (NG6Workflow):
if re.match(basename_woext + "_\d+." + extensions, out_split_basename) is not None:
new_samples_names.append( sample )
new_split_outputs.append( file )
if samples_names is None:
if not samples_names :
new_samples_names = None
return new_split_outputs, new_samples_names
......@@ -95,25 +95,16 @@ class GeneDiversity (NG6Workflow):
if error_msg != "":
raise ValueError( "Error in merge rules : " + error_msg )
def parse_arguments(self):
    """
    Normalise the user sample arguments: strip whitespace from the
    read-1/read-2 paths and, when sample names were provided, strip them
    and build the merge groups from the --merge rules.
    """
    self.reads_1 = [elt.strip() for elt in self.args['read_1']]
    self.reads_2 = [elt.strip() for elt in self.args['read_2']]
    self.samples_names = None
    self.merge_groups = None
    # samples_names is optional; merge groups only make sense when names exist.
    if self.args['samples_names'] is not None and len(self.args['samples_names']) > 0:
        self.samples_names = [elt.strip() for elt in self.args['samples_names']]
        self.merge_groups = self._load_merge_arg( self.samples_names )
def process(self):
# Manage samples
self.parse_arguments()
merge_groups = self._load_merge_arg( self.get_samples_name() ) if self.get_samples_name() else None
# Add raw files
addrawfiles = self.add_component( "AddRawFiles", [self.runobj, self.reads_1 + self.reads_2, "none"] )
addrawfiles = self.add_component( "AddRawFiles", [self.runobj, self.get_all_reads1() + self.get_all_reads2(), "none"] )
# Trim sequences
trim_R1 = self.add_component("Trimmer", [self.reads_1, 1, self.args['trim_read_1']], component_prefix="R1")
trim_R2 = self.add_component("Trimmer", [self.reads_2, 1, self.args['trim_read_2']], component_prefix="R2")
trim_R1 = self.add_component("Trimmer", [self.get_all_reads1(), 1, self.args['trim_read_1']], component_prefix="R1")
trim_R2 = self.add_component("Trimmer", [self.get_all_reads2(), 1, self.args['trim_read_2']], component_prefix="R2")
# Make some statistics on raw file
fastqc = self.add_component("FastQC", [trim_R1.output_files + trim_R2.output_files, False, True])
......@@ -136,7 +127,7 @@ class GeneDiversity (NG6Workflow):
# Sequence traduction
split = self.add_component("SplitSeq", [chimera.nonchimeras, 6000])
split_outputs, new_samples_names = self._get_clean_list( chimera.nonchimeras, split.output_files, self.samples_names )
split_outputs, new_samples_names = self._get_clean_list( chimera.nonchimeras, split.output_files, self.get_samples_name() )
framebot = self.add_component("Framebot", [split_outputs, self.args["database"], self.args["protein_min_length"], False])
# Rename the pre-clusters to provide traceback after merge and cd-hit
......@@ -150,7 +141,7 @@ class GeneDiversity (NG6Workflow):
self.args["otu_cluster_most_similar"], 5, 'euclidean', 'average', ';size=', '|'], parent=chimera)
# Sampling
groups = None if self.merge_groups is None else [self.merge_groups]
groups = None if merge_groups is None else [merge_groups]
sampling = self.add_component("BiomSampling", [cdhit.biom_files, self.args["discard"], self.args["select"], self.args["round"],
self.args["obs_min"], cdhit.output_files, 'euclidean', 'average', groups], parent=cdhit)
......
......@@ -31,42 +31,25 @@ MiSeq
1 Lane
--species
Bacteria
--read-1
workflows/gene_diversity/data/cDNA-sample1-day-replicate1_GAACGA_L001_R1.fastq.gz
--read-2
workflows/gene_diversity/data/cDNA-sample1-day-replicate1_GAACGA_L001_R2.fastq.gz
--read-1
workflows/gene_diversity/data/cDNA-sample1-day-replicate2_GGTCGG_L001_R1.fastq.gz
--read-2
workflows/gene_diversity/data/cDNA-sample1-day-replicate2_GGTCGG_L001_R2.fastq.gz
--read-1
workflows/gene_diversity/data/cDNA-sample1-day-replicate3_GACTAG_L001_R1.fastq.gz
--read-2
workflows/gene_diversity/data/cDNA-sample1-day-replicate3_GACTAG_L001_R2.fastq.gz
--read-1
workflows/gene_diversity/data/cDNA-sample1-night-replicate1_GCATTG_L001_R1.fastq.gz
--read-2
workflows/gene_diversity/data/cDNA-sample1-night-replicate1_GCATTG_L001_R2.fastq.gz
--read-1
workflows/gene_diversity/data/cDNA-sample1-night-replicate2_TTGCGG_L001_R1.fastq.gz
--read-2
workflows/gene_diversity/data/cDNA-sample1-night-replicate2_TTGCGG_L001_R2.fastq.gz
--read-1
workflows/gene_diversity/data/cDNA-sample1-night-replicate3_TATGAG_L001_R1.fastq.gz
--read-2
workflows/gene_diversity/data/cDNA-sample1-night-replicate3_TATGAG_L001_R2.fastq.gz
--read-1
workflows/gene_diversity/data/cDNA-sample2-day-replicate1_GGATAT_L001_R1.fastq.gz
--read-2
workflows/gene_diversity/data/cDNA-sample2-day-replicate1_GGATAT_L001_R2.fastq.gz
--read-1
workflows/gene_diversity/data/cDNA-sample2-day-replicate2_ACGCTG_L001_R1.fastq.gz
--read-2
workflows/gene_diversity/data/cDNA-sample2-day-replicate2_ACGCTG_L001_R2.fastq.gz
--read-1
workflows/gene_diversity/data/cDNA-sample2-day-replicate3_TCCGCG_L001_R1.fastq.gz
--read-2
workflows/gene_diversity/data/cDNA-sample2-day-replicate3_TCCGCG_L001_R2.fastq.gz
--sample
read1=workflows/gene_diversity/data/cDNA-sample1-day-replicate1_GAACGA_L001_R1.fastq.gz
read2=workflows/gene_diversity/data/cDNA-sample1-day-replicate1_GAACGA_L001_R2.fastq.gz
read1=workflows/gene_diversity/data/cDNA-sample1-day-replicate2_GGTCGG_L001_R1.fastq.gz
read2=workflows/gene_diversity/data/cDNA-sample1-day-replicate2_GGTCGG_L001_R2.fastq.gz
read1=workflows/gene_diversity/data/cDNA-sample1-day-replicate3_GACTAG_L001_R1.fastq.gz
read2=workflows/gene_diversity/data/cDNA-sample1-day-replicate3_GACTAG_L001_R2.fastq.gz
read1=workflows/gene_diversity/data/cDNA-sample1-night-replicate1_GCATTG_L001_R1.fastq.gz
read2=workflows/gene_diversity/data/cDNA-sample1-night-replicate1_GCATTG_L001_R2.fastq.gz
read1=workflows/gene_diversity/data/cDNA-sample1-night-replicate2_TTGCGG_L001_R1.fastq.gz
read2=workflows/gene_diversity/data/cDNA-sample1-night-replicate2_TTGCGG_L001_R2.fastq.gz
read1=workflows/gene_diversity/data/cDNA-sample1-night-replicate3_TATGAG_L001_R1.fastq.gz
read2=workflows/gene_diversity/data/cDNA-sample1-night-replicate3_TATGAG_L001_R2.fastq.gz
read1=workflows/gene_diversity/data/cDNA-sample2-day-replicate1_GGATAT_L001_R1.fastq.gz
read2=workflows/gene_diversity/data/cDNA-sample2-day-replicate1_GGATAT_L001_R2.fastq.gz
read1=workflows/gene_diversity/data/cDNA-sample2-day-replicate2_ACGCTG_L001_R1.fastq.gz
read2=workflows/gene_diversity/data/cDNA-sample2-day-replicate2_ACGCTG_L001_R2.fastq.gz
read1=workflows/gene_diversity/data/cDNA-sample2-day-replicate3_TCCGCG_L001_R1.fastq.gz
read2=workflows/gene_diversity/data/cDNA-sample2-day-replicate3_TCCGCG_L001_R2.fastq.gz
--database
workflows/gene_diversity/data/database_dsrb.fasta
--taxonomy
......
......@@ -34,45 +34,20 @@ description = Analysis the composition and function of a microbial community fro
# .exclude [None]: will make sure that there is only one arguments provided
#
[parameters]
# Samples
read_1.group = SAMPLES section
read_2.group = SAMPLES section
samples_names.group = SAMPLES section
merge.group = SAMPLES section
# Parameter read-1
read_1.name = Reads 1
read_1.flag = --read-1
read_1.help = The path to the Read 1.
read_1.type = localfile
read_1.action = append
read_1.required = True
# Parameter read-2
read_2.name = Reads 2
read_2.flag = --read-2
read_2.help = The path to the Read 2.
read_2.type = localfile
read_2.action = append
read_2.required = True
# Parameter samples_names
samples_names.name = Samples names
samples_names.flag = --sample-name
samples_names.help = The samples names.
samples_names.action = append
# Parameter merge
merge.name = Merge groups
merge.flag = --merge
merge.help = The list of samples to merge. Format : 'sample_A = sample_A_1, sample_A_2 : sample_B = sample_B_1, sample_B_2'.
# Gene
database.group = GENE section
taxonomy.group = GENE section
# Parameter database
database.name = Gene database
database.flag = --database
database.help = The reference set contains protein representative sequences of the gene target and should be compiled to have a good coverage of diversity of the gene family.
database.type = localfile
database.required = True
# Parameter taxonomy
taxonomy.group = GENE section
taxonomy.name = Gene taxonomy
taxonomy.flag = --taxonomy
taxonomy.help = The gene taxonomy. Format : 'GENE_ID<tab>TAX; TAX; TAX;'.
......@@ -81,14 +56,13 @@ taxonomy.required = True
# Trim sequences
trim_read_1.group = 1- TRIM section
trim_read_2.group = 1- TRIM section
# Parameter read_1
trim_read_1.name = Maximum read 1 length
trim_read_1.flag = --trim-read-1
trim_read_1.help = Maximum length for reads 1.
trim_read_1.type = int
trim_read_1.default = 500
# Parameter read_2
trim_read_2.group = 1- TRIM section
trim_read_2.name = Maximum read 2 length
trim_read_2.flag = --trim-read-2
trim_read_2.help = Maximum length for reads 2.
......@@ -97,21 +71,20 @@ trim_read_2.default = 500
# Join pairs
mismatch_ratio.group = 2- JOIN section
min_overlap.group = 2- JOIN section
max_overlap.group = 2- JOIN section
# Parameter mismatch_ratio
mismatch_ratio.name = Mismatch ratio
mismatch_ratio.flag = --mismatch-ratio
mismatch_ratio.help = Maximum allowed ratio between the number of mismatched base pairs and the overlap length.
mismatch_ratio.type = float
mismatch_ratio.default = 0.1
# Parameter min_overlap
min_overlap.group = 2- JOIN section
min_overlap.name = Minimum overlap
min_overlap.flag = --min-overlap
min_overlap.help = The minimum required overlap length between two reads to provide a confident overlap.
min_overlap.type = int
min_overlap.default = 20
# Parameter max_overlap
max_overlap.group = 2- JOIN section
max_overlap.name = Maximum overlap