Commit 05565c14 authored by Penom Nom
Browse files

Add filter by replicates evidence.

parent cfaa03e0
......@@ -28,11 +28,13 @@ from weaver.function import PythonFunction
from weaver.abstraction import Map
def merge_samples( input_biom, merge_dump, output_biom ):
def merge_samples( input_biom, merge_dump, output_biom, min_evidence_nb=None ):
"""
@summary : Adds counts and merges metadata of a list of samples.
@param input_biom : [str] path to the biom file processed.
@param merge_dump : [str] path to the dump file with the rules to
@param merge_dump : [str] path to the dump file with the rules to merge.
@param min_evidence_nb : [int] the minimum number of replicates with the
observation (@see Biom.reset_count_by_replicates_evidence).
"""
import pickle
from workflows.gene_diversity.lib.Biom import BiomIO
......@@ -41,13 +43,18 @@ def merge_samples( input_biom, merge_dump, output_biom ):
merge = open(merge_dump, "rb")
merge_groups = pickle.load(merge)
merge.close()
# Merge
biom = BiomIO.from_json( input_biom )
# Filter on evidence
if min_evidence_nb is not None:
for merged_sample in merge_groups.keys():
biom.reset_count_by_replicates_evidence( merge_groups[merged_sample], int(min_evidence_nb) )
biom.filter_OTU_by_count( 1 )
# Merge
for merged_sample in merge_groups.keys():
biom.merge_samples( merge_groups[merged_sample], merged_sample )
BiomIO.write( output_biom, biom )
def filter_and_bootstrap( input_biom, output_biom, observation_threshold, nb_deleted, nb_selected, nb_round ):
"""
@summary :
......@@ -128,6 +135,7 @@ class BiomSampling (Analysis):
self.distance_method = distance_method
self.linkage_method = linkage_method
self.merge_groups = merge_groups
self.min_evidence_nb = 2
# Files
self.input_bioms = InputFileList( biom )
......@@ -149,6 +157,8 @@ class BiomSampling (Analysis):
self.software = "-"
self.options = "sampling deleted=" + str(self.nb_deleted) + " selected=" + str(self.nb_selected) + " round=" + str(self.nb_round) + " obs_min=" + str(self.observation_threshold) +\
";hierarchical_clustering distance=" + self.distance_method + " linkage=" + self.linkage_method
if self.merge_groups is not None:
self.options = "replicate_filter min_evidence=" + str(self.min_evidence_nb) + ";" + self.options
def get_version(self):
return "-"
......@@ -185,7 +195,7 @@ class BiomSampling (Analysis):
merge_dump = open( merge_dump_path, "wb" )
pickle.dump( self.merge_groups[idx], merge_dump )
merge_dump.close()
merge = PythonFunction( merge_samples, cmd_format='{EXE} {IN} {OUT} 2>> ' + self.stderr )
merge = PythonFunction( merge_samples, cmd_format='{EXE} {IN} {OUT} ' + str(self.min_evidence_nb) + ' 2>> ' + self.stderr )
MultiMap( merge, inputs=[self.input_bioms, merge_dumps], outputs=tmp_files )
# Process filter and normalisation
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment