Commit 335881cf authored by Clement Frainay
Browse files

Handle multiple compartments in side compound scan.

This change aims to improve side compound detection in models with compartments. The traditional degree filter often removes H2O and other cofactors from the cytosol, but those compounds usually remain in compartments with fewer biochemical activities, where their compartment-specific instances have a lower overall degree.
parent bf25b558
...@@ -4,19 +4,22 @@ import fr.inrae.toulouse.metexplore.met4j_chemUtils.FormulaParser; ...@@ -4,19 +4,22 @@ import fr.inrae.toulouse.metexplore.met4j_chemUtils.FormulaParser;
import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioMetabolite; import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioMetabolite;
import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioNetwork; import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioNetwork;
import fr.inrae.toulouse.metexplore.met4j_graph.computation.connect.weighting.DefaultWeightPolicy; import fr.inrae.toulouse.metexplore.met4j_graph.computation.connect.weighting.DefaultWeightPolicy;
import fr.inrae.toulouse.metexplore.met4j_graph.computation.transform.VertexContraction;
import fr.inrae.toulouse.metexplore.met4j_graph.core.WeightingPolicy; import fr.inrae.toulouse.metexplore.met4j_graph.core.WeightingPolicy;
import fr.inrae.toulouse.metexplore.met4j_graph.core.compound.CompoundGraph; import fr.inrae.toulouse.metexplore.met4j_graph.core.compound.CompoundGraph;
import fr.inrae.toulouse.metexplore.met4j_graph.io.Bionetwork2BioGraph; import fr.inrae.toulouse.metexplore.met4j_graph.io.Bionetwork2BioGraph;
import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.JsbmlReader; import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.JsbmlReader;
import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.Met4jSbmlReaderException; import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.Met4jSbmlReaderException;
import fr.inrae.toulouse.metexplore.met4j_core.utils.StringUtils;
import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.AbstractMet4jApplication; import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.AbstractMet4jApplication;
import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics; import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
import org.kohsuke.args4j.Option; import org.kohsuke.args4j.Option;
import java.io.FileWriter; import java.io.FileWriter;
import java.io.IOException; import java.io.IOException;
import java.util.regex.Pattern; import java.util.HashMap;
import java.util.Map;
import java.util.function.Function;
import java.util.stream.Collectors;
/** /**
* *
...@@ -50,6 +53,11 @@ public class SideCompoundsScan extends AbstractMet4jApplication { ...@@ -50,6 +53,11 @@ public class SideCompoundsScan extends AbstractMet4jApplication {
@Option(name = "-er", aliases = {"--edgeRedundancy"}, usage = "flag as side compound any compound with a number of redundancy in incident edges (parallel edges connecting to the same neighbor) above the given threshold") @Option(name = "-er", aliases = {"--edgeRedundancy"}, usage = "flag as side compound any compound with a number of redundancy in incident edges (parallel edges connecting to the same neighbor) above the given threshold")
public double parallelEdge = Double.NaN; public double parallelEdge = Double.NaN;
enum strategy {by_name,by_id}
@Option(name = "-m", aliases = {"--merge"}, usage = "Degree is shared between compounds in different compartments. " +
"Use names if consistent and unambiguous across compartments, or identifiers if compartment suffix is present (id in form \"xxx_y\" with xxx as base identifier and y as compartment label).")
public strategy mergingStrat = null;
public static void main(String[] args) throws IOException, Met4jSbmlReaderException { public static void main(String[] args) throws IOException, Met4jSbmlReaderException {
...@@ -86,13 +94,33 @@ public class SideCompoundsScan extends AbstractMet4jApplication { ...@@ -86,13 +94,33 @@ public class SideCompoundsScan extends AbstractMet4jApplication {
//perform scan //perform scan
//------------ //------------
System.err.println("Scaning..."); System.err.println("Scaning...");
//if merging compartment
Map<String, Integer> mergedDegree = new HashMap<>();
Boolean merge = (mergingStrat!=null);
Function<BioMetabolite,String> getSharedId = BioMetabolite::getName;
if(merge){
if(mergingStrat.equals(strategy.by_id)) getSharedId = (new VertexContraction.MapByIdSubString("^(\\w+)_\\w$"))::commonField;
mergedDegree = graph.vertexSet().stream().collect(
Collectors.groupingBy(
getSharedId,
Collectors.summingInt(v -> graph.degreeOf(v))
)
);
}
//degree statistics //degree statistics
DescriptiveStatistics degreeStats = new DescriptiveStatistics(); DescriptiveStatistics degreeStats = new DescriptiveStatistics();
double dt = degree; double dt = degree;
if (!Double.isNaN(degreePrecentile)) { if (!Double.isNaN(degreePrecentile)) {
for (BioMetabolite v : graph.vertexSet()) { for (BioMetabolite v : graph.vertexSet()) {
if (merge){
degreeStats.addValue(mergedDegree.get(getSharedId.apply(v)));
}else{
degreeStats.addValue(graph.degreeOf(v)); degreeStats.addValue(graph.degreeOf(v));
} }
}
dt = degreeStats.getPercentile(degreePrecentile); dt = degreeStats.getPercentile(degreePrecentile);
} }
...@@ -120,7 +148,7 @@ public class SideCompoundsScan extends AbstractMet4jApplication { ...@@ -120,7 +148,7 @@ public class SideCompoundsScan extends AbstractMet4jApplication {
StringBuffer l = new StringBuffer(v.getId()); StringBuffer l = new StringBuffer(v.getId());
if (reportValue) l.append("\t" + v.getName()); if (reportValue) l.append("\t" + v.getName());
int d = graph.degreeOf(v); int d = merge ? mergedDegree.get(getSharedId.apply(v)) : graph.degreeOf(v);
boolean sideFromDegree = (d >= degree); boolean sideFromDegree = (d >= degree);
if (sideFromDegree) side = true; if (sideFromDegree) side = true;
if (reportValue) l.append("\t" + d); if (reportValue) l.append("\t" + d);
...@@ -137,7 +165,7 @@ public class SideCompoundsScan extends AbstractMet4jApplication { ...@@ -137,7 +165,7 @@ public class SideCompoundsScan extends AbstractMet4jApplication {
if (flagInorganic || flagNoFormula) { if (flagInorganic || flagNoFormula) {
String formula = v.getChemicalFormula(); String formula = v.getChemicalFormula();
String inorganic = "?"; String inorganic = "?";
String validForumla = "true"; String validFormula = "true";
try{ try{
FormulaParser fp = new FormulaParser(formula); FormulaParser fp = new FormulaParser(formula);
if(flagInorganic){ if(flagInorganic){
...@@ -150,14 +178,14 @@ public class SideCompoundsScan extends AbstractMet4jApplication { ...@@ -150,14 +178,14 @@ public class SideCompoundsScan extends AbstractMet4jApplication {
} }
}catch(IllegalArgumentException e){ }catch(IllegalArgumentException e){
if(flagNoFormula){ if(flagNoFormula){
validForumla = "false"; validFormula = "false";
side = true; side = true;
} }
} }
if (reportValue){ if (reportValue){
if(flagInorganic) l.append("\t" + inorganic); if(flagInorganic) l.append("\t" + inorganic);
if(flagNoFormula) l.append("\t" + validForumla); if(flagNoFormula) l.append("\t" + validFormula);
} }
} }
...@@ -185,7 +213,7 @@ public class SideCompoundsScan extends AbstractMet4jApplication { ...@@ -185,7 +213,7 @@ public class SideCompoundsScan extends AbstractMet4jApplication {
public String getLongDescription() { public String getLongDescription() {
return this.getShortDescription() + "\n" + return this.getShortDescription() + "\n" +
"Side compounds are metabolites of small relevance for topological analysis. Their definition can be quite subjective and varies between sources.\n" + "Side compounds are metabolites of small relevance for topological analysis. Their definition can be quite subjective and varies between sources.\n" +
"Side compounds tends to be ubiquitous and not specific to a particular biochemical or physiological process.\n" + "Side compounds tend to be ubiquitous and not specific to a particular biochemical or physiological process." +
"Compounds usually considered as side compounds include water, atp or carbon dioxide. By being involved in many reactions and thus connected to many compounds, " + "Compounds usually considered as side compounds include water, atp or carbon dioxide. By being involved in many reactions and thus connected to many compounds, " +
"they tend to significantly lower the average shortest path distances beyond expected metabolic relatedness.\n" + "they tend to significantly lower the average shortest path distances beyond expected metabolic relatedness.\n" +
"This tool attempts to propose a list of side compounds according to specific criteria: \n" + "This tool attempts to propose a list of side compounds according to specific criteria: \n" +
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment