......@@ -4,19 +4,22 @@ import fr.inrae.toulouse.metexplore.met4j_chemUtils.FormulaParser;
import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioMetabolite;
import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioNetwork;
import fr.inrae.toulouse.metexplore.met4j_graph.computation.connect.weighting.DefaultWeightPolicy;
import fr.inrae.toulouse.metexplore.met4j_graph.computation.transform.VertexContraction;
import fr.inrae.toulouse.metexplore.met4j_graph.core.WeightingPolicy;
import fr.inrae.toulouse.metexplore.met4j_graph.core.compound.CompoundGraph;
import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.JsbmlReader;
import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.Met4jSbmlReaderException;
import fr.inrae.toulouse.metexplore.met4j_core.utils.StringUtils;
import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.AbstractMet4jApplication;
import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
import org.kohsuke.args4j.Option;
import java.util.regex.Pattern;
import java.util.HashMap;
import java.util.Map;
import java.util.function.Function;
......@@ -50,6 +53,11 @@ public class SideCompoundsScan extends AbstractMet4jApplication {
/**
 * Threshold for the edge-redundancy criterion (command-line option {@code -er} / {@code --edgeRedundancy}).
 * Per the option's usage text, a compound is flagged as side compound when its number of redundant
 * incident edges (parallel edges connecting to the same neighbor) is above this value.
 * Defaults to {@code Double.NaN}.
 * NOTE(review): NaN presumably means "criterion not requested" — confirm where this field is read.
 */
@Option(name = "-er", aliases = {"--edgeRedundancy"}, usage = "flag as side compound any compound with a number of redundancy in incident edges (parallel edges connecting to the same neighbor) above the given threshold")
public double parallelEdge = Double.NaN;
/**
 * Ways of recognising the same compound across compartments when degrees are merged
 * (option {@code -m} / {@code --merge}): {@code by_name} relies on the metabolite name,
 * {@code by_id} on the identifier with its compartment suffix.
 * NOTE(review): the constant names are presumably the values typed on the command line — verify
 * against args4j's enum handling before renaming them.
 */
enum strategy {by_name,by_id}
/**
 * Compartment-merging strategy selected with {@code -m} / {@code --merge}.
 * Left {@code null} by default; the scan only merges degrees when this field is non-null.
 */
@Option(name = "-m", aliases = {"--merge"}, usage = "Degree is shared between compounds in different compartments. " +
"Use names if consistent and unambiguous across compartments, or identifiers if compartment suffix is present (id in form \"xxx_y\" with xxx as base identifier and y as compartment label).")
public strategy mergingStrat = null;
public static void main(String[] args) throws IOException, Met4jSbmlReaderException {
......@@ -86,12 +94,32 @@ public class SideCompoundsScan extends AbstractMet4jApplication {
//perform scan
//if merging compartment
// Placeholder for the summed degree per shared compound key; it is reassigned by the collect(...) call just below.
Map<String, Integer> mergedDegree = new HashMap<>();
// Merging is requested only when -m/--merge was supplied; mergingStrat is null otherwise.
boolean merge = (mergingStrat != null);
// Key used to recognise the same compound across compartments: the metabolite name by default.
Function<BioMetabolite, String> getSharedId = BioMetabolite::getName;
// Enum constants are compared with == so that a null mergingStrat (merging not requested)
// cannot raise a NullPointerException here, unlike mergingStrat.equals(...).
if (mergingStrat == strategy.by_id) {
    // The pattern captures everything before a trailing "_" + single word character (ids of the form "xxx_y").
    // NOTE(review): commonField presumably returns that captured base identifier — confirm in VertexContraction.
    getSharedId = (new VertexContraction.MapByIdSubString("^(\\w+)_\\w$"))::commonField;
}
mergedDegree = graph.vertexSet().stream().collect(
Collectors.summingInt(v -> graph.degreeOf(v))
//degree statistics
DescriptiveStatistics degreeStats = new DescriptiveStatistics();
double dt = degree;
if (!Double.isNaN(degreePrecentile)) {
for (BioMetabolite v : graph.vertexSet()) {
if (merge){
dt = degreeStats.getPercentile(degreePrecentile);
......@@ -120,7 +148,7 @@ public class SideCompoundsScan extends AbstractMet4jApplication {
// Tab-separated record for the current compound, starting with its identifier.
StringBuffer l = new StringBuffer(v.getId());
if (reportValue) l.append('\t').append(v.getName());
// Degree used for the threshold test, declared once: the value shared across compartments when
// merging is enabled, otherwise the vertex's own degree in the graph.
int d = merge ? mergedDegree.get(getSharedId.apply(v)) : graph.degreeOf(v);
// A compound whose degree reaches the threshold is flagged as a side compound.
boolean sideFromDegree = (d >= degree);
if (sideFromDegree) side = true;
if (reportValue) l.append('\t').append(d);
......@@ -137,7 +165,7 @@ public class SideCompoundsScan extends AbstractMet4jApplication {
if (flagInorganic || flagNoFormula) {
String formula = v.getChemicalFormula();
String inorganic = "?";
String validForumla = "true";
String validFormula = "true";
FormulaParser fp = new FormulaParser(formula);
......@@ -150,14 +178,14 @@ public class SideCompoundsScan extends AbstractMet4jApplication {
}catch(IllegalArgumentException e){
validForumla = "false";
validFormula = "false";
side = true;
if (reportValue){
if(flagInorganic) l.append("\t" + inorganic);
if(flagNoFormula) l.append("\t" + validForumla);
if(flagNoFormula) l.append("\t" + validFormula);
......@@ -185,7 +213,7 @@ public class SideCompoundsScan extends AbstractMet4jApplication {
public String getLongDescription() {
return this.getShortDescription() + "\n" +
"Side compounds are metabolites of small relevance for topological analysis. Their definition can be quite subjective and varies between sources.\n" +
"Side compounds tends to be ubiquitous and not specific to a particular biochemical or physiological process.\n" +
"Side compounds tend to be ubiquitous and not specific to a particular biochemical or physiological process." +
"Compounds usually considered as side compounds include water, atp or carbon dioxide. By being involved in many reactions and thus connected to many compounds, " +
"they tend to significantly lower the average shortest path distances beyond expected metabolic relatedness.\n" +
"This tool attempts to propose a list of side compounds according to specific criteria: \n" +
