From af6c77655f4afca9181910e4d8d78a4c24a1aac4 Mon Sep 17 00:00:00 2001 From: cfrainay Date: Fri, 23 Sep 2022 20:23:07 +0200 Subject: [PATCH 1/6] draft class and app --- .../computation/analyze/SourcesAndSinks.java | 159 +++++++++++++++ .../MetabolicSeedsAndTargets.java | 192 ++++++++++++++++++ 2 files changed, 351 insertions(+) create mode 100644 met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/SourcesAndSinks.java create mode 100644 met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/MetabolicSeedsAndTargets.java diff --git a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/SourcesAndSinks.java b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/SourcesAndSinks.java new file mode 100644 index 00000000..08e464f9 --- /dev/null +++ b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/SourcesAndSinks.java @@ -0,0 +1,159 @@ +package fr.inrae.toulouse.metexplore.met4j_graph.computation.analyze; + +import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioCompartment; +import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioEntity; +import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioMetabolite; +import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioNetwork; +import fr.inrae.toulouse.metexplore.met4j_core.biodata.collection.BioCollection; +import fr.inrae.toulouse.metexplore.met4j_graph.core.compound.CompoundGraph; + +import java.util.HashSet; +import java.util.Set; + +/** + * Select nodes according to their neighborhood status, as sinks (no successors) or sources (no predecessor). + * Metabolic sources and sinks are useful for identifying medium requirements and metabolic capability. However, + * for metabolic networks, relevant sources and sinks may be relevant only if present in an extracellular compartment, + * and by transposing neighborhood status of their intracellular counterparts. This class provides means to account for such specificities. + */ +public class SourcesAndSinks { + + boolean useInternal = false; + boolean keepIsolated = false; + boolean source = false; + boolean notsource = false; + boolean sink = false; + boolean notsink = false; + + private BioCollection candidates; + private CompoundGraph graph; + + /** + * Setting to use if sources & sinks are relevant only if accessible from extracellular compartments. + * Since "real" sinks and sources in intracellular compartment(s) may be involved in transport/exchange reactions + * reversible by default, thus not allowing extracellular source or sink, an option allows to take + * the degree (minus extracellular neighbors) of intracellular counterparts. + * + * @param externals set of compounds in compartment of interest. + * @param useInternalsForDegree select nodes according to the degree of their counterparts in other compartments. + * @return + */ + public SourcesAndSinks fromExternalCompartment(BioCollection externals, boolean useInternalsForDegree){ + this.useInternal = useInternalsForDegree; + this.candidates=candidates; + return this; + } + + /** + * Select all compound with no producing reaction for export + * @param select + * @return + */ + public SourcesAndSinks selectSources(boolean select) { + this.source = select; + return this; + } + + /** + * Select all compound with no consuming reaction for export + * @param select + * @return + */ + public SourcesAndSinks selectSinks(boolean select) { + this.sink = select; + return this; + } + + /** + * Select all compound with at least one consuming reaction for export + * @param select + * @return + */ + public SourcesAndSinks selectNonSinks(boolean select) { + this.notsink = select; + return this; + } + + /** + * Select all compound with at least one producing reaction for export + * @param select + * @return + */ + public SourcesAndSinks selectNonSource(boolean select) { + this.notsource = select; + return this; + } + + public SourcesAndSinks(CompoundGraph graph){ + this.graph=graph; + } + + /** + * get all compounds that match the selected neighborhood status + * @return selected compounds + */ + public BioCollection getSelection(){ + if(!(source||sink||notsink||notsource)){ + System.err.println("[Warn] Sources and Sinks: no type selected, will return empty list"); + return new BioCollection<>(); + } + if(candidates.isEmpty()) candidates = new BioCollection<>(graph.vertexSet()); + + if((source&¬source)||(sink&¬sink)){ + System.err.println("[Warn] Sources and Sinks: complementary types selected (such as source and not sources), will return all evaluated nodes"); + return candidates; + } + + return compute(); + } + + + private BioCollection compute(){ + + //Evaluate Candidates + BioCollection res = new BioCollection<>(); + for(BioMetabolite v : candidates){ + int inDegree = useInternal ? getInternalDegree(graph,v, candidates, false) : graph.inDegreeOf(v); + int outDegree = useInternal ? getInternalDegree(graph,v, candidates, true) : graph.outDegreeOf(v); + if(keepIsolated || (inDegree+outDegree>=0)){ + if(inDegree==0){ + if(source) res.add(v); + } else if (notsource) { + res.add(v); + } + + if(outDegree==0){ + if(sink) res.add(v); + } else if (notsink) { + res.add(v); + } + } + } + + return res; + } + + + private int getInternalDegree(CompoundGraph g, BioMetabolite v, BioCollection externalComp , Boolean out){ + Set internal = new HashSet<>(); + if(out){ + internal.addAll(g.predecessorListOf(v)); + }else{ + internal.addAll(g.successorListOf(v)); + } + internal.removeAll(externalComp); + int degree = 0; + for(BioMetabolite neighbor : internal){ + if(out){ + Set n = g.successorListOf(neighbor); + n.removeAll(externalComp); + degree+= n.size(); + }else{ + Set n = g.predecessorListOf(neighbor); + n.removeAll(externalComp); + degree+= n.size(); + } + } + return degree; + } +} diff --git a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/MetabolicSeedsAndTargets.java b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/MetabolicSeedsAndTargets.java new file mode 100644 index 00000000..b7ccb2e0 --- /dev/null +++ b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/MetabolicSeedsAndTargets.java @@ -0,0 +1,192 @@ +package fr.inrae.toulouse.metexplore.met4j_toolbox.networkAnalysis; + +import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioCompartment; +import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioEntity; +import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioMetabolite; +import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioNetwork; +import fr.inrae.toulouse.metexplore.met4j_core.biodata.collection.BioCollection; +import fr.inrae.toulouse.metexplore.met4j_graph.computation.analyze.SourcesAndSinks; +import fr.inrae.toulouse.metexplore.met4j_graph.core.compound.CompoundGraph; +import fr.inrae.toulouse.metexplore.met4j_graph.core.compound.ReactionEdge; +import fr.inrae.toulouse.metexplore.met4j_graph.io.Bionetwork2BioGraph; +import fr.inrae.toulouse.metexplore.met4j_graph.io.NodeMapping; +import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.JsbmlReader; +import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.Met4jSbmlReaderException; +import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.AbstractMet4jApplication; +import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.annotations.EnumFormats; +import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.annotations.EnumParameterTypes; +import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.annotations.Format; +import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.annotations.ParameterType; +import org.kohsuke.args4j.Option; + +import java.io.FileWriter; +import java.io.IOException; + +public class MetabolicSeedsAndTargets extends AbstractMet4jApplication { + + @Format(name= EnumFormats.Sbml) + @ParameterType(name= EnumParameterTypes.InputFile) + @Option(name = "-i", aliases = {"--inputSBML"}, usage = "input SBML file", required = true) + public String inputPath = null; + + @ParameterType(name= EnumParameterTypes.InputFile) + @Option(name = "-sc", aliases = {"--sideFile"}, usage = "input Side compound file", required = false) + public String inputSide = null; + + @ParameterType(name= EnumParameterTypes.OutputFile) + @Option(name = "-o", aliases = {"--output"}, usage = "output seeds file", required = true) + public String outputPath = null; + + @ParameterType(name= EnumParameterTypes.Text) + @Option(name = "-c", aliases = {"--comp"}, usage = "Selected compartment(s), as model identifiers, separated by \"+\" sign if more than one", required = false) + public String comp = null; + + @ParameterType(name= EnumParameterTypes.Boolean) + @Option(name = "-s", aliases = {"--source"}, usage = "export sources", required = false) + public boolean source = false; + + @ParameterType(name= EnumParameterTypes.Boolean) + @Option(name = "-t", aliases = {"--sink"}, usage = "export sinks", required = false) + public boolean sink = false; + + @ParameterType(name= EnumParameterTypes.Boolean) + @Option(name = "-!s", aliases = {"--notsource"}, usage = "export nodes that are not source", required = false) + public boolean notsource = false; + + @ParameterType(name= EnumParameterTypes.Boolean) + @Option(name = "-!t", aliases = {"--notsink"}, usage = "export nodes that are not sinks", required = false) + public boolean notsink = false; + + @ParameterType(name= EnumParameterTypes.Boolean) + @Option(name = "-is", aliases = {"--keepIsolated"}, usage = "do not ignore isolated nodes, consider isolated both source and sink", required = false) + public boolean keepIsolated = false; + + + @ParameterType(name= EnumParameterTypes.Boolean) + @Option(name = "-in", aliases = {"--internal"}, usage = "if an external compartment is defined, adjust degree by considering internal counterpart", required = false) + public boolean useInternal = false; + + + + + + public static void main(String[] args) { + + MetabolicSeedsAndTargets app = new MetabolicSeedsAndTargets(); + + app.parseArguments(args); + + app.run(); + + } + + public void run() { + //open file + FileWriter fw = null; + try { + fw = new FileWriter(outputPath); + } catch (IOException e) { + System.err.println("Error while opening the output file"); + System.err.println(e.getMessage()); + System.exit(1); + } + + //import network + System.err.println("reading SBML..."); + JsbmlReader reader = new JsbmlReader(this.inputPath); + BioNetwork network = null; + try { + network = reader.read(); + } catch (Met4jSbmlReaderException e) { + System.err.println("Error while reading the SBML file"); + System.err.println(e.getMessage()); + System.exit(1); + } + + //Create compound graph + System.err.println("Creating network..."); + Bionetwork2BioGraph builder = new Bionetwork2BioGraph(network); + CompoundGraph graph = builder.getCompoundGraph(); + + //Graph processing: side compound removal [optional] + if (inputSide != null) { + System.err.println("removing side compounds..."); + NodeMapping mapper = new NodeMapping<>(graph).skipIfNotFound(); + BioCollection sideCpds = null; + try { + sideCpds = mapper.map(inputSide); + } catch (IOException e) { + System.err.println("Error while reading the side compound file"); + System.err.println(e.getMessage()); + System.exit(1); + } + boolean removed = graph.removeAllVertices(sideCpds); + if (removed) System.err.println(sideCpds.size() + " compounds removed."); + } + + //compute seeds and targets + SourcesAndSinks ss = new SourcesAndSinks(graph) + .selectNonSinks(notsink) + .selectSinks(sink) + .selectSources(source) + .selectNonSource(notsource); + if (comp != null) { + ss = ss.fromExternalCompartment(getCandidates(network, graph), useInternal); + } + BioCollection res = ss.getSelection(); + + //export results + try { + for (BioMetabolite m : res) { + fw.write(m.getId() + "\n"); + } + fw.close(); + } catch (IOException e) { + System.err.println("Error while writing the result file"); + System.err.println(e.getMessage()); + System.exit(1); + } + System.err.println("done."); + + + } + + private BioCollection getCandidates(BioNetwork network, CompoundGraph graph){ + //Select Candidates + BioCollection compoundSet = new BioCollection<>(); + if(comp!=null){ + //for each "external" (available) compartment + for(String id : comp.split("\\+")){ + BioCompartment c = network.getCompartmentsView().get(id); + if(c!=null){ + //add compound graph nodes belonging to external compartment as candidate + for(BioEntity e : c.getComponentsView()){ + if(graph.vertexSet().contains(e)) compoundSet.add((BioMetabolite) e); + } + }else{ + System.out.println("Error: Compartment "+id+" not found in network, please check sbml file."); + } + } + }else{ + compoundSet.addAll(graph.vertexSet()); + } + return compoundSet; + } + + + + @Override + public String getLabel() { + return null; + } + + @Override + public String getLongDescription() { + return null; + } + + @Override + public String getShortDescription() { + return null; + } +} -- GitLab From 30d27e20be6894a1fb30b20679edbb29a2961570 Mon Sep 17 00:00:00 2001 From: cfrainay Date: Tue, 27 Sep 2022 16:30:02 +0200 Subject: [PATCH 2/6] add option for intermediary + test + fix nullpointer for isolated + ignore external only --- .../computation/analyze/SourcesAndSinks.java | 55 +++- .../met4j_graph/TestSourcesAndSinks.java | 237 ++++++++++++++++++ .../MetabolicSeedsAndTargets.java | 2 +- 3 files changed, 280 insertions(+), 14 deletions(-) create mode 100644 met4j-graph/src/test/java/fr/inrae/toulouse/metexplore/met4j_graph/TestSourcesAndSinks.java diff --git a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/SourcesAndSinks.java b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/SourcesAndSinks.java index 08e464f9..120cb888 100644 --- a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/SourcesAndSinks.java +++ b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/SourcesAndSinks.java @@ -1,9 +1,6 @@ package fr.inrae.toulouse.metexplore.met4j_graph.computation.analyze; -import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioCompartment; -import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioEntity; import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioMetabolite; -import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioNetwork; import fr.inrae.toulouse.metexplore.met4j_core.biodata.collection.BioCollection; import fr.inrae.toulouse.metexplore.met4j_graph.core.compound.CompoundGraph; @@ -24,6 +21,7 @@ public class SourcesAndSinks { boolean notsource = false; boolean sink = false; boolean notsink = false; + boolean notany = false; private BioCollection candidates; private CompoundGraph graph; @@ -40,7 +38,7 @@ public class SourcesAndSinks { */ public SourcesAndSinks fromExternalCompartment(BioCollection externals, boolean useInternalsForDegree){ this.useInternal = useInternalsForDegree; - this.candidates=candidates; + this.candidates=externals; return this; } @@ -79,11 +77,32 @@ public class SourcesAndSinks { * @param select * @return */ - public SourcesAndSinks selectNonSource(boolean select) { + public SourcesAndSinks selectNonSources(boolean select) { this.notsource = select; return this; } + /** + * Select all compounds with at least one producing reaction and one consuming reaction for export + * @param select + * @return + */ + public SourcesAndSinks selectIntermediaries(boolean select) { + this.notany = select; + return this; + } + + /** + * Consider isolated count both as sinks and sources. If set to false, isolated compounds are ignored. + * Default set to false; + * @param select + * @return + */ + public SourcesAndSinks keepIsolated(boolean select) { + this.keepIsolated = select; + return this; + } + public SourcesAndSinks(CompoundGraph graph){ this.graph=graph; } @@ -93,11 +112,11 @@ public class SourcesAndSinks { * @return selected compounds */ public BioCollection getSelection(){ - if(!(source||sink||notsink||notsource)){ + if(!(source||sink||notsink||notsource||notany)){ System.err.println("[Warn] Sources and Sinks: no type selected, will return empty list"); return new BioCollection<>(); } - if(candidates.isEmpty()) candidates = new BioCollection<>(graph.vertexSet()); + if(candidates==null || candidates.isEmpty()) candidates = new BioCollection<>(graph.vertexSet()); if((source&¬source)||(sink&¬sink)){ System.err.println("[Warn] Sources and Sinks: complementary types selected (such as source and not sources), will return all evaluated nodes"); @@ -115,7 +134,10 @@ public class SourcesAndSinks { for(BioMetabolite v : candidates){ int inDegree = useInternal ? getInternalDegree(graph,v, candidates, false) : graph.inDegreeOf(v); int outDegree = useInternal ? getInternalDegree(graph,v, candidates, true) : graph.outDegreeOf(v); - if(keepIsolated || (inDegree+outDegree>=0)){ + //ignore external only + if(inDegree!=-1 && outDegree!=-1 && + //ignore isolated + !(!keepIsolated && (inDegree+outDegree==0))){ if(inDegree==0){ if(source) res.add(v); } else if (notsource) { @@ -127,6 +149,10 @@ public class SourcesAndSinks { } else if (notsink) { res.add(v); } + + if(notany && outDegree>0 && inDegree>0){ + res.add(v); + } } } @@ -135,13 +161,16 @@ public class SourcesAndSinks { private int getInternalDegree(CompoundGraph g, BioMetabolite v, BioCollection externalComp , Boolean out){ + //get internal neighbor counterpart Set internal = new HashSet<>(); - if(out){ - internal.addAll(g.predecessorListOf(v)); - }else{ - internal.addAll(g.successorListOf(v)); - } + internal.addAll(g.neighborListOf(v)); + //remove all neighbors that are not internal internal.removeAll(externalComp); + + //ignore compounds with no internal counterparts + if(internal.isEmpty()) return -1; + + //compute degree of internal counterpart int degree = 0; for(BioMetabolite neighbor : internal){ if(out){ diff --git a/met4j-graph/src/test/java/fr/inrae/toulouse/metexplore/met4j_graph/TestSourcesAndSinks.java b/met4j-graph/src/test/java/fr/inrae/toulouse/metexplore/met4j_graph/TestSourcesAndSinks.java new file mode 100644 index 00000000..3f77830d --- /dev/null +++ b/met4j-graph/src/test/java/fr/inrae/toulouse/metexplore/met4j_graph/TestSourcesAndSinks.java @@ -0,0 +1,237 @@ +package fr.inrae.toulouse.metexplore.met4j_graph; + +import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioMetabolite; +import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioReaction; +import fr.inrae.toulouse.metexplore.met4j_core.biodata.collection.BioCollection; +import fr.inrae.toulouse.metexplore.met4j_graph.computation.analyze.SourcesAndSinks; +import fr.inrae.toulouse.metexplore.met4j_graph.core.compound.CompoundGraph; +import fr.inrae.toulouse.metexplore.met4j_graph.core.compound.ReactionEdge; +import org.junit.BeforeClass; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +public class TestSourcesAndSinks { + + + public static CompoundGraph cg; + + public static BioMetabolite a,b,c,d,f,g,h1, h2; + public static BioMetabolite ae,be,ce,de,d0,d2,fe,ge,he; + public static BioMetabolite x,y,z; + + public static ReactionEdge aae, bbe, cce, dde, ffe, gge, h1he, h2he; + public static ReactionEdge aea, beb, cec, ded, d0de, ded2, geg, heh1, heh2; + public static ReactionEdge xa, bx, xc, cy, dx, xf, fy, xh1, h2y, zx; + + public static BioCollection ext; + + @BeforeClass + public static void init() { + cg = new CompoundGraph(); + + //external metabolites + ae= new BioMetabolite("ae"); cg.addVertex(ae); + be= new BioMetabolite("be"); cg.addVertex(be); + ce= new BioMetabolite("ce"); cg.addVertex(ce); + de= new BioMetabolite("de"); cg.addVertex(de); + d0= new BioMetabolite("d0e"); cg.addVertex(d0); + d2= new BioMetabolite("d2e"); cg.addVertex(d2); + d0de=new ReactionEdge(d0,de, new BioReaction("external_rxn"));cg.addEdge(d0de); + ded2=new ReactionEdge(de,d2, new BioReaction("external_rxn"));cg.addEdge(ded2); + fe= new BioMetabolite("fe"); cg.addVertex(fe); + ge= new BioMetabolite("ge"); cg.addVertex(ge); + he= new BioMetabolite("h1h2e"); cg.addVertex(he); + ext = new BioCollection<>(); + ext.add(ae,be,ce,de,d0,d2,fe,ge,he); + + //internal "interface" metabolites (can be transported to external) + a= new BioMetabolite("ai"); cg.addVertex(a); + aae=new ReactionEdge(a,ae, new BioReaction("transport"));cg.addEdge(aae); + aea=new ReactionEdge(ae,a, new BioReaction("transport"));cg.addEdge(aea); + b= new BioMetabolite("bi"); cg.addVertex(b); + bbe=new ReactionEdge(b,be, new BioReaction("transport"));cg.addEdge(bbe); + beb=new ReactionEdge(be,b, new BioReaction("transport"));cg.addEdge(beb); + c= new BioMetabolite("ci"); cg.addVertex(c); + cce=new ReactionEdge(c,ce, new BioReaction("transport"));cg.addEdge(cce); + cec=new ReactionEdge(ce,c, new BioReaction("transport"));cg.addEdge(cec); + d= new BioMetabolite("di"); cg.addVertex(d); + dde=new ReactionEdge(d,de, new BioReaction("transport"));cg.addEdge(dde); + ded=new ReactionEdge(de,d, new BioReaction("transport"));cg.addEdge(ded); + f= new BioMetabolite("fi"); cg.addVertex(f); + ffe=new ReactionEdge(f,fe, new BioReaction("transport"));cg.addEdge(ffe); + g= new BioMetabolite("gi"); cg.addVertex(g); + gge=new ReactionEdge(g,ge, new BioReaction("transport"));cg.addEdge(gge); + geg=new ReactionEdge(ge,g, new BioReaction("transport"));cg.addEdge(geg); + h1= new BioMetabolite("h1i"); cg.addVertex(h1); + h1he=new ReactionEdge(h1,he, new BioReaction("transport"));cg.addEdge(h1he); + heh1=new ReactionEdge(he,h1, new BioReaction("transport"));cg.addEdge(heh1); + h2= new BioMetabolite("h2i"); cg.addVertex(h2); + h2he=new ReactionEdge(h2,he, new BioReaction("transport"));cg.addEdge(h2he); + heh2=new ReactionEdge(he,h2, new BioReaction("transport"));cg.addEdge(heh2); + + //internal metabolites + x=new BioMetabolite("xi");cg.addVertex(x); + y=new BioMetabolite("yi");cg.addVertex(y); + z=new BioMetabolite("zi");cg.addVertex(z); + zx= new ReactionEdge(z,x, new BioReaction("internal_rxn"));cg.addEdge(zx);//z is internal source + + xa=new ReactionEdge(x,a, new BioReaction("internal_rxn"));cg.addEdge(xa);//a is not a source -> a is sink + bx=new ReactionEdge(b,x, new BioReaction("internal_rxn"));cg.addEdge(bx);//b is not a sink -> b is source + xc=new ReactionEdge(x,c, new BioReaction("internal_rxn"));cg.addEdge(xc);//c is not a source + cy=new ReactionEdge(c,y, new BioReaction("internal_rxn"));cg.addEdge(cy);//c is not a sink -> c is intermediary + dx=new ReactionEdge(d,x, new BioReaction("internal_rxn"));cg.addEdge(dx);//d is not a sink -> d is source + xf=new ReactionEdge(x,f, new BioReaction("internal_rxn"));cg.addEdge(xf);//f is not a source + fy=new ReactionEdge(f,y, new BioReaction("internal_rxn"));cg.addEdge(fy);//f is not a sink -> f is intermediary + //g is a source, g is a sink, g is isolated + xh1=new ReactionEdge(x,h1, new BioReaction("internal_rxn"));cg.addEdge(xh1);//h1 is not a source, h is not a source + h2y=new ReactionEdge(h2,y, new BioReaction("internal_rxn"));cg.addEdge(h2y);//h2 is not a sink, h is not a sink -> h is intermediary + + } + + @Test + public void testSources(){ + SourcesAndSinks ss = new SourcesAndSinks(cg) + .selectSources(true) + .fromExternalCompartment(ext,true) + .keepIsolated(true); + BioCollection res = ss.getSelection(); + assertEquals("wrong number of sources",3, res.size()); + assertTrue("wrong sources", res.contains(be)); + assertTrue("wrong sources", res.contains(de)); + assertTrue("wrong sources", res.contains(ge)); + } + + @Test + public void testNoSources(){ + SourcesAndSinks ss = new SourcesAndSinks(cg) + .selectNonSources(true) + .fromExternalCompartment(ext,true) + .keepIsolated(true); + BioCollection res = ss.getSelection(); + System.out.println(res); + assertEquals("wrong number of not sources",4, res.size()); + assertTrue("wrong not sources", res.contains(ae)); + assertTrue("wrong not sources", res.contains(ce)); + assertTrue("wrong not sources", res.contains(fe)); + assertTrue("wrong not sources", res.contains(he)); + } + + @Test + public void testSink(){ + SourcesAndSinks ss = new SourcesAndSinks(cg) + .selectSinks(true) + .fromExternalCompartment(ext,true) + .keepIsolated(true); + BioCollection res = ss.getSelection(); + System.out.println(res); + assertEquals("wrong number of sink",2, res.size()); + assertTrue("wrong sink", res.contains(ae)); + assertTrue("wrong sink", res.contains(ge)); + + } + + @Test + public void testIntermediary(){ + SourcesAndSinks ss = new SourcesAndSinks(cg) + .selectIntermediaries(true) + .fromExternalCompartment(ext,true) + .keepIsolated(true); + BioCollection res = ss.getSelection(); + System.out.println(res); + assertEquals("wrong number of intermediaries",3, res.size()); + assertTrue("wrong intermediary", res.contains(ce)); + assertTrue("wrong intermediary", res.contains(fe)); + assertTrue("wrong intermediary", res.contains(he)); + + } + + @Test + public void testNoSink(){ + SourcesAndSinks ss = new SourcesAndSinks(cg) + .selectNonSinks(true) + .fromExternalCompartment(ext,true) + .keepIsolated(true); + BioCollection res = ss.getSelection(); + System.out.println(res); + assertEquals("wrong number of not sink",5, res.size()); + assertTrue("wrong not sink", res.contains(be)); + assertTrue("wrong not sink", res.contains(ce)); + assertTrue("wrong not sink", res.contains(de)); + assertTrue("wrong not sink", res.contains(fe)); + assertTrue("wrong not sink", res.contains(he)); + } + + @Test + public void testWholeSources(){ + SourcesAndSinks ss = new SourcesAndSinks(cg) + .selectSources(true) + .keepIsolated(true); + BioCollection res = ss.getSelection(); + assertEquals("wrong number of sources (no external/internal adjustment)",2, res.size()); + assertTrue("wrong sources (no external/internal adjustment)", res.contains(d0)); + assertTrue("wrong sources (no external/internal adjustment)", res.contains(z)); + } + + @Test + public void testSourcesNoDegreeAdjust(){ + SourcesAndSinks ss = new SourcesAndSinks(cg) + .selectSources(true) + .fromExternalCompartment(ext,false) + .keepIsolated(true); + BioCollection res = ss.getSelection(); + assertEquals("wrong number of sources (no external/internal adjustment)",1, res.size()); + assertTrue("wrong sources (no external/internal adjustment)", res.contains(d0)); + } + + @Test + public void testWholeSink(){ + SourcesAndSinks ss = new SourcesAndSinks(cg) + .selectSinks(true) + .keepIsolated(true); + BioCollection res = ss.getSelection(); + System.out.println(res); + assertEquals("wrong number of sink (no external/internal adjustment)",3, res.size()); + assertTrue("wrong sink (no external/internal adjustment)", res.contains(d2)); + assertTrue("wrong sink (no external/internal adjustment)", res.contains(y)); + assertTrue("wrong sink (no external/internal adjustment)", res.contains(fe)); + } + + @Test + public void testSinkNoDegreeAdjust(){ + SourcesAndSinks ss = new SourcesAndSinks(cg) + .selectSinks(true) + .fromExternalCompartment(ext,false) + .keepIsolated(true); + BioCollection res = ss.getSelection(); + System.out.println(res); + assertEquals("wrong number of sink (no external/internal adjustment)",2, res.size()); + assertTrue("wrong sink (no external/internal adjustment)", res.contains(d2)); + assertTrue("wrong sink (no external/internal adjustment)", res.contains(fe)); + } + + @Test + public void testSourceNoIsolated(){ + SourcesAndSinks ss = new SourcesAndSinks(cg) + .selectSources(true) + .fromExternalCompartment(ext,true); + BioCollection res = ss.getSelection(); + System.out.println(res); + assertEquals("wrong number of sources (no isaolated)",2, res.size()); + assertTrue("wrong sources (no isaolated)", res.contains(be)); + assertTrue("wrong sources (no isaolated)", res.contains(de)); + } + + @Test + public void testSinkNoIsolated(){ + SourcesAndSinks ss = new SourcesAndSinks(cg) + .selectSinks(true) + .fromExternalCompartment(ext,true); + BioCollection res = ss.getSelection(); + System.out.println(res); + assertEquals("wrong number of sink (no isaolated)",1, res.size()); + assertTrue("wrong sink (no isaolated)", res.contains(ae)); + } + +} \ No newline at end of file diff --git a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/MetabolicSeedsAndTargets.java b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/MetabolicSeedsAndTargets.java index b7ccb2e0..e9d0590b 100644 --- a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/MetabolicSeedsAndTargets.java +++ b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/MetabolicSeedsAndTargets.java @@ -129,7 +129,7 @@ public class MetabolicSeedsAndTargets extends AbstractMet4jApplication { .selectNonSinks(notsink) .selectSinks(sink) .selectSources(source) - .selectNonSource(notsource); + .selectNonSources(notsource); if (comp != null) { ss = ss.fromExternalCompartment(getCandidates(network, graph), useInternal); } -- GitLab From 378ca0e2485cac84520779fc5ada6e4617ae7534 Mon Sep 17 00:00:00 2001 From: cfrainay Date: Fri, 30 Sep 2022 10:06:34 +0200 Subject: [PATCH 3/6] Add Borenstein Algorithm --- .../computation/analyze/SourcesAndSinks.java | 72 ++++++++++++++++++- 1 file changed, 71 insertions(+), 1 deletion(-) diff --git a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/SourcesAndSinks.java b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/SourcesAndSinks.java index 120cb888..dfaabd0e 100644 --- a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/SourcesAndSinks.java +++ b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/SourcesAndSinks.java @@ -3,8 +3,14 @@ package fr.inrae.toulouse.metexplore.met4j_graph.computation.analyze; import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioMetabolite; import fr.inrae.toulouse.metexplore.met4j_core.biodata.collection.BioCollection; import fr.inrae.toulouse.metexplore.met4j_graph.core.compound.CompoundGraph; +import fr.inrae.toulouse.metexplore.met4j_graph.core.compound.ReactionEdge; +import org._3pq.jgrapht.DirectedGraph; +import org._3pq.jgrapht.alg.StrongConnectivityInspector; +import org.jgrapht.alg.connectivity.KosarajuStrongConnectivityInspector; +import java.util.Collections; import java.util.HashSet; +import java.util.List; import java.util.Set; /** @@ -23,6 +29,8 @@ public class SourcesAndSinks { boolean notsink = false; boolean notany = false; + boolean useBorensteinAlgorithm = false; + private BioCollection candidates; private CompoundGraph graph; @@ -72,6 +80,19 @@ public class SourcesAndSinks { return this; } + /** + * Define seeds and targets (outputs) using the Borenstein et al. algorithm (see Borenstein et al. 2008 Large-scale reconstruction and phylogenetic analysis of metabolic environments https://doi.org/10.1073/pnas.0806162105) + * This method consider strongly connected components rather than individual nodes, thus, members of cycles can be considered as seed. + * A sink from an external compartment can however be connected to a non sink internal counterpart, thus highlighting what could end up in the external compartment rather than what must be exported. + * This option will ignore the useInternalsForDegree option. + * @param use + * @return + */ + public SourcesAndSinks useBorensteinAlgorithm(boolean use) { + this.useBorensteinAlgorithm = use; + return this; + } + /** * Select all compound with at least one producing reaction for export * @param select @@ -123,7 +144,11 @@ public class SourcesAndSinks { return candidates; } - return compute(); + if(useBorensteinAlgorithm){ + return computeBorensteinAlgorithm(); + }else{ + return compute(); + } } @@ -159,6 +184,51 @@ public class SourcesAndSinks { return res; } + private BioCollection computeBorensteinAlgorithm(){ + + //Evaluate Candidates + BioCollection res = new BioCollection<>(); + KosarajuStrongConnectivityInspector sccComputor = new KosarajuStrongConnectivityInspector<>(graph); + List> scc = sccComputor.stronglyConnectedSets(); + + for(Set cc : scc){ + Set sccCandidates = new HashSet<>(cc); + sccCandidates.retainAll(candidates); + if(!sccCandidates.isEmpty()) { + int inDegree = 0; + int outDegree = 0; + //Same as considering a condensation graph, where a whole strongly connected component is condensed into a single node + //This will sum up, for each component's element, all successors and predecessors outside the component + for (BioMetabolite v : cc) { + Set predecessors = graph.predecessorListOf(v); + predecessors.removeAll(cc); + inDegree += predecessors.size(); + Set successors = graph.successorListOf(v); + successors.removeAll(cc); + outDegree += successors.size(); + } + if (!(!keepIsolated && (inDegree + outDegree == 0))) { + if (inDegree == 0) { + if (source) res.addAll(sccCandidates); + } else if (notsource) { + res.addAll(sccCandidates); + } + + if (outDegree == 0) { + if (sink) res.addAll(sccCandidates); + } else if (notsink) { + res.addAll(sccCandidates); + } + + if (notany && outDegree > 0 && inDegree > 0) { + res.addAll(sccCandidates); + } + } + } + } + return res; + } + private int getInternalDegree(CompoundGraph g, BioMetabolite v, BioCollection externalComp , Boolean out){ //get internal neighbor counterpart -- GitLab From 67c708314931bf5706e9733126648087cc14d6c0 Mon Sep 17 00:00:00 2001 From: cfrainay Date: Fri, 30 Sep 2022 17:44:27 +0200 Subject: [PATCH 4/6] fix isolated node Borenstein algorithm, add test cases --- .../computation/analyze/SourcesAndSinks.java | 2 +- .../met4j_graph/TestSourcesAndSinks.java | 134 ++++++++++++++++-- 2 files changed, 123 insertions(+), 13 deletions(-) diff --git a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/SourcesAndSinks.java b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/SourcesAndSinks.java index dfaabd0e..6722afef 100644 --- a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/SourcesAndSinks.java +++ b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/SourcesAndSinks.java @@ -207,7 +207,7 @@ public class SourcesAndSinks { successors.removeAll(cc); outDegree += successors.size(); } - if (!(!keepIsolated && (inDegree + outDegree == 0))) { + if (!(!keepIsolated && (inDegree + outDegree == 0) && cc.size()==1)) { if (inDegree == 0) { if (source) res.addAll(sccCandidates); } else if (notsource) { diff --git a/met4j-graph/src/test/java/fr/inrae/toulouse/metexplore/met4j_graph/TestSourcesAndSinks.java b/met4j-graph/src/test/java/fr/inrae/toulouse/metexplore/met4j_graph/TestSourcesAndSinks.java index 3f77830d..a4bf973a 100644 --- a/met4j-graph/src/test/java/fr/inrae/toulouse/metexplore/met4j_graph/TestSourcesAndSinks.java +++ b/met4j-graph/src/test/java/fr/inrae/toulouse/metexplore/met4j_graph/TestSourcesAndSinks.java @@ -19,11 +19,11 @@ public class TestSourcesAndSinks { public static BioMetabolite a,b,c,d,f,g,h1, h2; public static BioMetabolite ae,be,ce,de,d0,d2,fe,ge,he; - public static BioMetabolite x,y,z; + public static BioMetabolite v,w,x,y,z; public static ReactionEdge aae, bbe, cce, dde, ffe, gge, h1he, h2he; public static ReactionEdge aea, beb, cec, ded, d0de, ded2, geg, heh1, heh2; - public static ReactionEdge xa, bx, xc, cy, dx, xf, fy, xh1, h2y, zx; + public static ReactionEdge xa, bx, xc, cy, dx, vf, fw, wv, xh1, h2y, zx; public static BioCollection ext; @@ -75,6 +75,8 @@ public class TestSourcesAndSinks { x=new BioMetabolite("xi");cg.addVertex(x); y=new BioMetabolite("yi");cg.addVertex(y); z=new BioMetabolite("zi");cg.addVertex(z); + v=new BioMetabolite("vi");cg.addVertex(v); + w=new BioMetabolite("wi");cg.addVertex(w); zx= new ReactionEdge(z,x, new BioReaction("internal_rxn"));cg.addEdge(zx);//z is internal source xa=new ReactionEdge(x,a, new BioReaction("internal_rxn"));cg.addEdge(xa);//a is not a source -> a is sink @@ -82,14 +84,129 @@ public class TestSourcesAndSinks { xc=new ReactionEdge(x,c, new BioReaction("internal_rxn"));cg.addEdge(xc);//c is not a source cy=new ReactionEdge(c,y, new BioReaction("internal_rxn"));cg.addEdge(cy);//c is not a sink -> c is intermediary dx=new ReactionEdge(d,x, new BioReaction("internal_rxn"));cg.addEdge(dx);//d is not a sink -> d is source - xf=new ReactionEdge(x,f, new BioReaction("internal_rxn"));cg.addEdge(xf);//f is not a source - fy=new ReactionEdge(f,y, new BioReaction("internal_rxn"));cg.addEdge(fy);//f is not a sink -> f is intermediary + vf=new ReactionEdge(v,f, new BioReaction("internal_rxn"));cg.addEdge(vf);//f is not a source + fw=new ReactionEdge(f,w, new BioReaction("internal_rxn"));cg.addEdge(fw);//f is not a sink -> f is intermediary + wv=new ReactionEdge(w,v, new BioReaction("internal_rxn"));cg.addEdge(wv); //g is a source, g is a sink, g is isolated xh1=new ReactionEdge(x,h1, new BioReaction("internal_rxn"));cg.addEdge(xh1);//h1 is not a source, h is not a source h2y=new ReactionEdge(h2,y, new BioReaction("internal_rxn"));cg.addEdge(h2y);//h2 is not a sink, h is not a sink -> h is intermediary } + @Test + public void testSourcesBA(){ + SourcesAndSinks ss = new SourcesAndSinks(cg) + .selectSources(true) + .fromExternalCompartment(ext,false) + .useBorensteinAlgorithm(true) + .keepIsolated(true); + BioCollection res = ss.getSelection(); + assertEquals("wrong number of sources",3, res.size()); + assertTrue("wrong sources", res.contains(be)); + assertTrue("wrong sources", res.contains(d0)); + assertTrue("wrong sources", res.contains(ge)); + } + + @Test + public void testNoSourcesBA(){ + SourcesAndSinks ss = new SourcesAndSinks(cg) + .selectNonSources(true) + .fromExternalCompartment(ext,false) + .useBorensteinAlgorithm(true) + .keepIsolated(true); + BioCollection res = ss.getSelection(); + assertEquals("wrong number of not sources",6, res.size()); + assertTrue("wrong not sources", res.contains(ae)); + assertTrue("wrong not sources", res.contains(ce)); + assertTrue("wrong not sources", res.contains(de)); + assertTrue("wrong not sources", res.contains(d2)); + assertTrue("wrong not sources", res.contains(fe)); + assertTrue("wrong not sources", res.contains(he)); + } + + @Test + public void testSinkBA(){ + SourcesAndSinks ss = new SourcesAndSinks(cg) + .selectSinks(true) + .fromExternalCompartment(ext,false) + .useBorensteinAlgorithm(true) + .keepIsolated(true); + BioCollection res = ss.getSelection(); + assertEquals("wrong number of sink",4, res.size()); + assertTrue("wrong sink", res.contains(ae)); + assertTrue("wrong sink", res.contains(ge)); + assertTrue("wrong sink", res.contains(d2)); + assertTrue("wrong sink", res.contains(fe)); + + } + + @Test + public void testIntermediaryBA(){ + SourcesAndSinks ss = new SourcesAndSinks(cg) + .selectIntermediaries(true) + .fromExternalCompartment(ext,false) + .useBorensteinAlgorithm(true) + .keepIsolated(true); + BioCollection res = ss.getSelection(); + assertEquals("wrong number of intermediaries",3, res.size()); + assertTrue("wrong intermediary", res.contains(ce)); + assertTrue("wrong intermediary", res.contains(de)); + assertTrue("wrong intermediary", res.contains(he)); + + } + + @Test + public void testNoSinkBA(){ + SourcesAndSinks ss = new SourcesAndSinks(cg) + .selectNonSinks(true) + .fromExternalCompartment(ext,false) + .useBorensteinAlgorithm(true) + .keepIsolated(true); + BioCollection res = ss.getSelection(); + assertEquals("wrong number of not sink",5, res.size()); + assertTrue("wrong not sink", res.contains(be)); + assertTrue("wrong not sink", res.contains(ce)); + assertTrue("wrong not sink", res.contains(de)); + assertTrue("wrong not sink", res.contains(d0)); + assertTrue("wrong not sink", res.contains(he)); + } + + @Test + public void testWholeSourcesBA(){ + SourcesAndSinks ss = new SourcesAndSinks(cg) + .selectSources(true) + .useBorensteinAlgorithm(true) + .keepIsolated(true); + BioCollection res = ss.getSelection(); + assertEquals("wrong number of sources (no external/internal adjustment)",9, res.size()); + assertTrue("wrong sources (no external/internal adjustment)", res.contains(d0)); + assertTrue("wrong sources (no external/internal adjustment)", res.contains(be)); + assertTrue("wrong sources (no external/internal adjustment)", res.contains(b)); + assertTrue("wrong sources (no external/internal adjustment)", res.contains(z)); + assertTrue("wrong sources (no external/internal adjustment)", res.contains(ge)); + assertTrue("wrong sources (no external/internal adjustment)", res.contains(g)); + assertTrue("wrong sources (no external/internal adjustment)", res.contains(f)); + assertTrue("wrong sources (no external/internal adjustment)", res.contains(v)); + assertTrue("wrong sources (no external/internal adjustment)", res.contains(w)); + } + + @Test + public void testWholeSinkBA(){ + SourcesAndSinks ss = new SourcesAndSinks(cg) + .selectSinks(true) + .useBorensteinAlgorithm(true) + .keepIsolated(true); + BioCollection res = ss.getSelection(); + assertEquals("wrong number of sink (no external/internal adjustment)",7, res.size()); + assertTrue("wrong sink (no external/internal adjustment)", res.contains(d2)); + assertTrue("wrong sink (no external/internal adjustment)", res.contains(y)); + assertTrue("wrong sink (no external/internal adjustment)", res.contains(fe)); + assertTrue("wrong sink (no external/internal adjustment)", res.contains(ge)); + assertTrue("wrong sink (no external/internal adjustment)", res.contains(g)); + assertTrue("wrong sink (no external/internal adjustment)", res.contains(ae)); + assertTrue("wrong sink (no external/internal adjustment)", res.contains(a)); + } + @Test public void testSources(){ SourcesAndSinks ss = new SourcesAndSinks(cg) @@ -110,7 +227,6 @@ public class TestSourcesAndSinks { .fromExternalCompartment(ext,true) .keepIsolated(true); BioCollection res = ss.getSelection(); - System.out.println(res); assertEquals("wrong number of not sources",4, res.size()); assertTrue("wrong not sources", res.contains(ae)); assertTrue("wrong not sources", res.contains(ce)); @@ -125,7 +241,6 @@ public class TestSourcesAndSinks { .fromExternalCompartment(ext,true) .keepIsolated(true); BioCollection res = ss.getSelection(); - System.out.println(res); assertEquals("wrong number of sink",2, res.size()); assertTrue("wrong sink", res.contains(ae)); assertTrue("wrong sink", res.contains(ge)); @@ -139,7 +254,6 @@ public class TestSourcesAndSinks { .fromExternalCompartment(ext,true) .keepIsolated(true); BioCollection res = ss.getSelection(); - System.out.println(res); assertEquals("wrong number of intermediaries",3, res.size()); assertTrue("wrong intermediary", res.contains(ce)); assertTrue("wrong intermediary", res.contains(fe)); @@ -154,7 +268,6 @@ public class TestSourcesAndSinks { .fromExternalCompartment(ext,true) .keepIsolated(true); BioCollection res = ss.getSelection(); - System.out.println(res); assertEquals("wrong number of not sink",5, res.size()); assertTrue("wrong not sink", res.contains(be)); assertTrue("wrong not sink", res.contains(ce)); @@ -191,7 +304,6 @@ public class TestSourcesAndSinks { .selectSinks(true) .keepIsolated(true); BioCollection res = ss.getSelection(); - System.out.println(res); assertEquals("wrong number of sink (no external/internal adjustment)",3, res.size()); assertTrue("wrong sink (no external/internal adjustment)", res.contains(d2)); assertTrue("wrong sink (no external/internal adjustment)", res.contains(y)); @@ -205,7 +317,6 @@ public class TestSourcesAndSinks { .fromExternalCompartment(ext,false) .keepIsolated(true); BioCollection res = ss.getSelection(); - System.out.println(res); assertEquals("wrong number of sink (no external/internal adjustment)",2, res.size()); assertTrue("wrong sink (no external/internal adjustment)", res.contains(d2)); assertTrue("wrong sink (no external/internal adjustment)", res.contains(fe)); @@ -217,7 +328,6 @@ public class TestSourcesAndSinks { .selectSources(true) .fromExternalCompartment(ext,true); BioCollection res = ss.getSelection(); - System.out.println(res); assertEquals("wrong number of sources (no isaolated)",2, res.size()); assertTrue("wrong sources (no isaolated)", res.contains(be)); assertTrue("wrong sources (no isaolated)", res.contains(de)); @@ -229,9 +339,9 @@ public class TestSourcesAndSinks { .selectSinks(true) .fromExternalCompartment(ext,true); BioCollection res = ss.getSelection(); - System.out.println(res); assertEquals("wrong number of sink (no isaolated)",1, res.size()); assertTrue("wrong sink (no isaolated)", res.contains(ae)); } + } \ No newline at end of file -- GitLab From 1e610390881d723dc978e809440dea44a6f09390 Mon Sep 17 00:00:00 2001 From: cfrainay Date: Wed, 19 Oct 2022 15:25:26 +0200 Subject: [PATCH 5/6] Change app name, add doc, add Borenstein option --- .../computation/analyze/SourcesAndSinks.java | 3 -- ...dsAndTargets.java => SeedsAndTargets.java} | 28 +++++++++++++------ 2 files changed, 20 insertions(+), 11 deletions(-) rename met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/{MetabolicSeedsAndTargets.java => SeedsAndTargets.java} (75%) diff --git a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/SourcesAndSinks.java b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/SourcesAndSinks.java index 6722afef..021739b5 100644 --- a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/SourcesAndSinks.java +++ b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/SourcesAndSinks.java @@ -4,11 +4,8 @@ import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioMetabolite; import fr.inrae.toulouse.metexplore.met4j_core.biodata.collection.BioCollection; import fr.inrae.toulouse.metexplore.met4j_graph.core.compound.CompoundGraph; import fr.inrae.toulouse.metexplore.met4j_graph.core.compound.ReactionEdge; -import org._3pq.jgrapht.DirectedGraph; -import org._3pq.jgrapht.alg.StrongConnectivityInspector; import org.jgrapht.alg.connectivity.KosarajuStrongConnectivityInspector; -import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Set; diff --git a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/MetabolicSeedsAndTargets.java b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/SeedsAndTargets.java similarity index 75% rename from met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/MetabolicSeedsAndTargets.java rename to met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/SeedsAndTargets.java index e9d0590b..5e337657 100644 --- a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/MetabolicSeedsAndTargets.java +++ b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/SeedsAndTargets.java @@ -22,7 +22,7 @@ import org.kohsuke.args4j.Option; import java.io.FileWriter; import java.io.IOException; -public class MetabolicSeedsAndTargets extends AbstractMet4jApplication { +public class SeedsAndTargets extends AbstractMet4jApplication { @Format(name= EnumFormats.Sbml) @ParameterType(name= EnumParameterTypes.InputFile) @@ -61,6 +61,10 @@ public class MetabolicSeedsAndTargets extends AbstractMet4jApplication { @Option(name = "-is", aliases = {"--keepIsolated"}, usage = "do not ignore isolated nodes, consider isolated both source and sink", required = false) public boolean keepIsolated = false; + @ParameterType(name= EnumParameterTypes.Boolean) + @Option(name = "-B", aliases = {"--useBorensteinAlg"}, usage = "use Borenstein Algorithm. Please cite Borenstein et al. 2008 Large-scale reconstruction and phylogenetic analysis of metabolic environments https://doi.org/10.1073/pnas.0806162105). ignore internal option", required = false) + public boolean useBorensteinAlg = false; + @ParameterType(name= EnumParameterTypes.Boolean) @Option(name = "-in", aliases = {"--internal"}, usage = "if an external compartment is defined, adjust degree by considering internal counterpart", required = false) @@ -72,7 +76,7 @@ public class MetabolicSeedsAndTargets extends AbstractMet4jApplication { public static void main(String[] args) { - MetabolicSeedsAndTargets app = new MetabolicSeedsAndTargets(); + SeedsAndTargets app = new SeedsAndTargets(); app.parseArguments(args); @@ -129,7 +133,9 @@ public class MetabolicSeedsAndTargets extends AbstractMet4jApplication { .selectNonSinks(notsink) .selectSinks(sink) .selectSources(source) - .selectNonSources(notsource); + .selectNonSources(notsource) + .keepIsolated(keepIsolated) + .useBorensteinAlgorithm(useBorensteinAlg); if (comp != null) { ss = ss.fromExternalCompartment(getCandidates(network, graph), useInternal); } @@ -176,17 +182,23 @@ public class MetabolicSeedsAndTargets extends AbstractMet4jApplication { @Override - public String getLabel() { - return null; - } + public String getLabel() {return this.getClass().getSimpleName();} @Override public String getLongDescription() { - return null; + return "Identify exogenously acquired compounds, producible compounds exogenously available and/or dead ends metabolites from metabolic network topology. " + + "Metabolic seeds and targets are useful for identifying medium requirements and metabolic capability, and thus enable analysis of metabolic ties within communities of organisms.\n" + + "This application can use seed definition and SCC-based detection algorithm by Borenstein et al. or, alternatively, degree-based sink and source detection with compartment adjustment.\n" + + "The first method (see Borenstein et al. 2008 Large-scale reconstruction and phylogenetic analysis of metabolic environments https://doi.org/10.1073/pnas.0806162105) " + + "consider strongly connected components rather than individual nodes, thus, members of cycles can be considered as seed. " + + "A sink from an external compartment can however be connected to a non sink internal counterpart, thus highlighting what could end up in the external compartment rather than what must be exported.\n" + + "The second approach is neighborhood based and identify sources and sinks. Since \"real\" sinks and sources in intracellular compartment(s) may be involved in transport/exchange reactions " + + "reversible by default, thus not allowing extracellular source or sink, an option allows to take " + + "the degree (minus extracellular neighbors) of intracellular counterparts."; } @Override public String getShortDescription() { - return null; + return "Identify exogenously acquired compounds, producible compounds exogenously available and/or dead ends metabolites from metabolic network topology"; } } -- GitLab From eae47c97fe00c471cbc06bc0841572a01b597f92 Mon Sep 17 00:00:00 2001 From: cfrainay Date: Fri, 21 Oct 2022 17:01:22 +0200 Subject: [PATCH 6/6] rename in man for uniformity --- .../met4j_toolbox/networkAnalysis/SeedsAndTargets.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/SeedsAndTargets.java b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/SeedsAndTargets.java index 5e337657..d91ffee0 100644 --- a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/SeedsAndTargets.java +++ b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/SeedsAndTargets.java @@ -42,23 +42,23 @@ public class SeedsAndTargets extends AbstractMet4jApplication { public String comp = null; @ParameterType(name= EnumParameterTypes.Boolean) - @Option(name = "-s", aliases = {"--source"}, usage = "export sources", required = false) + @Option(name = "-s", aliases = {"--seeds"}, usage = "export seeds", required = false) public boolean source = false; @ParameterType(name= EnumParameterTypes.Boolean) - @Option(name = "-t", aliases = {"--sink"}, usage = "export sinks", required = false) + @Option(name = "-t", aliases = {"--targets"}, usage = "export targets", required = false) public boolean sink = false; @ParameterType(name= EnumParameterTypes.Boolean) - @Option(name = "-!s", aliases = {"--notsource"}, usage = "export nodes that are not source", required = false) + @Option(name = "-!s", aliases = {"--notSeed"}, usage = "export nodes that are not seed", required = false) public boolean notsource = false; @ParameterType(name= EnumParameterTypes.Boolean) - @Option(name = "-!t", aliases = {"--notsink"}, usage = "export nodes that are not sinks", required = false) + @Option(name = "-!t", aliases = {"--notTarget"}, usage = "export nodes that are not targets", required = false) public boolean notsink = false; @ParameterType(name= EnumParameterTypes.Boolean) - @Option(name = "-is", aliases = {"--keepIsolated"}, usage = "do not ignore isolated nodes, consider isolated both source and sink", required = false) + @Option(name = "-is", aliases = {"--keepIsolated"}, usage = "do not ignore isolated nodes, consider isolated both seed and target", required = false) public boolean keepIsolated = false; @ParameterType(name= EnumParameterTypes.Boolean) -- GitLab