diff --git a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/SourcesAndSinks.java b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/SourcesAndSinks.java new file mode 100644 index 0000000000000000000000000000000000000000..021739b5cc52efd18f3dfe9e8fe15305c2396286 --- /dev/null +++ b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/SourcesAndSinks.java @@ -0,0 +1,255 @@ +package fr.inrae.toulouse.metexplore.met4j_graph.computation.analyze; + +import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioMetabolite; +import fr.inrae.toulouse.metexplore.met4j_core.biodata.collection.BioCollection; +import fr.inrae.toulouse.metexplore.met4j_graph.core.compound.CompoundGraph; +import fr.inrae.toulouse.metexplore.met4j_graph.core.compound.ReactionEdge; +import org.jgrapht.alg.connectivity.KosarajuStrongConnectivityInspector; + +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +/** + * Select nodes according to their neighborhood status, as sinks (no successors) or sources (no predecessor). + * Metabolic sources and sinks are useful for identifying medium requirements and metabolic capability. However, + * for metabolic networks, relevant sources and sinks may be relevant only if present in an extracellular compartment, + * and by transposing neighborhood status of their intracellular counterparts. This class provides means to account for such specificities. + */ +public class SourcesAndSinks { + + boolean useInternal = false; + boolean keepIsolated = false; + boolean source = false; + boolean notsource = false; + boolean sink = false; + boolean notsink = false; + boolean notany = false; + + boolean useBorensteinAlgorithm = false; + + private BioCollection candidates; + private CompoundGraph graph; + + /** + * Setting to use if sources & sinks are relevant only if accessible from extracellular compartments. + * Since "real" sinks and sources in intracellular compartment(s) may be involved in transport/exchange reactions + * reversible by default, thus not allowing extracellular source or sink, an option allows to take + * the degree (minus extracellular neighbors) of intracellular counterparts. + * + * @param externals set of compounds in compartment of interest. + * @param useInternalsForDegree select nodes according to the degree of their counterparts in other compartments. + * @return + */ + public SourcesAndSinks fromExternalCompartment(BioCollection externals, boolean useInternalsForDegree){ + this.useInternal = useInternalsForDegree; + this.candidates=externals; + return this; + } + + /** + * Select all compound with no producing reaction for export + * @param select + * @return + */ + public SourcesAndSinks selectSources(boolean select) { + this.source = select; + return this; + } + + /** + * Select all compound with no consuming reaction for export + * @param select + * @return + */ + public SourcesAndSinks selectSinks(boolean select) { + this.sink = select; + return this; + } + + /** + * Select all compound with at least one consuming reaction for export + * @param select + * @return + */ + public SourcesAndSinks selectNonSinks(boolean select) { + this.notsink = select; + return this; + } + + /** + * Define seeds and targets (outputs) using the Borenstein et al. algorithm (see Borenstein et al. 2008 Large-scale reconstruction and phylogenetic analysis of metabolic environments https://doi.org/10.1073/pnas.0806162105) + * This method consider strongly connected components rather than individual nodes, thus, members of cycles can be considered as seed. + * A sink from an external compartment can however be connected to a non sink internal counterpart, thus highlighting what could end up in the external compartment rather than what must be exported. + * This option will ignore the useInternalsForDegree option. + * @param use + * @return + */ + public SourcesAndSinks useBorensteinAlgorithm(boolean use) { + this.useBorensteinAlgorithm = use; + return this; + } + + /** + * Select all compound with at least one producing reaction for export + * @param select + * @return + */ + public SourcesAndSinks selectNonSources(boolean select) { + this.notsource = select; + return this; + } + + /** + * Select all compounds with at least one producing reaction and one consuming reaction for export + * @param select + * @return + */ + public SourcesAndSinks selectIntermediaries(boolean select) { + this.notany = select; + return this; + } + + /** + * Consider isolated count both as sinks and sources. If set to false, isolated compounds are ignored. + * Default set to false; + * @param select + * @return + */ + public SourcesAndSinks keepIsolated(boolean select) { + this.keepIsolated = select; + return this; + } + + public SourcesAndSinks(CompoundGraph graph){ + this.graph=graph; + } + + /** + * get all compounds that match the selected neighborhood status + * @return selected compounds + */ + public BioCollection getSelection(){ + if(!(source||sink||notsink||notsource||notany)){ + System.err.println("[Warn] Sources and Sinks: no type selected, will return empty list"); + return new BioCollection<>(); + } + if(candidates==null || candidates.isEmpty()) candidates = new BioCollection<>(graph.vertexSet()); + + if((source&¬source)||(sink&¬sink)){ + System.err.println("[Warn] Sources and Sinks: complementary types selected (such as source and not sources), will return all evaluated nodes"); + return candidates; + } + + if(useBorensteinAlgorithm){ + return computeBorensteinAlgorithm(); + }else{ + return compute(); + } + } + + + private BioCollection compute(){ + + //Evaluate Candidates + BioCollection res = new BioCollection<>(); + for(BioMetabolite v : candidates){ + int inDegree = useInternal ? getInternalDegree(graph,v, candidates, false) : graph.inDegreeOf(v); + int outDegree = useInternal ? getInternalDegree(graph,v, candidates, true) : graph.outDegreeOf(v); + //ignore external only + if(inDegree!=-1 && outDegree!=-1 && + //ignore isolated + !(!keepIsolated && (inDegree+outDegree==0))){ + if(inDegree==0){ + if(source) res.add(v); + } else if (notsource) { + res.add(v); + } + + if(outDegree==0){ + if(sink) res.add(v); + } else if (notsink) { + res.add(v); + } + + if(notany && outDegree>0 && inDegree>0){ + res.add(v); + } + } + } + + return res; + } + + private BioCollection computeBorensteinAlgorithm(){ + + //Evaluate Candidates + BioCollection res = new BioCollection<>(); + KosarajuStrongConnectivityInspector sccComputor = new KosarajuStrongConnectivityInspector<>(graph); + List> scc = sccComputor.stronglyConnectedSets(); + + for(Set cc : scc){ + Set sccCandidates = new HashSet<>(cc); + sccCandidates.retainAll(candidates); + if(!sccCandidates.isEmpty()) { + int inDegree = 0; + int outDegree = 0; + //Same as considering a condensation graph, where a whole strongly connected component is condensed into a single node + //This will sum up, for each component's element, all successors and predecessors outside the component + for (BioMetabolite v : cc) { + Set predecessors = graph.predecessorListOf(v); + predecessors.removeAll(cc); + inDegree += predecessors.size(); + Set successors = graph.successorListOf(v); + successors.removeAll(cc); + outDegree += successors.size(); + } + if (!(!keepIsolated && (inDegree + outDegree == 0) && cc.size()==1)) { + if (inDegree == 0) { + if (source) res.addAll(sccCandidates); + } else if (notsource) { + res.addAll(sccCandidates); + } + + if (outDegree == 0) { + if (sink) res.addAll(sccCandidates); + } else if (notsink) { + res.addAll(sccCandidates); + } + + if (notany && outDegree > 0 && inDegree > 0) { + res.addAll(sccCandidates); + } + } + } + } + return res; + } + + + private int getInternalDegree(CompoundGraph g, BioMetabolite v, BioCollection externalComp , Boolean out){ + //get internal neighbor counterpart + Set internal = new HashSet<>(); + internal.addAll(g.neighborListOf(v)); + //remove all neighbors that are not internal + internal.removeAll(externalComp); + + //ignore compounds with no internal counterparts + if(internal.isEmpty()) return -1; + + //compute degree of internal counterpart + int degree = 0; + for(BioMetabolite neighbor : internal){ + if(out){ + Set n = g.successorListOf(neighbor); + n.removeAll(externalComp); + degree+= n.size(); + }else{ + Set n = g.predecessorListOf(neighbor); + n.removeAll(externalComp); + degree+= n.size(); + } + } + return degree; + } +} diff --git a/met4j-graph/src/test/java/fr/inrae/toulouse/metexplore/met4j_graph/TestSourcesAndSinks.java b/met4j-graph/src/test/java/fr/inrae/toulouse/metexplore/met4j_graph/TestSourcesAndSinks.java new file mode 100644 index 0000000000000000000000000000000000000000..a4bf973af0dc830c4b9d6f3a79b5118848fadada --- /dev/null +++ b/met4j-graph/src/test/java/fr/inrae/toulouse/metexplore/met4j_graph/TestSourcesAndSinks.java @@ -0,0 +1,347 @@ +package fr.inrae.toulouse.metexplore.met4j_graph; + +import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioMetabolite; +import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioReaction; +import fr.inrae.toulouse.metexplore.met4j_core.biodata.collection.BioCollection; +import fr.inrae.toulouse.metexplore.met4j_graph.computation.analyze.SourcesAndSinks; +import fr.inrae.toulouse.metexplore.met4j_graph.core.compound.CompoundGraph; +import fr.inrae.toulouse.metexplore.met4j_graph.core.compound.ReactionEdge; +import org.junit.BeforeClass; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +public class TestSourcesAndSinks { + + + public static CompoundGraph cg; + + public static BioMetabolite a,b,c,d,f,g,h1, h2; + public static BioMetabolite ae,be,ce,de,d0,d2,fe,ge,he; + public static BioMetabolite v,w,x,y,z; + + public static ReactionEdge aae, bbe, cce, dde, ffe, gge, h1he, h2he; + public static ReactionEdge aea, beb, cec, ded, d0de, ded2, geg, heh1, heh2; + public static ReactionEdge xa, bx, xc, cy, dx, vf, fw, wv, xh1, h2y, zx; + + public static BioCollection ext; + + @BeforeClass + public static void init() { + cg = new CompoundGraph(); + + //external metabolites + ae= new BioMetabolite("ae"); cg.addVertex(ae); + be= new BioMetabolite("be"); cg.addVertex(be); + ce= new BioMetabolite("ce"); cg.addVertex(ce); + de= new BioMetabolite("de"); cg.addVertex(de); + d0= new BioMetabolite("d0e"); cg.addVertex(d0); + d2= new BioMetabolite("d2e"); cg.addVertex(d2); + d0de=new ReactionEdge(d0,de, new BioReaction("external_rxn"));cg.addEdge(d0de); + ded2=new ReactionEdge(de,d2, new BioReaction("external_rxn"));cg.addEdge(ded2); + fe= new BioMetabolite("fe"); cg.addVertex(fe); + ge= new BioMetabolite("ge"); cg.addVertex(ge); + he= new BioMetabolite("h1h2e"); cg.addVertex(he); + ext = new BioCollection<>(); + ext.add(ae,be,ce,de,d0,d2,fe,ge,he); + + //internal "interface" metabolites (can be transported to external) + a= new BioMetabolite("ai"); cg.addVertex(a); + aae=new ReactionEdge(a,ae, new BioReaction("transport"));cg.addEdge(aae); + aea=new ReactionEdge(ae,a, new BioReaction("transport"));cg.addEdge(aea); + b= new BioMetabolite("bi"); cg.addVertex(b); + bbe=new ReactionEdge(b,be, new BioReaction("transport"));cg.addEdge(bbe); + beb=new ReactionEdge(be,b, new BioReaction("transport"));cg.addEdge(beb); + c= new BioMetabolite("ci"); cg.addVertex(c); + cce=new ReactionEdge(c,ce, new BioReaction("transport"));cg.addEdge(cce); + cec=new ReactionEdge(ce,c, new BioReaction("transport"));cg.addEdge(cec); + d= new BioMetabolite("di"); cg.addVertex(d); + dde=new ReactionEdge(d,de, new BioReaction("transport"));cg.addEdge(dde); + ded=new ReactionEdge(de,d, new BioReaction("transport"));cg.addEdge(ded); + f= new BioMetabolite("fi"); cg.addVertex(f); + ffe=new ReactionEdge(f,fe, new BioReaction("transport"));cg.addEdge(ffe); + g= new BioMetabolite("gi"); cg.addVertex(g); + gge=new ReactionEdge(g,ge, new BioReaction("transport"));cg.addEdge(gge); + geg=new ReactionEdge(ge,g, new BioReaction("transport"));cg.addEdge(geg); + h1= new BioMetabolite("h1i"); cg.addVertex(h1); + h1he=new ReactionEdge(h1,he, new BioReaction("transport"));cg.addEdge(h1he); + heh1=new ReactionEdge(he,h1, new BioReaction("transport"));cg.addEdge(heh1); + h2= new BioMetabolite("h2i"); cg.addVertex(h2); + h2he=new ReactionEdge(h2,he, new BioReaction("transport"));cg.addEdge(h2he); + heh2=new ReactionEdge(he,h2, new BioReaction("transport"));cg.addEdge(heh2); + + //internal metabolites + x=new BioMetabolite("xi");cg.addVertex(x); + y=new BioMetabolite("yi");cg.addVertex(y); + z=new BioMetabolite("zi");cg.addVertex(z); + v=new BioMetabolite("vi");cg.addVertex(v); + w=new BioMetabolite("wi");cg.addVertex(w); + zx= new ReactionEdge(z,x, new BioReaction("internal_rxn"));cg.addEdge(zx);//z is internal source + + xa=new ReactionEdge(x,a, new BioReaction("internal_rxn"));cg.addEdge(xa);//a is not a source -> a is sink + bx=new ReactionEdge(b,x, new BioReaction("internal_rxn"));cg.addEdge(bx);//b is not a sink -> b is source + xc=new ReactionEdge(x,c, new BioReaction("internal_rxn"));cg.addEdge(xc);//c is not a source + cy=new ReactionEdge(c,y, new BioReaction("internal_rxn"));cg.addEdge(cy);//c is not a sink -> c is intermediary + dx=new ReactionEdge(d,x, new BioReaction("internal_rxn"));cg.addEdge(dx);//d is not a sink -> d is source + vf=new ReactionEdge(v,f, new BioReaction("internal_rxn"));cg.addEdge(vf);//f is not a source + fw=new ReactionEdge(f,w, new BioReaction("internal_rxn"));cg.addEdge(fw);//f is not a sink -> f is intermediary + wv=new ReactionEdge(w,v, new BioReaction("internal_rxn"));cg.addEdge(wv); + //g is a source, g is a sink, g is isolated + xh1=new ReactionEdge(x,h1, new BioReaction("internal_rxn"));cg.addEdge(xh1);//h1 is not a source, h is not a source + h2y=new ReactionEdge(h2,y, new BioReaction("internal_rxn"));cg.addEdge(h2y);//h2 is not a sink, h is not a sink -> h is intermediary + + } + + @Test + public void testSourcesBA(){ + SourcesAndSinks ss = new SourcesAndSinks(cg) + .selectSources(true) + .fromExternalCompartment(ext,false) + .useBorensteinAlgorithm(true) + .keepIsolated(true); + BioCollection res = ss.getSelection(); + assertEquals("wrong number of sources",3, res.size()); + assertTrue("wrong sources", res.contains(be)); + assertTrue("wrong sources", res.contains(d0)); + assertTrue("wrong sources", res.contains(ge)); + } + + @Test + public void testNoSourcesBA(){ + SourcesAndSinks ss = new SourcesAndSinks(cg) + .selectNonSources(true) + .fromExternalCompartment(ext,false) + .useBorensteinAlgorithm(true) + .keepIsolated(true); + BioCollection res = ss.getSelection(); + assertEquals("wrong number of not sources",6, res.size()); + assertTrue("wrong not sources", res.contains(ae)); + assertTrue("wrong not sources", res.contains(ce)); + assertTrue("wrong not sources", res.contains(de)); + assertTrue("wrong not sources", res.contains(d2)); + assertTrue("wrong not sources", res.contains(fe)); + assertTrue("wrong not sources", res.contains(he)); + } + + @Test + public void testSinkBA(){ + SourcesAndSinks ss = new SourcesAndSinks(cg) + .selectSinks(true) + .fromExternalCompartment(ext,false) + .useBorensteinAlgorithm(true) + .keepIsolated(true); + BioCollection res = ss.getSelection(); + assertEquals("wrong number of sink",4, res.size()); + assertTrue("wrong sink", res.contains(ae)); + assertTrue("wrong sink", res.contains(ge)); + assertTrue("wrong sink", res.contains(d2)); + assertTrue("wrong sink", res.contains(fe)); + + } + + @Test + public void testIntermediaryBA(){ + SourcesAndSinks ss = new SourcesAndSinks(cg) + .selectIntermediaries(true) + .fromExternalCompartment(ext,false) + .useBorensteinAlgorithm(true) + .keepIsolated(true); + BioCollection res = ss.getSelection(); + assertEquals("wrong number of intermediaries",3, res.size()); + assertTrue("wrong intermediary", res.contains(ce)); + assertTrue("wrong intermediary", res.contains(de)); + assertTrue("wrong intermediary", res.contains(he)); + + } + + @Test + public void testNoSinkBA(){ + SourcesAndSinks ss = new SourcesAndSinks(cg) + .selectNonSinks(true) + .fromExternalCompartment(ext,false) + .useBorensteinAlgorithm(true) + .keepIsolated(true); + BioCollection res = ss.getSelection(); + assertEquals("wrong number of not sink",5, res.size()); + assertTrue("wrong not sink", res.contains(be)); + assertTrue("wrong not sink", res.contains(ce)); + assertTrue("wrong not sink", res.contains(de)); + assertTrue("wrong not sink", res.contains(d0)); + assertTrue("wrong not sink", res.contains(he)); + } + + @Test + public void testWholeSourcesBA(){ + SourcesAndSinks ss = new SourcesAndSinks(cg) + .selectSources(true) + .useBorensteinAlgorithm(true) + .keepIsolated(true); + BioCollection res = ss.getSelection(); + assertEquals("wrong number of sources (no external/internal adjustment)",9, res.size()); + assertTrue("wrong sources (no external/internal adjustment)", res.contains(d0)); + assertTrue("wrong sources (no external/internal adjustment)", res.contains(be)); + assertTrue("wrong sources (no external/internal adjustment)", res.contains(b)); + assertTrue("wrong sources (no external/internal adjustment)", res.contains(z)); + assertTrue("wrong sources (no external/internal adjustment)", res.contains(ge)); + assertTrue("wrong sources (no external/internal adjustment)", res.contains(g)); + assertTrue("wrong sources (no external/internal adjustment)", res.contains(f)); + assertTrue("wrong sources (no external/internal adjustment)", res.contains(v)); + assertTrue("wrong sources (no external/internal adjustment)", res.contains(w)); + } + + @Test + public void testWholeSinkBA(){ + SourcesAndSinks ss = new SourcesAndSinks(cg) + .selectSinks(true) + .useBorensteinAlgorithm(true) + .keepIsolated(true); + BioCollection res = ss.getSelection(); + assertEquals("wrong number of sink (no external/internal adjustment)",7, res.size()); + assertTrue("wrong sink (no external/internal adjustment)", res.contains(d2)); + assertTrue("wrong sink (no external/internal adjustment)", res.contains(y)); + assertTrue("wrong sink (no external/internal adjustment)", res.contains(fe)); + assertTrue("wrong sink (no external/internal adjustment)", res.contains(ge)); + assertTrue("wrong sink (no external/internal adjustment)", res.contains(g)); + assertTrue("wrong sink (no external/internal adjustment)", res.contains(ae)); + assertTrue("wrong sink (no external/internal adjustment)", res.contains(a)); + } + + @Test + public void testSources(){ + SourcesAndSinks ss = new SourcesAndSinks(cg) + .selectSources(true) + .fromExternalCompartment(ext,true) + .keepIsolated(true); + BioCollection res = ss.getSelection(); + assertEquals("wrong number of sources",3, res.size()); + assertTrue("wrong sources", res.contains(be)); + assertTrue("wrong sources", res.contains(de)); + assertTrue("wrong sources", res.contains(ge)); + } + + @Test + public void testNoSources(){ + SourcesAndSinks ss = new SourcesAndSinks(cg) + .selectNonSources(true) + .fromExternalCompartment(ext,true) + .keepIsolated(true); + BioCollection res = ss.getSelection(); + assertEquals("wrong number of not sources",4, res.size()); + assertTrue("wrong not sources", res.contains(ae)); + assertTrue("wrong not sources", res.contains(ce)); + assertTrue("wrong not sources", res.contains(fe)); + assertTrue("wrong not sources", res.contains(he)); + } + + @Test + public void testSink(){ + SourcesAndSinks ss = new SourcesAndSinks(cg) + .selectSinks(true) + .fromExternalCompartment(ext,true) + .keepIsolated(true); + BioCollection res = ss.getSelection(); + assertEquals("wrong number of sink",2, res.size()); + assertTrue("wrong sink", res.contains(ae)); + assertTrue("wrong sink", res.contains(ge)); + + } + + @Test + public void testIntermediary(){ + SourcesAndSinks ss = new SourcesAndSinks(cg) + .selectIntermediaries(true) + .fromExternalCompartment(ext,true) + .keepIsolated(true); + BioCollection res = ss.getSelection(); + assertEquals("wrong number of intermediaries",3, res.size()); + assertTrue("wrong intermediary", res.contains(ce)); + assertTrue("wrong intermediary", res.contains(fe)); + assertTrue("wrong intermediary", res.contains(he)); + + } + + @Test + public void testNoSink(){ + SourcesAndSinks ss = new SourcesAndSinks(cg) + .selectNonSinks(true) + .fromExternalCompartment(ext,true) + .keepIsolated(true); + BioCollection res = ss.getSelection(); + assertEquals("wrong number of not sink",5, res.size()); + assertTrue("wrong not sink", res.contains(be)); + assertTrue("wrong not sink", res.contains(ce)); + assertTrue("wrong not sink", res.contains(de)); + assertTrue("wrong not sink", res.contains(fe)); + assertTrue("wrong not sink", res.contains(he)); + } + + @Test + public void testWholeSources(){ + SourcesAndSinks ss = new SourcesAndSinks(cg) + .selectSources(true) + .keepIsolated(true); + BioCollection res = ss.getSelection(); + assertEquals("wrong number of sources (no external/internal adjustment)",2, res.size()); + assertTrue("wrong sources (no external/internal adjustment)", res.contains(d0)); + assertTrue("wrong sources (no external/internal adjustment)", res.contains(z)); + } + + @Test + public void testSourcesNoDegreeAdjust(){ + SourcesAndSinks ss = new SourcesAndSinks(cg) + .selectSources(true) + .fromExternalCompartment(ext,false) + .keepIsolated(true); + BioCollection res = ss.getSelection(); + assertEquals("wrong number of sources (no external/internal adjustment)",1, res.size()); + assertTrue("wrong sources (no external/internal adjustment)", res.contains(d0)); + } + + @Test + public void testWholeSink(){ + SourcesAndSinks ss = new SourcesAndSinks(cg) + .selectSinks(true) + .keepIsolated(true); + BioCollection res = ss.getSelection(); + assertEquals("wrong number of sink (no external/internal adjustment)",3, res.size()); + assertTrue("wrong sink (no external/internal adjustment)", res.contains(d2)); + assertTrue("wrong sink (no external/internal adjustment)", res.contains(y)); + assertTrue("wrong sink (no external/internal adjustment)", res.contains(fe)); + } + + @Test + public void testSinkNoDegreeAdjust(){ + SourcesAndSinks ss = new SourcesAndSinks(cg) + .selectSinks(true) + .fromExternalCompartment(ext,false) + .keepIsolated(true); + BioCollection res = ss.getSelection(); + assertEquals("wrong number of sink (no external/internal adjustment)",2, res.size()); + assertTrue("wrong sink (no external/internal adjustment)", res.contains(d2)); + assertTrue("wrong sink (no external/internal adjustment)", res.contains(fe)); + } + + @Test + public void testSourceNoIsolated(){ + SourcesAndSinks ss = new SourcesAndSinks(cg) + .selectSources(true) + .fromExternalCompartment(ext,true); + BioCollection res = ss.getSelection(); + assertEquals("wrong number of sources (no isaolated)",2, res.size()); + assertTrue("wrong sources (no isaolated)", res.contains(be)); + assertTrue("wrong sources (no isaolated)", res.contains(de)); + } + + @Test + public void testSinkNoIsolated(){ + SourcesAndSinks ss = new SourcesAndSinks(cg) + .selectSinks(true) + .fromExternalCompartment(ext,true); + BioCollection res = ss.getSelection(); + assertEquals("wrong number of sink (no isaolated)",1, res.size()); + assertTrue("wrong sink (no isaolated)", res.contains(ae)); + } + + +} \ No newline at end of file diff --git a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/SeedsAndTargets.java b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/SeedsAndTargets.java new file mode 100644 index 0000000000000000000000000000000000000000..d91ffee03622f23146ce9f93cb2999c19b8d9d6e --- /dev/null +++ b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/SeedsAndTargets.java @@ -0,0 +1,204 @@ +package fr.inrae.toulouse.metexplore.met4j_toolbox.networkAnalysis; + +import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioCompartment; +import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioEntity; +import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioMetabolite; +import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioNetwork; +import fr.inrae.toulouse.metexplore.met4j_core.biodata.collection.BioCollection; +import fr.inrae.toulouse.metexplore.met4j_graph.computation.analyze.SourcesAndSinks; +import fr.inrae.toulouse.metexplore.met4j_graph.core.compound.CompoundGraph; +import fr.inrae.toulouse.metexplore.met4j_graph.core.compound.ReactionEdge; +import fr.inrae.toulouse.metexplore.met4j_graph.io.Bionetwork2BioGraph; +import fr.inrae.toulouse.metexplore.met4j_graph.io.NodeMapping; +import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.JsbmlReader; +import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.Met4jSbmlReaderException; +import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.AbstractMet4jApplication; +import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.annotations.EnumFormats; +import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.annotations.EnumParameterTypes; +import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.annotations.Format; +import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.annotations.ParameterType; +import org.kohsuke.args4j.Option; + +import java.io.FileWriter; +import java.io.IOException; + +public class SeedsAndTargets extends AbstractMet4jApplication { + + @Format(name= EnumFormats.Sbml) + @ParameterType(name= EnumParameterTypes.InputFile) + @Option(name = "-i", aliases = {"--inputSBML"}, usage = "input SBML file", required = true) + public String inputPath = null; + + @ParameterType(name= EnumParameterTypes.InputFile) + @Option(name = "-sc", aliases = {"--sideFile"}, usage = "input Side compound file", required = false) + public String inputSide = null; + + @ParameterType(name= EnumParameterTypes.OutputFile) + @Option(name = "-o", aliases = {"--output"}, usage = "output seeds file", required = true) + public String outputPath = null; + + @ParameterType(name= EnumParameterTypes.Text) + @Option(name = "-c", aliases = {"--comp"}, usage = "Selected compartment(s), as model identifiers, separated by \"+\" sign if more than one", required = false) + public String comp = null; + + @ParameterType(name= EnumParameterTypes.Boolean) + @Option(name = "-s", aliases = {"--seeds"}, usage = "export seeds", required = false) + public boolean source = false; + + @ParameterType(name= EnumParameterTypes.Boolean) + @Option(name = "-t", aliases = {"--targets"}, usage = "export targets", required = false) + public boolean sink = false; + + @ParameterType(name= EnumParameterTypes.Boolean) + @Option(name = "-!s", aliases = {"--notSeed"}, usage = "export nodes that are not seed", required = false) + public boolean notsource = false; + + @ParameterType(name= EnumParameterTypes.Boolean) + @Option(name = "-!t", aliases = {"--notTarget"}, usage = "export nodes that are not targets", required = false) + public boolean notsink = false; + + @ParameterType(name= EnumParameterTypes.Boolean) + @Option(name = "-is", aliases = {"--keepIsolated"}, usage = "do not ignore isolated nodes, consider isolated both seed and target", required = false) + public boolean keepIsolated = false; + + @ParameterType(name= EnumParameterTypes.Boolean) + @Option(name = "-B", aliases = {"--useBorensteinAlg"}, usage = "use Borenstein Algorithm. Please cite Borenstein et al. 2008 Large-scale reconstruction and phylogenetic analysis of metabolic environments https://doi.org/10.1073/pnas.0806162105). ignore internal option", required = false) + public boolean useBorensteinAlg = false; + + + @ParameterType(name= EnumParameterTypes.Boolean) + @Option(name = "-in", aliases = {"--internal"}, usage = "if an external compartment is defined, adjust degree by considering internal counterpart", required = false) + public boolean useInternal = false; + + + + + + public static void main(String[] args) { + + SeedsAndTargets app = new SeedsAndTargets(); + + app.parseArguments(args); + + app.run(); + + } + + public void run() { + //open file + FileWriter fw = null; + try { + fw = new FileWriter(outputPath); + } catch (IOException e) { + System.err.println("Error while opening the output file"); + System.err.println(e.getMessage()); + System.exit(1); + } + + //import network + System.err.println("reading SBML..."); + JsbmlReader reader = new JsbmlReader(this.inputPath); + BioNetwork network = null; + try { + network = reader.read(); + } catch (Met4jSbmlReaderException e) { + System.err.println("Error while reading the SBML file"); + System.err.println(e.getMessage()); + System.exit(1); + } + + //Create compound graph + System.err.println("Creating network..."); + Bionetwork2BioGraph builder = new Bionetwork2BioGraph(network); + CompoundGraph graph = builder.getCompoundGraph(); + + //Graph processing: side compound removal [optional] + if (inputSide != null) { + System.err.println("removing side compounds..."); + NodeMapping mapper = new NodeMapping<>(graph).skipIfNotFound(); + BioCollection sideCpds = null; + try { + sideCpds = mapper.map(inputSide); + } catch (IOException e) { + System.err.println("Error while reading the side compound file"); + System.err.println(e.getMessage()); + System.exit(1); + } + boolean removed = graph.removeAllVertices(sideCpds); + if (removed) System.err.println(sideCpds.size() + " compounds removed."); + } + + //compute seeds and targets + SourcesAndSinks ss = new SourcesAndSinks(graph) + .selectNonSinks(notsink) + .selectSinks(sink) + .selectSources(source) + .selectNonSources(notsource) + .keepIsolated(keepIsolated) + .useBorensteinAlgorithm(useBorensteinAlg); + if (comp != null) { + ss = ss.fromExternalCompartment(getCandidates(network, graph), useInternal); + } + BioCollection res = ss.getSelection(); + + //export results + try { + for (BioMetabolite m : res) { + fw.write(m.getId() + "\n"); + } + fw.close(); + } catch (IOException e) { + System.err.println("Error while writing the result file"); + System.err.println(e.getMessage()); + System.exit(1); + } + System.err.println("done."); + + + } + + private BioCollection getCandidates(BioNetwork network, CompoundGraph graph){ + //Select Candidates + BioCollection compoundSet = new BioCollection<>(); + if(comp!=null){ + //for each "external" (available) compartment + for(String id : comp.split("\\+")){ + BioCompartment c = network.getCompartmentsView().get(id); + if(c!=null){ + //add compound graph nodes belonging to external compartment as candidate + for(BioEntity e : c.getComponentsView()){ + if(graph.vertexSet().contains(e)) compoundSet.add((BioMetabolite) e); + } + }else{ + System.out.println("Error: Compartment "+id+" not found in network, please check sbml file."); + } + } + }else{ + compoundSet.addAll(graph.vertexSet()); + } + return compoundSet; + } + + + + @Override + public String getLabel() {return this.getClass().getSimpleName();} + + @Override + public String getLongDescription() { + return "Identify exogenously acquired compounds, producible compounds exogenously available and/or dead ends metabolites from metabolic network topology. " + + "Metabolic seeds and targets are useful for identifying medium requirements and metabolic capability, and thus enable analysis of metabolic ties within communities of organisms.\n" + + "This application can use seed definition and SCC-based detection algorithm by Borenstein et al. or, alternatively, degree-based sink and source detection with compartment adjustment.\n" + + "The first method (see Borenstein et al. 2008 Large-scale reconstruction and phylogenetic analysis of metabolic environments https://doi.org/10.1073/pnas.0806162105) " + + "consider strongly connected components rather than individual nodes, thus, members of cycles can be considered as seed. " + + "A sink from an external compartment can however be connected to a non sink internal counterpart, thus highlighting what could end up in the external compartment rather than what must be exported.\n" + + "The second approach is neighborhood based and identify sources and sinks. Since \"real\" sinks and sources in intracellular compartment(s) may be involved in transport/exchange reactions " + + "reversible by default, thus not allowing extracellular source or sink, an option allows to take " + + "the degree (minus extracellular neighbors) of intracellular counterparts."; + } + + @Override + public String getShortDescription() { + return "Identify exogenously acquired compounds, producible compounds exogenously available and/or dead ends metabolites from metabolic network topology"; + } +}