Commit 3f6a5f4e authored by Clement Frainay's avatar Clement Frainay
Browse files

Merge branch 'feature/SBML_scraping' into 'develop'

Expand Toolbox with SBML scraping app

See merge request !9
parents 78bfe637 e17255f4
package fr.inrae.toulouse.metexplore.met4j_toolbox.attributes;
import fr.inrae.toulouse.metexplore.met4j_core.biodata.*;
import fr.inrae.toulouse.metexplore.met4j_core.biodata.collection.BioCollection;
import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.JsbmlReader;
import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.Met4jSbmlReaderException;
import fr.inrae.toulouse.metexplore.met4j_io.jsbml.writer.JsbmlWriter;
import fr.inrae.toulouse.metexplore.met4j_io.jsbml.writer.Met4jSbmlWriterException;
import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.AbstractMet4jApplication;
import org.kohsuke.args4j.Option;
import java.io.IOException;
/**
 * Command-line application that reads a network from an SBML file, keeps only
 * the reactions (and their metabolites and genes) belonging to a selection of
 * pathways, and writes the resulting sub-network to a new SBML file.
 *
 * <p>Pathways are selected with the {@code -p} option; several identifiers can
 * be given by separating them with a {@code +} sign.
 */
public class ExtractPathways extends AbstractMet4jApplication {

    /** Path of the SBML file to read. */
    @Option(name = "-i", usage = "input SBML file", required = true)
    public String inputPath = null;

    /** Path of the SBML file to write. */
    @Option(name = "-o", usage = "output SBML file", required = true)
    public String outputPath = null;

    /** Identifier(s) of the pathway(s) to keep, joined by "+" when more than one. */
    @Option(name = "-p", usage = "pathway identifiers, separated by \"+\" sign if more than one", required = true)
    public String pathwayId = null;

    /**
     * Entry point: parses the command-line arguments and runs the extraction.
     *
     * @param args command-line arguments
     * @throws IOException              declared by {@link #run()}
     * @throws Met4jSbmlReaderException declared by {@link #run()}
     * @throws Met4jSbmlWriterException declared by {@link #run()}
     */
    public static void main(String[] args) throws IOException, Met4jSbmlReaderException, Met4jSbmlWriterException {
        ExtractPathways app = new ExtractPathways();
        app.parseArguments(args);
        app.run();
    }

    /**
     * Reads the input network, strips everything that is not part of the
     * requested pathways and exports the remaining network.
     *
     * <p>Entity counts are printed to standard output before and after the
     * filtering. A requested pathway identifier that is not found in the network
     * is reported on standard error and ignored.
     *
     * @throws IOException              propagated from the SBML reader/writer
     * @throws Met4jSbmlReaderException propagated from the SBML reader
     * @throws Met4jSbmlWriterException propagated from the SBML writer
     */
    public void run() throws IOException, Met4jSbmlReaderException, Met4jSbmlWriterException {
        // read SBML
        JsbmlReader reader = new JsbmlReader(this.inputPath);
        BioNetwork network = reader.read();
        System.out.println("Number of reactions in original network: " + network.getReactionsView().size());
        System.out.println("Number of species in original network: " + network.getMetabolitesView().size());
        System.out.println("Number of genes in original network: " + network.getGenesView().size());

        // Start from copies of all reactions, metabolites and genes; the entities
        // to keep are subtracted below, so what is left is what must be removed.
        BioCollection<BioReaction> reactions = new BioCollection<>(network.getReactionsView());
        BioCollection<BioMetabolite> metabolites = new BioCollection<>(network.getMetabolitesView());
        BioCollection<BioGene> genes = new BioCollection<>(network.getGenesView());

        // resolve the requested pathways
        BioCollection<BioPathway> pathways = new BioCollection<>();
        for (String id : pathwayId.split("\\+")) {
            BioPathway pathway = network.getPathwaysView().get(id);
            if (pathway != null) {
                pathways.add(pathway);
                System.out.println("Number of reactions in pathway " + pathway.getName() + " (" + id + "): " + network.getReactionsFromPathways(pathway).size());
                System.out.println("Number of species in pathway " + pathway.getName() + " (" + id + "): " + network.getMetabolitesFromPathway(pathway).size());
                System.out.println("Number of genes in pathway " + pathway.getName() + " (" + id + "): " + network.getGenesFromPathways(pathway).size());
            } else {
                // Errors go to stderr so they are not mixed with the regular report.
                // NOTE(review): if none of the identifiers match, every entity ends up
                // in the removal lists below -- confirm this is the intended outcome.
                System.err.println("Error: Pathway " + id + " not found in network, please check sbml file.");
            }
        }

        // exclude the pathways' reactions, metabolites and genes from the removal lists
        BioCollection<BioReaction> pathwaysReactions = network.getReactionsFromPathways(pathways);
        reactions.removeAll(pathwaysReactions);
        metabolites.removeAll(network.getMetabolitesFromReactions(pathwaysReactions));
        genes.removeAll(network.getGenesFromReactions(pathwaysReactions));

        // remove everything that is left in the lists from the network
        network.removeOnCascade(reactions);
        network.removeOnCascade(metabolites);
        network.removeOnCascade(genes);
        System.out.println("Number of reactions in network: " + network.getReactionsView().size());
        System.out.println("Number of species in network: " + network.getMetabolitesView().size());
        System.out.println("Number of genes in network: " + network.getGenesView().size());

        // export network
        JsbmlWriter w = new JsbmlWriter(outputPath, network);
        w.write();
        System.err.println("network exported.");
    }

    /** @return the simple name of this class, used as the application label */
    @Override
    public String getLabel() {
        return this.getClass().getSimpleName();
    }

    /** @return a detailed, human-readable description of the application */
    @Override
    public String getLongDescription() {
        return "Extract pathway(s) from GSMN: From a SBML file, Create a sub-network SBML file including only a selection of pathways";
    }

    /** @return a one-line description of the application */
    @Override
    public String getShortDescription() {
        return "Extract pathway(s) from GSMN";
    }
}
package fr.inrae.toulouse.metexplore.met4j_toolbox.attributes;
import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioEntity;
import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioNetwork;
import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioRef;
import fr.inrae.toulouse.metexplore.met4j_core.biodata.collection.BioCollection;
import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.JsbmlReader;
import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.Met4jSbmlReaderException;
import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.AbstractMet4jApplication;
import org.kohsuke.args4j.Option;
import java.io.FileWriter;
import java.io.IOException;
import java.util.*;
/**
 * Command-line application that exports, for every metabolite, reaction or gene
 * of an SBML network, the identifiers it references in a given database.
 *
 * <p>The result is a two-column text file (entity identifier, database
 * identifier) whose columns are separated by {@link #sep}.
 */
public class ExtractSbmlAnnot extends AbstractMet4jApplication {

    /** Path of the SBML file to read. */
    @Option(name = "-i", usage = "input SBML file", required = true)
    public String inputPath = null;

    /** Path of the tabulated file to write. */
    @Option(name = "-o", usage = "output file path", required = true)
    public String outputPath = null;

    /** Kinds of network entities whose annotations can be exported. */
    enum entity { METABOLITE, REACTION, GENE }

    /** Kind of entity to export annotations for. */
    @Option(name = "-export", usage = "the type of entity to extract annotation, either metabolite, reaction, or gene", required = true)
    public entity export;

    /** Name of the database whose references are exported. */
    @Option(name = "-db", usage = "name of the referenced database to export annotations from, as listed in notes or identifiers.org base uri", required = true)
    public String db;

    /** When true, at most one reference is written per entity. */
    @Option(name = "-uniq", usage = "keep only one identifier if multiple are referenced for the same entity", required = false)
    public Boolean uniq = false;

    /** When true, entities without a reference are omitted instead of written with "NA". */
    @Option(name = "-skip", usage = "Skip entities without the selected annotations, by default output them with NA value", required = false)
    public Boolean skip = false;

    /** Column separator used in the output file. */
    public String sep = "\t";

    /**
     * Entry point: parses the command-line arguments and runs the export.
     *
     * @param args command-line arguments
     * @throws IOException              declared by {@code run()}
     * @throws Met4jSbmlReaderException declared by {@code run()}
     */
    public static void main(String[] args) throws IOException, Met4jSbmlReaderException {
        ExtractSbmlAnnot app = new ExtractSbmlAnnot();
        app.parseArguments(args);
        app.run();
    }

    /**
     * Reads the network, then writes one line per (entity, reference) pair for
     * the selected database, and finally prints how many entities had at least
     * one reference.
     *
     * @throws IOException              if the output file cannot be written, or
     *                                  propagated from the SBML reader
     * @throws Met4jSbmlReaderException propagated from the SBML reader
     */
    private void run() throws IOException, Met4jSbmlReaderException {
        // Read the SBML first, so that a reading failure does not leave an
        // empty, truncated output file behind.
        JsbmlReader reader = new JsbmlReader(this.inputPath);
        BioNetwork network = reader.read();

        BioCollection<? extends BioEntity> entities = new BioCollection<>();
        if (export == entity.METABOLITE) {
            entities = network.getMetabolitesView();
        } else if (export == entity.REACTION) {
            entities = network.getReactionsView();
        } else if (export == entity.GENE) {
            entities = network.getGenesView();
        }

        // number of entities with at least one reference in the selected database
        int annotated = 0;

        // try-with-resources guarantees the file is closed even if a write fails
        try (FileWriter fw = new FileWriter(outputPath)) {
            // header; Locale.ROOT keeps the casing independent of the user's locale
            fw.write(export.name() + sep + db.toUpperCase(Locale.ROOT) + "\n");

            for (BioEntity e : entities) {
                Set<BioRef> refSet = e.getRefs(db);
                // An empty set is handled like a missing one: it carries no
                // identifier to export (and would make the -uniq lookup fail).
                if (refSet != null && !refSet.isEmpty()) {
                    annotated += 1;
                    for (BioRef ref : refSet) {
                        fw.write(e.getId() + sep + ref.getId() + "\n");
                        if (uniq) {
                            // only the first reference returned by the set is kept
                            break;
                        }
                    }
                } else if (!skip) {
                    fw.write(e.getId() + sep + "NA\n");
                }
            }
        }

        System.out.println("annotations found for " + annotated + "/" + entities.size() + " "
                + export.name().toLowerCase(Locale.ROOT) + ((annotated > 1) ? "s" : ""));
    }

    /** @return the simple name of this class, used as the application label */
    @Override
    public String getLabel() {
        return this.getClass().getSimpleName();
    }

    /** @return a detailed, human-readable description of the application */
    @Override
    public String getLongDescription() {
        return "Extract databases' references from SBML annotations or notes. " +
                "The references are exported as a tabulated file with one column with the SBML compound, " +
                "reaction or gene identifiers, and one column with the corresponding database identifier. " +
                "The name of the targeted database need to be provided under the same form than the one used " +
                "in the notes field or the identifiers.org uri";
    }

    /** @return a one-line description of the application */
    @Override
    public String getShortDescription() {
        return "Extract databases' references from SBML annotations or notes.";
    }
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment