From 80d0f20ecb29bcfdcb56005a25af2f0ef4b6b86e Mon Sep 17 00:00:00 2001 From: cfrainay <clement.frainay@inrae.fr> Date: Thu, 1 Jun 2023 17:53:13 +0200 Subject: [PATCH 01/13] stub class for merging compartments --- .../biodata/utils/CompartmentMerger.java | 169 ++++++++++++++++++ 1 file changed, 169 insertions(+) create mode 100644 met4j-core/src/main/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/CompartmentMerger.java diff --git a/met4j-core/src/main/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/CompartmentMerger.java b/met4j-core/src/main/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/CompartmentMerger.java new file mode 100644 index 000000000..8adb1ac5e --- /dev/null +++ b/met4j-core/src/main/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/CompartmentMerger.java @@ -0,0 +1,169 @@ +package fr.inrae.toulouse.metexplore.met4j_core.biodata.utils; + +import fr.inrae.toulouse.metexplore.met4j_core.biodata.*; +import fr.inrae.toulouse.metexplore.met4j_core.biodata.collection.BioCollection; + +import java.util.*; +import java.util.function.Function; +import java.util.stream.Collectors; + +public class CompartmentMerger { + + //criterion for identifying same compound over multiple compartments + //default use name as common field + Function<BioMetabolite,String> groupingFunction = BioMetabolite::getName; + //criterion for selecting a unique representative from a group of compound instances + //default takes first id in alphabetical order + Function<List<BioMetabolite>,BioMetabolite> pickFunction = (l -> {l.sort(Comparator.comparing(BioMetabolite::getId));return l.get(0);}); + //unique compartment + //default is named "NA" + BioCompartment uniqComp = new BioCompartment("1", "NA"); + + // map for each compound toward their unique counterpart (can be themselves) + HashMap<BioMetabolite,BioMetabolite> convert; + // merged bioNetwork + BioNetwork merged; + + public CompartmentMerger(){ + } + + public BioNetwork merge(BioNetwork 
original){ + + //create new network with same metadata and single compartment + buildNetwork(original); + + //group corresponding compounds + Map<String, List<BioMetabolite>> compoundGroups = original.getMetabolitesView().stream().collect(Collectors.groupingBy(groupingFunction)); + + //for each group, create a unique compound + convert = new HashMap<>(); + for(List<BioMetabolite> toContract : compoundGroups.values()){ + BioMetabolite uniq = buildCompound(toContract); //(add newly created compound to new network) + //populate map for each compound toward their unique counterpart (can be themselves) + for(BioMetabolite m : toContract){ + convert.put(m,uniq); + } + } + + //copy Gene, Protein and Enzyme + keepGPR(original); + + //for each reaction, replace reactants by their unique counterpart + for(BioReaction r : original.getReactionsView()){ + // create deep copy, except for reactants + buildReaction(r); + } + + //remove reactions that create loops + removeLoops(); + + //remove redundant reactions? 
+ //TODO + + //copy Pathways + //TODO + + return merged; + } + + private void buildNetwork(BioNetwork original){ + //create new network with single compartment + merged = new BioNetwork(); + merged.addCompartment(uniqComp); + //update metadata + merged.setSynonyms(new ArrayList<>(original.getSynonyms())); + merged.setComment(original.getComment()); + merged.setRefs(new HashMap<>(original.getRefs())); + merged.setAttributes(new HashMap<>(original.getAttributes())); + } + + private BioMetabolite buildCompound(List<BioMetabolite> originalCtoMerge){ + //from compounds to merge, pick one as template for new unique compound + BioMetabolite chosen = pickFunction.apply(originalCtoMerge); + BioMetabolite newMetabolite = new BioMetabolite(chosen); + merged.add(newMetabolite); + merged.affectToCompartment(uniqComp,newMetabolite); + return newMetabolite; + } + + private BioReaction buildReaction(BioReaction originalR){ + BioReaction newReaction = new BioReaction(originalR); + newReaction.setSpontaneous(originalR.isSpontaneous()); + newReaction.setReversible(originalR.isReversible()); + newReaction.setEcNumber(originalR.getEcNumber()); + + merged.add(newReaction); + + // Create substrates, swap to unique compound + for (BioReactant reactant : originalR.getLeftReactantsView()) { + BioMetabolite newMetabolite = convert.get(reactant.getMetabolite()); + Double sto = reactant.getQuantity(); + merged.affectLeft(newReaction, sto, uniqComp, newMetabolite); + } + + // Create products, swap to unique compound + for (BioReactant reactant : originalR.getRightReactantsView()) { + BioMetabolite newMetabolite = convert.get(reactant.getMetabolite()); + Double sto = reactant.getQuantity(); + merged.affectRight(newReaction, sto, uniqComp, newMetabolite); + } + + //copy GPR + for (BioEnzyme enzyme : originalR.getEnzymesView()) { + BioEnzyme newEnzyme = merged.getEnzyme(enzyme.getId()); + merged.affectEnzyme(newReaction, newEnzyme); + } + + return newReaction; + } + + // remove reactions that 
create loops, i.e. transport reactions between compartments + private void removeLoops(){ + BioCollection<BioReaction> toRemove = new BioCollection<>(); + for(BioReaction r : merged.getReactionsView()){ + if(r.getLeftsView().stream().anyMatch(r.getRightsView()::contains)) toRemove.add(r); + } + merged.removeOnCascade(toRemove); + } + + private void keepGPR(BioNetwork original){ + // Copy genes + for (BioGene gene : original.getGenesView()) { + BioGene newGene = new BioGene(gene); + merged.add(newGene); + } + + // Copy proteins + for (BioProtein protein : original.getProteinsView()) { + BioProtein newProtein = new BioProtein(protein); + merged.add(newProtein); + if (protein.getGene() != null) { + String geneId = protein.getGene().getId(); + BioGene newGene = merged.getGene(geneId); + merged.affectGeneProduct(newProtein, newGene); + } + } + + // Copy enzymes + for (BioEnzyme enzyme : original.getEnzymesView()) { + BioEnzyme newEnzyme = new BioEnzyme(enzyme); + merged.add(newEnzyme); + + BioCollection<BioEnzymeParticipant> participants = enzyme.getParticipantsView(); + + for (BioEnzymeParticipant participant : participants) { + Double quantity = participant.getQuantity(); + if (participant.getPhysicalEntity().getClass().equals(BioMetabolite.class)) { + BioMetabolite metabolite = (BioMetabolite) participant.getPhysicalEntity(); + // swap to unique compound + merged.affectSubUnit(newEnzyme, quantity, convert.get(metabolite)); + } else if (participant.getPhysicalEntity().getClass().equals(BioProtein.class)) { + BioProtein protein = (BioProtein) participant.getPhysicalEntity(); + BioProtein newProtein = merged.getProtein(protein.getId()); + merged.affectSubUnit(newEnzyme, quantity, newProtein); + } + } + } + } + +} -- GitLab From 481d009cce89e861fd1bf171ea3a47eac668c791 Mon Sep 17 00:00:00 2001 From: cfrainay <clement.frainay@inrae.fr> Date: Mon, 5 Jun 2023 09:39:21 +0200 Subject: [PATCH 02/13] add options for merge functions, handle new compound id collision, copy 
pathways --- .../biodata/utils/CompartmentMerger.java | 46 +++++++++++++++++-- 1 file changed, 41 insertions(+), 5 deletions(-) diff --git a/met4j-core/src/main/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/CompartmentMerger.java b/met4j-core/src/main/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/CompartmentMerger.java index 8adb1ac5e..4382f0e8d 100644 --- a/met4j-core/src/main/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/CompartmentMerger.java +++ b/met4j-core/src/main/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/CompartmentMerger.java @@ -11,10 +11,11 @@ public class CompartmentMerger { //criterion for identifying same compound over multiple compartments //default use name as common field - Function<BioMetabolite,String> groupingFunction = BioMetabolite::getName; + Function<BioMetabolite,String> getUniqIdFunction = BioMetabolite::getName; //criterion for selecting a unique representative from a group of compound instances //default takes first id in alphabetical order Function<List<BioMetabolite>,BioMetabolite> pickFunction = (l -> {l.sort(Comparator.comparing(BioMetabolite::getId));return l.get(0);}); + //unique compartment //default is named "NA" BioCompartment uniqComp = new BioCompartment("1", "NA"); @@ -27,13 +28,28 @@ public class CompartmentMerger { public CompartmentMerger(){ } + public CompartmentMerger setGetUniqIdFunction(Function<BioMetabolite, String> uniqIdFunction) { + this.getUniqIdFunction = uniqIdFunction; + return this; + } + + public CompartmentMerger setCompoundMergeFunction(Function<List<BioMetabolite>, BioMetabolite> compoundMergeFunction) { + this.pickFunction = compoundMergeFunction; + return this; + } + + public CompartmentMerger setUniqCompartment(BioCompartment uniqComp) { + this.uniqComp = uniqComp; + return this; + } + public BioNetwork merge(BioNetwork original){ //create new network with same metadata and single compartment buildNetwork(original); //group corresponding compounds 
- Map<String, List<BioMetabolite>> compoundGroups = original.getMetabolitesView().stream().collect(Collectors.groupingBy(groupingFunction)); + Map<String, List<BioMetabolite>> compoundGroups = original.getMetabolitesView().stream().collect(Collectors.groupingBy(getUniqIdFunction)); //for each group, create a unique compound convert = new HashMap<>(); @@ -54,15 +70,27 @@ public class CompartmentMerger { buildReaction(r); } + //copy Pathways + for (BioPathway pathway : original.getPathwaysView()) { + + BioPathway newPathway = new BioPathway(pathway); + merged.add(newPathway); + + // Add reactions into pathway + BioCollection<BioReaction> reactions = original.getReactionsFromPathways(pathway); + + for (BioReaction reaction : reactions) { + BioReaction newReaction = merged.getReaction(reaction.getId()); + merged.affectToPathway(newPathway, newReaction); + } + } + //remove reactions that create loops removeLoops(); //remove redundant reactions? //TODO - //copy Pathways - //TODO - return merged; } @@ -81,6 +109,14 @@ public class CompartmentMerger { //from compounds to merge, pick one as template for new unique compound BioMetabolite chosen = pickFunction.apply(originalCtoMerge); BioMetabolite newMetabolite = new BioMetabolite(chosen); + BioMetabolite old = merged.getMetabolite(newMetabolite.getId()); + if(old!=null){ + System.err.println("WARNING: collision in new compound identifiers. 
Compounds with different unique ids will be merged under the same new entity "+newMetabolite.getId()+"."); + System.err.println("If it is an expected behaviour that the provided merge function may not produce a different compounds at each call, please review the following merge:"); + System.err.println(originalCtoMerge.stream().map(c -> c.getId()+" : "+ getUniqIdFunction.apply(c)).collect(Collectors.toList())); + System.err.println(convert.entrySet().stream().filter(e -> old.equals(e.getValue())).map(e -> e.getKey().getId()+" : "+ getUniqIdFunction.apply(e.getKey())).collect(Collectors.toList())); + return old; + } merged.add(newMetabolite); merged.affectToCompartment(uniqComp,newMetabolite); return newMetabolite; -- GitLab From ed40b6ebbdc996692468ea773005769cfe3aa518 Mon Sep 17 00:00:00 2001 From: cfrainay <clement.frainay@inrae.fr> Date: Thu, 8 Jun 2023 16:11:59 +0200 Subject: [PATCH 03/13] add javadoc --- .../biodata/utils/CompartmentMerger.java | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/met4j-core/src/main/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/CompartmentMerger.java b/met4j-core/src/main/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/CompartmentMerger.java index 4382f0e8d..523a50652 100644 --- a/met4j-core/src/main/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/CompartmentMerger.java +++ b/met4j-core/src/main/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/CompartmentMerger.java @@ -7,6 +7,9 @@ import java.util.*; import java.util.function.Function; import java.util.stream.Collectors; +/** + * A class to create, from a network with multiple compartments, a new network with a single compartment, avoiding duplicated compounds. 
+ */ public class CompartmentMerger { //criterion for identifying same compound over multiple compartments @@ -25,24 +28,52 @@ public class CompartmentMerger { // merged bioNetwork BioNetwork merged; + /** + * Create a new Compartment Merger + */ public CompartmentMerger(){ } + /** + * Fluent builder setting the function that provides the criterion used for identifying same compounds over multiple compartments + * Default use "name" as common field for the same compound over multiple compartments + * @param uniqIdFunction the function + * @return a CompartmentMerger instance + */ public CompartmentMerger setGetUniqIdFunction(Function<BioMetabolite, String> uniqIdFunction) { this.getUniqIdFunction = uniqIdFunction; return this; } + /** + * Fluent builder setting the function that select or create a unique representative from a group of compound instances + * default return compound from list with first id in alphabetical order. + * A new compound with custom id can be returned, but if an id is generated twice or more, the corresponding groups will be merged into a single one + * @param compoundMergeFunction the function + * @return a CompartmentMerger instance + */ public CompartmentMerger setCompoundMergeFunction(Function<List<BioMetabolite>, BioMetabolite> compoundMergeFunction) { this.pickFunction = compoundMergeFunction; return this; } + /** + * Fluent builder setting the compartment where all the compounds will be merged. + * Default is a compartment named "NA". + * @param uniqComp the single compartment + * @return a CompartmentMerger instance + */ public CompartmentMerger setUniqCompartment(BioCompartment uniqComp) { this.uniqComp = uniqComp; return this; } + /** + * Merge compartments by indexing compounds to identify groups of same compounds over different compartments, and select or + * create a unique compound to be added to a new single compartment. 
+ * @param original the original network + * @return a network with merged compartments + */ public BioNetwork merge(BioNetwork original){ //create new network with same metadata and single compartment @@ -162,6 +193,7 @@ public class CompartmentMerger { merged.removeOnCascade(toRemove); } + // copy Gene, Protein and Enzyme from original network private void keepGPR(BioNetwork original){ // Copy genes for (BioGene gene : original.getGenesView()) { -- GitLab From d37f1082f04bfcef7e84d4dd7599b66bf77e2a5f Mon Sep 17 00:00:00 2001 From: cfrainay <clement.frainay@inrae.fr> Date: Wed, 14 Jun 2023 16:56:57 +0200 Subject: [PATCH 04/13] option to set new network name --- .../biodata/utils/CompartmentMerger.java | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/met4j-core/src/main/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/CompartmentMerger.java b/met4j-core/src/main/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/CompartmentMerger.java index 523a50652..c661c21cb 100644 --- a/met4j-core/src/main/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/CompartmentMerger.java +++ b/met4j-core/src/main/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/CompartmentMerger.java @@ -28,6 +28,9 @@ public class CompartmentMerger { // merged bioNetwork BioNetwork merged; + // merged network name + String name; + /** * Create a new Compartment Merger */ @@ -68,6 +71,17 @@ public class CompartmentMerger { return this; } + /** + * Fluent builder setting the merged network name. + * Default append "_compartments-merged" to original network name. + * @param name the name + * @return a CompartmentMerger instance + */ + public CompartmentMerger setNewNetworkName(String name) { + this.name = name; + return this; + } + /** * Merge compartments by indexing compounds to identify groups of same compounds over different compartments, and select or * create a unique compound to be added to a new single compartment. 
@@ -128,6 +142,11 @@ public class CompartmentMerger { private void buildNetwork(BioNetwork original){ //create new network with single compartment merged = new BioNetwork(); + if(name == null){ + merged.setName(original.getName()+"_compartments-merged"); + }else{ + merged.setName(name); + } merged.addCompartment(uniqComp); //update metadata merged.setSynonyms(new ArrayList<>(original.getSynonyms())); -- GitLab From 09d9b34b45a8ff6be0ea0c9e1d489325f8b4984d Mon Sep 17 00:00:00 2001 From: cfrainay <clement.frainay@inrae.fr> Date: Wed, 14 Jun 2023 16:57:35 +0200 Subject: [PATCH 05/13] [Toolbox][convert] new app to process sbml, including compartment merging --- .../met4j_toolbox/convert/ProcessSBML.java | 221 ++++++++++++++++++ 1 file changed, 221 insertions(+) create mode 100644 met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/convert/ProcessSBML.java diff --git a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/convert/ProcessSBML.java b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/convert/ProcessSBML.java new file mode 100644 index 000000000..897bfcfab --- /dev/null +++ b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/convert/ProcessSBML.java @@ -0,0 +1,221 @@ +package fr.inrae.toulouse.metexplore.met4j_toolbox.convert; + +import fr.inrae.toulouse.metexplore.met4j_core.biodata.*; +import fr.inrae.toulouse.metexplore.met4j_core.biodata.collection.BioCollection; +import fr.inrae.toulouse.metexplore.met4j_core.biodata.utils.CompartmentMerger; +import fr.inrae.toulouse.metexplore.met4j_graph.computation.connect.weighting.*; +import fr.inrae.toulouse.metexplore.met4j_graph.computation.transform.EdgeMerger; +import fr.inrae.toulouse.metexplore.met4j_graph.computation.transform.VertexContraction; +import fr.inrae.toulouse.metexplore.met4j_graph.computation.utils.ComputeAdjacencyMatrix; +import fr.inrae.toulouse.metexplore.met4j_graph.core.WeightingPolicy; +import 
fr.inrae.toulouse.metexplore.met4j_graph.core.compound.CompoundGraph; +import fr.inrae.toulouse.metexplore.met4j_graph.core.compound.ReactionEdge; +import fr.inrae.toulouse.metexplore.met4j_graph.io.Bionetwork2BioGraph; +import fr.inrae.toulouse.metexplore.met4j_graph.io.ExportGraph; +import fr.inrae.toulouse.metexplore.met4j_graph.io.NodeMapping; +import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.JsbmlReader; +import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.Met4jSbmlReaderException; +import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.plugin.FBCParser; +import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.plugin.GroupPathwayParser; +import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.plugin.NotesParser; +import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.plugin.PackageParser; +import fr.inrae.toulouse.metexplore.met4j_io.jsbml.writer.JsbmlWriter; +import fr.inrae.toulouse.metexplore.met4j_io.jsbml.writer.Met4jSbmlWriterException; +import fr.inrae.toulouse.metexplore.met4j_mapping.Mapper; +import fr.inrae.toulouse.metexplore.met4j_mathUtils.matrix.ExportMatrix; +import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.AbstractMet4jApplication; +import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.annotations.EnumFormats; +import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.annotations.EnumParameterTypes; +import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.annotations.Format; +import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.annotations.ParameterType; +import fr.inrae.toulouse.metexplore.met4j_toolbox.networkAnalysis.CompoundNet; +import org.kohsuke.args4j.Option; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; + +public class ProcessSBML extends AbstractMet4jApplication { + + @Format(name = EnumFormats.Sbml) + @ParameterType(name = EnumParameterTypes.InputFile) + @Option(name = "-s", usage = "input SBML file", required = true) + 
public String inputPath = null; + + @ParameterType(name = EnumParameterTypes.InputFile) + @Format(name = EnumFormats.Txt) + @Option(name = "-rc", usage = "remove compounds from input identifier file", required = false) + public String inputSide = null; + + @ParameterType(name = EnumParameterTypes.InputFile) + @Format(name = EnumFormats.Txt) + @Option(name = "-rr", usage = "remove reaction from input identifier file", required = false) + public String inputReactions = null; + + @ParameterType(name = EnumParameterTypes.OutputFile) + @Format(name = EnumFormats.Sbml) + @Option(name = "-o", usage = "output SBML file", required = true) + public String outputPath = null; + + enum strategy {no, by_name, by_id} + + @Option(name = "-mc", aliases = {"--mergecomp"}, usage = "merge compartments. " + + "Use names if consistent and unambiguous across compartments, or identifiers if compartment suffix is present (id in form \"xxx_y\" with xxx as base identifier and y as compartment label).") + public strategy mergingStrat = strategy.no; + public String idRegex = "^(\\w+)_\\w$"; + + + @Option(name = "-rEX", aliases = {"--removeExchange"}, usage = "remove exchange reactions and species from given exchange compartment identifier", required = false) + public String exchangeCompToRemove; + + public static void main(String[] args) throws Met4jSbmlWriterException, IOException { + + ProcessSBML app = new ProcessSBML(); + + app.parseArguments(args); + + app.run(); + + } + + + public void run() throws Met4jSbmlWriterException, IOException { + System.out.print("Reading SBML..."); + JsbmlReader reader = new JsbmlReader(this.inputPath); + ArrayList<PackageParser> pkgs = new ArrayList<>(Arrays.asList( + new NotesParser(false), new FBCParser(), new GroupPathwayParser())); + + BioNetwork network = null; + + try { + network = reader.read(pkgs); + } catch (Met4jSbmlReaderException e) { + System.err.println("Error while reading the SBML file"); + System.err.println(e.getMessage()); + System.exit(1); 
+ } + System.out.println(" Done."); + + //print info + System.out.println("\tcompartments:\t"+network.getCompartmentsView().size()); + System.out.println("\tmetabolites:\t"+network.getMetabolitesView().size()); + System.out.println("\treactions:\t"+network.getReactionsView().size()); + System.out.println("\tenzymes:\t"+network.getEnzymesView().size()); + System.out.println("\tgenes:\t"+network.getGenesView().size()); + System.out.println("\tprotein:\t"+network.getProteinsView().size()); + System.out.println("\tpathway:\t"+network.getPathwaysView().size()); + + //side compound removal [optional] + if (inputSide != null) { + BioCollection<BioMetabolite> sideCpds = new BioCollection<>(); + System.err.println("removing side compounds..."); + Mapper<BioMetabolite> cmapper = new Mapper<>(network, BioNetwork::getMetabolitesView).skipIfNotFound(); + + try { + sideCpds = cmapper.map(inputSide); + } catch (IOException e) { + System.err.println("Error while reading the side compound file"); + System.err.println(e.getMessage()); + System.exit(1); + } + if (cmapper.getNumberOfSkippedEntries() > 0) + System.err.println(cmapper.getNumberOfSkippedEntries() + " side compounds not found in network."); + + for(BioMetabolite sc : sideCpds){ + network.removeOnCascade(sc); + } + System.err.println(sideCpds.size() + " side compounds removed from network."); + } + + //irrelevant reaction removal [optional] + if (inputReactions != null) { + BioCollection<BioReaction> sideRxns = new BioCollection<>(); + System.err.println("removing side reaction..."); + Mapper<BioReaction> rmapper = new Mapper<>(network, BioNetwork::getReactionsView).skipIfNotFound(); + + try { + sideRxns = rmapper.map(inputReactions); + } catch (IOException e) { + System.err.println("Error while reading the irrelevant reactions file"); + System.err.println(e.getMessage()); + System.exit(1); + } + if (rmapper.getNumberOfSkippedEntries() > 0) + System.err.println(rmapper.getNumberOfSkippedEntries() + " reactions not found in 
network."); + + for(BioReaction r : sideRxns){ + network.removeOnCascade(r); + } + System.err.println(sideRxns.size() + " irrelevant reactions removed from network."); + } + + //exchange reaction removal + if(exchangeCompToRemove!=null){ + System.err.println("removing external compartment..."); + BioCompartment exchange = network.getCompartment(exchangeCompToRemove); + if(exchange==null){ + System.err.println("Exchange compartment not found, please check provided identifier"); + }else{ + int n = 0; + for (BioEntity e : exchange.getComponentsView()){ + network.removeOnCascade(e); + n++; + } + System.err.println(n + " external species removed from network."); + } + } + + //merge compartment + BioNetwork newNetwork; + if (mergingStrat == strategy.by_id) { + System.err.print("Merging compartments..."); + CompartmentMerger merger = new CompartmentMerger() + .setGetUniqIdFunction(c -> c.getId().substring(0,c.getId().length()-2)) + .setCompoundMergeFunction((l -> { + BioMetabolite oldComp = l.get(0); + return new BioMetabolite(oldComp,oldComp.getId().substring(0,oldComp.getId().length()-2)); + })); + newNetwork = merger.merge(network); + System.err.println(" Done."); + }else if (mergingStrat == strategy.by_name) { + System.err.print("Merging compartments..."); + CompartmentMerger merger = new CompartmentMerger() + .setGetUniqIdFunction(BioMetabolite::getName); + newNetwork = merger.merge(network); + System.err.println(" Done."); + }else{ + newNetwork = network; + } + + //print info + System.out.println("\tcompartments:\t"+newNetwork.getCompartmentsView().size()); + System.out.println("\tmetabolites:\t"+newNetwork.getMetabolitesView().size()); + System.out.println("\treactions:\t"+newNetwork.getReactionsView().size()); + System.out.println("\tenzymes:\t"+newNetwork.getEnzymesView().size()); + System.out.println("\tgenes:\t"+newNetwork.getGenesView().size()); + System.out.println("\tprotein:\t"+newNetwork.getProteinsView().size()); + 
System.out.println("\tpathway:\t"+newNetwork.getPathwaysView().size()); + + //export network + System.out.print("Exporting..."); + new JsbmlWriter(outputPath,newNetwork).write(); + System.out.println(" Done."); + return; + } + + @Override + public String getLabel() { + return this.getClass().getSimpleName(); + } + + @Override + public String getLongDescription() { + return "General SBML model processing including compound removal (such as side compounds), reaction removal (blocked or exchange reaction), and compartments merging"; + } + + @Override + public String getShortDescription() { + return "General SBML model processing"; + } +} -- GitLab From 7252b2ff1e29b7738792360bd33fa064e8c82256 Mon Sep 17 00:00:00 2001 From: cfrainay <clement.frainay@inrae.fr> Date: Thu, 15 Jun 2023 17:11:05 +0200 Subject: [PATCH 06/13] add option to remove reaction with closed flux bounds --- .../met4j_toolbox/convert/ProcessSBML.java | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/convert/ProcessSBML.java b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/convert/ProcessSBML.java index 897bfcfab..fbe114e50 100644 --- a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/convert/ProcessSBML.java +++ b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/convert/ProcessSBML.java @@ -13,6 +13,7 @@ import fr.inrae.toulouse.metexplore.met4j_graph.core.compound.ReactionEdge; import fr.inrae.toulouse.metexplore.met4j_graph.io.Bionetwork2BioGraph; import fr.inrae.toulouse.metexplore.met4j_graph.io.ExportGraph; import fr.inrae.toulouse.metexplore.met4j_graph.io.NodeMapping; +import fr.inrae.toulouse.metexplore.met4j_io.annotations.reaction.ReactionAttributes; import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.JsbmlReader; import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.Met4jSbmlReaderException; import 
fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.plugin.FBCParser; @@ -58,6 +59,9 @@ public class ProcessSBML extends AbstractMet4jApplication { @Option(name = "-o", usage = "output SBML file", required = true) public String outputPath = null; + @Option(name = "-r0", aliases = {"--noFlux"}, usage = "remove reactions with lower and upper flux bounds both set to 0.0") + public boolean removeNoFlux; + enum strategy {no, by_name, by_id} @Option(name = "-mc", aliases = {"--mergecomp"}, usage = "merge compartments. " + @@ -150,6 +154,21 @@ public class ProcessSBML extends AbstractMet4jApplication { System.err.println(sideRxns.size() + " irrelevant reactions removed from network."); } + //removal of reactions that cannot hold flux in any condition + if(removeNoFlux){ + System.err.println("removing reaction with closed flux bound..."); + BioCollection<BioReaction> toRemove = new BioCollection<>(); + for(BioReaction r : network.getReactionsView()){ + if(ReactionAttributes.getLowerBound(r).value==0.0 && + ReactionAttributes.getUpperBound(r).value==0.0){ + toRemove.add(r); + } + } + + network.removeOnCascade(toRemove); + System.err.println(toRemove.size() + " reactions with closed flux bounds removed from network."); + } + //exchange reaction removal if(exchangeCompToRemove!=null){ System.err.println("removing external compartment..."); -- GitLab From ef443d4bbc1304403e1aa911bd1b5761282ed876 Mon Sep 17 00:00:00 2001 From: cfrainay <clement.frainay@inrae.fr> Date: Wed, 21 Jun 2023 16:38:28 +0200 Subject: [PATCH 07/13] add unit test --- .../biodata/utils/CompartmentMergerTest.java | 129 ++++++++++++++++++ 1 file changed, 129 insertions(+) create mode 100644 met4j-core/src/test/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/CompartmentMergerTest.java diff --git a/met4j-core/src/test/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/CompartmentMergerTest.java b/met4j-core/src/test/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/CompartmentMergerTest.java new file 
mode 100644 index 000000000..510e32539 --- /dev/null +++ b/met4j-core/src/test/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/CompartmentMergerTest.java @@ -0,0 +1,129 @@ +package fr.inrae.toulouse.metexplore.met4j_core.biodata.utils; + +import fr.inrae.toulouse.metexplore.met4j_core.biodata.*; +import org.junit.Before; +import org.junit.Test; + +import static org.junit.Assert.*; + +public class CompartmentMergerTest { + + + BioNetwork network; + BioReaction r1,r2,r1X,r3,rt1,rt2; + BioMetabolite a,b,c,d,aX,bX,cX,eX; + BioCompartment comp0,compX,compMerge; + BioProtein p1; + BioGene g1; + BioEnzyme e1; + + @Before + public void init() { + network = new BioNetwork(); + r1 = new BioReaction("r1"); + r2 = new BioReaction("r2"); + r1X = new BioReaction("r1X"); + r3 = new BioReaction("r3"); + rt1 = new BioReaction("rt1"); + rt2 = new BioReaction("rt2"); + network.add(r1,r2,r1X,r3,rt1,rt2); + + a = new BioMetabolite("a_0", "a"); + b = new BioMetabolite("b_0", "b"); + c = new BioMetabolite("c_0", "c"); + d = new BioMetabolite("d_0", "d"); + aX = new BioMetabolite("a_X", "a"); + bX = new BioMetabolite("b_X", "b"); + cX = new BioMetabolite("c_X", "c"); + eX = new BioMetabolite("d_X", "e"); + network.add(a,b,c,d,aX,bX,cX,eX); + comp0 = new BioCompartment("0"); + compX = new BioCompartment("X"); + compMerge = new BioCompartment("merge"); + network.add(comp0,compX); + network.affectToCompartment(comp0, a, b, c, d); + network.affectToCompartment(compX, aX, bX, cX, eX); + + network.affectLeft(r1, 2.0, comp0, a); + network.affectRight(r1, 1.0, comp0, b); + network.affectRight(r1, 1.0, comp0, c); + r1.setReversible(false); + + network.affectLeft(r1X, 2.0, compX, aX); + network.affectRight(r1X, 1.0, compX, bX); + network.affectRight(r1X, 1.0, compX, cX); + r1X.setReversible(false); + + network.affectLeft(r2, 1.0, comp0, c); + network.affectRight(r2, 1.0, comp0, d); + r2.setReversible(false); + + network.affectLeft(r3, 1.0, compX, cX); + network.affectRight(r3, 1.0, 
compX, eX); + r3.setReversible(false); + + network.affectLeft(rt1, 1.0, comp0, a); + network.affectRight(rt1, 1.0, compX, aX); + rt1.setReversible(true); + + network.affectLeft(rt2, 1.0, comp0, c); + network.affectRight(rt2, 1.0, compX, cX); + rt2.setReversible(true); + + + e1 = new BioEnzyme("e1"); + network.add(e1); + p1 = new BioProtein("p1"); + network.add(p1); + g1 = new BioGene("g1", "G1"); + network.add(g1); + network.affectGeneProduct(p1, g1); + network.affectSubUnit(e1, 1.0, p1); + network.affectEnzyme(r3, e1); + + } + + @Test + public void testMerge() { + CompartmentMerger merger = new CompartmentMerger() + .setNewNetworkName("myNewName") + .setUniqCompartment(compMerge) + .setGetUniqIdFunction(BioMetabolite::getName); + + BioNetwork newNetwork = merger.merge(network); + assertEquals("Error while setting new name","myNewName",newNetwork.getName()); + assertTrue("Error while creating new compartment",newNetwork.containsCompartment("merge")); + assertEquals("Error while merging compartment, wrong number of final compartments",1,newNetwork.getCompartmentsView().size()); + assertEquals("Error while merging compartment, wrong number of final metabolites",5,newNetwork.getMetabolitesView().size()); + assertEquals("Error while merging compartment, wrong number of final reactions",4,newNetwork.getReactionsView().size()); + assertFalse("Error while merging compartment, gene lost",newNetwork.getGenesView().isEmpty()); + assertFalse("Error while merging compartment, enzyme lost",newNetwork.getEnzymesView().isEmpty()); + assertFalse("Error while merging compartment, protein lost",newNetwork.getProteinsView().isEmpty()); + } + + public void testMergeII() { + CompartmentMerger merger = new CompartmentMerger() + .setNewNetworkName("myNewName") + .setUniqCompartment(compMerge) + .setGetUniqIdFunction(c -> c.getId().substring(0,c.getId().length()-2)) + .setCompoundMergeFunction((l -> { + BioMetabolite oldComp = l.get(0); + return new 
BioMetabolite(oldComp,oldComp.getId().substring(0,oldComp.getId().length()-2)); + })); + + BioNetwork newNetwork = merger.merge(network); + assertEquals("Error while setting new name","myNewName",newNetwork.getName()); + assertTrue("Error while creating new compartment",newNetwork.containsCompartment("merge")); + assertEquals("Error while merging compartment, wrong number of final compartments",1,newNetwork.getCompartmentsView().size()); + assertEquals("Error while merging compartment, wrong number of final metabolites",5,newNetwork.getMetabolitesView().size()); + assertTrue("Error while merging compartment, wrong merged metabolite",newNetwork.containsMetabolite("a")); + assertTrue("Error while merging compartment, wrong merged metabolite",newNetwork.containsMetabolite("b")); + assertTrue("Error while merging compartment, wrong merged metabolite",newNetwork.containsMetabolite("c")); + assertTrue("Error while merging compartment, wrong merged metabolite",newNetwork.containsMetabolite("d")); + assertTrue("Error while merging compartment, wrong merged metabolite",newNetwork.containsMetabolite("e")); + assertEquals("Error while merging compartment, wrong number of final reactions",4,newNetwork.getReactionsView().size()); + assertFalse("Error while merging compartment, gene lost",newNetwork.getGenesView().isEmpty()); + assertFalse("Error while merging compartment, enzyme lost",newNetwork.getEnzymesView().isEmpty()); + assertFalse("Error while merging compartment, protein lost",newNetwork.getProteinsView().isEmpty()); + } +} -- GitLab From ea6b145e28167b963c907e54a98114362a680464 Mon Sep 17 00:00:00 2001 From: cfrainay <clement.frainay@inrae.fr> Date: Wed, 21 Jun 2023 17:31:55 +0200 Subject: [PATCH 08/13] add utilities for "by id" merging using palsson convention or base-id extraction regex --- .../biodata/utils/CompartmentMerger.java | 45 ++++++++++++++++++- .../biodata/utils/CompartmentMergerTest.java | 28 +++++++++--- 2 files changed, 67 insertions(+), 6 deletions(-) 
diff --git a/met4j-core/src/main/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/CompartmentMerger.java b/met4j-core/src/main/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/CompartmentMerger.java index c661c21cb..5a813f3c1 100644 --- a/met4j-core/src/main/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/CompartmentMerger.java +++ b/met4j-core/src/main/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/CompartmentMerger.java @@ -5,6 +5,8 @@ import fr.inrae.toulouse.metexplore.met4j_core.biodata.collection.BioCollection; import java.util.*; import java.util.function.Function; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import java.util.stream.Collectors; /** @@ -38,7 +40,8 @@ public class CompartmentMerger { } /** - * Fluent builder setting the function that provides the criterion used for identifying same compounds over multiple compartments + * Fluent builder setting the function that provides the criterion used for identifying same compounds over multiple compartments, + * using a custom function provided as argument. * Default use "name" as common field for the same compound over multiple compartments * @param uniqIdFunction the function * @return a CompartmentMerger instance @@ -48,6 +51,46 @@ public class CompartmentMerger { return this; } + /** + * Fluent builder setting both the function that provides the criterion used for identifying same compounds over multiple compartments, + * and the function that creates a unique representative for such compounds, using a common identifier convention. + * This will strip the last two characters from compound identifiers to create shared ids that will be used in the final merged network. + * This should be used for SBML files that use the naming convention "xxx_y" for compounds, where xxx is the base identifier and y is the compartment identifier (single letter). 
+ * @return a CompartmentMerger instance + */ + public CompartmentMerger usePalssonIdentifierConvention() { + this.getUniqIdFunction = c -> c.getId().substring(0,c.getId().length()-2); + this.pickFunction = (l -> { + BioMetabolite oldComp = l.get(0); + return new BioMetabolite(oldComp,oldComp.getId().substring(0,oldComp.getId().length()-2)); + }); + return this; + } + + /** + * Fluent builder setting both the function that provides the criterion used for identifying same compounds over multiple compartments, + * and the function that creates a unique representative for such compounds, when compound identifiers contain explicit compartment info. + * This uses a provided regex to extract a shared base identifier from compound identifiers, and uses it in the final merged network. + * This should be used for SBML files that use a compound identifier convention containing a base identifier and a compartment suffix/prefix, + * such as "xxx_y" (regex "^(\\w+)_\\w$"), "xxx[y]" or "xxx-yyy", where xxx is the base identifier and y is the compartment identifier. + * @return a CompartmentMerger instance + */ + public CompartmentMerger useBaseIdentifierRegex(String regex) { + this.getUniqIdFunction = (v ->{ + String id = v.getId(); + Matcher m = Pattern.compile(regex).matcher(id); + if(m.matches()) id=m.group(1); + return id;}); + this.pickFunction = (l -> { + BioMetabolite oldComp = l.get(0); + String id = oldComp.getId(); + Matcher m = Pattern.compile(regex).matcher(id); + if(m.matches()) id=m.group(1); + return new BioMetabolite(oldComp,id); + }); + return this; + } + /** * Fluent builder setting the function that select or create a unique representative from a group of compound instances * default return compound from list with first id in alphabetical order. 
diff --git a/met4j-core/src/test/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/CompartmentMergerTest.java b/met4j-core/src/test/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/CompartmentMergerTest.java index 510e32539..734d41c8e 100644 --- a/met4j-core/src/test/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/CompartmentMergerTest.java +++ b/met4j-core/src/test/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/CompartmentMergerTest.java @@ -105,11 +105,7 @@ public class CompartmentMergerTest { CompartmentMerger merger = new CompartmentMerger() .setNewNetworkName("myNewName") .setUniqCompartment(compMerge) - .setGetUniqIdFunction(c -> c.getId().substring(0,c.getId().length()-2)) - .setCompoundMergeFunction((l -> { - BioMetabolite oldComp = l.get(0); - return new BioMetabolite(oldComp,oldComp.getId().substring(0,oldComp.getId().length()-2)); - })); + .usePalssonIdentifierConvention(); BioNetwork newNetwork = merger.merge(network); assertEquals("Error while setting new name","myNewName",newNetwork.getName()); @@ -126,4 +122,26 @@ public class CompartmentMergerTest { assertFalse("Error while merging compartment, enzyme lost",newNetwork.getEnzymesView().isEmpty()); assertFalse("Error while merging compartment, protein lost",newNetwork.getProteinsView().isEmpty()); } + + public void testMergeIII() { + CompartmentMerger merger = new CompartmentMerger() + .setNewNetworkName("myNewName") + .setUniqCompartment(compMerge) + .useBaseIdentifierRegex("^(\\w+)_\\w$"); + + BioNetwork newNetwork = merger.merge(network); + assertEquals("Error while setting new name","myNewName",newNetwork.getName()); + assertTrue("Error while creating new compartment",newNetwork.containsCompartment("merge")); + assertEquals("Error while merging compartment, wrong number of final compartments",1,newNetwork.getCompartmentsView().size()); + assertEquals("Error while merging compartment, wrong number of final 
metabolites",5,newNetwork.getMetabolitesView().size()); + assertTrue("Error while merging compartment, wrong merged metabolite",newNetwork.containsMetabolite("a")); + assertTrue("Error while merging compartment, wrong merged metabolite",newNetwork.containsMetabolite("b")); + assertTrue("Error while merging compartment, wrong merged metabolite",newNetwork.containsMetabolite("c")); + assertTrue("Error while merging compartment, wrong merged metabolite",newNetwork.containsMetabolite("d")); + assertTrue("Error while merging compartment, wrong merged metabolite",newNetwork.containsMetabolite("e")); + assertEquals("Error while merging compartment, wrong number of final reactions",4,newNetwork.getReactionsView().size()); + assertFalse("Error while merging compartment, gene lost",newNetwork.getGenesView().isEmpty()); + assertFalse("Error while merging compartment, enzyme lost",newNetwork.getEnzymesView().isEmpty()); + assertFalse("Error while merging compartment, protein lost",newNetwork.getProteinsView().isEmpty()); + } } -- GitLab From 0a171c65a0ef53e9b2e74c0288d05841dc7bf364 Mon Sep 17 00:00:00 2001 From: cfrainay <clement.frainay@inrae.fr> Date: Wed, 21 Jun 2023 18:46:49 +0200 Subject: [PATCH 09/13] fix tests --- .../met4j_core/biodata/utils/CompartmentMergerTest.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/met4j-core/src/test/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/CompartmentMergerTest.java b/met4j-core/src/test/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/CompartmentMergerTest.java index 734d41c8e..6f31b2088 100644 --- a/met4j-core/src/test/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/CompartmentMergerTest.java +++ b/met4j-core/src/test/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/CompartmentMergerTest.java @@ -35,7 +35,7 @@ public class CompartmentMergerTest { aX = new BioMetabolite("a_X", "a"); bX = new BioMetabolite("b_X", "b"); cX = new BioMetabolite("c_X", "c"); - eX = 
new BioMetabolite("d_X", "e"); + eX = new BioMetabolite("e_X", "e"); network.add(a,b,c,d,aX,bX,cX,eX); comp0 = new BioCompartment("0"); compX = new BioCompartment("X"); @@ -85,6 +85,7 @@ public class CompartmentMergerTest { @Test public void testMerge() { + a.setName("a"); CompartmentMerger merger = new CompartmentMerger() .setNewNetworkName("myNewName") .setUniqCompartment(compMerge) @@ -101,7 +102,9 @@ public class CompartmentMergerTest { assertFalse("Error while merging compartment, protein lost",newNetwork.getProteinsView().isEmpty()); } + @Test public void testMergeII() { + a.setName("notA");//break default merging strategy CompartmentMerger merger = new CompartmentMerger() .setNewNetworkName("myNewName") .setUniqCompartment(compMerge) @@ -123,7 +126,9 @@ public class CompartmentMergerTest { assertFalse("Error while merging compartment, protein lost",newNetwork.getProteinsView().isEmpty()); } + @Test public void testMergeIII() { + a.setName("notA");//break default merging strategy CompartmentMerger merger = new CompartmentMerger() .setNewNetworkName("myNewName") .setUniqCompartment(compMerge) @@ -144,4 +149,5 @@ public class CompartmentMergerTest { assertFalse("Error while merging compartment, enzyme lost",newNetwork.getEnzymesView().isEmpty()); assertFalse("Error while merging compartment, protein lost",newNetwork.getProteinsView().isEmpty()); } + } -- GitLab From 967182cd6b6e2b2b17fe7ee492690e0e5091ef33 Mon Sep 17 00:00:00 2001 From: cfrainay <clement.frainay@inrae.fr> Date: Thu, 22 Jun 2023 11:59:56 +0200 Subject: [PATCH 10/13] add option to handle GPR in reaction redundance check + add method for removal of all redundant reaction in BioNetworkUtils --- .../biodata/utils/BioNetworkUtils.java | 21 +++++++++++ .../biodata/utils/BioReactionUtils.java | 28 +++++++++++---- .../biodata/utils/BioReactionUtilsTest.java | 35 +++++++++++++++++++ 3 files changed, 78 insertions(+), 6 deletions(-) diff --git 
a/met4j-core/src/main/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/BioNetworkUtils.java b/met4j-core/src/main/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/BioNetworkUtils.java index 30d311a5e..bf66a9cbb 100644 --- a/met4j-core/src/main/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/BioNetworkUtils.java +++ b/met4j-core/src/main/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/BioNetworkUtils.java @@ -104,6 +104,27 @@ public class BioNetworkUtils { } } + /** + * Remove from a network all duplicated reactions + * + * @param network a {@link fr.inrae.toulouse.metexplore.met4j_core.biodata.BioNetwork} + * @param checkSameGPR if reactions should be considered non-redundant if they share same reactants but have different GPR + */ + public static void removeDuplicatedReactions(@NonNull BioNetwork network, boolean checkSameGPR) { + ArrayList<BioReaction> reactions = new ArrayList<>(network.getReactionsView()); + BioCollection<BioReaction> toRemove = new BioCollection<>(); + for (int i = 0; i < reactions.size(); i++){ + for (int j = i + 1; j < reactions.size(); j++) { + BioReaction r1 = reactions.get(i); + BioReaction r2 = reactions.get(j); + if(BioReactionUtils.areRedundant(network,r1,r2,checkSameGPR)){ + toRemove.add(r1); + } + } + } + network.removeOnCascade(toRemove); + } + public static void deepCopy(BioNetwork networkIn, BioNetwork networkOut) { deepCopy(networkIn, networkOut, true, false); } diff --git a/met4j-core/src/main/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/BioReactionUtils.java b/met4j-core/src/main/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/BioReactionUtils.java index 4dafe8eea..8f9921e72 100644 --- a/met4j-core/src/main/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/BioReactionUtils.java +++ b/met4j-core/src/main/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/BioReactionUtils.java @@ -59,10 +59,11 @@ public class BioReactionUtils { * @param network a 
{@link fr.inrae.toulouse.metexplore.met4j_core.biodata.BioNetwork} * @param r1 a first {@link fr.inrae.toulouse.metexplore.met4j_core.biodata.BioReaction} * @param r2 a second {@link fr.inrae.toulouse.metexplore.met4j_core.biodata.BioReaction} - * @return true if the substrates and the products have the same id + * @param checkSameGPR if reactions should be considered non-redundant if they share same reactants but have different GPR + * @return true if the two reactions are redundant (same reactants and optionally same GPR) * @throws java.lang.IllegalArgumentException if one of the reaction is not in the network */ - public static Boolean areRedundant(@NonNull BioNetwork network, @NonNull BioReaction r1, @NonNull BioReaction r2) { + public static Boolean areRedundant(@NonNull BioNetwork network, @NonNull BioReaction r1, @NonNull BioReaction r2, boolean checkSameGPR) { if (!network.contains(r1)) { @@ -88,18 +89,33 @@ public class BioReactionUtils { rightR1.containsAll(rightR2) && rightR2.containsAll(rightR1); - if (!r1.isReversible()) { - return flag1; - } else { + if (r1.isReversible()) { Boolean flag2 = rightR1.containsAll(leftR2) && leftR2.containsAll(rightR1) && leftR1.containsAll(rightR2) && rightR2.containsAll(leftR1); - return flag1 || flag2; + flag1 = (flag1 || flag2); + } + + if(flag1 && checkSameGPR){ + return BioReactionUtils.getGPR(network, r1).equals(BioReactionUtils.getGPR(network, r2)); } + return flag1; + } + /** + * Comparison of two reactions + * + * @param network a {@link fr.inrae.toulouse.metexplore.met4j_core.biodata.BioNetwork} + * @param r1 a first {@link fr.inrae.toulouse.metexplore.met4j_core.biodata.BioReaction} + * @param r2 a second {@link fr.inrae.toulouse.metexplore.met4j_core.biodata.BioReaction} + * @return true if the substrates and the products have the same id + * @throws java.lang.IllegalArgumentException if one of the reaction is not in the network + */ + public static Boolean areRedundant(@NonNull BioNetwork network, @NonNull 
BioReaction r1, @NonNull BioReaction r2) { + return areRedundant(network, r1, r2, false); } diff --git a/met4j-core/src/test/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/BioReactionUtilsTest.java b/met4j-core/src/test/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/BioReactionUtilsTest.java index 92bd22a50..bd031882d 100644 --- a/met4j-core/src/test/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/BioReactionUtilsTest.java +++ b/met4j-core/src/test/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/BioReactionUtilsTest.java @@ -121,6 +121,41 @@ public class BioReactionUtilsTest { } + /** + * Test method for + * {@link BioReactionUtils#areRedundant(BioNetwork, BioReaction, BioReaction)}. + */ + @Test + public void testAreRedundantCheckGPR() { + + BioReaction r2 = new BioReaction("r2"); + r2.setReversible(false); + network.add(r2); + + network.affectLeft(r2, 1.0, c, m1); + network.affectRight(r2, 2.0, c, m2); + network.affectRight(r2, 1.5, c, m3); + + network.affectGeneProduct(p1, g1); + network.affectGeneProduct(p2, g1); + network.affectGeneProduct(p3, g1); + + network.affectSubUnit(e1, 1.0, p1); + network.affectSubUnit(e1, 1.0, p2); + network.affectSubUnit(e2, 1.0, p3); + + network.affectEnzyme(r1, e1); + network.affectEnzyme(r1, e2); + + assertTrue("r1 and r2 must be identified as redundant", BioReactionUtils.areRedundant(network, r1, r2, false)); + assertFalse("r1 and r2 must be identified as not redundant, considering GPR", BioReactionUtils.areRedundant(network, r1, r2, true)); + + network.affectEnzyme(r2, e1); + network.affectEnzyme(r2, e2); + assertTrue("r1 and r2 must be identified as redundant, considering GPR", BioReactionUtils.areRedundant(network, r1, r2, true)); + + } + @Test public void testAreRedundantReversible() { -- GitLab From cfb6cc74a7ee9289b229fa4795cd2e605ddeaf3c Mon Sep 17 00:00:00 2001 From: cfrainay <clement.frainay@inrae.fr> Date: Thu, 22 Jun 2023 13:21:23 +0200 Subject: [PATCH 11/13] add isolated 
node removal and duplicated reaction removal options in ProcessSBML --- .../met4j_toolbox/convert/ProcessSBML.java | 29 ++++++++++++++----- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/convert/ProcessSBML.java b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/convert/ProcessSBML.java index fbe114e50..0dd746251 100644 --- a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/convert/ProcessSBML.java +++ b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/convert/ProcessSBML.java @@ -2,6 +2,7 @@ package fr.inrae.toulouse.metexplore.met4j_toolbox.convert; import fr.inrae.toulouse.metexplore.met4j_core.biodata.*; import fr.inrae.toulouse.metexplore.met4j_core.biodata.collection.BioCollection; +import fr.inrae.toulouse.metexplore.met4j_core.biodata.utils.BioNetworkUtils; import fr.inrae.toulouse.metexplore.met4j_core.biodata.utils.CompartmentMerger; import fr.inrae.toulouse.metexplore.met4j_graph.computation.connect.weighting.*; import fr.inrae.toulouse.metexplore.met4j_graph.computation.transform.EdgeMerger; @@ -49,6 +50,9 @@ public class ProcessSBML extends AbstractMet4jApplication { @Option(name = "-rc", usage = "remove compounds from input identifier file", required = false) public String inputSide = null; + @Option(name = "-ric", aliases = {"--noIsolated"}, usage = "remove isolated compounds (not involved in any reaction)") + public boolean removeIsolated; + @ParameterType(name = EnumParameterTypes.InputFile) @Format(name = EnumFormats.Txt) @Option(name = "-rr", usage = "remove reaction from input identifier file", required = false) @@ -67,7 +71,9 @@ public class ProcessSBML extends AbstractMet4jApplication { @Option(name = "-mc", aliases = {"--mergecomp"}, usage = "merge compartments. 
" + "Use names if consistent and unambiguous across compartments, or identifiers if compartment suffix is present (id in form \"xxx_y\" with xxx as base identifier and y as compartment label).") public strategy mergingStrat = strategy.no; - public String idRegex = "^(\\w+)_\\w$"; + + @Option(name = "-rdr", aliases = {"--noDuplicated"}, usage = "remove duplicated reactions (same reactants, same GPR)") + public boolean removeDuplicated; @Option(name = "-rEX", aliases = {"--removeExchange"}, usage = "remove exchange reactions and species from given exchange compartment identifier", required = false) @@ -185,16 +191,19 @@ public class ProcessSBML extends AbstractMet4jApplication { } } + + //remove compounds not in any reactions + if(removeIsolated){ + System.err.println("removing isolated compounds..."); + BioNetworkUtils.removeNotConnectedMetabolites(network); + } + //merge compartment BioNetwork newNetwork; if (mergingStrat == strategy.by_id) { System.err.print("Merging compartments..."); CompartmentMerger merger = new CompartmentMerger() - .setGetUniqIdFunction(c -> c.getId().substring(0,c.getId().length()-2)) - .setCompoundMergeFunction((l -> { - BioMetabolite oldComp = l.get(0); - return new BioMetabolite(oldComp,oldComp.getId().substring(0,oldComp.getId().length()-2)); - })); + .usePalssonIdentifierConvention(); newNetwork = merger.merge(network); System.err.println(" Done."); }else if (mergingStrat != strategy.by_name) { @@ -207,6 +216,12 @@ public class ProcessSBML extends AbstractMet4jApplication { newNetwork = network; } + //remove duplicated reactions + if(removeDuplicated){ + System.err.println("removing duplicated reactions..."); + BioNetworkUtils.removeDuplicatedReactions(newNetwork,true); + } + //print info System.out.println("\tcompartments:\t"+newNetwork.getCompartmentsView().size()); System.out.println("\tmetabolites:\t"+newNetwork.getMetabolitesView().size()); @@ -230,7 +245,7 @@ public class ProcessSBML extends AbstractMet4jApplication { @Override 
public String getLongDescription() { - return "General SBML model processing including compound removal (such as side compounds), reaction removal (blocked or exchange reaction), and compartments merging"; + return "General SBML model processing including compound removal (such as side compounds or isolated compounds), reaction removal (ex. blocked or exchange reaction), and compartments merging"; } @Override -- GitLab From 338c7facf21f4aa83fd00c8673cd6d1ab9b3ba4c Mon Sep 17 00:00:00 2001 From: cfrainay <clement.frainay@inrae.fr> Date: Fri, 23 Jun 2023 15:36:49 +0200 Subject: [PATCH 12/13] Optimize Redundant reactions removal + add test --- .../biodata/utils/BioNetworkUtils.java | 20 +++++++-------- .../biodata/utils/BioNetworkUtilsTest.java | 25 +++++++++++++++++++ 2 files changed, 35 insertions(+), 10 deletions(-) diff --git a/met4j-core/src/main/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/BioNetworkUtils.java b/met4j-core/src/main/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/BioNetworkUtils.java index bf66a9cbb..6753f34f6 100644 --- a/met4j-core/src/main/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/BioNetworkUtils.java +++ b/met4j-core/src/main/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/BioNetworkUtils.java @@ -111,17 +111,17 @@ public class BioNetworkUtils { * @param checkSameGPR if reactions should be considered non-redundant if they share same reactants but have different GPR */ public static void removeDuplicatedReactions(@NonNull BioNetwork network, boolean checkSameGPR) { - ArrayList<BioReaction> reactions = new ArrayList<>(network.getReactionsView()); - BioCollection<BioReaction> toRemove = new BioCollection<>(); - for (int i = 0; i < reactions.size(); i++){ - for (int j = i + 1; j < reactions.size(); j++) { - BioReaction r1 = reactions.get(i); - BioReaction r2 = reactions.get(j); - if(BioReactionUtils.areRedundant(network,r1,r2,checkSameGPR)){ - toRemove.add(r1); - } - } + //1- for each reaction, 
create an id from equation and, optionally, GPR + //2- put id-reaction pairs in map, each new reaction overrides its duplicates, if any + //3- remove from network all reactions not in map + HashMap<String,BioReaction> indexedReaction = new HashMap<>(); + BioCollection<BioReaction> toRemove = new BioCollection<>(network.getReactionsView()); + for (BioReaction r : network.getReactionsView()){ + String uniqId = BioReactionUtils.getEquation(r,false,true); + if(checkSameGPR) uniqId = uniqId+BioReactionUtils.getGPR(network,r); + indexedReaction.put(uniqId,r); } + toRemove.removeAll(indexedReaction.values()); network.removeOnCascade(toRemove); } diff --git a/met4j-core/src/test/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/BioNetworkUtilsTest.java b/met4j-core/src/test/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/BioNetworkUtilsTest.java index bc9c6f2eb..3220ce16e 100644 --- a/met4j-core/src/test/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/BioNetworkUtilsTest.java +++ b/met4j-core/src/test/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/BioNetworkUtilsTest.java @@ -94,6 +94,31 @@ public class BioNetworkUtilsTest { } + @Test + public void testRemoveDuplicatedReactions(){ + BioNetwork network = miniNetwork(); + BioReaction r3 = new BioReaction("R3"); + network.add(r3); + network.affectLeft(r3, 2.0, c1, m1); + network.affectRight(r3, 1.0, c1, m2); + + BioNetworkUtils.removeDuplicatedReactions(network,false); + assertFalse("Duplicated reaction not removed",network.containsReaction("R3")); + + r3 = new BioReaction("R3"); + network.add(r3); + network.affectLeft(r3, 2.0, c1, m1); + network.affectRight(r3, 1.0, c1, m2); + + BioNetworkUtils.removeDuplicatedReactions(network,true); + assertTrue("Non-duplicated reaction (considering GPR) removed",network.containsReaction("R3")); + + network.affectEnzyme(r3, enzyme1); + + BioNetworkUtils.removeDuplicatedReactions(network,true); + assertFalse("Duplicated reaction (considering GPR) 
not removed",network.containsReaction("R3")); + } + @Test public void removeNotConnectedMetabolites() { -- GitLab From 4adeeb81bf3ad992f9c90d0b3c9b7058c6ceb9ea Mon Sep 17 00:00:00 2001 From: cfrainay <clement.frainay@inrae.fr> Date: Fri, 23 Jun 2023 15:48:03 +0200 Subject: [PATCH 13/13] rename app, clean imports, improve console output --- .../{ProcessSBML.java => SBMLwizard.java} | 31 +++++++------------ 1 file changed, 11 insertions(+), 20 deletions(-) rename met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/convert/{ProcessSBML.java => SBMLwizard.java} (88%) diff --git a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/convert/ProcessSBML.java b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/convert/SBMLwizard.java similarity index 88% rename from met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/convert/ProcessSBML.java rename to met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/convert/SBMLwizard.java index 0dd746251..a44ce464c 100644 --- a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/convert/ProcessSBML.java +++ b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/convert/SBMLwizard.java @@ -4,16 +4,6 @@ import fr.inrae.toulouse.metexplore.met4j_core.biodata.*; import fr.inrae.toulouse.metexplore.met4j_core.biodata.collection.BioCollection; import fr.inrae.toulouse.metexplore.met4j_core.biodata.utils.BioNetworkUtils; import fr.inrae.toulouse.metexplore.met4j_core.biodata.utils.CompartmentMerger; -import fr.inrae.toulouse.metexplore.met4j_graph.computation.connect.weighting.*; -import fr.inrae.toulouse.metexplore.met4j_graph.computation.transform.EdgeMerger; -import fr.inrae.toulouse.metexplore.met4j_graph.computation.transform.VertexContraction; -import fr.inrae.toulouse.metexplore.met4j_graph.computation.utils.ComputeAdjacencyMatrix; -import fr.inrae.toulouse.metexplore.met4j_graph.core.WeightingPolicy; 
-import fr.inrae.toulouse.metexplore.met4j_graph.core.compound.CompoundGraph; -import fr.inrae.toulouse.metexplore.met4j_graph.core.compound.ReactionEdge; -import fr.inrae.toulouse.metexplore.met4j_graph.io.Bionetwork2BioGraph; -import fr.inrae.toulouse.metexplore.met4j_graph.io.ExportGraph; -import fr.inrae.toulouse.metexplore.met4j_graph.io.NodeMapping; import fr.inrae.toulouse.metexplore.met4j_io.annotations.reaction.ReactionAttributes; import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.JsbmlReader; import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.Met4jSbmlReaderException; @@ -24,21 +14,18 @@ import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.plugin.PackageParser; import fr.inrae.toulouse.metexplore.met4j_io.jsbml.writer.JsbmlWriter; import fr.inrae.toulouse.metexplore.met4j_io.jsbml.writer.Met4jSbmlWriterException; import fr.inrae.toulouse.metexplore.met4j_mapping.Mapper; -import fr.inrae.toulouse.metexplore.met4j_mathUtils.matrix.ExportMatrix; import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.AbstractMet4jApplication; import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.annotations.EnumFormats; import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.annotations.EnumParameterTypes; import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.annotations.Format; import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.annotations.ParameterType; -import fr.inrae.toulouse.metexplore.met4j_toolbox.networkAnalysis.CompoundNet; import org.kohsuke.args4j.Option; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; -import java.util.HashSet; -public class ProcessSBML extends AbstractMet4jApplication { +public class SBMLwizard extends AbstractMet4jApplication { @Format(name = EnumFormats.Sbml) @ParameterType(name = EnumParameterTypes.InputFile) @@ -81,7 +68,7 @@ public class ProcessSBML extends AbstractMet4jApplication { public static void main(String[] args) throws Met4jSbmlWriterException, IOException { - 
ProcessSBML app = new ProcessSBML(); + SBMLwizard app = new SBMLwizard(); app.parseArguments(args); @@ -108,13 +95,13 @@ public class ProcessSBML extends AbstractMet4jApplication { System.out.println(" Done."); //print info - System.out.println("\tcompartments:\t"+network.getCompartmentsView().size()); + System.out.println("\n\n\tcompartments:\t"+network.getCompartmentsView().size()); System.out.println("\tmetabolites:\t"+network.getMetabolitesView().size()); System.out.println("\treactions:\t"+network.getReactionsView().size()); System.out.println("\tenzymes:\t"+network.getEnzymesView().size()); System.out.println("\tgenes:\t"+network.getGenesView().size()); System.out.println("\tprotein:\t"+network.getProteinsView().size()); - System.out.println("\tpathway:\t"+network.getPathwaysView().size()); + System.out.println("\tpathway:\t"+network.getPathwaysView().size()+"\n\n"); //side compound removal [optional] if (inputSide != null) { @@ -172,7 +159,7 @@ public class ProcessSBML extends AbstractMet4jApplication { } network.removeOnCascade(toRemove); - System.err.println(toRemove.size() + " external species removed from network."); + System.err.println(toRemove.size() + " \"closed\" reactions removed from network."); } //exchange reaction removal @@ -195,7 +182,9 @@ public class ProcessSBML extends AbstractMet4jApplication { //remove compounds not in any reactions if(removeIsolated){ System.err.println("removing isolated compounds..."); + int n = network.getMetabolitesView().size(); BioNetworkUtils.removeNotConnectedMetabolites(network); + System.err.println((n-network.getMetabolitesView().size())+" isolated compounds removed from network."); } //merge compartment @@ -219,17 +208,19 @@ public class ProcessSBML extends AbstractMet4jApplication { //remove duplicated reactions if(removeDuplicated){ System.err.println("removing duplicated reactions..."); + int n = newNetwork.getReactionsView().size(); BioNetworkUtils.removeDuplicatedReactions(newNetwork,true); + 
System.err.println((n-newNetwork.getReactionsView().size())+" duplicated reactions removed from network."); } //print info - System.out.println("\tcompartments:\t"+newNetwork.getCompartmentsView().size()); + System.out.println("\n\n\tcompartments:\t"+newNetwork.getCompartmentsView().size()); System.out.println("\tmetabolites:\t"+newNetwork.getMetabolitesView().size()); System.out.println("\treactions:\t"+newNetwork.getReactionsView().size()); System.out.println("\tenzymes:\t"+newNetwork.getEnzymesView().size()); System.out.println("\tgenes:\t"+newNetwork.getGenesView().size()); System.out.println("\tprotein:\t"+newNetwork.getProteinsView().size()); - System.out.println("\tpathway:\t"+newNetwork.getPathwaysView().size()); + System.out.println("\tpathway:\t"+newNetwork.getPathwaysView().size()+"\n\n"); //export network System.out.print("Exporting..."); -- GitLab