From 28e5f234295c05fbea9de7982013bd85df8d1e27 Mon Sep 17 00:00:00 2001 From: "Zucker, Jeremy D" Date: Fri, 27 Jun 2025 12:55:10 -0700 Subject: [PATCH 1/7] Added fast gapfilling flag. All tests pass with fast_gapfill=True --- src/cobra/flux_analysis/gapfilling.py | 21 +++++- tests/test_flux_analysis/test_gapfilling.py | 84 +++++++++++++++++++++ 2 files changed, 103 insertions(+), 2 deletions(-) diff --git a/src/cobra/flux_analysis/gapfilling.py b/src/cobra/flux_analysis/gapfilling.py index 0f572bf75..703887793 100644 --- a/src/cobra/flux_analysis/gapfilling.py +++ b/src/cobra/flux_analysis/gapfilling.py @@ -67,6 +67,8 @@ class GapFiller: The threshold at which a value is considered non-zero (aka integrality threshold). If gapfilled models fail to validate, you may want to lower this value (default 1E-6). + fast_gapfill : bool, optional + Whether to use the fast gap filling approach (default False). Attributes ---------- @@ -110,6 +112,7 @@ def __init__( exchange_reactions: bool = False, demand_reactions: bool = True, integer_threshold: float = 1e-6, + fast_gapfill: bool = False, **kwargs, ) -> None: """Initialize a new GapFiller object. @@ -139,6 +142,8 @@ def __init__( if penalties is not None: self.penalties.update(penalties) self.indicators = [] + self.fast_gapfill = fast_gapfill + self.costs = {} self.extend_model(exchange_reactions, demand_reactions) fix_objective_as_constraint(self.model, bound=lower_bound) @@ -226,8 +231,12 @@ def add_switches_and_objective(self) -> None: for rxn in self.model.reactions: if not hasattr(rxn, "gapfilling_type"): continue + if self.fast_gapfill: + indicator_type = "continuous" + else: + indicator_type = "binary" indicator = prob.Variable( - name=f"indicator_{rxn.id}", lb=0, ub=1, type="binary" + name=f"indicator_{rxn.id}", lb=0, ub=1, type=indicator_type ) if rxn.id in self.penalties: indicator.cost = self.penalties[rxn.id] @@ -309,6 +318,8 @@ def fill(self, iterations: int = 1) -> List[List["Reaction"]]: self.update_costs() return used_reactions + + def validate(self, reactions: List["Reaction"]) -> bool: """Validate the model. @@ -343,6 +354,7 @@ def gapfill( demand_reactions: bool = True, exchange_reactions: bool = False, iterations: int = 1, + fast_gapfill: bool = False, ): """Perform gap filling on a model. @@ -375,7 +387,11 @@ def gapfill( which may include previously used reactions i.e., with enough iterations pathways including 10 steps will eventually be reported even if the shortest pathway is a single reaction (default 1). - + fast_gapfill : bool, optional + Use continuous variables for the indicator variables instead of + binary variables. This can speed up the gap filling but may lead to + suboptimal solutions. If you want to use this, you should also + consider increasing the number of iterations (default False). Returns ------- list of list of cobra.Reaction @@ -402,5 +418,6 @@ def gapfill( penalties=penalties, demand_reactions=demand_reactions, exchange_reactions=exchange_reactions, + fast_gapfill=fast_gapfill ) return gapfiller.fill(iterations=iterations) diff --git a/tests/test_flux_analysis/test_gapfilling.py b/tests/test_flux_analysis/test_gapfilling.py index 127def917..41a8e32a4 100644 --- a/tests/test_flux_analysis/test_gapfilling.py +++ b/tests/test_flux_analysis/test_gapfilling.py @@ -87,3 +87,87 @@ def test_gapfilling(salmonella: Model) -> None: solution = gf.fill() assert "TKT2" not in {r.id for r in solution[0]} assert gf.validate(solution[0]) + +def test_fast_gapfilling(salmonella: Model) -> None: + """Test Gapfilling.""" + m = Model() + m.add_metabolites([Metabolite(m_id) for m_id in ["a", "b", "c"]]) + exa = Reaction("EX_a") + exa.add_metabolites({m.metabolites.a: 1}) + b2c = Reaction("b2c") + b2c.add_metabolites({m.metabolites.b: -1, m.metabolites.c: 1}) + dmc = Reaction("DM_c") + dmc.add_metabolites({m.metabolites.c: -1}) + m.add_reactions([exa, b2c, dmc]) + m.objective = "DM_c" + + universal = Model() + a2b = Reaction("a2b") + a2d = Reaction("a2d") + universal.add_reactions([a2b, a2d]) + a2b.build_reaction_from_string("a --> b", verbose=False) + a2d.build_reaction_from_string("a --> d", verbose=False) + + # # GrowMatch + # result = gapfilling.growMatch(m, universal)[0] + result = gapfill(m, universal, fast_gapfill=True)[0] + assert len(result) == 1 + assert result[0].id == "a2b" + + # # SMILEY + # result = gapfilling.SMILEY(m, "b", universal)[0] + with m: + m.objective = m.add_boundary(m.metabolites.b, type="demand") + result = gapfill(m, universal, fast_gapfill=True)[0] + assert len(result) == 1 + assert result[0].id == "a2b" + + # # 2 rounds of GrowMatch with exchange reactions + # result = gapfilling.growMatch(m, None, ex_rxns=True, iterations=2) + result = gapfill(m, None, exchange_reactions=True, iterations=2, fast_gapfill=True) + assert len(result) == 2 + assert len(result[0]) == 1 + assert len(result[1]) == 1 + assert {i[0].id for i in result} == {"EX_b", "EX_c"} + + # # Gapfilling solution adds metabolites not present in original model + # test for when demand = T + # a demand reaction must be added to clear new metabolite + universal_noDM = Model() + a2b = Reaction("a2b") + universal_noDM.add_reactions([a2b]) + a2b.build_reaction_from_string("a --> b + d", verbose=False) + result = gapfill( + m, universal_noDM, exchange_reactions=False, demand_reactions=True, fast_gapfill=True + )[0] + # add reaction a2b and demand reaction to clear met d + assert len(result) == 2 + assert "a2b" in [x.id for x in result] + + # test for when demand = False + # test for when metabolites are added to the model and + # must be cleared by other reactions in universal model + # (i.e. not necessarily a demand reaction) + universal_withDM = universal_noDM.copy() + d_dm = Reaction("d_dm") + universal_withDM.add_reactions([d_dm]) + d_dm.build_reaction_from_string("d -->", verbose=False) + result = gapfill( + m, universal_withDM, exchange_reactions=False, demand_reactions=False, fast_gapfill=True + )[0] + assert len(result) == 2 + assert "a2b" in [x.id for x in result] + + # somewhat bigger model + universal = Model("universal_reactions") + with salmonella as model: + for i in [i.id for i in model.metabolites.f6p_c.reactions]: + reaction = model.reactions.get_by_id(i) + universal.add_reactions([reaction.copy()]) + model.remove_reactions([reaction]) + gf = GapFiller( + model, universal, penalties={"TKT2": 1e3}, demand_reactions=False, fast_gapfill=True + ) + solution = gf.fill() + assert "TKT2" not in {r.id for r in solution[0]} + assert gf.validate(solution[0]) \ No newline at end of file From 252508d3c787961705338e6378f2e85b2cb898ca Mon Sep 17 00:00:00 2001 From: "Zucker, Jeremy D" Date: Fri, 27 Jun 2025 13:46:54 -0700 Subject: [PATCH 2/7] Fast Gapfill feature added to release notes --- release-notes/next-release.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/release-notes/next-release.md b/release-notes/next-release.md index a7f5935eb..7063de1f6 100644 --- a/release-notes/next-release.md +++ b/release-notes/next-release.md @@ -2,10 +2,16 @@ ## New features +Adds a `fast_gapfill` parameter to `cobra.flux_analysis.gapfilling.gapfill()` to enable Fast Gap Filling, which is significantly faster than the default method, at the cost of some accuracy. This is based on the method described in the supplementary information of the following paper: + +> Dreyfuss, J. M., Zucker, J. D., Hood, H. M., Ocasio, L. R., Sachs, M. S., & Galagan, J. E. (2013). [Reconstruction and validation of a genome-scale metabolic model for the filamentous fungus Neurospora crassa using FARM](https://doi.org/10.1371/journal.pcbi.1003126). PLoS Computational Biology, 9(7), e1003126. https://doi.org/10.1371/journal.pcbi.1003126 + + ## Fixes Fixes failures of GPR.copy() in Python 3.13. + Fix compartment not being stored for metabolites created during reaction.build_reaction_from_string From a21114c7b212ae45ffcf0211a108fbae7e343800 Mon Sep 17 00:00:00 2001 From: "Zucker, Jeremy D" Date: Fri, 27 Jun 2025 15:44:25 -0700 Subject: [PATCH 3/7] Extracted fast_gapfill check out of the reaction loop so only one check needs to happen instead of nrxns checks --- src/cobra/flux_analysis/gapfilling.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/cobra/flux_analysis/gapfilling.py b/src/cobra/flux_analysis/gapfilling.py index 703887793..3891edc22 100644 --- a/src/cobra/flux_analysis/gapfilling.py +++ b/src/cobra/flux_analysis/gapfilling.py @@ -228,13 +228,14 @@ def add_switches_and_objective(self) -> None: constraints = [] big_m = max(max(abs(b) for b in r.bounds) for r in self.model.reactions) prob = self.model.problem + if self.fast_gapfill: + indicator_type = "continuous" + else: + indicator_type = "binary" for rxn in self.model.reactions: if not hasattr(rxn, "gapfilling_type"): continue - if self.fast_gapfill: - indicator_type = "continuous" - else: - indicator_type = "binary" + indicator = prob.Variable( name=f"indicator_{rxn.id}", lb=0, ub=1, type=indicator_type ) From 17a1677023a27461e97c3fc145831d3a83bffca0 Mon Sep 17 00:00:00 2001 From: "Zucker, Jeremy D" Date: Fri, 1 Aug 2025 21:27:18 -0700 Subject: [PATCH 4/7] Black has reformatted gapfilling.py --- src/cobra/flux_analysis/gapfilling.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/cobra/flux_analysis/gapfilling.py b/src/cobra/flux_analysis/gapfilling.py index 3891edc22..e899f1ce6 100644 --- a/src/cobra/flux_analysis/gapfilling.py +++ b/src/cobra/flux_analysis/gapfilling.py @@ -68,7 +68,7 @@ class GapFiller: integrality threshold). If gapfilled models fail to validate, you may want to lower this value (default 1E-6). fast_gapfill : bool, optional - Whether to use the fast gap filling approach (default False). + Whether to use the fast gap filling approach (default False). Attributes ---------- @@ -143,7 +143,7 @@ def __init__( self.penalties.update(penalties) self.indicators = [] self.fast_gapfill = fast_gapfill - + self.costs = {} self.extend_model(exchange_reactions, demand_reactions) fix_objective_as_constraint(self.model, bound=lower_bound) @@ -235,7 +235,7 @@ def add_switches_and_objective(self) -> None: for rxn in self.model.reactions: if not hasattr(rxn, "gapfilling_type"): continue - + indicator = prob.Variable( name=f"indicator_{rxn.id}", lb=0, ub=1, type=indicator_type ) @@ -319,8 +319,6 @@ def fill(self, iterations: int = 1) -> List[List["Reaction"]]: self.update_costs() return used_reactions - - def validate(self, reactions: List["Reaction"]) -> bool: """Validate the model. @@ -419,6 +417,6 @@ def gapfill( penalties=penalties, demand_reactions=demand_reactions, exchange_reactions=exchange_reactions, - fast_gapfill=fast_gapfill + fast_gapfill=fast_gapfill, ) return gapfiller.fill(iterations=iterations) From ab16eac4ecdb0cea34687ef682d90fb3db803914 Mon Sep 17 00:00:00 2001 From: "Zucker, Jeremy D" Date: Fri, 1 Aug 2025 21:33:37 -0700 Subject: [PATCH 5/7] flake8 passes --- tests/test_flux_analysis/test_gapfilling.py | 24 ++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/tests/test_flux_analysis/test_gapfilling.py b/tests/test_flux_analysis/test_gapfilling.py index 41a8e32a4..65ac3e42a 100644 --- a/tests/test_flux_analysis/test_gapfilling.py +++ b/tests/test_flux_analysis/test_gapfilling.py @@ -88,6 +88,7 @@ def test_gapfilling(salmonella: Model) -> None: assert "TKT2" not in {r.id for r in solution[0]} assert gf.validate(solution[0]) + def test_fast_gapfilling(salmonella: Model) -> None: """Test Gapfilling.""" m = Model() @@ -138,7 +139,11 @@ def test_fast_gapfilling(salmonella: Model) -> None: universal_noDM.add_reactions([a2b]) a2b.build_reaction_from_string("a --> b + d", verbose=False) result = gapfill( - m, universal_noDM, exchange_reactions=False, demand_reactions=True, fast_gapfill=True + m, + universal_noDM, + exchange_reactions=False, + demand_reactions=True, + fast_gapfill=True, )[0] # add reaction a2b and demand reaction to clear met d assert len(result) == 2 @@ -153,7 +158,11 @@ def test_fast_gapfilling(salmonella: Model) -> None: universal_withDM.add_reactions([d_dm]) d_dm.build_reaction_from_string("d -->", verbose=False) result = gapfill( - m, universal_withDM, exchange_reactions=False, demand_reactions=False, fast_gapfill=True + m, + universal_withDM, + exchange_reactions=False, + demand_reactions=False, + fast_gapfill=True, )[0] assert len(result) == 2 assert "a2b" in [x.id for x in result] @@ -161,13 +170,18 @@ def test_fast_gapfilling(salmonella: Model) -> None: # somewhat bigger model universal = Model("universal_reactions") with salmonella as model: - for i in [i.id for i in model.metabolites.f6p_c.reactions]: + f6p_rxns = [i.id for i in model.metabolites.f6p_c.reactions] + for i in f6p_rxns: reaction = model.reactions.get_by_id(i) universal.add_reactions([reaction.copy()]) model.remove_reactions([reaction]) gf = GapFiller( - model, universal, penalties={"TKT2": 1e3}, demand_reactions=False, fast_gapfill=True + model, + universal, + penalties={"TKT2": 1e3}, + demand_reactions=False, + fast_gapfill=True, ) solution = gf.fill() assert "TKT2" not in {r.id for r in solution[0]} - assert gf.validate(solution[0]) \ No newline at end of file + assert gf.validate(solution[0]) From ca0ed139aa3ea6775ccf149bb91af1c26e71e6b0 Mon Sep 17 00:00:00 2001 From: "Zucker, Jeremy D" Date: Fri, 1 Aug 2025 21:35:47 -0700 Subject: [PATCH 6/7] Removed unnecessary list comprehension --- tests/test_flux_analysis/test_gapfilling.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_flux_analysis/test_gapfilling.py b/tests/test_flux_analysis/test_gapfilling.py index 65ac3e42a..f7cbc0063 100644 --- a/tests/test_flux_analysis/test_gapfilling.py +++ b/tests/test_flux_analysis/test_gapfilling.py @@ -170,8 +170,7 @@ def test_fast_gapfilling(salmonella: Model) -> None: # somewhat bigger model universal = Model("universal_reactions") with salmonella as model: - f6p_rxns = [i.id for i in model.metabolites.f6p_c.reactions] - for i in f6p_rxns: + for i in [i.id for i in model.metabolites.f6p_c.reactions]: reaction = model.reactions.get_by_id(i) universal.add_reactions([reaction.copy()]) model.remove_reactions([reaction]) From c0290c79d346c93320bc69da92566265582b083c Mon Sep 17 00:00:00 2001 From: "Zucker, Jeremy D" Date: Sat, 2 Aug 2025 22:02:47 -0700 Subject: [PATCH 7/7] Added a pytest.skip to test_remote_load if in a CI environment --- tests/test_io/test_web/test_load.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_io/test_web/test_load.py b/tests/test_io/test_web/test_load.py index c520e262b..5396fed8d 100644 --- a/tests/test_io/test_web/test_load.py +++ b/tests/test_io/test_web/test_load.py @@ -71,7 +71,6 @@ def test_unknown_model() -> None: with pytest.raises(RuntimeError): load_model("MODELWHO?", cache=False) - @pytest.mark.parametrize( "model_id, num_metabolites, num_reactions", [("e_coli_core", 72, 95), ("BIOMD0000000633", 50, 35)], @@ -89,6 +88,9 @@ def test_remote_load(model_id: str, num_metabolites: int, num_reactions: int) -> The total number of reactions in the model having ID `model_id`. """ + if os.getenv("CI") == "true": + pytest.skip("Skipping remote load tests on CI") + model = load_model(model_id, cache=False) assert len(model.metabolites) == num_metabolites assert len(model.reactions) == num_reactions