diff --git a/release-notes/next-release.md b/release-notes/next-release.md index a7f5935eb..7063de1f6 100644 --- a/release-notes/next-release.md +++ b/release-notes/next-release.md @@ -2,10 +2,16 @@ ## New features +Adds a `fast_gapfill` parameter to `cobra.flux_analysis.gapfilling.gapfill()` to enable Fast Gap Filling, which is significantly faster than the default method, at the cost of some accuracy. This is based on the method described in the supplementary information of the following paper: + +> Dreyfuss, J. M., Zucker, J. D., Hood, H. M., Ocasio, L. R., Sachs, M. S., & Galagan, J. E. (2013). [Reconstruction and validation of a genome-scale metabolic model for the filamentous fungus Neurospora crassa using FARM](https://doi.org/10.1371/journal.pcbi.1003126). PLoS Computational Biology, 9(7), e1003126. https://doi.org/10.1371/journal.pcbi.1003126 + + ## Fixes Fixes failures of GPR.copy() in Python 3.13. + Fix compartment not being stored for metabolites created during reaction.build_reaction_from_string diff --git a/src/cobra/flux_analysis/gapfilling.py b/src/cobra/flux_analysis/gapfilling.py index 0f572bf75..e899f1ce6 100644 --- a/src/cobra/flux_analysis/gapfilling.py +++ b/src/cobra/flux_analysis/gapfilling.py @@ -67,6 +67,8 @@ class GapFiller: The threshold at which a value is considered non-zero (aka integrality threshold). If gapfilled models fail to validate, you may want to lower this value (default 1E-6). + fast_gapfill : bool, optional + Whether to use the fast gap filling approach (default False). Attributes ---------- @@ -110,6 +112,7 @@ def __init__( exchange_reactions: bool = False, demand_reactions: bool = True, integer_threshold: float = 1e-6, + fast_gapfill: bool = False, **kwargs, ) -> None: """Initialize a new GapFiller object. @@ -139,6 +142,8 @@ def __init__( if penalties is not None: self.penalties.update(penalties) self.indicators = [] + self.fast_gapfill = fast_gapfill + self.costs = {} self.extend_model(exchange_reactions, demand_reactions) fix_objective_as_constraint(self.model, bound=lower_bound) @@ -223,11 +228,16 @@ def add_switches_and_objective(self) -> None: constraints = [] big_m = max(max(abs(b) for b in r.bounds) for r in self.model.reactions) prob = self.model.problem + if self.fast_gapfill: + indicator_type = "continuous" + else: + indicator_type = "binary" for rxn in self.model.reactions: if not hasattr(rxn, "gapfilling_type"): continue + indicator = prob.Variable( - name=f"indicator_{rxn.id}", lb=0, ub=1, type="binary" + name=f"indicator_{rxn.id}", lb=0, ub=1, type=indicator_type ) if rxn.id in self.penalties: indicator.cost = self.penalties[rxn.id] @@ -343,6 +353,7 @@ def gapfill( demand_reactions: bool = True, exchange_reactions: bool = False, iterations: int = 1, + fast_gapfill: bool = False, ): """Perform gap filling on a model. @@ -375,7 +386,11 @@ def gapfill( which may include previously used reactions i.e., with enough iterations pathways including 10 steps will eventually be reported even if the shortest pathway is a single reaction (default 1). - + fast_gapfill : bool, optional + Use continuous variables for the indicator variables instead of + binary variables. This can speed up the gap filling but may lead to + suboptimal solutions. If you want to use this, you should also + consider increasing the number of iterations (default False). Returns ------- list of list of cobra.Reaction @@ -402,5 +417,6 @@ def gapfill( penalties=penalties, demand_reactions=demand_reactions, exchange_reactions=exchange_reactions, + fast_gapfill=fast_gapfill, ) return gapfiller.fill(iterations=iterations) diff --git a/tests/test_flux_analysis/test_gapfilling.py b/tests/test_flux_analysis/test_gapfilling.py index 127def917..f7cbc0063 100644 --- a/tests/test_flux_analysis/test_gapfilling.py +++ b/tests/test_flux_analysis/test_gapfilling.py @@ -87,3 +87,100 @@ def test_gapfilling(salmonella: Model) -> None: solution = gf.fill() assert "TKT2" not in {r.id for r in solution[0]} assert gf.validate(solution[0]) + + +def test_fast_gapfilling(salmonella: Model) -> None: + """Test Gapfilling.""" + m = Model() + m.add_metabolites([Metabolite(m_id) for m_id in ["a", "b", "c"]]) + exa = Reaction("EX_a") + exa.add_metabolites({m.metabolites.a: 1}) + b2c = Reaction("b2c") + b2c.add_metabolites({m.metabolites.b: -1, m.metabolites.c: 1}) + dmc = Reaction("DM_c") + dmc.add_metabolites({m.metabolites.c: -1}) + m.add_reactions([exa, b2c, dmc]) + m.objective = "DM_c" + + universal = Model() + a2b = Reaction("a2b") + a2d = Reaction("a2d") + universal.add_reactions([a2b, a2d]) + a2b.build_reaction_from_string("a --> b", verbose=False) + a2d.build_reaction_from_string("a --> d", verbose=False) + + # # GrowMatch + # result = gapfilling.growMatch(m, universal)[0] + result = gapfill(m, universal, fast_gapfill=True)[0] + assert len(result) == 1 + assert result[0].id == "a2b" + + # # SMILEY + # result = gapfilling.SMILEY(m, "b", universal)[0] + with m: + m.objective = m.add_boundary(m.metabolites.b, type="demand") + result = gapfill(m, universal, fast_gapfill=True)[0] + assert len(result) == 1 + assert result[0].id == "a2b" + + # # 2 rounds of GrowMatch with exchange reactions + # result = gapfilling.growMatch(m, None, ex_rxns=True, iterations=2) + result = gapfill(m, None, exchange_reactions=True, iterations=2, fast_gapfill=True) + assert len(result) == 2 + assert len(result[0]) == 1 + assert len(result[1]) == 1 + assert {i[0].id for i in result} == {"EX_b", "EX_c"} + + # # Gapfilling solution adds metabolites not present in original model + # test for when demand = T + # a demand reaction must be added to clear new metabolite + universal_noDM = Model() + a2b = Reaction("a2b") + universal_noDM.add_reactions([a2b]) + a2b.build_reaction_from_string("a --> b + d", verbose=False) + result = gapfill( + m, + universal_noDM, + exchange_reactions=False, + demand_reactions=True, + fast_gapfill=True, + )[0] + # add reaction a2b and demand reaction to clear met d + assert len(result) == 2 + assert "a2b" in [x.id for x in result] + + # test for when demand = False + # test for when metabolites are added to the model and + # must be cleared by other reactions in universal model + # (i.e. not necessarily a demand reaction) + universal_withDM = universal_noDM.copy() + d_dm = Reaction("d_dm") + universal_withDM.add_reactions([d_dm]) + d_dm.build_reaction_from_string("d -->", verbose=False) + result = gapfill( + m, + universal_withDM, + exchange_reactions=False, + demand_reactions=False, + fast_gapfill=True, + )[0] + assert len(result) == 2 + assert "a2b" in [x.id for x in result] + + # somewhat bigger model + universal = Model("universal_reactions") + with salmonella as model: + for i in [i.id for i in model.metabolites.f6p_c.reactions]: + reaction = model.reactions.get_by_id(i) + universal.add_reactions([reaction.copy()]) + model.remove_reactions([reaction]) + gf = GapFiller( + model, + universal, + penalties={"TKT2": 1e3}, + demand_reactions=False, + fast_gapfill=True, + ) + solution = gf.fill() + assert "TKT2" not in {r.id for r in solution[0]} + assert gf.validate(solution[0]) diff --git a/tests/test_io/test_web/test_load.py b/tests/test_io/test_web/test_load.py index c520e262b..5396fed8d 100644 --- a/tests/test_io/test_web/test_load.py +++ b/tests/test_io/test_web/test_load.py @@ -71,7 +71,6 @@ def test_unknown_model() -> None: with pytest.raises(RuntimeError): load_model("MODELWHO?", cache=False) - @pytest.mark.parametrize( "model_id, num_metabolites, num_reactions", [("e_coli_core", 72, 95), ("BIOMD0000000633", 50, 35)], @@ -89,6 +88,9 @@ def test_remote_load(model_id: str, num_metabolites: int, num_reactions: int) -> The total number of reactions in the model having ID `model_id`. """ + if os.getenv("CI") == "true": + pytest.skip("Skipping remote load tests on CI") + model = load_model(model_id, cache=False) assert len(model.metabolites) == num_metabolites assert len(model.reactions) == num_reactions