Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/cobra/flux_analysis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
single_reaction_deletion,
)
from .fastcc import fastcc
from .find_cyclic_reactions import find_cyclic_reactions
from .gapfilling import gapfill
from .geometric import geometric_fba
from .loopless import loopless_solution, add_loopless
Expand Down
226 changes: 226 additions & 0 deletions src/cobra/flux_analysis/find_cyclic_reactions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,226 @@
"""Provides a function to find cyclic reactions in a metabolic model."""

from typing import TYPE_CHECKING, List, Optional, Tuple

import numpy as np
import optlang
from optlang.symbolics import Zero

from ..util import create_stoichiometric_matrix
from ..util import solver as sutil
from .helpers import normalize_cutoff


if TYPE_CHECKING:
from cobra import Model


def _create_find_cyclic_reactions_problem(
solver: "optlang.interface",
s_int: np.ndarray,
directions_int: np.ndarray,
zero_cutoff: float,
bound: float,
) -> Tuple["optlang.interface.Model", List["optlang.interface.Variable"]]:
"""Create an optimization problem to find cyclic reactions.

This optimization problem includes only internal reactions, assumes
steady-state constraints, and enforces reaction directions.
"""
model = solver.Model()

q_list = []
for i in range(s_int.shape[1]):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

wouldn't it be faster to add those to the original model and use a context? For instance the code below re-adds all the constraints from S that are already in the original model

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For this function it is important to work only with internal reactions, however, the original model contains boundary reactions and additional constraints. It is harder to filter them, the easier way is just to create the new problem.

We checked that creating a new problem before solving is not a bottleneck and takes negligible time comparing with the whole function running time. Also this function is typically not called in a loop, so we won't create more than one additional problem.

q = solver.Variable(
name=f"q_{i}",
lb=bound * min(0, directions_int[i, 0]),
ub=bound * max(0, directions_int[i, 1]),
problem=model,
)
q_list.append(q)
model.add(q_list)

for idx, row in enumerate(s_int):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should be more efficient to iterate over model.constrains directly and clone or modify

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As I written in another reply, it is easier to create a new helper optimization problem, rather than filtering all unnecessary variables and constraints from the copy of the original problem. Also, from my experience it takes approximately the same time to copy the original problem and to create the new one.

nnz_list = np.flatnonzero(np.abs(row) > zero_cutoff)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this will ignore additional constraints from model.constraints that the user may have defined.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For this function, it is important that we find cyclic reactions based only on stoichiometric matrix and reaction directions. Otherwise, the implemented method does not work. This function is used for filtering reactions, so it is ok. I've added a comment to docstring about what this function exactly does.

p.s.
To satisfy all constraints (even reactions bounds, not just directions) the algorithm should be much slower. We definitely know, that in many models there are some cyclic reactions, that cannot carry any non-zero flux (if all bounds are considered), however, it is harder problem to prove that.

if len(nnz_list) == 0:
continue
constraint = solver.Constraint(Zero, lb=0, ub=0, name=f"row_{idx}")
model.add([constraint], sloppy=True)
model.constraints[f"row_{idx}"].set_linear_coefficients(
{q_list[i]: row[i] for i in nnz_list}
)

model.update()

model.objective = solver.Objective(Zero)

return model, q_list


def find_cyclic_reactions(
model: "Model",
zero_cutoff: Optional[float] = None,
bound: float = 1e4,
method: str = "optimized",
required_stop_checks_num: int = 3,
) -> Tuple[List[str], List[Tuple[bool, bool]]]:
"""Find reactions, that can be in a loop in a steady state flux distribution.

Parameters
----------
model : cobra.Model
The metabolic model to analyze.
zero_cutoff : float, optional
The cutoff value to consider a flux as zero.
The default uses the `model.tolerance` (default None).
bound : float, optional
The bound for the reaction fluxes in the optimization problem.
(default is 1e4).
method : str, optional
The method to use for finding cyclic reactions.
Options are "optimized" (default) or "basic".
See notes for details.
required_stop_checks_num : int, optional
This parameter is used only for the "optimized" method.
The number of random checks to pass to prove that all cyclic
reactions were found. (default is 3).

Returns
-------
A tuple containing two lists:
- A list of reaction IDs that can be part of a loop.
- A list of tuples indicating the possible directions of
reactions from the first list in the loop.
Each tuple contains two boolean values: (can_be_negative, can_be_positive).

Notes
-----
This function considers only the stoichiometric matrix and reaction directions.
Any other constraints present in the model are ignored.
* If a reaction is identified as cyclic, there may still be no feasible loop
when taking into account all bounds and additional constraints.
* However, if a reaction is identified as non-cyclic, it cannot participate
in any loop in a steady-state flux distribution.

The "basic" method for each reaction and direction checks if it can be a part of
a loop by optimizing linear programming problem.

The "optimized" method uses a faster randomized approach to firstly find all
reactions that can be part of a loop and then checks their directions. This method
usually works at least 2 times faster than the "basic" method.
The `required_stop_checks_num` parameter is used to descrease the probability
of missing some cyclic reactions.
"""

if method not in ["optimized", "basic"]:
raise ValueError(
"The `method` parameter must be either 'optimized' or 'basic'."
)

if required_stop_checks_num < 1:
raise ValueError(
"The `required_stop_checks_num` parameter must be greater than 0."
)

zero_cutoff = normalize_cutoff(model, zero_cutoff)

internal = [i for i, r in enumerate(model.reactions) if not r.boundary]
s_int = create_stoichiometric_matrix(model)[:, np.array(internal)]
n = s_int.shape[1]

bounds_int = np.array([model.reactions[i].bounds for i in internal])
directions_int = np.sign(bounds_int)

lp, q_list = _create_find_cyclic_reactions_problem(
model.problem, s_int, directions_int, zero_cutoff, bound
)

candidate_reactions = list(range(n))
can_positive = [False] * n
can_negative = [False] * n

if method == "optimized":
is_cyclic = [False] * n

def set_reaction_weights():
signs = 2 * np.random.randint(low=0, high=2, size=n) - 1
weights = np.random.uniform(0.5, 1.0, size=n) * signs
lp.objective.set_linear_coefficients(
{q_list[i]: weights[i] for i in range(n) if not is_cyclic[i]}
)

set_reaction_weights()
dir_order = ["min", "max"]
stop_checks_num = 0
cyclic_reactions_num = 0

while cyclic_reactions_num < n and stop_checks_num < required_stop_checks_num:
found_cyclic = False
reverse_dir_order = False

for dir in dir_order:
lp.objective.direction = dir
lp.optimize()

sutil.check_solver_status(lp.status)

if abs(lp.objective.value) > zero_cutoff:
remove_coef = {}
for i in range(n):
if not is_cyclic[i] and abs(q_list[i].primal) > zero_cutoff:
is_cyclic[i] = True
cyclic_reactions_num += 1
remove_coef[q_list[i]] = 0
if q_list[i].primal > zero_cutoff:
can_positive[i] = True
else:
can_negative[i] = True

found_cyclic = True
lp.objective.set_linear_coefficients(remove_coef)
stop_checks_num = 0
break

reverse_dir_order = True

if not found_cyclic:
set_reaction_weights()
stop_checks_num += 1

if reverse_dir_order:
dir_order.reverse()

candidate_reactions = [i for i in range(n) if is_cyclic[i]]

lp.objective = model.problem.Objective(Zero, direction="max")
for i in candidate_reactions:
for dir in ["min", "max"]:
if (dir == "min" and can_negative[i]) or (dir == "max" and can_positive[i]):
continue

if directions_int[i, int(dir == "max")] == 0:
continue

lp.objective.set_linear_coefficients(
{q_list[i]: 1.0 if dir == "max" else -1.0}
)

lp.optimize()
sutil.check_solver_status(lp.status)

if lp.objective.value > zero_cutoff:
if dir == "max":
can_positive[i] = True
else:
can_negative[i] = True

lp.objective.set_linear_coefficients({q_list[i]: 0.0})

cyclic_reactions = []
cyclic_reactions_directions = []
for i in range(n):
if can_positive[i] or can_negative[i]:
cyclic_reactions.append(model.reactions[internal[i]].id)
cyclic_reactions_directions.append((can_negative[i], can_positive[i]))

return cyclic_reactions, cyclic_reactions_directions
80 changes: 68 additions & 12 deletions src/cobra/flux_analysis/loopless.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,34 @@
"""Provide functions to remove thermodynamically infeasible loops."""

from typing import TYPE_CHECKING, Dict, Optional, Union
from typing import TYPE_CHECKING, Dict, List, Optional, Union

import numpy as np
from optlang.symbolics import Zero

from ..core import get_solution
from ..util import create_stoichiometric_matrix, nullspace
from ..util import create_stoichiometric_matrix, nullspace, nullspace_fast_snp
from .find_cyclic_reactions import find_cyclic_reactions
from .helpers import normalize_cutoff


if TYPE_CHECKING:
from cobra import Model, Reaction, Solution


def add_loopless(model: "Model", zero_cutoff: Optional[float] = None) -> None:
def add_loopless(
model: "Model",
zero_cutoff: Optional[float] = None,
method: str = "fastSNP",
reactions: Optional[List[str]] = None,
) -> None:
"""Modify a model so all feasible flux distributions are loopless.

It adds variables and constraints to a model which will disallow flux
distributions with loops. The used formulation is described in [1]_.
This function *will* modify your model.
distributions with loops. This function *will* modify your model.

The used formulation is described in [1]_. If `method` is set to
"fastSNP", it uses a faster implementation based on the Fast-SNP
algorithm [2]_.

In most cases you probably want to use the much faster
`loopless_solution`. May be used in cases where you want to add complex
Expand All @@ -35,27 +44,74 @@ def add_loopless(model: "Model", zero_cutoff: Optional[float] = None) -> None:
Cutoff used for null space. Coefficients with an absolute value
smaller than `zero_cutoff` are considered to be zero. The default
uses the `model.tolerance` (default None).
method : str, "original" or "fastSNP", optional
The method to use for finding the null space. The "original" method
uses the original method from [1]_, while "fastSNP" uses a faster
implementation based on the FastSNP algorithm. The "fastSNP" method
is much faster and should be used in most cases.
reactions : list of str, optional
The list of reaction IDs to constrain. All cycles within these
reactions will be removed. If `None`, all reactions will be constrained.

References
----------
.. [1] Elimination of thermodynamically infeasible loops in steady-state
metabolic models. Schellenberger J, Lewis NE, Palsson BO. Biophys J.
2011 Feb 2;100(3):544-53. doi: 10.1016/j.bpj.2010.12.3707. Erratum
in: Biophys J. 2011 Mar 2;100(5):1381.

.. [2] Fast-SNP: a fast matrix pre-processing algorithm for efficient
loopless flux optimization of metabolic models. Saa PA, Nielsen LK.
Bioinformatics. 2016 Dec;32(24):3807–3814. doi: 10.1093/bioinformatics/btw555.
"""
if method not in ["original", "fastSNP"]:
raise ValueError(f"unsupported method: {method}")

zero_cutoff = normalize_cutoff(model, zero_cutoff)

internal = [i for i, r in enumerate(model.reactions) if not r.boundary]
s_int = create_stoichiometric_matrix(model)[:, np.array(internal)]
n_int = nullspace(s_int).T
if reactions is None and method == "fastSNP":
reactions = find_cyclic_reactions(model, zero_cutoff=zero_cutoff)[0]

reactions_to_constrain = [
i for i, r in enumerate(model.reactions) if not r.boundary
]
if reactions is not None:
reactions_set = set(reactions)
reactions_to_constrain = [
i
for i, r in enumerate(model.reactions)
if not r.boundary and r.id in reactions_set
]

s_int = create_stoichiometric_matrix(model)[:, np.array(reactions_to_constrain)]

if method == "original":
n_int = nullspace(s_int).T
elif method == "fastSNP":
bounds_int = np.array(
[model.reactions[i].bounds for i in reactions_to_constrain]
)
directions_int = np.sign(bounds_int)
v_bound = np.max(np.abs(bounds_int))
n_int = nullspace_fast_snp(
model.problem,
s_int,
directions_int,
v_bound=v_bound,
zero_cutoff=zero_cutoff,
).T
else:
raise ValueError(f"unsupported method: {method}")

max_bound = max(max(abs(b) for b in r.bounds) for r in model.reactions)
prob = model.problem

# Add indicator variables and new constraints
to_add = []
for i in internal:
rxn = model.reactions[i]
for i, ridx in enumerate(reactions_to_constrain):
if not (np.abs(n_int[:, i]) > zero_cutoff).any():
continue

rxn = model.reactions[ridx]
# indicator variable a_i
indicator = prob.Variable(f"indicator_{rxn.id}", type="binary")
# -M*(1 - a_i) <= v_i <= M*a_i
Expand Down Expand Up @@ -84,7 +140,7 @@ def add_loopless(model: "Model", zero_cutoff: Optional[float] = None) -> None:
model.add_cons_vars([nullspace_constraint])
coefs = {
model.variables[f"delta_g_{model.reactions[ridx].id}"]: row[i]
for i, ridx in enumerate(internal)
for i, ridx in enumerate(reactions_to_constrain)
if abs(row[i]) > zero_cutoff
}
model.constraints[name].set_linear_coefficients(coefs)
Expand Down
Loading