Fixing bugs in gapfilling
Christopher Henry committed Feb 22, 2025
1 parent ee1872c commit a888d36
Showing 5 changed files with 91 additions and 22 deletions.
9 changes: 6 additions & 3 deletions modelseedpy/core/annotationontology.py
@@ -37,6 +37,7 @@
"tmscore",
"rmsd",
"hmmscore",
"score"
]

def convert_to_search_role(role):
@@ -60,9 +61,9 @@ def __init__(self, parent, event, term, probability=1, scores={}, ref_entity=Non
self.ref_entity = ref_entity
self.entity_type = entity_type
self.scores = scores
for item in self.scores:
if item not in allowable_score_types:
logger.warning(item + " not an allowable score type!")
#for item in self.scores:
#if item not in allowable_score_types:
#logger.warning(item + " not an allowable score type!")

def to_data(self):
output = {
@@ -485,6 +486,8 @@ def get_events_from_priority_list(self,priority_list):
elif item == "Merge":
if len(event.method) > 5 and event.method[0:5] == "Merge" and event.id not in event_list:
selected_merge = event.id
elif item in event.description or item in event.id:
event_list.append(event.id)
if selected_merge:
event_list.append(selected_merge)
return event_list
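A brief, hypothetical illustration of the new matching clause in get_events_from_priority_list, which now also selects events whose id or description contains a priority-list term; the ontology object and the "RAST" term here are illustrative only:

def select_events_sketch(annotation_ontology):
    # Hypothetical usage: annotation_ontology is assumed to be the ontology
    # object defined in annotationontology.py. Events whose id or description
    # contains "RAST" are now selected even without an exact method match,
    # and any "Merge" event is still appended last.
    return annotation_ontology.get_events_from_priority_list(["RAST", "Merge"])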
27 changes: 19 additions & 8 deletions modelseedpy/core/msgapfill.py
@@ -116,7 +116,7 @@ def __init__(
}
)

def test_gapfill_database(self, media, target=None, before_filtering=True):
def test_gapfill_database(self, media, target=None, before_filtering=True,active_reactions=[]):
# Testing if gapfilling can work before filtering
if target:
self.gfpkgmgr.getpkg("GapfillingPkg").set_base_objective(target,None)
@@ -126,7 +126,7 @@ def test_gapfill_database(self, media, target=None, before_filtering=True):
target = target[13:]
#Setting media
self.gfpkgmgr.getpkg("KBaseMediaPkg").build_package(media)
if self.gfpkgmgr.getpkg("GapfillingPkg").test_gapfill_database():
if self.gfpkgmgr.getpkg("GapfillingPkg").test_gapfill_database(active_reactions):
return True
if self.gfpkgmgr.getpkg("GapfillingPkg").test_solution.status == 'infeasible':
return False
@@ -162,14 +162,17 @@ def test_and_adjust_gapfilling_conditions(self,medias,targets,thresholds,prefilt
"medias":[],
"targets":[],
"thresholds":[],
"conditions":[]
"conditions":[],
"active_reactions":[]
}
logger.debug("Testing unfiltered database")
for i,media in enumerate(medias):
if self.test_gapfill_database(media,targets[i],before_filtering=True):
active_reactions = []
if self.test_gapfill_database(media,targets[i],before_filtering=True,active_reactions=active_reactions):
output["medias"].append(media)
output["targets"].append(targets[i])
output["thresholds"].append(thresholds[i])
output["active_reactions"].append(active_reactions)
output["conditions"].append({
"media": media,
"is_max_threshold": False,
@@ -179,25 +182,29 @@
# Filtering
if prefilter:
logger.debug("Filtering database")
self.prefilter(growth_conditions=output["conditions"])
self.prefilter(growth_conditions=output["conditions"],active_reaction_sets=output["active_reactions"])
medias = []
targets = []
thresholds = []
conditions = []
active_reaction_sets = []
logger.debug("Testing filtered database")
for i,media in enumerate(output["medias"]):
if self.test_gapfill_database(media,output["targets"][i],before_filtering=False):
active_reactions = []
if self.test_gapfill_database(media,output["targets"][i],before_filtering=False,active_reactions=active_reactions):
medias.append(media)
targets.append(output["targets"][i])
thresholds.append(output["thresholds"][i])
conditions.append(output["conditions"][i])
active_reaction_sets.append(active_reactions)
output["medias"] = medias
output["targets"] = targets
output["thresholds"] = thresholds
output["conditions"] = conditions
output["active_reactions"] = active_reaction_sets
return output

def prefilter(self,test_conditions=None,growth_conditions=[],use_prior_filtering=False,base_filter_only=False):
def prefilter(self,test_conditions=None,growth_conditions=[],use_prior_filtering=False,base_filter_only=False,active_reaction_sets=[]):
"""Prefilters the database by removing any reactions that break specified ATP tests
Parameters
----------
@@ -215,7 +222,8 @@ def prefilter(self,test_conditions=None,growth_conditions=[],use_prior_filtering
self.test_conditions,
growth_conditions=growth_conditions,
base_filter=base_filter,
base_filter_only=base_filter_only
base_filter_only=base_filter_only,
active_reaction_sets=active_reaction_sets
)
gf_filter = self.gfpkgmgr.getpkg("GapfillingPkg").modelutl.get_attributes(
"gf_filter", {}
@@ -355,6 +363,7 @@ def run_global_gapfilling(
self.gfpkgmgr.getpkg("GapfillingPkg").set_media(media)
#Copying model and either making it the base model or adding to the model list
model_cpy = self.gfmodel.copy()

if i == 0:
merged_model = model_cpy
else:
@@ -400,7 +409,9 @@ def run_global_gapfilling(
self.mdlutl.printlp(model=merged_model,filename="GlobalGapfill",print=True)

# Running gapfilling and checking solution
print("Runninng global optimization")
sol = merged_model.optimize()
print("Global optimization complete")
logger.info(
f"gapfill solution objective value {sol.objective_value} ({sol.status}) for media {media}"
)
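A minimal sketch of how the new active_reactions plumbing in msgapfill.py might be driven end to end; the constructor arguments and condition inputs are simplified assumptions, not part of this commit:

from modelseedpy.core.msgapfill import MSGapfill

def gapfill_conditions_sketch(model, medias, targets, thresholds):
    # Hypothetical driver: build the gapfiller (constructor shown with a single
    # argument for brevity) and let it test, prefilter, and retest conditions.
    gapfiller = MSGapfill(model)
    output = gapfiller.test_and_adjust_gapfilling_conditions(
        medias, targets, thresholds, prefilter=True
    )
    # For each condition that still grows, output["active_reactions"][i] holds the
    # [reaction_id, ">"] pairs that pFBA found carrying flux; those sets were also
    # passed to prefilter() to bias reliability scores during database filtering.
    return output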
33 changes: 29 additions & 4 deletions modelseedpy/core/msgrowthphenotypes.py
@@ -281,11 +281,15 @@ def gapfill_model_for_phenotype(

return gfresults


class MSGrowthPhenotypes:
def __init__(
self, base_media=None, base_uptake=0, base_excretion=1000, global_atom_limits={}
self, base_media=None, base_uptake=0, base_excretion=1000, global_atom_limits={}, id=None, name=None, source=None, source_id=None, type=None
):
self.id = id
self.name = name
self.source = source
self.source_id = source_id
self.type = type
self.base_media = base_media
self.phenotypes = DictList()
self.base_uptake = base_uptake
@@ -304,7 +308,7 @@ def from_compound_hash(
type="growth"
):
growthpheno = MSGrowthPhenotypes(
base_media, base_uptake, base_excretion, global_atom_limits
base_media=base_media, base_uptake=base_uptake, base_excretion=base_excretion, global_atom_limits=global_atom_limits, id=None, name=None, source=None, source_id=None, type=None
)
new_phenos = []
for cpd in compounds:
@@ -323,7 +327,7 @@ def from_kbase_object(
global_atom_limits={},
):
growthpheno = MSGrowthPhenotypes(
base_media, base_uptake, base_excretion, global_atom_limits
base_media=base_media, base_uptake=base_uptake, base_excretion=base_excretion, global_atom_limits=global_atom_limits, id=data["id"], name=data["name"], source=data["source"], source_id=data["source_id"], type=data["type"]
)
new_phenos = []
for pheno in data["phenotypes"]:
@@ -414,6 +418,27 @@ def from_ms_file(
growthpheno.add_phenotypes(new_phenos)
return growthpheno

def to_kbase_json(self,genome_ref):
pheno_data = {
"id": self.id,
"name": self.name,
"source": self.source,
"source_id": self.source_id,
"type": self.type,
"phenotypes": [],
"genome_ref": genome_ref
}
for pheno in self.phenotypes:
pheno_data["phenotypes"].append({
"id": pheno.id,
"name": pheno.name,
"media_ref": pheno.media.info.ref,
"normalizedGrowth": pheno.experimental_value,
"geneko_refs": pheno.gene_ko,
"additionalcompound_refs": pheno.additional_compounds
})
return pheno_data

def build_super_media(self):
super_media = None
for pheno in self.phenotypes:
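A quick sketch of calling the new MSGrowthPhenotypes.to_kbase_json helper; the loading step and its argument list are abbreviated and assumed, and the genome reference is a placeholder:

from modelseedpy.core.msgrowthphenotypes import MSGrowthPhenotypes

def serialize_phenotype_set_sketch(kbase_data, kbase_api, genome_ref):
    # Hypothetical round trip: from_kbase_object's full argument list is assumed;
    # to_kbase_json then emits the KBase-style dictionary with id, name, source,
    # source_id, type, genome_ref, and one entry per phenotype.
    phenoset = MSGrowthPhenotypes.from_kbase_object(kbase_data, kbase_api)
    return phenoset.to_kbase_json(genome_ref)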
24 changes: 21 additions & 3 deletions modelseedpy/core/msmodelutl.py
@@ -852,13 +852,21 @@ def add_ms_reaction(self, rxn_dict, compartment_trans=["c0", "e0"]):
#################################################################################
# Functions related to utility functions
#################################################################################
def assign_reliability_scores_to_reactions(self):
def assign_reliability_scores_to_reactions(self,active_reaction_sets=[]):
"""Assigns a reliability score to every model reaction which indicates how likely the reaction is to be accurate and to take place
Returns
-------
{ reaction ID<string> : { reaction direction<string> : score<float> } }
"""
active_rxn_dictionary={}
for item in active_reaction_sets:
for array in item:
if array[0] not in active_rxn_dictionary:
active_rxn_dictionary[array[0]] = {}
if array[1] not in active_rxn_dictionary[array[0]]:
active_rxn_dictionary[array[0]][array[1]] = 0
active_rxn_dictionary[array[0]][array[1]]+=1
if self.reliability_scores == None:
self.reliability_scores = {}
biochem = ModelSEEDBiochem.get()
@@ -927,6 +935,15 @@ def assign_reliability_scores_to_reactions(self):
self.reliability_scores[reaction.id] = {}
self.reliability_scores[reaction.id][">"] = 1000
self.reliability_scores[reaction.id]["<"] = 1000
for_multiplier = 1
rev_multiplier = 1
if reaction.id in active_rxn_dictionary:
if ">" in active_rxn_dictionary[reaction.id]:
for_multiplier += 0.1*active_rxn_dictionary[reaction.id][">"]
if "<" in active_rxn_dictionary[reaction.id]:
rev_multiplier += 0.1*active_rxn_dictionary[reaction.id]["<"]
self.reliability_scores[reaction.id][">"] = self.reliability_scores[reaction.id][">"]*for_multiplier
self.reliability_scores[reaction.id]["<"] = self.reliability_scores[reaction.id]["<"]*rev_multiplier
return self.reliability_scores

def is_core(self,rxn):
@@ -1636,7 +1653,8 @@ def reaction_expansion_test(
binary_search=True,
attribute_label="gf_filter",
positive_growth=[],
resort_by_score=True
resort_by_score=True,
active_reaction_sets=[]
):
"""Adds reactions in reaction list one by one and appplies tests, filtering reactions that fail
@@ -1659,7 +1677,7 @@ def reaction_expansion_test(
self.breaking_reaction = None
filtered_list = []
if resort_by_score:
scores = self.assign_reliability_scores_to_reactions()
scores = self.assign_reliability_scores_to_reactions(active_reaction_sets=active_reaction_sets)
reaction_list = sorted(reaction_list, key=lambda x: scores[x[0].id][x[1]])
for item in reaction_list:
logger.debug(item[0].id+":"+item[1]+":"+str(scores[item[0].id][item[1]]))
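The new logic in assign_reliability_scores_to_reactions boosts a reaction's score by 10% for every growth condition in which it was reported active. A small, self-contained illustration of that arithmetic with made-up reaction IDs:

# Illustrative only: two active-reaction sets of the kind test_gapfill_database returns.
active_reaction_sets = [
    [["rxn00001_c0", ">"], ["rxn00002_c0", "<"]],  # condition 1
    [["rxn00001_c0", ">"]],                        # condition 2
]

# Tally how many conditions each (reaction, direction) pair was active in,
# mirroring the dictionary built at the top of the method.
counts = {}
for reaction_set in active_reaction_sets:
    for rxn_id, direction in reaction_set:
        counts.setdefault(rxn_id, {}).setdefault(direction, 0)
        counts[rxn_id][direction] += 1

base_score = 5.0                                   # hypothetical base reliability score
forward_multiplier = 1 + 0.1 * counts["rxn00001_c0"][">"]  # active in 2 conditions -> 1.2
print(base_score * forward_multiplier)             # 6.0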
20 changes: 16 additions & 4 deletions modelseedpy/fbapkg/gapfillingpkg.py
@@ -8,6 +8,7 @@
import json
from optlang.symbolics import Zero, add
from cobra import Model, Reaction, Metabolite
from cobra.flux_analysis import pfba
from cobra.io import (
load_json_model,
save_json_model,
@@ -717,7 +718,7 @@ def run_test_conditions(self, condition_list, solution=None, max_iterations=10):
return None
return solution

def test_gapfill_database(self):
def test_gapfill_database(self,active_reactions=[]):
self.reset_objective_minimum(0,False)
self.model.objective = self.original_objective
self.test_solution = self.model.optimize()
@@ -731,6 +732,11 @@ def test_gapfill_database(self):
self.model.objective = self.parameters["gfobj"]
if self.test_solution.objective_value < self.parameters["minimum_obj"] or self.test_solution.status == 'infeasible':
return False
#Running pFBA to determine active reactions for nonzero objective
solution = pfba(self.model)
for rxn in self.model.reactions:
if solution.fluxes[rxn.id] > 0:
active_reactions.append([rxn.id,">"])
return True

def reset_objective_minimum(self, min_objective,reset_params=True):
@@ -749,7 +755,7 @@ def reset_objective_minimum(self, min_objective,reset_params=True):
if min_objective < 0:
self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"]["1"].ub = min_objective

def filter_database_based_on_tests(self,test_conditions,growth_conditions=[],base_filter=None,base_target="rxn00062_c0",base_filter_only=False,all_noncore=True):
def filter_database_based_on_tests(self,test_conditions,growth_conditions=[],base_filter=None,base_target="rxn00062_c0",base_filter_only=False,all_noncore=True,active_reaction_sets=[]):
#Saving the current media
current_media = self.current_media()
#Clearing element uptake constraints
@@ -776,20 +782,26 @@ def filter_database_based_on_tests(self,test_conditions,growth_conditions=[],bas
if not base_filter_only:
with self.model:
rxnlist = []
rxndict = {}
for reaction in self.model.reactions:
if reaction.id in self.gapfilling_penalties:
rxndict[reaction.id] = 1
if "reverse" in self.gapfilling_penalties[reaction.id]:
rxnlist.append([reaction, "<"])
if "forward" in self.gapfilling_penalties[reaction.id]:
rxnlist.append([reaction, ">"])
elif all_noncore and not self.modelutl.is_core(reaction):
rxndict[reaction.id] = 1
if reaction.lower_bound < 0:
rxnlist.append([reaction, "<"])
if reaction.upper_bound > 0:
rxnlist.append([reaction, ">"])
print("Full model:",len(self.modelutl.model.reactions))
print("Gapfilling count:",len(self.gapfilling_penalties))
print("Reaction list:",len(rxndict))
filtered_list = self.modelutl.reaction_expansion_test(
rxnlist, test_conditions
)
rxnlist, test_conditions,active_reaction_sets=active_reaction_sets
)#,positive_growth=growth_conditions
#Adding base filter reactions to model
if base_filter != None:
gf_filter_att = self.modelutl.get_attributes("gf_filter", {})
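The pFBA step added to test_gapfill_database follows a standard cobrapy pattern. A minimal, standalone sketch of the same idea against a generic cobra model, independent of the gapfilling package's internal state:

from cobra.flux_analysis import pfba

def collect_active_forward_reactions(model):
    # Return [reaction_id, ">"] pairs for reactions carrying positive flux under
    # parsimonious FBA, matching how active_reactions is populated in the commit.
    active_reactions = []
    solution = pfba(model)
    for rxn in model.reactions:
        if solution.fluxes[rxn.id] > 0:
            active_reactions.append([rxn.id, ">"])
    return active_reactions

# Usage (assuming my_model is a cobra.Model with an objective set):
# active = collect_active_forward_reactions(my_model)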
