From 4a362081a8346d4870322bbf68ab39b04bb10b16 Mon Sep 17 00:00:00 2001 From: Daniel Milstein Date: Fri, 11 May 2018 20:17:23 -0400 Subject: [PATCH 1/3] In influence map, use English labels instead of PySB rule labels --- bioagents/mra/mra.py | 67 +++++++++++++++++++++++++++++----- bioagents/tests/ekb_cache.json | 2 + 2 files changed, 60 insertions(+), 9 deletions(-) diff --git a/bioagents/mra/mra.py b/bioagents/mra/mra.py index d7c88bad..b6a6793a 100644 --- a/bioagents/mra/mra.py +++ b/bioagents/mra/mra.py @@ -20,6 +20,7 @@ from pysb.tools import render_reactions from pysb.export import export from indra.util.kappa_util import im_json_to_graph, cm_json_to_graph +from indra.assemblers.english_assembler import EnglishAssembler logger = logging.getLogger('MRA') @@ -64,7 +65,8 @@ def build_model_from_ekb(self, model_ekb): res['ambiguities'] = ambiguities model_exec = self.assemble_pysb(stmts) res['model_exec'] = model_exec - res['diagrams'] = make_diagrams(model_exec, model_id) + res['diagrams'] = make_diagrams(model_exec, model_id, + self.models[model_id]) return res def build_model_from_json(self, model_json): @@ -77,7 +79,8 @@ def build_model_from_json(self, model_json): return res model_exec = self.assemble_pysb(stmts) res['model_exec'] = model_exec - res['diagrams'] = make_diagrams(model_exec, model_id) + res['diagrams'] = make_diagrams(model_exec, model_id, + self.models[model_id]) return res def expand_model_from_ekb(self, model_ekb, model_id): @@ -99,7 +102,8 @@ def expand_model_from_ekb(self, model_ekb, model_id): res['model_new'] = new_stmts model_exec = self.assemble_pysb(model_stmts) res['model_exec'] = model_exec - res['diagrams'] = make_diagrams(model_exec, new_model_id) + res['diagrams'] = make_diagrams(model_exec, new_model_id, + self.models[model_id]) return res def expand_model_from_json(self, model_json, model_id): @@ -116,7 +120,8 @@ def expand_model_from_json(self, model_json, model_id): res['model_new'] = new_stmts model_exec = 
self.assemble_pysb(model_stmts) res['model_exec'] = model_exec - res['diagrams'] = make_diagrams(model_exec, new_model_id) + res['diagrams'] = make_diagrams(model_exec, new_model_id, + self.models[model_id]) return res def has_mechanism(self, mech_ekb, model_id): @@ -159,7 +164,8 @@ def remove_mechanism(self, mech_ekb, model_id): res['model_exec'] = model_exec if removed_stmts: res['removed'] = removed_stmts - res['diagrams'] = make_diagrams(model_exec, model_id) + res['diagrams'] = make_diagrams(model_exec, model_id, + self.models[model_id]) self.new_model(new_stmts) return res @@ -181,7 +187,8 @@ def model_undo(self): if not stmts: return res res['ambiguities'] = [] - res['diagrams'] = make_diagrams(model_exec, new_model_id) + res['diagrams'] = make_diagrams(model_exec, new_model_id, + self.models[new_model_id]) return res def get_upstream(self, target, model_id): @@ -249,11 +256,11 @@ def get_ambiguities(tp): return all_ambiguities -def make_diagrams(pysb_model, model_id): +def make_diagrams(pysb_model, model_id, indra_model): sbgn = make_sbgn(pysb_model, model_id) rxn = draw_reaction_network(pysb_model, model_id) cm = draw_contact_map(pysb_model, model_id) - im = draw_influence_map(pysb_model, model_id) + im = draw_influence_map(pysb_model, model_id, indra_model) diagrams = {'reactionnetwork': rxn, 'contactmap': cm, 'influencemap': im, 'sbgn': sbgn} return diagrams @@ -272,10 +279,12 @@ def make_sbgn(pysb_model, model_id): return sbgn_str -def draw_influence_map(pysb_model, model_id): +def draw_influence_map(pysb_model, model_id, indra_model): """Generate a Kappa influence map, draw it and save it as a PNG.""" try: im = make_influence_map(pysb_model) + im = make_influence_map_labels_natural_language(im, pysb_model, + indra_model) fname = 'model%d_im' % model_id abs_path = os.path.abspath(os.getcwd()) full_path = os.path.join(abs_path, fname + '.png') @@ -304,6 +313,46 @@ def make_influence_map(pysb_model): return im +def get_statement_by_uuid(statements, uuid): 
+ """Returns the first statement in statements with the given uuid, or None + if no such statements are in the provided list.""" + for statement in statements: + if statement.uuid == uuid: + return statement + return None + + +def make_unique_label(labels_so_far, new_label): + """Make new_label a unique label by appeneding spaces.""" + while new_label in labels_so_far: + new_label += ' ' + return new_label + + +def make_influence_map_labels_natural_language(im, pysb_model, indra_model): + """Replaces the labels of the influence map with natural language labels. + """ + # Get the name of all the rules in the pysb model + rule_names = [rule.name for rule in pysb_model.rules] + + relabel_map = {} + for annotation in pysb_model.annotations: + if annotation.subject in rule_names: + name = annotation.subject + if annotation.predicate == 'from_indra_statement': + # We found the INDRA statement uuid corresponding to a rule + statement_uuid = annotation.object + s = get_statement_by_uuid(indra_model, statement_uuid) + + # Convert the statement into English + assembler = EnglishAssembler([s]) + text = assembler.make_model() + text = make_unique_label(relabel_map.values(), text) + relabel_map[name] = text + im = networkx.relabel_nodes(im, relabel_map) + return im + + def draw_contact_map(pysb_model, model_id): try: cm = make_contact_map(pysb_model) diff --git a/bioagents/tests/ekb_cache.json b/bioagents/tests/ekb_cache.json index fa775166..a5bf592c 100644 --- a/bioagents/tests/ekb_cache.json +++ b/bioagents/tests/ekb_cache.json @@ -23,6 +23,7 @@ "KRAS": "KRAS\nKRASONT::GENE-PROTEINKRASONT::GENEONT::GENEONT::PROTEINONT::PROTEINONT::PROTEINONT::PROTEINONT::PROTEINONT::PROTEINKRAS", "KRAS activates BRAF": "KRAS activates BRAF\nKRAS activates BRAFONT::ACTIVATEONT::TRUEONT::ACTIVATEKRAS activates BRAFONT::GENE-PROTEINKRASONT::GENE-PROTEINBRAFKRAS activates 
BRAFONT::GENE-PROTEINKRASONT::GENEONT::GENEONT::PROTEINONT::PROTEINONT::PROTEINONT::PROTEINONT::PROTEINONT::PROTEINKRASONT::GENE-PROTEINBRAFONT::GENEONT::GENEONT::PROTEINBRAF", "KRAS activates BRAF.": "KRAS activates BRAF.\nKRAS activates BRAF.ONT::ACTIVATEONT::TRUEONT::ACTIVATEKRAS activates BRAFONT::GENE-PROTEINKRASONT::GENE-PROTEINBRAFKRAS activates BRAFONT::GENE-PROTEINKRASONT::GENEONT::GENEONT::PROTEINONT::PROTEINONT::PROTEINONT::PROTEINONT::PROTEINONT::PROTEINKRASONT::GENE-PROTEINBRAFONT::GENEONT::GENEONT::PROTEINBRAF", + "KRAS activates BRAF. Active BRAF binds MEK.": "KRAS activates BRAF. Active BRAF binds MEK.KRAS activates BRAF.Active BRAF binds MEK.ONT::ACTIVATEONT::TRUEONT::ACTIVATEKRAS activates BRAFONT::GENE-PROTEINKRASONT::GENE-PROTEINBRAFKRAS activates BRAFONT::GENE-PROTEINKRASONT::GENEONT::GENEONT::PROTEINONT::PROTEINONT::PROTEINONT::PROTEINONT::PROTEINONT::PROTEINKRASONT::GENE-PROTEINBRAFONT::GENEONT::GENEONT::PROTEINBRAFONT::BINDONT::TRUEONT::BINDActive BRAF binds MEKONT::GENE-PROTEINActive BRAFONT::PROTEIN-FAMILYMEKActive BRAF binds MEKONT::GENE-PROTEINBRAFTRUEONT::GENEONT::GENEONT::PROTEINActive BRAFONT::PROTEIN-FAMILYMEKONT::PROTEIN-FAMILYONT::PROTEIN-FAMILYMEK", "KRAS bound to GTP phosphorylates BRAF on T373.": "KRAS bound to GTP phosphorylates BRAF on T373.\nKRAS bound to GTP phosphorylates BRAF on T373.ONT::PHOSPHORYLATIONONT::TRUEONT::PHOSPHORYLATIONKRAS bound to GTP phosphorylates BRAF on T373ONT::GENE-PROTEINKRAS bound to GTPONT::GENE-PROTEINBRAFONT::MOLECULAR-SITET373ONT::POST-TRANSLATIONAL-MODIFICATIONKRAS bound to GTP phosphorylates BRAF on T373ONT::GENE-PROTEINKRASONT::GENEONT::GENEONT::PROTEINONT::PROTEINONT::PROTEINONT::PROTEINONT::PROTEINONT::PROTEINKRAS bound to GTPONT::GENE-PROTEINBRAFONT::GENEONT::GENEONT::PROTEINBRAFONT::BINDONT::TRUEONT::BINDKRAS bound to GTPONT::GENE-PROTEINKRAS bound to GTPONT::CHEMICALto GTPKRAS bound to GTPONT::CHEMICALGTPONT::CHEMICALONT::MOLECULEONT::CHEMICALONT::MOLECULEto 
GTPONT::MOLECULAR-SITET-373ThreonineT373T373", "MAP2K1": "MAP2K1\nMAP2K1ONT::GENE-PROTEINMAP-2-K-1ONT::GENEONT::GENEONT::PROTEINMAP2K1", "MAP2K1 binds MAPK1": "MAP2K1 binds MAPK1\nMAP2K1 binds MAPK1ONT::BINDONT::TRUEONT::BINDMAP2K1 binds MAPK1ONT::GENE-PROTEINMAP2K1ONT::GENE-PROTEINMAPK1MAP2K1 binds MAPK1ONT::GENE-PROTEINMAP-2-K-1ONT::GENEONT::GENEONT::PROTEINMAP2K1ONT::GENE-PROTEINMAPK-1ONT::GENEONT::GENEONT::PROTEINMAPK1", @@ -33,6 +34,7 @@ "MAPK1-bound MAP2K1": "MAPK1-bound MAP2K1\nMAPK1-bound MAP2K1ONT::GENE-PROTEINMAP-2-K-1ONT::GENEONT::GENEONT::PROTEINMAPK1-bound MAP2K1ONT::BINDONT::TRUEONT::BINDMAPK1-bound MAP2K1ONT::GENE-PROTEINMAPK1-boundONT::GENE-PROTEINMAPK1-bound MAP2K1MAPK1-bound MAP2K1ONT::GENE-PROTEINMAPK-1ONT::GENEONT::GENEONT::PROTEINMAPK1-bound", "MEK": "MEK\nMEKONT::PROTEIN-FAMILYMEKONT::PROTEIN-FAMILYONT::PROTEIN-FAMILYMEK", "MEK binds ERK": "MEK binds ERK\nMEK binds ERKONT::BINDONT::TRUEONT::BINDMEK binds ERKONT::PROTEIN-FAMILYMEKONT::GENE-PROTEINERKMEK binds ERKONT::PROTEIN-FAMILYMEKONT::PROTEIN-FAMILYONT::PROTEIN-FAMILYMEKONT::GENE-PROTEINERKONT::PROTEIN-FAMILYONT::PROTEINONT::PROTEINONT::GENEONT::GENEONT::GENEONT::GENEONT::GENEERK", + "MEK binds MAPK1. MEK binds MAPK3.": "MEK binds MAPK1. 
MEK binds MAPK3.MEK binds MAPK1.MEK binds MAPK3.ONT::BINDONT::TRUEONT::BINDMEK binds MAPK1ONT::PROTEIN-FAMILYMEKONT::GENE-PROTEINMAPK1MEK binds MAPK1ONT::PROTEIN-FAMILYMEKONT::PROTEIN-FAMILYONT::PROTEIN-FAMILYMEKONT::GENE-PROTEINMAPK-1ONT::GENEONT::GENEONT::PROTEINMAPK1ONT::BINDONT::TRUEONT::BINDMEK binds MAPK3ONT::PROTEIN-FAMILYMEKONT::GENE-PROTEINMAPK3MEK binds MAPK3ONT::PROTEIN-FAMILYMEKONT::PROTEIN-FAMILYONT::PROTEIN-FAMILYMEKONT::GENE-PROTEINMAPK-3ONT::GENEONT::GENEONT::PROTEINMAPK3", "MEK bound to ERK": "MEK bound to ERK\nMEK bound to ERKONT::BINDONT::TRUEONT::BINDMEK bound to ERKONT::PROTEIN-FAMILYMEKONT::GENE-PROTEINto ERKMEK bound to ERKONT::PROTEIN-FAMILYMEKONT::PROTEIN-FAMILYONT::PROTEIN-FAMILYMEKONT::GENE-PROTEINERKONT::PROTEIN-FAMILYONT::PROTEINONT::PROTEINONT::GENEONT::GENEONT::GENEONT::GENEONT::GENEto ERK", "MEK not bound to Selumetinib phosphorylates ERK. DUSP dephosphorylates ERK. Selumetinib binds MEK.": "MEK not bound to Selumetinib phosphorylates ERK. DUSP dephosphorylates ERK. 
Selumetinib binds MEK.\nMEK not bound to Selumetinib phosphorylates ERK.DUSP dephosphorylates ERK.Selumetinib binds MEK.ONT::PHOSPHORYLATIONONT::TRUEONT::PHOSPHORYLATIONMEK not bound to Selumetinib phosphorylates ERKONT::PROTEIN-FAMILYMEK not bound to SelumetinibONT::GENE-PROTEINERKONT::POST-TRANSLATIONAL-MODIFICATIONMEK not bound to Selumetinib phosphorylates ERKONT::PROTEIN-FAMILYMEKONT::PROTEIN-FAMILYONT::PROTEIN-FAMILYMEK not bound to SelumetinibONT::GENE-PROTEINERKONT::PROTEIN-FAMILYONT::PROTEINONT::PROTEINONT::GENEONT::GENEONT::GENEONT::GENEONT::GENEERKONT::BIND+ONT::FALSEONT::BINDMEK not bound to SelumetinibONT::PROTEIN-FAMILYMEK not bound to SelumetinibONT::PHARMACOLOGIC-SUBSTANCEto SelumetinibMEK not bound to SelumetinibONT::PHARMACOLOGIC-SUBSTANCESELUMETINIBONT::PHARMACOLOGIC-SUBSTANCEONT::PHARMACOLOGIC-SUBSTANCEONT::PHARMACOLOGIC-SUBSTANCEto SelumetinibONT::PHOSPHORYLATIONONT::TRUEONT::MANNER-UNDODE-ONT::PHOSPHORYLATIONDUSP dephosphorylates ERKONT::GENE-PROTEINDUSPONT::GENE-PROTEINERKDUSP dephosphorylates ERKONT::GENE-PROTEINDUSPONT::PROTEIN-FAMILYONT::GENEDUSPONT::GENE-PROTEINERKONT::PROTEIN-FAMILYONT::PROTEINONT::PROTEINONT::GENEONT::GENEONT::GENEONT::GENEONT::GENEERKONT::BINDONT::TRUEONT::BINDSelumetinib binds MEKONT::PHARMACOLOGIC-SUBSTANCESelumetinibONT::PROTEIN-FAMILYMEKSelumetinib binds MEKONT::PHARMACOLOGIC-SUBSTANCESELUMETINIBONT::PHARMACOLOGIC-SUBSTANCEONT::CHEMICALONT::MOLECULESelumetinibONT::PROTEIN-FAMILYMEKONT::PROTEIN-FAMILYONT::PROTEIN-FAMILYMEK", "MEK phosphorylates ERK": "MEK phosphorylates ERK\nMEK phosphorylates ERKONT::PHOSPHORYLATIONONT::TRUEONT::PHOSPHORYLATIONMEK phosphorylates ERKONT::PROTEIN-FAMILYMEKONT::GENE-PROTEINERKONT::POST-TRANSLATIONAL-MODIFICATIONMEK phosphorylates ERKONT::PROTEIN-FAMILYMEKONT::PROTEIN-FAMILYONT::PROTEIN-FAMILYMEKONT::GENE-PROTEINERKONT::PROTEIN-FAMILYONT::PROTEINONT::PROTEINONT::GENEONT::GENEONT::GENEONT::GENEONT::GENEERK", From 6437d6ccf7ac4748463effd70c43fab7b5d54473 Mon Sep 17 00:00:00 2001 From: 
Daniel Milstein Date: Fri, 11 May 2018 20:21:48 -0400 Subject: [PATCH 2/3] Add the PySB label underneath the English label because a single statement can correspond to multiple PySB rules --- bioagents/mra/mra.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bioagents/mra/mra.py b/bioagents/mra/mra.py index b6a6793a..69f5218d 100644 --- a/bioagents/mra/mra.py +++ b/bioagents/mra/mra.py @@ -348,6 +348,7 @@ def make_influence_map_labels_natural_language(im, pysb_model, indra_model): assembler = EnglishAssembler([s]) text = assembler.make_model() text = make_unique_label(relabel_map.values(), text) + text += '\n' + name relabel_map[name] = text im = networkx.relabel_nodes(im, relabel_map) return im From ac0165098fde2c2ec7aa2587795b63d70101d6b4 Mon Sep 17 00:00:00 2001 From: Daniel Milstein Date: Fri, 11 May 2018 20:35:30 -0400 Subject: [PATCH 3/3] Only include the PySB rule in the influence map labels if needed to disambiguate --- bioagents/mra/mra.py | 39 ++++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/bioagents/mra/mra.py b/bioagents/mra/mra.py index 69f5218d..741b2da1 100644 --- a/bioagents/mra/mra.py +++ b/bioagents/mra/mra.py @@ -322,12 +322,14 @@ def get_statement_by_uuid(statements, uuid): return None -def make_unique_label(labels_so_far, new_label): - """Make new_label a unique label by appeneding spaces.""" - while new_label in labels_so_far: - new_label += ' ' - return new_label - +def dict_value_appears_multiple_times(d, value): + """Returns True iff the given value is mapped to multiple times in this + dictionary.""" + count = 0 + for k, v in d.items(): + if v == value: + count += 1 + return count > 1 def make_influence_map_labels_natural_language(im, pysb_model, indra_model): """Replaces the labels of the influence map with natural language labels. 
@@ -335,7 +337,8 @@ def make_influence_map_labels_natural_language(im, pysb_model, indra_model): # Get the name of all the rules in the pysb model rule_names = [rule.name for rule in pysb_model.rules] - relabel_map = {} + relabel_map_english = {} + relabel_map_english_and_rule = {} for annotation in pysb_model.annotations: if annotation.subject in rule_names: name = annotation.subject @@ -346,10 +349,24 @@ def make_influence_map_labels_natural_language(im, pysb_model, indra_model): # Convert the statement into English assembler = EnglishAssembler([s]) - text = assembler.make_model() - text = make_unique_label(relabel_map.values(), text) - text += '\n' + name - relabel_map[name] = text + + english_text = assembler.make_model() + english_and_rule = english_text + ' (' + name + ')' + + relabel_map_english[name] = english_text + relabel_map_english_and_rule[name] = english_and_rule + + # Use only the English text if that is unambiguous, otherwise include + # the rule name too + relabel_map = {} + for before in relabel_map_english: + english_text = relabel_map_english[before] + if dict_value_appears_multiple_times(relabel_map_english, + english_text): + relabel_map[before] = relabel_map_english_and_rule[before] + else: + relabel_map[before] = english_text + im = networkx.relabel_nodes(im, relabel_map) return im