Skip to content
10 changes: 7 additions & 3 deletions indra/preassembler/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ def _ev_keys(sts):
if 'prior_uuids' not in ev.annotations:
ev.annotations['prior_uuids'] = []
ev.annotations['prior_uuids'].append(stmt.uuid)
new_stmt.evidence.append(ev)
new_stmt.add_evidence(ev)
ev_keys.add(ev_key)
end_ev_keys = _ev_keys([new_stmt])
if len(end_ev_keys) != len(start_ev_keys):
Expand Down Expand Up @@ -990,10 +990,14 @@ def flatten_evidence(stmts, collect_from=None):
def _flatten_evidence_for_stmt(stmt, collect_from):
supp_stmts = (stmt.supports if collect_from == 'supports'
else stmt.supported_by)
total_evidence = set(stmt.evidence)
evs = {ev.matches_key(): ev for ev in stmt.evidence}
total_evidence = set(evs.values())
for supp_stmt in supp_stmts:
child_evidence = _flatten_evidence_for_stmt(supp_stmt, collect_from)
total_evidence = total_evidence.union(child_evidence)
chevs = {ev.matches_key(): ev for ev in child_evidence}
for k, v in chevs.items():
evs[k] = v
total_evidence = list(evs.values())
return list(total_evidence)


Expand Down
7 changes: 6 additions & 1 deletion indra/preassembler/grounding_mapper/adeft.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,11 @@ def run_adeft_disambiguation(stmt, agent, idx):
return False
# Initialize annotations if needed so Adeft predicted
# probabilities can be added to Agent annotations
annots = stmt.evidence[0].annotations

evs = stmt.evidence
# Note that the assumption here is that the statement only has a single
# piece of evidence (typically a raw statement)
annots = evs[0].annotations
agent_txt = agent.db_refs['TEXT']
if 'agents' in annots:
if 'adeft' not in annots['agents']:
Expand Down Expand Up @@ -89,6 +93,7 @@ def run_adeft_disambiguation(stmt, agent, idx):
standardize_refs=True)
annots['agents']['adeft'][idx] = disamb_scores
success = True
stmt.evidence = evs
return success


Expand Down
4 changes: 3 additions & 1 deletion indra/sources/sparser/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,8 +168,10 @@ def set_statements_pmid(self, pmid):
ev['pmid'] = pmid
# Replace PMID value in extracted Statements next
for stmt in self.statements:
for ev in stmt.evidence:
evs = stmt.evidence
for ev in evs:
ev.pmid = pmid
stmt.evidence = evs


def _fix_agent(agent):
Expand Down
4 changes: 3 additions & 1 deletion indra/sources/trips/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -813,8 +813,10 @@ def get_cause_events(mod_event_types):

for stmt in stmts_to_make:
stmt.enz = enz
for ev in stmt.evidence:
evs = stmt.evidence
for ev in evs:
ev.epistemics['direct'] = False
stmt.evidence = evs
self.statements.append(stmt)

self._add_extracted(event_type, event.attrib['id'])
Expand Down
17 changes: 8 additions & 9 deletions indra/statements/evidence.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@ class Evidence(object):
and is set by said Statement. It is useful for tracing ownership of
an Evidence object.
"""
# Fixed attribute set for Evidence instances; with __slots__ there is no
# per-instance __dict__, which reduces memory for the many Evidence
# objects handled during assembly.  NOTE: 'text' appeared twice in the
# original list; duplicate slot names are legal but create a redundant
# descriptor, so it is listed once here.
__slots__ = ['source_api', 'source_id', 'pmid', 'text',
             'annotations', 'epistemics', 'context', 'text_refs',
             'source_hash', 'stmt_tag']

def __init__(self, source_api=None, source_id=None, pmid=None, text=None,
annotations=None, epistemics=None, context=None,
text_refs=None):
Expand All @@ -80,15 +84,10 @@ def __init__(self, source_api=None, source_id=None, pmid=None, text=None,
self.stmt_tag = None

def __setstate__(self, state):
    """Restore instance attributes when unpickling.

    Handles both old dict-based pickles (pre-__slots__ instances) and
    tuple-based pickles of slotted instances.  Any slot missing from the
    stored state is set to None so unpickled objects are always fully
    initialized.
    """
    # With a slots-based object, pickle's state is a (dict_state,
    # slots_state) tuple; otherwise it's a plain attribute dict
    state = state[1] if isinstance(state, tuple) else state
    # NOTE(review): the previous implementation defaulted a missing
    # 'text_refs' to {} rather than None — confirm downstream code
    # tolerates text_refs being None on old pickles.
    for slot in self.__slots__:
        setattr(self, slot, state.get(slot, None))

def get_source_hash(self, refresh=False):
"""Get a hash based off of the source of this statement.
Expand Down
50 changes: 39 additions & 11 deletions indra/statements/statements.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,8 @@
import abc
import sys
import uuid
import json
import gzip
import logging
import networkx
import itertools
Expand Down Expand Up @@ -253,16 +255,7 @@ class Statement(object):
_agent_order = NotImplemented

def __init__(self, evidence=None, supports=None, supported_by=None):
if evidence is None:
self.evidence = []
elif isinstance(evidence, Evidence):
self.evidence = [evidence]
elif isinstance(evidence, list):
self.evidence = evidence
else:
raise ValueError('evidence must be an Evidence object, a list '
'(of Evidence objects), or None.')

self.evidence = evidence
# Initialize supports/supported_by fields, which should be lists
self.supports = supports if supports else []
self.supported_by = supported_by if supported_by else []
Expand All @@ -272,6 +265,41 @@ def __init__(self, evidence=None, supports=None, supported_by=None):
self._shallow_hash = None
return

@property
def evidence(self):
    """The Statement's Evidence objects, rebuilt on every access.

    Evidence is stored internally as gzip-compressed JSON in
    self._evidence; each access decompresses and deserializes it, so
    the returned objects are fresh copies — mutating them does not
    affect the Statement unless the list is assigned back.
    """
    raw_json = gzip.decompress(self._evidence).decode('utf-8')
    return [Evidence._from_json(entry) for entry in json.loads(raw_json)]

@evidence.setter
def evidence(self, evidence):
    """Store the given evidence as gzip-compressed JSON.

    Accepts a single Evidence, a list of Evidence objects, or None
    (stored as an empty list); anything else raises ValueError.
    """
    if evidence is None:
        ev_list = []
    elif isinstance(evidence, Evidence):
        ev_list = [evidence]
    elif isinstance(evidence, list):
        ev_list = evidence
    else:
        raise ValueError('evidence must be an Evidence object, a list '
                         '(of Evidence objects), or None.')
    # Serialize each Evidence to JSON, then compress the whole list
    serialized = json.dumps([e.to_json() for e in ev_list])
    self._evidence = gzip.compress(serialized.encode('utf-8'))

def add_evidence(self, ev):
    """Extend the Statement's evidence list with a new Evidence.

    Parameters
    ----------
    ev : indra.statements.Evidence
        An Evidence object to be added to the Statement's list of
        evidences.
    """
    # The evidence property round-trips through compressed JSON, so the
    # list must be read out, extended, and assigned back in one step.
    self.evidence = self.evidence + [ev]

def matches_key(self):
raise NotImplementedError("Method must be implemented in child class.")

Expand Down Expand Up @@ -564,7 +592,7 @@ def make_generic_copy(self, deeply=False):
kwargs = deepcopy(self.__dict__)
else:
kwargs = self.__dict__.copy()
for attr in ['evidence', 'belief', 'uuid', 'supports', 'supported_by',
for attr in ['_evidence', 'belief', 'uuid', 'supports', 'supported_by',
'is_activation']:
kwargs.pop(attr, None)
for attr in ['_full_hash', '_shallow_hash']:
Expand Down
4 changes: 3 additions & 1 deletion indra/tests/test_assemble_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -588,11 +588,13 @@ def test_merge_groundings():

def test_merge_deltas():
def add_annots(stmt):
for ev in stmt.evidence:
evs = stmt.evidence
for ev in evs:
ev.annotations['subj_adjectives'] = stmt.subj.delta.adjectives
ev.annotations['obj_adjectives'] = stmt.obj.delta.adjectives
ev.annotations['subj_polarity'] = stmt.subj.delta.polarity
ev.annotations['obj_polarity'] = stmt.obj.delta.polarity
stmt.evidence = evs
return stmt
# d1 = {'adjectives': ['a', 'b', 'c'], 'polarity': 1}
# d2 = {'adjectives': [], 'polarity': -1}
Expand Down
4 changes: 2 additions & 2 deletions indra/tests/test_medscan.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,8 @@ def test_evidence():
coords = s0.evidence[0].annotations['agents']['coords']
assert isinstance(coords, list), type(coords)
assert len(coords) == 2, len(coords)
assert coords[0] == (90, 97), coords[0]
assert coords[1] == (106, 120), coords[1]
assert tuple(coords[0]) == (90, 97), tuple(coords[0])
assert tuple(coords[1]) == (106, 120), tuple(coords[1])


def test_molsynthesis_positive():
Expand Down
20 changes: 12 additions & 8 deletions indra/tests/test_preassembler.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,13 +80,13 @@ def test_combine_duplicates():
# The statements come out sorted by their matches_key
assert len(pa.unique_stmts) == 4, len(pa.unique_stmts)
num_evs =[len(s.evidence) for s in pa.unique_stmts]
assert pa.unique_stmts[0].matches(p6) # MEK dephos ERK
assert num_evs[0] == 3, num_evs[0]
assert pa.unique_stmts[1].matches(p9) # SRC dephos KRAS
assert pa.unique_stmts[0].matches(p6) # MEK dephos ERK
assert num_evs[0] == 3, num_evs
assert pa.unique_stmts[1].matches(p9) # SRC dephos KRAS
assert num_evs[1] == 1, num_evs[1]
assert pa.unique_stmts[2].matches(p5) # MEK phos ERK
assert pa.unique_stmts[2].matches(p5) # MEK phos ERK
assert num_evs[2] == 1, num_evs[2]
assert pa.unique_stmts[3].matches(p1) # RAF phos MEK
assert pa.unique_stmts[3].matches(p1) # RAF phos MEK
assert num_evs[3] == 4, num_evs[3]


Expand Down Expand Up @@ -510,7 +510,9 @@ def test_flatten_evidence_hierarchy():
supporting_stmt = top_stmt.supported_by[0]
assert len(supporting_stmt.evidence) == 1
assert supporting_stmt.evidence[0].text == 'foo'
supporting_stmt.evidence[0].text = 'changed_foo'
evs = supporting_stmt.evidence
evs[0].text = 'changed_foo'
supporting_stmt.evidence = evs
assert supporting_stmt.evidence[0].text == 'changed_foo'
assert 'changed_foo' not in [e.text for e in top_stmt.evidence]
assert 'foo' in [e.text for e in top_stmt.evidence]
Expand Down Expand Up @@ -930,8 +932,10 @@ def test_agent_coordinates():
evidence_list = unique_stmt.evidence
agent_annots = [ev.annotations['agents'] for ev in unique_stmt.evidence]
assert all(a['raw_text'] == ['MEK1', 'ERK2'] for a in agent_annots)
assert {tuple(a['coords']) for a in agent_annots} == {((21, 25), (0, 4)),
((0, 4), (15, 19))}
expected_coords = {((21, 25), (0, 4)), ((0, 4), (15, 19))}
for annot in agent_annots:
coords = tuple(tuple(a) for a in annot['coords'])
assert coords in expected_coords


def test_association_duplicate():
Expand Down
12 changes: 6 additions & 6 deletions indra/tests/test_reach.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,7 @@ def test_get_agent_coordinates_phosphorylation():
stmt = rp.statements[0]
annotations = stmt.evidence[0].annotations

coords = [(0, 3), (42, 45)]
coords = [[0, 3], [42, 45]]
assert annotations['agents']['coords'] == coords


Expand All @@ -379,7 +379,7 @@ def test_get_agent_coordinates_activation():
rp = reach.process_text(test_case, offline=offline)
stmt = rp.statements[0]
annotations = stmt.evidence[0].annotations
coords = [(0, 4), (15, 19)]
coords = [[0, 4], [15, 19]]
assert annotations['agents']['coords'] == coords


Expand All @@ -389,7 +389,7 @@ def test_get_agent_coordinates_regulate_amount():
rp = reach.process_text(test_case, offline=offline)
stmt = rp.statements[0]
annotations = stmt.evidence[0].annotations
coords = [(0, 3), (35, 39)]
coords = [[0, 3], [35, 39]]
assert annotations['agents']['coords'] == coords


Expand All @@ -399,7 +399,7 @@ def test_get_agent_coordinates_binding():
rp = reach.process_text(test_case, offline=offline)
stmt = rp.statements[0]
annotations = stmt.evidence[0].annotations
coords = [(27, 31), (38, 42)]
coords = [[27, 31], [38, 42]]
assert annotations['agents']['coords'] == coords


Expand All @@ -412,7 +412,7 @@ def test_get_agent_coordinates_translocation():
stmt = [stmt for stmt in rp.statements if
isinstance(stmt, Translocation)][0]
annotations = stmt.evidence[0].annotations
coords = [(86, 89)]
coords = [[86, 89]]
assert annotations['agents']['coords'] == coords


Expand All @@ -426,5 +426,5 @@ def test_get_agent_coordinates_phosphorylation_missing_controller():
stmt = [stmt for stmt in rp.statements if
isinstance(stmt, Phosphorylation)][0]
annotations = stmt.evidence[0].annotations
coords = [None, (57, 60)]
coords = [None, [57, 60]]
assert annotations['agents']['coords'] == coords
1 change: 1 addition & 0 deletions indra/tests/test_statements_serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,7 @@ def test_evidence_context():
assert evj['pmid'] == '1'
assert evj['annotations'] == {'a': '2'}
assert ev.to_json() == Evidence._from_json(ev.to_json()).to_json()
assert ev.matches_key() == Evidence._from_json(ev.to_json()).matches_key()


def test_file_serialization():
Expand Down
4 changes: 2 additions & 2 deletions indra/tests/test_trips_ekbs.py
Original file line number Diff line number Diff line change
Expand Up @@ -739,7 +739,7 @@ def test_53():
assert mek.name == 'MEK'
assert erk.name == 'ERK'
for ev in st.evidence:
assert ev.epistemics.get('direct') is False
assert ev.epistemics.get('direct') is False, ev.epistemics


def test_54():
Expand All @@ -753,7 +753,7 @@ def test_54():
assert mek.name == 'EGF'
assert erk.name == 'ERK'
for ev in st.evidence:
assert ev.epistemics.get('direct') is False
assert ev.epistemics.get('direct') is False, ev.epistemics


def test_55():
Expand Down
2 changes: 2 additions & 0 deletions indra/tools/assemble_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,9 +244,11 @@ def merge_deltas(stmts_in):
for info in ('polarity', 'adjectives'):
key = (role, info)
deltas[key] = []
evs = stmt.evidence
for ev in stmt.evidence:
entry = ev.annotations.get('%s_%s' % key)
deltas[key].append(entry if entry else None)
stmt.evidence = evs
# POLARITY
# For polarity we need to work in pairs
polarity_pairs = list(zip(deltas[('subj', 'polarity')],
Expand Down
6 changes: 6 additions & 0 deletions indra/util/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ def timed_func(*args, **kwargs):


def unicode_strs(obj, attr_filter=None):
from indra.statements import Statement
if isinstance(obj, non_unicode):
return False
# Check for an iterable
Expand All @@ -46,6 +47,8 @@ def unicode_strs(obj, attr_filter=None):
return False
if hasattr(obj, '__dict__'):
for item_name, item in obj.__dict__.items():
if isinstance(obj, Statement) and item_name == '_evidence':
continue
if attr_filter and item_name in attr_filter:
continue
has_unicode_strs = unicode_strs(item)
Expand All @@ -61,12 +64,15 @@ def unicode_strs(obj, attr_filter=None):


def decode_obj(obj, encoding='utf-8'):
from indra.statements import Statement
if isinstance(obj, non_unicode):
return obj.decode(encoding)
elif isinstance(obj, list) or isinstance(obj, tuple):
return [decode_obj(item) for item in obj]
elif hasattr(obj, '__dict__'):
for k, v in obj.__dict__.items():
if isinstance(obj, Statement) and k == '_evidence':
continue
obj.__dict__[k] = decode_obj(v)
return obj
elif isinstance(obj, dict):
Expand Down