Skip to content

Commit 9eecc05

Browse files
committed
Refactor all *.from_db() routines to use from_db_json()
This wrapper calls json.loads() but also handles None (returning None), which enables the code at many call sites to be simplified. Removed some callers' `if isinstance(field, str): ...` code, which has the effect of newly disallowing field values that are already dicts. However, we've verified that all *.from_db() calls have raw database outputs as their arguments, so such fields will always be strings, and IMHO giving a dict to from_db_json() is really a logic error that should be detected. In SequencingGroupInternal.from_db(), added `pop(..., None)` so that a missing meta field is now accepted. The previous code suggests that having pop() produce KeyError here was unintended. The expected argument types for from_db_json() are listed in the definition, but we don't list its return type. The best we could say in general is `object` but most call sites expect `dict[str, str]` (or occasionally `list[str]`) due to the shape of their expected JSON. Specifying `object` would lead to mypy errors at these call sites.
1 parent 71dddbe commit 9eecc05

12 files changed

+37
-66
lines changed

db/python/layers/web.py

+7-8
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
# pylint: disable=too-many-locals, too-many-instance-attributes
22
import asyncio
33
import itertools
4-
import json
54
import re
65
from collections import defaultdict
76
from datetime import date
@@ -15,7 +14,7 @@
1514
from db.python.tables.base import DbBase
1615
from db.python.tables.project import ProjectPermissionsTable
1716
from db.python.tables.sequencing_group import SequencingGroupTable
18-
from db.python.utils import escape_like_term
17+
from db.python.utils import escape_like_term, from_db_json
1918
from models.models import (
2019
AssayInternal,
2120
FamilySimpleInternal,
@@ -114,7 +113,7 @@ def _project_summary_process_assay_rows_by_sample_id(
114113
AssayInternal(
115114
id=seq['id'],
116115
type=seq['type'],
117-
meta=json.loads(seq['meta']),
116+
meta=from_db_json(seq['meta']),
118117
sample_id=seq['sample_id'],
119118
)
120119
for seq in assay_rows
@@ -153,7 +152,7 @@ def _project_summary_process_sequencing_group_rows_by_sample_id(
153152
sg_id_to_sample_id[sg_id] = row['sample_id']
154153
sg_by_id[sg_id] = NestedSequencingGroupInternal(
155154
id=sg_id,
156-
meta=json.loads(row['meta']),
155+
meta=from_db_json(row['meta']),
157156
type=row['type'],
158157
technology=row['technology'],
159158
platform=row['platform'],
@@ -189,9 +188,9 @@ def _project_summary_process_sample_rows(
189188
smodels = [
190189
NestedSampleInternal(
191190
id=s['id'],
192-
external_ids=json.loads(s['external_ids']),
191+
external_ids=from_db_json(s['external_ids']),
193192
type=s['type'],
194-
meta=json.loads(s['meta']) or {},
193+
meta=from_db_json(s['meta']) or {},
195194
created_date=str(sample_id_start_times.get(s['id'], '')),
196195
sequencing_groups=sg_models_by_sample_id.get(s['id'], []),
197196
non_sequencing_assays=filtered_assay_models_by_sid.get(s['id'], []),
@@ -450,8 +449,8 @@ async def get_project_summary(
450449
pmodels.append(
451450
NestedParticipantInternal(
452451
id=p['id'],
453-
external_ids=json.loads(p['external_ids']),
454-
meta=json.loads(p['meta']),
452+
external_ids=from_db_json(p['external_ids']),
453+
meta=from_db_json(p['meta']),
455454
families=pid_to_families.get(p['id'], []),
456455
samples=list(smodels_by_pid.get(p['id'])),
457456
reported_sex=p['reported_sex'],

db/python/tables/participant_phenotype.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
import json
21
from collections import defaultdict
32
from typing import Any, Dict, List, Tuple
43

54
from db.python.tables.base import DbBase
5+
from db.python.utils import from_db_json, to_db_json
66

77

88
class ParticipantPhenotypeTable(DbBase):
@@ -32,7 +32,7 @@ async def add_key_value_rows(self, rows: List[Tuple[int, str, Any]]) -> None:
3232
{
3333
'participant_id': r[0],
3434
'description': r[1],
35-
'value': json.dumps(r[2]),
35+
'value': to_db_json(r[2]),
3636
'audit_log_id': audit_log_id,
3737
}
3838
for r in rows
@@ -67,7 +67,7 @@ async def get_key_value_rows_for_participant_ids(
6767
pid = row['participant_id']
6868
key = row['description']
6969
value = row['value']
70-
formed_key_value_pairs[pid][key] = json.loads(value)
70+
formed_key_value_pairs[pid][key] = from_db_json(value)
7171

7272
return formed_key_value_pairs
7373

@@ -91,6 +91,6 @@ async def get_key_value_rows_for_all_participants(
9191
pid = row['participant_id']
9292
key = row['description']
9393
value = row['value']
94-
formed_key_value_pairs[pid][key] = json.loads(value)
94+
formed_key_value_pairs[pid][key] = from_db_json(value)
9595

9696
return formed_key_value_pairs

db/python/utils.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -431,8 +431,10 @@ def get_logger():
431431
return _logger
432432

433433

434-
def from_db_json(text):
435-
"""Convert DB's JSON text to Python object"""
434+
def from_db_json(text: str | bytes | None):
435+
"""Convert database's JSON text to Python object"""
436+
if text is None:
437+
return None
436438
return json.loads(text)
437439

438440

models/models/analysis.py

+2-6
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
import enum
2-
import json
32
from datetime import date, datetime
43
from typing import Any
54

65
from pydantic import BaseModel
76

7+
from db.python.utils import from_db_json
88
from models.base import SMBase
99
from models.enums import AnalysisStatus
1010
from models.utils.cohort_id_format import (
@@ -40,10 +40,6 @@ def from_db(**kwargs):
4040
analysis_type = kwargs.pop('type', None)
4141
status = kwargs.pop('status', None)
4242
timestamp_completed = kwargs.pop('timestamp_completed', None)
43-
meta = kwargs.get('meta')
44-
45-
if meta and isinstance(meta, str):
46-
meta = json.loads(meta)
4743

4844
if timestamp_completed and isinstance(timestamp_completed, str):
4945
timestamp_completed = datetime.fromisoformat(timestamp_completed)
@@ -65,7 +61,7 @@ def from_db(**kwargs):
6561
output=kwargs.pop('output', []),
6662
timestamp_completed=timestamp_completed,
6763
project=kwargs.get('project'),
68-
meta=meta,
64+
meta=from_db_json(kwargs.get('meta')),
6965
active=bool(kwargs.get('active')),
7066
author=kwargs.get('author'),
7167
)

models/models/analysis_runner.py

+2-6
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import datetime
2-
import json
32

3+
from db.python.utils import from_db_json
44
from models.base import SMBase
55
from models.models.project import ProjectId
66

@@ -34,10 +34,6 @@ class AnalysisRunnerInternal(SMBase):
3434
@staticmethod
3535
def from_db(**kwargs):
3636
"""Convert from db Record"""
37-
meta = kwargs.pop('meta')
38-
if meta:
39-
meta = json.loads(meta)
40-
4137
_timestamp = kwargs.pop('timestamp')
4238
# if _timestamp:
4339
# _timestamp = datetime.datetime.fromisoformat(_timestamp)
@@ -58,7 +54,7 @@ def from_db(**kwargs):
5854
hail_version=kwargs.pop('hail_version'),
5955
batch_url=kwargs.pop('batch_url'),
6056
submitting_user=kwargs.pop('submitting_user'),
61-
meta=meta,
57+
meta=from_db_json(kwargs.pop('meta')),
6258
audit_log_id=kwargs.pop('audit_log_id'),
6359
output_path=kwargs.pop('output_path'),
6460
)

models/models/assay.py

+2-8
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
import json
21
from typing import Any
32

3+
from db.python.utils import from_db_json
44
from models.base import OpenApiGenNoneType, SMBase
55
from models.utils.sample_id_format import sample_id_format, sample_id_transform_to_raw
66

@@ -26,13 +26,7 @@ def __eq__(self, other):
2626
def from_db(d: dict):
2727
"""Take DB mapping object, and return SampleSequencing"""
2828
meta = d.pop('meta', None)
29-
30-
if meta:
31-
if isinstance(meta, bytes):
32-
meta = meta.decode()
33-
if isinstance(meta, str):
34-
meta = json.loads(meta)
35-
return AssayInternal(meta=meta, **d)
29+
return AssayInternal(meta=from_db_json(meta), **d)
3630

3731
def to_external(self):
3832
"""Convert to transport model"""

models/models/audit_log.py

+3-6
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import datetime
2-
import json
32

3+
from db.python.utils import from_db_json
44
from models.base import SMBase
55
from models.models.project import ProjectId
66

@@ -24,8 +24,5 @@ class AuditLogInternal(SMBase):
2424
@staticmethod
2525
def from_db(d: dict):
2626
"""Take DB mapping object, and return SampleSequencing"""
27-
meta = {}
28-
if 'meta' in d:
29-
meta = json.loads(d.pop('meta'))
30-
31-
return AuditLogInternal(meta=meta, **d)
27+
meta = d.pop('meta', None)
28+
return AuditLogInternal(meta=from_db_json(meta) or {}, **d)

models/models/cohort.py

+2-5
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
import json
2-
1+
from db.python.utils import from_db_json
32
from models.base import SMBase
43
from models.models.project import ProjectId
54
from models.utils.cohort_id_format import cohort_id_format
@@ -92,9 +91,7 @@ def from_db(d: dict):
9291
_id = d.pop('id', None)
9392
name = d.pop('name', None)
9493
description = d.pop('description', None)
95-
criteria = d.pop('criteria', None)
96-
if criteria and isinstance(criteria, str):
97-
criteria = json.loads(criteria)
94+
criteria = from_db_json(d.pop('criteria', None))
9895
project = d.pop('project', None)
9996

10097
return CohortTemplateInternal(

models/models/participant.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
import json
2-
1+
from db.python.utils import from_db_json
32
from models.base import OpenApiGenNoneType, SMBase
43
from models.models.family import FamilySimple, FamilySimpleInternal
54
from models.models.project import ProjectId
@@ -28,8 +27,8 @@ class ParticipantInternal(SMBase):
2827
def from_db(cls, data: dict):
2928
"""Convert from db keys, mainly converting JSON-encoded fields"""
3029
for key in ['external_ids', 'meta']:
31-
if key in data and isinstance(data[key], str):
32-
data[key] = json.loads(data[key])
30+
if key in data:
31+
data[key] = from_db_json(data[key])
3332

3433
return ParticipantInternal(**data)
3534

models/models/project.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
import json
21
from typing import Optional
32

3+
from db.python.utils import from_db_json
44
from models.base import SMBase
55

66
ProjectId = int
@@ -20,5 +20,5 @@ class Project(SMBase):
2020
def from_db(kwargs):
2121
"""From DB row, with db keys"""
2222
kwargs = dict(kwargs)
23-
kwargs['meta'] = json.loads(kwargs['meta']) if kwargs.get('meta') else {}
23+
kwargs['meta'] = from_db_json(kwargs.get('meta')) or {}
2424
return Project(**kwargs)

models/models/sample.py

+4-11
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
import json
2-
1+
from db.python.utils import from_db_json
32
from models.base import OpenApiGenNoneType, SMBase, parse_sql_bool
43
from models.models.assay import Assay, AssayInternal, AssayUpsert, AssayUpsertInternal
54
from models.models.sequencing_group import (
@@ -30,17 +29,11 @@ def from_db(d: dict):
3029
"""
3130
_id = d.pop('id', None)
3231
type_ = d.pop('type', None)
33-
meta = d.pop('meta', None)
32+
meta = from_db_json(d.pop('meta', None))
3433
active = parse_sql_bool(d.pop('active', None))
3534

36-
if meta:
37-
if isinstance(meta, bytes):
38-
meta = meta.decode()
39-
if isinstance(meta, str):
40-
meta = json.loads(meta)
41-
42-
if 'external_ids' in d and isinstance(d['external_ids'], str):
43-
d['external_ids'] = json.loads(d['external_ids'])
35+
if 'external_ids' in d:
36+
d['external_ids'] = from_db_json(d['external_ids'])
4437

4538
return SampleInternal(id=_id, type=str(type_), meta=meta, active=active, **d)
4639

models/models/sequencing_group.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
import json
21
from typing import Any
32

3+
from db.python.utils import from_db_json
44
from models.base import OpenApiGenNoneType, SMBase
55
from models.models.assay import Assay, AssayInternal, AssayUpsert, AssayUpsertInternal
66
from models.utils.sample_id_format import sample_id_format, sample_id_transform_to_raw
@@ -47,9 +47,7 @@ class SequencingGroupInternal(SMBase):
4747
@classmethod
4848
def from_db(cls, **kwargs):
4949
"""From database model"""
50-
meta = kwargs.pop('meta')
51-
if meta and isinstance(meta, str):
52-
meta = json.loads(meta)
50+
meta = from_db_json(kwargs.pop('meta', None))
5351

5452
_archived = kwargs.pop('archived', None)
5553
if _archived is not None:

0 commit comments

Comments
 (0)