From 0b2a7c87eb638bda690bff1b65400f29ec8d279d Mon Sep 17 00:00:00 2001 From: Yash Pankhania Date: Thu, 25 Jan 2024 14:57:59 +1100 Subject: [PATCH 1/2] Replaced json with orjson for large size loads --- db/python/layers/web.py | 24 ++++++++++-------------- models/models/analysis.py | 4 ++-- models/models/assay.py | 5 +++-- models/models/audit_log.py | 5 +++-- models/models/participant.py | 4 ++-- models/models/project.py | 5 +++-- models/models/sample.py | 4 ++-- models/models/sequencing_group.py | 4 ++-- openapi-templates/api_client.mustache | 4 ++-- requirements.txt | 1 + 10 files changed, 30 insertions(+), 30 deletions(-) diff --git a/db/python/layers/web.py b/db/python/layers/web.py index 20e6c82fd..9b3667c91 100644 --- a/db/python/layers/web.py +++ b/db/python/layers/web.py @@ -1,11 +1,12 @@ # pylint: disable=too-many-locals, too-many-instance-attributes import asyncio import itertools -import json import re from collections import defaultdict from datetime import date +import orjson + from api.utils import group_by from db.python.layers.base import BaseLayer from db.python.layers.sample import SampleLayer @@ -15,15 +16,10 @@ from db.python.tables.base import DbBase from db.python.tables.project import ProjectPermissionsTable from db.python.tables.sequencing_group import SequencingGroupTable -from models.models import ( - AssayInternal, - FamilySimpleInternal, - NestedParticipantInternal, - NestedSampleInternal, - NestedSequencingGroupInternal, - SearchItem, - parse_sql_bool, -) +from models.models import (AssayInternal, FamilySimpleInternal, + NestedParticipantInternal, NestedSampleInternal, + NestedSequencingGroupInternal, SearchItem, + parse_sql_bool) from models.models.web import ProjectSummaryInternal, WebProject @@ -109,7 +105,7 @@ def _project_summary_process_assay_rows_by_sample_id( AssayInternal( id=seq['id'], type=seq['type'], - meta=json.loads(seq['meta']), + meta=orjson.loads(seq['meta']), # pylint: disable=maybe-no-member sample_id=seq['sample_id'], ) for seq in assay_rows @@ -148,7 +144,7 @@ def _project_summary_process_sequencing_group_rows_by_sample_id( sg_id_to_sample_id[sg_id] = row['sample_id'] sg_by_id[sg_id] = NestedSequencingGroupInternal( id=sg_id, - meta=json.loads(row['meta']), + meta=orjson.loads(row['meta']), # pylint: disable=maybe-no-member type=row['type'], technology=row['technology'], platform=row['platform'], @@ -186,7 +182,7 @@ def _project_summary_process_sample_rows( id=s['id'], external_id=s['external_id'], type=s['type'], - meta=json.loads(s['meta']) or {}, + meta=orjson.loads(s['meta']) or {}, # pylint: disable=maybe-no-member created_date=str(sample_id_start_times.get(s['id'], '')), sequencing_groups=sg_models_by_sample_id.get(s['id'], []), non_sequencing_assays=filtered_assay_models_by_sid.get(s['id'], []), @@ -443,7 +439,7 @@ async def get_project_summary( NestedParticipantInternal( id=p['id'], external_id=p['external_id'], - meta=json.loads(p['meta']), + meta=orjson.loads(p['meta']), # pylint: disable=maybe-no-member families=pid_to_families.get(p['id'], []), samples=list(smodels_by_pid.get(p['id'])), reported_sex=p['reported_sex'], diff --git a/models/models/analysis.py b/models/models/analysis.py index fb6e3152d..fd85f3660 100644 --- a/models/models/analysis.py +++ b/models/models/analysis.py @@ -1,9 +1,9 @@ import enum -import json from datetime import date, datetime from typing import Any from pydantic import BaseModel +import orjson from models.base import SMBase from models.enums import AnalysisStatus @@ -38,7 +38,7 @@ def from_db(**kwargs): meta = kwargs.get('meta') if meta and isinstance(meta, str): - meta = json.loads(meta) + meta = orjson.loads(meta) # pylint: disable=maybe-no-member if timestamp_completed and isinstance(timestamp_completed, str): timestamp_completed = datetime.fromisoformat(timestamp_completed) diff --git a/models/models/assay.py b/models/models/assay.py index 139d60327..fdf0a0a13 100644 --- a/models/models/assay.py +++ b/models/models/assay.py @@ -1,6 +1,7 @@ -import json from typing import Any +import orjson + from models.base import OpenApiGenNoneType, SMBase from models.utils.sample_id_format import sample_id_format, sample_id_transform_to_raw @@ -31,7 +32,7 @@ def from_db(d: dict): if isinstance(meta, bytes): meta = meta.decode() if isinstance(meta, str): - meta = json.loads(meta) + meta = orjson.loads(meta) # pylint: disable=maybe-no-member return AssayInternal(meta=meta, **d) def to_external(self): diff --git a/models/models/audit_log.py b/models/models/audit_log.py index 20969dd50..b01a1e96c 100644 --- a/models/models/audit_log.py +++ b/models/models/audit_log.py @@ -1,5 +1,6 @@ import datetime -import json + +import orjson from models.base import SMBase from models.models.project import ProjectId @@ -26,6 +27,6 @@ def from_db(d: dict): """Take DB mapping object, and return SampleSequencing""" meta = {} if 'meta' in d: - meta = json.loads(d.pop('meta')) + meta = orjson.loads(d.pop('meta')) # pylint: disable=maybe-no-member return AuditLogInternal(meta=meta, **d) diff --git a/models/models/participant.py b/models/models/participant.py index de0dc6ef5..fc00e5943 100644 --- a/models/models/participant.py +++ b/models/models/participant.py @@ -1,4 +1,4 @@ -import json +import orjson from models.base import OpenApiGenNoneType, SMBase from models.models.family import FamilySimple, FamilySimpleInternal @@ -28,7 +28,7 @@ class ParticipantInternal(SMBase): def from_db(cls, data: dict): """Convert from db keys, mainly converting parsing meta""" if 'meta' in data and isinstance(data['meta'], str): - data['meta'] = json.loads(data['meta']) + data['meta'] = orjson.loads(data['meta']) # pylint: disable=maybe-no-member return ParticipantInternal(**data) diff --git a/models/models/project.py b/models/models/project.py index 9ca19542f..896375e22 100644 --- a/models/models/project.py +++ b/models/models/project.py @@ -1,6 +1,7 @@ -import json from typing import Optional +import orjson + from models.base import SMBase ProjectId = int @@ -20,5 +21,5 @@ class Project(SMBase): def from_db(kwargs): """From DB row, with db keys""" kwargs = dict(kwargs) - kwargs['meta'] = json.loads(kwargs['meta']) if kwargs.get('meta') else {} + kwargs['meta'] = orjson.loads(kwargs['meta']) if kwargs.get('meta') else {} # pylint: disable=maybe-no-member return Project(**kwargs) diff --git a/models/models/sample.py b/models/models/sample.py index 5f183ff2e..2a1d169aa 100644 --- a/models/models/sample.py +++ b/models/models/sample.py @@ -1,4 +1,4 @@ -import json +import orjson from models.base import OpenApiGenNoneType, SMBase, parse_sql_bool from models.models.assay import Assay, AssayInternal, AssayUpsert, AssayUpsertInternal @@ -37,7 +37,7 @@ def from_db(d: dict): if isinstance(meta, bytes): meta = meta.decode() if isinstance(meta, str): - meta = json.loads(meta) + meta = orjson.loads(meta) # pylint: disable=maybe-no-member return SampleInternal(id=_id, type=str(type_), meta=meta, active=active, **d) diff --git a/models/models/sequencing_group.py b/models/models/sequencing_group.py index 1ccd1a991..a5cb2e029 100644 --- a/models/models/sequencing_group.py +++ b/models/models/sequencing_group.py @@ -1,4 +1,4 @@ -import json +import orjson from models.base import OpenApiGenNoneType, SMBase from models.models.assay import Assay, AssayInternal, AssayUpsert, AssayUpsertInternal @@ -49,7 +49,7 @@ def from_db(cls, **kwargs): """From database model""" meta = kwargs.pop('meta') if meta and isinstance(meta, str): - meta = json.loads(meta) + meta = orjson.loads(meta) # pylint: disable=maybe-no-member _archived = kwargs.pop('archived', None) if _archived is not None: diff --git a/openapi-templates/api_client.mustache b/openapi-templates/api_client.mustache index d78bf1909..55627653d 100644 --- a/openapi-templates/api_client.mustache +++ b/openapi-templates/api_client.mustache @@ -1,6 +1,6 @@ {{>partial_header}} -import json +import orjson import atexit import mimetypes from multiprocessing.pool import ThreadPool @@ -333,7 +333,7 @@ class ApiClient(object): # fetch data from response object try: - received_data = json.loads(response.data) + received_data = orjson.loads(response.data) # pylint: disable=maybe-no-member except ValueError: received_data = response.data diff --git a/requirements.txt b/requirements.txt index 1f9640ea2..77f96f696 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,3 +22,4 @@ SQLAlchemy==1.4.41 cryptography>=41.0.0 python-dateutil==2.8.2 slack-sdk==3.20.2 +orjson==3.9.12 From 66dbebc70515ca180862131069436cca4b931f2e Mon Sep 17 00:00:00 2001 From: Yash Pankhania Date: Thu, 25 Jan 2024 15:41:20 +1100 Subject: [PATCH 2/2] added orjson to setup.py --- db/python/layers/web.py | 13 +++++++++---- setup.py | 1 + 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/db/python/layers/web.py b/db/python/layers/web.py index 9b3667c91..c4a81a10b 100644 --- a/db/python/layers/web.py +++ b/db/python/layers/web.py @@ -16,10 +16,15 @@ from db.python.tables.base import DbBase from db.python.tables.project import ProjectPermissionsTable from db.python.tables.sequencing_group import SequencingGroupTable -from models.models import (AssayInternal, FamilySimpleInternal, - NestedParticipantInternal, NestedSampleInternal, - NestedSequencingGroupInternal, SearchItem, - parse_sql_bool) +from models.models import ( + AssayInternal, + FamilySimpleInternal, + NestedParticipantInternal, + NestedSampleInternal, + NestedSequencingGroupInternal, + SearchItem, + parse_sql_bool, +) from models.models.web import ProjectSummaryInternal, WebProject diff --git a/setup.py b/setup.py index 0b812b931..38221aa47 100644 --- a/setup.py +++ b/setup.py @@ -38,6 +38,7 @@ # for get id-token 'cpg-utils >= 4.9.4', 'gql[aiohttp,requests]', + 'orjson==3.9.12', ], entry_points={ 'metamist_parser': [