Insights details update (#930)
* Updating seqr project test data generating script

* Remove outputs field in Analysis upserts

* Insights dashboard - use cram timestamp, improve project selector

* Linting fixes

* Revert package.json update

* Remove commented out code

* ESLint fix

* Bump version: 7.4.1 → 7.4.2

* Remove a comment
EddieLF authored Sep 30, 2024
1 parent e685129 commit 7bd9882
Showing 12 changed files with 197 additions and 101 deletions.
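At a glance, the functional change for the Insights dashboard is that each details row now carries the latest completed CRAM as a record (id, output path, completion date) instead of a bare `completed_cram` boolean. A minimal before/after sketch of that row shape — the field names follow the diff below, all values are hypothetical:

```python
# Hypothetical values for illustration; only the field names come from this diff.

# Before this commit: a single flag per sequencing group.
details_row_before = {
    'sequencing_group_id': 'SG01',
    'completed_cram': True,
}

# After this commit: the latest completed CRAM record, built by get_cram_record().
details_row_after = {
    'sequencing_group_id': 'SG01',
    'cram': {
        'id': 12345,                        # analysis id
        'output': 'path/to/example.cram',   # resolved output path
        'timestamp_completed': '30-09-24',  # formatted with strftime('%d-%m-%y')
    },
}
```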
2 changes: 1 addition & 1 deletion .bumpversion.cfg
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 7.4.1
current_version = 7.4.2
commit = True
tag = False
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>[A-z0-9-]+)
2 changes: 1 addition & 1 deletion api/server.py
@@ -25,7 +25,7 @@
from db.python.utils import get_logger

# This tag is automatically updated by bump2version
_VERSION = '7.4.1'
_VERSION = '7.4.2'


logger = get_logger()
158 changes: 121 additions & 37 deletions db/python/layers/project_insights.py
@@ -277,6 +277,16 @@ def get_sg_web_report_links(

return report_links

def get_cram_record(self, cram_row: AnalysisRow | None):
"""Get the CRAM record for a sequencing group"""
return {
'id': cram_row.id if cram_row else None,
'output': cram_row.output if cram_row else None,
'timestamp_completed': cram_row.timestamp_completed.strftime('%d-%m-%y')
if cram_row
else None,
}

def get_analysis_stats_internal_from_record(
self,
analysis_row: AnalysisRow | None,
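A quick usage sketch of the `get_cram_record` helper added above. The `AnalysisRow` stand-in and the values are hypothetical — the real layer passes its own `AnalysisRow` rows — but the dict logic mirrors the method as written:

```python
from dataclasses import dataclass
from datetime import datetime


@dataclass
class AnalysisRow:  # stand-in for the layer's AnalysisRow, for illustration only
    id: int
    output: str | None
    timestamp_completed: datetime


def get_cram_record(cram_row: AnalysisRow | None) -> dict:
    """Normalise a possibly-missing CRAM analysis row into a plain dict."""
    return {
        'id': cram_row.id if cram_row else None,
        'output': cram_row.output if cram_row else None,
        'timestamp_completed': cram_row.timestamp_completed.strftime('%d-%m-%y')
        if cram_row
        else None,
    }


row = AnalysisRow(id=1, output='path/to/example.cram', timestamp_completed=datetime(2024, 9, 30))
print(get_cram_record(row))   # {'id': 1, 'output': 'path/to/example.cram', 'timestamp_completed': '30-09-24'}
print(get_cram_record(None))  # {'id': None, 'output': None, 'timestamp_completed': None}
```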
@@ -339,7 +349,7 @@ def get_insights_details_internal_row(
sequencing_platform: SequencingPlatform,
sequencing_technology: SequencingTechnology,
sequencing_group_details: SequencingGroupDetailRow,
sequencing_groups_with_crams: list[SequencingGroupInternalId],
sequencing_group_cram: AnalysisRow,
analysis_sequencing_groups: dict[AnalysisId, list[SequencingGroupInternalId]],
latest_annotate_dataset_id: AnalysisId | None,
latest_snv_es_index_id: AnalysisId | None,
@@ -363,8 +373,8 @@
sgs_in_latest_sv_es_index = analysis_sequencing_groups.get(
latest_sv_es_index_id, []
)
sg_cram = self.get_cram_record(sequencing_group_cram)

# participant_ext_ids = self.convert_to_external_ids(sequencing_group_details.participant_external_id)
sample_ext_ids = self.convert_to_external_ids(
sequencing_group_details.sample_external_ids
)
@@ -382,8 +392,7 @@
sample_id=sequencing_group_details.sample_id,
sample_ext_ids=sample_ext_ids,
sequencing_group_id=sequencing_group_details.sequencing_group_id,
completed_cram=sequencing_group_details.sequencing_group_id
in sequencing_groups_with_crams,
cram=sg_cram,
in_latest_annotate_dataset=sequencing_group_details.sequencing_group_id
in sgs_in_latest_annotate_dataset,
in_latest_snv_es_index=sequencing_group_details.sequencing_group_id
@@ -528,24 +537,24 @@ async def _crams_by_project_id_and_seq_fields(
sequencing_types: list[SequencingType],
) -> dict[ProjectSeqTypeTechnologyKey, list[SequencingGroupInternalId]]:
_query = """
SELECT
a.project,
sg.type as sequencing_type,
sg.technology as sequencing_technology,
GROUP_CONCAT(DISTINCT asg.sequencing_group_id) as sequencing_group_ids
FROM
analysis a
LEFT JOIN analysis_sequencing_group asg ON a.id = asg.analysis_id
LEFT JOIN sequencing_group sg ON sg.id = asg.sequencing_group_id
WHERE
a.project IN :projects
AND sg.type IN :sequencing_types
AND a.type = 'CRAM'
AND a.status = 'COMPLETED'
GROUP BY
a.project,
sg.type,
sg.technology;
SELECT
a.project,
sg.type as sequencing_type,
sg.technology as sequencing_technology,
GROUP_CONCAT(DISTINCT asg.sequencing_group_id) as sequencing_group_ids
FROM
analysis a
LEFT JOIN analysis_sequencing_group asg ON a.id = asg.analysis_id
LEFT JOIN sequencing_group sg ON sg.id = asg.sequencing_group_id
WHERE
a.project IN :projects
AND sg.type IN :sequencing_types
AND a.type = 'CRAM'
AND a.status = 'COMPLETED'
GROUP BY
a.project,
sg.type,
sg.technology;
"""

_query_results = await self.connection.fetch_all(
@@ -559,6 +568,72 @@ async def _crams_by_project_id_and_seq_fields(
_query_results, 'sequencing_group_ids'
)

async def _sg_crams_by_project_id_and_seq_fields(
self, project_ids: list[ProjectId], sequencing_types: list[str]
) -> dict[
ProjectSeqTypeTechnologyKey, dict[SequencingGroupInternalId, AnalysisRow]
]:
_query = """
SELECT
a.project,
a.id as analysis_id,
sg.id as sequencing_group_id,
sg.type as sequencing_type,
sg.technology as sequencing_technology,
COALESCE(a.output, ao.output, of.path) as output,
a.timestamp_completed
FROM
analysis a
LEFT JOIN analysis_sequencing_group asg ON a.id = asg.analysis_id
LEFT JOIN analysis_outputs ao ON a.id = ao.analysis_id
LEFT JOIN output_file of ON ao.file_id = of.id
LEFT JOIN sequencing_group sg ON sg.id = asg.sequencing_group_id
INNER JOIN (
SELECT
asg.sequencing_group_id,
MAX(a.timestamp_completed) as max_timestamp
FROM analysis a
INNER JOIN analysis_sequencing_group asg ON a.id = asg.analysis_id
WHERE a.type='CRAM'
AND a.status='COMPLETED'
AND a.project IN :projects
GROUP BY asg.sequencing_group_id
) max_timestamps ON asg.sequencing_group_id = max_timestamps.sequencing_group_id
AND a.timestamp_completed = max_timestamps.max_timestamp
WHERE
a.project IN :projects
AND sg.type IN :sequencing_types
AND a.type = 'CRAM'
AND a.status = 'COMPLETED';
"""
_query_results = await self.connection.fetch_all(
_query,
{
'projects': project_ids,
'sequencing_types': sequencing_types,
},
)

cram_timestamps_by_project_id_and_seq_fields: dict[
ProjectSeqTypeTechnologyKey, dict[SequencingGroupInternalId, AnalysisRow]
] = {}
for row in _query_results:
key = ProjectSeqTypeTechnologyKey(
row['project'],
row['sequencing_type'],
row['sequencing_technology'],
)
sg_id = row['sequencing_group_id']
cram_row = AnalysisRow(
id=row['analysis_id'],
output=row['output'],
timestamp_completed=row['timestamp_completed'],
)
if key not in cram_timestamps_by_project_id_and_seq_fields:
cram_timestamps_by_project_id_and_seq_fields[key] = {}
cram_timestamps_by_project_id_and_seq_fields[key][sg_id] = cram_row
return cram_timestamps_by_project_id_and_seq_fields

async def _latest_annotate_dataset_by_project_id_and_seq_type(
self, project_ids: list[ProjectId], sequencing_types: list[str]
) -> dict[ProjectSeqTypeKey, AnalysisRow]:
@@ -585,11 +660,12 @@ async def _latest_annotate_dataset_by_project_id_and_seq_type(
) max_timestamps ON a.project = max_timestamps.project
AND a.timestamp_completed = max_timestamps.max_timestamp
AND JSON_UNQUOTE(JSON_EXTRACT(a.meta, '$.sequencing_type')) = max_timestamps.sequencing_type
WHERE a.type = 'CUSTOM'
AND a.status = 'COMPLETED'
AND a.project IN :projects
AND JSON_UNQUOTE(JSON_EXTRACT(a.meta, '$.sequencing_type')) IN :sequencing_types
AND JSON_EXTRACT(a.meta, '$.stage') = 'AnnotateDataset';
WHERE
a.type = 'CUSTOM'
AND a.status = 'COMPLETED'
AND a.project IN :projects
AND JSON_UNQUOTE(JSON_EXTRACT(a.meta, '$.sequencing_type')) IN :sequencing_types
AND JSON_EXTRACT(a.meta, '$.stage') = 'AnnotateDataset';
-- JSON_UNQUOTE is necessary to compare JSON values with IN operator
"""
_query_results = await self.connection.fetch_all(
Expand Down Expand Up @@ -634,8 +710,9 @@ async def _latest_es_indices_by_project_id_and_seq_type_and_stage(
AND a.timestamp_completed = max_timestamps.max_timestamp
AND JSON_UNQUOTE(JSON_EXTRACT(a.meta, '$.sequencing_type')) = max_timestamps.sequencing_type
AND JSON_EXTRACT(a.meta, '$.stage') = max_timestamps.stage
WHERE a.project IN :projects
AND JSON_UNQUOTE(JSON_EXTRACT(a.meta, '$.sequencing_type')) in :sequencing_types;
WHERE
a.project IN :projects
AND JSON_UNQUOTE(JSON_EXTRACT(a.meta, '$.sequencing_type')) in :sequencing_types;
"""
_query_results = await self.connection.fetch_all(
_query,
@@ -736,12 +813,14 @@ async def _details_stripy_reports(
SELECT
a.project,
a.id,
a.output,
coalesce(a.output, ao.output, of.path) as output,
a.timestamp_completed,
asg.sequencing_group_id,
JSON_EXTRACT(meta, '$.outliers_detected') as outliers_detected,
JSON_QUERY(meta, '$.outlier_loci') as outlier_loci
JSON_EXTRACT(a.meta, '$.outliers_detected') as outliers_detected,
JSON_QUERY(a.meta, '$.outlier_loci') as outlier_loci
FROM analysis a
LEFT JOIN analysis_outputs ao on a.id=ao.analysis_id
LEFT JOIN output_file of on of.id = ao.file_id
LEFT JOIN analysis_sequencing_group asg on asg.analysis_id=a.id
INNER JOIN (
SELECT
@@ -782,10 +861,12 @@ async def _details_mito_reports(
SELECT
a.project,
a.id,
a.output,
coalesce(a.output, ao.output, of.path) as output,
a.timestamp_completed,
asg.sequencing_group_id
FROM analysis a
LEFT JOIN analysis_outputs ao on a.id=ao.analysis_id
LEFT JOIN output_file of on of.id = ao.file_id
LEFT JOIN analysis_sequencing_group asg on asg.analysis_id=a.id
INNER JOIN (
SELECT
@@ -979,7 +1060,7 @@ async def get_project_insights_details(
self._sequencing_group_details_by_project_and_seq_fields(
project_ids, sequencing_types
),
self._crams_by_project_id_and_seq_fields(project_ids, sequencing_types),
self._sg_crams_by_project_id_and_seq_fields(project_ids, sequencing_types),
self._latest_annotate_dataset_by_project_id_and_seq_type(
project_ids, sequencing_types
),
@@ -1021,8 +1102,10 @@ async def get_project_insights_details(
):
continue

sequencing_groups_with_crams = crams_by_project_id_and_seq_fields.get(
(project.id, seq_type, seq_tech), []
sequencing_groups_crams: dict[SequencingGroupInternalId, AnalysisRow] = (
crams_by_project_id_and_seq_fields.get(
(project.id, seq_type, seq_tech), {}
)
)
(
latest_annotate_dataset_row,
@@ -1039,14 +1122,15 @@ async def get_project_insights_details(
for details_row in details_rows:
if not details_row:
continue
sg_id = details_row.sequencing_group_id
response.append(
self.get_insights_details_internal_row(
project=project,
sequencing_type=seq_type,
sequencing_platform=seq_platform,
sequencing_technology=seq_tech,
sequencing_group_details=details_row,
sequencing_groups_with_crams=sequencing_groups_with_crams,
sequencing_group_cram=sequencing_groups_crams.get(sg_id),
analysis_sequencing_groups=analysis_sequencing_groups,
latest_annotate_dataset_id=(
latest_annotate_dataset_row.id
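The new `_sg_crams_by_project_id_and_seq_fields` query replaces the old one-boolean-per-group lookup: the `MAX(timestamp_completed)` self-join keeps only the newest completed CRAM per sequencing group, and `COALESCE(a.output, ao.output, of.path)` resolves the output from either the legacy `analysis.output` column or the newer `analysis_outputs`/`output_file` tables (the stripy and mito report queries gain the same coalesce). A simplified, in-Python illustration of that latest-per-group selection over hypothetical rows — not the repository's code:

```python
from datetime import datetime

# Hypothetical query rows: (sequencing_group_id, analysis_id, output, timestamp_completed)
rows = [
    ('sg1', 101, 'sg1.v1.cram', datetime(2024, 1, 5)),
    ('sg1', 107, 'sg1.v2.cram', datetime(2024, 9, 1)),
    ('sg2', 103, None, datetime(2024, 3, 2)),
]

# Keep only the newest completed CRAM per sequencing group -- the same effect
# the MAX(timestamp_completed) inner join has in the SQL above.
latest_cram_by_sg: dict[str, tuple] = {}
for sg_id, analysis_id, output, completed in rows:
    current = latest_cram_by_sg.get(sg_id)
    if current is None or completed > current[3]:
        latest_cram_by_sg[sg_id] = (sg_id, analysis_id, output, completed)

assert latest_cram_by_sg['sg1'][1] == 107  # the newer CRAM wins for sg1
assert latest_cram_by_sg['sg2'][1] == 103
```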
2 changes: 1 addition & 1 deletion deploy/python/version.txt
@@ -1 +1 @@
7.4.1
7.4.2
6 changes: 3 additions & 3 deletions models/models/project_insights.py
@@ -59,7 +59,7 @@ class ProjectInsightsDetailsInternal:
sample_id: int
sample_ext_ids: list[str]
sequencing_group_id: int
completed_cram: bool
cram: dict[str, Any]
in_latest_annotate_dataset: bool
in_latest_snv_es_index: bool
in_latest_sv_es_index: bool
@@ -83,7 +83,7 @@ def to_external(self):
sequencing_group_id=sequencing_group_id_format.sequencing_group_id_format(
self.sequencing_group_id
),
completed_cram=self.completed_cram,
cram=self.cram,
in_latest_annotate_dataset=self.in_latest_annotate_dataset,
in_latest_snv_es_index=self.in_latest_snv_es_index,
in_latest_sv_es_index=self.in_latest_sv_es_index,
@@ -107,7 +107,7 @@ class ProjectInsightsDetails(SMBase):
sample_id: str
sample_ext_ids: list[str]
sequencing_group_id: str
completed_cram: bool
cram: dict[str, Any]
in_latest_annotate_dataset: bool
in_latest_snv_es_index: bool
in_latest_sv_es_index: bool
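With both the internal and external models now exposing `cram: dict[str, Any]` in place of `completed_cram: bool`, consumers of `ProjectInsightsDetails` read the record rather than a flag. A hypothetical consumer-side shim that recovers the old boolean, purely for illustration:

```python
from typing import Any


def has_completed_cram(details_row: dict[str, Any]) -> bool:
    """Hypothetical helper: a group has a completed CRAM if its cram record has an id."""
    cram = details_row.get('cram') or {}
    return cram.get('id') is not None


assert has_completed_cram({'cram': {'id': 7, 'output': 'x.cram', 'timestamp_completed': '30-09-24'}})
assert not has_completed_cram({'cram': {'id': None, 'output': None, 'timestamp_completed': None}})
```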
2 changes: 1 addition & 1 deletion setup.py
@@ -19,7 +19,7 @@
setup(
name=PKG,
# This tag is automatically updated by bump2version
version='7.4.1',
version='7.4.2',
description='Python API for interacting with the Sample API system',
long_description=readme,
long_description_content_type='text/markdown',
