@@ -32,6 +32,13 @@ class SequencingGroupFilter(GenericFilterModel):
32
32
active_only : GenericFilter [bool ] | None = GenericFilter (eq = True )
33
33
meta : GenericMetaFilter | None = None
34
34
35
+ # These fields are manually handled in the query to speed things up, because multiple table
36
+ # joins and dynamic computation are required.
37
+ created_on : GenericFilter [date ] | None = None
38
+ assay_meta : GenericMetaFilter | None = None
39
+ has_cram : bool | None = None
40
+ has_gvcf : bool | None = None
41
+
35
42
def __hash__ (self ): # pylint: disable=useless-super-delegation
36
43
return super ().__hash__ ()
37
44
@@ -69,17 +76,108 @@ async def query(
69
76
'platform' : 'sg.platform' ,
70
77
'active_only' : 'NOT sg.archived' ,
71
78
'external_id' : 'sgexid.external_id' ,
79
+ 'created_on' : 'DATE(row_start)' ,
80
+ 'assay_meta' : 'meta' ,
72
81
}
73
82
74
- wheres , values = filter_ .to_sql (sql_overrides )
75
- _query = f"""
76
- SELECT { self .common_get_keys_str }
77
- FROM sequencing_group sg
83
+ # Progressively build up the query and query values based on the filters provided to
84
+ # avoid unnecessary joins and improve performance.
85
+ _query : list [str ] = []
86
+ query_values : dict [str , Any ] = {}
87
+ # These fields are manually handled in the query
88
+ exclude_fields : list [str ] = []
89
+
90
+ # Base query
91
+ _query .append (
92
+ f"""
93
+ SELECT
94
+ { self .common_get_keys_str }
95
+ FROM sequencing_group AS sg
78
96
LEFT JOIN sample s ON s.id = sg.sample_id
79
- LEFT JOIN sequencing_group_external_id sgexid ON sg.id = sgexid.sequencing_group_id
80
- WHERE { wheres }
81
- """
82
- rows = await self .connection .fetch_all (_query , values )
97
+ LEFT JOIN sequencing_group_external_id sgexid ON sg.id = sgexid.sequencing_group_id"""
98
+ )
99
+
100
+ if filter_ .assay_meta is not None :
101
+ exclude_fields .append ('assay_meta' )
102
+ wheres , values = filter_ .to_sql (sql_overrides , only = ['assay_meta' ])
103
+ query_values .update (values )
104
+ _query .append (
105
+ f"""
106
+ INNER JOIN (
107
+ SELECT DISTINCT
108
+ sequencing_group_id
109
+ FROM
110
+ sequencing_group_assay
111
+ INNER JOIN (
112
+ SELECT
113
+ id
114
+ FROM
115
+ assay
116
+ WHERE
117
+ { wheres }
118
+ ) AS assay_subquery ON sequencing_group_assay.assay_id = assay_subquery.id
119
+ ) AS sga_subquery ON sg.id = sga_subquery.sequencing_group_id
120
+ """
121
+ )
122
+
123
+ if filter_ .created_on is not None :
124
+ exclude_fields .append ('created_on' )
125
+ wheres , values = filter_ .to_sql (sql_overrides , only = ['created_on' ])
126
+ query_values .update (values )
127
+ _query .append (
128
+ f"""
129
+ INNER JOIN (
130
+ SELECT
131
+ id,
132
+ TIMESTAMP(min(row_start)) AS created_on
133
+ FROM
134
+ sequencing_group FOR SYSTEM_TIME ALL
135
+ WHERE
136
+ { wheres }
137
+ GROUP BY
138
+ id
139
+ ) AS sg_timequery ON sg.id = sg_timequery.id
140
+ """
141
+ )
142
+
143
+ if filter_ .has_cram is not None or filter_ .has_gvcf is not None :
144
+ exclude_fields .extend (['has_cram' , 'has_gvcf' ])
145
+ wheres , values = filter_ .to_sql (
146
+ sql_overrides , only = ['has_cram' , 'has_gvcf' ]
147
+ )
148
+ query_values .update (values )
149
+ _query .append (
150
+ f"""
151
+ INNER JOIN (
152
+ SELECT
153
+ sequencing_group_id,
154
+ FIND_IN_SET('cram', GROUP_CONCAT(LOWER(anlysis_query.type))) > 0 AS has_cram,
155
+ FIND_IN_SET('gvcf', GROUP_CONCAT(LOWER(anlysis_query.type))) > 0 AS has_gvcf
156
+ FROM
157
+ analysis_sequencing_group
158
+ INNER JOIN (
159
+ SELECT
160
+ id, type
161
+ FROM
162
+ analysis
163
+ ) AS anlysis_query ON analysis_sequencing_group.analysis_id = anlysis_query.id
164
+ GROUP BY
165
+ sequencing_group_id
166
+ HAVING
167
+ { wheres }
168
+ ) AS sg_filequery ON sg.id = sg_filequery.sequencing_group_id
169
+ """
170
+ )
171
+
172
+ # Add the rest of the filters
173
+ wheres , values = filter_ .to_sql (sql_overrides , exclude = exclude_fields )
174
+ _query .append (
175
+ f"""
176
+ WHERE { wheres } """
177
+ )
178
+ query_values .update (values )
179
+
180
+ rows = await self .connection .fetch_all ('\n ' .join (_query ), query_values )
83
181
sgs = [SequencingGroupInternal .from_db (** dict (r )) for r in rows ]
84
182
projects = set (sg .project for sg in sgs )
85
183
return projects , sgs
0 commit comments