From d47431ae26ae395c6e0a6417e6440e1a8c861af0 Mon Sep 17 00:00:00 2001
From: Riley Grant
Date: Tue, 14 Jan 2025 10:00:17 -0600
Subject: [PATCH 1/4] feat(data-pipeline): repartition gene tables
---
data-pipeline/src/data_pipeline/data_types/gene.py | 2 +-
data-pipeline/src/data_pipeline/pipelines/genes.py | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/data-pipeline/src/data_pipeline/data_types/gene.py b/data-pipeline/src/data_pipeline/data_types/gene.py
index f6221afb5..cfa469b19 100644
--- a/data-pipeline/src/data_pipeline/data_types/gene.py
+++ b/data-pipeline/src/data_pipeline/data_types/gene.py
@@ -234,7 +234,7 @@ def prepare_gene_table_for_release(genes_path, keep_mane_version_global_annotati
else:
ds = ds.select_globals()
- ds = ds.repartition(50)
+ ds = ds.repartition(100)
return ds
diff --git a/data-pipeline/src/data_pipeline/pipelines/genes.py b/data-pipeline/src/data_pipeline/pipelines/genes.py
index 7b7ab14eb..96ed9e156 100644
--- a/data-pipeline/src/data_pipeline/pipelines/genes.py
+++ b/data-pipeline/src/data_pipeline/pipelines/genes.py
@@ -401,7 +401,7 @@ def annotate_with_preferred_transcript(table_path):
pipeline.add_task(
"prepare_grch37_genes_table_for_public_release",
prepare_gene_table_for_release,
- f"/{genes_subdir}/gnomad.browser.GRCh37.GENCODEv19.ht",
+ f"/{genes_subdir}/gnomad.browser.GRCh37.GENCODEv19.pext.ht",
{
"genes_path": pipeline.get_task("annotate_grch37_genes_step_5"),
},
@@ -489,7 +489,7 @@ def annotate_with_constraint(genes_path, constraint_path):
pipeline.add_task(
"prepare_grch38_genes_table_for_public_release",
prepare_gene_table_for_release,
- f"/{genes_subdir}/gnomad.browser.GRCh38.GENCODEv39.ht",
+ f"/{genes_subdir}/gnomad.browser.GRCh38.GENCODEv39.pext.ht",
{
"genes_path": pipeline.get_task("remove_grch38_genes_constraint_for_release"),
},
From 35a262a6000cf08f773a31ba34889a5c2e2a0eb9 Mon Sep 17 00:00:00 2001
From: Riley Grant
Date: Tue, 14 Jan 2025 10:02:38 -0600
Subject: [PATCH 2/4] feat(data-pipeline): update Download links
---
browser/src/DataPage/GnomadV2Downloads.tsx | 2 +-
browser/src/DataPage/GnomadV4Downloads.tsx | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/browser/src/DataPage/GnomadV2Downloads.tsx b/browser/src/DataPage/GnomadV2Downloads.tsx
index 524d1e902..e060482b3 100644
--- a/browser/src/DataPage/GnomadV2Downloads.tsx
+++ b/browser/src/DataPage/GnomadV2Downloads.tsx
@@ -303,7 +303,7 @@ const GnomadV2Downloads = () => {
diff --git a/browser/src/DataPage/GnomadV4Downloads.tsx b/browser/src/DataPage/GnomadV4Downloads.tsx
index 06104295a..c78957a54 100644
--- a/browser/src/DataPage/GnomadV4Downloads.tsx
+++ b/browser/src/DataPage/GnomadV4Downloads.tsx
@@ -293,7 +293,7 @@ const GnomadV4Downloads = () => {
From 0d211c7ef7ef73f4b203c590564ae18ee1ddb26b Mon Sep 17 00:00:00 2001
From: Riley Grant
Date: Tue, 14 Jan 2025 13:23:43 -0600
Subject: [PATCH 3/4] feat(browser): update browser gene model help text
---
browser/help/topics/v4-browser-hts.md | 22 +++++++++++++++++-----
1 file changed, 17 insertions(+), 5 deletions(-)
diff --git a/browser/help/topics/v4-browser-hts.md b/browser/help/topics/v4-browser-hts.md
index 61dd76253..0ba3a287c 100644
--- a/browser/help/topics/v4-browser-hts.md
+++ b/browser/help/topics/v4-browser-hts.md
@@ -182,12 +182,15 @@ Row fields:
- `xstart`: Transcript genomic start position (format: chromosomeposition).
- `xstop`: Transcript genomic stop position (format: chromosomeposition).
- `exons`: Array containing transcript exon information.
- - `feature_type`: Exon type (e.g., CDS).
- - `start`: Exon genomic start position (position only).
- - `stop`: Exon genomic stop position (position only).
- - `xstart`: Exon genomic start position (format: chromosomeposition).
- - `xstop`: Exon genomic start position (format: chromosomeposition).
+ - `feature_type`: Exon type (e.g., CDS).
+ - `start`: Exon genomic start position (position only).
+ - `stop`: Exon genomic stop position (position only).
+ - `xstart`: Exon genomic start position (format: chromosomeposition).
+ - `xstop`: Exon genomic stop position (format: chromosomeposition).
- `reference_genome`: Reference genome associated with this transcript.
+ - `gtex_tissue_expression`: Array containing [GTEx](https://gtexportal.org/home/) v10 information.
+ - `tissue`: The tissue type, e.g. 'brain_cerebellum'.
+ - `value`: The Transcripts Per Million (TPM) value associated with the tissue.
- `refseq_id`: Transcript RefSeq ID.
- `refseq_version`: RefSeq version.
- `hgnc_id`: HGNC gene ID.
@@ -208,6 +211,15 @@ Row fields:
- `ensembl_version`: Ensembl version.
- `refseq_id`: Transcript RefSeq ID.
- `refseq_version`: RefSeq version.
+- `pext`: Struct containing [pext](https://gnomad.broadinstitute.org/help/pext) information.
+ - `regions`: Array containing pext information by region.
+ - `chrom`: The chromosome in which the region is located.
+ - `start`: Region genomic start position (position only).
+ - `stop`: Region genomic stop position (position only).
+ - `mean`: Mean expression across all tissues for the region.
+ - `tissues`: Array containing tissue information.
+ - `tissue`: The tissue type, e.g. 'brain_cerebellum'.
+ - `value`: The pext score for the tissue in the region.
- `preferred_transcript_id`: Transcript shown on the gene page by default. Field contains MANE Select transcript ID if it exists, otherwise contains Ensembl canonical transcript ID.
- `preferred_transcript_source`: Source of transcript ID used for `preferred_transcript_id` field; either "`mane_select`" or "`ensembl_canonical`".
- `gnomad_constraint`: Struct containing gnomAD constraint information for gene. Struct is only present on the GRCh37 Hail Table.
From 846558c1b8a84a7b13d7d8f9ce8495e54d5b8827 Mon Sep 17 00:00:00 2001
From: Riley Grant
Date: Tue, 14 Jan 2025 14:04:00 -0600
Subject: [PATCH 4/4] feat(browser): update gene page variant note
Update the warning/note on the gene page to explicitly state that the
coding exons defining the padded regions in which variants are displayed
for the gene include only CDS exons, not UTRs.
Also clarify the text for genes that have only non-coding transcripts.
---
browser/src/GenePage/GenePage.tsx | 1 +
browser/src/GenePage/VariantsInGene.tsx | 11 ++++++++---
2 files changed, 9 insertions(+), 3 deletions(-)
diff --git a/browser/src/GenePage/GenePage.tsx b/browser/src/GenePage/GenePage.tsx
index f93a8712b..ad5cca082 100644
--- a/browser/src/GenePage/GenePage.tsx
+++ b/browser/src/GenePage/GenePage.tsx
@@ -574,6 +574,7 @@ const GenePage = ({ datasetId, gene, geneId }: Props) => {
includeNonCodingTranscripts={includeNonCodingTranscripts}
includeUTRs={includeUTRs}
zoomRegion={zoomRegion}
+ hasOnlyNonCodingTranscripts={!hasCodingExons && hasNonCodingTranscripts}
/>
)}
diff --git a/browser/src/GenePage/VariantsInGene.tsx b/browser/src/GenePage/VariantsInGene.tsx
index da367cf91..ebff5cadc 100644
--- a/browser/src/GenePage/VariantsInGene.tsx
+++ b/browser/src/GenePage/VariantsInGene.tsx
@@ -82,6 +82,7 @@ type OwnVariantsInGeneProps = {
start: number
stop: number
}
+ hasOnlyNonCodingTranscripts?: boolean
}
// @ts-expect-error TS(2456) FIXME: Type alias 'VariantsInGeneProps' circularly refere... Remove this comment to see the full error message
@@ -97,6 +98,7 @@ const VariantsInGene = ({
includeUTRs,
variants,
zoomRegion,
+ hasOnlyNonCodingTranscripts,
}: VariantsInGeneProps) => {
const datasetLabel = labelForDataset(datasetId)
@@ -134,9 +136,12 @@ const VariantsInGene = ({
{includeNonCodingTranscripts || includeUTRs ? 'Warning' : 'Note'}
{' '}
- Only variants located in or within 75 base pairs of a coding exon are shown here. To see
- variants in UTRs or introns, use the{' '}
- region view.
+ {hasOnlyNonCodingTranscripts && <>This gene has no coding transcripts. >}
+ Only variants located in or within 75 base pairs of{' '}
+ {!hasOnlyNonCodingTranscripts ? <>a coding exon (CDS)> : <>an exon>} are shown here.
+ To see variants {!hasOnlyNonCodingTranscripts ? <>in UTRs or introns> : <>in introns>}
+ , use the region view
+ .
The table below shows the HGVS consequence and VEP annotation for each variant's most