From d014dbeaa3e9a5d4c1085cb48ffdf0424216254e Mon Sep 17 00:00:00 2001 From: Dave Lawrence Date: Fri, 1 Oct 2021 12:31:52 +0930 Subject: [PATCH] #494 - using gene version in key, don't use old GTFs w/o versions --- genes/management/commands/import_gene_annotation2.py | 2 +- .../ensembl/download_ensembl_gene_annotation_grch37.sh | 9 +-------- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/genes/management/commands/import_gene_annotation2.py b/genes/management/commands/import_gene_annotation2.py index e42c78e8f..cf4f958f1 100644 --- a/genes/management/commands/import_gene_annotation2.py +++ b/genes/management/commands/import_gene_annotation2.py @@ -96,7 +96,7 @@ def _convert_to_merged_data(self, pyreference_data: Iterable[Dict], most_recent_ need_gene = True if need_gene: - gene_version[gene_id] = convert_gene_pyreference_to_gene_version_data(gene) + gene_version[gv_accession] = convert_gene_pyreference_to_gene_version_data(gene) for transcript_accession in transcripts: transcript = prd["transcripts_by_id"][transcript_accession] diff --git a/genes/scripts/ensembl/download_ensembl_gene_annotation_grch37.sh b/genes/scripts/ensembl/download_ensembl_gene_annotation_grch37.sh index f81fb1180..57017fa5e 100755 --- a/genes/scripts/ensembl/download_ensembl_gene_annotation_grch37.sh +++ b/genes/scripts/ensembl/download_ensembl_gene_annotation_grch37.sh @@ -1,14 +1,7 @@ #!/bin/bash -# 75 is last GRCh37 in main directory -for release in 60 65 70 75; do - filename=Homo_sapiens.GRCh37.${release}.gtf.gz - if [[ ! -e ${filename} ]]; then - wget ftp://ftp.ensembl.org/pub/release-${release}/gtf/homo_sapiens/${filename} - fi -done +# v81 (points to 75) and earlier are GTFs that don't have transcript versions - just skip them -#81 is 75 again #82 is first GFF3 for GRCh37 #83 has no data #84 is 82 again