Skip to content

Commit

Permalink
Provide NCBI API keys when possible (#203)
Browse files Browse the repository at this point in the history
* Provide NCBI API keys when possible

* Fix requirements.txt
  • Loading branch information
tcezard authored Apr 22, 2024
1 parent 0daf447 commit 083da74
Show file tree
Hide file tree
Showing 6 changed files with 8 additions and 6 deletions.
3 changes: 2 additions & 1 deletion bin/insert_new_assembly.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ def main():
insert_new_assembly_and_taxonomy(
metadata_connection_handle=conn,
assembly_accession=assembly_accession,
- taxonomy_id=taxon_id
+ taxonomy_id=taxon_id,
+ ncbi_api_key=cfg['eutils_api_key']
)
elif taxon_id:
ensure_taxonomy_is_in_evapro(conn, taxon_id)
Expand Down
1 change: 1 addition & 0 deletions eva_submission/eload_ingestion.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ def check_variant_db(self):
metadata_connection_handle=conn,
assembly_accession=assembly,
taxonomy_id=self.taxonomy,
+ ncbi_api_key=cfg['eutils_api_key']
)

for db_info in assembly_to_db_name.values():
Expand Down
2 changes: 1 addition & 1 deletion eva_submission/eload_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def resolve_accession_from_text(reference_text):
if NCBIAssembly.is_assembly_accession_format(reference_text):
return [reference_text]
# Search for a reference genome that resolve this text
- accession = retrieve_genbank_assembly_accessions_from_ncbi(reference_text)
+ accession = retrieve_genbank_assembly_accessions_from_ncbi(reference_text, api_key=cfg['eutils_api_key'])
if accession:
return accession

Expand Down
4 changes: 2 additions & 2 deletions eva_submission/vep_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ def get_species_and_assembly(assembly_acc):
Returns None if the taxonomy is not known.
"""
# We first need to search for the species associated with the assembly
- assembly_dicts = get_ncbi_assembly_dicts_from_term(assembly_acc)
+ assembly_dicts = get_ncbi_assembly_dicts_from_term(assembly_acc, api_key=cfg['eutils_api_key'])
taxid_and_assembly_name = set([
(assembly_dict.get('taxid'), assembly_dict.get('assemblyname'))
for assembly_dict in assembly_dicts
Expand Down Expand Up @@ -253,7 +253,7 @@ def recursive_nlst(ftp, root, pattern):

@retry(tries=4, delay=2, backoff=1.2, jitter=(1, 3), logger=logger)
def download_and_extract_vep_cache(ftp, vep_cache_file, taxonomy_id):
- scientific_name = retrieve_species_scientific_name_from_tax_id_ncbi(taxonomy_id)
+ scientific_name = retrieve_species_scientific_name_from_tax_id_ncbi(taxonomy_id, api_key=cfg['eutils_api_key'])
species_name = scientific_name.replace(' ', '_').lower()

tmp_dir = tempfile.TemporaryDirectory()
Expand Down
2 changes: 1 addition & 1 deletion eva_submission/xlsx/xlsx_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def check_reference_genome(self):
"""Check if the references can be retrieved"""
references = set([row['Reference'] for row in self.metadata['Analysis'] if row['Reference']])
for reference in references:
- accessions = retrieve_genbank_assembly_accessions_from_ncbi(reference)
+ accessions = retrieve_genbank_assembly_accessions_from_ncbi(reference, api_key=cfg['eutils_api_key'])
if len(accessions) == 0:
self.error_list.append(f'In Analysis, Reference {reference} did not resolve to any accession')
elif len(accessions) > 1:
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cached-property
cerberus
- ebi-eva-common-pyutils[eva-internal]==0.6.4
+ ebi-eva-common-pyutils[eva-internal]==0.6.6
eva-vcf-merge==0.0.8
humanize
lxml
Expand Down

0 comments on commit 083da74

Please sign in to comment.