Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

EVA-3681 - Allow overwrite and override mode to change existing values on samples #226

Merged
merged 2 commits into from
Oct 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion bin/modify_existing_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
# limitations under the License.

import argparse
import logging

from ebi_eva_common_pyutils.logger import logging_config as log_cfg

Expand All @@ -38,9 +39,12 @@ def main():
'"curate" will create curation object on top of the BioSample. These are not '
'used by ENA. '
'"derive" will create a new BioSample derived from the old one.')
arg_parser.add_argument('--debug', action='store_true', default=False,
help='Set the script to output logging information at debug level')
args = arg_parser.parse_args()

log_cfg.add_stdout_handler()
if args.debug:
log_cfg.add_stdout_handler(level=logging.DEBUG)

# Load the config_file from default location
load_config()
Expand Down
18 changes: 9 additions & 9 deletions eva_submission/biosample_submission/biosamples_submitters.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,16 +154,16 @@ def convert_sample_data_to_curation_object(self, future_sample):
return dict(curation=curation_object, sample=future_sample.get('accession'))

@staticmethod
def _update_from_array(key, sample_source, sample_dest):
def _update_from_array(key, sample_source, sample_dest, allow_overwrite=False):
"""Add the element of an array stored in specified key from source to destination"""
if key in sample_source:
if key not in sample_dest:
sample_dest[key] = []
for element in sample_source[key]:
if element not in sample_dest[key]:
if element not in sample_dest[key] or allow_overwrite:
sample_dest[key].append(element)

def _update_samples_with(self, sample_source, sample_dest):
def _update_samples_with(self, sample_source, sample_dest, allow_overwrite=False):
"""Update a BioSample object with the value of another"""
if 'override' in self.submit_type:
# Ensure that override only changes geographic location and collection date
Expand All @@ -173,12 +173,12 @@ def _update_samples_with(self, sample_source, sample_dest):
tmp_sample_source['characteristics'][attribute] = sample_source['characteristics'][attribute]
sample_source = tmp_sample_source
for attribute in sample_source['characteristics']:
if attribute not in sample_dest['characteristics']:
if attribute not in sample_dest['characteristics'] or allow_overwrite:
sample_dest['characteristics'][attribute] = sample_source['characteristics'][attribute]
self._update_from_array('externalReferences', sample_source, sample_dest)
self._update_from_array('relationships', sample_source, sample_dest)
self._update_from_array('contact', sample_source, sample_dest)
self._update_from_array('organization', sample_source, sample_dest)
self._update_from_array('externalReferences', sample_source, sample_dest, allow_overwrite)
self._update_from_array('relationships', sample_source, sample_dest, allow_overwrite)
self._update_from_array('contact', sample_source, sample_dest, allow_overwrite)
self._update_from_array('organization', sample_source, sample_dest, allow_overwrite)
for key in ['taxId', 'accession', 'name', 'release']:
if key in sample_source and key not in sample_dest:
sample_dest[key] = sample_source[key]
Expand All @@ -189,7 +189,7 @@ def create_sample_to_overwrite(self, sample):
if self.can_overwrite(sample) and not self.allow_removal:
# retrieve the sample without any curation and add the new data on top
destination_sample = self._get_existing_sample(sample.get('accession'))
self._update_samples_with(sample, destination_sample)
self._update_samples_with(sample, destination_sample, allow_overwrite=True)
return destination_sample

def create_derived_sample(self, sample):
Expand Down
Loading