From adbf4b5778c452f9779514b8909c4e84059de7c5 Mon Sep 17 00:00:00 2001
From: Brian Helba
Date: Mon, 2 Nov 2020 14:00:31 -0500
Subject: [PATCH] Add rewrite-submissions task

---
 setup.py                                           |   2 +-
 .../isic-2017-p3-validation-images.txt             | 150 +++++++++++++
 .../commands/rewrite-submissions.py                | 206 ++++++++++++++++++
 3 files changed, 357 insertions(+), 1 deletion(-)
 create mode 100644 stade/core/management/commands/isic-2017-p3-validation-images.txt
 create mode 100644 stade/core/management/commands/rewrite-submissions.py

diff --git a/setup.py b/setup.py
index 600b134d..55824a10 100644
--- a/setup.py
+++ b/setup.py
@@ -68,5 +68,5 @@
         'django-debug-toolbar',
         'django-minio-storage',
     ],
-    extras_require={'dev': ['ipython', 'tox']},
+    extras_require={'dev': ['django-click', 'ipython', 'tox']},
 )
diff --git a/stade/core/management/commands/isic-2017-p3-validation-images.txt b/stade/core/management/commands/isic-2017-p3-validation-images.txt
new file mode 100644
index 00000000..089b8819
--- /dev/null
+++ b/stade/core/management/commands/isic-2017-p3-validation-images.txt
@@ -0,0 +1,150 @@
+ISIC_0001769
+ISIC_0001852
+ISIC_0001871
+ISIC_0003462
+ISIC_0003539
+ISIC_0003582
+ISIC_0003657
+ISIC_0003805
+ISIC_0004337
+ISIC_0006651
+ISIC_0006671
+ISIC_0006815
+ISIC_0006914
+ISIC_0007141
+ISIC_0007156
+ISIC_0007235
+ISIC_0007241
+ISIC_0007332
+ISIC_0007344
+ISIC_0007528
+ISIC_0007796
+ISIC_0008025
+ISIC_0008524
+ISIC_0009995
+ISIC_0010459
+ISIC_0012099
+ISIC_0012109
+ISIC_0012126
+ISIC_0012127
+ISIC_0012143
+ISIC_0012151
+ISIC_0012159
+ISIC_0012160
+ISIC_0012191
+ISIC_0012201
+ISIC_0012204
+ISIC_0012206
+ISIC_0012210
+ISIC_0012221
+ISIC_0012222
+ISIC_0012254
+ISIC_0012256
+ISIC_0012288
+ISIC_0012306
+ISIC_0012313
+ISIC_0012316
+ISIC_0012335
+ISIC_0012380
+ISIC_0012383
+ISIC_0012400
+ISIC_0012417
+ISIC_0012434
+ISIC_0012492
+ISIC_0012513
+ISIC_0012538
+ISIC_0012547
+ISIC_0012660
+ISIC_0012684
+ISIC_0012720
+ISIC_0012746
+ISIC_0012876
+ISIC_0012927
+ISIC_0012956
+ISIC_0012959
+ISIC_0012965
+ISIC_0013010
+ISIC_0013082
+ISIC_0013104
+ISIC_0013127
+ISIC_0013128
+ISIC_0013132
+ISIC_0013188
+ISIC_0013215
+ISIC_0013232
+ISIC_0013421
+ISIC_0013491
+ISIC_0013501
+ISIC_0013518
+ISIC_0013527
+ISIC_0013549
+ISIC_0013561
+ISIC_0013562
+ISIC_0013632
+ISIC_0013637
+ISIC_0013644
+ISIC_0013651
+ISIC_0013663
+ISIC_0013702
+ISIC_0013736
+ISIC_0013793
+ISIC_0013828
+ISIC_0013863
+ISIC_0013898
+ISIC_0013945
+ISIC_0014037
+ISIC_0014038
+ISIC_0014055
+ISIC_0014139
+ISIC_0014162
+ISIC_0014178
+ISIC_0014211
+ISIC_0014212
+ISIC_0014217
+ISIC_0014302
+ISIC_0014310
+ISIC_0014382
+ISIC_0014428
+ISIC_0014558
+ISIC_0014568
+ISIC_0014572
+ISIC_0014597
+ISIC_0014601
+ISIC_0014608
+ISIC_0014610
+ISIC_0014611
+ISIC_0014616
+ISIC_0014618
+ISIC_0014620
+ISIC_0014623
+ISIC_0014624
+ISIC_0014633
+ISIC_0014635
+ISIC_0014637
+ISIC_0014688
+ISIC_0014712
+ISIC_0014809
+ISIC_0014829
+ISIC_0014857
+ISIC_0014931
+ISIC_0014937
+ISIC_0014945
+ISIC_0014946
+ISIC_0014979
+ISIC_0014985
+ISIC_0014989
+ISIC_0015043
+ISIC_0015062
+ISIC_0015124
+ISIC_0015144
+ISIC_0015211
+ISIC_0015243
+ISIC_0015256
+ISIC_0015313
+ISIC_0015372
+ISIC_0015401
+ISIC_0015443
+ISIC_0015445
+ISIC_0015483
+ISIC_0015496
+ISIC_0015627
diff --git a/stade/core/management/commands/rewrite-submissions.py b/stade/core/management/commands/rewrite-submissions.py
new file mode 100644
index 00000000..17a32748
--- /dev/null
+++ b/stade/core/management/commands/rewrite-submissions.py
@@ -0,0 +1,206 @@
+import csv
+import functools
+import io
+import pathlib
+from typing import BinaryIO, List, TextIO, Tuple
+import zipfile
+
+from django.core.files.base import ContentFile
+import djclick as click
+from isic_challenge_scoring import ClassificationScore
+import numpy as np
+
+from stade.core.models import Submission, Task
+
+# TODO: test whether storage.open('r') works on both boto3 and minio
+# TODO: 2016 BACC is always 1.0
+# TODO: rescore doesn't allow different metrics
+
+
+@click.command()
+def rewrite_submissions():
+    for task in Task.objects.filter(
+        id__in=[
+            39,  # 2016
+            # 41,  # 2016b
+            44,  # 44
+            47,  # 47
+        ]
+    ):
+        rewrite_task(task)
+
+
+def rewrite_task(task: Task):
+    dialect = task.challenge.slug
+
+    with task.test_ground_truth_file.open() as gt_stream:
+        gt_stream = rewrite(gt_stream, dialect)
+
+    submissions = Submission.objects.filter(approach__task=task)
+
+    old_sub_scores = {}
+    new_sub_scores = {}
+
+    for submission in submissions:
+        old_sub_scores[submission.id] = submission.overall_score
+
+        rewrite_submission(dialect, gt_stream, submission)
+
+        new_sub_scores[submission.id] = submission.overall_score
+
+    old_sub_scores = {
+        item[0]: item[1]
+        for item in sorted(old_sub_scores.items(), key=lambda item: item[1], reverse=True)
+    }
+    new_sub_scores = {
+        item[0]: item[1]
+        for item in sorted(new_sub_scores.items(), key=lambda item: item[1], reverse=True)
+    }
+
+    click.echo(old_sub_scores)
+    click.echo(new_sub_scores)
+    click.echo()
+    if dialect != '2016':
+        # The new algorithm for AP changed the ordering at the bottom of the leaderboard
+        assert list(old_sub_scores.keys()) == list(new_sub_scores.keys())
+
+
+def rewrite_submission(dialect: str, gt_stream: TextIO, submission: Submission) -> float:
+    # Print old
+    click.echo(submission.approach.name)
+    click.echo(submission.test_prediction_file)
+    click.echo(submission.overall_score)
+    old_average_scores = [
+        score['metrics'] for score in submission.score if score['dataset'] == 'Average'
+    ][0]
+    old_average_scores = {metric['name']: metric['value'] for metric in old_average_scores}
+    old_aggregate_scores = [
+        score['metrics'] for score in submission.score if score['dataset'] == 'aggregate'
+    ][0]
+    old_aggregate_scores = {metric['name']: metric['value'] for metric in old_aggregate_scores}
+
+    # Rewrite and rescore
+    if submission.test_prediction_file.name.endswith('.zip'):
+        with submission.test_prediction_file.open() as prediction_stream:
+            prediction_stream, prediction_file_name = from_zip(prediction_stream)
+            prediction_stream = rewrite(prediction_stream, dialect)
+    else:
+        with submission.test_prediction_file.open() as prediction_stream:
+            prediction_stream = rewrite(prediction_stream, dialect)
+            prediction_file_name = submission.test_prediction_file.name.partition('/')[2]
+
+    score = ClassificationScore.from_stream(gt_stream, prediction_stream)
+    gt_stream.seek(0)
+    prediction_stream.seek(0)
+
+    assert score.overall == score.validation
+    # Cannot assert that average precision is unchanged; sklearn changed their algorithm since
+    # the original scoring
+    if dialect == '2016':
+        assert np.isclose(old_average_scores['area_under_roc'], score.macro_average['auc'])
+        assert np.isclose(old_average_scores['accuracy'], score.macro_average['accuracy'])
+    elif dialect == '2017':
+        assert np.isclose(old_average_scores['area_under_roc_mean'], score.macro_average['auc'])
+        assert np.isclose(old_average_scores['accuracy_mean'], score.macro_average['accuracy'])
+    elif dialect == '2018':
+        assert np.isclose(old_average_scores['auc'], score.macro_average['auc'])
+        assert np.isclose(old_average_scores['accuracy'], score.macro_average['accuracy'])
+        assert np.isclose(
+            old_aggregate_scores['balanced_accuracy'], score.aggregate['balanced_accuracy']
+        )
+
+    if dialect == '2016':
+        submission.overall_score = score.macro_average['ap']
+    elif dialect == '2017':
+        submission.overall_score = score.macro_average['auc']
+    elif dialect == '2018':
+        submission.overall_score = score.aggregate['balanced_accuracy']
+    else:
+        raise Exception(f'Unknown dialect {dialect}')
+
+    # Save updates
+    submission.score = score.to_dict()
+    submission.validation_score = submission.overall_score
+    submission.test_prediction_file = ContentFile(
+        prediction_stream.read().encode('utf-8'),
+        name=prediction_file_name,
+    )
+    submission.save()
+
+    # Print new
+    click.echo(submission.overall_score)
+    click.echo(old_average_scores)
+    click.echo(score.macro_average.to_dict())
+    click.echo()
+
+    return submission.overall_score
+
+
+@functools.lru_cache
+def validation_images_2017() -> List[str]:
+    with (
+        pathlib.Path(__file__).parent / 'isic-2017-p3-validation-images.txt'
+    ).open() as input_stream:
+        images = [line.strip() for line in input_stream.readlines()]
+    assert len(images) == 150
+    return images
+
+
+def rewrite(input_stream: BinaryIO, dialect: str) -> io.StringIO:
+    # Enable automatic newline translation with newline=None
+    text_input_stream = io.TextIOWrapper(input_stream, newline=None)
+    csv_reader = csv.reader(text_input_stream)
+
+    output_stream = io.StringIO(newline=None)
+    csv_writer = csv.writer(output_stream)
+
+    if dialect == '2016':
+        csv_writer.writerow(['image', 'malignant'])
+    elif dialect == '2017':
+        csv_writer.writerow(['image', 'melanoma', 'seborrheic_keratosis'])
+    for row in csv_reader:
+        row = [field.strip() for field in row]
+        # Remove empty fields
+        row = [field for field in row if field]
+        if not row:
+            continue
+        if dialect == '2017' and row[0] == 'image_id':
+            continue
+
+        # Some 2017 submissions included validation images
+        if dialect == '2017' and row[0] in validation_images_2017():
+            continue
+
+        if dialect == '2016':
+            assert len(row) == 2
+            if row[1] == '0':
+                row[1] = '0.0'
+            elif row[1] == '1':
+                row[1] = '1.0'
+        elif dialect == '2017':
+            assert len(row) == 3
+        elif dialect == '2018':
+            assert len(row) == 8
+
+        csv_writer.writerow(row)
+
+    output_stream.seek(0)
+    return output_stream
+
+
+def from_zip(input_stream: BinaryIO) -> Tuple[io.BytesIO, str]:
+    zip_file = zipfile.ZipFile(input_stream)
+    csv_infos = [
+        zip_info
+        for zip_info in zip_file.infolist()
+        if zip_info.filename.endswith('.csv') and '__MACOSX' not in zip_info.filename
+    ]
+    assert len(csv_infos) == 1
+    csv_info = csv_infos[0]
+
+    output_stream = io.BytesIO()
+    with zip_file.open(csv_info) as csv_stream:
+        output_stream.write(csv_stream.read())
+    output_stream.seek(0)
+    return output_stream, csv_info.filename
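
Usage sketch (not part of the patch), assuming django-click (djclick) exposes the command
above as a standard management command named after its module file:

    # Hypothetical invocation; the command rescores and re-saves Submission rows in place,
    # so running it against a database copy first is advisable, and the hard-coded task IDs
    # in rewrite_submissions() must match the target database.
    ./manage.py rewrite-submissions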