Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add rewrite-submissions task #256

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,5 +68,5 @@
'django-debug-toolbar',
'django-minio-storage',
],
extras_require={'dev': ['ipython', 'tox']},
extras_require={'dev': ['django-click', 'ipython', 'tox']},
)
150 changes: 150 additions & 0 deletions stade/core/management/commands/isic-2017-p3-validation-images.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
ISIC_0001769
ISIC_0001852
ISIC_0001871
ISIC_0003462
ISIC_0003539
ISIC_0003582
ISIC_0003657
ISIC_0003805
ISIC_0004337
ISIC_0006651
ISIC_0006671
ISIC_0006815
ISIC_0006914
ISIC_0007141
ISIC_0007156
ISIC_0007235
ISIC_0007241
ISIC_0007332
ISIC_0007344
ISIC_0007528
ISIC_0007796
ISIC_0008025
ISIC_0008524
ISIC_0009995
ISIC_0010459
ISIC_0012099
ISIC_0012109
ISIC_0012126
ISIC_0012127
ISIC_0012143
ISIC_0012151
ISIC_0012159
ISIC_0012160
ISIC_0012191
ISIC_0012201
ISIC_0012204
ISIC_0012206
ISIC_0012210
ISIC_0012221
ISIC_0012222
ISIC_0012254
ISIC_0012256
ISIC_0012288
ISIC_0012306
ISIC_0012313
ISIC_0012316
ISIC_0012335
ISIC_0012380
ISIC_0012383
ISIC_0012400
ISIC_0012417
ISIC_0012434
ISIC_0012492
ISIC_0012513
ISIC_0012538
ISIC_0012547
ISIC_0012660
ISIC_0012684
ISIC_0012720
ISIC_0012746
ISIC_0012876
ISIC_0012927
ISIC_0012956
ISIC_0012959
ISIC_0012965
ISIC_0013010
ISIC_0013082
ISIC_0013104
ISIC_0013127
ISIC_0013128
ISIC_0013132
ISIC_0013188
ISIC_0013215
ISIC_0013232
ISIC_0013421
ISIC_0013491
ISIC_0013501
ISIC_0013518
ISIC_0013527
ISIC_0013549
ISIC_0013561
ISIC_0013562
ISIC_0013632
ISIC_0013637
ISIC_0013644
ISIC_0013651
ISIC_0013663
ISIC_0013702
ISIC_0013736
ISIC_0013793
ISIC_0013828
ISIC_0013863
ISIC_0013898
ISIC_0013945
ISIC_0014037
ISIC_0014038
ISIC_0014055
ISIC_0014139
ISIC_0014162
ISIC_0014178
ISIC_0014211
ISIC_0014212
ISIC_0014217
ISIC_0014302
ISIC_0014310
ISIC_0014382
ISIC_0014428
ISIC_0014558
ISIC_0014568
ISIC_0014572
ISIC_0014597
ISIC_0014601
ISIC_0014608
ISIC_0014610
ISIC_0014611
ISIC_0014616
ISIC_0014618
ISIC_0014620
ISIC_0014623
ISIC_0014624
ISIC_0014633
ISIC_0014635
ISIC_0014637
ISIC_0014688
ISIC_0014712
ISIC_0014809
ISIC_0014829
ISIC_0014857
ISIC_0014931
ISIC_0014937
ISIC_0014945
ISIC_0014946
ISIC_0014979
ISIC_0014985
ISIC_0014989
ISIC_0015043
ISIC_0015062
ISIC_0015124
ISIC_0015144
ISIC_0015211
ISIC_0015243
ISIC_0015256
ISIC_0015313
ISIC_0015372
ISIC_0015401
ISIC_0015443
ISIC_0015445
ISIC_0015483
ISIC_0015496
ISIC_0015627
206 changes: 206 additions & 0 deletions stade/core/management/commands/rewrite-submissions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
import csv
import functools
import io
import pathlib
from typing import BinaryIO, List, TextIO, Tuple
import zipfile

from django.core.files.base import ContentFile
import djclick as click
from isic_challenge_scoring import ClassificationScore
import numpy as np

from stade.core.models import Submission, Task

# TODO: test whether storage.open('r') works on both boto3 and minio
# TODO: 2016 BACC is always 1.0
# TODO: rescore doesn't allow different metrics


@click.command()
def rewrite_submissions():
    """Rewrite and rescore the submissions of every selected task."""
    # Primary keys of the tasks to process; trailing comments are the
    # original author's labels for each task.
    selected_task_ids = [
        39,  # 2016
        # 41,  # 2016b
        44,  # 44
        47,  # 47
    ]
    selected_tasks = Task.objects.filter(id__in=selected_task_ids)
    for selected_task in selected_tasks:
        rewrite_task(selected_task)


def rewrite_task(task: Task):
    """
    Rewrite and rescore every submission belonging to ``task``.

    The task's ground truth file is normalized once with ``rewrite`` and then
    reused for each submission. The overall scores before and after the
    rewrite are echoed, ranked from highest to lowest, and (except for the
    '2016' dialect) the ranking is asserted to be unchanged.
    """
    dialect = task.challenge.slug

    with task.test_ground_truth_file.open() as raw_gt_stream:
        gt_stream = rewrite(raw_gt_stream, dialect)

    def ranked(scores_by_id):
        # Map of submission id -> score, ordered from highest score to lowest.
        ordered_pairs = sorted(scores_by_id.items(), key=lambda pair: pair[1], reverse=True)
        return dict(ordered_pairs)

    scores_before = {}
    scores_after = {}
    for submission in Submission.objects.filter(approach__task=task):
        scores_before[submission.id] = submission.overall_score
        # Updates and saves the submission in place.
        rewrite_submission(dialect, gt_stream, submission)
        scores_after[submission.id] = submission.overall_score

    scores_before = ranked(scores_before)
    scores_after = ranked(scores_after)

    click.echo(scores_before)
    click.echo(scores_after)
    click.echo()
    if dialect != '2016':
        # 2016 is exempt: the new algorithm for AP changed the ordering at the
        # bottom of the leaderboard
        assert list(scores_before) == list(scores_after)


def rewrite_submission(dialect: str, gt_stream: TextIO, submission: Submission) -> float:
    """
    Normalize a submission's prediction file, rescore it, and save the result.

    The stored prediction file (a CSV, or a ZIP containing exactly one CSV) is
    passed through ``rewrite``, scored against ``gt_stream``, and the new
    score is sanity-checked against the previously stored metrics before the
    submission is updated and saved.

    :param dialect: Challenge slug; one of '2016', '2017' or '2018'.
    :param gt_stream: Rewritten ground truth stream; rewound to the start
        after scoring so it can be reused for the next submission.
    :param submission: The submission to update in place.
    :return: The new overall score of the submission.
    :raises Exception: If ``dialect`` is not a known dialect.
    """
    # Fail fast, before any file is read or scored.
    if dialect not in ('2016', '2017', '2018'):
        raise Exception(f'Unknown dialect {dialect}')

    def old_metrics(dataset: str) -> dict:
        # Map metric name -> value for one dataset of the previously stored score.
        metrics = [entry['metrics'] for entry in submission.score if entry['dataset'] == dataset][
            0
        ]
        return {metric['name']: metric['value'] for metric in metrics}

    # Print old
    click.echo(submission.approach.name)
    click.echo(submission.test_prediction_file)
    click.echo(submission.overall_score)
    old_average_scores = old_metrics('Average')
    old_aggregate_scores = old_metrics('aggregate')

    # Rewrite and rescore
    with submission.test_prediction_file.open() as stored_stream:
        if submission.test_prediction_file.name.endswith('.zip'):
            csv_stream, prediction_file_name = from_zip(stored_stream)
            prediction_stream = rewrite(csv_stream, dialect)
        else:
            prediction_stream = rewrite(stored_stream, dialect)
            # Drop everything up to and including the first '/' of the stored name
            prediction_file_name = submission.test_prediction_file.name.partition('/')[2]

    score = ClassificationScore.from_stream(gt_stream, prediction_stream)
    # Rewind both streams: the ground truth is reused by the caller and the
    # prediction is read again below when it is saved.
    gt_stream.seek(0)
    prediction_stream.seek(0)

    assert score.overall == score.validation
    # Cannot assert that average precision is unchanged; sklearn changed their algorithm since
    # the original scoring
    if dialect == '2016':
        assert np.isclose(old_average_scores['area_under_roc'], score.macro_average['auc'])
        assert np.isclose(old_average_scores['accuracy'], score.macro_average['accuracy'])
        new_overall_score = score.macro_average['ap']
    elif dialect == '2017':
        assert np.isclose(old_average_scores['area_under_roc_mean'], score.macro_average['auc'])
        assert np.isclose(old_average_scores['accuracy_mean'], score.macro_average['accuracy'])
        new_overall_score = score.macro_average['auc']
    else:  # '2018'
        assert np.isclose(old_average_scores['auc'], score.macro_average['auc'])
        assert np.isclose(old_average_scores['accuracy'], score.macro_average['accuracy'])
        assert np.isclose(
            old_aggregate_scores['balanced_accuracy'], score.aggregate['balanced_accuracy']
        )
        new_overall_score = score.aggregate['balanced_accuracy']

    # Save updates
    submission.overall_score = new_overall_score
    submission.score = score.to_dict()
    submission.validation_score = submission.overall_score
    submission.test_prediction_file = ContentFile(
        prediction_stream.read().encode('utf-8'),
        name=prediction_file_name,
    )
    submission.save()

    # Print new
    click.echo(submission.overall_score)
    click.echo(old_average_scores)
    click.echo(score.macro_average.to_dict())
    click.echo()

    return submission.overall_score


@functools.lru_cache
def validation_images_2017() -> List[str]:
    """
    Return the image names listed in 'isic-2017-p3-validation-images.txt'.

    The listing is read from the directory containing this module, one name
    per line with surrounding whitespace stripped. The result is cached, so
    the file is only read on the first call.
    """
    listing_path = pathlib.Path(__file__).parent / 'isic-2017-p3-validation-images.txt'
    with listing_path.open() as listing:
        image_names = [raw_line.strip() for raw_line in listing]
    # The listing is expected to hold exactly 150 entries.
    assert len(image_names) == 150
    return image_names


def rewrite(input_stream: BinaryIO, dialect) -> io.StringIO:
    """
    Normalize a ground truth or prediction CSV for the given challenge dialect.

    Every field is stripped of surrounding whitespace, empty fields and empty
    rows are dropped, and each remaining row is checked for the column count
    of its dialect:

    * '2016': a header row is prepended, rows must have 2 fields, and a score
      of exactly '0' or '1' is written as '0.0' or '1.0'.
    * '2017': a header row is prepended, an existing 'image_id' header row and
      any row for a 2017 validation image is dropped, and rows must have 3
      fields.
    * '2018': rows must have 8 fields; no header row is prepended.

    Rows of any other dialect are written without a column count check.

    :param input_stream: Binary stream containing the CSV data.
    :param dialect: Challenge slug selecting the rules above.
    :return: A text stream holding the rewritten CSV, positioned at the start.
    :raises AssertionError: If a row has the wrong number of fields.
    """
    # Enable automatic newline translation with newline=None
    text_input_stream = io.TextIOWrapper(input_stream, newline=None)
    csv_reader = csv.reader(text_input_stream)

    output_stream = io.StringIO(newline=None)
    csv_writer = csv.writer(output_stream)

    if dialect == '2016':
        csv_writer.writerow(['image', 'malignant'])
    elif dialect == '2017':
        csv_writer.writerow(['image', 'melanoma', 'seborrheic_keratosis'])

    # Some 2017 submissions included validation images; build the lookup set
    # once instead of scanning the list for every row.
    excluded_images = frozenset(validation_images_2017()) if dialect == '2017' else frozenset()

    for row in csv_reader:
        row = [field.strip() for field in row]
        # Remove empty fields
        row = [field for field in row if field]
        if not row:
            continue

        if dialect == '2016':
            assert len(row) == 2, f'Expected 2 fields, got {len(row)}: {row}'
            if row[1] == '0':
                row[1] = '0.0'
            elif row[1] == '1':
                row[1] = '1.0'
        elif dialect == '2017':
            # Drop the original header (a new one was written above) and any
            # validation image.
            if row[0] == 'image_id' or row[0] in excluded_images:
                continue
            assert len(row) == 3, f'Expected 3 fields, got {len(row)}: {row}'
        elif dialect == '2018':
            assert len(row) == 8, f'Expected 8 fields, got {len(row)}: {row}'

        csv_writer.writerow(row)

    output_stream.seek(0)
    return output_stream


def from_zip(input_stream: BinaryIO) -> Tuple[io.BytesIO, str]:
    """
    Extract the single CSV file contained in a ZIP archive.

    Members whose name contains '__MACOSX' are ignored.

    :param input_stream: Binary stream containing the ZIP archive.
    :return: An in-memory stream with the CSV content, positioned at the
        start, and the CSV's member name within the archive.
    :raises AssertionError: If the archive does not contain exactly one CSV.
    """
    # The context manager releases the archive's resources once the member is read.
    with zipfile.ZipFile(input_stream) as zip_file:
        csv_infos = [
            zip_info
            for zip_info in zip_file.infolist()
            if zip_info.filename.endswith('.csv') and '__MACOSX' not in zip_info.filename
        ]
        assert len(csv_infos) == 1
        csv_info = csv_infos[0]

        with zip_file.open(csv_info) as csv_stream:
            # A BytesIO built from initial bytes starts at position 0.
            output_stream = io.BytesIO(csv_stream.read())
    return output_stream, csv_info.filename