Merge pull request #1169 from SalahAdDin/eval
Implemented F1 score for evaluations.
hgaiser authored Dec 12, 2019
2 parents 216e6de + 89d3f52 commit 56c6998
Showing 4 changed files with 87 additions and 17 deletions.
14 changes: 13 additions & 1 deletion keras_retinanet/bin/evaluate.py
@@ -152,7 +152,7 @@ def main(args=None):
from ..utils.coco_eval import evaluate_coco
evaluate_coco(generator, model, args.score_threshold)
else:
average_precisions, inference_time = evaluate(
average_precisions, inference_time, f1_scores = evaluate(
generator,
model,
iou_threshold=args.iou_threshold,
@@ -164,11 +164,19 @@ def main(args=None):
# print evaluation
total_instances = []
precisions = []
scores = []

for label, (average_precision, num_annotations) in average_precisions.items():
print('{:.0f} instances of class'.format(num_annotations),
generator.label_to_name(label), 'with average precision: {:.4f}'.format(average_precision))
total_instances.append(num_annotations)
precisions.append(average_precision)

for label, (f1_score, num_annotations) in f1_scores.items():
print('{:.0f} instances of class'.format(num_annotations),
generator.label_to_name(label), 'with F1 score: {:.4f}'.format(f1_score))
# total_instances.append(num_annotations)
scores.append(f1_score)

if sum(total_instances) == 0:
print('No test instances found.')
@@ -178,6 +186,10 @@ def main(args=None):

print('mAP using the weighted average of precisions among classes: {:.4f}'.format(sum([a * b for a, b in zip(total_instances, precisions)]) / sum(total_instances)))
print('mAP: {:.4f}'.format(sum(precisions) / sum(x > 0 for x in total_instances)))

print('mF1 using the weighted F1 scores among classes: {:.4f}'.format(sum([a * b for a, b in zip(total_instances, scores)]) / sum(total_instances)))
print('mF1: {:.4f}'.format(sum(scores) / sum(x > 0 for x in total_instances)))



if __name__ == '__main__':
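The new mF1 numbers are aggregated the same way the existing mAP numbers are: the weighted variant weights each class by its ground-truth instance count, the unweighted variant averages over classes that have at least one annotation. A minimal standalone sketch of that aggregation, assuming f1_scores has the {label: (f1_score, num_annotations)} layout that evaluate() returns in this commit (summarize_f1 itself is a hypothetical helper, not part of the change):

def summarize_f1(f1_scores, weighted=False):
    """Collapse per-class (f1_score, num_annotations) pairs into a single mF1."""
    scores = [score for score, _ in f1_scores.values()]
    counts = [count for _, count in f1_scores.values()]

    if sum(counts) == 0:
        return 0.0  # no annotated instances at all

    if weighted:
        # weight each class by how many ground-truth instances it has
        return sum(s * c for s, c in zip(scores, counts)) / sum(counts)

    # plain mean over the classes that actually have annotations
    return sum(scores) / sum(c > 0 for c in counts)


# two classes: 50 'car' instances at F1 0.81, 10 'person' instances at F1 0.40
print(summarize_f1({0: (0.81, 50), 1: (0.40, 10)}, weighted=True))   # ~0.742
print(summarize_f1({0: (0.81, 50), 1: (0.40, 10)}, weighted=False))  # 0.605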
3 changes: 2 additions & 1 deletion keras_retinanet/bin/train.py
@@ -518,7 +518,8 @@ def main(args=None):
workers=args.workers,
use_multiprocessing=args.multiprocessing,
max_queue_size=args.max_queue_size,
validation_data=validation_generator,
validation_steps=args.steps_for_validation,
validation_data=validation_generator,
initial_epoch=args.initial_epoch
)

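The new call passes args.steps_for_validation to fit_generator, but no matching command-line option appears anywhere in this diff. A hedged sketch of what such an option could look like, in the argparse style train.py already uses; the flag name and default are assumptions, not part of this commit:

import argparse

# Hypothetical flag (not in this commit) so that args.steps_for_validation exists
# by the time fit_generator is called; None evaluates the full validation generator.
parser = argparse.ArgumentParser(description='Illustration only.')
parser.add_argument(
    '--steps-for-validation',
    help='Number of validation steps to run per epoch.',
    type=int,
    default=None)

args = parser.parse_args(['--steps-for-validation', '500'])
print(args.steps_for_validation)  # 500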
50 changes: 37 additions & 13 deletions keras_retinanet/callbacks/eval.py
@@ -45,22 +45,22 @@ def __init__(
weighted_average : Compute the mAP using the weighted average of precisions among classes.
verbose : Set the verbosity level, by default this is set to 1.
"""
self.generator       = generator
self.iou_threshold   = iou_threshold
self.generator = generator
self.iou_threshold = iou_threshold
self.score_threshold = score_threshold
self.max_detections  = max_detections
self.save_path       = save_path
self.tensorboard     = tensorboard
self.max_detections = max_detections
self.save_path = save_path
self.tensorboard = tensorboard
self.weighted_average = weighted_average
self.verbose         = verbose
self.verbose = verbose

super(Evaluate, self).__init__()

def on_epoch_end(self, epoch, logs=None):
logs = logs or {}

# run evaluation
average_precisions, _ = evaluate(
average_precisions, _, f1_scores = evaluate(
self.generator,
self.model,
iou_threshold=self.iou_threshold,
@@ -72,27 +72,51 @@ def on_epoch_end(self, epoch, logs=None):
# compute per class average precision
total_instances = []
precisions = []
scores = []

for label, (average_precision, num_annotations) in average_precisions.items():
if self.verbose == 1:
print('{:.0f} instances of class'.format(num_annotations),
self.generator.label_to_name(label), 'with average precision: {:.4f}'.format(average_precision))
total_instances.append(num_annotations)
precisions.append(average_precision)
if self.weighted_average:
self.mean_ap = sum([a * b for a, b in zip(total_instances, precisions)]) / sum(total_instances)
self.mean_ap = sum(
[a * b for a, b in zip(total_instances, precisions)]) / sum(total_instances)
else:
self.mean_ap = sum(precisions) / sum(x > 0 for x in total_instances)
self.mean_ap = sum(precisions) / \
sum(x > 0 for x in total_instances)

# compute per class F1 score
for label, (f1_score, num_annotations) in f1_scores.items():
if self.verbose == 1:
print('{:.0f} instances of class'.format(num_annotations),
self.generator.label_to_name(label), ' with F1 score: {:.4f}'.format(f1_score))
total_instances.append(num_annotations)
scores.append(f1_score)

if self.weighted_average:
self.mean_f1_score = sum(
[a * b for a, b in zip(total_instances, scores)]) / sum(total_instances)
else:
self.mean_f1_score = sum(scores) / \
sum(x > 0 for x in total_instances)

if self.tensorboard:
import tensorflow as tf
if tf.version.VERSION < '2.0.0' and self.tensorboard.writer:
summary = tf.Summary()
summary_value = summary.value.add()
summary_value.simple_value = self.mean_ap
summary_value.tag = "mAP"
summary_value_map = summary.value.add()
summary_value_map.simple_value = self.mean_ap
summary_value_map.tag = "mAP"
summary_value_f1 = summary.value.add()
summary_value_f1.simple_value = self.mean_f1_score
summary_value_f1.tag = "mF1"
self.tensorboard.writer.add_summary(summary, epoch)

logs['mAP'] = self.mean_ap
logs['mF1'] = self.mean_f1_score

if self.verbose == 1:
print('mAP: {:.4f}'.format(self.mean_ap))
print('mAP: {:.4f}\nmF1: {:.4f}'.format(
self.mean_ap, self.mean_f1_score))
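Because the callback now writes both logs['mAP'] and logs['mF1'], either key can be monitored by other Keras callbacks that run after it. A usage sketch, assuming a keras_retinanet validation generator has already been built; validation_generator is illustrative, and the real train.py additionally redirects the callback to the inference model, which this commit does not change:

import keras
from keras_retinanet.callbacks.eval import Evaluate

# illustrative wiring; in train.py these objects come from the create_* helpers
tensorboard_callback = keras.callbacks.TensorBoard(log_dir='./logs')

evaluation = Evaluate(
    validation_generator,              # assumed: a keras_retinanet generator
    tensorboard=tensorboard_callback,  # receives the mAP/mF1 summaries on TF 1.x
    weighted_average=True,             # weight mAP and mF1 by per-class instance counts
)

# 'mF1' is now a valid monitor key for downstream callbacks, e.g. early stopping
early_stopping = keras.callbacks.EarlyStopping(monitor='mF1', mode='max', patience=5)
callbacks = [evaluation, tensorboard_callback, early_stopping]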
37 changes: 35 additions & 2 deletions keras_retinanet/utils/eval.py
@@ -56,6 +56,33 @@ def _compute_ap(recall, precision):
return ap


def _compute_f1(recall, precision):
""" Compute an F1 score from the recall and precision curves.
# Arguments
recall: The recall curve (list).
precision: The precision curve (list).
# Returns
The sum of the F1 scores at the points where recall changes.
"""
# append sentinel values at both ends, mirroring _compute_ap
mrec = np.concatenate(([0.], recall, [1.]))
mpre = np.concatenate(([0.], precision, [0.]))

# compute the precision envelope
for i in range(mpre.size - 1, 0, -1):
mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])

# look for the points where the X axis (recall) changes value
i = np.where(mrec[1:] != mrec[:-1])[0]

# and sum the harmonic mean of precision and recall at those points
f1 = np.sum(2 * (mpre[i] * mrec[i]) / (mpre[i] + mrec[i]))
return f1
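For comparison, a per-class F1 is more commonly reported at a single operating point (the detections kept at the current score threshold) than summed over every point where recall changes, as _compute_f1 does above. A minimal sketch of that single-point variant, assuming recall and precision are the same cumulative curves evaluate() builds per class; this is an illustration, not the function this commit adds:

def _compute_f1_at_threshold(recall, precision, eps=1e-12):
    """ F1 at the last point of the cumulative recall/precision curves.

    The final entries correspond to all detections kept at the current score
    threshold, so this yields exactly one F1 value per class.
    """
    if len(recall) == 0 or len(precision) == 0:
        return 0.0
    r, p = recall[-1], precision[-1]
    return 2 * p * r / (p + r + eps)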


def _get_detections(generator, model, score_threshold=0.05, max_detections=100, save_path=None):
""" Get the detections from the model using the generator.
@@ -174,6 +201,7 @@ def evaluate(
all_detections, all_inferences = _get_detections(generator, model, score_threshold=score_threshold, max_detections=max_detections, save_path=save_path)
all_annotations = _get_annotations(generator)
average_precisions = {}
f1_scores = {}

# all_detections = pickle.load(open('all_detections.pkl', 'rb'))
# all_annotations = pickle.load(open('all_annotations.pkl', 'rb'))
@@ -237,7 +265,12 @@ def evaluate(
# compute average precision
average_precision = _compute_ap(recall, precision)
average_precisions[label] = average_precision, num_annotations


# inference time
inference_time = np.sum(all_inferences) / generator.size()

# compute F1 scores
f1_score = _compute_f1(recall, precision)
f1_scores[label] = f1_score, num_annotations

return average_precisions, inference_time
return average_precisions, inference_time, f1_scores
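Since evaluate() now returns three values, any external caller that still unpacks two will raise a ValueError. A short usage sketch of the updated unpacking, with illustrative variable names (validation_generator and prediction_model are assumed to be built elsewhere):

from keras_retinanet.utils.eval import evaluate

# evaluate() now returns F1 scores alongside the average precisions
average_precisions, inference_time, f1_scores = evaluate(
    validation_generator,   # assumed: a keras_retinanet generator
    prediction_model,       # assumed: a model with detection outputs
    iou_threshold=0.5,
    score_threshold=0.05,
)

for label, (f1_score, num_annotations) in f1_scores.items():
    print('class {}: F1 {:.4f} over {:.0f} instances'.format(label, f1_score, num_annotations))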
