From cda83aeeaf3786932fe9c50577b74459432fd1f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eloy=20P=C3=A9rez=20Torres?= <99720527+eloy-encord@users.noreply.github.com> Date: Tue, 31 Jan 2023 13:53:35 +0000 Subject: [PATCH] Feat: improve area and aspect ratio metrics efficiency (#130) Use image width and height from metadata when possible; otherwise read them from the file. It's at least 100x faster. --- .../lib/metrics/heuristic/img_features.py | 32 +++++++++++-------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/src/encord_active/lib/metrics/heuristic/img_features.py b/src/encord_active/lib/metrics/heuristic/img_features.py index 6f12f55bc..4c8f1aa23 100644 --- a/src/encord_active/lib/metrics/heuristic/img_features.py +++ b/src/encord_active/lib/metrics/heuristic/img_features.py @@ -4,6 +4,7 @@ import numpy as np from encord_active.lib.common.iterator import Iterator +from encord_active.lib.common.utils import get_du_size from encord_active.lib.metrics.metric import ( AnnotationType, DataType, @@ -230,36 +231,39 @@ def execute(self, iterator: Iterator, writer: CSVMetricWriter): class AspectRatioMetric(Metric): TITLE = "Aspect Ratio" SHORT_DESCRIPTION = "Ranks images by their aspect ratio (width/height)." - LONG_DESCRIPTION = r"""Ranks images by their aspect ratio (width/height). + LONG_DESCRIPTION = r"""Ranks images by their aspect ratio. -Aspect ratio is computed as the ratio of image width to image height. +Aspect ratio is computed as the ratio of image width to image height ($\frac{width}{height}$). 
""" METRIC_TYPE = MetricType.HEURISTIC DATA_TYPE = DataType.IMAGE ANNOTATION_TYPE = AnnotationType.NONE - @staticmethod - def rank_by_aspect_ratio(image): - return image.shape[1] / image.shape[0] - def execute(self, iterator: Iterator, writer: CSVMetricWriter): - return iterate_with_rank_fn(iterator, writer, self.rank_by_aspect_ratio, self.TITLE) + for data_unit, img_pth in iterator.iterate(desc=f"Computing {self.TITLE}"): + size = get_du_size(data_unit, img_pth) + if not size: + continue + img_h, img_w = size + aspect_ratio = img_w / img_h + writer.write(aspect_ratio) class AreaMetric(Metric): TITLE = "Area" SHORT_DESCRIPTION = "Ranks images by their area (width*height)." - LONG_DESCRIPTION = r"""Ranks images by their area (width*height). + LONG_DESCRIPTION = r"""Ranks images by their area. -Area is computed as the product of image width and image height. +Area is computed as the product of image width and image height ($width \times height$). """ METRIC_TYPE = MetricType.HEURISTIC DATA_TYPE = DataType.IMAGE ANNOTATION_TYPE = AnnotationType.NONE - @staticmethod - def rank_by_area(image): - return image.shape[0] * image.shape[1] - def execute(self, iterator: Iterator, writer: CSVMetricWriter): - return iterate_with_rank_fn(iterator, writer, self.rank_by_area, self.TITLE) + for data_unit, img_pth in iterator.iterate(desc=f"Computing {self.TITLE}"): + size = get_du_size(data_unit, img_pth) + if not size: + continue + image_area = size[0] * size[1] # H * W + writer.write(image_area)