diff --git a/src/encord_active/lib/common/iterator.py b/src/encord_active/lib/common/iterator.py index 492c81b8f..787fad780 100644 --- a/src/encord_active/lib/common/iterator.py +++ b/src/encord_active/lib/common/iterator.py @@ -44,7 +44,9 @@ def __init__(self, cache_dir: Path, subset_size: Optional[int] = None, **kwargs) self.label_rows = self.project.label_rows @abstractmethod - def iterate(self, desc: str = "") -> Generator[Tuple[dict, Optional[Image.Image]], None, None]: + def iterate( + self, desc: str = "", include_images: bool = True + ) -> Generator[Tuple[dict, Optional[Image.Image]], None, None]: pass @abstractmethod @@ -77,7 +79,9 @@ def __init__(self, cache_dir: Path, subset_size: Optional[int] = None, skip_labe 0, ) - def iterate(self, desc: str = "") -> Generator[Tuple[dict, Optional[Image.Image]], None, None]: + def iterate( + self, desc: str = "", include_images: bool = True + ) -> Generator[Tuple[dict, Optional[Image.Image]], None, None]: with PrismaConnection(self.project_file_structure) as cache_db: pbar = tqdm(total=self.length, desc=desc, leave=False) for label_hash, label_row in self.label_rows.items(): @@ -87,7 +91,6 @@ def iterate(self, desc: str = "") -> Generator[Tuple[dict, Optional[Image.Image] self.num_frames = len(label_row.data_units) data_units = sorted(label_row.data_units.values(), key=lambda du: int(du["data_sequence"])) for data_unit in data_units: - if self._skip_labeled_data: du_label = data_unit.get("labels", {}) if du_label.get("objects", []) != [] or du_label.get("classifications", []) != []: @@ -104,7 +107,7 @@ def iterate(self, desc: str = "") -> Generator[Tuple[dict, Optional[Image.Image] None, ) image = None - if img_metadata is not None: + if img_metadata is not None and include_images: image = download_image( img_metadata.signed_url, project_dir=self.project_file_structure.project_dir, @@ -156,7 +159,7 @@ def iterate(self, desc: str = "") -> Generator[Tuple[dict, Optional[Image.Image] continue image_path = next(video_images_dir.glob(f"{self.du_hash}_{frame_id}.*"), None) - if image_path: + if image_path and include_images: yield fake_data_unit, Image.open(image_path) else: yield fake_data_unit, None diff --git a/src/encord_active/lib/metrics/geometric/annotation_duplicates.py b/src/encord_active/lib/metrics/geometric/annotation_duplicates.py index f9535b894..c1afebc27 100644 --- a/src/encord_active/lib/metrics/geometric/annotation_duplicates.py +++ b/src/encord_active/lib/metrics/geometric/annotation_duplicates.py @@ -34,7 +34,7 @@ def execute(self, iterator: Iterator, writer: CSVMetricWriter): valid_annotation_types = {annotation_type.value for annotation_type in self.metadata.annotation_type} found_any = False - for data_unit, _ in iterator.iterate(desc="Looking for duplicates"): + for data_unit, _ in iterator.iterate(desc="Looking for duplicates", include_images=False): objects = [obj for obj in data_unit["labels"].get("objects", []) if obj["shape"] in valid_annotation_types] polygons = [get_polygon(obj) for obj in objects] diff --git a/src/encord_active/lib/metrics/geometric/hu_temporal.py b/src/encord_active/lib/metrics/geometric/hu_temporal.py index 64474b651..63b2d571c 100644 --- a/src/encord_active/lib/metrics/geometric/hu_temporal.py +++ b/src/encord_active/lib/metrics/geometric/hu_temporal.py @@ -63,7 +63,7 @@ def execute(self, iterator: Iterator, writer: CSVMetricWriter): hu_moments_df = get_hu_embeddings(iterator) hu_moments_identifiers = set(hu_moments_df["identifier"]) - for data_unit, _ in iterator.iterate(desc="Computing moment similarity"): + for data_unit, _ in iterator.iterate(desc="Computing moment similarity", include_images=False): for obj in data_unit["labels"].get("objects", []): if obj["shape"] not in valid_annotation_types: continue diff --git a/src/encord_active/lib/metrics/geometric/image_border_closeness.py b/src/encord_active/lib/metrics/geometric/image_border_closeness.py index c477e9667..ff3650335 100644 --- a/src/encord_active/lib/metrics/geometric/image_border_closeness.py +++ b/src/encord_active/lib/metrics/geometric/image_border_closeness.py @@ -33,7 +33,7 @@ def execute(self, iterator: Iterator, writer: CSVMetricWriter): valid_annotation_types = {annotation_type.value for annotation_type in self.metadata.annotation_type} found_any = False - for data_unit, _ in iterator.iterate(desc="Computing closeness to border"): + for data_unit, _ in iterator.iterate(desc="Computing closeness to border", include_images=False): for obj in data_unit["labels"].get("objects", []): if obj["shape"] not in valid_annotation_types: continue diff --git a/src/encord_active/lib/metrics/geometric/object_size.py b/src/encord_active/lib/metrics/geometric/object_size.py index 7cc99109f..eb35e3406 100644 --- a/src/encord_active/lib/metrics/geometric/object_size.py +++ b/src/encord_active/lib/metrics/geometric/object_size.py @@ -19,7 +19,6 @@ def get_area(obj: dict) -> float: if obj["shape"] in {*BoxShapes, ObjectShape.POLYGON}: - points = get_object_coordinates(obj) if points is None or len(points) < 3: logger.debug("Less than 3 points") @@ -65,7 +64,7 @@ def execute(self, iterator: Iterator, writer: CSVMetricWriter): valid_annotation_types = {annotation_type.value for annotation_type in self.metadata.annotation_type} found_any = False - for data_unit, _ in iterator.iterate(desc="Computing object area"): + for data_unit, _ in iterator.iterate(desc="Computing object area", include_images=False): for obj in data_unit["labels"].get("objects", []): if obj["shape"] not in valid_annotation_types: continue diff --git a/src/encord_active/lib/metrics/geometric/occlusion_detection_video.py b/src/encord_active/lib/metrics/geometric/occlusion_detection_video.py index 7b0955e4c..cdb12a8b2 100644 --- a/src/encord_active/lib/metrics/geometric/occlusion_detection_video.py +++ b/src/encord_active/lib/metrics/geometric/occlusion_detection_video.py @@ -105,7 +105,7 @@ def execute(self, iterator: Iterator, writer: CSVMetricWriter): if not videos: logger.info("[Skipping] No videos in dataset. ") - for data_unit, _ in iterator.iterate(desc="Storing occlusion index"): + for data_unit, _ in iterator.iterate(desc="Storing occlusion index", include_images=False): label_row_hash = iterator.label_hash if label_row_hash not in videos.keys(): continue diff --git a/src/encord_active/lib/metrics/heuristic/high_iou_changing_classes.py b/src/encord_active/lib/metrics/heuristic/high_iou_changing_classes.py index 909da20c1..3020a5df3 100644 --- a/src/encord_active/lib/metrics/heuristic/high_iou_changing_classes.py +++ b/src/encord_active/lib/metrics/heuristic/high_iou_changing_classes.py @@ -70,7 +70,7 @@ def execute(self, iterator: Iterator, writer: CSVMetricWriter): label_hash = "" previous_objects = None previous_polygons = None - for data_unit, _ in iterator.iterate(desc="Looking for overlapping objects"): + for data_unit, _ in iterator.iterate(desc="Looking for overlapping objects", include_images=False): label_row = iterator.label_rows[iterator.label_hash] data_type = label_row["data_type"] if not (data_type == "video" or (data_type == "img_group" and len(label_row["data_units"]) > 1)): diff --git a/src/encord_active/lib/metrics/heuristic/missing_objects_and_wrong_tracks.py b/src/encord_active/lib/metrics/heuristic/missing_objects_and_wrong_tracks.py index 544cb321f..d2a47a04e 100644 --- a/src/encord_active/lib/metrics/heuristic/missing_objects_and_wrong_tracks.py +++ b/src/encord_active/lib/metrics/heuristic/missing_objects_and_wrong_tracks.py @@ -96,7 +96,7 @@ def execute(self, iterator: Iterator, writer: CSVMetricWriter): # Prepare sliding window of previous two frames to compare polygons over time window: List[List[Tuple[dict, Polygon]]] = [] - for data_unit, _ in iterator.iterate(desc="Looking for broken tracks"): + for data_unit, _ in iterator.iterate(desc="Looking for broken tracks", include_images=False): label_row = iterator.label_rows[iterator.label_hash] frame = iterator.frame @@ -211,7 +211,7 @@ def execute(self, iterator: Iterator, writer: CSVMetricWriter): # Collect the results in the CSV file. # Everything not found above with get score "1" meaning "no issues". annotated = {k: False for k in error_store.errors} - for data_unit, _ in iterator.iterate(desc="Storing results"): + for data_unit, _ in iterator.iterate(desc="Storing results", include_images=False): for obj in data_unit["labels"].get("objects", []): key = (obj["objectHash"], iterator.frame) if key in error_store.errors: diff --git a/src/encord_active/lib/metrics/heuristic/object_counting.py b/src/encord_active/lib/metrics/heuristic/object_counting.py index c5561329b..b4572c9a1 100644 --- a/src/encord_active/lib/metrics/heuristic/object_counting.py +++ b/src/encord_active/lib/metrics/heuristic/object_counting.py @@ -20,6 +20,6 @@ def execute(self, iterator: Iterator, writer: CSVMetricWriter): if not iterator.project.ontology.objects: return - for data_unit, _ in iterator.iterate(desc="Counting objects"): + for data_unit, _ in iterator.iterate(desc="Counting objects", include_images=False): score = len(data_unit["labels"]["objects"]) if "objects" in data_unit["labels"] else 0 writer.write(score) diff --git a/src/encord_active/lib/metrics/heuristic/random.py b/src/encord_active/lib/metrics/heuristic/random.py index 3724ba273..d758665af 100644 --- a/src/encord_active/lib/metrics/heuristic/random.py +++ b/src/encord_active/lib/metrics/heuristic/random.py @@ -22,7 +22,7 @@ def __init__(self): ) def execute(self, iterator: Iterator, writer: CSVMetricWriter): - for _ in iterator.iterate(desc="Assigning random values to images"): + for _ in iterator.iterate(desc="Assigning random values to images", include_images=False): writer.write(np.random.uniform()) @@ -46,7 +46,9 @@ def execute(self, iterator: Iterator, writer: CSVMetricWriter): valid_annotation_types = {annotation_type.value for annotation_type in self.metadata.annotation_type} found_any = False - for data_unit, _ in iterator.iterate(desc="Searching for objects and assigning random scores"): + for data_unit, _ in iterator.iterate( + desc="Searching for objects and assigning random scores", include_images=False + ): for obj in data_unit["labels"].get("objects", []): if not obj["shape"] in valid_annotation_types: continue diff --git a/src/encord_active/lib/metrics/semantic/image_diversity.py b/src/encord_active/lib/metrics/semantic/image_diversity.py index 4addbf29c..ecabd318f 100644 --- a/src/encord_active/lib/metrics/semantic/image_diversity.py +++ b/src/encord_active/lib/metrics/semantic/image_diversity.py @@ -108,7 +108,7 @@ def execute(self, iterator: Iterator, writer: CSVMetricWriter): data_hash_to_score = self._get_difficulty_ranking(cluster_size) - for data_unit, _ in iterator.iterate(desc="Writing scores to a file"): + for data_unit, _ in iterator.iterate(desc="Writing scores to a file", include_images=False): score = data_hash_to_score.get(data_unit["data_hash"]) if score is not None: writer.write(score=score) diff --git a/src/encord_active/lib/metrics/semantic/image_singularity.py b/src/encord_active/lib/metrics/semantic/image_singularity.py index 570d7b2c8..974478887 100644 --- a/src/encord_active/lib/metrics/semantic/image_singularity.py +++ b/src/encord_active/lib/metrics/semantic/image_singularity.py @@ -92,7 +92,7 @@ def execute(self, iterator: Iterator, writer: CSVMetricWriter): fix_duplicate_image_orders_in_knn_graph_all_rows(query_res.indices) scores = self.score_images(embedding_info, query_res, iterator.project.project_hash) - for data_unit, _ in iterator.iterate(desc="Writing scores to a file"): + for data_unit, _ in iterator.iterate(desc="Writing scores to a file", include_images=False): data_unit_info = scores.get(data_unit["data_hash"]) if data_unit_info is not None: writer.write( diff --git a/src/encord_active/lib/metrics/semantic/img_classification_quality.py b/src/encord_active/lib/metrics/semantic/img_classification_quality.py index 2d19edaef..ffd68fdd3 100644 --- a/src/encord_active/lib/metrics/semantic/img_classification_quality.py +++ b/src/encord_active/lib/metrics/semantic/img_classification_quality.py @@ -279,7 +279,7 @@ def execute(self, iterator: Iterator, writer: CSVMetricWriter): nearest_indexes = self.get_nearest_indexes() self.fix_nearest_indexes(nearest_indexes) key_score_pairs = self.create_key_score_pairs(nearest_indexes) - for data_unit, _ in iterator.iterate(desc="Storing index"): + for data_unit, _ in iterator.iterate(desc="Storing index", include_images=False): key = iterator.get_identifier() is_multiclass = is_multiclass_ontology(iterator.project.ontology) diff --git a/src/encord_active/lib/metrics/semantic/img_object_quality.py b/src/encord_active/lib/metrics/semantic/img_object_quality.py index 3b2ee5777..c1f84e729 100644 --- a/src/encord_active/lib/metrics/semantic/img_object_quality.py +++ b/src/encord_active/lib/metrics/semantic/img_object_quality.py @@ -135,7 +135,7 @@ def execute(self, iterator: Iterator, writer: CSVMetricWriter): label_scores = label_matches.mean(axis=-1) valid_annotation_types = {annotation_type.value for annotation_type in self.metadata.annotation_type} - for data_unit, _ in iterator.iterate(desc="Storing index"): + for data_unit, _ in iterator.iterate(desc="Storing index", include_images=False): for obj in data_unit["labels"].get("objects", []): if obj["shape"] not in valid_annotation_types: continue diff --git a/src/encord_active/lib/model_predictions/iterator.py b/src/encord_active/lib/model_predictions/iterator.py index 955c2f39f..a1fcea424 100644 --- a/src/encord_active/lib/model_predictions/iterator.py +++ b/src/encord_active/lib/model_predictions/iterator.py @@ -165,7 +165,9 @@ def get_encord_classification(self, pred: Series, ontology_classification: Class manualAnnotation=False, ) - def iterate(self, desc: str = "") -> Generator[Tuple[dict, Optional[Image.Image]], None, None]: + def iterate( + self, desc: str = "", include_images: bool = True + ) -> Generator[Tuple[dict, Optional[Image.Image]], None, None]: pbar = tqdm(total=self.length, desc=desc, leave=False) with PrismaConnection(self.project_file_structure) as cache_db: for label_hash, lh_group in self.predictions.groupby("label_hash"): @@ -203,9 +205,7 @@ def iterate(self, desc: str = "") -> Generator[Tuple[dict, Optional[Image.Image] logger.error("The prediction is not in the ontology objects or classifications") du["labels"] = {"objects": objects, "classifications": classifications} - image = self.get_image(fr_preds.iloc[0], cache_db=cache_db) - if image is None: - logger.error(f"Failed to open Image at frame: {self.du_hash}/{fr_preds.iloc[0]}") + image = (include_images and self.get_image(fr_preds.iloc[0], cache_db=cache_db)) or None yield du, image pbar.update(1)