diff --git a/.gitignore b/.gitignore index 1e50ab4f0..00cbd0dc0 100644 --- a/.gitignore +++ b/.gitignore @@ -347,5 +347,7 @@ tags # End of https://www.toptal.com/developers/gitignore/api/macos,linux,vim,python,pycharm /conf/config.yaml /pocs/outputs/ + .hydra -.idea \ No newline at end of file +.idea + diff --git a/README.md b/README.md index e9b687e4f..4a0c2ae8e 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ To install dependencies, run ```shell > poetry install ``` - +If you encounter any error, please check [here](https://github.com/encord-team/data-quality-pocs/wiki/Installation). ## Configuration Before you start, you should copy the `conf/config_example.yaml` to `conf/config.yaml` and update the settings to match your local system / preferences. diff --git a/pocs/img_features.py b/pocs/img_features.py index eef882354..1b3c9c9cd 100644 --- a/pocs/img_features.py +++ b/pocs/img_features.py @@ -42,10 +42,21 @@ def rank_by_blur(image): return 1 - cv2.Laplacian(image, cv2.CV_64F).var() +options = { + "contrast": (rank_by_contrast, "Contrast"), + "green": (rank_by_green, "Greenness"), + "red": (rank_by_red, "Redness"), + "blue": (rank_by_blue, "Blueness"), + "brightness": (rank_by_brightness, "Brightness"), + "sharpness": (rank_by_sharpness, "Sharpness"), + "blurriness": (rank_by_blur, "Blurriness"), +} + + @hydra.main(version_base=None, config_path="../conf", config_name="config") def main(cfg: DictConfig): cache_dir, project = fetch_project_info(cfg) - iterator = DatasetIterator(project, cache_dir, use_images=True, subset_size=100) + iterator = DatasetIterator(project, cache_dir, use_images=True, subset_size=-1) # When you log stuff, this will be stored in the `pocs/outputs` directory. @@ -53,23 +64,24 @@ def main(cfg: DictConfig): # You can build/load databases of embeddings, compute statistics etc, # ... - with CSVIndexWriter(cache_dir, iterator, prefix="brightness-indexer") as writer: - for data_unit, img_pth in tqdm(iterator.iterate(), desc="Iterating data units", total=len(iterator)): - key = iterator.get_identifier() + for rank_fn, name in options.values(): + with CSVIndexWriter(cache_dir, iterator, prefix=f"{name}-indexer") as writer: + for data_unit, img_pth in tqdm(iterator.iterate(), desc=f"Looking for {name}", total=len(iterator)): + key = iterator.get_identifier() - # This is where you do the actual inference. + # This is where you do the actual inference. - # Some convenient properties of the current data. - # ``iterator.label_hash`` the label hash of the current data unit - # ``iterator.du_hash`` the data unit hash of - # ``iterator.frame`` the frame of the current data unit hash of - # ``iterator.num_frame`` the total number of frames in the label row. + # Some convenient properties of the current data. + # ``iterator.label_hash`` the label hash of the current data unit + # ``iterator.du_hash`` the data unit hash of + # ``iterator.frame`` the frame of the current data unit hash of + # ``iterator.num_frame`` the total number of frames in the label row. - # Do your thing (inference) - image = cv2.imread(img_pth.as_posix()) - image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + # Do your thing (inference) + image = cv2.imread(img_pth.as_posix()) + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) - writer.write_score(key, rank_by_brightness(image), "Brightness") + writer.write_score(key, rank_fn(image), "") if __name__ == '__main__':