Skip to content

Commit

Permalink
fix(export): Use mappings instead of renaming files (#273)
Browse files Browse the repository at this point in the history
* fix(export): Use mappings instead of renaming files

Now when exporting projects and creating subsets we create file mappings so as to avoid issues with versioning.
This also fixes a versioning bug that discards your changes when you're already on the latest version.
  • Loading branch information
yogesh-encord authored Mar 22, 2023
1 parent 7c9e78b commit f95a504
Show file tree
Hide file tree
Showing 14 changed files with 171 additions and 97 deletions.
3 changes: 2 additions & 1 deletion src/encord_active/app/actions_page/export_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,8 @@ def filter_dataframe(df: pd.DataFrame) -> pd.DataFrame:
"Choose tags to filter", options=Tags().all(), format_func=lambda x: x.name, key=key
)
filtered_rows = [True if set(tag_filters) <= set(x) else False for x in filtered["tags"]]
filtered = filtered.loc[filtered_rows]
filtered_items = filtered.loc[filtered_rows]
filtered = filtered[filtered.data_row_id.isin(filtered_items["data_row_id"])]

# Treat columns with < 10 unique values as categorical
elif is_categorical_dtype(filtered[column]) or filtered[column].nunique() < 10:
Expand Down
2 changes: 1 addition & 1 deletion src/encord_active/app/common/components/metric_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,7 @@ def render_metric_summary(


def render_summary_item(row, metric_name: str, iqr_outliers: IqrOutliers, metric_scope: MetricScope):
image = show_image_and_draw_polygons(row, get_state().project_paths.data, get_state().object_drawing_configurations)
image = show_image_and_draw_polygons(row, get_state().project_paths, get_state().object_drawing_configurations)
st.image(image)

multiselect_tag(row, f"{metric_name}_summary")
Expand Down
29 changes: 15 additions & 14 deletions src/encord_active/app/common/components/prediction_grid.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from pathlib import Path
from typing import List, Optional

import pandas as pd
Expand Down Expand Up @@ -27,12 +26,13 @@
LabelMatchSchema,
PredictionMatchSchema,
)
from encord_active.lib.project import ProjectFileStructure


def build_card_for_labels(
label: pd.Series,
predictions: DataFrame[PredictionMatchSchema],
data_dir: Path,
project_file_structure: ProjectFileStructure,
label_color: Color = Color.RED,
):
class_colors = {
Expand All @@ -42,7 +42,7 @@ def build_card_for_labels(
image = show_image_with_predictions_and_label(
label,
predictions,
data_dir,
project_file_structure,
label_color=label_color,
class_colors=class_colors,
draw_configurations=get_state().object_drawing_configurations,
Expand All @@ -57,9 +57,9 @@ def build_card_for_labels(
build_data_tags(label, get_state().predictions.metric_datas.selected_label)


def build_card_for_predictions(row: pd.Series, data_dir: Path, box_color=Color.GREEN):
def build_card_for_predictions(row: pd.Series, project_file_structure: ProjectFileStructure, box_color=Color.GREEN):
conf = get_state().object_drawing_configurations
image = show_image_and_draw_polygons(row, data_dir, draw_configurations=conf, skip_object_hash=True)
image = show_image_and_draw_polygons(row, project_file_structure, draw_configurations=conf, skip_object_hash=True)
image = draw_object(image, row, draw_configuration=conf, color=box_color, with_box=True)
st.image(image)
multiselect_tag(row, "metric_view", is_predictions=True)
Expand All @@ -74,17 +74,17 @@ def build_card_for_predictions(row: pd.Series, data_dir: Path, box_color=Color.G
def build_card(
row: pd.Series,
predictions: Optional[DataFrame[PredictionMatchSchema]],
data_dir: Path,
project_file_structure: ProjectFileStructure,
box_color: Color = Color.GREEN,
):
if predictions is not None:
build_card_for_labels(row, predictions, data_dir, box_color)
build_card_for_labels(row, predictions, project_file_structure, box_color)
else:
build_card_for_predictions(row, data_dir, box_color)
build_card_for_predictions(row, project_file_structure, box_color)


def build_card_classifications(row: pd.Series, data_dir: Path):
image = show_image_and_draw_polygons(row, data_dir)
def build_card_classifications(row: pd.Series, project_file_structure: ProjectFileStructure):
image = show_image_and_draw_polygons(row, project_file_structure)
st.image(image)
multiselect_tag(row, "metric_view_classification", is_predictions=True)
build_data_tags(row, get_state().predictions.metric_datas_classification.selected_prediction)
Expand All @@ -94,7 +94,7 @@ def build_card_classifications(row: pd.Series, data_dir: Path):


def prediction_grid(
data_dir: Path,
project_file_structure: ProjectFileStructure,
model_predictions: DataFrame[PredictionMatchSchema],
labels: Optional[DataFrame[LabelMatchSchema]] = None,
box_color: Color = Color.GREEN,
Expand Down Expand Up @@ -134,11 +134,12 @@ def prediction_grid(
divider()
cols = list(st.columns(n_cols))
with cols.pop(0):
build_card(row, frame_additionals, data_dir, box_color=box_color)
build_card(row, frame_additionals, project_file_structure, box_color=box_color)


def prediction_grid_classifications(
data_dir: Path, model_predictions: DataFrame[ClassificationPredictionMatchSchemaWithClassNames]
project_file_structure: ProjectFileStructure,
model_predictions: DataFrame[ClassificationPredictionMatchSchemaWithClassNames],
):
df = model_predictions
selected_metric = get_state().predictions.metric_datas_classification.selected_prediction or ""
Expand All @@ -162,4 +163,4 @@ def prediction_grid_classifications(
divider()
cols = list(st.columns(n_cols))
with cols.pop(0):
build_card_classifications(row, data_dir)
build_card_classifications(row, project_file_structure)
2 changes: 1 addition & 1 deletion src/encord_active/app/common/components/similarities.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def show_similarities(identifier: str, expander: DeltaGenerator, embedding_infor
if embedding_information.has_annotations:
load_image = show_image_and_draw_polygons

image = load_image(nearest_image["key"], get_state().project_paths.data)
image = load_image(nearest_image["key"], get_state().project_paths)

st_columns[column_id].image(image)
st_columns[column_id].write(f"Annotated as `{nearest_image['name']}`")
Expand Down
6 changes: 3 additions & 3 deletions src/encord_active/app/data_quality/sub_pages/explorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,8 +258,6 @@ def build_card(
"""
Builds each sub card (the content displayed for each row in a csv file).
"""
data_dir = get_state().project_paths.data

identifier_parts = 4 if embedding_information.has_annotations else 3
identifier = "_".join(str(row["identifier"]).split("_")[:identifier_parts])

Expand All @@ -271,7 +269,9 @@ def build_card(
st.write(f"{embedding_information.type.value} card type is not defined in EmbeddingTypes")
return

image = show_image_and_draw_polygons(row, data_dir, draw_configurations=get_state().object_drawing_configurations)
image = show_image_and_draw_polygons(
row, get_state().project_paths, draw_configurations=get_state().object_drawing_configurations
)
st.image(image)

# === Write scores and link to editor === #
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def _build_classifications(
else:
histogram = get_histogram(fp_df, metric_name)
st.altair_chart(histogram, use_container_width=True)
prediction_grid_classifications(get_state().project_paths.data, model_predictions=fp_df)
prediction_grid_classifications(get_state().project_paths, model_predictions=fp_df)

def build(
self,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def _build_classifications(
else:
histogram = get_histogram(tp_df, metric_name)
st.altair_chart(histogram, use_container_width=True)
prediction_grid_classifications(get_state().project_paths.data, model_predictions=tp_df)
prediction_grid_classifications(get_state().project_paths, model_predictions=tp_df)

def build(
self,
Expand Down
66 changes: 38 additions & 28 deletions src/encord_active/lib/common/image_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import json
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Union

import cv2
Expand All @@ -16,6 +15,11 @@
from encord_active.lib.db.predictions import BoundingBox
from encord_active.lib.labels.object import ObjectShape
from encord_active.lib.model_predictions.reader import PredictionMatchSchema
from encord_active.lib.project import (
DataUnitStructure,
LabelRowStructure,
ProjectFileStructure,
)


@dataclass
Expand Down Expand Up @@ -96,7 +100,7 @@ def draw_object(
def show_image_with_predictions_and_label(
label: pd.Series,
predictions: DataFrame[PredictionMatchSchema],
data_dir: Path,
project_file_structure: ProjectFileStructure,
draw_configurations: Optional[ObjectDrawingConfigurations] = None,
label_color: Color = Color.RED,
class_colors: Optional[Dict[int, str]] = None,
Expand All @@ -109,12 +113,12 @@ def show_image_with_predictions_and_label(
:param label: The csv row of the false-negative label to display (from a LabelSchema).
:param predictions: All the predictions on the same image with the samme predicted class (from a PredictionSchema).
:param data_dir: The data directory of the project
:param project_file_structure: The directory of the project
:param label_color: The hex color to use when drawing the prediction.
:param class_colors: Dict of [class_id, hex_color] pairs.
"""
class_colors = class_colors or {}
image = load_or_fill_image(label, data_dir)
image = load_or_fill_image(label, project_file_structure)

for _, pred in predictions.iterrows():
color = class_colors.get(pred["class_id"], Color.PURPLE)
Expand All @@ -125,45 +129,49 @@ def show_image_with_predictions_and_label(

def show_image_and_draw_polygons(
row: Union[Series, str],
data_dir: Path,
project_file_structure: ProjectFileStructure,
draw_configurations: Optional[ObjectDrawingConfigurations] = None,
skip_object_hash: bool = False,
) -> np.ndarray:
image = load_or_fill_image(row, data_dir)
image = load_or_fill_image(row, project_file_structure)

if draw_configurations is None:
draw_configurations = ObjectDrawingConfigurations()

if draw_configurations.draw_objects:
img_h, img_w = image.shape[:2]
for color, geometry in get_geometries(row, img_h, img_w, data_dir, skip_object_hash=skip_object_hash):
for color, geometry in get_geometries(
row, img_h, img_w, project_file_structure, skip_object_hash=skip_object_hash
):
image = draw_object_with_background_color(image, geometry, color, draw_configurations)
return image


def load_or_fill_image(row: Union[pd.Series, str], data_dir: Path) -> np.ndarray:
def load_or_fill_image(row: Union[pd.Series, str], project_file_structure: ProjectFileStructure) -> np.ndarray:
"""
Tries to read the infered image path. If not possible, generates a white image
and indicates what the error seemd to be embedded in the image.
Tries to read the inferred image path. If not possible, generates a white image
and indicates what the error seemed to be embedded in the image.
:param row: A csv row from either a metric, a prediction, or a label csv file.
:return: Numpy / cv2 image.
"""
key = __get_key(row)

img_pth: Optional[Path] = key_to_image_path(key, data_dir)
img_du: Optional[DataUnitStructure] = key_to_data_unit(key, project_file_structure)

if img_pth and img_pth.is_file():
if img_du and img_du.path.is_file():
try:
image = cv2.imread(img_pth.as_posix())
image = cv2.imread(img_du.path.as_posix())
return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
except Exception:
pass

# Read not successful, so tell the user why
error_text = "Image not found" if not img_pth else "File seems broken"
error_text = "Image not found" if not img_du else "File seems broken"

_, du_hash, *_ = key.split("_")
lr = json.loads(key_to_lr_path(key, data_dir).read_text(encoding="utf-8"))
# lr = json.loads(key_to_lr_path(key, project_file_structure).read_text(encoding="utf-8"))
label_row_structure = key_to_label_row_structure(key, project_file_structure)
lr = json.loads(label_row_structure.label_row_file.read_text())

h, w = get_du_size(lr["data_units"].get(du_hash, {}), None) or (600, 900)

Expand Down Expand Up @@ -235,7 +243,11 @@ def __to_absolute_points(bounding_box: BoundingBox, height: int, width: int):


def get_geometries(
row: Union[pd.Series, str], img_h: int, img_w: int, data_dir: Path, skip_object_hash: bool = False
row: Union[pd.Series, str],
img_h: int,
img_w: int,
project_file_structure: ProjectFileStructure,
skip_object_hash: bool = False,
) -> List[Tuple[str, np.ndarray]]:
"""
Loads cached label row and computes geometries from the label row.
Expand All @@ -247,10 +259,8 @@ def get_geometries(
key = __get_key(row)
_, du_hash, frame, *remainder = key.split("_")

lr_pth = key_to_lr_path(key, data_dir)
with lr_pth.open("r") as f:
label_row = json.load(f)

label_row_structure = key_to_label_row_structure(key, project_file_structure)
label_row = json.loads(label_row_structure.label_row_file.read_text())
du = label_row["data_units"][du_hash]

geometries = []
Expand All @@ -277,22 +287,22 @@ def get_geometries(
return valid_geometries


def key_to_lr_path(key: str, data_dir: Path) -> Path:
def key_to_label_row_structure(key: str, project_file_structure: ProjectFileStructure) -> LabelRowStructure:
label_hash, *_ = key.split("_")
return data_dir / label_hash / "label_row.json"
return project_file_structure.label_row_structure(label_hash)


def key_to_image_path(key: str, data_dir: Path) -> Optional[Path]:
def key_to_data_unit(key: str, project_file_structure: ProjectFileStructure) -> Optional[DataUnitStructure]:
"""
Infer image path from the identifier stored in the csv files.
:param key: the row["identifier"] from a csv row
:return: The associated image path if it exists or a path to a placeholder otherwise
"""
label_hash, du_hash, frame, *_ = key.split("_")
img_folder = data_dir / label_hash / "images"
label_row_structure = project_file_structure.label_row_structure(label_hash)

# check if it is a video frame
frame_pth = next(img_folder.glob(f"{du_hash}_{int(frame)}.*"), None)
if frame_pth is not None:
return frame_pth
return next(img_folder.glob(f"{du_hash}.*"), None) # So this is an img_group image
frame_du: Optional[DataUnitStructure] = next(label_row_structure.iter_data_unit(du_hash, int(frame)), None)
if frame_du is not None:
return frame_du
return next(label_row_structure.iter_data_unit(du_hash), None) # So this is an img_group image
10 changes: 5 additions & 5 deletions src/encord_active/lib/dataset/summary_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def get_median_value_of_2d_array(array: np.ndarray) -> np.ndarray:
return array[item_index[0][0], :]


def get_all_annotation_numbers(project_paths: ProjectFileStructure) -> AnnotationStatistics:
def get_all_annotation_numbers(project_file_structure: ProjectFileStructure) -> AnnotationStatistics:
"""
Returns label statistics for both objects and classifications. Does not count nested
labels, only counts the immediate labels.
Expand All @@ -66,7 +66,7 @@ def get_all_annotation_numbers(project_paths: ProjectFileStructure) -> Annotatio
classification_label_counter = 0
object_label_counter = 0

project_ontology = json.loads((project_paths.ontology).read_text(encoding="utf-8"))
project_ontology = json.loads((project_file_structure.ontology).read_text(encoding="utf-8"))
ontology = OntologyStructure.from_dict(project_ontology)

for object_item in ontology.objects:
Expand All @@ -79,9 +79,9 @@ def get_all_annotation_numbers(project_paths: ProjectFileStructure) -> Annotatio
for option in classification_item.attributes[0].options:
labels.classifications[classification_item.attributes[0].name][option.label] = 0

for label_row in (project_paths.data).iterdir():
if (label_row / "label_row.json").exists():
label_row_meta = json.loads((label_row / "label_row.json").read_text(encoding="utf-8"))
for label_row_structure in project_file_structure.iter_labels():
if label_row_structure.label_row_file.exists():
label_row_meta = json.loads(label_row_structure.label_row_file.read_text(encoding="utf-8"))
if label_row_meta["data_type"] in [DataType.IMAGE.value, DataType.IMG_GROUP.value]:
for data_unit in label_row_meta["data_units"].values():

Expand Down
5 changes: 5 additions & 0 deletions src/encord_active/lib/db/tags.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,11 @@ def all(self) -> List[Tag]:
Tag(name, scope) for name, scope, in conn.execute(f"SELECT name, scope FROM {TABLE_NAME}").fetchall()
]

# Bulk-insert several tags with a single `executemany` call.
# Each element of `tags` supplies the two values bound to the `?, ?`
# placeholders, in (name, scope) column order.
# NOTE(review): assumes a Tag unpacks positionally as (name, scope), as the
# `Tag(name, scope)` construction in `all()` suggests — confirm against the Tag definition.
# No stripping or validation of tag names is applied in this method; the
# tags are passed to the statement as given.
# Returns whatever `conn.executemany` returns.
@ensure_existence
def create_many(self, tags: List[Tag]):
with DBConnection() as conn:
return conn.executemany(f"INSERT INTO {TABLE_NAME} (name, scope) VALUES(?, ?) ", tags)

@ensure_existence
def create_tag(self, tag: Tag):
stripped = tag.name.strip()
Expand Down
Loading

0 comments on commit f95a504

Please sign in to comment.