Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Videolab #243

Draft
wants to merge 20 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ coverage.xml
# Misc
results/
image_files*
data/
frames

# datasets
cifar*
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,11 @@ pytorch = ["torchvision>=0.12.0"]
azure = ["adlfs>=2022.2.0"] # latest compatible with Python 3.7
gcs = ["gcsfs>=2022.1.0"] # latest compatible with Python 3.7
s3 = ["s3fs>=2023.1.0"] # latest compatible with Python 3.7
video = ["av>=10.0.0"]

all = ["cleanvision[huggingface,pytorch,azure,gcs,s3]"]
all = ["cleanvision[huggingface,pytorch,azure,gcs,s3,video]"]

[project.urls]
"Source" = "https://github.com/cleanlab/cleanvision"
"Bug Tracker" = "https://github.com/cleanlab/cleanvision/issues"
"Documentation" = "https://cleanvision.readthedocs.io/"

3 changes: 3 additions & 0 deletions src/cleanvision/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import sys

from cleanvision.imagelab import Imagelab as _Imagelab
from cleanvision.videolab import Videolab as _Videolab

PYTHON_VERSION_INFO = sys.version_info

Expand All @@ -21,3 +23,4 @@ def get_version() -> str:
pass

Imagelab = _Imagelab
Videolab = _Videolab
59 changes: 59 additions & 0 deletions src/cleanvision/dataset/video_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from cleanvision.dataset.base_dataset import Dataset
from pathlib import Path
from typing import Generator, Iterator, List, Optional, Union
from cleanvision.utils.constants import VIDEO_FILE_EXTENSIONS


class VideoDataset(Dataset):
    """Wrapper class to handle video datasets.

    The dataset is populated either by recursively scanning ``data_folder``
    for files matching ``VIDEO_FILE_EXTENSIONS`` or from an explicit list of
    ``filepaths``. The collected paths are exposed through ``index``.
    """

    def __init__(
        self,
        data_folder: Optional[str] = None,
        filepaths: Optional[List[str]] = None,
    ) -> None:
        """Determine video dataset source and populate index.

        Parameters
        ----------
        data_folder:
            Directory that is scanned recursively for video files. Takes
            precedence over ``filepaths`` when it is a non-empty string.
        filepaths:
            Explicit video file paths; only used when ``data_folder`` is
            not given.

        Raises
        ------
        ValueError
            If neither ``data_folder`` nor ``filepaths`` is provided.
        """
        if data_folder:
            # get filepaths from video dataset directory
            self._filepaths = [
                str(path) for path in self.__get_filepaths(Path(data_folder))
            ]
        elif filepaths is not None:
            # store user supplied video file paths
            self._filepaths = filepaths
        else:
            # An explicit exception instead of `assert`: asserts are removed
            # when Python runs with -O, which would defer the failure to a
            # confusing error inside `_set_index`.
            raise ValueError(
                "Either `data_folder` or `filepaths` must be provided "
                "to create a VideoDataset."
            )

        # create index
        self._set_index()
        # default location for extracted frames: "frames" under the current
        # working directory; can be changed with `set_frames_dir`
        self.frames_dir = Path.cwd() / "frames"

    def __len__(self) -> int:
        """Get video dataset file count."""
        return len(self.index)

    def __iter__(self) -> Iterator[Union[int, str]]:
        """Iterate over the entries of the index (the video file paths)."""
        return iter(self.index)

    def _set_index(self) -> None:
        """Create internal storage for filepaths."""
        # shallow copy so the index is a separate list from `_filepaths`
        self.index = list(self._filepaths)

    def __get_filepaths(self, dataset_path: Path) -> Generator[Path, None, None]:
        """Scan file system for video files and grab their file paths.

        Yields every path below ``dataset_path`` (searched recursively) that
        matches one of the glob patterns in ``VIDEO_FILE_EXTENSIONS``.
        """
        # notify user
        print(f"Reading videos from {dataset_path}")

        # iterate over video file extensions
        for ext in VIDEO_FILE_EXTENSIONS:
            # loop through video paths matching ext
            yield from dataset_path.glob(f"**/{ext}")

    def __getitem__(self, item: int) -> str:
        """Return the file path stored at position ``item`` of the index."""
        return self.index[item]

    def set_frames_dir(self, frames_dir: Path) -> None:
        """Override the directory stored in ``frames_dir``."""
        self.frames_dir = frames_dir

Check warning on line 59 in src/cleanvision/dataset/video_dataset.py

View check run for this annotation

Codecov / codecov/patch

src/cleanvision/dataset/video_dataset.py#L59

Added line #L59 was not covered by tests
4 changes: 2 additions & 2 deletions src/cleanvision/imagelab.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
)
from cleanvision.utils.base_issue_manager import IssueManager
from cleanvision.utils.constants import (
DEFAULT_ISSUE_TYPES,
DEFAULT_ISSUE_TYPES_IMAGELAB,
DUPLICATE,
DUPLICATE_ISSUE_TYPES_LIST,
IMAGE_PROPERTY,
Expand Down Expand Up @@ -166,7 +166,7 @@ def _set_default_config(self) -> Dict[str, Any]:
@staticmethod
def list_default_issue_types() -> List[str]:
"""Returns a list of the issue types that are run by default in :py:meth:`Imagelab.find_issues`"""
return DEFAULT_ISSUE_TYPES
return DEFAULT_ISSUE_TYPES_IMAGELAB

@staticmethod
def list_possible_issue_types() -> List[str]:
Expand Down
14 changes: 13 additions & 1 deletion src/cleanvision/utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
"*.WEBP",
] # filetypes supported by PIL

DEFAULT_ISSUE_TYPES = [
DEFAULT_ISSUE_TYPES_IMAGELAB = [
"dark",
"light",
"odd_aspect_ratio",
Expand All @@ -48,3 +48,15 @@
"grayscale",
"odd_size",
]

# Default issue types for Videolab.
# NOTE(review): presumably the video counterpart of
# DEFAULT_ISSUE_TYPES_IMAGELAB — confirm against how Videolab consumes it.
DEFAULT_ISSUE_TYPES_VIDEOLAB = [
"dark",
"light",
"odd_aspect_ratio",
"low_information",
"blurry",
"grayscale",
"odd_size",
]

# Glob patterns for video files; VideoDataset combines each one with "**/" to
# search a dataset directory recursively. Only lowercase extensions are listed,
# so whether e.g. "clip.MP4" matches depends on the glob/filesystem case
# handling — NOTE(review): confirm whether uppercase variants are needed.
VIDEO_FILE_EXTENSIONS: List[str] = ["*.mp4", "*.avi", "*.mkv", "*.mov", "*.webm"]
39 changes: 39 additions & 0 deletions src/cleanvision/utils/frame_sampler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from importlib import import_module
from pathlib import Path


class FrameSampler:
    """Simplest frame sampling strategy: keep every k-th decoded frame."""

    def __init__(self, k: int) -> None:
        """Store frame sample interval k and import PyAV.

        Parameters
        ----------
        k:
            Sampling interval; frames whose index is a multiple of ``k``
            are kept. Must be at least 1.

        Raises
        ------
        ValueError
            If ``k`` is smaller than 1.
        ImportError
            If the optional dependency ``av`` (PyAV) is not installed.
        """
        # Reject invalid intervals up front: k == 0 would otherwise only fail
        # later inside `sample` with a ZeroDivisionError from the modulo.
        if k < 1:
            raise ValueError(
                f"Frame sampling interval `k` must be a positive integer, got {k}."
            )

        # storing frame sampling interval
        self.k = k

        # attempting to import PyAV lazily, since it is an optional dependency
        try:
            self.av = import_module("av")
        except ImportError as error:
            raise ImportError(
                "Cannot import package `av`. "
                "Please install it via `pip install av` and then try again."
            ) from error

    def sample(self, video_path: str, output_dir: Path) -> None:
        """Loop through frames and store every k-th frame.

        Parameters
        ----------
        video_path:
            Path of the video file handed to ``av.open``.
        output_dir:
            Directory the sampled frames are written to as ``.jpg`` files
            named after the frame timestamp. Created if it does not exist.
        """
        with self.av.open(video_path) as container:
            # get video stream
            stream = container.streams.video[0]

            # make sure the target directory exists before saving frames
            output_dir.mkdir(parents=True, exist_ok=True)

            # iterate frames
            for frame_indx, frame in enumerate(container.decode(stream)):
                # skip everything that is not a k-th frame
                if frame_indx % self.k:
                    continue

                # get PIL image
                frame_pil = frame.to_image()

                # use frame timestamp as image file name
                image_file_name = f"{frame.time}.jpg"

                # save to output dir
                frame_pil.save(output_dir / image_file_name)

Check warning on line 39 in src/cleanvision/utils/frame_sampler.py

View check run for this annotation

Codecov / codecov/patch

src/cleanvision/utils/frame_sampler.py#L39

Added line #L39 was not covered by tests
Loading
Loading