Skip to content

Commit

Permalink
Fix (frame extraction): Use fps in ffmpeg command (#665)
Browse files Browse the repository at this point in the history
* fix: remove forbidden chars in file name

* fix: styling

* fix: skip non-existent frames

* fix: add fps info to ffmpeg command

* fix: raise error instead of skipping
  • Loading branch information
Gorkem-Encord authored Oct 10, 2023
1 parent cb09648 commit d391dd6
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 10 deletions.
2 changes: 1 addition & 1 deletion src/encord_active/db/scripts/migrate_disk_to_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@
"Random Values on Objects": "metric_random",
"Image-level Annotation Quality": "metric_annotation_quality",
"Shape outlier detection": "metric_label_shape_outlier",
"Detect Occlusion in Video": "$SKIP"
"Detect Occlusion in Video": "$SKIP",
}

# Metrics that need to be migrated to the normalised format from percentage for consistency with other metrics.
Expand Down
21 changes: 17 additions & 4 deletions src/encord_active/lib/common/data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,13 @@
_EXTRACT_FRAMES_FOLDER: tempfile.TemporaryDirectory = tempfile.TemporaryDirectory()


def extract_frames(video_file_name: Path, img_dir: Path, data_hash: str, symlink_folder: bool = True) -> None:
def extract_frames(
video_file_name: Path,
img_dir: Path,
data_hash: str,
frame_rate: Optional[float] = None,
symlink_folder: bool = True,
) -> None:
if data_hash not in _EXTRACT_FRAMES_CACHE:
while True:
try:
Expand All @@ -34,7 +40,7 @@ def extract_frames(video_file_name: Path, img_dir: Path, data_hash: str, symlink
pass

try:
_extract_frames(video_file_name, tempdir, data_hash)
_extract_frames(video_file_name, tempdir, data_hash, frame_rate)
except Exception:
shutil.rmtree(tempdir, ignore_errors=True)
raise
Expand All @@ -50,10 +56,17 @@ def extract_frames(video_file_name: Path, img_dir: Path, data_hash: str, symlink
(img_dir / frame.name).symlink_to(frame, target_is_directory=False)


def _extract_frames(video_file_name: Path, img_dir: Path, data_hash: str) -> None:
def _extract_frames(video_file_name: Path, img_dir: Path, data_hash: str, frame_rate: Optional[float]) -> None:
# DENIS: for the rest to work, I will need to throw if the current directory exists and give a nice user warning.
img_dir.mkdir(parents=True, exist_ok=True)
command = f'ffmpeg -i "{video_file_name}" -start_number 0 {img_dir}/{data_hash}_%d.png -hide_banner'

if frame_rate:
command = (
f'ffmpeg -i "{video_file_name}" -r {frame_rate} -start_number 0 {img_dir}/{data_hash}_%d.png -hide_banner'
)
else:
command = f'ffmpeg -i "{video_file_name}" -start_number 0 {img_dir}/{data_hash}_%d.png -hide_banner'

if subprocess.run(command, shell=True, capture_output=True, stdout=None, check=False).returncode != 0:
raise RuntimeError(
"Failed to split the video into multiple image files. Please ensure that you have FFMPEG "
Expand Down
6 changes: 4 additions & 2 deletions src/encord_active/lib/common/iterator.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
import re
import tempfile
from abc import abstractmethod
from collections.abc import Sized
Expand Down Expand Up @@ -130,14 +131,15 @@ def iterate(self, desc: str = "") -> Generator[Tuple[dict, Optional[Image.Image]
# Create temporary folder containing the video
with tempfile.TemporaryDirectory() as working_dir:
working_path = Path(working_dir)
video_path = working_path / str(data_unit["data_title"])
safe_data_title = re.sub(r'[\\/:*?"<>|\x00-\x1F\x7F ]', "_", data_unit["data_title"])
video_path = working_path / safe_data_title
video_images_dir = working_path / "images"
download_file(
video_metadata.signed_url,
project_dir=self.project_file_structure.project_dir,
destination=video_path,
)
extract_frames(video_path, video_images_dir, self.du_hash)
extract_frames(video_path, video_images_dir, self.du_hash, data_unit["data_fps"])

fake_data_unit = deepcopy(data_unit)

Expand Down
2 changes: 1 addition & 1 deletion src/encord_active/lib/project/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -509,7 +509,7 @@ def split_lr_video(label_row: LabelRow, project_file_structure: ProjectFileStruc
num_frames = count_frames(video_path)
frames_per_second = get_frames_per_second(video_path)
video_images = Path(video_dir) / "images"
extract_frames(video_path, video_images, data_hash)
extract_frames(video_path, video_images, data_hash, frames_per_second)
image_path = next(video_images.iterdir())
image = Image.open(image_path)

Expand Down
8 changes: 6 additions & 2 deletions src/encord_active/lib/project/project_file_structure.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,9 @@ def iter_data_unit_with_image(
download_file(
data_unit_struct.signed_url, project_dir=self._project.project_dir, destination=video_file
)
extract_frames(video_file, images_dir, data_unit_struct.du_hash)
extract_frames(
video_file, images_dir, data_unit_struct.du_hash, data_unit_struct.frames_per_second
)
downloaded_image = next(images_dir.glob(f"{data_unit_struct.du_hash}_{data_unit_struct.frame}.*"))
yield data_unit_struct, Image.open(downloaded_image)
else:
Expand Down Expand Up @@ -284,7 +286,9 @@ def iter_data_unit_with_image_or_signed_url(
else:
video_file = video_dir / label_row_json["data_title"]
download_file(data_unit_struct.signed_url, project_dir=self._project, destination=video_file)
extract_frames(video_file, images_dir, data_unit_struct.du_hash)
extract_frames(
video_file, images_dir, data_unit_struct.du_hash, data_unit_struct.frames_per_second
)
downloaded_image = next(images_dir.glob(f"{data_unit_struct.du_hash}_{data_unit_struct.frame}.*"))
yield data_unit_struct, Image.open(downloaded_image)
else:
Expand Down

0 comments on commit d391dd6

Please sign in to comment.