Initial commit

1ssb · Aug 23, 2023 · a435969 · a435969
commit a435969
Show file tree

Hide file tree

Showing 11 changed files with 501 additions and 0 deletions.
diff --git a/.github/workflows/jekyll-gh-pages.yml b/.github/workflows/jekyll-gh-pages.yml
@@ -0,0 +1,51 @@
+# Sample workflow for building and deploying a Jekyll site to GitHub Pages
+name: Deploy Jekyll with GitHub Pages dependencies preinstalled
+
+on:
+  # Runs on pushes targeting the default branch
+  push:
+    branches: ["main"]
+
+  # Allows you to run this workflow manually from the Actions tab
+  workflow_dispatch:
+
+# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
+permissions:
+  contents: read
+  pages: write
+  id-token: write
+
+# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued.
+# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete.
+concurrency:
+  group: "pages"
+  cancel-in-progress: false
+
+jobs:
+  # Build job
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Setup Pages
+        uses: actions/configure-pages@v5
+      - name: Build with Jekyll
+        uses: actions/jekyll-build-pages@v1
+        with:
+          source: ./
+          destination: ./_site
+      # upload-pages-artifact v3 and deploy-pages v4 must be upgraded together:
+      # the older majors depend on the retired v3 artifact backend.
+      - name: Upload artifact
+        uses: actions/upload-pages-artifact@v3
+
+  # Deployment job
+  deploy:
+    environment:
+      name: github-pages
+      url: ${{ steps.deployment.outputs.page_url }}
+    runs-on: ubuntu-latest
+    needs: build
+    steps:
+      - name: Deploy to GitHub Pages
+        id: deployment
+        uses: actions/deploy-pages@v4
diff --git a/Filter_Code_Documentation.pdf b/Filter_Code_Documentation.pdf
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 Subhransu Bhattacharjee
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
@@ -0,0 +1,17 @@
+# Epic Filters
+
+Epic Filters is a project focused on providing data filtering solutions for the EPIC Kitchens Dataset, specifically designed for Neural Radiance Fields (NeRF) training. This repository contains a collection of Python scripts that implement various filters to preprocess and enhance the dataset.
+
+Minimal dataset for NeRF training coming soon.
+
+## Project page: https://1ssb.github.io/epic-filters/
+
+## Cite as
+
+```bibtex
+@misc{epic-filters,
+  title        = {Epic Kitchen Filters},
+  author       = {Subhransu S. Bhattacharjee},
+  year         = {2023},
+  howpublished = {https://github.com/1ssb/epic-filters/}
+}
+```
+
diff --git a/filters/dark-filter.py b/filters/dark-filter.py
@@ -0,0 +1,18 @@
+import os
+from PIL import Image
+import numpy as np
+
+def remove_dark_images(directory, threshold):
+    """Delete every ``.jpg``/``.png`` file in *directory* that is too dark.
+
+    An image counts as dark when the mean of all its pixel values (taken
+    over the whole array, every channel included) is strictly below
+    *threshold*.
+
+    Args:
+        directory: Folder to scan; only its direct entries are examined.
+        threshold: Mean pixel value below which a file is deleted.
+    """
+    for filename in os.listdir(directory):
+        if not filename.endswith((".jpg", ".png")):
+            continue
+        file_path = os.path.join(directory, filename)
+        # Close the file handle before deleting: removing a file that is
+        # still open fails on Windows and keeps a descriptor alive elsewhere.
+        with Image.open(file_path) as image:
+            avg_pixel_value = np.mean(np.asarray(image))
+        if avg_pixel_value < threshold:
+            os.remove(file_path)
+            print(f"Removed {filename}")
+
+# Example usage: delete images in ./images/ whose mean pixel value is below 50.
+remove_dark_images(directory="./images/", threshold=50)
+
diff --git a/filters/fit.py b/filters/fit.py
@@ -0,0 +1,28 @@
+import os
+from PIL import Image
+
+def resize_and_center_crop_images(directory, size):
+    """Center-crop each ``.jpg``/``.png`` in *directory* to a square and resize it.
+
+    The crop keeps the middle of the longer side so the result is a square
+    whose edge equals the shorter side; the square is then resampled to
+    *size* with Lanczos filtering and written back over the original file.
+
+    Args:
+        directory: Folder whose direct entries are processed.
+        size: ``(width, height)`` passed to ``Image.resize``.
+    """
+    for filename in os.listdir(directory):
+        if not filename.endswith((".jpg", ".png")):
+            continue
+        file_path = os.path.join(directory, filename)
+        image = Image.open(file_path)
+        width, height = image.size
+        # Crop box as (left, top, right, bottom), centred on the longer axis.
+        if width > height:
+            box = ((width - height) / 2, 0, (width + height) / 2, height)
+        else:
+            box = (0, (height - width) / 2, width, (height + width) / 2)
+        squared = image.crop(box)
+        resized = squared.resize(size, Image.LANCZOS)
+        resized.save(file_path)
+        print(f"Resized and center cropped {filename}")
+
+# Example usage: square-crop every image in ./images/ and resize it to s x s.
+s = 256
+resize_and_center_crop_images(directory="./images/", size=(s, s))
+
diff --git a/filters/hand-filter.py b/filters/hand-filter.py
@@ -0,0 +1,82 @@
+from ultralytics import YOLO
+import cv2
+import os
+import shutil
+from tqdm import tqdm
+
+_MODEL_CACHE = {}
+
+
+def _get_model(model_path):
+    """Return the YOLO model for *model_path*, loading the weights only once."""
+    if model_path not in _MODEL_CACHE:
+        _MODEL_CACHE[model_path] = YOLO(model_path)
+    return _MODEL_CACHE[model_path]
+
+
+def person_area_ratio(image_path: str, model_path: str = "yolov8s.pt", total_area: float = None):
+    """Return the share of the image covered by the first detected person box.
+
+    Args:
+        image_path: Image file to run detection on.
+        model_path: YOLO weights to use.
+        total_area: Image area in pixels; when ``None`` it is read from the
+            image file itself.
+
+    Returns:
+        Box area divided by image area, rounded to two decimals, or ``0``
+        when no class-0 (person) box is detected.
+    """
+    # Loading the weights is identical for every call, so the model is cached
+    # per weights path instead of being rebuilt for each image.
+    model = _get_model(model_path)
+    results = model(image_path, verbose=False)
+
+    for r in results:
+        # Keep only detections of class 0 (person).
+        person_boxes = r.boxes[r.boxes.cls == 0]
+        if len(person_boxes) == 0:
+            continue
+
+        # Only the first person box is measured.
+        x1, y1, x2, y2 = person_boxes.data[0][:4]
+        area = (x2 - x1) * (y2 - y1)
+
+        # Fall back to the image's own dimensions when no area was supplied.
+        if total_area is None:
+            image = cv2.imread(image_path)
+            h, w = image.shape[:2]
+            total_area = h * w
+
+        return round(float(area / total_area), 2)
+
+    return 0
+
+def main():
+    """Copy images from ./images/ to ./new-images/ when people cover little of them.
+
+    The pixel area of the first image found is reused as the reference area
+    for every image; an image is copied when ``person_area_ratio`` reports a
+    value below 0.35.
+    """
+    source_dir = "./images/"
+    destination_dir = "./new-images/"
+    image_suffixes = (".jpg", ".jpeg", ".png")
+
+    # Make sure the output folder exists.
+    if not os.path.exists(destination_dir):
+        os.makedirs(destination_dir)
+
+    # Reference area: height * width of the first image in the source folder.
+    total_area = None
+    first_image = next(
+        (name for name in os.listdir(source_dir) if name.endswith(image_suffixes)),
+        None,
+    )
+    if first_image is not None:
+        height, width = cv2.imread(os.path.join(source_dir, first_image)).shape[:2]
+        total_area = height * width
+
+    # Walk the folder again, this time filtering, with a progress bar.
+    with tqdm(os.listdir(source_dir), desc="Filtering images", unit="image") as progress:
+        for filename in progress:
+            if not filename.endswith(image_suffixes):
+                continue
+            image_path = os.path.join(source_dir, filename)
+            ratio = person_area_ratio(image_path, "yolov8s.pt", total_area)
+            # Keep the image only when the person box covers under 35 % of it.
+            if ratio < 0.35:
+                shutil.copy(image_path, destination_dir)
+                progress.write(f"Copied {filename} to {destination_dir}")
+
+
+if __name__ == "__main__":
+    main()
+
diff --git a/filters/overlap-filter.py b/filters/overlap-filter.py
@@ -0,0 +1,108 @@
+import os
+import json
+import numpy as np
+from scipy.spatial.transform import Rotation as R
+from tqdm import tqdm
+from shutil import copyfile
+
+def _projected_bounds(points, K):
+    """Project *points* with ``K`` and return ``(min_xy, max_xy)`` of the result."""
+    projected = np.dot(points, K.T)
+    projected = projected[:, :2] / projected[:, 2:]
+    return np.min(projected, axis=0), np.max(projected, axis=0)
+
+
+def calculate_frustum_overlap(frustum1, frustum2, K):
+    """Return how much of frustum1's projected bounding box frustum2 covers.
+
+    Both corner sets are multiplied by ``K.T``, divided by their third
+    coordinate, and reduced to axis-aligned 2-D bounding boxes.
+
+    Args:
+        frustum1: ``(N, 3)`` array of corner points.
+        frustum2: ``(M, 3)`` array of corner points.
+        K: ``3 x 3`` matrix applied to the corner points.
+
+    Returns:
+        Intersection area divided by the area of frustum1's box, or ``0.0``
+        when the boxes do not intersect or frustum1's box has no area.
+    """
+    (min_x1, min_y1), (max_x1, max_y1) = _projected_bounds(frustum1, K)
+    (min_x2, min_y2), (max_x2, max_y2) = _projected_bounds(frustum2, K)
+
+    # Width and height of the intersection; negative means the boxes are apart.
+    dx = min(max_x1, max_x2) - max(min_x1, min_x2)
+    dy = min(max_y1, max_y2) - max(min_y1, min_y2)
+    if not (dx >= 0 and dy >= 0):
+        return 0.0
+
+    area1 = (max_x1 - min_x1) * (max_y1 - min_y1)
+    # A degenerate box (all corners on one line or point) has zero area;
+    # dividing by it would yield nan/inf instead of a usable ratio.
+    if area1 <= 0:
+        return 0.0
+    return dx * dy / area1
+
+def select_frames(file_path, overlap_threshold=0.60, target_ratio=1/3):
+    """Pick a subset of frames whose neighbouring views overlap less.
+
+    The JSON file at *file_path* must provide ``camera`` (with ``params``,
+    ``width`` and ``height``) and ``images``, a mapping from frame name to a
+    7-element pose: four quaternion components followed by a translation.
+    Frames are thinned in passes until at most
+    ``int(len(images) * target_ratio)`` remain or a pass removes nothing.
+
+    Args:
+        file_path: Path to the JSON file describing camera and poses.
+        overlap_threshold: Pair overlap ratio above which a frame is dropped.
+        target_ratio: Fraction of the input frames to aim for.
+
+    Returns:
+        List of the surviving frame names, in their original order.
+    """
+    with open(file_path, 'r') as f:
+        data = json.load(f)
+    camera = data['camera']
+    fx, fy, cx, cy = camera['params'][:4]
+    K = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]])
+    images = data['images']
+
+    # The corner points in camera space depend only on the intrinsics, so
+    # they are built once instead of once per frame.
+    # NOTE(review): the top extent pairs fx with height/cy and the right
+    # extent pairs fy with width/cx, which looks swapped -- confirm intent.
+    near = 0.01
+    far = 100.0
+    top = near / fx * (camera['height'] / 2 - cy)
+    bottom = -top
+    right = near / fy * (camera['width'] / 2 - cx)
+    left = -right
+    near_corners = np.array([[left, bottom, -near], [left, top, -near],
+                             [right, top, -near], [right, bottom, -near]])
+    far_corners = np.array([[left, bottom, -far], [left, top, -far],
+                            [right, top, -far], [right, bottom, -far]])
+    corners_cam = np.vstack((near_corners, far_corners))
+    corners_cam_homo = np.hstack((corners_cam, np.ones((corners_cam.shape[0], 1))))
+
+    selected_frames = []
+    print("Calculating camera frustums for all frames...")
+    for frame_name, pose in tqdm(images.items()):
+        # NOTE(review): scipy's from_quat expects (x, y, z, w) order --
+        # confirm the JSON stores quaternions that way.
+        rot_mat = R.from_quat(pose[:4]).as_matrix()
+        trans_vec = np.array(pose[4:]).reshape(3, 1)
+        transform_mat = np.vstack((np.hstack((rot_mat, trans_vec)), [0, 0, 0, 1]))
+        transform_mat = np.linalg.inv(transform_mat)
+        # Negate row 1 and then column 1 of the inverted pose.
+        transform_mat[1, :] *= -1
+        transform_mat[:, 1] *= -1
+        corners_world_homo = np.dot(corners_cam_homo, transform_mat.T)
+        selected_frames.append((frame_name, corners_world_homo))
+
+    target_num_frames = int(len(selected_frames) * target_ratio)
+
+    print("Removing frames with high overlap...")
+    batch_num = 0
+    while len(selected_frames) > target_num_frames:
+        print(f"Processing batch {batch_num}...")
+        count_before = len(selected_frames)
+
+        # Overlap of each disjoint consecutive pair (0, 1), (2, 3), ...
+        overlaps = []
+        for i in tqdm(range(0, len(selected_frames) - len(selected_frames) % 2 - 1, 2)):
+            frame1_frustum = selected_frames[i][1]
+            frame2_frustum = selected_frames[i + 1][1]
+            overlap = calculate_frustum_overlap(frame1_frustum[:, :3], frame2_frustum[:, :3], K)
+            overlaps.append(overlap)
+
+        # For every pair above the threshold (the last pair is never tested),
+        # drop its first or second frame depending on whether this pair or
+        # the next one overlaps more.
+        remove_indices = []
+        for i in range(len(overlaps) - 1):
+            if overlaps[i] > overlap_threshold:
+                remove_indices.append(2 * i + int(np.argmax([overlaps[i], overlaps[i + 1]])))
+
+        # Delete from the back so earlier indices stay valid.
+        for idx in sorted(remove_indices, reverse=True):
+            del selected_frames[idx]
+
+        # Keep the list length even for the next round of pairing.
+        if len(selected_frames) % 2 == 1:
+            del selected_frames[-1]
+
+        # A pass that removes nothing would repeat identically forever.
+        if len(selected_frames) == count_before:
+            break
+
+        batch_num += 1
+
+    return [name for name, _ in selected_frames]
+
+# Pose files, the frame folders they describe, and the output root.
+json_data_dir = './JSON_DATA'
+frames_dir = './frames'
+selected_frames_dir = './selected_frames'
+
+# Every file <name>.<ext> in the JSON folder selects frames from
+# ./frames/<name>/ and copies them into ./selected_frames/<name>/.
+for file_name in tqdm(os.listdir(json_data_dir)):
+    folder_name = os.path.splitext(file_name)[0]
+    file_path = os.path.join(json_data_dir, file_name)
+    src_folder_path = os.path.join(frames_dir, folder_name)
+    dst_folder_path = os.path.join(selected_frames_dir, folder_name)
+    if not os.path.exists(dst_folder_path):
+        os.makedirs(dst_folder_path)
+    selected_frames = select_frames(file_path)
+    for frame in tqdm(selected_frames):
+        # Copy the selected frame to the destination folder
+        copyfile(os.path.join(src_folder_path, frame), os.path.join(dst_folder_path, frame))
diff --git a/filters/unzipper.py b/filters/unzipper.py
@@ -0,0 +1,60 @@
+import os
+import tarfile
+import zipfile
+import send2trash
+from tqdm import tqdm
+
+def check_files(directory):
+    """Report whether every archive in *directory* passes a basic integrity check.
+
+    Zip files are verified with ``ZipFile.testzip``; tar archives
+    (``.tar``, ``.tar.gz``, ``.tar.bz2``) are only opened, so damage past
+    the start of a tar file is not detected here. Names of failing archives
+    are printed.
+
+    Args:
+        directory: Folder whose direct entries are checked.
+
+    Returns:
+        ``True`` when no corrupted archive was found, ``False`` otherwise.
+    """
+    corrupted = []
+    for filename in tqdm(os.listdir(directory), desc="Checking files"):
+        path = os.path.join(directory, filename)
+        if filename.endswith('.zip'):
+            try:
+                with zipfile.ZipFile(path, 'r') as zip_ref:
+                    # testzip() returns the first bad member name, or None.
+                    if zip_ref.testzip() is not None:
+                        corrupted.append(filename)
+            except zipfile.BadZipFile:
+                # Not a readable zip at all: report it instead of crashing.
+                corrupted.append(filename)
+        elif filename.endswith(('.tar', '.tar.gz', '.tar.bz2')):
+            try:
+                with tarfile.open(path, 'r'):
+                    pass
+            except tarfile.ReadError:
+                corrupted.append(filename)
+    if corrupted:
+        print(f'Corrupted files: {", ".join(corrupted)}')
+        return False
+    return True
+
+def _split_archive_name(filename):
+    """Return ``(stem, suffix)`` for a supported archive name, else ``None``."""
+    # Longer suffixes first so 'x.tar.gz' yields 'x', not 'x.tar'.
+    for suffix in ('.tar.gz', '.tar.bz2', '.tar', '.zip'):
+        if filename.endswith(suffix):
+            return filename[:-len(suffix)], suffix
+    return None
+
+
+def extract_files(directory):
+    """Extract every zip/tar archive in *directory* into its own sub-folder.
+
+    The sub-folder is named after the archive with its full archive suffix
+    removed (``x.tar.gz`` goes to ``x/``), so the output folder never looks
+    like an archive itself.
+
+    Args:
+        directory: Folder whose direct entries are scanned for archives.
+    """
+    for filename in tqdm(os.listdir(directory), desc="Extracting files"):
+        parts = _split_archive_name(filename)
+        if parts is None:
+            continue
+        stem, suffix = parts
+        path = os.path.join(directory, filename)
+        extracted_path = os.path.join(directory, stem)
+        if suffix == '.zip':
+            with zipfile.ZipFile(path, 'r') as zip_ref:
+                os.makedirs(extracted_path, exist_ok=True)
+                zip_ref.extractall(extracted_path)
+        else:
+            with tarfile.open(path, 'r') as tar_ref:
+                os.makedirs(extracted_path, exist_ok=True)
+                # Use the 'data' extraction filter where the interpreter
+                # offers it, so members cannot escape the target folder.
+                if hasattr(tarfile, 'data_filter'):
+                    tar_ref.extractall(extracted_path, filter='data')
+                else:
+                    tar_ref.extractall(extracted_path)
+
+def delete_archives(directory):
+    """Send the tar archives in *directory* to the trash.
+
+    Only regular files ending in ``.tar``, ``.tar.gz`` or ``.tar.bz2`` are
+    trashed; zip files are left in place.
+
+    Args:
+        directory: Folder whose direct entries are examined.
+    """
+    for filename in tqdm(os.listdir(directory), desc="Deleting archives"):
+        if not filename.endswith(('.tar', '.tar.gz', '.tar.bz2')):
+            continue
+        file_path = os.path.join(directory, filename)
+        # A folder can carry an archive-like name (e.g. one produced by
+        # stripping only the last extension of 'x.tar.gz'); never trash it.
+        if not os.path.isfile(file_path):
+            continue
+        send2trash.send2trash(file_path)
+
+def process_directories(base_directory):
+    """Check, extract and clean the ``rgb_frames`` folder of each sub-directory.
+
+    For every entry of *base_directory* that contains an ``rgb_frames``
+    folder, the archives are checked, then extracted, then the archives are
+    removed; extraction and removal are skipped when the check fails.
+
+    Args:
+        base_directory: Root folder holding one sub-directory per recording.
+    """
+    for entry in tqdm(os.listdir(base_directory), desc="Processing directories"):
+        frames_path = os.path.join(base_directory, entry, 'rgb_frames')
+        # isdir() is already False for a missing path, so one test suffices.
+        if not os.path.isdir(frames_path):
+            print("Could not find the folders correctly")
+            continue
+        print(f"Processing {frames_path}")
+        if not check_files(frames_path):
+            continue
+        extract_files(frames_path)
+        delete_archives(frames_path)
+        print(f"Unzipped and Cleaned {frames_path}!")
+
+# Root of the dataset: each sub-directory is expected to hold an rgb_frames folder.
+main_directory = '../EPIC-KITCHENS'
+process_directories(base_directory=main_directory)