diff --git a/create_dataset.py b/create_dataset.py index ec3187a..d63888d 100644 --- a/create_dataset.py +++ b/create_dataset.py @@ -1,6 +1,7 @@ import os from dataclasses import dataclass from enum import Enum +from itertools import chain from multiprocessing import Pool, cpu_count, freeze_support from pathlib import Path from typing import TYPE_CHECKING, Optional @@ -248,9 +249,7 @@ def hrlr_pair(path: Path) -> tuple[Path, Path | None]: s.next("Gathering images...") available_extensions: list[str] = extensions.split(",") s.print(f"Searching extensions: {available_extensions}") - file_list: Generator[Path, None, None] = get_file_list( - *[input_folder / "**" / f"*.{ext}" for ext in available_extensions] - ) + file_list: Generator[Path, None, None] = get_file_list(input_folder, *(f"*.{ext}" for ext in available_extensions)) image_list: list[Path] = [x.relative_to(input_folder) for x in sorted(file_list)] if limit and limit == LimitModes.BEFORE: image_list = image_list[:limit] @@ -260,7 +259,7 @@ def hrlr_pair(path: Path) -> tuple[Path, Path | None]: # * Purge existing images if purge_all: # This could be cleaner - to_delete: set[Path] = set(get_file_list(hr_folder / "**" / "*", lr_folder / "**" / "*")) + to_delete = set(chain(get_file_list(hr_folder, "*"), get_file_list(lr_folder, "*"))) if to_delete: s.next("Purging...") for file in ipbar(to_delete, total=len(to_delete)): diff --git a/src/datafilters/dataset_builder.py b/src/datafilters/dataset_builder.py index 047cc57..22aeca0 100644 --- a/src/datafilters/dataset_builder.py +++ b/src/datafilters/dataset_builder.py @@ -106,7 +106,7 @@ def populate_df( from_full_to_relative: dict[str, Path] = self.get_absolutes(lst) if new_paths := set(from_full_to_relative) - set(self.df.get_column("path")): - self.df = pl.concat((self.df, DataFrame({"path": new_paths})), how="diagonal") + self.df = pl.concat((self.df, DataFrame({"path": list(new_paths)})), how="diagonal") for filter_ in self.filters: filter_.filedict = from_full_to_relative diff --git a/util/file_list.py b/util/file_list.py index 8b451bc..a24fbf4 100644 --- a/util/file_list.py +++ b/util/file_list.py @@ -1,15 +1,14 @@ -from pathlib import Path -from glob import glob -from os import sep from collections.abc import Generator +from os import sep +from pathlib import Path -def get_file_list(*folders: Path) -> Generator[Path, None, None]: +def get_file_list(folder, *patterns: str) -> Generator[Path, None, None]: """ Args folders: One or more folder paths. Returns list[Path]: paths in the specified folders.""" - return (Path(y) for x in (glob(str(p), recursive=True) for p in folders) for y in x) + return (y for pattern in patterns for y in folder.rglob(pattern)) def to_recursive(path: Path, recursive: bool = False, replace_spaces: bool = False) -> Path: