Skip to content

Commit

Permalink
fix set bug
Browse files Browse the repository at this point in the history
  • Loading branch information
zeptofine committed Aug 25, 2023
1 parent b4c86ad commit c9ca6e0
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 10 deletions.
7 changes: 3 additions & 4 deletions create_dataset.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
from dataclasses import dataclass
from enum import Enum
from itertools import chain
from multiprocessing import Pool, cpu_count, freeze_support
from pathlib import Path
from typing import TYPE_CHECKING, Optional
Expand Down Expand Up @@ -248,9 +249,7 @@ def hrlr_pair(path: Path) -> tuple[Path, Path | None]:
s.next("Gathering images...")
available_extensions: list[str] = extensions.split(",")
s.print(f"Searching extensions: {available_extensions}")
file_list: Generator[Path, None, None] = get_file_list(
*[input_folder / "**" / f"*.{ext}" for ext in available_extensions]
)
file_list: Generator[Path, None, None] = get_file_list(input_folder, *(f"*.{ext}" for ext in available_extensions))
image_list: list[Path] = [x.relative_to(input_folder) for x in sorted(file_list)]
if limit and limit == LimitModes.BEFORE:
image_list = image_list[:limit]
Expand All @@ -260,7 +259,7 @@ def hrlr_pair(path: Path) -> tuple[Path, Path | None]:
# * Purge existing images
if purge_all:
# This could be cleaner
to_delete: set[Path] = set(get_file_list(hr_folder / "**" / "*", lr_folder / "**" / "*"))
to_delete = set(chain(get_file_list(hr_folder, "*"), get_file_list(lr_folder, "*")))
if to_delete:
s.next("Purging...")
for file in ipbar(to_delete, total=len(to_delete)):
Expand Down
2 changes: 1 addition & 1 deletion src/datafilters/dataset_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def populate_df(

from_full_to_relative: dict[str, Path] = self.get_absolutes(lst)
if new_paths := set(from_full_to_relative) - set(self.df.get_column("path")):
self.df = pl.concat((self.df, DataFrame({"path": new_paths})), how="diagonal")
self.df = pl.concat((self.df, DataFrame({"path": list(new_paths)})), how="diagonal")

for filter_ in self.filters:
filter_.filedict = from_full_to_relative
Expand Down
9 changes: 4 additions & 5 deletions util/file_list.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
from pathlib import Path
from glob import glob
from os import sep
from collections.abc import Generator
from os import sep
from pathlib import Path


def get_file_list(*folders: Path) -> Generator[Path, None, None]:
def get_file_list(folder, *patterns: str) -> Generator[Path, None, None]:
"""
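Args folder: Root folder that is searched recursively.
Args patterns: One or more glob patterns (e.g. "*.png") passed to folder.rglob.
Returns Generator[Path, None, None]: paths under folder yielded for each pattern in turn."""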

return (Path(y) for x in (glob(str(p), recursive=True) for p in folders) for y in x)
return (y for pattern in patterns for y in folder.rglob(pattern))


def to_recursive(path: Path, recursive: bool = False, replace_spaces: bool = False) -> Path:
Expand Down

0 comments on commit c9ca6e0

Please sign in to comment.