more ruff rules

zeptofine · Aug 25, 2023 · b4c86ad · b4c86ad
1 parent 86b3724
commit b4c86ad
Show file tree

Hide file tree

Showing 8 changed files with 42 additions and 24 deletions.
diff --git a/.ruff.toml b/.ruff.toml
@@ -1 +1,4 @@
 line-length = 120
+
+extend-select = ["I", "F", "RET", "SIM", "PTH", "TRY", "NPY", "PERF", "RUF"]
+fixable = ["ALL"]
diff --git a/create_dataset.py b/create_dataset.py
@@ -63,11 +63,11 @@ def fileparse(dfile: Scenario) -> Scenario:
     # Save the HR / LR version of the image
     # TODO: Create a dynamic input / output system so this could be replaced with a list of outputs with actions
 
-    if not os.path.exists(dfile.hr_path):
+    if not dfile.hr_path.exists():
         cv2.imwrite(str(dfile.hr_path), image)
         os.utime(str(dfile.hr_path), (mtime, mtime))
 
-    if dfile.lr_path is not None and not os.path.exists(dfile.lr_path):
+    if dfile.lr_path is not None and not dfile.lr_path.exists():
         cv2.imwrite(str(dfile.lr_path), cv2.resize(image, (int(image.shape[1] // scale), int(image.shape[0] // scale))))
         os.utime(str(dfile.lr_path), (mtime, mtime))
 
@@ -183,7 +183,7 @@ def check_for_images(image_list: list[Path]) -> bool:
     def recurse(path: Path):
         return to_recursive(path, recursive, convert_spaces)
 
-    if not input_folder or not os.path.exists(input_folder):
+    if not input_folder or not input_folder.exists():
         rprint("Please select a directory.")
         return 1
 
@@ -285,7 +285,7 @@ def hrlr_pair(path: Path) -> tuple[Path, Path | None]:
 
     # * Run filters
     s.next("Using: ")
-    s.print(*[f" - {str(filter_)}" for filter_ in db.filters])
+    s.print(*[f" - {filter_!s}" for filter_ in db.filters])
 
     s.print("Populating df...")
     db.populate_df(

diff --git a/scripts/find_broken_links.py b/scripts/find_broken_links.py
@@ -1,4 +1,5 @@
 from pathlib import Path
+
 from tqdm import tqdm
 
 src = "/mnt/Toshiba/.Grabber/"
@@ -15,4 +16,5 @@
 response = input("These links lead to empty files. Delete them? y/N:")
 if response.lower().startswith("y"):
     print("Deleting...")
-    [i.unlink() for i in lst]
+    for i in lst:
+        i.unlink()
diff --git a/src/datafilters/base_filters.py b/src/datafilters/base_filters.py
@@ -76,7 +76,7 @@ def get_cfg(cls) -> dict:
     def _obj_to_comment(obj) -> str:
         if type(obj) is EnumType:
             return " | ".join(obj._member_map_.values())  # type: ignore
-        elif hasattr(obj, "__metadata__"):
+        if hasattr(obj, "__metadata__"):
             return str(obj.__metadata__[0])
 
         return ""

diff --git a/src/datafilters/data_filters.py b/src/datafilters/data_filters.py
@@ -35,11 +35,11 @@ def __init__(
         if after is not None:
             self.after = timeparser.parse(after)
         if self.before is not None and self.after is not None and self.after > self.before:
-            raise timeparser.ParserError(f"{self.after} is older than {self.before}")
+            raise self.AgeError(self.after, self.before)
 
     @staticmethod
     def get_modified_time(path: str) -> datetime:
-        return datetime.fromtimestamp(os.stat(path).st_mtime)
+        return datetime.fromtimestamp(os.stat(path).st_mtime)  # noqa: PTH116
 
     def fast_comp(self) -> Expr | bool:
         param: Expr | bool = True
@@ -49,6 +49,10 @@ def fast_comp(self) -> Expr | bool:
             param &= self.before > col("modifiedtime")
         return param
 
+    class AgeError(timeparser.ParserError):
+        def __init__(self, older, newer):
+            super().__init__(f"{older} is older than {newer}")
+
 
 class BlacknWhitelistFilter(DataFilter, FastComparable):
     config_keyword = "blackwhitelists"

diff --git a/src/datafilters/dataset_builder.py b/src/datafilters/dataset_builder.py
@@ -38,7 +38,7 @@ def __init__(self, origin: str, db_path: Path) -> None:
         self.filter_type_schema: dict[str, pl.DataType | type] = self.basic_schema.copy()
         self.columns: dict[str, Column] = {}
 
-        if os.path.exists(self.filepath):
+        if self.filepath.exists():
             self.df: DataFrame = pl.read_ipc(self.filepath, use_pyarrow=True)
         else:
             self.df: DataFrame = DataFrame(schema=self.basic_schema)
@@ -59,11 +59,11 @@ def add_filter(self, filter_: type[DataFilter] | DataFilter):
                 for column in filter_.schema:
                     self.add_schema(column)
         else:
-            raise TypeError(f"{filter_} is not a filter.")
+            raise self.NotFilterError(filter_)
 
     def fill_from_config(self, cfg: dict[str, dict], no_warn=False):
         if not len(self.unready_filters):
-            raise KeyError("Unready filters is empty")
+            raise self.UnreadyFilterError()
 
         for kwd, dct in cfg.items():
             if kwd in self.unready_filters:
@@ -177,9 +177,7 @@ def filter(self, lst, sort_col="path", ignore_missing_columns=False) -> Iterable
             warnings.warn(f"{len(self.unready_filters)} filters are not initialized and will not be populated")
 
         if (missing_requirements := set(self.columns) - set(self.build_schema)) and not ignore_missing_columns:
-            raise ValueError(
-                f"the following columns are required but may not be in the database: {missing_requirements}"
-            )
+            raise self.MissingRequirementError(missing_requirements)
 
         from_full_to_relative: dict[str, Path] = self.get_absolutes(lst)
         paths: set[str] = set(from_full_to_relative.keys())
@@ -228,6 +226,18 @@ def _split_into_chunks(df: DataFrame, chunksize: int, column="_idx"):
             .groupby(column, maintain_order=True)
         )
 
+    class NotFilterError(TypeError):
+        def __init__(self, obj: object):
+            super().__init__(f"{obj} is not a valid filter")
+
+    class UnreadyFilterError(KeyError):
+        def __init__(self):
+            super().__init__("Unready filters is empty")
+
+    class MissingRequirementError(KeyError):
+        def __init__(self, reqs: Iterable[str]):
+            super().__init__(f"Possibly missing columns: {reqs}")
+
     def __enter__(self, *args, **kwargs):
         self.__init__(*args, **kwargs)
         return self

diff --git a/src/datafilters/external_filters.py b/src/datafilters/external_filters.py
@@ -71,6 +71,10 @@ def fast_comp(self) -> Expr | bool:
         return (self.min_channels <= col("channels")) & (col("channels") <= self.max_channels)
 
 
+def get_size(pth):
+    return os.stat(pth).st_size  # noqa: PTH116
+
+
 _HASHERS: dict[str, Callable] = {
     "average": imagehash.average_hash,
     "crop_resistant": imagehash.crop_resistant_hash,
@@ -136,10 +140,7 @@ def __init__(
                 Column(self, "modifiedtime", datetime),
             )
         if get_optional_cols or resolver == RESOLVERS.SIZE:
-            self.schema = (
-                *self.schema,
-                Column(self, "size", int, col("path").apply(lambda p: os.stat(str(p)).st_size)),
-            )
+            self.schema = (*self.schema, Column(self, "size", int, col("path").apply(get_size)))
         self.hasher: Callable[[Image.Image], imagehash.ImageHash] = _HASHERS[hasher]
         self.resolver: Expr | bool = _RESOLVERS[resolver]
 
@@ -153,8 +154,7 @@ def compare(self, lst: Collection, cols: DataFrame) -> set:
             .apply(lambda df: df.filter(self.resolver) if len(df) > 1 else df)  # type: ignore
         )
 
-        resolved_paths = set(applied.get_column("path"))
-        return resolved_paths
+        return set(applied.get_column("path"))
 
     def apply_resolver(self, df: DataFrame):
         return df.filter(self.resolver)

diff --git a/util/print_funcs.py b/util/print_funcs.py
@@ -1,5 +1,5 @@
 import time
-from collections.abc import Iterable, Generator
+from collections.abc import Generator, Iterable
 from os import get_terminal_size
 from typing import Callable, TypeVar
 
@@ -19,8 +19,7 @@ def byte_format(size, leading: int = 3, trailing: int = 4, suffix="B") -> str:
                 return f"{size:{leading + trailing + 1}.{trailing}f}{unit}{suffix}"
             size /= 2**10
         return f"{size:3.1f}{unit}{suffix}"
-    else:
-        return f"N/A{suffix}"
+    return f"N/A{suffix}"
 
 
 def pbar(iteration: int, total: int, length=20, fill="#", nullp="-", corner="[]", pref="", suff="") -> str:
@@ -54,7 +53,7 @@ def ipbar(
         if newtime - _time > refresh_interval:  # refresh interval
             output = isbar(i + 1, total, **kwargs)
             if print_item:
-                output += f" {str(obj)}"
+                output += f" {obj!s}"
             print(f"\033[K{output}", end=end)
             _time = newtime
     print(isbar(total, total, **kwargs), end="\033[2K\r" if clear else very_end)