From 94a5277e4c9698b5974ca55c003aec6376e938c1 Mon Sep 17 00:00:00 2001 From: zeptofine Date: Mon, 6 Nov 2023 12:33:37 -0500 Subject: [PATCH] more ruff rules --- imdataset_creator/__main__.py | 14 ++++++------ imdataset_creator/configs/keyworded.py | 4 ++-- imdataset_creator/datarules/base_rules.py | 8 +++---- .../datarules/dataset_builder.py | 21 ++++++++++-------- imdataset_creator/datarules/image_rules.py | 2 +- imdataset_creator/file.py | 6 ++--- imdataset_creator/gui/input_view.py | 2 +- imdataset_creator/gui/main_window.py | 22 +++++++++---------- imdataset_creator/gui/output_view.py | 2 +- imdataset_creator/gui/producer_views.py | 2 +- imdataset_creator/gui/rule_views.py | 2 +- imdataset_creator/gui/settings_inputs.py | 3 ++- imdataset_creator/image_filters/destroyers.py | 2 +- imdataset_creator/image_filters/resizer.py | 2 +- pyproject.toml | 13 ++++++++--- 15 files changed, 57 insertions(+), 48 deletions(-) diff --git a/imdataset_creator/__main__.py b/imdataset_creator/__main__.py index 899fc3e..9d2781a 100644 --- a/imdataset_creator/__main__.py +++ b/imdataset_creator/__main__.py @@ -4,16 +4,16 @@ from multiprocessing import Pool, cpu_count, freeze_support from pathlib import Path from pprint import pformat +from typing import Annotated -import rich.progress as progress import typer from polars import DataFrame, concat from rich import print as rprint +from rich import progress from rich.console import Console from rich.logging import RichHandler from rich.progress import Progress, TaskID from typer import Option -from typing_extensions import Annotated from . import ( ConfigHandler, @@ -32,9 +32,7 @@ @app.command() def main( - config_path: Annotated[Path, Option(help="Where the dataset config is placed")] = Path( - "config.json" - ), + config_path: Annotated[Path, Option(help="Where the dataset config is placed")] = Path("config.json"), database_path: Annotated[Path, Option(help="Where the database is placed")] = Path("filedb.arrow"), threads: Annotated[int, Option(help="multiprocessing threads")] = CPU_COUNT * 3 // 4, chunksize: Annotated[int, Option(help="imap chunksize")] = 5, @@ -120,7 +118,9 @@ def main( print(chunk) old_collected = collected save_timer, collected = db.trigger_save_via_time( - save_timer, collected, interval=population_interval + save_timer, + collected, + interval=population_interval, ) if old_collected is not collected: p.log(f"Saved at {save_timer}") @@ -149,7 +149,7 @@ def main( files: list[File] if db_cfg.rules: filter_t = p.add_task("filtering", total=0) - files = [resolved[file] for file in db.filter(set(resolved)).get_column("path")] + files = [resolved[file] for file in (db.filter(set(resolved)).sort(sort_by).get_column("path"))] p.update(filter_t, total=len(files), completed=len(files)) else: files = list(resolved.values()) diff --git a/imdataset_creator/configs/keyworded.py b/imdataset_creator/configs/keyworded.py index 75e44fb..908ef1b 100644 --- a/imdataset_creator/configs/keyworded.py +++ b/imdataset_creator/configs/keyworded.py @@ -41,7 +41,7 @@ def get_cfg(cls) -> ItemData | SpecialItemData: cfg[key] = val.default.value else: cfg[key] = val.default - if val.annotation is not inspect._empty: + if val.default is not val.empty: annotation = eval(val.annotation, sys.modules[cls.__module__].__dict__) comment = Keyworded._obj_to_comment(annotation) if comment: @@ -51,7 +51,7 @@ def get_cfg(cls) -> ItemData | SpecialItemData: @staticmethod def _obj_to_comment(obj) -> str: if type(obj) is EnumType: - return " | ".join(obj._member_map_.values()) # type: ignore + return " | ".join(obj.__members__.values()) # type: ignore if hasattr(obj, "__metadata__"): return str(obj.__metadata__[0]) return "" diff --git a/imdataset_creator/datarules/base_rules.py b/imdataset_creator/datarules/base_rules.py index a42fd0d..611aac9 100644 --- a/imdataset_creator/datarules/base_rules.py +++ b/imdataset_creator/datarules/base_rules.py @@ -3,12 +3,12 @@ import textwrap from abc import abstractmethod from collections import defaultdict -from collections.abc import Callable, Generator, Mapping, Sequence +from collections.abc import Callable, Generator, Iterable, Mapping, Sequence from dataclasses import dataclass from pathlib import Path from string import Formatter from types import MappingProxyType -from typing import Any, ClassVar, Iterable +from typing import Any, ClassVar import numpy as np import wcmatch.glob as wglob @@ -180,7 +180,7 @@ def run(self) -> PathGenerator: yield self.folder / file -class InvalidFormatException(Exception): +class InvalidFormatError(Exception): def __init__(self, disallowed: str): super().__init__(f"invalid format string. '{disallowed}' is not allowed.") @@ -189,7 +189,7 @@ class SafeFormatter(Formatter): def get_field(self, field_name: str, args: Sequence[Any], kwargs: Mapping[str, Any]) -> Any: # the goal is to make sure `property`s and indexing is still available, while dunders and things are not if "__" in field_name: - raise InvalidFormatException("__") + raise InvalidFormatError("__") return super().get_field(field_name, args, kwargs) diff --git a/imdataset_creator/datarules/dataset_builder.py b/imdataset_creator/datarules/dataset_builder.py index 1cdc789..18236b9 100644 --- a/imdataset_creator/datarules/dataset_builder.py +++ b/imdataset_creator/datarules/dataset_builder.py @@ -2,10 +2,11 @@ import os import textwrap import warnings -from collections.abc import Collection, Iterable +from collections.abc import Collection, Generator, Iterable from datetime import datetime +from io import BytesIO from pathlib import Path -from typing import Generator, Literal, TypeVar, overload +from typing import BinaryIO, Literal, TypeVar, overload import polars as pl from polars import DataFrame, Expr, LazyFrame @@ -52,9 +53,11 @@ def chunk_split( """ return ( part.drop(col_name) - for _, part in df.with_row_count(col_name) - .with_columns(pl.col(col_name) // chunksize) - .groupby(col_name, maintain_order=True) + for _, part in ( + df.with_row_count(col_name) + .with_columns(pl.col(col_name) // chunksize) + .groupby(col_name, maintain_order=True) + ) ) @@ -133,7 +136,7 @@ def fill_from_config(self, cfg: dict[str, dict], no_warn=False): for kwd, dct in cfg.items(): if kwd in self.unready_rules: rule = self.unready_rules.pop(kwd) - sig: inspect.Signature = inspect.signature(rule) + sig = inspect.signature(rule) params = {k: v for k, v in dct.items() if k in sig.parameters and k != "self"} self.add_rule(rule(**params)) @@ -288,9 +291,9 @@ def filter(self, lst) -> DataFrame: # noqa: A003 vdf = matcher(vdf, self.__df) if isinstance(matcher, DataFrameMatcher) else vdf.filter(matcher) return vdf - def save_df(self, pth: str | Path | None = None) -> None: - """saves the dataframe to self.filepath""" - self.__df.write_ipc(pth or self.filepath) + def save_df(self, file: BinaryIO | BytesIO | str | Path | None = None) -> None: + """saves the dataframe to a file""" + self.__df.write_ipc(file if file is not None else self.filepath) def update(self, df: DataFrame, on="path", how: Literal["left", "inner", "outer"] = "left"): self.__df = self.__df.update(df, on=on, how=how) diff --git a/imdataset_creator/datarules/image_rules.py b/imdataset_creator/datarules/image_rules.py index d705ea7..4959b8d 100644 --- a/imdataset_creator/datarules/image_rules.py +++ b/imdataset_creator/datarules/image_rules.py @@ -109,7 +109,7 @@ class ChannelRule(Rule): def __init__(self, min_channels=1, max_channels=4) -> None: super().__init__() self.requires = DataColumn("channels", int) - self.matcher = ExprMatcher((min_channels <= col("channels")) & (col("channels") <= max_channels)) + self.matcher = ExprMatcher(min_channels <= col("channels"), col("channels") <= max_channels) def get_size(pth): diff --git a/imdataset_creator/file.py b/imdataset_creator/file.py index 515e87d..228b4c7 100644 --- a/imdataset_creator/file.py +++ b/imdataset_creator/file.py @@ -1,4 +1,3 @@ -import os from dataclasses import dataclass from pathlib import Path @@ -17,10 +16,9 @@ def __format__(self, format_spec): else: raise ValueError(f"Unknown format specifier: {key}") elif fmt == "underscores": - newfmt = MalleablePath("_".join(self.split(" "))) + newfmt = MalleablePath("_".join(newfmt.split(" "))) elif fmt == "underscore_path": - newfmt = MalleablePath("_".join(Path(self).parts)) - + newfmt = MalleablePath("_".join(Path(newfmt).parts)) return str(newfmt) diff --git a/imdataset_creator/gui/input_view.py b/imdataset_creator/gui/input_view.py index 07c1423..1403340 100644 --- a/imdataset_creator/gui/input_view.py +++ b/imdataset_creator/gui/input_view.py @@ -34,7 +34,7 @@ ) -def InputList(parent=None): +def input_list(parent=None): return ProceduralConfigList(InputView_, parent=parent).label("Inputs") diff --git a/imdataset_creator/gui/main_window.py b/imdataset_creator/gui/main_window.py index bc4bdd3..497e492 100644 --- a/imdataset_creator/gui/main_window.py +++ b/imdataset_creator/gui/main_window.py @@ -27,10 +27,10 @@ from ..configs import MainConfig from ..datarules import chunk_split from .err_dialog import catch_errors -from .input_view import InputList -from .output_view import OutputList -from .producer_views import ProducerList -from .rule_views import RuleList +from .input_view import input_list +from .output_view import output_list +from .producer_views import producer_list +from .rule_views import rule_list log = logging.getLogger() @@ -70,13 +70,13 @@ def __init__(self, parent, cfg_path=Path("config.json")): # self.set_builder_button = QPushButton("Create builder", self) # self.set_builder_button.clicked.connect(self.set_builder) - self.input_list = InputList(self) + self.input_list = input_list(self) # self.run_all_inputs_button = QPushButton("Gather all inputs", self) # self.run_all_inputs_button.clicked.connect(self.input_list.gather_all) - self.producer_list = ProducerList(self) + self.producer_list = producer_list(self) - self.rule_list = RuleList(self) + self.rule_list = rule_list(self) self.producers_rules = QSplitter(self) self.producers_rules.addWidget(self.producer_list) @@ -95,7 +95,7 @@ def __init__(self, parent, cfg_path=Path("config.json")): # self.populator_thread.population_chunksize = 100 # self.run_population_button.clicked.connect(self.run_population) - self.output_list = OutputList(self) + self.output_list = output_list(self) self.lists = QSplitter(self) self.lists.addWidget(self.input_list) @@ -192,7 +192,7 @@ def open_config(self, s: str = ""): )[0] ) if file: - log.info(f"Opening {file}") + log.info("Opening", file) self.cfg_path = Path(file) self.load_config() @@ -250,7 +250,7 @@ def create_builder(self) -> DatasetBuilder: builder.add_producers(*producers) builder.add_rules(*rules) - log.info(f"built builder: {builder}") + log.info("built builder", builder) return builder @property @@ -299,7 +299,7 @@ class PopulatorThread(QThread): def run(self): log.info("started") if finished := self.db.remove_finished_producers(): - log.warning(f"Skipping finished producers: {finished}") + log.warning("Skipping finished producers", finished) collected: list[DataFrame] = [] save_timer: datetime = datetime.now() chunk: DataFrame diff --git a/imdataset_creator/gui/output_view.py b/imdataset_creator/gui/output_view.py index ee916c4..2256a8b 100644 --- a/imdataset_creator/gui/output_view.py +++ b/imdataset_creator/gui/output_view.py @@ -185,5 +185,5 @@ def div_100(val): ) -def OutputList(parent=None): +def output_list(parent=None): return ProceduralConfigList(OutputView_, parent=parent) diff --git a/imdataset_creator/gui/producer_views.py b/imdataset_creator/gui/producer_views.py index 41dcbe2..677c6a2 100644 --- a/imdataset_creator/gui/producer_views.py +++ b/imdataset_creator/gui/producer_views.py @@ -34,7 +34,7 @@ ) -def ProducerList(parent=None) -> ProceduralConfigList: +def producer_list(parent=None) -> ProceduralConfigList: return ProceduralConfigList( FileInfoProducerView, ImShapeProducerView, diff --git a/imdataset_creator/gui/rule_views.py b/imdataset_creator/gui/rule_views.py index 68e3435..5234b0d 100644 --- a/imdataset_creator/gui/rule_views.py +++ b/imdataset_creator/gui/rule_views.py @@ -121,7 +121,7 @@ ) -def RuleList(parent=None): +def rule_list(parent=None): return ProceduralConfigList( StatRuleView_, BlacklistWhitelistView_, diff --git a/imdataset_creator/gui/settings_inputs.py b/imdataset_creator/gui/settings_inputs.py index 3b29832..24d3702 100644 --- a/imdataset_creator/gui/settings_inputs.py +++ b/imdataset_creator/gui/settings_inputs.py @@ -1,9 +1,10 @@ # from __future__ import annotations from abc import ABC, abstractmethod +from collections.abc import Callable from enum import Enum from pathlib import Path -from typing import Any, Callable +from typing import Any from PySide6.QtCore import QDate, QDateTime, QRect, QSize, Qt, QTime, Signal, Slot from PySide6.QtGui import QAction, QIcon, QMouseEvent diff --git a/imdataset_creator/image_filters/destroyers.py b/imdataset_creator/image_filters/destroyers.py index 4f79689..b0d430b 100644 --- a/imdataset_creator/image_filters/destroyers.py +++ b/imdataset_creator/image_filters/destroyers.py @@ -235,7 +235,7 @@ def run(self, img: ndarray): return newimg.reshape((height, width, 3)) except subprocess.TimeoutExpired as e: compressor.send_signal("SIGINT") - log.warning(f"{e}") + log.warning(e) return img diff --git a/imdataset_creator/image_filters/resizer.py b/imdataset_creator/image_filters/resizer.py index a4823b8..1f8acb9 100644 --- a/imdataset_creator/image_filters/resizer.py +++ b/imdataset_creator/image_filters/resizer.py @@ -1,7 +1,7 @@ +from collections.abc import Sequence from dataclasses import dataclass from enum import Enum from random import choice -from typing import Sequence import cv2 import numpy as np diff --git a/pyproject.toml b/pyproject.toml index a641257..95cfd2a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,7 @@ PySide6-Essentials = "^6.5.2" ffmpeg-python = "^0.2.0" imagesize = "^1.4.1" numpy = "^1.26.0" +opencv-python = "^4.8.1.78" polars = "^0.19.3" pyarrow = "^13.0.0" python = "^3.10, <3.12" @@ -22,7 +23,6 @@ python-dateutil = "^2.8.2" rich = "^13.5.3" typer = "^0.9.0" wcmatch = "^8.5" -opencv-python = "^4.8.1.78" [tool.poetry.scripts] imdataset-creator = "imdataset_creator.__main__:app" @@ -33,7 +33,7 @@ build-backend = "poetry.core.masonry.api" requires = ["poetry-core"] [tool.ruff] -extend-ignore = ["F401", "E501"] +extend-ignore = ["F401", "E501", "B905"] extend-select = [ "A", "ASYNC", @@ -49,13 +49,20 @@ extend-select = [ "RUF", "PIE", "C4", + "E", # pycodestyle + "W", # pycodestyle + "FA", # flake8-future-annotations + "ISC", # flake8-implicit-str-concat + "G", # flake8-logging-format + "Q", # flake8-quotes + "SLF", # flake8-self ] fixable = ["ALL"] line-length = 120 +target-version = "py311" [tool.ruff.per-file-ignores] "__init__.py" = ["E402"] - [tool.isort] profile = "black"