diff --git a/imdataset_creator/file.py b/imdataset_creator/file.py index 4d9a022..ad439b5 100644 --- a/imdataset_creator/file.py +++ b/imdataset_creator/file.py @@ -7,6 +7,7 @@ from string import Formatter from typing import Any, ClassVar +from pathvalidate import sanitize_filename, sanitize_filepath from typing_extensions import SupportsIndex @@ -39,7 +40,7 @@ def get_field(self, field_name: str, _: Sequence[Any], kwargs: Mapping[str, Any] return super().get_field(field_name, _, kwargs) -escaped_split = re.compile(r"[^\\],") +escaped_split = re.compile(r"(?[^\?:]+)\?(?P(?:[^:])*):?(?P.*)$") # present?yes:no replacement_fmt = re.compile(r"'(?P[^']+)'='(?P[^']*)'") @@ -83,11 +84,13 @@ def __format__(self, format_spec: str): newpth = "_".join(newpth.split(" ")) elif fmt == "underscore_parts": newpth = "_".join(Path(newpth).parts) + elif fmt == "sanitize": + newpth = sanitize_filename(newpth, platform="auto") elif not patterns_used: raise ValueError(f"Unknown format specifier: {fmt!r}") - return str(newpth) + return newpth def __getitem__(self, __key: SupportsIndex | slice) -> str: return MalleablePath(super().__getitem__(__key)) @@ -117,5 +120,5 @@ def from_src(cls, src: Path, pth: Path): src=MalleablePath(src), relative_path=MalleablePath(pth.relative_to(src).parent), file=MalleablePath(pth.stem), - ext=pth.suffix[1:], + ext=pth.suffix[1:], # Can't think of a MP usecase here. If anyone does, I'm happy to change this )