diff --git a/pyproject.toml b/pyproject.toml
index a425329b..555aef97 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -26,57 +26,58 @@ dependencies = [
     "flash-attn", # install this directly via `pip install flash-attn --no-build-isolation`
 ]
 
-#[project.optional-dependencies]
-#linting = ["pre-commit"]
-#tests = ["pytest", "pytest-cov"]
-#install_helper = ["ninja"]
-#
-#[project.scripts]
-#modalities = "modalities.__main__:main"
-#
-#[build-system]
-#requires = ["setuptools >= 61.0.0"]
-#build-backend = "setuptools.build_meta"
-#
-#[tool.black]
-#target-version = ["py310"]
-#line-length = 120
-#
-#[tool.isort]
-#profile = "black"
-#line_length = 120
-#
-#[tool.ruff]
-#line-length = 120
-#
-#[tool.pytest.ini_options]
-#addopts = "--cov=src --cov-report term --cov-report html"
-#
-#[tool.coverage.run]
-#branch = true
-#omit = ["*/src/modalities/dataloader/open_gptx_dataset/*"]
-#
-#[tool.coverage.report]
-## Regexes for lines to exclude from consideration
-#exclude_also = [
-#    # Don't complain about missing debug-only code:
-#    "def __repr__",
-#    "if self\\.debug",
-#
-#    # Don't complain if tests don't hit defensive assertion code:
-#    "raise AssertionError",
-#    "raise NotImplementedError",
-#
-#    # Don't complain if non-runnable code isn't run:
-#    "if 0:",
-#    "if __name__ == .__main__.:",
-#
-#    # Don't complain about abstract methods, they aren't run:
-#    "@(abc\\.)?abstractmethod",
-#]
-#
-#
-#ignore_errors = true
-#
-#[tool.coverage.html]
-#directory = "coverage_html_report"
+[project.optional-dependencies]
+linting = ["pre-commit"]
+tests = ["pytest", "pytest-cov"]
+install_helper = ["ninja"]
+
+[project.scripts]
+modalities = "modalities.__main__:main"
+
+[build-system]
+requires = ["setuptools >= 61.0.0"]
+build-backend = "setuptools.build_meta"
+
+[tool.black]
+target-version = ["py310"]
+line-length = 120
+
+[tool.isort]
+profile = "black"
+line_length = 120
+src_paths = ["src", "tests"]
+
+[tool.ruff]
+line-length = 120
+
+[tool.pytest.ini_options]
+addopts = "--cov=src --cov-report term --cov-report html"
+
+[tool.coverage.run]
+branch = true
+omit = ["*/src/modalities/dataloader/open_gptx_dataset/*"]
+
+[tool.coverage.report]
+# Regexes for lines to exclude from consideration
+exclude_also = [
+    # Don't complain about missing debug-only code:
+    "def __repr__",
+    "if self\\.debug",
+
+    # Don't complain if tests don't hit defensive assertion code:
+    "raise AssertionError",
+    "raise NotImplementedError",
+
+    # Don't complain if non-runnable code isn't run:
+    "if 0:",
+    "if __name__ == .__main__.:",
+
+    # Don't complain about abstract methods, they aren't run:
+    "@(abc\\.)?abstractmethod",
+]
+
+
+ignore_errors = true
+
+[tool.coverage.html]
+directory = "coverage_html_report"
diff --git a/src/modalities/logging_broker/subscriber_impl/results_subscriber.py b/src/modalities/logging_broker/subscriber_impl/results_subscriber.py
index 74f5797a..2de648fe 100644
--- a/src/modalities/logging_broker/subscriber_impl/results_subscriber.py
+++ b/src/modalities/logging_broker/subscriber_impl/results_subscriber.py
@@ -1,10 +1,10 @@
 from pathlib import Path
 
 import rich
+import wandb
 from rich.console import Group
 from rich.panel import Panel
 
-import wandb
 from modalities.batch import EvaluationResultBatch
 from modalities.config.config import WandbMode
 from modalities.logging_broker.messages import Message
diff --git a/src/modalities/tokenization/tokenizer_wrapper.py b/src/modalities/tokenization/tokenizer_wrapper.py
index 2acadb94..1e413a92 100644
--- a/src/modalities/tokenization/tokenizer_wrapper.py
+++ b/src/modalities/tokenization/tokenizer_wrapper.py
@@ -14,11 +14,11 @@ def decode(self, input_ids: List[int]) -> str:
 
     @property
     def vocab_size(self) -> int:
-        raise NotImplementedError
+        raise NotImplementedError("Tokenizer must be implemented by a subclass.")
 
     def get_token_id(self, token: str) -> int:
         raise NotImplementedError
 
 
 class PreTrainedHFTokenizer(TokenizerWrapper):
     def __init__(