[ruff] Fix all invalid escape sequences (#49004)

## Why are these changes needed?

This PR fixes all invalid escape sequences, and enables the corresponding flake8 rule [`W605`](https://www.flake8rules.com/rules/W605.html).

This came up as part of #48921, leading to the wrong conclusion that the mypy hiccup is related to invalid escape sequences. Most likely it isn't (python/mypy#18215 (comment)), but it may still be a good idea to get rid of these to avoid unnecessary warnings and confusion.

I couldn't really figure out how flake8 is executed by the CI, because just running `flake8` on the `master` branch actually fails, and running `scripts/format.sh` doesn't seem to do anything (most likely because it only checks the diff, and isn't a full check). So I'm wondering if enabling the `W605` check currently actually has much of an effect.

## Related issue number

Related to #48921

## Checks

- [x] I've signed off every commit (by using the -s flag, i.e., `git commit -s`) in this PR.
- [x] I've run `scripts/format.sh` to lint the changes in this PR.
- [ ] I've included any doc changes needed for https://docs.ray.io/en/master/.
- [ ] I've added any new APIs to the API Reference. For example, if I added a method in Tune, I've added it in `doc/source/tune/api/` under the corresponding `.rst` file.
- [ ] I've made sure the tests are passing. Note that there might be a few flaky tests, see the recent failures at https://flakey-tests.ray.io/
- Testing Strategy
   - [ ] Unit tests
   - [ ] Release tests
   - [ ] This PR is not tested :(

---------

Signed-off-by: Fabian Keller <[email protected]>
Co-authored-by: Philipp Moritz <[email protected]>
ray-project · Feb 9, 2025 · 3faccef · 3faccef
1 parent 9b5691b
commit 3faccef
Show file tree

Hide file tree

Showing 26 changed files with 57 additions and 57 deletions.
diff --git a/ci/lint/clang-tidy-diff.py b/ci/lint/clang-tidy-diff.py
@@ -202,7 +202,7 @@ def main():
     filename = None
     lines_by_file = {}
     for line in sys.stdin:
-        match = re.search('^\+\+\+\ "?(.*?/){%s}([^ \t\n"]*)' % args.p, line)
+        match = re.search('^\\+\\+\\+\\ "?(.*?/){%s}([^ \t\n"]*)' % args.p, line)
         if match:
             filename = match.group(2)
         if filename is None:
@@ -215,7 +215,7 @@ def main():
             if not re.match("^%s$" % args.iregex, filename, re.IGNORECASE):
                 continue
 
-        match = re.search("^@@.*\+(\d+)(,(\d+))?", line)
+        match = re.search(r"^@@.*\+(\d+)(,(\d+))?", line)
         if match:
             start_line = int(match.group(1))
             line_count = 1

diff --git a/ci/pipeline/determine_tests_to_run.py b/ci/pipeline/determine_tests_to_run.py
@@ -200,7 +200,7 @@ def get_commit_range():
                 RAY_CI_JAVA_AFFECTED = 1
                 if (
                     changed_file.startswith("python/setup.py")
-                    or re.match(".*requirements.*\.txt", changed_file)
+                    or re.match(r".*requirements.*\.txt", changed_file)
                     or changed_file == "python/requirements_compiled.txt"
                 ):
                     RAY_CI_PYTHON_DEPENDENCIES_AFFECTED = 1

diff --git a/pyproject.toml b/pyproject.toml
@@ -13,7 +13,7 @@ extend-exclude = [
 ]
 
 [tool.ruff.lint]
-extend-select = ["I", "B", "Q", "C4"]
+extend-select = ["I", "B", "Q", "C4", "W"]
 ignore = [
     "B003",
     "B005",

diff --git a/python/ray/_private/log_monitor.py b/python/ray/_private/log_monitor.py
@@ -23,9 +23,9 @@
 logger = logging.getLogger(__name__)
 
 # The groups are job id, and pid.
-WORKER_LOG_PATTERN = re.compile(".*worker.*-([0-9a-f]+)-(\d+)")
+WORKER_LOG_PATTERN = re.compile(r".*worker.*-([0-9a-f]+)-(\d+)")
 # The groups are job id.
-RUNTIME_ENV_SETUP_PATTERN = re.compile(".*runtime_env_setup-(\d+).log")
+RUNTIME_ENV_SETUP_PATTERN = re.compile(r".*runtime_env_setup-(\d+).log")
 # Log name update interval under pressure.
 # We need it because log name update is CPU intensive and uses 100%
 # of cpu when there are many log files.

diff --git a/python/ray/dag/tests/experimental/test_accelerated_dag.py b/python/ray/dag/tests/experimental/test_accelerated_dag.py
@@ -1313,8 +1313,8 @@ def f(x):
     with pytest.raises(
         ValueError,
         match=(
-            "ray.get\(\) can only be called once "
-            "on a CompiledDAGRef, and it was already called."
+            r"ray.get\(\) can only be called once "
+            r"on a CompiledDAGRef, and it was already called."
         ),
     ):
         ray.get(ref)

diff --git a/python/ray/dag/tests/experimental/test_torch_tensor_dag.py b/python/ray/dag/tests/experimental/test_torch_tensor_dag.py
@@ -1207,7 +1207,7 @@ def test_torch_tensor_explicit_communicator(ray_start_regular):
         ValueError,
         match=(
             "Please specify a custom communicator for the DAGNode using "
-            "`with_tensor_transport\(\)`, or specify a communicator or 'create' for "
+            r"`with_tensor_transport\(\)`, or specify a communicator or 'create' for "
             "_default_communicator when calling experimental_compile()."
         ),
     ):

diff --git a/python/ray/dashboard/modules/log/log_manager.py b/python/ray/dashboard/modules/log/log_manager.py
@@ -20,7 +20,7 @@
 
 logger = logging.getLogger(__name__)
 
-WORKER_LOG_PATTERN = re.compile(".*worker-([0-9a-f]+)-([0-9a-f]+)-(\d+).(out|err)")
+WORKER_LOG_PATTERN = re.compile(r".*worker-([0-9a-f]+)-([0-9a-f]+)-(\d+).(out|err)")
 
 
 class ResolvedStreamFileInfo(BaseModel):

diff --git a/python/ray/data/grouped_data.py b/python/ray/data/grouped_data.py
@@ -330,7 +330,7 @@ def sum(
     def min(
         self, on: Union[str, List[str]] = None, ignore_nulls: bool = True
     ) -> Dataset:
-        """Compute grouped min aggregation.
+        r"""Compute grouped min aggregation.
 
         Examples:
             >>> import ray
@@ -369,7 +369,7 @@ def min(
     def max(
         self, on: Union[str, List[str]] = None, ignore_nulls: bool = True
     ) -> Dataset:
-        """Compute grouped max aggregation.
+        r"""Compute grouped max aggregation.
 
         Examples:
             >>> import ray
@@ -408,7 +408,7 @@ def max(
     def mean(
         self, on: Union[str, List[str]] = None, ignore_nulls: bool = True
     ) -> Dataset:
-        """Compute grouped mean aggregation.
+        r"""Compute grouped mean aggregation.
 
         Examples:
             >>> import ray
@@ -450,7 +450,7 @@ def std(
         ddof: int = 1,
         ignore_nulls: bool = True,
     ) -> Dataset:
-        """Compute grouped standard deviation aggregation.
+        r"""Compute grouped standard deviation aggregation.
 
         Examples:
             >>> import ray

diff --git a/python/ray/data/preprocessors/hasher.py b/python/ray/data/preprocessors/hasher.py
@@ -10,7 +10,7 @@
 
 @PublicAPI(stability="alpha")
 class FeatureHasher(Preprocessor):
-    """Apply the `hashing trick <https://en.wikipedia.org/wiki/Feature_hashing>`_ to a
+    r"""Apply the `hashing trick <https://en.wikipedia.org/wiki/Feature_hashing>`_ to a
     table that describes token frequencies.
 
     :class:`FeatureHasher` creates ``num_features`` columns named ``hash_{index}``,

diff --git a/python/ray/data/tests/test_stats.py b/python/ray/data/tests/test_stats.py
@@ -182,11 +182,11 @@ def gen_runtime_metrics_str(op_names: List[str], verbose: bool) -> str:
 
 def canonicalize(stats: str, filter_global_stats: bool = True) -> str:
     # Dataset UUID expression.
-    canonicalized_stats = re.sub("([a-f\d]{32})", "U", stats)
+    canonicalized_stats = re.sub(r"([a-f\d]{32})", "U", stats)
     # Time expressions.
-    canonicalized_stats = re.sub("[0-9\.]+(ms|us|s)", "T", canonicalized_stats)
+    canonicalized_stats = re.sub(r"[0-9\.]+(ms|us|s)", "T", canonicalized_stats)
     # Memory expressions.
-    canonicalized_stats = re.sub("[0-9\.]+(B|MB|GB)", "M", canonicalized_stats)
+    canonicalized_stats = re.sub(r"[0-9\.]+(B|MB|GB)", "M", canonicalized_stats)
     # For obj_store_mem_used, the value can be zero or positive, depending on the run.
     # Replace with A to avoid test flakiness.
     canonicalized_stats = re.sub(
@@ -196,13 +196,13 @@ def canonicalize(stats: str, filter_global_stats: bool = True) -> str:
         canonicalized_stats,
     )
     # Handle floats in (0, 1)
-    canonicalized_stats = re.sub(" (0\.0*[1-9][0-9]*)", " N", canonicalized_stats)
+    canonicalized_stats = re.sub(r" (0\.0*[1-9][0-9]*)", " N", canonicalized_stats)
     # Handle zero values specially so we can check for missing values.
-    canonicalized_stats = re.sub(" [0]+(\.[0])?", " Z", canonicalized_stats)
+    canonicalized_stats = re.sub(r" [0]+(\.[0])?", " Z", canonicalized_stats)
     # Scientific notation for small or large numbers
-    canonicalized_stats = re.sub("\d+(\.\d+)?[eE][-+]?\d+", "N", canonicalized_stats)
+    canonicalized_stats = re.sub(r"\d+(\.\d+)?[eE][-+]?\d+", "N", canonicalized_stats)
     # Other numerics.
-    canonicalized_stats = re.sub("[0-9]+(\.[0-9]+)?", "N", canonicalized_stats)
+    canonicalized_stats = re.sub(r"[0-9]+(\.[0-9]+)?", "N", canonicalized_stats)
     # Replace tabs with spaces.
     canonicalized_stats = re.sub("\t", "    ", canonicalized_stats)
     if filter_global_stats:

diff --git a/python/ray/serve/tests/test_api.py b/python/ray/serve/tests/test_api.py
@@ -762,7 +762,7 @@ def f():
         ValueError,
         match=(
             r"Invalid route_prefix 'no_slash', "
-            "must start with a forward slash \('/'\)"
+            r"must start with a forward slash \('/'\)"
         ),
     ):
         serve.run(f.bind(), route_prefix="no_slash")

diff --git a/python/ray/serve/tests/test_handle_streaming.py b/python/ray/serve/tests/test_handle_streaming.py
@@ -94,17 +94,17 @@ def test_call_gen_without_stream_flag(self, serve_instance, deployment: Deployme
         with pytest.raises(
             TypeError,
             match=(
-                "Method '__call__' returned a generator. You must use "
-                "`handle.options\(stream=True\)` to call generators on a deployment."
+                r"Method '__call__' returned a generator. You must use "
+                r"`handle.options\(stream=True\)` to call generators on a deployment."
             ),
         ):
             h.remote(5).result()
 
         with pytest.raises(
             TypeError,
             match=(
-                "Method 'call_inner_generator' returned a generator. You must use "
-                "`handle.options\(stream=True\)` to call generators on a deployment."
+                r"Method 'call_inner_generator' returned a generator. You must use "
+                r"`handle.options\(stream=True\)` to call generators on a deployment."
             ),
         ):
             h.call_inner_generator.remote(5).result()
@@ -173,19 +173,19 @@ async def __call__(self):
                 with pytest.raises(
                     TypeError,
                     match=(
-                        "Method '__call__' returned a generator. You must use "
-                        "`handle.options\(stream=True\)` to call generators on a "
-                        "deployment."
+                        r"Method '__call__' returned a generator. You must use "
+                        r"`handle.options\(stream=True\)` to call generators on a "
+                        r"deployment."
                     ),
                 ):
                     await self._h.remote(5)
 
                 with pytest.raises(
                     TypeError,
                     match=(
-                        "Method 'call_inner_generator' returned a generator. You must "
-                        "use `handle.options\(stream=True\)` to call generators on a "
-                        "deployment."
+                        r"Method 'call_inner_generator' returned a generator. You must "
+                        r"use `handle.options\(stream=True\)` to call generators on a "
+                        r"deployment."
                     ),
                 ):
                     await self._h.call_inner_generator.remote(5)

diff --git a/python/ray/serve/tests/test_logging.py b/python/ray/serve/tests/test_logging.py
@@ -430,7 +430,7 @@ def check_log():
                 f'"deployment": "{resp["app_name"]}_fn", '
                 f'"replica": "{method_replica_id}", '
                 f'"component_name": "replica", '
-                f'"timestamp_ns": \d+}}.*'
+                rf'"timestamp_ns": \d+}}.*'
             )
             user_class_method_log_regex = (
                 '.*"message": "user log message from class method".*'
@@ -446,7 +446,7 @@ def check_log():
                 f'"deployment": "{resp2["app_name"]}_Model", '
                 f'"replica": "{class_method_replica_id}", '
                 f'"component_name": "replica", '
-                f'"timestamp_ns": \d+}}.*'
+                rf'"timestamp_ns": \d+}}.*'
             )
         else:
             user_method_log_regex = f".*{resp['request_id']} -- user func.*"

diff --git a/python/ray/serve/tests/unit/test_config.py b/python/ray/serve/tests/unit/test_config.py
@@ -216,7 +216,7 @@ class Class:
         # Invalid: not in the range of [1, 100]
         with pytest.raises(
             ValueError,
-            match="Valid values are None or an integer in the range of \[1, 100\]",
+            match=r"Valid values are None or an integer in the range of \[1, 100\]",
         ):
             ReplicaConfig.create(
                 Class,
@@ -227,7 +227,7 @@ class Class:
 
         with pytest.raises(
             ValueError,
-            match="Valid values are None or an integer in the range of \[1, 100\]",
+            match=r"Valid values are None or an integer in the range of \[1, 100\]",
         ):
             ReplicaConfig.create(
                 Class,
@@ -238,7 +238,7 @@ class Class:
 
         with pytest.raises(
             ValueError,
-            match="Valid values are None or an integer in the range of \[1, 100\]",
+            match=r"Valid values are None or an integer in the range of \[1, 100\]",
         ):
             ReplicaConfig.create(
                 Class,

diff --git a/python/ray/tests/test_basic.py b/python/ray/tests/test_basic.py
@@ -424,7 +424,7 @@ class A:
             ValueError,
             match=f"The keyword '{keyword}' only accepts None, "
             "a non-negative integer, "
-            "'streaming' \(for generators\), or 'dynamic'",
+            r"'streaming' \(for generators\), or 'dynamic'",
         ):
             ray.remote(**{keyword: v})(f)
 

diff --git a/python/ray/tests/test_job.py b/python/ray/tests/test_job.py
@@ -228,11 +228,11 @@ def line_exists(lines: List[str], regex_target: str):
 
     # Test python shell
     outputs = execute_driver(["python", "-i"], input=get_entrypoint)
-    assert line_exists(outputs, ".*result: \(interactive_shell\) python -i.*")
+    assert line_exists(outputs, r".*result: \(interactive_shell\) python -i.*")
 
     # Test IPython shell
     outputs = execute_driver(["ipython"], input=get_entrypoint)
-    assert line_exists(outputs, ".*result: \(interactive_shell\).*ipython")
+    assert line_exists(outputs, r".*result: \(interactive_shell\).*ipython")
 
 
 def test_removed_internal_flags(shutdown_only):

diff --git a/python/ray/tests/test_traceback.py b/python/ray/tests/test_traceback.py
@@ -31,13 +31,13 @@ def scrub_traceback(ex):
     print(ex)
     ex = ex.strip("\n")
     ex = re.sub("pid=[0-9]+,", "pid=XXX,", ex)
-    ex = re.sub("ip=[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+", "ip=YYY", ex)
-    ex = re.sub("repr=.*\)", "repr=ZZZ)", ex)
+    ex = re.sub(r"ip=[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+", "ip=YYY", ex)
+    ex = re.sub(r"repr=.*\)", "repr=ZZZ)", ex)
     ex = re.sub("line .*,", "line ZZ,", ex)
     ex = re.sub('".*"', '"FILE"', ex)
     # These are used to coloring the string.
-    ex = re.sub("\\x1b\[36m", "", ex)
-    ex = re.sub("\\x1b\[39m", "", ex)
+    ex = re.sub(r"\x1b\[36m", "", ex)
+    ex = re.sub(r"\x1b\[39m", "", ex)
     # When running bazel test with pytest 6.x, the module name becomes
     # "python.ray.tests.test_traceback" instead of just "test_traceback"
     # Also remove the "com_github_ray_project_ray" prefix, which may appear on Windows.
@@ -50,10 +50,10 @@ def scrub_traceback(ex):
     ex = re.sub("object at .*?>", "object at ADDRESS>", ex)
     # This is from ray.util.inspect_serializability()
     ex = re.sub(
-        "=[\s\S]*Checking Serializability of[\s\S]*=", "INSPECT_SERIALIZABILITY", ex
+        r"=[\s\S]*Checking Serializability of[\s\S]*=", "INSPECT_SERIALIZABILITY", ex
     )
     # Clean up underscore in stack trace, which is new in python 3.12
-    ex = re.sub("^\s+~*\^+~*\n", "", ex, flags=re.MULTILINE)
+    ex = re.sub("^\\s+~*\\^+~*\n", "", ex, flags=re.MULTILINE)
     return ex
 
 

diff --git a/python/ray/workflow/examples/function_chain.py b/python/ray/workflow/examples/function_chain.py
@@ -25,7 +25,7 @@ def chain_func(*args, **kw_argv):
     return chain_func
 
 
-"""
+r"""
 Multiply semantics of each steps:
   [[s_1_1, s_1_2],
    [s_2_1, s_2_2]]

diff --git a/release/air_tests/air_benchmarks/mlperf-train/metric_utils.py b/release/air_tests/air_benchmarks/mlperf-train/metric_utils.py
@@ -8,10 +8,10 @@ def get_ray_spilled_and_restored_mb():
 
     summary_str = internal_api.memory_summary(stats_only=True)
 
-    match = re.search("Spilled (\d+) MiB", summary_str)
+    match = re.search(r"Spilled (\d+) MiB", summary_str)
     spilled_mb = int(match.group(1)) if match else 0
 
-    match = re.search("Restored (\d+) MiB", summary_str)
+    match = re.search(r"Restored (\d+) MiB", summary_str)
     restored_mb = int(match.group(1)) if match else 0
 
     return spilled_mb, restored_mb

diff --git a/release/k8s_tests/solution.py b/release/k8s_tests/solution.py
@@ -2,7 +2,7 @@
 from ray.serve.drivers import DAGDriver
 from ray.dag.input_node import InputNode
 
-"""
+r"""
 We are building a DAG like this:
 A ->  B ----> C
  \->  D --/

diff --git a/release/util/get_contributors.py b/release/util/get_contributors.py
@@ -57,7 +57,7 @@ def run(access_token, prev_release_commit, curr_release_commit):
         (
             f"git log {prev_release_commit}..{curr_release_commit} "
             f'--pretty=format:"%s" '
-            f' | grep -Eo "#(\d+)"'
+            rf' | grep -Eo "#(\d+)"'
         )
     )
     joined = " && ".join(cmd)

diff --git a/rllib/algorithms/impala/torch/vtrace_torch_v2.py b/rllib/algorithms/impala/torch/vtrace_torch_v2.py
@@ -80,7 +80,7 @@ def vtrace_torch(
     clip_rho_threshold: Union[float, "torch.Tensor"] = 1.0,
     clip_pg_rho_threshold: Union[float, "torch.Tensor"] = 1.0,
 ):
-    """V-trace for softmax policies implemented with torch.
+    r"""V-trace for softmax policies implemented with torch.
 
     Calculates V-trace actor critic targets for softmax polices as described in
     "IMPALA: Scalable Distributed Deep-RL with Importance Weighted Actor-Learner

diff --git a/rllib/core/learner/learner_group.py b/rllib/core/learner/learner_group.py
@@ -824,7 +824,7 @@ def foreach_learner(
         mark_healthy: bool = False,
         **kwargs,
     ) -> RemoteCallResults:
-        """Calls the given function on each Learner L with the args: (L, \*\*kwargs).
+        r"""Calls the given function on each Learner L with the args: (L, \*\*kwargs).
 
         Args:
             func: The function to call on each Learner L with args: (L, \*\*kwargs).

diff --git a/rllib/models/tf/tf_distributions.py b/rllib/models/tf/tf_distributions.py
@@ -62,7 +62,7 @@ def rsample(
 
 @DeveloperAPI
 class TfCategorical(TfDistribution):
-    """Wrapper class for Categorical distribution.
+    r"""Wrapper class for Categorical distribution.
 
     Creates a categorical distribution parameterized by either :attr:`probs` or
     :attr:`logits` (but not both).

diff --git a/rllib/models/torch/torch_distributions.py b/rllib/models/torch/torch_distributions.py
@@ -70,7 +70,7 @@ def rsample(
 
 @DeveloperAPI
 class TorchCategorical(TorchDistribution):
-    """Wrapper class for PyTorch Categorical distribution.
+    r"""Wrapper class for PyTorch Categorical distribution.
 
     Creates a categorical distribution parameterized by either :attr:`probs` or
     :attr:`logits` (but not both).

diff --git a/rllib/offline/estimators/doubly_robust.py b/rllib/offline/estimators/doubly_robust.py
@@ -26,7 +26,7 @@
 
 @DeveloperAPI
 class DoublyRobust(OffPolicyEstimator):
-    """The Doubly Robust estimator.
+    r"""The Doubly Robust estimator.
 
     Let s_t, a_t, and r_t be the state, action, and reward at timestep t.