ray-project · pcmoritz · Feb 9, 2025 · Dec 1, 2024 · Dec 1, 2024 · Dec 2, 2024
diff --git a/.flake8 b/.flake8
@@ -26,7 +26,6 @@ ignore =
   E704
   W503
   W504
-  W605
   I
   N
   B001

diff --git a/ci/lint/clang-tidy-diff.py b/ci/lint/clang-tidy-diff.py
@@ -202,7 +202,7 @@ def main():
     filename = None
     lines_by_file = {}
     for line in sys.stdin:
-        match = re.search('^\+\+\+\ "?(.*?/){%s}([^ \t\n"]*)' % args.p, line)
+        match = re.search('^\\+\\+\\+\\ "?(.*?/){%s}([^ \t\n"]*)' % args.p, line)
         if match:
             filename = match.group(2)
         if filename is None:
@@ -215,7 +215,7 @@ def main():
             if not re.match("^%s$" % args.iregex, filename, re.IGNORECASE):
                 continue
 
-        match = re.search("^@@.*\+(\d+)(,(\d+))?", line)
+        match = re.search(r"^@@.*\+(\d+)(,(\d+))?", line)
         if match:
             start_line = int(match.group(1))
             line_count = 1

diff --git a/ci/pipeline/determine_tests_to_run.py b/ci/pipeline/determine_tests_to_run.py
@@ -201,7 +201,7 @@ def get_commit_range():
                 RAY_CI_JAVA_AFFECTED = 1
                 if (
                     changed_file.startswith("python/setup.py")
-                    or re.match(".*requirements.*\.txt", changed_file)
+                    or re.match(r".*requirements.*\.txt", changed_file)
                     or changed_file == "python/requirements_compiled.txt"
                 ):
                     RAY_CI_PYTHON_DEPENDENCIES_AFFECTED = 1

diff --git a/python/ray/_private/log_monitor.py b/python/ray/_private/log_monitor.py
@@ -23,9 +23,9 @@
 logger = logging.getLogger(__name__)
 
 # The groups are job id, and pid.
-WORKER_LOG_PATTERN = re.compile(".*worker.*-([0-9a-f]+)-(\d+)")
+WORKER_LOG_PATTERN = re.compile(r".*worker.*-([0-9a-f]+)-(\d+)")
 # The groups are job id.
-RUNTIME_ENV_SETUP_PATTERN = re.compile(".*runtime_env_setup-(\d+).log")
+RUNTIME_ENV_SETUP_PATTERN = re.compile(r".*runtime_env_setup-(\d+).log")
 # Log name update interval under pressure.
 # We need it because log name update is CPU intensive and uses 100%
 # of cpu when there are many log files.

diff --git a/python/ray/dag/tests/experimental/test_accelerated_dag.py b/python/ray/dag/tests/experimental/test_accelerated_dag.py
@@ -1219,8 +1219,8 @@ def f(x):
     with pytest.raises(
         ValueError,
         match=(
-            "ray.get\(\) can only be called once "
-            "on a CompiledDAGRef, and it was already called."
+            r"ray.get\(\) can only be called once "
+            r"on a CompiledDAGRef, and it was already called."
         ),
     ):
         ray.get(ref)
@@ -1319,9 +1319,9 @@ def test_exceed_max_buffered_results(ray_start_regular):
     with pytest.raises(
         ValueError,
         match=(
-            "Too many buffered results: the allowed max count for buffered "
-            "results is 1; call ray.get\(\) on previous CompiledDAGRefs to "
-            "free them up from buffer"
+            r"Too many buffered results: the allowed max count for buffered "
+            r"results is 1; call ray.get\(\) on previous CompiledDAGRefs to "
+            r"free them up from buffer"
         ),
     ):
         ray.get(ref)
@@ -1354,9 +1354,9 @@ def test_exceed_max_buffered_results_multi_output(ray_start_regular, single_fetc
     with pytest.raises(
         ValueError,
         match=(
-            "Too many buffered results: the allowed max count for buffered "
-            "results is 1; call ray.get\(\) on previous CompiledDAGRefs to "
-            "free them up from buffer"
+            r"Too many buffered results: the allowed max count for buffered "
+            r"results is 1; call ray.get\(\) on previous CompiledDAGRefs to "
+            r"free them up from buffer"
         ),
     ):
         if single_fetch:

diff --git a/python/ray/dashboard/modules/log/log_manager.py b/python/ray/dashboard/modules/log/log_manager.py
@@ -22,7 +22,7 @@
 
 logger = logging.getLogger(__name__)
 
-WORKER_LOG_PATTERN = re.compile(".*worker-([0-9a-f]+)-([0-9a-f]+)-(\d+).(out|err)")
+WORKER_LOG_PATTERN = re.compile(r".*worker-([0-9a-f]+)-([0-9a-f]+)-(\d+).(out|err)")
 
 
 class ResolvedStreamFileInfo(BaseModel):

@@ -351,7 +351,7 @@ def sum(
     def min(
         self, on: Union[str, List[str]] = None, ignore_nulls: bool = True
     ) -> Dataset:
-        """Compute grouped min aggregation.
+        r"""Compute grouped min aggregation.
 
         Examples:
             >>> import ray
@@ -390,7 +390,7 @@ def min(
     def max(
         self, on: Union[str, List[str]] = None, ignore_nulls: bool = True
     ) -> Dataset:
-        """Compute grouped max aggregation.
+        r"""Compute grouped max aggregation.
 
         Examples:
             >>> import ray
@@ -429,7 +429,7 @@ def max(
     def mean(
         self, on: Union[str, List[str]] = None, ignore_nulls: bool = True
     ) -> Dataset:
-        """Compute grouped mean aggregation.
+        r"""Compute grouped mean aggregation.
 
         Examples:
             >>> import ray
@@ -471,7 +471,7 @@ def std(
         ddof: int = 1,
         ignore_nulls: bool = True,
     ) -> Dataset:
-        """Compute grouped standard deviation aggregation.
+        r"""Compute grouped standard deviation aggregation.
 
         Examples:
             >>> import ray

@@ -10,7 +10,7 @@
 
 @PublicAPI(stability="alpha")
 class FeatureHasher(Preprocessor):
-    """Apply the `hashing trick <https://en.wikipedia.org/wiki/Feature_hashing>`_ to a
+    r"""Apply the `hashing trick <https://en.wikipedia.org/wiki/Feature_hashing>`_ to a
     table that describes token frequencies.
 
     :class:`FeatureHasher` creates ``num_features`` columns named ``hash_{index}``,

@@ -182,11 +182,11 @@ def gen_runtime_metrics_str(op_names: List[str], verbose: bool) -> str:
 
 def canonicalize(stats: str, filter_global_stats: bool = True) -> str:
     # Dataset UUID expression.
-    canonicalized_stats = re.sub("([a-f\d]{32})", "U", stats)
+    canonicalized_stats = re.sub(r"([a-f\d]{32})", "U", stats)
     # Time expressions.
-    canonicalized_stats = re.sub("[0-9\.]+(ms|us|s)", "T", canonicalized_stats)
+    canonicalized_stats = re.sub(r"[0-9\.]+(ms|us|s)", "T", canonicalized_stats)
     # Memory expressions.
-    canonicalized_stats = re.sub("[0-9\.]+(B|MB|GB)", "M", canonicalized_stats)
+    canonicalized_stats = re.sub(r"[0-9\.]+(B|MB|GB)", "M", canonicalized_stats)
     # For obj_store_mem_used, the value can be zero or positive, depending on the run.
     # Replace with A to avoid test flakiness.
     canonicalized_stats = re.sub(
@@ -196,13 +196,13 @@ def canonicalize(stats: str, filter_global_stats: bool = True) -> str:
         canonicalized_stats,
     )
     # Handle floats in (0, 1)
-    canonicalized_stats = re.sub(" (0\.0*[1-9][0-9]*)", " N", canonicalized_stats)
+    canonicalized_stats = re.sub(r" (0\.0*[1-9][0-9]*)", " N", canonicalized_stats)
     # Handle zero values specially so we can check for missing values.
-    canonicalized_stats = re.sub(" [0]+(\.[0])?", " Z", canonicalized_stats)
+    canonicalized_stats = re.sub(r" [0]+(\.[0])?", " Z", canonicalized_stats)
     # Scientific notation for small or large numbers
-    canonicalized_stats = re.sub("\d+(\.\d+)?[eE][-+]?\d+", "N", canonicalized_stats)
+    canonicalized_stats = re.sub(r"\d+(\.\d+)?[eE][-+]?\d+", "N", canonicalized_stats)
     # Other numerics.
-    canonicalized_stats = re.sub("[0-9]+(\.[0-9]+)?", "N", canonicalized_stats)
+    canonicalized_stats = re.sub(r"[0-9]+(\.[0-9]+)?", "N", canonicalized_stats)
     # Replace tabs with spaces.
     canonicalized_stats = re.sub("\t", "    ", canonicalized_stats)
     if filter_global_stats:

diff --git a/python/ray/serve/tests/test_api.py b/python/ray/serve/tests/test_api.py
@@ -762,7 +762,7 @@ def f():
         ValueError,
         match=(
             r"Invalid route_prefix 'no_slash', "
-            "must start with a forward slash \('/'\)"
+            r"must start with a forward slash \('/'\)"
         ),
     ):
         serve.run(f.bind(), route_prefix="no_slash")

diff --git a/python/ray/serve/tests/test_handle_streaming.py b/python/ray/serve/tests/test_handle_streaming.py
@@ -94,17 +94,17 @@ def test_call_gen_without_stream_flag(self, serve_instance, deployment: Deployme
         with pytest.raises(
             TypeError,
             match=(
-                "Method '__call__' returned a generator. You must use "
-                "`handle.options\(stream=True\)` to call generators on a deployment."
+                r"Method '__call__' returned a generator. You must use "
+                r"`handle.options\(stream=True\)` to call generators on a deployment."
             ),
         ):
             h.remote(5).result()
 
         with pytest.raises(
             TypeError,
             match=(
-                "Method 'call_inner_generator' returned a generator. You must use "
-                "`handle.options\(stream=True\)` to call generators on a deployment."
+                r"Method 'call_inner_generator' returned a generator. You must use "
+                r"`handle.options\(stream=True\)` to call generators on a deployment."
             ),
         ):
             h.call_inner_generator.remote(5).result()
@@ -173,19 +173,19 @@ async def __call__(self):
                 with pytest.raises(
                     TypeError,
                     match=(
-                        "Method '__call__' returned a generator. You must use "
-                        "`handle.options\(stream=True\)` to call generators on a "
-                        "deployment."
+                        r"Method '__call__' returned a generator. You must use "
+                        r"`handle.options\(stream=True\)` to call generators on a "
+                        r"deployment."
                     ),
                 ):
                     await self._h.remote(5)
 
                 with pytest.raises(
                     TypeError,
                     match=(
-                        "Method 'call_inner_generator' returned a generator. You must "
-                        "use `handle.options\(stream=True\)` to call generators on a "
-                        "deployment."
+                        r"Method 'call_inner_generator' returned a generator. You must "
+                        r"use `handle.options\(stream=True\)` to call generators on a "
+                        r"deployment."
                     ),
                 ):
                     await self._h.call_inner_generator.remote(5)

diff --git a/python/ray/serve/tests/unit/test_config.py b/python/ray/serve/tests/unit/test_config.py
@@ -216,7 +216,7 @@ class Class:
         # Invalid: not in the range of [1, 100]
         with pytest.raises(
             ValueError,
-            match="Valid values are None or an integer in the range of \[1, 100\]",
+            match=r"Valid values are None or an integer in the range of \[1, 100\]",
         ):
             ReplicaConfig.create(
                 Class,
@@ -227,7 +227,7 @@ class Class:
 
         with pytest.raises(
             ValueError,
-            match="Valid values are None or an integer in the range of \[1, 100\]",
+            match=r"Valid values are None or an integer in the range of \[1, 100\]",
         ):
             ReplicaConfig.create(
                 Class,
@@ -238,7 +238,7 @@ class Class:
 
         with pytest.raises(
             ValueError,
-            match="Valid values are None or an integer in the range of \[1, 100\]",
+            match=r"Valid values are None or an integer in the range of \[1, 100\]",
         ):
             ReplicaConfig.create(
                 Class,

diff --git a/python/ray/tests/test_basic.py b/python/ray/tests/test_basic.py
@@ -424,7 +424,7 @@ class A:
             ValueError,
             match=f"The keyword '{keyword}' only accepts None, "
             "a non-negative integer, "
-            "'streaming' \(for generators\), or 'dynamic'",
+            r"'streaming' \(for generators\), or 'dynamic'",
         ):
             ray.remote(**{keyword: v})(f)
 

diff --git a/python/ray/tests/test_job.py b/python/ray/tests/test_job.py
@@ -228,11 +228,11 @@ def line_exists(lines: List[str], regex_target: str):
 
     # Test python shell
     outputs = execute_driver(["python", "-i"], input=get_entrypoint)
-    assert line_exists(outputs, ".*result: \(interactive_shell\) python -i.*")
+    assert line_exists(outputs, r".*result: \(interactive_shell\) python -i.*")
 
     # Test IPython shell
     outputs = execute_driver(["ipython"], input=get_entrypoint)
-    assert line_exists(outputs, ".*result: \(interactive_shell\).*ipython")
+    assert line_exists(outputs, r".*result: \(interactive_shell\).*ipython")
 
 
 def test_removed_internal_flags(shutdown_only):

diff --git a/python/ray/tests/test_traceback.py b/python/ray/tests/test_traceback.py
@@ -31,13 +31,13 @@ def scrub_traceback(ex):
     print(ex)
     ex = ex.strip("\n")
     ex = re.sub("pid=[0-9]+,", "pid=XXX,", ex)
-    ex = re.sub("ip=[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+", "ip=YYY", ex)
-    ex = re.sub("repr=.*\)", "repr=ZZZ)", ex)
+    ex = re.sub(r"ip=[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+", "ip=YYY", ex)
+    ex = re.sub(r"repr=.*\)", "repr=ZZZ)", ex)
     ex = re.sub("line .*,", "line ZZ,", ex)
     ex = re.sub('".*"', '"FILE"', ex)
     # These are used to coloring the string.
-    ex = re.sub("\\x1b\[36m", "", ex)
-    ex = re.sub("\\x1b\[39m", "", ex)
+    ex = re.sub(r"\x1b\[36m", "", ex)
+    ex = re.sub(r"\x1b\[39m", "", ex)
     # When running bazel test with pytest 6.x, the module name becomes
     # "python.ray.tests.test_traceback" instead of just "test_traceback"
     # Also remove the "com_github_ray_project_ray" prefix, which may appear on Windows.
@@ -50,10 +50,10 @@ def scrub_traceback(ex):
     ex = re.sub("object at .*?>", "object at ADDRESS>", ex)
     # This is from ray.util.inspect_serializability()
     ex = re.sub(
-        "=[\s\S]*Checking Serializability of[\s\S]*=", "INSPECT_SERIALIZABILITY", ex
+        r"=[\s\S]*Checking Serializability of[\s\S]*=", "INSPECT_SERIALIZABILITY", ex
     )
     # Clean up underscore in stack trace, which is new in python 3.12
-    ex = re.sub("^\s+~*\^+~*\n", "", ex, flags=re.MULTILINE)
+    ex = re.sub("^\\s+~*\\^+~*\n", "", ex, flags=re.MULTILINE)
     return ex
 
 

@@ -25,7 +25,7 @@ def chain_func(*args, **kw_argv):
     return chain_func
 
 
-"""
+r"""
 Multiply semantics of each steps:
   [[s_1_1, s_1_2],
    [s_2_1, s_2_2]]

diff --git a/release/air_tests/air_benchmarks/mlperf-train/metric_utils.py b/release/air_tests/air_benchmarks/mlperf-train/metric_utils.py
@@ -8,10 +8,10 @@ def get_ray_spilled_and_restored_mb():
 
     summary_str = internal_api.memory_summary(stats_only=True)
 
-    match = re.search("Spilled (\d+) MiB", summary_str)
+    match = re.search(r"Spilled (\d+) MiB", summary_str)
     spilled_mb = int(match.group(1)) if match else 0
 
-    match = re.search("Restored (\d+) MiB", summary_str)
+    match = re.search(r"Restored (\d+) MiB", summary_str)
     restored_mb = int(match.group(1)) if match else 0
 
     return spilled_mb, restored_mb

diff --git a/release/k8s_tests/solution.py b/release/k8s_tests/solution.py
@@ -2,7 +2,7 @@
 from ray.serve.drivers import DAGDriver
 from ray.dag.input_node import InputNode
 
-"""
+r"""
 We are building a DAG like this:
 A ->  B ----> C
  \->  D --/

diff --git a/release/util/get_contributors.py b/release/util/get_contributors.py
@@ -57,7 +57,7 @@ def run(access_token, prev_release_commit, curr_release_commit):
         (
             f"git log {prev_release_commit}..{curr_release_commit} "
             f'--pretty=format:"%s" '
-            f' | grep -Eo "#(\d+)"'
+            rf' | grep -Eo "#(\d+)"'
         )
     )
     joined = " && ".join(cmd)

@@ -80,7 +80,7 @@ def vtrace_torch(
     clip_rho_threshold: Union[float, "torch.Tensor"] = 1.0,
     clip_pg_rho_threshold: Union[float, "torch.Tensor"] = 1.0,
 ):
-    """V-trace for softmax policies implemented with torch.
+    r"""V-trace for softmax policies implemented with torch.
 
     Calculates V-trace actor critic targets for softmax polices as described in
     "IMPALA: Scalable Distributed Deep-RL with Importance Weighted Actor-Learner

@@ -843,7 +843,7 @@ def foreach_learner(
         mark_healthy: bool = True,
         **kwargs,
     ) -> RemoteCallResults:
-        """Calls the given function on each Learner L with the args: (L, \*\*kwargs).
+        r"""Calls the given function on each Learner L with the args: (L, \*\*kwargs).
 
         Args:
             func: The function to call on each Learner L with args: (L, \*\*kwargs).

@@ -62,7 +62,7 @@ def rsample(
 
 @DeveloperAPI
 class TfCategorical(TfDistribution):
-    """Wrapper class for Categorical distribution.
+    r"""Wrapper class for Categorical distribution.
 
     Creates a categorical distribution parameterized by either :attr:`probs` or
     :attr:`logits` (but not both).

@@ -66,7 +66,7 @@ def rsample(
 
 @DeveloperAPI
 class TorchCategorical(TorchDistribution):
-    """Wrapper class for PyTorch Categorical distribution.
+    r"""Wrapper class for PyTorch Categorical distribution.
 
     Creates a categorical distribution parameterized by either :attr:`probs` or
     :attr:`logits` (but not both).

@@ -26,7 +26,7 @@
 
 @DeveloperAPI
 class DoublyRobust(OffPolicyEstimator):
-    """The Doubly Robust estimator.
+    r"""The Doubly Robust estimator.
 
     Let s_t, a_t, and r_t be the state, action, and reward at timestep t.
-Original file line number
+Diff line change
@@ Expand Up / @@ -26,7 +26,6 @@ ignore = @@
       E704
       W503
       W504
-      W605
       I
       N
       B001
@@ Expand Down @@