Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update --labels and add --labels-file options for Label Selector API #51706

Merged
merged 20 commits into from
Apr 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ public void testEmptyNodeLabels() {
}

public void testSetNodeLabels() {
System.setProperty("ray.head-args.0", "--labels={\"gpu_type\":\"A100\",\"azone\":\"azone-1\"}");
System.setProperty("ray.head-args.0", "--labels=\"gpu_type=A100,azone=azone-1\"");
try {
Ray.init();
List<NodeInfo> nodeInfos = Ray.getRuntimeContext().getAllNodeInfo();
Expand Down
100 changes: 100 additions & 0 deletions python/ray/_private/label_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
import re
import yaml
from typing import Dict

import ray._private.ray_constants as ray_constants

# Regex patterns used to validate that labels conform to Kubernetes label syntax rules.
# https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set

# Regex for mandatory name (DNS label) or value
# Examples:
# Valid matches: "a", "label-name", "a-._b", "123", "this_is_a_valid_label"
# Invalid matches: "-abc", "abc-", "my@label", "a" * 64
LABEL_REGEX = re.compile(r"[a-zA-Z0-9]([a-zA-Z0-9_.-]*[a-zA-Z0-9]){0,62}")

# Regex for optional prefix (DNS subdomain)
# Examples:
# Valid matches: "abc", "sub.domain.example", "my-label", "123.456.789"
# Invalid matches: "-abc", "prefix_", "sub..domain", sub.$$.example
LABEL_PREFIX_REGEX = rf"^({LABEL_REGEX.pattern}?(\.{LABEL_REGEX.pattern}?)*)$"


def parse_node_labels_string(labels_str: str) -> Dict[str, str]:
labels = {}

# Remove surrounding quotes if they exist
if len(labels_str) > 1 and labels_str.startswith('"') and labels_str.endswith('"'):
labels_str = labels_str[1:-1]

if labels_str == "":
return labels

# Labels argument should consist of a string of key=value pairs
# separated by commas. Labels follow Kubernetes label syntax.
label_pairs = labels_str.split(",")
for pair in label_pairs:
# Split each pair by `=`
key_value = pair.split("=")
if len(key_value) != 2:
raise ValueError("Label value is not a key-value pair.")
key = key_value[0].strip()
value = key_value[1].strip()
labels[key] = value

return labels


def parse_node_labels_from_yaml_file(path: str) -> Dict[str, str]:
if path == "":
return {}
with open(path, "r") as file:
# Expects valid YAML content
labels = yaml.safe_load(file)
if not isinstance(labels, dict):
raise ValueError(
"The format after deserialization is not a key-value pair map."
)
for key, value in labels.items():
if not isinstance(key, str):
raise ValueError("The key is not string type.")
if not isinstance(value, str):
raise ValueError(f'The value of "{key}" is not string type.')

return labels


def validate_node_labels(labels: Dict[str, str]):
if labels is None:
return
for key in labels.keys():
if key.startswith(ray_constants.RAY_DEFAULT_LABEL_KEYS_PREFIX):
raise ValueError(
f"Custom label keys `{key}` cannot start with the prefix "
f"`{ray_constants.RAY_DEFAULT_LABEL_KEYS_PREFIX}`. "
f"This is reserved for Ray defined labels."
)
if "/" in key:
prefix, name = key.rsplit("/")
if len(prefix) > 253 or not re.match(LABEL_PREFIX_REGEX, prefix):
raise ValueError(
f"Invalid label key prefix `{prefix}`. Prefix must be a series of DNS labels "
f"separated by dots (.),not longer than 253 characters in total."
)
else:
name = key
if len(name) > 63 or not re.match(LABEL_REGEX, name):
raise ValueError(
f"Invalid label key name `{name}`. Name must be 63 chars or less beginning and ending "
f"with an alphanumeric character ([a-z0-9A-Z]) with dashes (-), underscores (_),"
f"dots (.), and alphanumerics between."
)
value = labels.get(key)
if value is None or value == "":
return
if len(value) > 63 or not re.match(LABEL_REGEX, value):
raise ValueError(
f"Invalid label key value `{value}`. Value must be 63 chars or less beginning and ending "
f"with an alphanumeric character ([a-z0-9A-Z]) with dashes (-), underscores (_),"
f"dots (.), and alphanumerics between."
)
6 changes: 2 additions & 4 deletions python/ray/_private/parameter.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,8 @@
from typing import Dict, List, Optional

import ray._private.ray_constants as ray_constants
from ray._private.utils import (
validate_node_labels,
check_ray_client_dependencies_installed,
)
from ray._private.label_utils import validate_node_labels
from ray._private.utils import check_ray_client_dependencies_installed


logger = logging.getLogger(__name__)
Expand Down
37 changes: 0 additions & 37 deletions python/ray/_private/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1803,43 +1803,6 @@ def update_envs(env_vars: Dict[str, str]):
os.environ[key] = result


def parse_node_labels_json(
labels_json: str, cli_logger, cf, command_arg="--labels"
) -> Dict[str, str]:
try:
labels = json.loads(labels_json)
if not isinstance(labels, dict):
raise ValueError(
"The format after deserialization is not a key-value pair map"
)
for key, value in labels.items():
if not isinstance(key, str):
raise ValueError("The key is not string type.")
if not isinstance(value, str):
raise ValueError(f'The value of the "{key}" is not string type')
except Exception as e:
cli_logger.abort(
"`{}` is not a valid JSON string, detail error:{}"
"Valid values look like this: `{}`",
cf.bold(f"{command_arg}={labels_json}"),
str(e),
cf.bold(f'{command_arg}=\'{{"gpu_type": "A100", "region": "us"}}\''),
)
return labels


def validate_node_labels(labels: Dict[str, str]):
if labels is None:
return
for key in labels.keys():
if key.startswith(ray_constants.RAY_DEFAULT_LABEL_KEYS_PREFIX):
raise ValueError(
f"Custom label keys `{key}` cannot start with the prefix "
f"`{ray_constants.RAY_DEFAULT_LABEL_KEYS_PREFIX}`. "
f"This is reserved for Ray defined labels."
)


def parse_pg_formatted_resources_to_original(
pg_formatted_resources: Dict[str, float]
) -> Dict[str, float]:
Expand Down
49 changes: 45 additions & 4 deletions python/ray/scripts/scripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,14 @@
import ray
import ray._private.ray_constants as ray_constants
import ray._private.services as services
from ray._private.label_utils import (
parse_node_labels_from_yaml_file,
parse_node_labels_string,
)
from ray._private.utils import (
check_ray_client_dependencies_installed,
load_class,
parse_resources_json,
parse_node_labels_json,
)
from ray._private.internal_api import memory_summary
from ray._private.usage import usage_lib
Expand Down Expand Up @@ -635,9 +638,19 @@ def debug(address: str, verbose: bool):
"--labels",
required=False,
hidden=True,
default="{}",
default="",
type=str,
help="a JSON serialized dictionary mapping label name to label value.",
help="a string list of key-value pairs mapping label name to label value."
"These values take precedence over conflicting keys passed in from --labels-file."
'Ex: --labels "key1=val1,key2=val2"',
)
@click.option(
"--labels-file",
required=False,
hidden=True,
default="",
type=str,
help="a path to a YAML file containing a dictionary mapping of label keys to values.",
)
@click.option(
"--include-log-monitor",
Expand Down Expand Up @@ -695,6 +708,7 @@ def start(
ray_debugger_external,
disable_usage_stats,
labels,
labels_file,
include_log_monitor,
):
"""Start Ray processes manually on the local machine."""
Expand All @@ -715,7 +729,34 @@ def start(
node_ip_address = services.resolve_ip_for_localhost(node_ip_address)

resources = parse_resources_json(resources, cli_logger, cf)
labels_dict = parse_node_labels_json(labels, cli_logger, cf)

# Compose labels passed in with `--labels` and `--labels-file`.
# The label value from `--labels` will overrwite the value of any duplicate keys.
try:
labels_from_file_dict = parse_node_labels_from_yaml_file(labels_file)
except Exception as e:
cli_logger.abort(
"The file at `{}` is not a valid YAML file, detailed error:{}"
"Valid values look like this: `{}`",
cf.bold(f"--labels-file={labels_file}"),
str(e),
cf.bold("--labels-file='gpu_type: A100\nregion: us'"),
)
try:
labels_from_string = parse_node_labels_string(labels)
except Exception as e:
cli_logger.abort(
"`{}` is not a valid string of key-value pairs, detail error:{}"
"Valid values look like this: `{}`",
cf.bold(f"--labels={labels}"),
str(e),
cf.bold('--labels="key1=val1,key2=val2"'),
)
labels_dict = (
{**labels_from_file_dict, **labels_from_string}
if labels_from_file_dict
else labels_from_string
)

if plasma_store_socket_name is not None:
warnings.warn(
Expand Down
1 change: 1 addition & 0 deletions python/ray/tests/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,7 @@ py_test_module_list(
py_test_module_list(
size = "medium",
files = [
"test_label_utils.py",
"test_minimal_install.py",
"test_runtime_env_ray_minimal.py",
"test_utils.py",
Expand Down
121 changes: 121 additions & 0 deletions python/ray/tests/test_label_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
import pytest
from ray._private.label_utils import (
parse_node_labels_string,
parse_node_labels_from_yaml_file,
validate_node_labels,
)
import sys
import tempfile


def test_parse_node_labels_from_string():
# Empty label argument passed
labels_string = ""
labels_dict = parse_node_labels_string(labels_string)
assert labels_dict == {}

# Valid label key with empty value
labels_string = "region="
labels_dict = parse_node_labels_string(labels_string)
assert labels_dict == {"region": ""}

# Multiple valid label keys and values
labels_string = "ray.io/accelerator-type=A100,region=us-west4"
labels_dict = parse_node_labels_string(labels_string)
assert labels_dict == {"ray.io/accelerator-type": "A100", "region": "us-west4"}

# Invalid label
labels_string = "ray.io/accelerator-type=type=A100"
with pytest.raises(ValueError) as e:
parse_node_labels_string(labels_string)
assert "Label value is not a key-value pair" in str(e)


def test_parse_node_labels_from_yaml_file():
# Empty/invalid yaml
with tempfile.NamedTemporaryFile(mode="w+", delete=True) as test_file:
test_file.write("")
test_file.flush() # Ensure data is written
with pytest.raises(ValueError) as e:
parse_node_labels_from_yaml_file(test_file.name)
assert "The format after deserialization is not a key-value pair map" in str(e)

# With non-existent yaml file
with pytest.raises(FileNotFoundError):
parse_node_labels_from_yaml_file("missing-file.yaml")

# Valid label key with empty value
with tempfile.NamedTemporaryFile(mode="w+", delete=True) as test_file:
test_file.write('"ray.io/accelerator-type": ""')
test_file.flush() # Ensure data is written
labels_dict = parse_node_labels_from_yaml_file(test_file.name)
assert labels_dict == {"ray.io/accelerator-type": ""}

# Multiple valid label keys and values
with tempfile.NamedTemporaryFile(mode="w+", delete=True) as test_file:
test_file.write(
'"ray.io/accelerator-type": "A100"\n"region": "us"\n"market-type": "spot"'
)
test_file.flush() # Ensure data is written
labels_dict = parse_node_labels_from_yaml_file(test_file.name)
assert labels_dict == {
"ray.io/accelerator-type": "A100",
"region": "us",
"market-type": "spot",
}

# Non-string label key
with tempfile.NamedTemporaryFile(mode="w+", delete=True) as test_file:
test_file.write('{100: "A100"}')
test_file.flush() # Ensure data is written
with pytest.raises(ValueError) as e:
parse_node_labels_from_yaml_file(test_file.name)
assert "The key is not string type." in str(e)

# Non-string label value
with tempfile.NamedTemporaryFile(mode="w+", delete=True) as test_file:
test_file.write('{"gpu": 100}')
test_file.flush() # Ensure data is written
with pytest.raises(ValueError) as e:
parse_node_labels_from_yaml_file(test_file.name)
assert 'The value of "gpu" is not string type' in str(e)


def test_validate_node_labels():
# Custom label starts with ray.io prefix
labels_dict = {"ray.io/accelerator-type": "A100"}
with pytest.raises(ValueError) as e:
validate_node_labels(labels_dict)
assert "This is reserved for Ray defined labels." in str(e)

# Invalid key prefix syntax
labels_dict = {"!invalidPrefix/accelerator-type": "A100"}
with pytest.raises(ValueError) as e:
validate_node_labels(labels_dict)
assert "Invalid label key prefix" in str(e)

# Invalid key name syntax
labels_dict = {"!!accelerator-type?": "A100"}
with pytest.raises(ValueError) as e:
validate_node_labels(labels_dict)
assert "Invalid label key name" in str(e)

# Invalid key value syntax
labels_dict = {"accelerator-type": "??"}
with pytest.raises(ValueError) as e:
validate_node_labels(labels_dict)
assert "Invalid label key value" in str(e)

# Valid node label
labels_dict = {"accelerator-type": "A100"}
validate_node_labels(labels_dict)


if __name__ == "__main__":
import os

# Skip test_basic_2_client_mode for now- the test suite is breaking.
if os.environ.get("PARALLEL_CI"):
sys.exit(pytest.main(["-n", "auto", "--boxed", "-vs", __file__]))
else:
sys.exit(pytest.main(["-sv", __file__]))
2 changes: 1 addition & 1 deletion python/ray/tests/test_node_label_scheduling_strategy.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def get_node_id():

@pytest.mark.parametrize(
"call_ray_start",
['ray start --head --labels={"gpu_type":"A100","region":"us"}'],
["ray start --head --labels gpu_type=A100,region=us"],
indirect=True,
)
def test_node_label_scheduling_basic(call_ray_start):
Expand Down
Loading