Skip to content

Commit 2c73277

Browse files
committed
Validate smartstack.yaml in paasta validate
We currently do this in an internal repo (`py-gitolite`), but I don't see why we don't just do this in `paasta validate` - especially since moving this check left means that folks are alerted to broken config earlier rather than when they attempt to integrate. The smartstack_schema.json in this repo was behind the one in py-gitolite, so I've gone ahead and made them match. The majority of the added code (outside the tests, which I had Claude Code regenerate for me) is more-or-less taken straight from py-gitolite with minimal changes. I ran `paasta validate` over all soaconfigs and verified that there are no new failures when run with this code.
1 parent 4e3cb72 commit 2c73277

File tree

3 files changed

+573
-4
lines changed

3 files changed

+573
-4
lines changed

paasta_tools/cli/cmds/validate.py

Lines changed: 215 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@
3434

3535
import pytz
3636
from croniter import croniter
37+
from environment_tools.type_utils import available_location_types
38+
from environment_tools.type_utils import compare_types
39+
from environment_tools.type_utils import convert_location_type
3740
from jsonschema import Draft4Validator
3841
from jsonschema import exceptions
3942
from jsonschema import FormatChecker
@@ -120,6 +123,7 @@ class AutoscalingValidationError(SoaValidationError):
120123
"rollback", # automatic rollbacks during deployments
121124
"tron", # batch workloads
122125
"eks", # eks workloads
126+
"smartstack", # mesh configs
123127
"autotuned_defaults/kubernetes",
124128
"autotuned_defaults/cassandracluster",
125129
}
@@ -166,6 +170,26 @@ class AutoscalingValidationError(SoaValidationError):
166170
METRICS_PROVIDER_PROMQL: {"desired_active_requests_per_replica"},
167171
}
168172

173+
# Listener names in Envoy cannot exceed 128 characters and use the
174+
# following format:
175+
# service_name.namespace.listener
176+
# This naming scheme leaves 128 - 10 = 118 characters
177+
# for the Smartstack service name and namespace.
178+
# See COREBACK-6303 for more context.
179+
MAX_ENVOY_NAME_LEN = 118
180+
181+
# Socket names cannot exceed 108 characters, and the longest socket
182+
# name generated by HAProxy uses the following format:
183+
# /var/run/synapse/sockets/service_name.namespace.LONGPID.sock.tmp
184+
# This naming scheme leaves 108 - 43 = 65 characters combined for the
185+
# Smartstack service name and namespace. We leave a generous buffer
186+
# to arrive at a maximum name length of 55, in case e.g. the .sock
187+
# suffix is renamed to a longer name for certain sockets.
188+
# See SMTSTK-204 for more context.
189+
# NOTE: the above is mostly still true - but the path we use is now /var/run/envoy/sockets/...
190+
# so we may want to adjust this a tad in the future ;)
191+
MAX_SMARTSTACK_NAME_LEN = 55
192+
169193

170194
class ConditionConfig(TypedDict, total=False):
171195
"""
@@ -971,6 +995,190 @@ def validate_cpu_burst(service_path: str) -> bool:
971995
return returncode
972996

973997

998+
def _check_smartstack_name_length_envoy(service: str, namespace: str) -> None:
999+
"""Ensures that Smartstack service name and namespace does not
1000+
exceed the limit on the length of Envoy's listener names
1001+
"""
1002+
if len(service) + len(namespace) > MAX_ENVOY_NAME_LEN:
1003+
raise ValueError(
1004+
"Service name and namespace exceeds max listener name length in Envoy. Note that the full listener name "
1005+
'is "{}.{}.listener". Please rename so that the combined length of the service name and namespace does '
1006+
"not exceed {} characters".format(service, namespace, MAX_ENVOY_NAME_LEN),
1007+
)
1008+
1009+
1010+
def _check_smartstack_name_length(service: str, namespace: str) -> None:
1011+
"""Ensure that Smartstack name does not
1012+
exceed limits on HAProxy socket name
1013+
"""
1014+
if len(service + namespace) > MAX_SMARTSTACK_NAME_LEN:
1015+
socket_name = "/var/run/synapse/sockets/{}.{}.sock.LONGPID.tmp".format(
1016+
service,
1017+
namespace,
1018+
)
1019+
raise ValueError(
1020+
"Name exceeds max socket name length. Note that the full socket name under the HAProxy naming scheme "
1021+
'is "{}". Please rename so that the combined length of the service name and namespace does not '
1022+
"exceed {} characters".format(socket_name, MAX_SMARTSTACK_NAME_LEN),
1023+
)
1024+
1025+
1026+
@lru_cache()
1027+
def _get_etc_services() -> list[str]:
1028+
with open("/etc/services") as f:
1029+
return f.read().splitlines()
1030+
1031+
1032+
@lru_cache()
1033+
def _get_etc_services_entry(port_lookup: int) -> str | None:
1034+
entries = _get_etc_services()
1035+
for entry in entries:
1036+
try:
1037+
service = entry.split()[0]
1038+
port = entry.split()[1]
1039+
if port.startswith("%s/" % str(port_lookup)):
1040+
return service
1041+
except IndexError:
1042+
continue
1043+
return None
1044+
1045+
1046+
def _check_proxy_port_in_use(service: str, namespace: str, port: int) -> bool:
1047+
if port is None:
1048+
return False
1049+
1050+
# TODO(luisp): this should probably check the distributed /nail/etc/services
1051+
# smartstack.yamls OR we should more automatically manage /etc/services
1052+
etc_services_entry = _get_etc_services_entry(port)
1053+
if etc_services_entry is None:
1054+
return False
1055+
elif f"{service}.{namespace}" == etc_services_entry:
1056+
return False
1057+
else:
1058+
raise ValueError(
1059+
(
1060+
"port {} is already in use by {} according to /etc/services, it cannot be used by "
1061+
"{}.{}. Please either pick a different port or update /etc/services via puppet"
1062+
).format(port, etc_services_entry, service, namespace),
1063+
)
1064+
1065+
1066+
def _check_advertise_discover(
1067+
smartstack_data: dict[str, Any]
1068+
) -> None: # XXX: we should use a TypedDict here
1069+
"""Need to ensure a few properties about smartstack files
1070+
1) discover is a member of advertise
1071+
2) discovery and advertise contain valid locations
1072+
3) extra_advertise contains valid locations
1073+
4) rhs of extra_advertise are >= discover type
1074+
"""
1075+
1076+
def assert_valid_type(location_type: str) -> None:
1077+
if location_type not in available_location_types():
1078+
raise ValueError(
1079+
'Location type "{}" not a valid Yelp location type'.format(
1080+
location_type,
1081+
),
1082+
)
1083+
1084+
def assert_valid_location(location_string: str) -> None:
1085+
try:
1086+
typ, loc = location_string.split(":")
1087+
assert len(convert_location_type(loc, typ, typ)) == 1
1088+
except Exception:
1089+
raise ValueError(
1090+
'Location string "{}" not a valid Yelp location'.format(
1091+
location_string,
1092+
),
1093+
)
1094+
1095+
advertise = smartstack_data.get("advertise", ["region"])
1096+
discover = smartstack_data.get("discover", "region")
1097+
if discover not in advertise:
1098+
raise ValueError(
1099+
'discover key "{}" not a member of advertise "{}"'.format(
1100+
discover,
1101+
advertise,
1102+
),
1103+
)
1104+
for location_type in [discover] + advertise:
1105+
assert_valid_type(location_type)
1106+
1107+
extra_advertisements = smartstack_data.get("extra_advertise", {})
1108+
for source, destinations in extra_advertisements.items():
1109+
assert_valid_location(source)
1110+
for destination in destinations:
1111+
assert_valid_location(destination)
1112+
dest_type = destination.split(":")[0]
1113+
if compare_types(dest_type, discover) > 0:
1114+
raise ValueError(
1115+
'Right hand side "{}" less general than discover type "{}". Your advertisement would potentially '
1116+
"result in more hosts seeing your service than intended. Please change the type of your RHS to be "
1117+
">= the discover type".format(destination, discover),
1118+
)
1119+
1120+
1121+
def _check_smartstack_valid_proxy(proxied_through: str) -> None:
1122+
"""Checks whether its parameter is a valid Smartstack namespace. Can be used for proxied_through or clb_proxy."""
1123+
proxy_service, proxy_namespace = proxied_through.split(".")
1124+
proxy_smartstack_filename = f"{proxy_service}/smartstack.yaml"
1125+
try:
1126+
yaml_data = get_config_file_dict(proxy_smartstack_filename)
1127+
if proxy_namespace not in yaml_data:
1128+
raise ValueError(
1129+
f"{proxied_through} is not a valid Smartstack namespace to proxy through: "
1130+
f"{proxy_namespace} not found in {proxy_smartstack_filename}.",
1131+
)
1132+
except FileNotFoundError:
1133+
raise ValueError(
1134+
f"{proxied_through} is not a valid Smartstack namespace to proxy through: "
1135+
f"{proxy_smartstack_filename} does not exist.",
1136+
)
1137+
1138+
1139+
def _check_smartstack_proxied_through(
1140+
smartstack_data: dict[str, Any]
1141+
) -> None: # XXX: we should use a TypedDict here
1142+
"""Checks the proxied_through field of a Smartstack namespace refers to another valid Smartstack namespace"""
1143+
if "proxied_through" not in smartstack_data:
1144+
return
1145+
1146+
proxied_through = smartstack_data["proxied_through"]
1147+
_check_smartstack_valid_proxy(proxied_through)
1148+
1149+
1150+
def validate_smartstack(service_path: str) -> bool:
1151+
if not os.path.exists(os.path.join(service_path, "smartstack.yaml")):
1152+
# not every service is mesh-registered, exit early if this is the case
1153+
return True
1154+
1155+
config = get_config_file_dict(os.path.join(service_path, "smartstack.yaml"))
1156+
if not config:
1157+
print(
1158+
failure(
1159+
"smartstack.yaml is empty - if this service is not mesh-registed, please remove this file.",
1160+
"http://paasta.readthedocs.io/en/latest/yelpsoa_configs.html",
1161+
)
1162+
)
1163+
return False
1164+
1165+
_, service = path_to_soa_dir_service(service_path)
1166+
for namespace, namespace_config in config.items():
1167+
# XXX(luisp): these should all really either return bools or be enforced in the schema
1168+
# ...i'm mostly leaving these as-is since i'm trying to remove some internal code that
1169+
# duplicates a bunch of paasta validate checks (i.e., py-gitolite)
1170+
_check_smartstack_name_length_envoy(service, namespace)
1171+
if namespace_config["proxy_port"]:
1172+
_check_smartstack_name_length(service, namespace)
1173+
proxy_port = namespace_config["proxy_port"]
1174+
_check_proxy_port_in_use(service, namespace, proxy_port)
1175+
_check_advertise_discover(namespace_config)
1176+
_check_smartstack_proxied_through(namespace_config)
1177+
1178+
print(success("All SmartStack configs are valid"))
1179+
return True
1180+
1181+
9741182
def paasta_validate_soa_configs(
9751183
service: str, service_path: str, verbose: bool = False
9761184
) -> bool:
@@ -993,6 +1201,7 @@ def paasta_validate_soa_configs(
9931201
validate_secrets,
9941202
validate_min_max_instances,
9951203
validate_cpu_burst,
1204+
validate_smartstack,
9961205
]
9971206

9981207
# NOTE: we're explicitly passing a list comprehension to all()
@@ -1006,7 +1215,12 @@ def paasta_validate(args):
10061215
10071216
:param args: argparse.Namespace obj created from sys.args by cli
10081217
"""
1009-
service_path = get_service_path(args.service, args.yelpsoa_config_root)
10101218
service = args.service or guess_service_name()
1219+
service_path = get_service_path(service, args.yelpsoa_config_root)
1220+
1221+
# not much we can do if we have no path to inspect ;)
1222+
if not service_path:
1223+
return 1
1224+
10111225
if not paasta_validate_soa_configs(service, service_path, args.verbose):
10121226
return 1

paasta_tools/cli/schemas/smartstack_schema.json

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,11 @@
6161
"minimum": 0,
6262
"maximum": 60000
6363
},
64+
"timeout_client_ms": {
65+
"type": "integer",
66+
"minimum": 0,
67+
"maximum": 86400000
68+
},
6469
"timeout_server_ms": {
6570
"type": "integer",
6671
"minimum": 0,
@@ -116,6 +121,7 @@
116121
"mode": {
117122
"enum": [
118123
"http",
124+
"http2",
119125
"https",
120126
"tcp"
121127
]
@@ -130,6 +136,10 @@
130136
"CLUSTER_PROVIDED"
131137
]
132138
},
139+
"choice_count": {
140+
"type": "integer",
141+
"minimum": 1
142+
},
133143
"chaos": {
134144
"type": "object",
135145
"additionalProperties": {
@@ -154,6 +164,9 @@
154164
"proxied_through": {
155165
"type": "string"
156166
},
167+
"clb_proxy": {
168+
"type": "string"
169+
},
157170
"fixed_delay": {
158171
"type": "object",
159172
"additionalProperties": {

0 commit comments

Comments
 (0)