Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rewrote protobuf generation scripts in Python #12527

Merged
merged 14 commits into from
Sep 19, 2024
Merged
4 changes: 4 additions & 0 deletions requirements-tests.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ ruff==0.5.4 # must match .pre-commit-config.yaml

# Libraries used by our various scripts.
aiohttp==3.10.2
# grpc install only fails on Windows, but let's avoid building sdist on other platforms
# https://github.com/grpc/grpc/issues/36201
grpcio-tools; python_version < "3.13"
Comment on lines +17 to +19
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mypy-protobuf==3.6.0
packaging==24.1
pathspec>=0.11.1
pre-commit
Expand Down
76 changes: 0 additions & 76 deletions scripts/generate_proto_stubs.sh

This file was deleted.

66 changes: 66 additions & 0 deletions scripts/sync_protobuf/_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
from __future__ import annotations

import subprocess
import sys
from http.client import HTTPResponse
from pathlib import Path
from typing import TYPE_CHECKING, Iterable
from urllib.request import urlopen
Copy link
Collaborator Author

@Avasam Avasam Aug 16, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm purposefully avoiding `requests` here, so as not to add `requests` and `types-requests` to requirements-tests.txt.

from zipfile import ZipFile

import tomlkit
from mypy_protobuf.main import ( # type: ignore[import-untyped] # pyright: ignore[reportMissingTypeStubs]
__version__ as mypy_protobuf__version__,
)

if TYPE_CHECKING:
from _typeshed import StrOrBytesPath, StrPath

# Repository root, obtained by taking the parent of this file's absolute path three times.
REPO_ROOT = Path(__file__).absolute().parent.parent.parent
# Version string imported from mypy_protobuf.main, re-exported under a constant-style name.
MYPY_PROTOBUF_VERSION = mypy_protobuf__version__


def download_file(url: str, destination: StrPath) -> None:
    """Download ``url`` and write the response body to ``destination``.

    The whole body is read into memory before it is written to disk.
    Errors raised by ``urlopen`` itself propagate unchanged.

    Args:
        url: Address of the resource to fetch.
        destination: Path of the file to create (or overwrite) with the body.

    Raises:
        RuntimeError: If the response status code is not 200.
    """
    print(f"Downloading '{url}' to '{destination}'")
    resp: HTTPResponse
    # Use the response as a context manager so the underlying connection is
    # closed even when the status check or the file write fails.
    with urlopen(url) as resp:
        if resp.getcode() != 200:
            raise RuntimeError(f"Error downloading {url}")
        with open(destination, "wb") as file:
            file.write(resp.read())


def extract_archive(archive_path: StrPath, destination: StrPath) -> None:
    """Unpack every member of the zip file at ``archive_path`` into ``destination``."""
    print(f"Extracting '{archive_path}' to '{destination}'")
    with ZipFile(archive_path) as archive:
        archive.extractall(path=destination)


def update_metadata(metadata_folder: StrPath, new_extra_description: str) -> None:
    """Set the ``extra_description`` key of ``<metadata_folder>/METADATA.toml``.

    The file is loaded with tomlkit, the key is assigned, and the document is
    written back to the same path.

    Args:
        metadata_folder: Directory that contains ``METADATA.toml``.
        new_extra_description: Value stored under ``extra_description``.
    """
    metadata_path = Path(metadata_folder) / "METADATA.toml"
    # TOML documents are UTF-8 by specification, so do not rely on the
    # platform's locale encoding for either the read or the write.
    with open(metadata_path, encoding="utf-8") as file:
        metadata = tomlkit.load(file)
    metadata["extra_description"] = new_extra_description
    with open(metadata_path, "w", encoding="utf-8") as file:
        # tomlkit.dump has partially unknown IO type
        tomlkit.dump(metadata, file)  # pyright: ignore[reportUnknownMemberType]
    print(f"Updated {metadata_path}")


def run_protoc(
    proto_paths: Iterable[StrPath], mypy_out: StrPath, proto_globs: Iterable[str], cwd: StrOrBytesPath | None = None
) -> str:
    """Run ``grpc_tools.protoc`` with ``--mypy_out`` and report the protoc version.

    Args:
        proto_paths: Directories passed to protoc, one ``--proto_path`` option each.
        mypy_out: Output directory given to ``--mypy_out`` (with the
            ``relax_strict_optional_primitives`` option).
        proto_globs: Arguments appended verbatim after the options.
        cwd: Working directory for the generation run; ``None`` keeps the current one.
            The version query does not use it.

    Returns:
        The stripped standard output of ``grpc_tools.protoc --version``.
        The exit status of that version query is not checked.

    Raises:
        subprocess.CalledProcessError: If the generation run exits with a non-zero status.
    """
    protoc_command = (sys.executable, "-m", "grpc_tools.protoc")

    version_result = subprocess.run([*protoc_command, "--version"], capture_output=True)
    protoc_version = version_result.stdout.decode().strip()
    print()
    print(protoc_version)

    protoc_args = [f"--proto_path={proto_path}" for proto_path in proto_paths]
    protoc_args += ["--mypy_out", f"relax_strict_optional_primitives:{mypy_out}"]
    protoc_args += proto_globs
    print("Running: protoc\n " + "\n ".join(protoc_args) + "\n")
    subprocess.run((*protoc_command, *protoc_args), cwd=cwd, check=True)
    return protoc_version
91 changes: 91 additions & 0 deletions scripts/sync_protobuf/google_protobuf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
"""
Generates the protobuf stubs for the given protobuf version using mypy-protobuf.
Generally, new minor versions are a good time to update the stubs.
"""

from __future__ import annotations

import json
import re
import shutil
import subprocess
import sys
import tempfile
from pathlib import Path

from _helpers import MYPY_PROTOBUF_VERSION, REPO_ROOT, download_file, extract_archive, run_protoc, update_metadata

# Whenever you update PACKAGE_VERSION here, version should be updated
# in stubs/protobuf/METADATA.toml and vice-versa.
PACKAGE_VERSION = "27.1"

# Folder of the protobuf stubs inside this repository.
STUBS_FOLDER = REPO_ROOT / "stubs" / "protobuf"
# Release archive for PACKAGE_VERSION: its file name and download URL.
ARCHIVE_FILENAME = f"protobuf-{PACKAGE_VERSION}.zip"
ARCHIVE_URL = f"https://github.com/protocolbuffers/protobuf/releases/download/v{PACKAGE_VERSION}/{ARCHIVE_FILENAME}"
# Directory looked up under the extraction target once the archive is unpacked.
EXTRACTED_PACKAGE_DIR = f"protobuf-{PACKAGE_VERSION}"

# NOTE(review): VERSION_PATTERN is not referenced by any code visible in this file;
# it matches a `game_version()` function, so it looks like a leftover — confirm before removing.
VERSION_PATTERN = re.compile(r'def game_version\(\):\n return "(.+?)"')
# Captures the text between `"//:` and `_proto"` on a BUILD.bazel line.
PROTO_FILE_PATTERN = re.compile(r'"//:(.*)_proto"')


def extract_python_version(file_path: Path) -> str:
    """Extract the Python version from https://github.com/protocolbuffers/protobuf/blob/main/version.json

    Args:
        file_path: Path to a local copy of ``version.json``.

    Returns:
        The value found at ``<first root key> -> "languages" -> "python"``.

    Raises:
        TypeError: If that value is not a string, e.g. because the file's
            structure changed and the entry became an object.
    """
    # JSON exchanged between systems is UTF-8; do not depend on the locale encoding.
    with open(file_path, encoding="utf-8") as file:
        data = json.load(file)
    # The root key will be the protobuf source code version
    version = next(iter(data.values()))["languages"]["python"]
    # The file comes from an outside source: check the type at runtime rather
    # than trusting an annotation. An explicit raise also survives `python -O`.
    if not isinstance(version, str):
        raise TypeError(f"Expected the Python version in {file_path} to be a string, got {type(version).__name__}")
    return version
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd like to see some validation of the version, considering it's coming from an outside source. Something like:

Suggested change
data: dict[str, dict[str, dict[str, str]]] = json.load(file)
# The root key will be the protobuf source code version
return next(iter(data.values()))["languages"]["python"]
data = json.load(file)
# The root key will be the protobuf source code version
version = next(iter(data.values()))["languages"]["python"]
assert isinstance(version, str)
assert re.fullmatch(r"...", version) # proper re here
return version

This way we're also sure (at runtime) that version has the correct type and format.

Copy link
Collaborator Author

@Avasam Avasam Sep 18, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel like validating the version string is unnecessary extra work. If they somehow write an invalid Python version, our script doesn't need to fail. We're not doing anything with it other than displaying it. Proper validation should probably use a Python packaging library (I don't remember which).
I still find the str assertion valuable, in case protobuf changes the structure of that file and the value becomes an object (dict).



def extract_proto_file_paths(temp_dir: Path) -> list[str]:
    """
    Build the list of .proto files to compile, roughly mirroring the public
    interface described by the py_proto_library calls in
    https://github.com/protocolbuffers/protobuf/blob/main/python/dist/BUILD.bazel

    Each BUILD.bazel line matching PROTO_FILE_PATTERN yields one path relative
    to ``temp_dir``; a ``compiler_`` prefix in the captured name is mapped to
    the ``compiler/`` sub-directory.
    """
    build_file = temp_dir / EXTRACTED_PACKAGE_DIR / "python" / "dist" / "BUILD.bazel"
    proto_dir = EXTRACTED_PACKAGE_DIR + "/src/google/protobuf/"
    proto_files: list[str] = []
    with open(build_file) as file:
        for line in file:
            match = PROTO_FILE_PATTERN.search(line)
            if match is None:
                continue
            proto_files.append(proto_dir + match.group(1).replace("compiler_", "compiler/") + ".proto")
    return proto_files


def main() -> None:
temp_dir = Path(tempfile.mkdtemp())
# Fetch protobuf (which contains all the .proto files)
archive_path = temp_dir / ARCHIVE_FILENAME
download_file(ARCHIVE_URL, archive_path)
extract_archive(archive_path, temp_dir)

# Remove existing pyi
for old_stub in STUBS_FOLDER.rglob("*_pb2.pyi"):
old_stub.unlink()

PROTOC_VERSION = run_protoc(
proto_paths=(f"{EXTRACTED_PACKAGE_DIR}/src",),
mypy_out=STUBS_FOLDER,
proto_globs=extract_proto_file_paths(temp_dir),
cwd=temp_dir,
)

PYTHON_PROTOBUF_VERSION = extract_python_version(temp_dir / EXTRACTED_PACKAGE_DIR / "version.json")

# Cleanup after ourselves, this is a temp dir, but it can still grow fast if run multiple times
shutil.rmtree(temp_dir)
Comment on lines +58 to +78
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To make sure the temp directory is always cleaned up:

Suggested change
temp_dir = Path(tempfile.mkdtemp())
# Fetch s2clientprotocol (which contains all the .proto files)
archive_path = temp_dir / ARCHIVE_FILENAME
download_file(ARCHIVE_URL, archive_path)
extract_archive(archive_path, temp_dir)
# Remove existing pyi
for old_stub in STUBS_FOLDER.rglob("*_pb2.pyi"):
old_stub.unlink()
PROTOC_VERSION = run_protoc(
proto_paths=(f"{EXTRACTED_PACKAGE_DIR}/src",),
mypy_out=STUBS_FOLDER,
proto_globs=extract_proto_file_paths(temp_dir),
cwd=temp_dir,
)
PYTHON_PROTOBUF_VERSION = extract_python_version(temp_dir / EXTRACTED_PACKAGE_DIR / "version.json")
# Cleanup after ourselves, this is a temp dir, but it can still grow fast if run multiple times
shutil.rmtree(temp_dir)
with tempfile.TemporaryDirectory() as td:
temp_dir = Path(td)
# Fetch s2clientprotocol (which contains all the .proto files)
archive_path = temp_dir / ARCHIVE_FILENAME
download_file(ARCHIVE_URL, archive_path)
extract_archive(archive_path, temp_dir)
# Remove existing pyi
for old_stub in STUBS_FOLDER.rglob("*_pb2.pyi"):
old_stub.unlink()
PROTOC_VERSION = run_protoc(
proto_paths=(f"{EXTRACTED_PACKAGE_DIR}/src",),
mypy_out=STUBS_FOLDER,
proto_globs=extract_proto_file_paths(temp_dir),
cwd=temp_dir,
)
PYTHON_PROTOBUF_VERSION = extract_python_version(temp_dir / EXTRACTED_PACKAGE_DIR / "version.json")

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did that originally, but it was more annoying to comment out for debugging purposes. Maybe I could do like #12151


update_metadata(
STUBS_FOLDER,
f"""Partially generated using \
[mypy-protobuf=={MYPY_PROTOBUF_VERSION}](https://github.com/nipunn1313/mypy-protobuf/tree/v{MYPY_PROTOBUF_VERSION}) \
and {PROTOC_VERSION} on \
[protobuf v{PACKAGE_VERSION}](https://github.com/protocolbuffers/protobuf/releases/tag/v{PACKAGE_VERSION}) \
(python `protobuf=={PYTHON_PROTOBUF_VERSION}`).""",
)

# Run pre-commit to cleanup the stubs
subprocess.run((sys.executable, "-m", "pre_commit", "run", "--files", *STUBS_FOLDER.rglob("*_pb2.pyi")))


if __name__ == "__main__":
main()
72 changes: 72 additions & 0 deletions scripts/sync_protobuf/s2clientprotocol.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
"""
Generates the protobuf stubs for the given s2clientprotocol version using mypy-protobuf.
Generally, new minor versions are a good time to update the stubs.
"""

from __future__ import annotations

import re
import shutil
import subprocess
import sys
import tempfile
from pathlib import Path

from _helpers import MYPY_PROTOBUF_VERSION, REPO_ROOT, download_file, extract_archive, run_protoc, update_metadata

# Whenever you update PACKAGE_VERSION here, version should be updated
# in stubs/s2clientprotocol/METADATA.toml and vice-versa.
# The value is used verbatim in the archive URL and in the GitHub tree link.
PACKAGE_VERSION = "c04df4adbe274858a4eb8417175ee32ad02fd609"

# Folder of the s2clientprotocol stubs inside this repository.
STUBS_FOLDER = REPO_ROOT / "stubs" / "s2clientprotocol"
# Source archive for PACKAGE_VERSION: its file name and download URL.
ARCHIVE_FILENAME = f"{PACKAGE_VERSION}.zip"
ARCHIVE_URL = f"https://github.com/Blizzard/s2client-proto/archive/{ARCHIVE_FILENAME}"
# Directory looked up under the extraction target once the archive is unpacked.
EXTRACTED_PACKAGE_DIR = f"s2client-proto-{PACKAGE_VERSION}"

# Captures the string literal returned by a `game_version()` function definition.
VERSION_PATTERN = re.compile(r'def game_version\(\):\n return "(.+?)"')


def extract_python_version(file_path: Path) -> str:
    """Extract Python version from s2clientprotocol's build file

    Args:
        file_path: Path to the build file to search with VERSION_PATTERN.

    Returns:
        The first captured group of the first VERSION_PATTERN match.

    Raises:
        ValueError: If the file contains no match for VERSION_PATTERN.
    """
    # Python source files are UTF-8 by default; do not depend on the locale encoding.
    match = VERSION_PATTERN.search(file_path.read_text(encoding="utf-8"))
    # Raise explicitly instead of using `assert`: assertions are stripped under
    # `python -O`, which would turn a missing match into an opaque AttributeError.
    if match is None:
        raise ValueError(f"Could not find a game_version() definition in {file_path}")
    return match.group(1)


def main() -> None:
temp_dir = Path(tempfile.mkdtemp())
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See above.

# Fetch s2clientprotocol (which contains all the .proto files)
archive_path = temp_dir / ARCHIVE_FILENAME
download_file(ARCHIVE_URL, archive_path)
extract_archive(archive_path, temp_dir)

# Remove existing pyi
for old_stub in STUBS_FOLDER.rglob("*_pb2.pyi"):
old_stub.unlink()

PROTOC_VERSION = run_protoc(
proto_paths=(EXTRACTED_PACKAGE_DIR,),
mypy_out=STUBS_FOLDER,
proto_globs=(f"{EXTRACTED_PACKAGE_DIR}/s2clientprotocol/*.proto",),
cwd=temp_dir,
)

PYTHON_S2_CLIENT_PROTO_VERSION = extract_python_version(temp_dir / EXTRACTED_PACKAGE_DIR / "s2clientprotocol" / "build.py")

# Cleanup after ourselves, this is a temp dir, but it can still grow fast if run multiple times
shutil.rmtree(temp_dir)

update_metadata(
STUBS_FOLDER,
f"""Partially generated using \
[mypy-protobuf=={MYPY_PROTOBUF_VERSION}](https://github.com/nipunn1313/mypy-protobuf/tree/v{MYPY_PROTOBUF_VERSION}) \
and {PROTOC_VERSION} on \
[s2client-proto {PYTHON_S2_CLIENT_PROTO_VERSION}](https://github.com/Blizzard/s2client-proto/tree/{PACKAGE_VERSION}).""",
)

# Run pre-commit to cleanup the stubs
subprocess.run((sys.executable, "-m", "pre_commit", "run", "--files", *STUBS_FOLDER.rglob("*_pb2.pyi")))


if __name__ == "__main__":
main()
Loading