From 3bd96a9a52f697f35a7722039cb42718cb60ec36 Mon Sep 17 00:00:00 2001 From: aignas <240938+aignas@users.noreply.github.com> Date: Thu, 30 May 2024 13:05:14 +0900 Subject: [PATCH] Revert "feat(gazelle): pure golang helper (#1895)" This reverts commit 7fc79626b82d0ff3bb57584de2de9d03ddbbb3c4. Fixes #1913 --- CHANGELOG.md | 4 - gazelle/BUILD.bazel | 10 +- gazelle/MODULE.bazel | 18 -- gazelle/WORKSPACE | 9 +- gazelle/deps.bzl | 144 +++------------ gazelle/go.mod | 8 +- gazelle/go.sum | 22 +-- gazelle/python/BUILD.bazel | 69 ++++--- gazelle/python/__main__.py | 32 ++++ gazelle/python/extensions.bzl | 5 - gazelle/python/file_parser.go | 201 -------------------- gazelle/python/file_parser_test.go | 256 -------------------------- gazelle/python/language.go | 1 + gazelle/python/lifecycle.go | 63 +++++++ gazelle/python/parse.py | 147 +++++++++++++++ gazelle/python/parse_test.py | 41 +++++ gazelle/python/parser.go | 114 +++++++++--- gazelle/python/private/BUILD.bazel | 0 gazelle/python/private/extensions.bzl | 9 - gazelle/python/python_test.go | 14 +- gazelle/python/resolve.go | 6 +- gazelle/python/std_modules.go | 89 +++++++-- gazelle/python/std_modules.py | 51 +++++ gazelle/python/std_modules_test.go | 27 --- 24 files changed, 592 insertions(+), 748 deletions(-) create mode 100644 gazelle/python/__main__.py delete mode 100644 gazelle/python/extensions.bzl delete mode 100644 gazelle/python/file_parser.go delete mode 100644 gazelle/python/file_parser_test.go create mode 100644 gazelle/python/lifecycle.go create mode 100644 gazelle/python/parse.py create mode 100644 gazelle/python/parse_test.py delete mode 100644 gazelle/python/private/BUILD.bazel delete mode 100644 gazelle/python/private/extensions.bzl create mode 100644 gazelle/python/std_modules.py delete mode 100644 gazelle/python/std_modules_test.go diff --git a/CHANGELOG.md b/CHANGELOG.md index e15be3bbcc..e2eeb6fd55 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,10 +31,6 @@ A brief description of the categories of 
changes: marked as `reproducible` and will not include any lock file entries from now on. -* (gazelle): Remove gazelle plugin's python deps and make it hermetic. - Introduced a new Go-based helper leveraging tree-sitter for syntax analysis. - Implemented the use of `pypi/stdlib-list` for standard library module verification. - ### Fixed * (gazelle) Remove `visibility` from `NonEmptyAttr`. Now empty(have no `deps/main/srcs/imports` attr) `py_library/test/binary` rules will diff --git a/gazelle/BUILD.bazel b/gazelle/BUILD.bazel index f74338d4b5..e00c74a444 100644 --- a/gazelle/BUILD.bazel +++ b/gazelle/BUILD.bazel @@ -1,4 +1,4 @@ -load("@bazel_gazelle//:def.bzl", "gazelle") +load("@bazel_gazelle//:def.bzl", "DEFAULT_LANGUAGES", "gazelle", "gazelle_binary") # Gazelle configuration options. # See https://github.com/bazelbuild/bazel-gazelle#running-gazelle-with-bazel @@ -6,13 +6,19 @@ load("@bazel_gazelle//:def.bzl", "gazelle") # gazelle:exclude bazel-out gazelle( name = "gazelle", + gazelle = ":gazelle_binary", +) + +gazelle_binary( + name = "gazelle_binary", + languages = DEFAULT_LANGUAGES + ["//python"], ) gazelle( name = "gazelle_update_repos", args = [ "-from_file=go.mod", - "-to_macro=deps.bzl%go_deps", + "-to_macro=deps.bzl%gazelle_deps", "-prune", ], command = "update-repos", diff --git a/gazelle/MODULE.bazel b/gazelle/MODULE.bazel index 1829d248b2..6ae7719d4b 100644 --- a/gazelle/MODULE.bazel +++ b/gazelle/MODULE.bazel @@ -9,11 +9,6 @@ bazel_dep(name = "rules_python", version = "0.18.0") bazel_dep(name = "rules_go", version = "0.41.0", repo_name = "io_bazel_rules_go") bazel_dep(name = "gazelle", version = "0.33.0", repo_name = "bazel_gazelle") -local_path_override( - module_name = "rules_python", - path = "..", -) - go_deps = use_extension("@bazel_gazelle//:extensions.bzl", "go_deps") go_deps.from_file(go_mod = "//:go.mod") use_repo( @@ -22,18 +17,5 @@ use_repo( "com_github_bmatcuk_doublestar_v4", "com_github_emirpasic_gods", "com_github_ghodss_yaml", - 
"com_github_smacker_go_tree_sitter", - "com_github_stretchr_testify", "in_gopkg_yaml_v2", - "org_golang_x_sync", -) - -python_stdlib_list = use_extension("//python:extensions.bzl", "python_stdlib_list") -use_repo( - python_stdlib_list, - "python_stdlib_list_3_10", - "python_stdlib_list_3_11", - "python_stdlib_list_3_12", - "python_stdlib_list_3_8", - "python_stdlib_list_3_9", ) diff --git a/gazelle/WORKSPACE b/gazelle/WORKSPACE index d9f0645071..df2883fd08 100644 --- a/gazelle/WORKSPACE +++ b/gazelle/WORKSPACE @@ -34,11 +34,16 @@ local_repository( path = "..", ) -load("@rules_python//python:repositories.bzl", "py_repositories") +load("@rules_python//python:repositories.bzl", "py_repositories", "python_register_toolchains") py_repositories() +python_register_toolchains( + name = "python_3_11", + python_version = "3.11", +) + load("//:deps.bzl", _py_gazelle_deps = "gazelle_deps") -# gazelle:repository_macro deps.bzl%go_deps +# gazelle:repository_macro deps.bzl%gazelle_deps _py_gazelle_deps() diff --git a/gazelle/deps.bzl b/gazelle/deps.bzl index f4f4c24fc7..d9d38810be 100644 --- a/gazelle/deps.bzl +++ b/gazelle/deps.bzl @@ -14,54 +14,13 @@ "This file managed by `bazel run //:gazelle_update_repos`" -load( - "@bazel_gazelle//:deps.bzl", - _go_repository = "go_repository", -) -load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_file") +load("@bazel_gazelle//:deps.bzl", _go_repository = "go_repository") def go_repository(name, **kwargs): if name not in native.existing_rules(): _go_repository(name = name, **kwargs) -def python_stdlib_list_deps(): - "Fetch python stdlib list dependencies" - http_file( - name = "python_stdlib_list_3_8", - sha256 = "ee6dc367011ff298b906dbaab408940aa57086d5f8f47278f4b7523b9aa13ae3", - url = "https://raw.githubusercontent.com/pypi/stdlib-list/8cbc2067a4a0f9eee57fb541e4cd7727724b7db4/stdlib_list/lists/3.8.txt", - downloaded_file_path = "3.8.txt", - ) - http_file( - name = "python_stdlib_list_3_9", - sha256 = 
"a4340e5ffe2e75bb18f548028cef6e6ac15384c44ae0a776e04dd869da1d1fd7", - url = "https://raw.githubusercontent.com/pypi/stdlib-list/8cbc2067a4a0f9eee57fb541e4cd7727724b7db4/stdlib_list/lists/3.9.txt", - downloaded_file_path = "3.9.txt", - ) - http_file( - name = "python_stdlib_list_3_10", - sha256 = "0b867738b78ac98944237de2600093a1c6ef259d1810017e46f01a29f3d199e7", - url = "https://raw.githubusercontent.com/pypi/stdlib-list/8cbc2067a4a0f9eee57fb541e4cd7727724b7db4/stdlib_list/lists/3.10.txt", - downloaded_file_path = "3.10.txt", - ) - http_file( - name = "python_stdlib_list_3_11", - sha256 = "3c1dbf991b17178d6ed3772f4fa8f64302feaf9c3385fef328a0c7ab736a79b1", - url = "https://raw.githubusercontent.com/pypi/stdlib-list/8cbc2067a4a0f9eee57fb541e4cd7727724b7db4/stdlib_list/lists/3.11.txt", - downloaded_file_path = "3.11.txt", - ) - http_file( - name = "python_stdlib_list_3_12", - sha256 = "6d3d53194218b43ee1d04bf9a4f0b6a9309bb59cdcaddede7d9cfe8b6835d34a", - url = "https://raw.githubusercontent.com/pypi/stdlib-list/8cbc2067a4a0f9eee57fb541e4cd7727724b7db4/stdlib_list/lists/3.12.txt", - downloaded_file_path = "3.12.txt", - ) - def gazelle_deps(): - go_deps() - python_stdlib_list_deps() - -def go_deps(): "Fetch go dependencies" go_repository( name = "co_honnef_go_tools", @@ -69,25 +28,13 @@ def go_deps(): sum = "h1:/hemPrYIhOhy8zYrNj+069zDB68us2sMGsfkFJO0iZs=", version = "v0.0.0-20190523083050-ea95bdfd59fc", ) - go_repository( - name = "com_github_bazelbuild_bazel_gazelle", - importpath = "github.com/bazelbuild/bazel-gazelle", - sum = "h1:ROyUyUHzoEdvoOs1e0haxJx1l5EjZX6AOqiKdVlaBbg=", - version = "v0.31.1", - ) go_repository( name = "com_github_bazelbuild_buildtools", build_naming_convention = "go_default_library", importpath = "github.com/bazelbuild/buildtools", - sum = "h1:HTepWP/jhtWTC1gvK0RnvKCgjh4gLqiwaOwGozAXcbw=", - version = "v0.0.0-20231103205921-433ea8554e82", - ) - go_repository( - name = "com_github_bazelbuild_rules_go", - importpath = 
"github.com/bazelbuild/rules_go", - sum = "h1:JzlRxsFNhlX+g4drDRPhIaU5H5LnI978wdMJ0vK4I+k=", - version = "v0.41.0", + sum = "h1:jhiMzJ+8unnLRtV8rpbWBFE9pFNzIqgUTyZU5aA++w8=", + version = "v0.0.0-20221004120235-7186f635531b", ) go_repository( @@ -133,13 +80,6 @@ def go_deps(): sum = "h1:ta993UF76GwbvJcIo3Y68y/M3WxlpEHPWIGDkJYwzJI=", version = "v0.3.4", ) - go_repository( - name = "com_github_davecgh_go_spew", - importpath = "github.com/davecgh/go-spew", - sum = "h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=", - version = "v1.1.1", - ) - go_repository( name = "com_github_emirpasic_gods", importpath = "github.com/emirpasic/gods", @@ -158,12 +98,6 @@ def go_deps(): sum = "h1:EQciDnbrYxy13PgWoY8AqoxGiPrpgBZ1R8UNe3ddc+A=", version = "v0.1.0", ) - go_repository( - name = "com_github_fsnotify_fsnotify", - importpath = "github.com/fsnotify/fsnotify", - sum = "h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY=", - version = "v1.6.0", - ) go_repository( name = "com_github_ghodss_yaml", @@ -180,14 +114,14 @@ def go_deps(): go_repository( name = "com_github_golang_mock", importpath = "github.com/golang/mock", - sum = "h1:ErTB+efbowRARo13NNdxyJji2egdxLGQhRaY+DUumQc=", - version = "v1.6.0", + sum = "h1:G5FRp8JnTd7RQH5kemVNlMeyXQAztQ3mOWV95KxsXH8=", + version = "v1.1.1", ) go_repository( name = "com_github_golang_protobuf", importpath = "github.com/golang/protobuf", - sum = "h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw=", - version = "v1.5.2", + sum = "h1:JjCZWpVbqXDqFVmTfYWEVTMIYrL/NPdPSCHPJ0T/raM=", + version = "v1.4.3", ) go_repository( name = "com_github_google_go_cmp", @@ -195,12 +129,6 @@ def go_deps(): sum = "h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=", version = "v0.5.9", ) - go_repository( - name = "com_github_pmezard_go_difflib", - importpath = "github.com/pmezard/go-difflib", - sum = "h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=", - version = "v1.0.0", - ) go_repository( name = "com_github_prometheus_client_model", @@ -208,25 +136,6 @@ def go_deps(): sum = 
"h1:gQz4mCbXsO+nc9n1hCxHcGA3Zx3Eo+UHZoInFGUIXNM=", version = "v0.0.0-20190812154241-14fe0d1b01d4", ) - go_repository( - name = "com_github_smacker_go_tree_sitter", - importpath = "github.com/smacker/go-tree-sitter", - sum = "h1:7QZKUmQfnxncZIJGyvX8M8YeMfn8kM10j3J/2KwVTN4=", - version = "v0.0.0-20240422154435-0628b34cbf9c", - ) - go_repository( - name = "com_github_stretchr_objx", - importpath = "github.com/stretchr/objx", - sum = "h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=", - version = "v0.5.2", - ) - go_repository( - name = "com_github_stretchr_testify", - importpath = "github.com/stretchr/testify", - sum = "h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=", - version = "v1.9.0", - ) - go_repository( name = "com_github_yuin_goldmark", importpath = "github.com/yuin/goldmark", @@ -251,13 +160,6 @@ def go_deps(): sum = "h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=", version = "v2.4.0", ) - go_repository( - name = "in_gopkg_yaml_v3", - importpath = "gopkg.in/yaml.v3", - sum = "h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=", - version = "v3.0.1", - ) - go_repository( name = "net_starlark_go", importpath = "go.starlark.net", @@ -279,14 +181,14 @@ def go_deps(): go_repository( name = "org_golang_google_grpc", importpath = "google.golang.org/grpc", - sum = "h1:fPVVDxY9w++VjTZsYvXWqEf9Rqar/e+9zYfxKK+W+YU=", - version = "v1.50.0", + sum = "h1:rRYRFMVgRv6E0D70Skyfsr28tDXIuuPZyWGMPdMcnXg=", + version = "v1.27.0", ) go_repository( name = "org_golang_google_protobuf", importpath = "google.golang.org/protobuf", - sum = "h1:w43yiav+6bVFTBQFZX0r7ipe9JQ1QsbMgHwbBziscLw=", - version = "v1.28.0", + sum = "h1:Ejskq+SyPohKW+1uil0JJMtmHCgJPJ/qWTxr8qp+R4c=", + version = "v1.25.0", ) go_repository( name = "org_golang_x_crypto", @@ -309,14 +211,14 @@ def go_deps(): go_repository( name = "org_golang_x_mod", importpath = "golang.org/x/mod", - sum = "h1:lFO9qtOdlre5W1jxS3r/4szv2/6iXxScdzjoBMXNhYk=", - version = "v0.10.0", + sum = "h1:6zppjxzCulZykYSLyVDYbneBfbaBIQPYMevg0bEwv2s=", + 
version = "v0.6.0-dev.0.20220419223038-86c51ed26bb4", ) go_repository( name = "org_golang_x_net", importpath = "golang.org/x/net", - sum = "h1:X2//UzNDwYmtCLn7To6G58Wr6f5ahEAQgKNzv9Y951M=", - version = "v0.10.0", + sum = "h1:PxfKdU9lEEDYjdIzOtC4qFWgkU2rGHdKlKowJSMN9h0=", + version = "v0.0.0-20220722155237-a158d28d115b", ) go_repository( name = "org_golang_x_oauth2", @@ -327,20 +229,20 @@ def go_deps(): go_repository( name = "org_golang_x_sync", importpath = "golang.org/x/sync", - sum = "h1:PUR+T4wwASmuSTYdKjYHI5TD22Wy5ogLU5qZCOLxBrI=", - version = "v0.2.0", + sum = "h1:uVc8UZUe6tr40fFVnUP5Oj+veunVezqYl9z7DYw9xzw=", + version = "v0.0.0-20220722155255-886fb9371eb4", ) go_repository( name = "org_golang_x_sys", importpath = "golang.org/x/sys", - sum = "h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU=", - version = "v0.8.0", + sum = "h1:k5II8e6QD8mITdi+okbbmR/cIyEbeXLBhy5Ha4nevyc=", + version = "v0.0.0-20221010170243-090e33056c14", ) go_repository( name = "org_golang_x_text", importpath = "golang.org/x/text", - sum = "h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k=", - version = "v0.3.3", + sum = "h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk=", + version = "v0.3.7", ) go_repository( name = "org_golang_x_tools", @@ -348,8 +250,8 @@ def go_deps(): "gazelle:exclude **/testdata/**/*", ], importpath = "golang.org/x/tools", - sum = "h1:8WMNJAz3zrtPmnYC7ISf5dEn3MT0gY7jBJfw27yrrLo=", - version = "v0.9.1", + sum = "h1:VveCTK38A2rkS8ZqFY25HIDFscX5X9OoEhJd3quQmXU=", + version = "v0.1.12", ) go_repository( name = "org_golang_x_xerrors", diff --git a/gazelle/go.mod b/gazelle/go.mod index 4b65e71d67..b9b79ac7a2 100644 --- a/gazelle/go.mod +++ b/gazelle/go.mod @@ -4,23 +4,17 @@ go 1.19 require ( github.com/bazelbuild/bazel-gazelle v0.31.1 - github.com/bazelbuild/buildtools v0.0.0-20231103205921-433ea8554e82 + github.com/bazelbuild/buildtools v0.0.0-20230510134650-37bd1811516d github.com/bazelbuild/rules_go v0.41.0 github.com/bmatcuk/doublestar/v4 v4.6.1 github.com/emirpasic/gods 
v1.18.1 github.com/ghodss/yaml v1.0.0 - github.com/smacker/go-tree-sitter v0.0.0-20240422154435-0628b34cbf9c - github.com/stretchr/testify v1.9.0 - golang.org/x/sync v0.2.0 gopkg.in/yaml.v2 v2.4.0 ) require ( - github.com/davecgh/go-spew v1.1.1 // indirect github.com/google/go-cmp v0.5.9 // indirect - github.com/pmezard/go-difflib v1.0.0 // indirect golang.org/x/mod v0.10.0 // indirect golang.org/x/sys v0.8.0 // indirect golang.org/x/tools v0.9.1 // indirect - gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/gazelle/go.sum b/gazelle/go.sum index 46e0127e8f..fcfcb283ec 100644 --- a/gazelle/go.sum +++ b/gazelle/go.sum @@ -2,8 +2,8 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMT github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/bazelbuild/bazel-gazelle v0.31.1 h1:ROyUyUHzoEdvoOs1e0haxJx1l5EjZX6AOqiKdVlaBbg= github.com/bazelbuild/bazel-gazelle v0.31.1/go.mod h1:Ul0pqz50f5wxz0QNzsZ+mrEu4AVAVJZEB5xLnHgIG9c= -github.com/bazelbuild/buildtools v0.0.0-20231103205921-433ea8554e82 h1:HTepWP/jhtWTC1gvK0RnvKCgjh4gLqiwaOwGozAXcbw= -github.com/bazelbuild/buildtools v0.0.0-20231103205921-433ea8554e82/go.mod h1:689QdV3hBP7Vo9dJMmzhoYIyo/9iMhEmHkJcnaPRCbo= +github.com/bazelbuild/buildtools v0.0.0-20230510134650-37bd1811516d h1:Fl1FfItZp34QIQmmDTbZXHB5XA6JfbNNfH7tRRGWvQo= +github.com/bazelbuild/buildtools v0.0.0-20230510134650-37bd1811516d/go.mod h1:689QdV3hBP7Vo9dJMmzhoYIyo/9iMhEmHkJcnaPRCbo= github.com/bazelbuild/rules_go v0.41.0 h1:JzlRxsFNhlX+g4drDRPhIaU5H5LnI978wdMJ0vK4I+k= github.com/bazelbuild/rules_go v0.41.0/go.mod h1:TMHmtfpvyfsxaqfL9WnahCsXMWDMICTw7XeK9yVb+YU= github.com/bmatcuk/doublestar/v4 v4.6.1 h1:FH9SifrbvJhnlQpztAx++wlkk70QBf0iBWDwNy7PA4I= @@ -13,9 +13,6 @@ github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWR github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test 
v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= -github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc= github.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FMNAnJvWQ= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= @@ -41,17 +38,7 @@ github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= -github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/smacker/go-tree-sitter v0.0.0-20240422154435-0628b34cbf9c h1:7QZKUmQfnxncZIJGyvX8M8YeMfn8kM10j3J/2KwVTN4= -github.com/smacker/go-tree-sitter v0.0.0-20240422154435-0628b34cbf9c/go.mod h1:q99oHDsbP0xRwmn7Vmob8gbSMNyvJ83OauXPSuHQuKE= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= -github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.7.4/go.mod 
h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= -github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= -github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= go.starlark.net v0.0.0-20210223155950-e043a3d3c984/go.mod h1:t3mmBBPzAVvK0L0n1drDmrQsJ8FoIx4INCqVMTr/Zo0= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= @@ -68,8 +55,6 @@ golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAG golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.2.0 h1:PUR+T4wwASmuSTYdKjYHI5TD22Wy5ogLU5qZCOLxBrI= -golang.org/x/sync v0.2.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -105,8 +90,5 @@ gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+ gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 
-gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= diff --git a/gazelle/python/BUILD.bazel b/gazelle/python/BUILD.bazel index 195c77623d..4cca8b31dc 100644 --- a/gazelle/python/BUILD.bazel +++ b/gazelle/python/BUILD.bazel @@ -1,31 +1,31 @@ load("@bazel_gazelle//:def.bzl", "gazelle_binary") -load("@bazel_skylib//rules:copy_file.bzl", "copy_file") -load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") +load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("@rules_python//python:defs.bzl", "py_binary", "py_test") load(":gazelle_test.bzl", "gazelle_test") go_library( name = "python", srcs = [ "configure.go", - "file_parser.go", "fix.go", "generate.go", "kinds.go", "language.go", + "lifecycle.go", "parser.go", "resolve.go", "std_modules.go", "target.go", ], # NOTE @aignas 2023-12-03: currently gazelle does not support embedding - # generated files, but 3.11.txt is generated by a build rule. + # generated files, but helper.zip is generated by a build rule. # # You will get a benign error like when running gazelle locally: - # > 8 gazelle: .../rules_python/gazelle/python/std_modules.go:24:3: pattern 3.11.txt: matched no files + # > 8 gazelle: .../rules_python/gazelle/python/lifecycle.go:26:3: pattern helper.zip: matched no files # # See following for more info: # https://github.com/bazelbuild/bazel-gazelle/issues/1513 - embedsrcs = ["stdlib_list.txt"], # keep # TODO: use user-defined version? 
+ embedsrcs = [":helper.zip"], # keep importpath = "github.com/bazelbuild/rules_python/gazelle/python", visibility = ["//visibility:public"], deps = [ @@ -42,27 +42,35 @@ go_library( "@com_github_emirpasic_gods//lists/singlylinkedlist", "@com_github_emirpasic_gods//sets/treeset", "@com_github_emirpasic_gods//utils", - "@com_github_smacker_go_tree_sitter//:go-tree-sitter", - "@com_github_smacker_go_tree_sitter//python", - "@org_golang_x_sync//errgroup", ], ) -copy_file( - name = "stdlib_list", - src = select( - { - "@rules_python//python/config_settings:is_python_3.10": "@python_stdlib_list_3_10//file", - "@rules_python//python/config_settings:is_python_3.11": "@python_stdlib_list_3_11//file", - "@rules_python//python/config_settings:is_python_3.12": "@python_stdlib_list_3_12//file", - "@rules_python//python/config_settings:is_python_3.8": "@python_stdlib_list_3_8//file", - "@rules_python//python/config_settings:is_python_3.9": "@python_stdlib_list_3_9//file", - # This is the same behaviour as previously - "//conditions:default": "@python_stdlib_list_3_11//file", - }, - ), - out = "stdlib_list.txt", - allow_symlink = True, +py_binary( + name = "helper", + srcs = [ + "__main__.py", + "parse.py", + "std_modules.py", + ], + # This is to make sure that the current directory is added to PYTHONPATH + imports = ["."], + main = "__main__.py", + visibility = ["//visibility:public"], +) + +py_test( + name = "parse_test", + srcs = [ + "parse.py", + "parse_test.py", + ], + imports = ["."], +) + +filegroup( + name = "helper.zip", + srcs = [":helper"], + output_group = "python_zip_file", ) # gazelle:exclude testdata/ @@ -72,6 +80,7 @@ gazelle_test( srcs = ["python_test.go"], data = [ ":gazelle_binary", + ":helper", ], test_dirs = glob( # Use this so that we don't need to manually maintain the list. 
@@ -100,15 +109,3 @@ filegroup( srcs = glob(["**"]), visibility = ["//:__pkg__"], ) - -go_test( - name = "default_test", - srcs = [ - "file_parser_test.go", - "std_modules_test.go", - ], - embed = [":python"], - deps = [ - "@com_github_stretchr_testify//assert", - ], -) diff --git a/gazelle/python/__main__.py b/gazelle/python/__main__.py new file mode 100644 index 0000000000..9974c66d13 --- /dev/null +++ b/gazelle/python/__main__.py @@ -0,0 +1,32 @@ +# Copyright 2023 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# parse.py is a long-living program that communicates over STDIN and STDOUT. +# STDIN receives parse requests, one per line. It outputs the parsed modules and +# comments from all the files from each request. 
+ +import sys + +import parse +import std_modules + +if __name__ == "__main__": + if len(sys.argv) < 2: + sys.exit("Please provide subcommand, either parse or std_modules") + if sys.argv[1] == "parse": + sys.exit(parse.main(sys.stdin, sys.stdout)) + elif sys.argv[1] == "std_modules": + sys.exit(std_modules.main(sys.stdin, sys.stdout)) + else: + sys.exit("Unknown subcommand: " + sys.argv[1]) diff --git a/gazelle/python/extensions.bzl b/gazelle/python/extensions.bzl deleted file mode 100644 index 8d339c0c7b..0000000000 --- a/gazelle/python/extensions.bzl +++ /dev/null @@ -1,5 +0,0 @@ -"python_stdlib_list module extension for use with bzlmod" - -load("//python/private:extensions.bzl", _python_stdlib_list = "python_stdlib_list") - -python_stdlib_list = _python_stdlib_list diff --git a/gazelle/python/file_parser.go b/gazelle/python/file_parser.go deleted file mode 100644 index a2b22c2b8f..0000000000 --- a/gazelle/python/file_parser.go +++ /dev/null @@ -1,201 +0,0 @@ -// Copyright 2023 The Bazel Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package python - -import ( - "context" - "fmt" - "os" - "path/filepath" - "strings" - - sitter "github.com/smacker/go-tree-sitter" - "github.com/smacker/go-tree-sitter/python" -) - -const ( - sitterNodeTypeString = "string" - sitterNodeTypeComment = "comment" - sitterNodeTypeIdentifier = "identifier" - sitterNodeTypeDottedName = "dotted_name" - sitterNodeTypeIfStatement = "if_statement" - sitterNodeTypeAliasedImport = "aliased_import" - sitterNodeTypeWildcardImport = "wildcard_import" - sitterNodeTypeImportStatement = "import_statement" - sitterNodeTypeComparisonOperator = "comparison_operator" - sitterNodeTypeImportFromStatement = "import_from_statement" -) - -type ParserOutput struct { - FileName string - Modules []module - Comments []comment - HasMain bool -} - -type FileParser struct { - code []byte - relFilepath string - output ParserOutput -} - -func NewFileParser() *FileParser { - return &FileParser{} -} - -func ParseCode(code []byte) (*sitter.Node, error) { - parser := sitter.NewParser() - parser.SetLanguage(python.GetLanguage()) - - tree, err := parser.ParseCtx(context.Background(), nil, code) - if err != nil { - return nil, err - } - - return tree.RootNode(), nil -} - -func (p *FileParser) parseMain(ctx context.Context, node *sitter.Node) bool { - for i := 0; i < int(node.ChildCount()); i++ { - if err := ctx.Err(); err != nil { - return false - } - child := node.Child(i) - if child.Type() == sitterNodeTypeIfStatement && - child.Child(1).Type() == sitterNodeTypeComparisonOperator && child.Child(1).Child(1).Type() == "==" { - statement := child.Child(1) - a, b := statement.Child(0), statement.Child(2) - // convert "'__main__' == __name__" to "__name__ == '__main__'" - if b.Type() == sitterNodeTypeIdentifier { - a, b = b, a - } - if a.Type() == sitterNodeTypeIdentifier && a.Content(p.code) == "__name__" && - // at github.com/smacker/go-tree-sitter@latest (after v0.0.0-20240422154435-0628b34cbf9c we used) - // "__main__" is the second child of b. 
But now, it isn't. - // we cannot use the latest go-tree-sitter because of the top level reference in scanner.c. - // https://github.com/smacker/go-tree-sitter/blob/04d6b33fe138a98075210f5b770482ded024dc0f/python/scanner.c#L1 - b.Type() == sitterNodeTypeString && string(p.code[b.StartByte()+1:b.EndByte()-1]) == "__main__" { - return true - } - } - } - return false -} - -func parseImportStatement(node *sitter.Node, code []byte) (module, bool) { - switch node.Type() { - case sitterNodeTypeDottedName: - return module{ - Name: node.Content(code), - LineNumber: node.StartPoint().Row + 1, - }, true - case sitterNodeTypeAliasedImport: - return parseImportStatement(node.Child(0), code) - case sitterNodeTypeWildcardImport: - return module{ - Name: "*", - LineNumber: node.StartPoint().Row + 1, - }, true - } - return module{}, false -} - -func (p *FileParser) parseImportStatements(node *sitter.Node) bool { - if node.Type() == sitterNodeTypeImportStatement { - for j := 1; j < int(node.ChildCount()); j++ { - m, ok := parseImportStatement(node.Child(j), p.code) - if !ok { - continue - } - m.Filepath = p.relFilepath - if strings.HasPrefix(m.Name, ".") { - continue - } - p.output.Modules = append(p.output.Modules, m) - } - } else if node.Type() == sitterNodeTypeImportFromStatement { - from := node.Child(1).Content(p.code) - if strings.HasPrefix(from, ".") { - return true - } - for j := 3; j < int(node.ChildCount()); j++ { - m, ok := parseImportStatement(node.Child(j), p.code) - if !ok { - continue - } - m.Filepath = p.relFilepath - m.From = from - m.Name = fmt.Sprintf("%s.%s", from, m.Name) - p.output.Modules = append(p.output.Modules, m) - } - } else { - return false - } - return true -} - -func (p *FileParser) parseComments(node *sitter.Node) bool { - if node.Type() == sitterNodeTypeComment { - p.output.Comments = append(p.output.Comments, comment(node.Content(p.code))) - return true - } - return false -} - -func (p *FileParser) SetCodeAndFile(code []byte, relPackagePath, 
filename string) { - p.code = code - p.relFilepath = filepath.Join(relPackagePath, filename) - p.output.FileName = filename -} - -func (p *FileParser) parse(ctx context.Context, node *sitter.Node) { - if node == nil { - return - } - for i := 0; i < int(node.ChildCount()); i++ { - if err := ctx.Err(); err != nil { - return - } - child := node.Child(i) - if p.parseImportStatements(child) { - continue - } - if p.parseComments(child) { - continue - } - p.parse(ctx, child) - } -} - -func (p *FileParser) Parse(ctx context.Context) (*ParserOutput, error) { - rootNode, err := ParseCode(p.code) - if err != nil { - return nil, err - } - - p.output.HasMain = p.parseMain(ctx, rootNode) - - p.parse(ctx, rootNode) - return &p.output, nil -} - -func (p *FileParser) ParseFile(ctx context.Context, repoRoot, relPackagePath, filename string) (*ParserOutput, error) { - code, err := os.ReadFile(filepath.Join(repoRoot, relPackagePath, filename)) - if err != nil { - return nil, err - } - p.SetCodeAndFile(code, relPackagePath, filename) - return p.Parse(ctx) -} diff --git a/gazelle/python/file_parser_test.go b/gazelle/python/file_parser_test.go deleted file mode 100644 index 3682cff753..0000000000 --- a/gazelle/python/file_parser_test.go +++ /dev/null @@ -1,256 +0,0 @@ -// Copyright 2023 The Bazel Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package python - -import ( - "context" - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestParseImportStatements(t *testing.T) { - t.Parallel() - units := []struct { - name string - code string - filepath string - result []module - }{ - { - name: "not has import", - code: "a = 1\nb = 2", - filepath: "", - result: nil, - }, - { - name: "has import", - code: "import unittest\nimport os.path\nfrom foo.bar import abc.xyz", - filepath: "abc.py", - result: []module{ - { - Name: "unittest", - LineNumber: 1, - Filepath: "abc.py", - From: "", - }, - { - Name: "os.path", - LineNumber: 2, - Filepath: "abc.py", - From: "", - }, - { - Name: "foo.bar.abc.xyz", - LineNumber: 3, - Filepath: "abc.py", - From: "foo.bar", - }, - }, - }, - { - name: "has import in def", - code: `def foo(): - import unittest -`, - filepath: "abc.py", - result: []module{ - { - Name: "unittest", - LineNumber: 2, - Filepath: "abc.py", - From: "", - }, - }, - }, - { - name: "invalid syntax", - code: "import os\nimport", - filepath: "abc.py", - result: []module{ - { - Name: "os", - LineNumber: 1, - Filepath: "abc.py", - From: "", - }, - }, - }, - { - name: "import as", - code: "import os as b\nfrom foo import bar as c# 123", - filepath: "abc.py", - result: []module{ - { - Name: "os", - LineNumber: 1, - Filepath: "abc.py", - From: "", - }, - { - Name: "foo.bar", - LineNumber: 2, - Filepath: "abc.py", - From: "foo", - }, - }, - }, - // align to https://docs.python.org/3/reference/simple_stmts.html#index-34 - { - name: "complex import", - code: "from unittest import *\nfrom foo import (bar as c, baz, qux as d)\nfrom . 
import abc", - result: []module{ - { - Name: "unittest.*", - LineNumber: 1, - From: "unittest", - }, - { - Name: "foo.bar", - LineNumber: 2, - From: "foo", - }, - { - Name: "foo.baz", - LineNumber: 2, - From: "foo", - }, - { - Name: "foo.qux", - LineNumber: 2, - From: "foo", - }, - }, - }, - } - for _, u := range units { - t.Run(u.name, func(t *testing.T) { - p := NewFileParser() - code := []byte(u.code) - p.SetCodeAndFile(code, "", u.filepath) - output, err := p.Parse(context.Background()) - assert.NoError(t, err) - assert.Equal(t, u.result, output.Modules) - }) - } -} - -func TestParseComments(t *testing.T) { - t.Parallel() - units := []struct { - name string - code string - result []comment - }{ - { - name: "not has comment", - code: "a = 1\nb = 2", - result: nil, - }, - { - name: "has comment", - code: "# a = 1\n# b = 2", - result: []comment{"# a = 1", "# b = 2"}, - }, - { - name: "has comment in if", - code: "if True:\n # a = 1\n # b = 2", - result: []comment{"# a = 1", "# b = 2"}, - }, - { - name: "has comment inline", - code: "import os# 123\nfrom pathlib import Path as b#456", - result: []comment{"# 123", "#456"}, - }, - } - for _, u := range units { - t.Run(u.name, func(t *testing.T) { - p := NewFileParser() - code := []byte(u.code) - p.SetCodeAndFile(code, "", "") - output, err := p.Parse(context.Background()) - assert.NoError(t, err) - assert.Equal(t, u.result, output.Comments) - }) - } -} - -func TestParseMain(t *testing.T) { - t.Parallel() - units := []struct { - name string - code string - result bool - }{ - { - name: "not has main", - code: "a = 1\nb = 2", - result: false, - }, - { - name: "has main in function", - code: `def foo(): - if __name__ == "__main__": - a = 3 -`, - result: false, - }, - { - name: "has main", - code: ` -import unittest - -from lib import main - - -class ExampleTest(unittest.TestCase): - def test_main(self): - self.assertEqual( - "", - main([["A", 1], ["B", 2]]), - ) - - -if __name__ == "__main__": - unittest.main() -`, - 
result: true, - }, - } - for _, u := range units { - t.Run(u.name, func(t *testing.T) { - p := NewFileParser() - code := []byte(u.code) - p.SetCodeAndFile(code, "", "") - output, err := p.Parse(context.Background()) - assert.NoError(t, err) - assert.Equal(t, u.result, output.HasMain) - }) - } -} - -func TestParseFull(t *testing.T) { - p := NewFileParser() - code := []byte(`from bar import abc`) - p.SetCodeAndFile(code, "foo", "a.py") - output, err := p.Parse(context.Background()) - assert.NoError(t, err) - assert.Equal(t, ParserOutput{ - Modules: []module{{Name: "bar.abc", LineNumber: 1, Filepath: "foo/a.py", From: "bar"}}, - Comments: nil, - HasMain: false, - FileName: "a.py", - }, *output) -} diff --git a/gazelle/python/language.go b/gazelle/python/language.go index 56eb97b043..568ac9225c 100644 --- a/gazelle/python/language.go +++ b/gazelle/python/language.go @@ -23,6 +23,7 @@ import ( type Python struct { Configurer Resolver + LifeCycleManager } // NewLanguage initializes a new Python that satisfies the language.Language diff --git a/gazelle/python/lifecycle.go b/gazelle/python/lifecycle.go new file mode 100644 index 0000000000..6d628e9137 --- /dev/null +++ b/gazelle/python/lifecycle.go @@ -0,0 +1,63 @@ +// Copyright 2023 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package python + +import ( + "context" + _ "embed" + "github.com/bazelbuild/bazel-gazelle/language" + "log" + "os" +) + +var ( + //go:embed helper.zip + helperZip []byte + helperPath string +) + +type LifeCycleManager struct { + language.BaseLifecycleManager + pyzFilePath string +} + +func (l *LifeCycleManager) Before(ctx context.Context) { + helperPath = os.Getenv("GAZELLE_PYTHON_HELPER") + if helperPath == "" { + pyzFile, err := os.CreateTemp("", "python_zip_") + if err != nil { + log.Fatalf("failed to write parser zip: %v", err) + } + defer pyzFile.Close() + helperPath = pyzFile.Name() + l.pyzFilePath = helperPath + if _, err := pyzFile.Write(helperZip); err != nil { + log.Fatalf("cannot write %q: %v", helperPath, err) + } + } + startParserProcess(ctx) + startStdModuleProcess(ctx) +} + +func (l *LifeCycleManager) DoneGeneratingRules() { + shutdownParserProcess() +} + +func (l *LifeCycleManager) AfterResolvingDeps(ctx context.Context) { + shutdownStdModuleProcess() + if l.pyzFilePath != "" { + os.Remove(l.pyzFilePath) + } +} diff --git a/gazelle/python/parse.py b/gazelle/python/parse.py new file mode 100644 index 0000000000..ea331bc23a --- /dev/null +++ b/gazelle/python/parse.py @@ -0,0 +1,147 @@ +# Copyright 2023 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# parse.py is a long-living program that communicates over STDIN and STDOUT. +# STDIN receives parse requests, one per line. 
It outputs the parsed modules and +# comments from all the files from each request. + +import ast +import concurrent.futures +import json +import os +import platform +import sys +from io import BytesIO +from tokenize import COMMENT, NAME, OP, STRING, tokenize + + +def parse_import_statements(content, filepath): + modules = list() + tree = ast.parse(content, filename=filepath) + for node in ast.walk(tree): + if isinstance(node, ast.Import): + for subnode in node.names: + module = { + "name": subnode.name, + "lineno": node.lineno, + "filepath": filepath, + "from": "", + } + modules.append(module) + elif isinstance(node, ast.ImportFrom) and node.level == 0: + for subnode in node.names: + module = { + "name": f"{node.module}.{subnode.name}", + "lineno": node.lineno, + "filepath": filepath, + "from": node.module, + } + modules.append(module) + return modules + + +def parse_comments(content): + comments = list() + g = tokenize(BytesIO(content.encode("utf-8")).readline) + for toknum, tokval, _, _, _ in g: + if toknum == COMMENT: + comments.append(tokval) + return comments + + +def parse_main(content): + g = tokenize(BytesIO(content.encode("utf-8")).readline) + for token_type, token_val, start, _, _ in g: + if token_type != NAME or token_val != "if" or start[1] != 0: + continue + try: + token_type, token_val, start, _, _ = next(g) + if token_type != NAME or token_val != "__name__": + continue + token_type, token_val, start, _, _ = next(g) + if token_type != OP or token_val != "==": + continue + token_type, token_val, start, _, _ = next(g) + if token_type != STRING or token_val.strip("\"'") != "__main__": + continue + token_type, token_val, start, _, _ = next(g) + if token_type != OP or token_val != ":": + continue + return True + except StopIteration: + break + return False + + +def parse(repo_root, rel_package_path, filename): + rel_filepath = os.path.join(rel_package_path, filename) + abs_filepath = os.path.join(repo_root, rel_filepath) + with open(abs_filepath, "r") as 
file: + content = file.read() + # From simple benchmarks, 2 workers gave the best performance here. + with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor: + modules_future = executor.submit( + parse_import_statements, content, rel_filepath + ) + comments_future = executor.submit(parse_comments, content) + main_future = executor.submit(parse_main, content) + modules = modules_future.result() + comments = comments_future.result() + has_main = main_future.result() + + output = { + "filename": filename, + "modules": modules, + "comments": comments, + "has_main": has_main, + } + return output + + +def create_main_executor(): + # We cannot use ProcessPoolExecutor on macOS, because the fork start method should be considered unsafe as it can + # lead to crashes of the subprocess as macOS system libraries may start threads. Meanwhile, the 'spawn' and + # 'forkserver' start methods generally cannot be used with “frozen” executables (i.e., Python zip file) on POSIX + # systems. Therefore, there is no good way to use ProcessPoolExecutor on macOS when we distribute this program with + # a zip file. 
+ # Ref: https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods + if platform.system() == "Darwin": + return concurrent.futures.ThreadPoolExecutor() + return concurrent.futures.ProcessPoolExecutor() + +def main(stdin, stdout): + with create_main_executor() as executor: + for parse_request in stdin: + parse_request = json.loads(parse_request) + repo_root = parse_request["repo_root"] + rel_package_path = parse_request["rel_package_path"] + filenames = parse_request["filenames"] + outputs = list() + if len(filenames) == 1: + outputs.append(parse(repo_root, rel_package_path, filenames[0])) + else: + futures = [ + executor.submit(parse, repo_root, rel_package_path, filename) + for filename in filenames + if filename != "" + ] + for future in concurrent.futures.as_completed(futures): + outputs.append(future.result()) + print(json.dumps(outputs), end="", file=stdout, flush=True) + stdout.buffer.write(bytes([0])) + stdout.flush() + + +if __name__ == "__main__": + exit(main(sys.stdin, sys.stdout)) diff --git a/gazelle/python/parse_test.py b/gazelle/python/parse_test.py new file mode 100644 index 0000000000..6d1fa49547 --- /dev/null +++ b/gazelle/python/parse_test.py @@ -0,0 +1,41 @@ +import unittest + +import parse + + +class TestParse(unittest.TestCase): + def test_not_has_main(self): + content = "a = 1\nb = 2" + self.assertFalse(parse.parse_main(content)) + + def test_has_main_in_function(self): + content = """ +def foo(): + if __name__ == "__main__": + a = 3 +""" + self.assertFalse(parse.parse_main(content)) + + def test_has_main(self): + content = """ +import unittest + +from lib import main + + +class ExampleTest(unittest.TestCase): + def test_main(self): + self.assertEqual( + "", + main([["A", 1], ["B", 2]]), + ) + + +if __name__ == "__main__": + unittest.main() +""" + self.assertTrue(parse.parse_main(content)) + + +if __name__ == "__main__": + unittest.main() diff --git a/gazelle/python/parser.go b/gazelle/python/parser.go index 
1b2a90dddf..184fad7c14 100644 --- a/gazelle/python/parser.go +++ b/gazelle/python/parser.go @@ -15,16 +15,65 @@ package python import ( + "bufio" "context" _ "embed" + "encoding/json" "fmt" + "io" + "log" + "os" + "os/exec" "strings" + "sync" "github.com/emirpasic/gods/sets/treeset" godsutils "github.com/emirpasic/gods/utils" - "golang.org/x/sync/errgroup" ) +var ( + parserCmd *exec.Cmd + parserStdin io.WriteCloser + parserStdout io.Reader + parserMutex sync.Mutex +) + +func startParserProcess(ctx context.Context) { + // due to #691, we need a system interpreter to bootstrap, part of which is + // to locate the hermetic interpreter. + parserCmd = exec.CommandContext(ctx, "python3", helperPath, "parse") + parserCmd.Stderr = os.Stderr + + stdin, err := parserCmd.StdinPipe() + if err != nil { + log.Printf("failed to initialize parser: %v\n", err) + os.Exit(1) + } + parserStdin = stdin + + stdout, err := parserCmd.StdoutPipe() + if err != nil { + log.Printf("failed to initialize parser: %v\n", err) + os.Exit(1) + } + parserStdout = stdout + + if err := parserCmd.Start(); err != nil { + log.Printf("failed to initialize parser: %v\n", err) + os.Exit(1) + } +} + +func shutdownParserProcess() { + if err := parserStdin.Close(); err != nil { + fmt.Fprintf(os.Stderr, "error closing parser: %v", err) + } + + if err := parserCmd.Wait(); err != nil { + log.Printf("failed to wait for parser: %v\n", err) + } +} + // python3Parser implements a parser for Python files that extracts the modules // as seen in the import statements. type python3Parser struct { @@ -61,36 +110,36 @@ func (p *python3Parser) parseSingle(pyFilename string) (*treeset.Set, map[string // parse parses multiple Python files and returns the extracted modules from // the import statements as well as the parsed comments.
func (p *python3Parser) parse(pyFilenames *treeset.Set) (*treeset.Set, map[string]*treeset.Set, *annotations, error) { + parserMutex.Lock() + defer parserMutex.Unlock() + modules := treeset.NewWith(moduleComparator) - g, ctx := errgroup.WithContext(context.Background()) - ch := make(chan struct{}, 6) // Limit the number of concurrent parses. - chRes := make(chan *ParserOutput, len(pyFilenames.Values())) - for _, v := range pyFilenames.Values() { - ch <- struct{}{} - g.Go(func(filename string) func() error { - return func() error { - defer func() { - <-ch - }() - res, err := NewFileParser().ParseFile(ctx, p.repoRoot, p.relPackagePath, filename) - if err != nil { - return err - } - chRes <- res - return nil - } - }(v.(string))) + req := map[string]interface{}{ + "repo_root": p.repoRoot, + "rel_package_path": p.relPackagePath, + "filenames": pyFilenames.Values(), } - if err := g.Wait(); err != nil { - return nil, nil, nil, err + encoder := json.NewEncoder(parserStdin) + if err := encoder.Encode(&req); err != nil { + return nil, nil, nil, fmt.Errorf("failed to parse: %w", err) } - close(ch) - close(chRes) - mainModules := make(map[string]*treeset.Set, len(chRes)) + + reader := bufio.NewReader(parserStdout) + data, err := reader.ReadBytes(0) + if err != nil { + return nil, nil, nil, fmt.Errorf("failed to parse: %w", err) + } + data = data[:len(data)-1] + var allRes []parserResponse + if err := json.Unmarshal(data, &allRes); err != nil { + return nil, nil, nil, fmt.Errorf("failed to parse: %w", err) + } + + mainModules := make(map[string]*treeset.Set, len(allRes)) allAnnotations := new(annotations) allAnnotations.ignore = make(map[string]struct{}) - for res := range chRes { + for _, res := range allRes { if res.HasMain { mainModules[res.FileName] = treeset.NewWith(moduleComparator) } @@ -145,6 +194,21 @@ func removeDupesFromStringTreeSetSlice(array []string) []string { return dedupe } +// parserResponse represents a response returned by the parser.py for a given +// 
parsed Python module. +type parserResponse struct { + // FileName of the parsed module + FileName string + // The modules depended on by the parsed module. + Modules []module `json:"modules"` + // The comments contained in the parsed module. This contains the + // annotations as they are comments in the Python module. + Comments []comment `json:"comments"` + // HasMain indicates whether the Python module has `if __name__ == "__main__"` + // at the top level + HasMain bool `json:"has_main"` +} + // module represents a fully-qualified, dot-separated, Python module as seen on // the import statement, alongside the line number where it happened. type module struct { diff --git a/gazelle/python/private/BUILD.bazel b/gazelle/python/private/BUILD.bazel deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/gazelle/python/private/extensions.bzl b/gazelle/python/private/extensions.bzl deleted file mode 100644 index 5de071361c..0000000000 --- a/gazelle/python/private/extensions.bzl +++ /dev/null @@ -1,9 +0,0 @@ -"python_stdlib_list module extension for use with bzlmod" - -load("@bazel_skylib//lib:modules.bzl", "modules") -load("//:deps.bzl", "python_stdlib_list_deps") - -python_stdlib_list = modules.as_extension( - python_stdlib_list_deps, - doc = "This extension registers python stdlib list dependencies.", -) diff --git a/gazelle/python/python_test.go b/gazelle/python/python_test.go index dd8c2411f1..617b3f858e 100644 --- a/gazelle/python/python_test.go +++ b/gazelle/python/python_test.go @@ -31,6 +31,7 @@ import ( "time" "github.com/bazelbuild/bazel-gazelle/testtools" + "github.com/bazelbuild/rules_go/go/runfiles" "github.com/bazelbuild/rules_go/go/tools/bazel" "github.com/ghodss/yaml" ) @@ -41,8 +42,9 @@ const ( gazelleBinaryName = "gazelle_binary" ) +var gazellePath = mustFindGazelle() + func TestGazelleBinary(t *testing.T) { - gazellePath := mustFindGazelle() tests := map[string][]bazel.RunfileEntry{} runfiles, err := bazel.ListRunfiles() @@ -65,12 +67,13 @@ func
TestGazelleBinary(t *testing.T) { if len(tests) == 0 { t.Fatal("no tests found") } + for testName, files := range tests { - testPath(t, gazellePath, testName, files) + testPath(t, testName, files) } } -func testPath(t *testing.T, gazellePath, name string, files []bazel.RunfileEntry) { +func testPath(t *testing.T, name string, files []bazel.RunfileEntry) { t.Run(name, func(t *testing.T) { t.Parallel() var inputs, goldens []testtools.FileSpec @@ -157,6 +160,11 @@ func testPath(t *testing.T, gazellePath, name string, files []bazel.RunfileEntry cmd.Stdout = &stdout cmd.Stderr = &stderr cmd.Dir = workspaceRoot + helperScript, err := runfiles.Rlocation("rules_python_gazelle_plugin/python/helper") + if err != nil { + t.Fatalf("failed to initialize Python helper: %v", err) + } + cmd.Env = append(os.Environ(), "GAZELLE_PYTHON_HELPER="+helperScript) if err := cmd.Run(); err != nil { var e *exec.ExitError if !errors.As(err, &e) { diff --git a/gazelle/python/resolve.go b/gazelle/python/resolve.go index ca306c3db8..f019a64c1a 100644 --- a/gazelle/python/resolve.go +++ b/gazelle/python/resolve.go @@ -202,7 +202,11 @@ func (py *Resolver) Resolve( matches := ix.FindRulesByImportWithConfig(c, imp, languageName) if len(matches) == 0 { // Check if the imported module is part of the standard library. 
- if isStdModule(module{Name: moduleName}) { + if isStd, err := isStdModule(module{Name: moduleName}); err != nil { + log.Println("Error checking if standard module: ", err) + hasFatalError = true + continue POSSIBLE_MODULE_LOOP + } else if isStd { continue MODULES_LOOP } else if cfg.ValidateImportStatements() { err := fmt.Errorf( diff --git a/gazelle/python/std_modules.go b/gazelle/python/std_modules.go index e10f87b6ea..8a016afed6 100644 --- a/gazelle/python/std_modules.go +++ b/gazelle/python/std_modules.go @@ -16,25 +16,92 @@ package python import ( "bufio" + "context" _ "embed" + "fmt" + "io" + "log" + "os" + "os/exec" + "strconv" "strings" + "sync" ) var ( - //go:embed stdlib_list.txt - stdlibList string - stdModules map[string]struct{} + stdModulesCmd *exec.Cmd + stdModulesStdin io.WriteCloser + stdModulesStdout io.Reader + stdModulesMutex sync.Mutex + stdModulesSeen map[string]struct{} ) -func init() { - stdModules = make(map[string]struct{}) - scanner := bufio.NewScanner(strings.NewReader(stdlibList)) - for scanner.Scan() { - stdModules[scanner.Text()] = struct{}{} +func startStdModuleProcess(ctx context.Context) { + stdModulesSeen = make(map[string]struct{}) + + // due to #691, we need a system interpreter to bootstrap, part of which is + // to locate the hermetic interpreter. + stdModulesCmd = exec.CommandContext(ctx, "python3", helperPath, "std_modules") + stdModulesCmd.Stderr = os.Stderr + // All userland site-packages should be ignored.
+ stdModulesCmd.Env = []string{"PYTHONNOUSERSITE=1"} + + stdin, err := stdModulesCmd.StdinPipe() + if err != nil { + log.Printf("failed to initialize std_modules: %v\n", err) + os.Exit(1) + } + stdModulesStdin = stdin + + stdout, err := stdModulesCmd.StdoutPipe() + if err != nil { + log.Printf("failed to initialize std_modules: %v\n", err) + os.Exit(1) + } + stdModulesStdout = stdout + + if err := stdModulesCmd.Start(); err != nil { + log.Printf("failed to initialize std_modules: %v\n", err) + os.Exit(1) + } +} + +func shutdownStdModuleProcess() { + if err := stdModulesStdin.Close(); err != nil { + fmt.Fprintf(os.Stderr, "error closing std module: %v", err) + } + + if err := stdModulesCmd.Wait(); err != nil { + log.Printf("failed to wait for std_modules: %v\n", err) } } -func isStdModule(m module) bool { - _, ok := stdModules[m.Name] - return ok +func isStdModule(m module) (bool, error) { + if _, seen := stdModulesSeen[m.Name]; seen { + return true, nil + } + stdModulesMutex.Lock() + defer stdModulesMutex.Unlock() + + fmt.Fprintf(stdModulesStdin, "%s\n", m.Name) + + stdoutReader := bufio.NewReader(stdModulesStdout) + line, err := stdoutReader.ReadString('\n') + if err != nil { + return false, err + } + if len(line) == 0 { + return false, fmt.Errorf("unexpected empty output from std_modules") + } + + isStd, err := strconv.ParseBool(strings.TrimSpace(line)) + if err != nil { + return false, err + } + + if isStd { + stdModulesSeen[m.Name] = struct{}{} + return true, nil + } + return false, nil } diff --git a/gazelle/python/std_modules.py b/gazelle/python/std_modules.py new file mode 100644 index 0000000000..779a325508 --- /dev/null +++ b/gazelle/python/std_modules.py @@ -0,0 +1,51 @@ +# Copyright 2023 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# std_modules.py is a long-living program that communicates over STDIN and +# STDOUT. STDIN receives module names, one per line. For each module statement +# it evaluates, it outputs true/false for whether the module is part of the +# standard library or not. + +import os +import sys +from contextlib import redirect_stdout + + +def is_std_modules(module): + # If for some reason a module (such as pygame, see https://github.com/pygame/pygame/issues/542) + # prints to stdout upon import, + # the output of this script should still be parseable by golang. + # Therefore, redirect stdout while running the import. + with redirect_stdout(os.devnull): + try: + __import__(module, globals(), locals(), [], 0) + return True + except Exception: + return False + + +def main(stdin, stdout): + for module in stdin: + module = module.strip() + # Don't print the boolean directly as it is capitalized in Python. + print( + "true" if is_std_modules(module) else "false", + end="\n", + file=stdout, + ) + stdout.flush() + + +if __name__ == "__main__": + exit(main(sys.stdin, sys.stdout)) diff --git a/gazelle/python/std_modules_test.go b/gazelle/python/std_modules_test.go deleted file mode 100644 index bc22638e69..0000000000 --- a/gazelle/python/std_modules_test.go +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright 2023 The Bazel Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package python - -import ( - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestIsStdModule(t *testing.T) { - assert.True(t, isStdModule(module{Name: "unittest"})) - assert.True(t, isStdModule(module{Name: "os.path"})) - assert.False(t, isStdModule(module{Name: "foo"})) -}