diff --git a/CHANGELOG.md b/CHANGELOG.md index b07fb0ebc6..c60f6cb666 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,8 +22,10 @@ A brief description of the categories of changes: [x.x.x]: https://github.com/bazelbuild/rules_python/releases/tag/x.x.x ### Changed -* **BREAKING** (gazelle): Remove gazelle plugin's python deps and make it hermetic. Use a new helper - based on tree-sitter and written in go for syntax analysis. + +* (gazelle): Remove gazelle plugin's python deps and make it hermetic. + Introduced a new Go-based helper leveraging tree-sitter for syntax analysis. + Implemented the use of `pypi/stdlib-list` for standard library module verification. ### Fixed diff --git a/gazelle/BUILD.bazel b/gazelle/BUILD.bazel index cbfe0f4c04..f74338d4b5 100644 --- a/gazelle/BUILD.bazel +++ b/gazelle/BUILD.bazel @@ -12,7 +12,7 @@ gazelle( name = "gazelle_update_repos", args = [ "-from_file=go.mod", - "-to_macro=deps.bzl%gazelle_deps", + "-to_macro=deps.bzl%go_deps", "-prune", ], command = "update-repos", diff --git a/gazelle/MODULE.bazel b/gazelle/MODULE.bazel index aba6508a59..f9681be3e7 100644 --- a/gazelle/MODULE.bazel +++ b/gazelle/MODULE.bazel @@ -4,7 +4,7 @@ module( compatibility_level = 1, ) -bazel_dep(name = "bazel_skylib", version = "1.5.0") +bazel_dep(name = "bazel_skylib", version = "1.6.1") bazel_dep(name = "rules_python", version = "0.18.0") bazel_dep(name = "rules_go", version = "0.41.0", repo_name = "io_bazel_rules_go") bazel_dep(name = "gazelle", version = "0.33.0", repo_name = "bazel_gazelle") @@ -24,4 +24,4 @@ use_repo( ) non_module_deps = use_extension("//:def.bzl", "non_module_deps") -use_repo(non_module_deps, "python_stdlib_list") +use_repo(non_module_deps, "python_stdlib_list_3_11") diff --git a/gazelle/WORKSPACE b/gazelle/WORKSPACE index f7c34f191d..39dfb867bf 100644 --- a/gazelle/WORKSPACE +++ b/gazelle/WORKSPACE @@ -1,6 +1,6 @@ workspace(name = "rules_python_gazelle_plugin") -load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive", "http_file") +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") http_archive( name = "io_bazel_rules_go", @@ -20,12 +20,6 @@ http_archive( ], ) -http_file( - name = "python_stdlib_list", - sha256 = "3c1dbf991b17178d6ed3772f4fa8f64302feaf9c3385fef328a0c7ab736a79b1", - url = "https://raw.githubusercontent.com/pypi/stdlib-list/8cbc2067a4a0f9eee57fb541e4cd7727724b7db4/stdlib_list/lists/3.11.txt", -) - load("@bazel_gazelle//:deps.bzl", "gazelle_dependencies") load("@io_bazel_rules_go//go:deps.bzl", "go_register_toolchains", "go_rules_dependencies") diff --git a/gazelle/def.bzl b/gazelle/def.bzl index f3d357e66a..a9fbb0f436 100644 --- a/gazelle/def.bzl +++ b/gazelle/def.bzl @@ -11,18 +11,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" -This file contains the non_module_deps rule. + +"""This module contains the Gazelle runtime dependencies for the Python extension. """ -load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_file") +load("@bazel_skylib//lib:modules.bzl", "modules") +load(":deps.bzl", "python_stdlib_list_deps") -def non_module_deps_impl(_): - http_file( - name = "python_stdlib_list", - sha256 = "3c1dbf991b17178d6ed3772f4fa8f64302feaf9c3385fef328a0c7ab736a79b1", - url = "https://raw.githubusercontent.com/pypi/stdlib-list/8cbc2067a4a0f9eee57fb541e4cd7727724b7db4/stdlib_list/lists/3.11.txt", - downloaded_file_path = "3.11.txt", # TODO: auto version - ) +GAZELLE_PYTHON_RUNTIME_DEPS = [ +] -non_module_deps = module_extension(implementation = non_module_deps_impl) +non_module_deps = modules.as_extension( + python_stdlib_list_deps, + doc = "This extension registers python stdlib list dependencies.", +) diff --git a/gazelle/deps.bzl b/gazelle/deps.bzl index 41b6020594..31100494b5 100644 --- a/gazelle/deps.bzl +++ b/gazelle/deps.bzl @@ -18,12 +18,25 @@ load( "@bazel_gazelle//:deps.bzl", _go_repository = "go_repository", ) +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_file") def go_repository(name, **kwargs): if name not in native.existing_rules(): _go_repository(name = name, **kwargs) +def python_stdlib_list_deps(): + http_file( + name = "python_stdlib_list_3_11", + sha256 = "3c1dbf991b17178d6ed3772f4fa8f64302feaf9c3385fef328a0c7ab736a79b1", + url = "https://raw.githubusercontent.com/pypi/stdlib-list/8cbc2067a4a0f9eee57fb541e4cd7727724b7db4/stdlib_list/lists/3.11.txt", + downloaded_file_path = "3.11.txt", + ) + def gazelle_deps(): + go_deps() + python_stdlib_list_deps() + +def go_deps(): "Fetch go dependencies" go_repository( name = "co_honnef_go_tools", diff --git a/gazelle/python/BUILD.bazel b/gazelle/python/BUILD.bazel index 75402ca943..8d09de916a 100644 --- a/gazelle/python/BUILD.bazel +++ b/gazelle/python/BUILD.bazel @@ -24,7 +24,7 @@ go_library( # # See following for more info: # https://github.com/bazelbuild/bazel-gazelle/issues/1513 - embedsrcs = ["stdlib_list.txt"], # keep + embedsrcs = ["stdlib_list.txt"], # keep # TODO: use user-defined version? importpath = "github.com/bazelbuild/rules_python/gazelle/python", visibility = ["//visibility:public"], deps = [ @@ -49,7 +49,7 @@ go_library( genrule( name = "stdlib_list", - srcs = ["@python_stdlib_list//file"], + srcs = ["@python_stdlib_list_3_11//file"], outs = ["stdlib_list.txt"], cmd_bash = "cat $(SRCS) > $@", cmd_bat = "type $(SRCS) > $@", @@ -93,7 +93,10 @@ filegroup( go_test( name = "default_test", - srcs = ["file_parser_test.go"], + srcs = [ + "file_parser_test.go", + "std_modules_test.go", + ], embed = [":python"], deps = [ "@com_github_stretchr_testify//assert", diff --git a/gazelle/python/file_parser.go b/gazelle/python/file_parser.go index aacd34b6ef..82865a0b94 100644 --- a/gazelle/python/file_parser.go +++ b/gazelle/python/file_parser.go @@ -11,6 +11,19 @@ import ( "github.com/smacker/go-tree-sitter/python" ) +const ( + sitterNodeTypeString = "string" + sitterNodeTypeComment = "comment" + sitterNodeTypeIdentifier = "identifier" + sitterNodeTypeDottedName = "dotted_name" + sitterNodeTypeIfStatement = "if_statement" + sitterNodeTypeAliasedImport = "aliased_import" + sitterNodeTypeWildcardImport = "wildcard_import" + sitterNodeTypeImportStatement = "import_statement" + sitterNodeTypeComparisonOperator = "comparison_operator" + sitterNodeTypeImportFromStatement = "import_from_statement" +) + type ParserOutput struct { FileName string Modules []module @@ -46,20 +59,20 @@ func (p *FileParser) parseMain(ctx context.Context, node *sitter.Node) bool { return false } child := node.Child(i) - if child.Type() == "if_statement" && child.Child(1).Type() == "comparison_operator" && - child.Child(1).Child(1).Type() == "==" { + if child.Type() == sitterNodeTypeIfStatement && + child.Child(1).Type() == sitterNodeTypeComparisonOperator && child.Child(1).Child(1).Type() == "==" { statement := child.Child(1) a, b := statement.Child(0), statement.Child(2) // convert "'__main__' == __name__" to "__name__ == '__main__'" - if b.Type() == "identifier" { + if b.Type() == sitterNodeTypeIdentifier { a, b = b, a } - if a.Type() == "identifier" && a.Content(p.code) == "__name__" && + if a.Type() == sitterNodeTypeIdentifier && a.Content(p.code) == "__name__" && // at github.com/smacker/go-tree-sitter@latest (after v0.0.0-20240422154435-0628b34cbf9c we used) // "__main__" is the second child of b. But now, it isn't. // we cannot use the latest go-tree-sitter because of the top level reference in scanner.c. // https://github.com/smacker/go-tree-sitter/blob/04d6b33fe138a98075210f5b770482ded024dc0f/python/scanner.c#L1 - b.Type() == "string" && string(p.code[b.StartByte()+1:b.EndByte()-1]) == "__main__" { + b.Type() == sitterNodeTypeString && string(p.code[b.StartByte()+1:b.EndByte()-1]) == "__main__" { return true } } @@ -68,14 +81,15 @@ func (p *FileParser) parseMain(ctx context.Context, node *sitter.Node) bool { } func parseImportStatement(node *sitter.Node, code []byte) (module, bool) { - if node.Type() == "dotted_name" { + switch node.Type() { + case sitterNodeTypeDottedName: return module{ Name: node.Content(code), LineNumber: node.StartPoint().Row + 1, }, true - } else if node.Type() == "aliased_import" { + case sitterNodeTypeAliasedImport: return parseImportStatement(node.Child(0), code) - } else if node.Type() == "wildcard_import" { + case sitterNodeTypeWildcardImport: return module{ Name: "*", LineNumber: node.StartPoint().Row + 1, @@ -85,7 +99,7 @@ func parseImportStatement(node *sitter.Node, code []byte) (module, bool) { } func (p *FileParser) parseImportStatements(node *sitter.Node) bool { - if node.Type() == "import_statement" { + if node.Type() == sitterNodeTypeImportStatement { for j := 1; j < int(node.ChildCount()); j++ { m, ok := parseImportStatement(node.Child(j), p.code) if !ok { @@ -97,7 +111,7 @@ func (p *FileParser) parseImportStatements(node *sitter.Node) bool { } p.output.Modules = append(p.output.Modules, m) } - } else if node.Type() == "import_from_statement" { + } else if node.Type() == sitterNodeTypeImportFromStatement { from := node.Child(1).Content(p.code) if strings.HasPrefix(from, ".") { return true @@ -119,7 +133,7 @@ func (p *FileParser) parseImportStatements(node *sitter.Node) bool { } func (p *FileParser) parseComments(node *sitter.Node) bool { - if node.Type() == "comment" { + if node.Type() == sitterNodeTypeComment { p.output.Comments = append(p.output.Comments, comment(node.Content(p.code))) return true } diff --git a/gazelle/python/file_parser_test.go b/gazelle/python/file_parser_test.go index 526afc7716..8131bb6c83 100644 --- a/gazelle/python/file_parser_test.go +++ b/gazelle/python/file_parser_test.go @@ -153,7 +153,7 @@ func TestParseComments(t *testing.T) { result: []comment{"# a = 1", "# b = 2"}, }, { - name: "has comment in def", + name: "has comment in if", code: "if True:\n # a = 1\n # b = 2", result: []comment{"# a = 1", "# b = 2"}, }, diff --git a/gazelle/python/std_modules_test.go b/gazelle/python/std_modules_test.go new file mode 100644 index 0000000000..bc22638e69 --- /dev/null +++ b/gazelle/python/std_modules_test.go @@ -0,0 +1,27 @@ +// Copyright 2023 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package python + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestIsStdModule(t *testing.T) { + assert.True(t, isStdModule(module{Name: "unittest"})) + assert.True(t, isStdModule(module{Name: "os.path"})) + assert.False(t, isStdModule(module{Name: "foo"})) +}