diff --git a/util/pypi/go.mod b/util/pypi/go.mod
new file mode 100644
index 00000000..9c7791c9
--- /dev/null
+++ b/util/pypi/go.mod
@@ -0,0 +1,7 @@
+module deps.dev/util/pypi
+
+go 1.23.4
+
+replace deps.dev/util/semver => ../semver
+
+require deps.dev/util/semver v0.0.0-20241230231135-52b7655a522f
diff --git a/util/pypi/metadata.go b/util/pypi/metadata.go
new file mode 100644
index 00000000..33985e64
--- /dev/null
+++ b/util/pypi/metadata.go
@@ -0,0 +1,259 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package pypi
+
+import (
+	"bytes"
+	"context"
+	"fmt"
+	"io"
+	"log"
+	"net/mail"
+	"strings"
+	"unicode/utf8"
+
+	"deps.dev/util/semver"
+)
+
+// Metadata holds metadata for a distribution as defined in
+// https://packaging.python.org/specifications/core-metadata/.
+type Metadata struct {
+	// Name and Version are the only fields required by the spec.
+	// Taken directly from Metadata and not canonicalized.
+	Name, Version string
+
+	// Optional metadata as defined by the spec.
+	Summary         string
+	Description     string
+	Homepage        string
+	Author          string
+	AuthorEmail     string
+	Maintainer      string
+	MaintainerEmail string
+	License         string
+	Classifiers     []string
+	ProjectURLs     []string
+
+	Dependencies []Dependency
+}
+
+// ParseMetadata reads a METADATA or PKG-INFO file and collects as much
+// information as possible. The earliest version of this format was a set of RFC
+// 822 headers (see https://www.python.org/dev/peps/pep-0241/) with later
+// versions (https://www.python.org/dev/peps/pep-0566/) adding the ability to
+// include a message body rendering the format essentially the same as an email.
+// The latest specification is here:
+// https://packaging.python.org/en/latest/specifications/core-metadata/. For
+// reference distlib, the library used by pip for this job, uses python's
+// standard library email reader to read these files (see
+// https://bitbucket.org/pypa/distlib/src/default/distlib/metadata.py). The
+// current version of the specification requires metadata to be encoded as
+// UTF-8, so an error will be returned if any invalid UTF-8 is discovered.
+func ParseMetadata(ctx context.Context, data string) (Metadata, error) {
+	if !utf8.ValidString(data) {
+		// TODO: maybe we could be a bit more lenient to support
+		// older packages.
+		return Metadata{}, parseErrorf("invalid UTF-8")
+	}
+	// Add a newline to the end; some files have no body which is an error to
+	// net/mail. Adding a newline ensures it will parse an empty body.
+	buf := bytes.NewBufferString(data)
+	buf.WriteByte('\n')
+	msg, err := mail.ReadMessage(buf)
+	if err != nil {
+		return Metadata{}, parseErrorf("parsing python metadata: %v", err)
+	}
+	md := Metadata{}
+
+	header := func(name string) (value string) {
+		vs := msg.Header[name]
+		if len(vs) > 1 {
+			log.Printf("Header set multiple times: %q: %q", name, vs)
+		}
+		if len(vs) == 1 && vs[0] != "UNKNOWN" {
+			value = vs[0]
+		}
+		return
+	}
+	multiHeader := func(name string) (values []string) {
+		for _, v := range msg.Header[name] {
+			if v != "UNKNOWN" {
+				values = append(values, v)
+			}
+		}
+		return
+	}
+
+	// Dependencies need some parsing and will always be needed.
+	for _, d := range msg.Header["Requires-Dist"] {
+		dep, err := ParseDependency(d)
+		if err != nil {
+			return Metadata{}, err
+		}
+		md.Dependencies = append(md.Dependencies, dep)
+	}
+
+	md.Name = header("Name")
+	md.Version = header("Version")
+	md.Summary = header("Summary")
+	md.Description = header("Description")
+	md.Homepage = header("Home-Page")
+	md.Author = header("Author")
+	md.AuthorEmail = header("Author-Email")
+	md.Maintainer = header("Maintainer")
+	md.MaintainerEmail = header("Maintainer-Email")
+	md.License = header("License")
+	md.ProjectURLs = multiHeader("Project-Url")
+	md.Classifiers = multiHeader("Classifier")
+
+	// The description may be in the message body.
+	body, err := io.ReadAll(msg.Body)
+	if err != nil {
+		return Metadata{}, parseErrorf("reading metadata description: %v", err)
+	}
+	if len(body) > 0 {
+		// Remove the extra line we added earlier to ensure a valid message.
+		body = body[:len(body)-1]
+		md.Description = string(body)
+	}
+	return md, nil
+}
+
+// Dependency is a dependency on a package.
+type Dependency struct {
+	Name        string
+	Extras      string
+	Constraint  string
+	Environment string
+}
+
+// ParseDependency parses a python requirement statement according to PEP 508
+// (https://www.python.org/dev/peps/pep-0508/), apart from URL requirements.
+// It returns a ParseError when the requirement is empty or malformed.
+func ParseDependency(v string) (Dependency, error) {
+	var d Dependency
+	const whitespace = " \t" // according to the PEP this is the only allowed whitespace
+	s := strings.Trim(v, whitespace)
+	if s == "" { // covers whitespace-only input too, which has no name to parse
+		return d, parseErrorf("invalid python requirement: empty string")
+	}
+	// The name runs up to the first whitespace or the start of another section.
+	nameEnd := strings.IndexAny(s, whitespace+"[(;<=!~>")
+	if nameEnd == 0 {
+		return d, parseErrorf("invalid python requirement: empty name")
+	}
+	if nameEnd < 0 {
+		d.Name = CanonPackageName(s)
+		return d, nil
+	}
+	d.Name = CanonPackageName(s[:nameEnd])
+	s = strings.TrimLeft(s[nameEnd:], whitespace)
+	// Does it have extras?
+	if s[0] == '[' {
+		end := strings.IndexByte(s, ']')
+		if end < 0 {
+			return d, parseErrorf("invalid python requirement: %q has unterminated extras section", v)
+		}
+		// Extract whatever is inside the []
+		d.Extras = strings.Trim(s[1:end], whitespace)
+		s = s[end+1:]
+	}
+	// Does it have a constraint?
+	if len(s) > 0 && s[0] != ';' {
+		end := strings.IndexByte(s, ';')
+		if end < 0 {
+			end = len(s) // all of the remainder is the constraint
+		}
+		d.Constraint = strings.Trim(s[:end], whitespace)
+		// May be parenthesized; drop the parentheses and any padding inside them.
+		if inner, ok := strings.CutPrefix(d.Constraint, "("); ok && strings.HasSuffix(inner, ")") {
+			d.Constraint = strings.Trim(strings.TrimSuffix(inner, ")"), whitespace)
+		}
+		s = s[end:]
+	}
+	// Anything left must be a condition starting with ';'. Otherwise there should
+	// be no way for s to be non-empty. If it is something's wrong, that's an
+	// error.
+	if len(s) > 0 && s[0] != ';' {
+		return d, parseErrorf("invalid python requirement: internal parse error on %q", v)
+	}
+	if s != "" {
+		d.Environment = strings.Trim(s[1:], whitespace) // s[0] == ';'
+	}
+	return d, nil
+}
+
+// CanonVersion canonicalizes a version string. If the version does not parse
+// according to PEP 440 it is returned as-is.
+func CanonVersion(ver string) string {
+	v, err := semver.PyPI.Parse(ver)
+	if err != nil {
+		return ver
+	}
+	return v.Canon(true)
+}
+
+// CanonPackageName normalizes a PyPI package name: ASCII letters are lowercased,
+// each run of '-', '_' or '.' collapses to one "-", and every other byte is
+// dropped. The rules follow PEP 503 (https://www.python.org/dev/peps/pep-0503/)
+// and https://github.com/pypa/pip/blob/20.0.2/src/pip/_vendor/packaging/utils.py.
+func CanonPackageName(name string) string {
+	var canon bytes.Buffer
+	// inSep records that the previous byte was a separator, so the "-" for the
+	// current run has already been written.
+	inSep := false
+	for _, b := range []byte(name) {
+		isSep := b == '-' || b == '_' || b == '.'
+		switch {
+		case isSep && !inSep:
+			canon.WriteByte('-')
+		case isSep:
+			// Later separators of the same run add nothing.
+		case 'A' <= b && b <= 'Z':
+			canon.WriteByte(b - 'A' + 'a')
+		case 'a' <= b && b <= 'z', '0' <= b && b <= '9':
+			canon.WriteByte(b)
+		default:
+			// Bytes outside [-_.A-Za-z0-9] (including non-ASCII) are skipped.
+		}
+		inSep = isSep
+	}
+	return canon.String()
+}
+
+// ParseError is returned when we encounter data that fails to parse.
+type ParseError struct {
+	msg string
+}
+
+func (p ParseError) Error() string {
+	return p.msg
+}
+
+// parseErrorf constructs a ParseError with a formatted message.
+func parseErrorf(format string, args ...any) ParseError {
+	return ParseError{msg: fmt.Sprintf(format, args...)}
+}
+
+// UnsupportedError is an error used to indicate when we encounter types of
+// packaging that we can not yet handle.
+type UnsupportedError struct {
+	msg         string
+	packageType string
+}
+
+func (p UnsupportedError) Error() string {
+	return fmt.Sprintf("%s: %s", p.packageType, p.msg)
+}
diff --git a/util/pypi/metadata_test.go b/util/pypi/metadata_test.go
new file mode 100644
index 00000000..9f3c8522
--- /dev/null
+++ b/util/pypi/metadata_test.go
@@ -0,0 +1,266 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package pypi
+
+import (
+	"context"
+	"errors"
+	"reflect"
+	"testing"
+)
+
+var numpyPkgInfoRaw = `Metadata-Version: 1.2
+Name: numPy
+Version: 1.16.4
+Summary:  NumPy is the fundamental package for array computing with Python.
+Home-page: https://www.numpy.org
+Author: NumPy Developers
+Author-email: numpy-discussion@python.org
+License: BSD
+Download-URL: https://pypi.python.org/pypi/numpy
+Description-Content-Type: UNKNOWN
+Description: It provides:
+        
+        - a powerful N-dimensional array object
+        - sophisticated...
+        
+Platform: Windows
+Platform: Linux
+Platform: Solaris
+Platform: Mac OS-X
+Platform: Unix
+Classifier: Development Status :: 5 - Production/Stable
+Classifier: License :: OSI Approved
+Classifier: Programming Language :: C
+Classifier: Programming Language :: Python
+Classifier: Programming Language :: Python :: Implementation :: CPython
+Classifier: Topic :: Software Development
+Classifier: Topic :: Scientific/Engineering
+Classifier: Operating System :: Microsoft :: Windows
+Classifier: Operating System :: POSIX
+Classifier: Operating System :: Unix
+Classifier: Operating System :: MacOS
+Requires-Python: >=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*
+Project-URL: Homepage, https://www.numpy.org
+`
+
+var numpyPkgInfo = Metadata{
+	Name:        "numPy",
+	Version:     "1.16.4",
+	Summary:     "NumPy is the fundamental package for array computing with Python.",
+	Description: "It provides:  - a powerful N-dimensional array object - sophisticated... ",
+	Homepage:    "https://www.numpy.org",
+	Author:      "NumPy Developers",
+	AuthorEmail: "numpy-discussion@python.org",
+	License:     "BSD",
+	Classifiers: []string{
+		"Development Status :: 5 - Production/Stable",
+		"License :: OSI Approved",
+		"Programming Language :: C",
+		"Programming Language :: Python",
+		"Programming Language :: Python :: Implementation :: CPython",
+		"Topic :: Software Development",
+		"Topic :: Scientific/Engineering",
+		"Operating System :: Microsoft :: Windows",
+		"Operating System :: POSIX",
+		"Operating System :: Unix",
+		"Operating System :: MacOS",
+	},
+	ProjectURLs: []string{"Homepage, https://www.numpy.org"},
+}
+
+// A real life METADATA file from a wheel, with the description in the body.
+var numbaMetadataRaw = `Metadata-Version: 2.1
+Name: Numba
+Version: 0.44.0
+Summary: compiling Python code using LLVM
+Home-page: https://github.com/numba/numba
+Author: Anaconda, Inc.
+Author-email: numba-users@continuum.io
+License: BSD
+Platform: UNKNOWN
+Requires-Dist: llvmlite (>=0.29.0)
+Requires-Dist: numpy
+Requires-Dist: funcsigs; python_version < "3.3"
+Requires-Dist: enum34; python_version < "3.4"
+Requires-Dist: singledispatch; python_version < "3.4"
+
+*****
+Numba
+*****
+
+.. image:: https://badges.gitter.im/numba/numba.svg
+   :target: https://gitter.im/numba/numba?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge
+   :alt: Gitter
+
+A Just-In-Time Compiler for Numerical Functions in Python
+#########################################################
+
+Numba is an open source,
+`
+
+var numbaMetadataParsed = Metadata{
+	Name:        "Numba",
+	Version:     "0.44.0",
+	Summary:     "compiling Python code using LLVM",
+	Description: "*****\nNumba\n*****\n\n.. image:: https://badges.gitter.im/numba/numba.svg\n   :target: https://gitter.im/numba/numba?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge\n   :alt: Gitter\n\nA Just-In-Time Compiler for Numerical Functions in Python\n#########################################################\n\nNumba is an open source,\n",
+	Homepage:    "https://github.com/numba/numba",
+	Author:      "Anaconda, Inc.",
+	AuthorEmail: "numba-users@continuum.io",
+	License:     "BSD",
+	Dependencies: []Dependency{
+		{"llvmlite", "", ">=0.29.0", ""},
+		{"numpy", "", "", ""},
+		{"funcsigs", "", "", "python_version < \"3.3\""},
+		{"enum34", "", "", "python_version < \"3.4\""},
+		{"singledispatch", "", "", "python_version < \"3.4\""},
+	},
+}
+
+// badPyPIMetadata contains some invalid metadata that should trigger a parse
+// error.
+var badPyPIMetadata = []string{
+	// Missing bracket in the requirement.
+	`Metadata-Version: 2.1
+Name: numba
+Version: 0.44.0
+Summary: compiling Python code using LLVM
+Requires-Dist: llvmlite[banana (>=0.29.0)
+
+*****
+Numba
+`,
+	// Incorrect line folding.
+	`Metadata-Version: 2.1
+Name: numba
+Version: 0.44.0
+Summary: compiling Python code using LLVM
+License: A long license that require
+many lines to express.
+Yes.
+Requires-Dist: llvmlite (>=0.29.0)
+`,
+	// Invalid UTF-8, uses an ISO-8859 non-breaking space.
+	`Metadata-Version: 2.1
+Name: numba
+Version: 0.44.0
+Summary: compiling Python` + string([]byte{0xA0}) + ` code using LLVM
+`,
+}
+
+func TestParseMetadata(t *testing.T) {
+	ctx := context.Background()
+
+	// real examples we want to be able to parse
+	got, err := ParseMetadata(ctx, numpyPkgInfoRaw)
+	if err != nil {
+		t.Errorf("Parsing numpy metadata: %v", err)
+	}
+	if !reflect.DeepEqual(got, numpyPkgInfo) {
+		t.Errorf("numpy metadata:\n got: %#v\nwant: %#v", got, numpyPkgInfo)
+	}
+	got, err = ParseMetadata(ctx, numbaMetadataRaw)
+	if err != nil {
+		t.Errorf("Parsing numba metadata: %v", err)
+	}
+	if !reflect.DeepEqual(got, numbaMetadataParsed) {
+		t.Errorf("numba metadata:\n got: %#v\nwant: %#v", got, numbaMetadataParsed)
+	}
+	for i, md := range badPyPIMetadata {
+		got, err := ParseMetadata(ctx, md)
+		var pErr ParseError
+		if ok := errors.As(err, &pErr); !ok {
+			t.Errorf("Parsing bad metadata %d: got: (%v, %#v), want ParseError", i, got, err)
+		}
+	}
+}
+
+func TestParseDependency(t *testing.T) {
+	for _, c := range []struct {
+		r string
+		w *Dependency
+	}{
+		// Cases we do handle.
+		// plain names:
+		{"plain", &Dependency{"plain", "", "", ""}},
+		{"colon;", &Dependency{"colon", "", "", ""}},
+		{" leading-space", &Dependency{"leading-space", "", "", ""}},
+		{"trailing-space\t", &Dependency{"trailing-space", "", "", ""}},
+		// extras:
+		{"empty-extra[]", &Dependency{"empty-extra", "", "", ""}},
+		{"spaced\t[hello ] ", &Dependency{"spaced", "hello", "", ""}},
+		{"extra[more]", &Dependency{"extra", "more", "", ""}},
+		{"extras[even, more]", &Dependency{"extras", "even, more", "", ""}},
+		// bare constraints, including with non-canonical names:
+		{"constraint >=2.1.2", &Dependency{"constraint", "", ">=2.1.2", ""}},
+		{"Multi ~=3.6, !=3.8.1", &Dependency{"multi", "", "~=3.6, !=3.8.1", ""}},
+		{"no_space>=1,!=3.4", &Dependency{"no-space", "", ">=1,!=3.4", ""}},
+		// conditions:
+		{"condition;python_version < \"3.6\"", &Dependency{"condition", "", "", "python_version < \"3.6\""}},
+		{"space_condition ; platform_machine == x86_64", &Dependency{"space-condition", "", "", "platform_machine == x86_64"}},
+		// combinations:
+		{"extra-constraint[more] ==2.0", &Dependency{"extra-constraint", "more", "==2.0", ""}},
+		{"extra-condition[stuff]; implementation_name == cpython", &Dependency{"extra-condition", "stuff", "", "implementation_name == cpython"}},
+		{"constraint-condition <1.0.0-alpha; extra == \"stuff\"", &Dependency{"constraint-condition", "", "<1.0.0-alpha", "extra == \"stuff\""}},
+		{"alltheabove[all,the,things] >=0.0; python_version >= 2.0", &Dependency{"alltheabove", "all,the,things", ">=0.0", "python_version >= 2.0"}},
+		{"parens (!=2.0)", &Dependency{"parens", "", "!=2.0", ""}},
+
+		// unsalvageable errors:
+		{"", nil},
+		{";", nil},
+		{"unterminated[something >2.1", nil},
+	} {
+		t.Run(c.r, func(t *testing.T) {
+			r, err := ParseDependency(c.r)
+			if err != nil {
+				if c.w != nil {
+					t.Errorf("want %q to parse: got %#v", c.r, err)
+				}
+				return
+			}
+			if c.w == nil {
+				t.Errorf("want %q to fail: got %#v", c.r, r)
+				return
+			}
+			if !reflect.DeepEqual(c.w, &r) {
+				t.Errorf("parse %q: want: %#v, got: %#v", c.r, c.w, r)
+			}
+		})
+	}
+}
+
+func TestCanonPackageName(t *testing.T) {
+	tests := []struct {
+		in, out string
+	}{
+		// Test cases from https://github.com/pypa/packaging/blob/20.0/tests/test_utils.py.
+		{"foo", "foo"},
+		{"Foo", "foo"},
+		{"fOo", "foo"},
+		{"foo.bar", "foo-bar"},
+		{"Foo.Bar", "foo-bar"},
+		{"Foo.....Bar", "foo-bar"},
+		{"foo_bar", "foo-bar"},
+		{"foo___bar", "foo-bar"},
+		{"foo-bar", "foo-bar"},
+		{"foo----bar", "foo-bar"},
+		{"foo-Տ", "foo-"}, // Strip out non-ASCII
+	}
+	for _, test := range tests {
+		if got := CanonPackageName(test.in); got != test.out {
+			t.Errorf("CanonPackageName(%s): got %s, want %s", test.in, got, test.out)
+		}
+	}
+}
diff --git a/util/pypi/sdist.go b/util/pypi/sdist.go
new file mode 100644
index 00000000..2e2d25dd
--- /dev/null
+++ b/util/pypi/sdist.go
@@ -0,0 +1,195 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package pypi
+
+import (
+	"archive/tar"
+	"bufio"
+	"bytes"
+	"compress/gzip"
+	"context"
+	"fmt"
+	"io"
+	"path/filepath"
+	"regexp"
+	"strings"
+)
+
+// SdistVersion splits the name of an sdist file into its package name and
+// version. The format of the names is not standardized, but it is a strong
+// enough convention that pip relies on it (see
+// https://github.com/pypa/pip/blob/0442875a68f19b0118b0b88c747bdaf6b24853ba/src/pip/_internal/index/package_finder.py#L978).
+// The filenames are formatted <name>-<version>, where the name is not
+// necessarily canonicalized. Both parts are returned as they appear in the
+// filename; pass the version to CanonVersion if a canonical form is needed.
+func SdistVersion(canonName, filename string) (string, string, error) {
+	// Take every substring ending in "-" and see if it canonicalizes to the
+	// name we are looking for.
+	// Start by trimming the extension.
+	nameVersion := strings.TrimSuffix(filename, filepath.Ext(filename))
+	// .tar.gz sdists have two extensions, make sure to trim .tar.
+	nameVersion = strings.TrimSuffix(nameVersion, ".tar")
+	for i, r := range nameVersion {
+		if r != '-' {
+			continue
+		}
+		name := CanonPackageName(nameVersion[:i])
+		if name == canonName {
+			return nameVersion[:i], nameVersion[i+1:], nil
+		}
+	}
+	return "", "", fmt.Errorf("invalid filename for package %q: %q", canonName, filename)
+}
+
+// Regular expression indicating a setup.py or setup.cfg specifies dependencies.
+// There may be some false positives: a line could be commented out or not in
+// the right place. There will be no false negatives; to specify dependencies
+// there must be at least one match for this pattern.
+var installRequiresPattern = regexp.MustCompile(`install_requires[ \t]*=`)
+
+// SdistMetadata attempts to read metadata out of the supplied reader assuming
+// it contains an sdist. The reader should be either a gzipped tar or a zip
+// file; the extension of the supplied filename is used to distinguish.
+func SdistMetadata(ctx context.Context, fileName string, r io.Reader) (*Metadata, error) {
+	// setupPy and setupCFG indicate whether we have found dependency information
+	// in a setup.py or setup.cfg.
+	setupPy, setupCFG := false, false
+	var meta Metadata
+
+	walkFn := func(name string, r io.Reader) error {
+		_, name, ok := strings.Cut(name, "/") // drop the leading directory component
+		if !ok {
+			return nil
+		}
+		if name == "setup.py" && !setupPy {
+			setupPy = installRequiresPattern.MatchReader(bufio.NewReader(r))
+			return nil
+		}
+		if name == "setup.cfg" && !setupCFG {
+			setupCFG = installRequiresPattern.MatchReader(bufio.NewReader(r))
+			return nil
+		}
+		if name != "PKG-INFO" {
+			return nil
+		}
+		if meta.Name != "" {
+			// Multiple top level PKG-INFO is only possible if the sdist contains
+			// multiple packages. This is invalid and therefore unsupported.
+			return UnsupportedError{
+				msg:         "multiple top level PKG-INFO",
+				packageType: "sdist",
+			}
+		}
+		contents, err := io.ReadAll(r)
+		if err != nil {
+			return err
+		}
+		md, err := ParseMetadata(ctx, string(contents))
+		if err != nil {
+			return err
+		}
+		meta.Name = md.Name
+		meta.Version = md.Version
+		meta.Summary = md.Summary
+		meta.Description = md.Description
+		meta.Homepage = md.Homepage
+		meta.Author = md.Author
+		meta.AuthorEmail = md.AuthorEmail
+		meta.Maintainer = md.Maintainer
+		meta.MaintainerEmail = md.MaintainerEmail
+		meta.License = md.License
+		meta.Classifiers = md.Classifiers
+		meta.ProjectURLs = md.ProjectURLs
+		if len(meta.Dependencies) == 0 {
+			meta.Dependencies = md.Dependencies
+		}
+		return nil
+	}
+	switch {
+	case strings.HasSuffix(fileName, ".tar.gz"),
+		strings.HasSuffix(fileName, ".tgz"):
+		tgz, err := gzip.NewReader(r)
+		if err != nil {
+			return nil, err
+		}
+		defer tgz.Close()
+		if err := walkTarFiles(tgz, walkFn); err != nil {
+			return nil, err
+		}
+	case strings.HasSuffix(fileName, ".zip"):
+		// TODO: try and avoid reading the whole archive into memory.
+		contents, err := io.ReadAll(r)
+		if err != nil {
+			return nil, err
+		}
+		if err := walkZipFiles(bytes.NewReader(contents), int64(len(contents)), walkFn); err != nil {
+			return nil, err
+		}
+	default:
+		return nil, UnsupportedError{
+			msg:         fmt.Sprintf("unsupported sdist format: %s", fileName),
+			packageType: "sdist",
+		}
+	}
+	if meta.Name == "" {
+		return nil, UnsupportedError{
+			msg:         "no PKG-INFO",
+			packageType: "sdist",
+		}
+	}
+	if len(meta.Dependencies) == 0 {
+		switch {
+		// If we found no dependencies in PKG-INFO but saw an
+		// install_requires line in a setup.py or setup.cfg file then
+		// report an error; we can't handle those dependencies yet.
+		case setupCFG:
+			return nil, UnsupportedError{
+				msg:         "dependencies in setup.cfg, not in PKG-INFO",
+				packageType: "sdist",
+			}
+		case setupPy:
+			return nil, UnsupportedError{
+				msg:         "dependencies in setup.py, not in PKG-INFO",
+				packageType: "sdist",
+			}
+		default:
+			// It genuinely has no dependencies.
+		}
+	}
+	return &meta, nil
+}
+
+// walkTarFiles visits each regular file in the tar archive read from r, in
+// archive order, calling f with the entry's name and a reader over its
+// contents. It stops at the first error from the archive or from f.
+func walkTarFiles(r io.Reader, f func(string, io.Reader) error) error {
+	archive := tar.NewReader(r)
+	for {
+		hdr, err := archive.Next()
+		switch {
+		case err == io.EOF:
+			// End of archive: every entry has been visited.
+			return nil
+		case err != nil:
+			return err
+		case hdr.Typeflag != tar.TypeReg:
+			// Only regular files are handed to f.
+			continue
+		}
+		if err := f(hdr.Name, archive); err != nil {
+			return err
+		}
+	}
+}
diff --git a/util/pypi/sdist_test.go b/util/pypi/sdist_test.go
new file mode 100644
index 00000000..1f361a68
--- /dev/null
+++ b/util/pypi/sdist_test.go
@@ -0,0 +1,210 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package pypi
+
+import (
+	"archive/tar"
+	"archive/zip"
+	"bytes"
+	"compress/gzip"
+	"context"
+	"errors"
+	"io"
+	"reflect"
+	"sort"
+	"testing"
+	"time"
+)
+
+func tarfile(t *testing.T, files map[string]string) []byte {
+	var buf bytes.Buffer
+	tfw := tar.NewWriter(&buf)
+	for name, contents := range files {
+		byteContents := []byte(contents)
+		hdr := &tar.Header{
+			Name:    name,
+			Size:    int64(len(byteContents)),
+			ModTime: time.Now(),
+		}
+		if err := tfw.WriteHeader(hdr); err != nil {
+			t.Fatal(err)
+		}
+		if _, err := tfw.Write(byteContents); err != nil {
+			t.Fatal(err)
+		}
+	}
+	if err := tfw.Close(); err != nil {
+		t.Fatal(err)
+	}
+	return buf.Bytes()
+}
+
+func targzfile(t *testing.T, files map[string]string) []byte {
+	tf := tarfile(t, files)
+	var buf bytes.Buffer
+	gzw := gzip.NewWriter(&buf)
+	if _, err := gzw.Write(tf); err != nil {
+		t.Fatal(err)
+	}
+	if err := gzw.Close(); err != nil {
+		t.Fatal(err)
+	}
+	return buf.Bytes()
+}
+
+func zipfile(t *testing.T, files map[string]string) []byte {
+	var buf bytes.Buffer
+	zw := zip.NewWriter(&buf)
+	var names []string
+	for n := range files {
+		names = append(names, n)
+	}
+	sort.Strings(names)
+	for _, name := range names {
+		w, err := zw.Create(name)
+		if err != nil {
+			t.Fatal(err)
+		}
+		if _, err := io.WriteString(w, files[name]); err != nil {
+			t.Fatal(err)
+		}
+	}
+	if err := zw.Close(); err != nil {
+		t.Fatal(err)
+	}
+	return buf.Bytes()
+}
+
+func TestSdistMetadata(t *testing.T) {
+	ctx := context.Background()
+
+	cases := []struct {
+		files       map[string]string
+		want        *Metadata
+		unsupported string
+	}{
+		{
+			files: map[string]string{
+				"test-1.1.1/":                       "",
+				"test-1.1.1/file-to-ignore.txt":     "this is boring",
+				"test-1.1.1/PKG-INFO":               numpyPkgInfoRaw,
+				"test-1.1.1/test.egg-info/PKG-INFO": numbaMetadataRaw,
+			},
+			want: &numpyPkgInfo,
+		},
+		{
+			files: map[string]string{
+				"test-1.1.2/PKG-INFO":                   numpyPkgInfoRaw,
+				"test-1.1.2/test.egg-info/requires.txt": "requirement-a\nrequirement-b\n",
+			},
+			want: &Metadata{
+				Name:            numpyPkgInfo.Name,
+				Version:         numpyPkgInfo.Version,
+				Summary:         numpyPkgInfo.Summary,
+				Description:     numpyPkgInfo.Description,
+				Homepage:        numpyPkgInfo.Homepage,
+				Author:          numpyPkgInfo.Author,
+				AuthorEmail:     numpyPkgInfo.AuthorEmail,
+				Maintainer:      numpyPkgInfo.Maintainer,
+				MaintainerEmail: numpyPkgInfo.MaintainerEmail,
+				License:         numpyPkgInfo.License,
+				Classifiers:     numpyPkgInfo.Classifiers,
+				ProjectURLs:     numpyPkgInfo.ProjectURLs,
+				// requirements only in the
+				// egg-info/requires.txt should be ignored.
+				Dependencies: nil,
+			},
+		},
+		// No PKG-INFO is an error
+		{
+			files: map[string]string{
+				"test-1.1.1/METADATA":         numbaMetadataRaw,
+				"test-1.1.1/setup.py":         "print('hello, test')",
+				"test-1.1.1/test/__init__.py": "\n",
+			},
+			unsupported: "no PKG-INFO",
+		},
+		// Ensure cases that have dependencies that are not specified in a way we
+		// understand but are otherwise valid give an appropriate error.
+		{
+			files: map[string]string{
+				"test-1.1.3/PKG-INFO":  numpyPkgInfoRaw,
+				"test-1.1.3/setup.cfg": "[options]\ninstall_requires = \n  requirement-a\n  requirement-b\n",
+			},
+			unsupported: "setup.cfg",
+		},
+		{
+			files: map[string]string{
+				"test-1.1.4/PKG-INFO": numpyPkgInfoRaw,
+				"test-1.1.4/setup.py": "from setuptools import setup\n\nsetup(\n  install_requires=['requirement-a', 'requirement-b']\n  )\n",
+			},
+			unsupported: "setup.py",
+		},
+		{
+			files: map[string]string{
+				"double-a/PKG-INFO": numpyPkgInfoRaw,
+				"double-b/PKG-INFO": numpyPkgInfoRaw,
+			},
+			unsupported: "multiple PKG-INFO",
+		},
+	}
+	// tar.gz files
+	for _, c := range cases {
+		tf := targzfile(t, c.files)
+		if c.unsupported != "" {
+			unsupportedSdist(ctx, t, tf, "test-1.0.tar.gz", c.unsupported)
+			continue
+		}
+		got, err := SdistMetadata(ctx, "test-0.0.1.tar.gz", bytes.NewBuffer(tf))
+		if err != nil {
+			t.Fatal(err)
+		}
+		if !reflect.DeepEqual(got, c.want) {
+			t.Errorf("sdist tar metadata: files:\n%+v\n got: %#v\nwant: %#v", c.files, got, c.want)
+		}
+	}
+	// zip files
+	for _, c := range cases {
+		tf := zipfile(t, c.files)
+		if c.unsupported != "" {
+			unsupportedSdist(ctx, t, tf, "test-1.0.zip", c.unsupported)
+			continue
+		}
+		got, err := SdistMetadata(ctx, "test-0.0.1.zip", bytes.NewBuffer(tf))
+		if err != nil {
+			t.Fatal(err)
+		}
+		if !reflect.DeepEqual(got, c.want) {
+			t.Errorf("sdist zip metadata: files:\n%+v\n got: %#v\nwant: %#v", c.files, got, c.want)
+		}
+	}
+	// Unsupported formats.
+	unsupportedSdist(ctx, t, []byte("this is a bz2"), "test-0.0.1.tar.bz2", "bz2 archive")
+	unsupportedSdist(ctx, t, []byte("xz yay"), "test-0.0.1.tar.xz", "xz archive")
+	unsupportedSdist(ctx, t, []byte("big z"), "test-0.0.1.tar.Z", "Z archive")
+	// TODO: support the following, it is simpler than the tar.gz we do
+	// already
+	unsupportedSdist(ctx, t, []byte("raw tar"), "test-0.0.1.tar", "uncompressed tar")
+}
+
+func unsupportedSdist(ctx context.Context, t *testing.T, data []byte, name, msg string) {
+	t.Helper()
+	var uerr UnsupportedError
+	if got, err := SdistMetadata(ctx, name, bytes.NewBuffer(data)); err == nil {
+		t.Errorf("%s: want error from unsupported sdist format, got:\nmetadata:\n%+v", msg, got)
+	} else if ok := errors.As(err, &uerr); !ok {
+		t.Errorf("%s: want: pypiUnsupportedError, got: %T", msg, err)
+	}
+}
diff --git a/util/pypi/wheel.go b/util/pypi/wheel.go
new file mode 100644
index 00000000..937a56ec
--- /dev/null
+++ b/util/pypi/wheel.go
@@ -0,0 +1,187 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package pypi
+
+import (
+	"archive/zip"
+	"context"
+	"fmt"
+	"io"
+	"strconv"
+	"strings"
+	"unicode"
+)
+
+// WheelInfo holds all of the information kept in the name of a wheel file.
+type WheelInfo struct {
+	Name      string        // Distribution name, the first element of the file name.
+	Version   string        // Distribution version, the second element of the file name.
+	BuildTag  WheelBuildTag // Optional build tag; the zero value if the name has none.
+	Platforms []PEP425Tag   // Compatibility tags, with compressed tag sets expanded.
+}
+
+// WheelBuildTag holds the components of a wheel's build tag.
+type WheelBuildTag struct {
+	Num int    // Leading run of digits, parsed as an integer.
+	Tag string // Whatever follows the digits; may be empty.
+}
+
+// PEP425Tag holds a compatibility tag defined in
+// https://www.python.org/dev/peps/pep-0425/
+type PEP425Tag struct {
+	Python   string // Python tag, e.g. "py3" or "cp3".
+	ABI      string // ABI tag, e.g. "none" or "cp3m".
+	Platform string // Platform tag, e.g. "any" or "win_amd64".
+}
+
+// ParseWheelName extracts all of the information in the name of a wheel. The
+// wheel naming format is described in PEP 427
+// (https://www.python.org/dev/peps/pep-0427/#file-name-convention). The name
+// and version are returned as written in the file name, not canonicalized.
+//
+// An error is returned if name lacks the ".whl" suffix, does not consist of 5
+// or 6 "-" separated elements, or has a build tag without a leading number.
+func ParseWheelName(name string) (*WheelInfo, error) {
+	name, ok := strings.CutSuffix(name, ".whl")
+	if !ok {
+		return nil, fmt.Errorf("not a wheel filename: %q", name)
+	}
+	parts := strings.Split(name, "-")
+	if len(parts) != 5 && len(parts) != 6 {
+		return nil, fmt.Errorf("wheel name %q has %d elements, not 5 or 6", name, len(parts))
+	}
+	pwi := &WheelInfo{
+		Name:    parts[0],
+		Version: parts[1],
+	}
+	if len(parts) == 6 {
+		// The optional build tag is a number followed by an arbitrary suffix.
+		buildTag := parts[2]
+		split := strings.IndexFunc(buildTag, func(r rune) bool {
+			return !unicode.IsDigit(r)
+		})
+		if split == 0 { // Must start with at least one digit.
+			return nil, fmt.Errorf("invalid wheel name %q: build tag %q does not start with digit", name, buildTag)
+		} else if split == -1 { // All digits, so there is no suffix.
+			split = len(buildTag)
+		}
+		num, err := strconv.Atoi(buildTag[:split])
+		if err != nil {
+			return nil, fmt.Errorf("invalid wheel name %q: %w", name, err)
+		}
+		pwi.BuildTag.Num = num
+		pwi.BuildTag.Tag = buildTag[split:]
+	}
+	// The compatibility tags are always the final three elements.
+	tags := parts[len(parts)-3:]
+	pwi.Platforms = expandPEP425Tag(PEP425Tag{Python: tags[0], ABI: tags[1], Platform: tags[2]})
+	return pwi, nil
+}
+
+// WheelMetadata reads the core metadata out of a wheel, which is a zip archive
+// of size bytes accessed through r. The layout is specified in PEP 427
+// (https://www.python.org/dev/peps/pep-0427/#file-format) and is much simpler
+// than that of an sdist: a wheel can not have a setup.py or setup.cfg, and its
+// metadata version must be 1.1 or greater, so the metadata supports
+// dependencies and there is nowhere else to declare them.
+//
+// Exactly one <dir>.dist-info/METADATA entry is expected, and an
+// UnsupportedError is returned if there are none or more than one. Other
+// errors come from reading the archive or parsing the metadata.
+func WheelMetadata(ctx context.Context, r io.ReaderAt, size int64) (*Metadata, error) {
+	var found *Metadata
+	visit := func(path string, contents io.Reader) error {
+		// Only <package-name>-<version>.dist-info/METADATA is of interest.
+		dir, file, nested := strings.Cut(path, "/")
+		if !nested || !strings.HasSuffix(dir, ".dist-info") || file != "METADATA" {
+			return nil
+		}
+		// A second match is ambiguous, so give up rather than pick one.
+		if found != nil {
+			return UnsupportedError{
+				msg:         "multiple METADATA files",
+				packageType: "wheel",
+			}
+		}
+		raw, err := io.ReadAll(contents)
+		if err != nil {
+			return err
+		}
+		parsed, err := ParseMetadata(ctx, string(raw))
+		if err != nil {
+			return err
+		}
+		found = &parsed
+		return nil
+	}
+	if err := walkZipFiles(r, size, visit); err != nil {
+		return nil, err
+	}
+	// The whole archive was walked without coming across any metadata.
+	if found == nil {
+		return nil, UnsupportedError{
+			msg:         "no METADATA file",
+			packageType: "wheel",
+		}
+	}
+	return found, nil
+}
+
+// expandPEP425Tag expands any compressed tag sets in the given tag to produce
+// the full set of supported systems. It uses the algorithm described in the PEP
+// (https://www.python.org/dev/peps/pep-0425/#compressed-tag-sets). Note this
+// can generate a fair number of impossible tags that are not supported by any
+// actual Python implementation.
+func expandPEP425Tag(tag PEP425Tag) []PEP425Tag {
+	var allTags []PEP425Tag
+	for _, py := range strings.Split(tag.Python, ".") {
+		for _, abi := range strings.Split(tag.ABI, ".") {
+			for _, plat := range strings.Split(tag.Platform, ".") {
+				allTags = append(allTags, PEP425Tag{
+					Python:   py,
+					ABI:      abi,
+					Platform: plat,
+				})
+			}
+		}
+	}
+	return allTags
+}
+
+// walkZipFiles calls callback with the name and contents of each file in the
+// zip archive of the given size read from r, in the order the archive lists
+// them, stopping at the first error. Every opened file is closed before moving
+// on, even if callback fails. Random access is needed because zip files store
+// their listings at the end, so sequential processing is not always possible.
+func walkZipFiles(r io.ReaderAt, size int64, callback func(string, io.Reader) error) error {
+	zr, err := zip.NewReader(r, size)
+	if err != nil {
+		return err
+	}
+	for _, f := range zr.File {
+		rc, err := f.Open()
+		if err != nil {
+			return err
+		}
+		err = callback(f.Name, rc)
+		if cerr := rc.Close(); err == nil { // A callback error takes precedence.
+			err = cerr
+		}
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
diff --git a/util/pypi/wheel_test.go b/util/pypi/wheel_test.go
new file mode 100644
index 00000000..62059653
--- /dev/null
+++ b/util/pypi/wheel_test.go
@@ -0,0 +1,161 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package pypi
+
+import (
+	"reflect"
+	"testing"
+)
+
+// TestParseWheelName checks what ParseWheelName extracts or rejects per name.
+func TestParseWheelName(t *testing.T) {
+	// TODO: Should the WheelInfo.Name fields go through canon.PackageName?
+	cases := []struct {
+		in  string
+		out *WheelInfo
+	}{
+		{
+			in: "generic-0.0.1-py2.py3-none-any.whl",
+			out: &WheelInfo{
+				Name:    "generic",
+				Version: "0.0.1",
+				Platforms: []PEP425Tag{{
+					Python:   "py2",
+					ABI:      "none",
+					Platform: "any",
+				}, {
+					Python:   "py3",
+					ABI:      "none",
+					Platform: "any",
+				}},
+			},
+		},
+		{
+			in: "very_generic-0.0.2-cp3.cp2-cp3m.cp2m-win_amd64.win32.whl",
+			out: &WheelInfo{
+				Name:    "very_generic",
+				Version: "0.0.2",
+				Platforms: []PEP425Tag{{
+					Python:   "cp3",
+					ABI:      "cp3m",
+					Platform: "win_amd64",
+				}, {
+					Python:   "cp3",
+					ABI:      "cp3m",
+					Platform: "win32",
+				}, {
+					Python:   "cp3",
+					ABI:      "cp2m",
+					Platform: "win_amd64",
+				}, {
+					Python:   "cp3",
+					ABI:      "cp2m",
+					Platform: "win32",
+				}, {
+					Python:   "cp2",
+					ABI:      "cp3m",
+					Platform: "win_amd64",
+				}, {
+					Python:   "cp2",
+					ABI:      "cp3m",
+					Platform: "win32",
+				}, {
+					Python:   "cp2",
+					ABI:      "cp2m",
+					Platform: "win_amd64",
+				}, {
+					Python:   "cp2",
+					ABI:      "cp2m",
+					Platform: "win32",
+				}},
+			},
+		},
+		{
+			in: "build_num-1.1.1.1.1-2a-cp3-cp3m-manylinux1_i686.whl",
+			out: &WheelInfo{
+				Name:    "build_num",
+				Version: "1.1.1.1.1",
+				BuildTag: WheelBuildTag{
+					Num: 2,
+					Tag: "a",
+				},
+				Platforms: []PEP425Tag{{
+					Python:   "cp3",
+					ABI:      "cp3m",
+					Platform: "manylinux1_i686",
+				}},
+			},
+		},
+		{
+			in: "long_num-1.2-12341234-cp3-cp3um-manylinux1_i686.whl",
+			out: &WheelInfo{
+				Name:    "long_num",
+				Version: "1.2",
+				BuildTag: WheelBuildTag{
+					Num: 12341234,
+				},
+				Platforms: []PEP425Tag{{
+					Python:   "cp3",
+					ABI:      "cp3um",
+					Platform: "manylinux1_i686",
+				}},
+			},
+		},
+		{
+			in:  "too_short-py3-macosx_10_6_intel.whl",
+			out: nil,
+		},
+		{
+			in:  "obvious-too-long-1.3.4-abcd--py3-none-any.whl",
+			out: nil,
+		},
+		{
+			in:  "not-a-wheel-at-all.zip",
+			out: nil,
+		},
+		{
+			in:  "badtag-1.1-ab123-cp2.cp3-cp2d.cp3d-linux_x86_64.whl",
+			out: nil,
+		},
+		// Some cases that are invalid are quite hard to distinguish.
+		{
+			in: "too-long-1.2.3-py2-none-win_amd64.whl",
+			out: &WheelInfo{
+				Name:    "too",
+				Version: "long",
+				BuildTag: WheelBuildTag{
+					Num: 1,
+					Tag: ".2.3",
+				},
+				Platforms: []PEP425Tag{{
+					Python:   "py2",
+					ABI:      "none",
+					Platform: "win_amd64",
+				}},
+			},
+		},
+	}
+	for _, c := range cases {
+		got, err := ParseWheelName(c.in)
+		switch {
+		case c.out == nil && err == nil:
+			t.Errorf("parse wheel name %q: want error, got: %+v", c.in, got)
+		case c.out != nil && err != nil:
+			t.Errorf("parse wheel name %q: want success, got err: %v", c.in, err)
+		case c.out != nil && !reflect.DeepEqual(c.out, got):
+			t.Errorf("parse wheel name %q:\nwant: %#v\n got: %#v", c.in, c.out, got)
+		}
+	}
+}