From d8390feaa99091d1ba9626bec0e4ba7072fc507a Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sat, 19 Apr 2025 12:49:55 -0400 Subject: [PATCH 1/5] Extract _resolve_download_filename with test. --- setuptools/package_index.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/setuptools/package_index.py b/setuptools/package_index.py index 1a6abebcda..b317735097 100644 --- a/setuptools/package_index.py +++ b/setuptools/package_index.py @@ -807,9 +807,16 @@ def open_url(self, url, warning=None): # noqa: C901 # is too complex (12) else: raise DistutilsError(f"Download error for {url}: {v}") from v - def _download_url(self, url, tmpdir): - # Determine download filename - # + @staticmethod + def _resolve_download_filename(url, tmpdir): + """ + >>> du = PackageIndex._resolve_download_filename + >>> root = getfixture('tmp_path') + >>> url = 'https://files.pythonhosted.org/packages/a9/5a/0db.../setuptools-78.1.0.tar.gz' + >>> import pathlib + >>> str(pathlib.Path(du(url, root)).relative_to(root)) + 'setuptools-78.1.0.tar.gz' + """ name, _fragment = egg_info_for_url(url) if name: while '..' in name: @@ -820,8 +827,13 @@ def _download_url(self, url, tmpdir): if name.endswith('.egg.zip'): name = name[:-4] # strip the extra .zip before download - filename = os.path.join(tmpdir, name) + return os.path.join(tmpdir, name) + def _download_url(self, url, tmpdir): + """ + Determine the download filename. + """ + filename = self._resolve_download_filename(url, tmpdir) return self._download_vcs(url, filename) or self._download_other(url, filename) @staticmethod From 250a6d17978f9f6ac3ac887091f2d32886fbbb0b Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sat, 19 Apr 2025 13:03:47 -0400 Subject: [PATCH 2/5] Add a check to ensure the name resolves relative to the tmpdir. Closes #4946 --- setuptools/package_index.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/setuptools/package_index.py b/setuptools/package_index.py index b317735097..a8f868e22b 100644 --- a/setuptools/package_index.py +++ b/setuptools/package_index.py @@ -810,12 +810,20 @@ def open_url(self, url, warning=None): # noqa: C901 # is too complex (12) @staticmethod def _resolve_download_filename(url, tmpdir): """ + >>> import pathlib >>> du = PackageIndex._resolve_download_filename >>> root = getfixture('tmp_path') >>> url = 'https://files.pythonhosted.org/packages/a9/5a/0db.../setuptools-78.1.0.tar.gz' - >>> import pathlib >>> str(pathlib.Path(du(url, root)).relative_to(root)) 'setuptools-78.1.0.tar.gz' + + Ensures the target is always in tmpdir. + + >>> url = 'https://anyhost/%2fhome%2fuser%2f.ssh%2fauthorized_keys' + >>> du(url, root) + Traceback (most recent call last): + ... + ValueError: Invalid filename... """ name, _fragment = egg_info_for_url(url) if name: @@ -827,7 +835,13 @@ def _resolve_download_filename(url, tmpdir): if name.endswith('.egg.zip'): name = name[:-4] # strip the extra .zip before download - return os.path.join(tmpdir, name) + filename = os.path.join(tmpdir, name) + + # ensure path resolves within the tmpdir + if not filename.startswith(str(tmpdir)): + raise ValueError(f"Invalid filename {filename}") + + return filename def _download_url(self, url, tmpdir): """ From e409e8002932f2b86aae7b1abc8f8c2ebf96df2c Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sat, 19 Apr 2025 13:35:30 -0400 Subject: [PATCH 3/5] Extract _sanitize method for sanitizing the filename. --- setuptools/package_index.py | 58 +++++++++++++++++++++++-------------- 1 file changed, 37 insertions(+), 21 deletions(-) diff --git a/setuptools/package_index.py b/setuptools/package_index.py index a8f868e22b..fdd0c825bf 100644 --- a/setuptools/package_index.py +++ b/setuptools/package_index.py @@ -808,7 +808,36 @@ def open_url(self, url, warning=None): # noqa: C901 # is too complex (12) raise DistutilsError(f"Download error for {url}: {v}") from v @staticmethod - def _resolve_download_filename(url, tmpdir): + def _sanitize(name): + r""" + Replace unsafe path directives with underscores. + + >>> san = PackageIndex._sanitize + >>> san('/home/user/.ssh/authorized_keys') + '_home_user_.ssh_authorized_keys' + >>> san('..\\foo\\bing') + '__foo_bing' + >>> san('D:bar') + 'D_bar' + >>> san('C:\\bar') + 'C__bar' + >>> san('foo..bar') + 'foo..bar' + >>> san('D:../foo') + 'D___foo' + """ + pattern = '|'.join(( + # drive letters + r':', + # path separators + r'[/\\]', + # parent dirs + r'(?:(?<=([/\\]|:))\.\.(?=[/\\]|$))|(?:^\.\.(?=[/\\]|$))', + )) + return re.sub(pattern, r'_', name) + + @classmethod + def _resolve_download_filename(cls, url, tmpdir): """ >>> import pathlib >>> du = PackageIndex._resolve_download_filename @@ -816,32 +845,19 @@ def _resolve_download_filename(url, tmpdir): >>> url = 'https://files.pythonhosted.org/packages/a9/5a/0db.../setuptools-78.1.0.tar.gz' >>> str(pathlib.Path(du(url, root)).relative_to(root)) 'setuptools-78.1.0.tar.gz' - - Ensures the target is always in tmpdir. - - >>> url = 'https://anyhost/%2fhome%2fuser%2f.ssh%2fauthorized_keys' - >>> du(url, root) - Traceback (most recent call last): - ... - ValueError: Invalid filename... """ name, _fragment = egg_info_for_url(url) - if name: - while '..' in name: - name = name.replace('..', '.').replace('\\', '_') - else: - name = "__downloaded__" # default if URL has no path contents + name = cls._sanitize( + name + or + # default if URL has no path contents + '__downloaded__' + ) if name.endswith('.egg.zip'): name = name[:-4] # strip the extra .zip before download - filename = os.path.join(tmpdir, name) - - # ensure path resolves within the tmpdir - if not filename.startswith(str(tmpdir)): - raise ValueError(f"Invalid filename {filename}") - - return filename + return os.path.join(tmpdir, name) def _download_url(self, url, tmpdir): """ From 2ca4a9fe4758fcd39d771d3d3a5b4840aacebdf7 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sat, 19 Apr 2025 13:39:54 -0400 Subject: [PATCH 4/5] Rely on re.sub to perform the decision in one expression. --- setuptools/package_index.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setuptools/package_index.py b/setuptools/package_index.py index fdd0c825bf..3500c2d86f 100644 --- a/setuptools/package_index.py +++ b/setuptools/package_index.py @@ -854,8 +854,8 @@ def _resolve_download_filename(cls, url, tmpdir): '__downloaded__' ) - if name.endswith('.egg.zip'): - name = name[:-4] # strip the extra .zip before download + # strip any extra .zip before download + name = re.sub(r'\.egg\.zip$', '.egg', name) return os.path.join(tmpdir, name) From 8faf1d7e0ca309983252e4f21837b73ee12e960f Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sat, 19 Apr 2025 13:41:15 -0400 Subject: [PATCH 5/5] Add news fragment. --- newsfragments/4946.bugfix.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 newsfragments/4946.bugfix.rst diff --git a/newsfragments/4946.bugfix.rst b/newsfragments/4946.bugfix.rst new file mode 100644 index 0000000000..b9100dc313 --- /dev/null +++ b/newsfragments/4946.bugfix.rst @@ -0,0 +1 @@ +More fully sanitized the filename in PackageIndex._download.