diff --git a/src/lando/api/legacy/workers/landing_worker.py b/src/lando/api/legacy/workers/landing_worker.py index ff234ff4f..95f0b10af 100644 --- a/src/lando/api/legacy/workers/landing_worker.py +++ b/src/lando/api/legacy/workers/landing_worker.py @@ -147,7 +147,7 @@ def run_job(self, job: LandingJob) -> bool: # NOTE: This may need to happen on the revision-level when stack support is added. pull_number = job.revisions.first().pull_number message = f"Pull request closed by commit {commit_id}" - client = GitHubAPIClient(job.target_repo) + client = GitHubAPIClient(job.target_repo.url) client.add_comment_to_pull_request(pull_number, message) client.close_pull_request(pull_number) diff --git a/src/lando/api/views.py b/src/lando/api/views.py index 3e9bb58a5..6a056f993 100644 --- a/src/lando/api/views.py +++ b/src/lando/api/views.py @@ -209,7 +209,7 @@ class Form(forms.Form): # base_ref = forms.CharField() target_repo = Repo.objects.get(name=repo_name) - client = GitHubAPIClient(target_repo) + client = GitHubAPIClient(target_repo.url) ldap_username = request.user.email pull_request = PullRequest(client.get_pull_request(pull_number)) form = Form(json.loads(request.body)) diff --git a/src/lando/main/scm/git.py b/src/lando/main/scm/git.py index 5c99cef1a..e0b443b82 100644 --- a/src/lando/main/scm/git.py +++ b/src/lando/main/scm/git.py @@ -22,6 +22,8 @@ ) from lando.main.scm.helpers import GitPatchHelper, PatchHelper from lando.settings import LANDO_USER_EMAIL, LANDO_USER_NAME +from lando.utils.const import URL_USERINFO_RE +from lando.utils.github import GitHub from .abstract_scm import AbstractSCM @@ -33,24 +35,6 @@ ENV_COMMITTER_NAME = "GIT_COMMITTER_NAME" ENV_COMMITTER_EMAIL = "GIT_COMMITTER_EMAIL" -# From RFC-3986 [0]: -# -# userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) -# -# unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" -# pct-encoded = "%" HEXDIG HEXDIG -# sub-delims = "!" / "$" / "&" / "'" / "(" / ")" -# / "*" / "+" / "," / ";" / "= -# -# [0] https://www.rfc-editor.org/rfc/rfc3986 -URL_USERINFO_RE = re.compile( - "(?P[-A-Za-z0-9:._~%!$&'*()*+;=]*:[-A-Za-z0-9:._~%!$&'*()*+;=]*@)", - flags=re.MULTILINE, -) -GITHUB_URL_RE = re.compile( - f"https://{URL_USERINFO_RE.pattern}?github.com/(?P[-A-Za-z0-9]+)/(?P[^/]+)" -) - class GitSCM(AbstractSCM): """An implementation of the AbstractVCS for Git, for use by the Repo and LandingWorkers.""" @@ -109,34 +93,13 @@ def push( tags: list[str] | None = None, ): """Push local code to the remote repository.""" - from lando.utils.github import GitHubAPI - push_command = ["push"] if force_push: push_command += ["--force"] - if match := re.match(GITHUB_URL_RE, push_path): - # We only fetch a token if no authentication is explicitly specified in - # the push_url. - if not match["userinfo"]: - logger.info( - "Obtaining fresh GitHub token repo", - extra={ - "push_path": push_path, - "repo_name": match["repo"], - "repo_owner": match["owner"], - }, - ) - - owner = match["owner"] - repo = match["repo"] - repo_name = repo.removesuffix(".git") - - token = GitHubAPI._get_token(owner, repo_name) - - if token: - push_path = f"https://git:{token}@github.com/{owner}/{repo}" + if GitHub.is_supported_url(push_path): + push_path = GitHub(push_path).authenticated_url push_command += [push_path] diff --git a/src/lando/main/tests/test_git.py b/src/lando/main/tests/test_git.py index 9c413047b..9f192ce6d 100644 --- a/src/lando/main/tests/test_git.py +++ b/src/lando/main/tests/test_git.py @@ -8,7 +8,7 @@ from pathlib import Path from textwrap import dedent from unittest import mock -from unittest.mock import MagicMock +from unittest.mock import MagicMock, PropertyMock import pytest @@ -670,30 +670,35 @@ def test_GitSCM_push( @pytest.fixture -def mock_github_api_get_token(monkeypatch: pytest.MonkeyPatch): - mock_get_token = MagicMock() - mock_get_token.side_effect = ["ghs_yolo"] +def mock_github_authenticated_url(monkeypatch: pytest.MonkeyPatch): + mock_authenticated_url = PropertyMock() - monkeypatch.setattr("lando.utils.github.GitHubAPI._get_token", mock_get_token) + mock_authenticated_url.return_value = ( + "ssh+git:ghs_yolo@github.com/some-org/some-repo" + ) + + monkeypatch.setattr( + "lando.utils.github.GitHub.authenticated_url", mock_authenticated_url + ) - return mock_get_token + return mock_authenticated_url -def test_GitSCM_push_get_github_token( - git_repo: Path, mock_github_api_get_token: mock.Mock +def test_GitSCM_push_github_authenticated_url( + git_repo: Path, mock_github_authenticated_url: mock.Mock ): scm = GitSCM(str(git_repo)) scm._git_run = MagicMock() scm.push("https://github.com/some/repo") - assert scm._git_run.call_count == 1, "_git_run wasn't called when pushing" + assert scm._git_run.call_count >= 1, "_git_run wasn't called when pushing" assert ( - mock_github_api_get_token.call_count == 1 - ), "_get_github_token wasn't called when pushing to a github-like URL" + mock_github_authenticated_url.call_count == 1 + ), "GitHub.authenticated_url wasn't accessed when pushing to a github-like URL" assert ( "git:ghs_yolo@github.com" in scm._git_run.call_args[0][1] - ), "github token not found in rewritten push_path" + ), "GitHub authenticated_url was not found in rewritten push_path" @pytest.mark.parametrize( diff --git a/src/lando/ui/pull_requests.py b/src/lando/ui/pull_requests.py index 928fe5e33..d3bfc639f 100644 --- a/src/lando/ui/pull_requests.py +++ b/src/lando/ui/pull_requests.py @@ -19,7 +19,7 @@ def get( ) -> TemplateResponse: """Handle the GET request for the pull request view.""" target_repo = Repo.objects.get(name=repo_name) - client = GitHubAPIClient(target_repo) + client = GitHubAPIClient(target_repo.url) pull_request = PullRequest(client.get_pull_request(number)) landing_jobs = get_jobs_for_pull(target_repo, number) diff --git a/src/lando/utils/const.py b/src/lando/utils/const.py new file mode 100644 index 000000000..128ed0695 --- /dev/null +++ b/src/lando/utils/const.py @@ -0,0 +1,16 @@ +import re + +# From RFC-3986 [0]: +# +# userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) +# +# unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" +# pct-encoded = "%" HEXDIG HEXDIG +# sub-delims = "!" / "$" / "&" / "'" / "(" / ")" +# / "*" / "+" / "," / ";" / "= +# +# [0] https://www.rfc-editor.org/rfc/rfc3986 +URL_USERINFO_RE = re.compile( + "(?P[-A-Za-z0-9:._~%!$&'*()*+;=]*:[-A-Za-z0-9:._~%!$&'*()*+;=]*@)", + flags=re.MULTILINE, +) diff --git a/src/lando/utils/github.py b/src/lando/utils/github.py index 336e8d3a1..c6479dadd 100644 --- a/src/lando/utils/github.py +++ b/src/lando/utils/github.py @@ -1,36 +1,82 @@ import asyncio import logging +import re import requests from django.conf import settings from simple_github import AppAuth, AppInstallationAuth -from lando.main.models.repo import Repo +from lando.utils.const import URL_USERINFO_RE logger = logging.getLogger(__name__) -class GitHubAPI: - """A simple wrapper that authenticates with and communicates with the GitHub API.""" +class GitHub: + """Work with authentication to GitHub repositories.""" - GITHUB_BASE_URL = "https://api.github.com" + GITHUB_URL_RE = re.compile( + rf"https://{URL_USERINFO_RE.pattern}?github.com/(?P[-A-Za-z0-9]+)/(?P[^/]+?)(?:\.git)?(?:/|$)" + ) - def __init__(self, repo: Repo): - repo_owner = repo._github_repo_org - repo_name = repo.git_repo_name + repo_url: str + repo_owner: str + repo_name: str + userinfo: str - self.session = requests.Session() - self.session.headers.update( - { - "Authorization": f"Bearer {self._get_token(repo_owner, repo_name)}", - "User-Agent": settings.HTTP_USER_AGENT, - "Accept": "application/vnd.github+json", - "X-GitHub-Api-Version": "2022-11-28", - } + def __init__(self, repo_url: str): + self.repo_url = repo_url + + parsed_url_data = self.parse_url(self.repo_url) + + if parsed_url_data is None: + raise ValueError(f"Cannot parse URL as GitHub repo: {repo_url}") + + self.repo_owner = parsed_url_data["owner"] + self.repo_name = parsed_url_data["repo"] + self.userinfo = parsed_url_data["userinfo"] + + @classmethod + def is_supported_url(cls, url: str) -> bool: + """Determine whether the passed URL is a supported GitHub URL.""" + return cls.parse_url(url) is not None + + @classmethod + def parse_url(cls, url: str) -> re.Match[str] | None: + """Parse GitHub data from URL, or return None if not Github. + + Note: no normalisation is performed on the URL + """ + return re.match(cls.GITHUB_URL_RE, url) + + @property + def authenticated_url(self) -> str: + """Return an authenticated URL, suitable for use with `git` to push and pull. + + If the URL already has authentication parameters, it is returned verbatim. If + not, a token is fetched by the GitHub app, and inserted into the USERINFO part of + the URL, without any other changes (e.g., in the REST path or Query String). + """ + if self.userinfo: + # We only fetch a token if no authentication is explicitly specified in + # the repo_url. + return self.repo_url + + logger.info( + f"Obtaining fresh GitHub token for GitHub repo at {self.repo_url}", ) - @staticmethod - def _get_token(repo_owner: str, repo_name: str) -> str | None: + token = self._fetch_token() + + if token: + return self.repo_url.replace( + "https://github.com", f"https://git:{token}@github.com" + ) + + # We didn't get a token + logger.warning(f"Couldn't obtain a token for GitHub repo at {self.repo_url}") + return self.repo_url + + def _fetch_token(self) -> str | None: """Obtain a fresh GitHub token to push to the specified repo. This relies on GITHUB_APP_ID and GITHUB_APP_PRIVKEY to be set in the @@ -44,7 +90,7 @@ def _get_token(repo_owner: str, repo_name: str) -> str | None: if not app_id or not private_key: logger.warning( - f"Missing GITHUB_APP_ID or GITHUB_APP_PRIVKEY to authenticate against GitHub repo {repo_owner}/{repo_name}", + f"Missing GITHUB_APP_ID or GITHUB_APP_PRIVKEY to authenticate against GitHub repo at {self.repo_url}", ) return None @@ -52,9 +98,32 @@ def _get_token(repo_owner: str, repo_name: str) -> str | None: app_id, private_key, ) - session = AppInstallationAuth(app_auth, repo_owner, repositories=[repo_name]) + session = AppInstallationAuth( + app_auth, self.repo_owner, repositories=[self.repo_name] + ) return asyncio.run(session.get_token()) + +class GitHubAPI(GitHub): + """A simple wrapper that authenticates with and communicates with the GitHub API.""" + + session: requests.Session + + GITHUB_BASE_URL = "https://api.github.com" + + def __init__(self, repo_url: str): + super().__init__(repo_url) + + self.session = requests.Session() + self.session.headers.update( + { + "Authorization": f"Bearer {self._fetch_token()}", + "User-Agent": settings.HTTP_USER_AGENT, + "Accept": "application/vnd.github+json", + "X-GitHub-Api-Version": "2022-11-28", + } + ) + def get(self, path: str, *args, **kwargs) -> requests.Response: """Send a GET request to the GitHub API with given args and kwargs.""" url = f"{self.GITHUB_BASE_URL}/{path}" @@ -71,12 +140,9 @@ class GitHubAPIClient: _api: GitHubAPI - def __init__(self, repo: Repo): - self._api = GitHubAPI(repo) - self.repo = repo - self.repo_base_url = ( - f"repos/{self.repo._github_repo_org}/{self.repo.git_repo_name}" - ) + def __init__(self, repo_url: str): + self._api = GitHubAPI(repo_url) + self.repo_base_url = f"repos/{self._api.repo_owner}/{self._api.repo_name}" def _repo_get(self, subpath: str, *args, **kwargs) -> dict | list: """Get API endpoint scoped to the repo_base_url. diff --git a/src/lando/utils/tests/test_github.py b/src/lando/utils/tests/test_github.py new file mode 100644 index 000000000..fbb6d04dd --- /dev/null +++ b/src/lando/utils/tests/test_github.py @@ -0,0 +1,100 @@ +from unittest import mock + +import pytest + +from lando.utils.github import GitHub, GitHubAPI, GitHubAPIClient + + +@pytest.mark.parametrize( + "url, expected_support", + ( + ("https://github.com/mozilla-firefox/firefox", True), + ("https://github.com/mozilla-firefox/firefox/", True), + ("https://someuser:somepass@github.com/owner/repo.git/", True), + ("http://git.test/test-repo/", False), + ("https://hg.mozilla.org/mozilla-central/", False), + ), +) +def test_github_is_supported(url: str, expected_support: bool): + assert ( + GitHub.is_supported_url(url) == expected_support + ), f"Support for {url} incorrectly determined" + + +@pytest.mark.parametrize( + "url, expected_repo_owner, expected_repo_name", + ( + ("https://github.com/mozilla-firefox/firefox", "mozilla-firefox", "firefox"), + ("https://github.com/mozilla-firefox/firefox/", "mozilla-firefox", "firefox"), + ("https://someuser:somepass@github.com/owner/repo.git", "owner", "repo"), + ("https://someuser:somepass@github.com/owner/repo.git/", "owner", "repo"), + ), +) +def test_github_parsed_url(url: str, expected_repo_owner: str, expected_repo_name: str): + github = GitHub(url) + + assert github.repo_owner == expected_repo_owner, "Repo owner mismatch" + assert github.repo_name == expected_repo_name, "Repo name mismatch" + + +def test_github_parsed_url_not_github(): + with pytest.raises(ValueError): + GitHub("https://hg.mozilla.org/mozilla-central/") + + +@pytest.fixture +def mock_github_fetch_token(monkeypatch: pytest.MonkeyPatch) -> mock.Mock: + mock_fetch_token = mock.MagicMock() + mock_fetch_token.return_value = "token" + monkeypatch.setattr("lando.utils.github.GitHub._fetch_token", mock_fetch_token) + return mock_fetch_token + + +@pytest.mark.parametrize( + "url, expected_authenticated_url", + ( + ( + "https://github.com/mozilla-firefox/firefox/", + "https://git:token@github.com/mozilla-firefox/firefox/", + ), + ( + "https://github.com/mozilla-firefox/firefox.git/", + "https://git:token@github.com/mozilla-firefox/firefox.git/", + ), + ( + "https://github.com/mozilla-firefox/firefox.git/some?other#path", + "https://git:token@github.com/mozilla-firefox/firefox.git/some?other#path", + ), + ( + "https://someuser:somepass@github.com/owner/repo.git/", + "https://someuser:somepass@github.com/owner/repo.git/", + ), + ), +) +def test_github_authenticated_url( + mock_github_fetch_token: mock.Mock, url: str, expected_authenticated_url: str +): + assert GitHub(url).authenticated_url == expected_authenticated_url + + +def test_github_authenticated_url_no_token( + mock_github_fetch_token: mock.Mock, caplog: pytest.LogCaptureFixture +): + mock_github_fetch_token.return_value = None + + url = "https://github.com/mozilla-firefox/firefox/" + + assert GitHub(url).authenticated_url == url + assert "Couldn't obtain a token" in caplog.text + + +def test_github_api_init(mock_github_fetch_token: mock.Mock): + api_client = GitHubAPI("https://github.com/o/r") + + assert api_client.session.headers.get("Authorization") == "Bearer token" + + +def test_github_api_client_init(mock_github_fetch_token: mock.Mock): + api_client = GitHubAPIClient("https://github.com/o/r") + + assert api_client.repo_base_url == "repos/o/r"