diff --git a/src/lando/api/tests/test_hg.py b/src/lando/api/tests/test_hg.py index d5281705a..9d476dff0 100644 --- a/src/lando/api/tests/test_hg.py +++ b/src/lando/api/tests/test_hg.py @@ -292,7 +292,7 @@ def test_integrated_hgrepo_patch_hgimport_fail_success( # Mock the internal method, so the public method can do exception conversion. original_run_hg = scm._run_hg - def run_hg_conflict_on_import(*args): + def run_hg_conflict_on_import(*args, **kwargs): # Fail the native import, but not the one using `patch` if args[0][0] == "import" and "ui.patch=patch" not in args[0]: raise hglib.error.CommandError( @@ -301,7 +301,7 @@ def run_hg_conflict_on_import(*args): b"", b"forced fail: hunk FAILED -- saving rejects to file", ) - return original_run_hg(*args) + return original_run_hg(*args, **kwargs) run_hg = mock.MagicMock() run_hg.side_effect = run_hg_conflict_on_import @@ -595,7 +595,7 @@ def test_HgSCM_apply_patch_git_conflict( # Mock the internal method, so the public method can do exception conversion. original_run_hg = scm._run_hg - def run_hg_conflict_on_import(*args): + def run_hg_conflict_on_import(*args, **kwargs): # Fail the native import, but not the one using `patch` if args[0][0] == "import" and "ui.patch=patch" not in args[0]: raise hglib.error.CommandError( @@ -604,7 +604,7 @@ def run_hg_conflict_on_import(*args): b"", b"forced fail: hunk FAILED -- saving rejects to file", ) - return original_run_hg(*args) + return original_run_hg(*args, **kwargs) run_hg = mock.MagicMock() run_hg.side_effect = run_hg_conflict_on_import diff --git a/src/lando/main/scm/git.py b/src/lando/main/scm/git.py index cdaeb9543..82a7533d4 100644 --- a/src/lando/main/scm/git.py +++ b/src/lando/main/scm/git.py @@ -24,6 +24,7 @@ from lando.settings import LANDO_USER_EMAIL, LANDO_USER_NAME from lando.utils.const import URL_USERINFO_RE from lando.utils.github import GitHub +from lando.utils.strings import truncate_text from .abstract_scm import AbstractSCM @@ -230,7 +231,12 @@ def add_diff_from_patches(self, patches: str) -> str: self._git_run("add", "-A", "-f", cwd=self.path) return self._git_run( - "diff", "--staged", "--binary", cwd=self.path, rstrip=False + "diff", + "--staged", + "--binary", + cwd=self.path, + rstrip=False, + truncate_log_output=True, ) @override @@ -248,6 +254,7 @@ def get_patch(self, revision_id: str) -> str | None: "-1", revision_id, cwd=self.path, + truncate_log_output=True, ) # We only return the patch if the `From` header indicates that it's the same as # the requested revision. This may not be the case when, e.g., `git @@ -567,7 +574,13 @@ def repo_is_supported(cls, path: str) -> bool: return True @classmethod - def _git_run(cls, *args, cwd: str | None = None, rstrip: bool = True) -> str: + def _git_run( + cls, + *args, + cwd: str | None = None, + rstrip: bool = True, + truncate_log_output: bool = False, + ) -> str: """Run a git command and return full output. Parameters: @@ -578,6 +591,12 @@ def _git_run(cls, *args, cwd: str | None = None, rstrip: bool = True) -> str: cwd: str Optional path to work in, default to '/' + truncate_log_output: bool + If `True`, only the head and tail of the command's output will be + logged. Use this for commands like `git format-patch` whose full + output is too large to be useful in logs. The return value is + unaffected. + Returns: str: the standard output of the command """ @@ -620,13 +639,14 @@ def _git_run(cls, *args, cwd: str | None = None, rstrip: bool = True) -> str: ) if out: + log_output = truncate_text(out) if truncate_log_output else out logger.info( "output from git command #%s: %s", correlation_id, - out, + log_output, extra={ "command_id": correlation_id, - "output": out, + "output": log_output, "path": cwd, }, ) diff --git a/src/lando/main/scm/hg.py b/src/lando/main/scm/hg.py index 86a9cb912..3b08f8390 100644 --- a/src/lando/main/scm/hg.py +++ b/src/lando/main/scm/hg.py @@ -35,6 +35,7 @@ TreeClosed, ) from lando.main.scm.helpers import GitPatchHelper, HgPatchHelper, PatchHelper +from lando.utils.strings import truncate_text logger = logging.getLogger(__name__) @@ -359,7 +360,9 @@ def _prevent_hg_modifications(self, diff_name: str) -> None: @override def get_patch(self, revision_id: str) -> str | None: """Return a complete patch for the given revision, in the git extended diff format.""" - out = self.run_hg(["export", "--git", "-r", revision_id]) + out = self.run_hg( + ["export", "--git", "-r", revision_id], truncate_log_output=True + ) try: return out.decode("utf-8") except UnicodeDecodeError: @@ -657,16 +660,21 @@ def run_hg_cmds(self, cmds: list[list[str]]) -> bytes: last_result = self.run_hg(cmd) return last_result - def run_hg(self, args: list[str]) -> bytes: + def run_hg(self, args: list[str], *, truncate_log_output: bool = False) -> bytes: """Run a single Mercurial command, and return its output. + If `truncate_log_output` is `True`, only the head and tail of the command's + output will be logged. Use this for commands like `hg export` whose + full output is too large to be useful in logs. The return value is + unaffected. + A specific HgException will be raised on error.""" try: - return self._run_hg(args) + return self._run_hg(args, truncate_log_output=truncate_log_output) except hglib.error.CommandError as exc: raise HgException.from_hglib_error(exc) from exc - def _run_hg(self, args: list[str]) -> bytes: + def _run_hg(self, args: list[str], *, truncate_log_output: bool = False) -> bytes: """Use hglib to run a Mercurial command, and return its output.""" correlation_id = str(uuid.uuid4()) command_string = " ".join(["hg"] + [shlex.quote(str(arg)) for arg in args]) @@ -697,7 +705,10 @@ def _run_hg(self, args: list[str]) -> bytes: out = out.getvalue() err = err.getvalue() if out: - out_string = (out.rstrip().decode(self.ENCODING, errors="replace"),) + decoded_output = out.rstrip().decode(self.ENCODING, errors="replace") + out_string = ( + truncate_text(decoded_output) if truncate_log_output else decoded_output + ) logger.info( "output from hg command #%s: %s", correlation_id, diff --git a/src/lando/utils/strings.py b/src/lando/utils/strings.py new file mode 100644 index 000000000..bbdc65244 --- /dev/null +++ b/src/lando/utils/strings.py @@ -0,0 +1,20 @@ +LOG_OUTPUT_HEAD_LIMIT = 500 +LOG_OUTPUT_TAIL_LIMIT = 200 + + +def truncate_text(text: str) -> str: + """Trim long text to its head and tail. + + Useful for keeping log volume bounded for commands like `hg export` or + `git format-patch` that emit an entire patch, while preserving the most + diagnostically useful portions (commit metadata at the start, summary or + error trailer at the end). + """ + total = len(text) + if total <= LOG_OUTPUT_HEAD_LIMIT + LOG_OUTPUT_TAIL_LIMIT: + return text + + head = text[:LOG_OUTPUT_HEAD_LIMIT] + tail = text[-LOG_OUTPUT_TAIL_LIMIT:] + omitted = total - LOG_OUTPUT_HEAD_LIMIT - LOG_OUTPUT_TAIL_LIMIT + return f"{head}\n...[{omitted} bytes omitted]...\n{tail}" diff --git a/src/lando/utils/tests/test_strings.py b/src/lando/utils/tests/test_strings.py new file mode 100644 index 000000000..2d2ca7be8 --- /dev/null +++ b/src/lando/utils/tests/test_strings.py @@ -0,0 +1,48 @@ +import pytest + +from lando.utils.strings import ( + LOG_OUTPUT_HEAD_LIMIT, + LOG_OUTPUT_TAIL_LIMIT, + truncate_text, +) + + +@pytest.mark.parametrize( + "case_name,input_string", + [ + ("empty", ""), + ("short", "hello world"), + ( + "just_under_boundary", + "x" * (LOG_OUTPUT_HEAD_LIMIT + LOG_OUTPUT_TAIL_LIMIT - 1), + ), + ("at_boundary", "x" * (LOG_OUTPUT_HEAD_LIMIT + LOG_OUTPUT_TAIL_LIMIT)), + ], +) +def test_truncate_text_passes_short_input_through(case_name, input_string): + assert ( + truncate_text(input_string) == input_string + ), f"`truncate_text` should leave `{case_name}` input unchanged." + + +@pytest.mark.parametrize("middle_size", [1, 1000, 100_000]) +def test_truncate_text_long_keeps_head_and_tail(middle_size): + head = "H" * LOG_OUTPUT_HEAD_LIMIT + middle = "M" * middle_size + tail = "T" * LOG_OUTPUT_TAIL_LIMIT + long_output = head + middle + tail + + result = truncate_text(long_output) + + assert result.startswith( + head + ), "Truncated text should preserve the first `LOG_OUTPUT_HEAD_LIMIT` characters." + assert result.endswith( + tail + ), "Truncated text should preserve the last `LOG_OUTPUT_TAIL_LIMIT` characters." + assert ( + f"[{middle_size} bytes omitted]" in result + ), "Truncated text should include a marker reporting the omitted byte count." + assert ( + "M" not in result + ), "Truncated text should not contain any of the omitted middle section."