Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions src/lando/api/tests/test_hg.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,7 @@ def test_integrated_hgrepo_patch_hgimport_fail_success(
# Mock the internal method, so the public method can do exception conversion.
original_run_hg = scm._run_hg

def run_hg_conflict_on_import(*args):
def run_hg_conflict_on_import(*args, **kwargs):
# Fail the native import, but not the one using `patch`
if args[0][0] == "import" and "ui.patch=patch" not in args[0]:
raise hglib.error.CommandError(
Expand All @@ -301,7 +301,7 @@ def run_hg_conflict_on_import(*args):
b"",
b"forced fail: hunk FAILED -- saving rejects to file",
)
return original_run_hg(*args)
return original_run_hg(*args, **kwargs)

run_hg = mock.MagicMock()
run_hg.side_effect = run_hg_conflict_on_import
Expand Down Expand Up @@ -595,7 +595,7 @@ def test_HgSCM_apply_patch_git_conflict(
# Mock the internal method, so the public method can do exception conversion.
original_run_hg = scm._run_hg

def run_hg_conflict_on_import(*args):
def run_hg_conflict_on_import(*args, **kwargs):
# Fail the native import, but not the one using `patch`
if args[0][0] == "import" and "ui.patch=patch" not in args[0]:
raise hglib.error.CommandError(
Expand All @@ -604,7 +604,7 @@ def run_hg_conflict_on_import(*args):
b"",
b"forced fail: hunk FAILED -- saving rejects to file",
)
return original_run_hg(*args)
return original_run_hg(*args, **kwargs)

run_hg = mock.MagicMock()
run_hg.side_effect = run_hg_conflict_on_import
Expand Down
28 changes: 24 additions & 4 deletions src/lando/main/scm/git.py
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In your original description you mentioned this issue is with the hg export command. Do we need this to apply to git commands as well?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, git diff content doesn't need to be emitted in full to the logs, hence why I added truncate_log_output below.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, for both the git cases, I think we could simply remove --stdout or use --output to prevent the unnecessary output in the first place (which may be a legacy implementation anyway). I wonder if this is something we can also do in hg commands? I think if we can avoid truncating output it would be best.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In a way, if the output somehow ends up being incorrect, it's probably best to have some of it in the logs, for quicker identification, than none at all. Truncating would allow to retain that.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That said, yes, not using --stdout may be a good idea (likely with --output-directory and --numbered-files to get predictable names).

Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from lando.settings import LANDO_USER_EMAIL, LANDO_USER_NAME
from lando.utils.const import URL_USERINFO_RE
from lando.utils.github import GitHub
from lando.utils.strings import truncate_text

from .abstract_scm import AbstractSCM

Expand Down Expand Up @@ -230,7 +231,12 @@ def add_diff_from_patches(self, patches: str) -> str:

self._git_run("add", "-A", "-f", cwd=self.path)
return self._git_run(
"diff", "--staged", "--binary", cwd=self.path, rstrip=False
"diff",
"--staged",
"--binary",
cwd=self.path,
rstrip=False,
truncate_log_output=True,
)

@override
Expand All @@ -248,6 +254,7 @@ def get_patch(self, revision_id: str) -> str | None:
"-1",
revision_id,
cwd=self.path,
truncate_log_output=True,
)
# We only return the patch if the `From` header indicates that it's the same as
# the requested revision. This may not be the case when, e.g., `git
Expand Down Expand Up @@ -567,7 +574,13 @@ def repo_is_supported(cls, path: str) -> bool:
return True

@classmethod
def _git_run(cls, *args, cwd: str | None = None, rstrip: bool = True) -> str:
def _git_run(
cls,
*args,
cwd: str | None = None,
rstrip: bool = True,
truncate_log_output: bool = False,
) -> str:
"""Run a git command and return full output.

Parameters:
Expand All @@ -578,6 +591,12 @@ def _git_run(cls, *args, cwd: str | None = None, rstrip: bool = True) -> str:
cwd: str
Optional path to work in, default to '/'

truncate_log_output: bool
If `True`, only the head and tail of the command's output will be
logged. Use this for commands like `git format-patch` whose full
output is too large to be useful in logs. The return value is
unaffected.

Returns:
str: the standard output of the command
"""
Expand Down Expand Up @@ -620,13 +639,14 @@ def _git_run(cls, *args, cwd: str | None = None, rstrip: bool = True) -> str:
)

if out:
log_output = truncate_text(out) if truncate_log_output else out
logger.info(
"output from git command #%s: %s",
correlation_id,
out,
log_output,
extra={
"command_id": correlation_id,
"output": out,
"output": log_output,
"path": cwd,
},
)
Expand Down
21 changes: 16 additions & 5 deletions src/lando/main/scm/hg.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
TreeClosed,
)
from lando.main.scm.helpers import GitPatchHelper, HgPatchHelper, PatchHelper
from lando.utils.strings import truncate_text

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -359,7 +360,9 @@ def _prevent_hg_modifications(self, diff_name: str) -> None:
@override
def get_patch(self, revision_id: str) -> str | None:
"""Return a complete patch for the given revision, in the git extended diff format."""
out = self.run_hg(["export", "--git", "-r", revision_id])
out = self.run_hg(
["export", "--git", "-r", revision_id], truncate_log_output=True
)
try:
return out.decode("utf-8")
except UnicodeDecodeError:
Expand Down Expand Up @@ -657,16 +660,21 @@ def run_hg_cmds(self, cmds: list[list[str]]) -> bytes:
last_result = self.run_hg(cmd)
return last_result

def run_hg(self, args: list[str]) -> bytes:
def run_hg(self, args: list[str], *, truncate_log_output: bool = False) -> bytes:
"""Run a single Mercurial command, and return its output.

If `truncate_log_output` is `True`, only the head and tail of the command's
output will be logged. Use this for commands like `hg export` whose
full output is too large to be useful in logs. The return value is
unaffected.

A specific HgException will be raised on error."""
try:
return self._run_hg(args)
return self._run_hg(args, truncate_log_output=truncate_log_output)
except hglib.error.CommandError as exc:
raise HgException.from_hglib_error(exc) from exc

def _run_hg(self, args: list[str]) -> bytes:
def _run_hg(self, args: list[str], *, truncate_log_output: bool = False) -> bytes:
"""Use hglib to run a Mercurial command, and return its output."""
correlation_id = str(uuid.uuid4())
command_string = " ".join(["hg"] + [shlex.quote(str(arg)) for arg in args])
Expand Down Expand Up @@ -697,7 +705,10 @@ def _run_hg(self, args: list[str]) -> bytes:
out = out.getvalue()
err = err.getvalue()
if out:
out_string = (out.rstrip().decode(self.ENCODING, errors="replace"),)
decoded_output = out.rstrip().decode(self.ENCODING, errors="replace")
out_string = (
truncate_text(decoded_output) if truncate_log_output else decoded_output
)
logger.info(
"output from hg command #%s: %s",
correlation_id,
Expand Down
20 changes: 20 additions & 0 deletions src/lando/utils/strings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
LOG_OUTPUT_HEAD_LIMIT = 500
LOG_OUTPUT_TAIL_LIMIT = 200


def truncate_text(text: str) -> str:
"""Trim long text to its head and tail.

Useful for keeping log volume bounded for commands like `hg export` or
`git format-patch` that emit an entire patch, while preserving the most
diagnostically useful portions (commit metadata at the start, summary or
error trailer at the end).
"""
total = len(text)
if total <= LOG_OUTPUT_HEAD_LIMIT + LOG_OUTPUT_TAIL_LIMIT:
return text

head = text[:LOG_OUTPUT_HEAD_LIMIT]
tail = text[-LOG_OUTPUT_TAIL_LIMIT:]
omitted = total - LOG_OUTPUT_HEAD_LIMIT - LOG_OUTPUT_TAIL_LIMIT
return f"{head}\n...[{omitted} bytes omitted]...\n{tail}"
48 changes: 48 additions & 0 deletions src/lando/utils/tests/test_strings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import pytest

from lando.utils.strings import (
LOG_OUTPUT_HEAD_LIMIT,
LOG_OUTPUT_TAIL_LIMIT,
truncate_text,
)


@pytest.mark.parametrize(
"case_name,input_string",
[
("empty", ""),
("short", "hello world"),
(
"just_under_boundary",
"x" * (LOG_OUTPUT_HEAD_LIMIT + LOG_OUTPUT_TAIL_LIMIT - 1),
),
("at_boundary", "x" * (LOG_OUTPUT_HEAD_LIMIT + LOG_OUTPUT_TAIL_LIMIT)),
],
)
def test_truncate_text_passes_short_input_through(case_name, input_string):
assert (
truncate_text(input_string) == input_string
), f"`truncate_text` should leave `{case_name}` input unchanged."


@pytest.mark.parametrize("middle_size", [1, 1000, 100_000])
def test_truncate_text_long_keeps_head_and_tail(middle_size):
head = "H" * LOG_OUTPUT_HEAD_LIMIT
middle = "M" * middle_size
tail = "T" * LOG_OUTPUT_TAIL_LIMIT
long_output = head + middle + tail

result = truncate_text(long_output)

assert result.startswith(
head
), "Truncated text should preserve the first `LOG_OUTPUT_HEAD_LIMIT` characters."
assert result.endswith(
tail
), "Truncated text should preserve the last `LOG_OUTPUT_TAIL_LIMIT` characters."
assert (
f"[{middle_size} bytes omitted]" in result
), "Truncated text should include a marker reporting the omitted byte count."
assert (
"M" not in result
), "Truncated text should not contain any of the omitted middle section."
Loading