Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions src/lando/main/scm/git.py
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In your original description you mentioned this issue is with the hg export command. Do we need this to apply to git commands as well?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, git diff content doesn't need to be emitted in full to the logs, which is why I added truncate_log_output below.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, for both the git cases, I think we could simply remove --stdout or use --output to prevent the unnecessary output in the first place (which may be a legacy implementation anyway). I wonder if this is something we can also do in hg commands? I think if we can avoid truncating output it would be best.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In a way, if the output somehow ends up being incorrect, it's probably better to have some of it in the logs, for quicker identification, than none at all. Truncating would allow us to retain that.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That said, yes, not using --stdout may be a good idea (likely with --output-directory and --numbered-files to get predictable names).

Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from lando.settings import LANDO_USER_EMAIL, LANDO_USER_NAME
from lando.utils.const import URL_USERINFO_RE
from lando.utils.github import GitHub
from lando.utils.strings import truncate_output

from .abstract_scm import AbstractSCM

Expand Down Expand Up @@ -620,13 +621,14 @@ def _git_run(cls, *args, cwd: str | None = None, rstrip: bool = True) -> str:
)

if out:
truncated_output = truncate_output(out)
logger.info(
"output from git command #%s: %s",
correlation_id,
out,
truncated_output,
extra={
"command_id": correlation_id,
"output": out,
"output": truncated_output,
"path": cwd,
},
)
Expand Down
4 changes: 3 additions & 1 deletion src/lando/main/scm/hg.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
TreeClosed,
)
from lando.main.scm.helpers import GitPatchHelper, HgPatchHelper, PatchHelper
from lando.utils.strings import truncate_output

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -697,7 +698,8 @@ def _run_hg(self, args: list[str]) -> bytes:
out = out.getvalue()
err = err.getvalue()
if out:
out_string = (out.rstrip().decode(self.ENCODING, errors="replace"),)
decoded_output = out.rstrip().decode(self.ENCODING, errors="replace")
out_string = truncate_output(decoded_output)
logger.info(
"output from hg command #%s: %s",
correlation_id,
Expand Down
19 changes: 19 additions & 0 deletions src/lando/utils/strings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
LOG_OUTPUT_HEAD_LIMIT = 500
LOG_OUTPUT_TAIL_LIMIT = 200


def truncate_output(output: str) -> str:
"""Trim long command output to its head and tail.

Keeps log volume bounded for commands like `hg export` or `git format-patch`
that emit an entire patch, while preserving the most diagnostically useful
portions (commit metadata at the start, summary or error trailer at the end).
"""
total = len(output)
if total <= LOG_OUTPUT_HEAD_LIMIT + LOG_OUTPUT_TAIL_LIMIT:
return output

head = output[:LOG_OUTPUT_HEAD_LIMIT]
tail = output[-LOG_OUTPUT_TAIL_LIMIT:]
omitted = total - LOG_OUTPUT_HEAD_LIMIT - LOG_OUTPUT_TAIL_LIMIT
return f"{head}\n...[{omitted} bytes omitted]...\n{tail}"
48 changes: 48 additions & 0 deletions src/lando/utils/tests/test_strings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import pytest

from lando.utils.strings import (
LOG_OUTPUT_HEAD_LIMIT,
LOG_OUTPUT_TAIL_LIMIT,
truncate_output,
)


@pytest.mark.parametrize(
    "case_name,input_string",
    [
        ("empty", ""),
        ("short", "hello world"),
        (
            "just_under_boundary",
            "x" * (LOG_OUTPUT_HEAD_LIMIT + LOG_OUTPUT_TAIL_LIMIT - 1),
        ),
        ("at_boundary", "x" * (LOG_OUTPUT_HEAD_LIMIT + LOG_OUTPUT_TAIL_LIMIT)),
    ],
)
def test_truncate_output_passes_short_input_through(case_name, input_string):
    """Input no longer than the combined head and tail limits is returned as-is."""
    returned = truncate_output(input_string)

    # `case_name` only labels the failure message; it does not affect the check.
    assert (
        returned == input_string
    ), f"`truncate_output` should leave `{case_name}` input unchanged."


@pytest.mark.parametrize("middle_size", [1, 1000, 100_000])
def test_truncate_output_long_keeps_head_and_tail(middle_size):
    """Over-long input keeps its head and tail and reports the omitted count."""
    # Distinct filler letters make each section identifiable in the result.
    expected_prefix = "H" * LOG_OUTPUT_HEAD_LIMIT
    expected_suffix = "T" * LOG_OUTPUT_TAIL_LIMIT
    dropped_section = "M" * middle_size

    truncated = truncate_output(
        "".join((expected_prefix, dropped_section, expected_suffix))
    )

    assert truncated.startswith(
        expected_prefix
    ), "Truncated output should preserve the first `LOG_OUTPUT_HEAD_LIMIT` characters."
    assert truncated.endswith(
        expected_suffix
    ), "Truncated output should preserve the last `LOG_OUTPUT_TAIL_LIMIT` characters."
    assert (
        f"[{middle_size} bytes omitted]" in truncated
    ), "Truncated output should include a marker reporting the omitted byte count."
    assert (
        "M" not in truncated
    ), "Truncated output should not contain any of the omitted middle section."
Loading