Skip to content

Commit

Permalink
Add support for compressed cloud-init file log parsing (#902)
Browse files Browse the repository at this point in the history
  • Loading branch information
JSCU-CNI authored Oct 16, 2024
1 parent 9cc9de4 commit cb69261
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 26 deletions.
59 changes: 43 additions & 16 deletions dissect/target/plugins/os/unix/log/messages.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
from __future__ import annotations

import re
from datetime import datetime, timezone, tzinfo
from pathlib import Path
from typing import Iterator

from dissect.target import Target
from dissect.target.exceptions import UnsupportedPluginError
from dissect.target.helpers.fsutil import open_decompress
from dissect.target.helpers.record import TargetRecordDescriptor
from dissect.target.helpers.utils import year_rollover_helper
from dissect.target.plugin import Plugin, alias, export
Expand Down Expand Up @@ -66,7 +70,7 @@ def messages(self) -> Iterator[MessagesRecord]:

for log_file in self.log_files:
if "cloud-init" in log_file.name:
yield from self._parse_cloud_init_log(log_file)
yield from self._parse_cloud_init_log(log_file, tzinfo)
continue

for ts, line in year_rollover_helper(log_file, RE_TS, DEFAULT_TS_LOG_FORMAT, tzinfo):
Expand All @@ -83,7 +87,7 @@ def messages(self) -> Iterator[MessagesRecord]:
_target=self.target,
)

def _parse_cloud_init_log(self, log_file: Path) -> Iterator[MessagesRecord]:
def _parse_cloud_init_log(self, log_file: Path, tzinfo: tzinfo | None = timezone.utc) -> Iterator[MessagesRecord]:
"""Parse a cloud-init.log file.
Lines are structured in the following format:
Expand All @@ -96,18 +100,41 @@ def _parse_cloud_init_log(self, log_file: Path) -> Iterator[MessagesRecord]:
Returns: ``MessagesRecord``
"""
for line in log_file.open("rt").readlines():
if line := line.strip():
if match := RE_CLOUD_INIT_LINE.match(line):
match = match.groupdict()
yield MessagesRecord(
ts=match["ts"].split(",")[0],
daemon=match["daemon"],
pid=None,
message=match["message"],
source=log_file,
_target=self.target,
)
else:
self.target.log.warning("Could not match cloud-init log line")

ts_fmt = "%Y-%m-%d %H:%M:%S,%f"

with open_decompress(log_file, "rt") as fh:
for line in fh:
if not (line := line.strip()):
continue

if not (match := RE_CLOUD_INIT_LINE.match(line)):
self.target.log.warning("Could not match cloud-init log line in file: %s", log_file)
self.target.log.debug("No match for line '%s'", line)
continue

values = match.groupdict()

# Actual format is ``YYYY-MM-DD HH:MM:SS,000`` (asctime with milliseconds) but python has no strptime
# operator for 3 digit milliseconds, so we convert and pad to six digit microseconds.
# https://github.com/canonical/cloud-init/blob/main/cloudinit/log/loggers.py#DEFAULT_LOG_FORMAT
# https://docs.python.org/3/library/logging.html#asctime
raw_ts, _, milliseconds = values["ts"].rpartition(",")
raw_ts += "," + str((int(milliseconds) * 1000)).zfill(6)

try:
ts = datetime.strptime(raw_ts, ts_fmt).replace(tzinfo=tzinfo)

except ValueError as e:
self.target.log.warning("Timestamp '%s' does not match format '%s'", raw_ts, ts_fmt)
self.target.log.debug("", exc_info=e)
ts = datetime(1970, 1, 1, 0, 0, 0, 0)

yield MessagesRecord(
ts=ts,
daemon=values["daemon"],
pid=None,
message=values["message"],
source=log_file,
_target=self.target,
)
34 changes: 24 additions & 10 deletions tests/plugins/os/unix/log/test_messages.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
import gzip
import tarfile
import textwrap
from datetime import datetime, timezone
from io import BytesIO
from unittest.mock import patch
from zoneinfo import ZoneInfo

from flow.record.fieldtypes import datetime as dt

from dissect.target import Target
from dissect.target.filesystem import VirtualFilesystem
from dissect.target.filesystems.tar import TarFilesystem
Expand Down Expand Up @@ -83,8 +82,8 @@ def test_unix_log_messages_compressed_timezone_year_rollover() -> None:
assert len(results) == 2
assert isinstance(results[0], type(MessagesRecord()))
assert isinstance(results[1], type(MessagesRecord()))
assert results[0].ts == dt(2020, 12, 31, 3, 14, 0, tzinfo=ZoneInfo("America/Chicago"))
assert results[1].ts == dt(2021, 1, 1, 13, 37, 0, tzinfo=ZoneInfo("America/Chicago"))
assert results[0].ts == datetime(2020, 12, 31, 3, 14, 0, tzinfo=ZoneInfo("America/Chicago"))
assert results[1].ts == datetime(2021, 1, 1, 13, 37, 0, tzinfo=ZoneInfo("America/Chicago"))


def test_unix_log_messages_malformed_log_year_rollover(target_unix_users: Target, fs_unix: VirtualFilesystem) -> None:
Expand All @@ -108,19 +107,34 @@ def test_unix_log_messages_malformed_log_year_rollover(target_unix_users: Target
assert len(results) == 2


def test_unix_messages_cloud_init(target_unix_users: Target, fs_unix: VirtualFilesystem) -> None:
def test_unix_messages_cloud_init(target_unix: Target, fs_unix: VirtualFilesystem) -> None:
"""test if we correctly parse plaintext and compressed cloud-init log files."""

messages = """
2005-08-09 11:55:21,000 - foo.py[DEBUG]: This is a cloud-init message!
2005-08-09 11:55:21,001 - util.py[DEBUG]: Cloud-init v. 1.2.3-4ubuntu5 running 'init-local' at Tue, 9 Aug 2005 11:55:21 +0000. Up 13.37 seconds.
""" # noqa: E501
fs_unix.map_file_fh("/var/log/installer/cloud-init.log", BytesIO(textwrap.dedent(messages).encode()))
msg_bytes = textwrap.dedent(messages).encode()

target_unix_users.add_plugin(MessagesPlugin)
fs_unix.map_file_fh("/etc/timezone", BytesIO(b"Europe/Amsterdam"))
fs_unix.map_file_fh("/var/log/installer/cloud-init.log", BytesIO(msg_bytes))
fs_unix.map_file_fh("/var/log/installer/cloud-init.log.1.gz", BytesIO(gzip.compress(msg_bytes)))
target_unix.add_plugin(MessagesPlugin)

results = list(target_unix_users.messages())
assert len(results) == 2
assert results[0].ts == dt(2005, 8, 9, 11, 55, 21)
results = sorted(list(target_unix.messages()), key=lambda r: r.source)
assert len(results) == 4

assert results[0].ts == datetime(2005, 8, 9, 11, 55, 21, 0, tzinfo=ZoneInfo("Europe/Amsterdam"))
assert results[0].daemon == "foo.py"
assert results[0].pid is None
assert results[0].message == "This is a cloud-init message!"
assert results[0].source == "/var/log/installer/cloud-init.log"

assert results[-1].ts == datetime(2005, 8, 9, 11, 55, 21, 1_000, tzinfo=ZoneInfo("Europe/Amsterdam"))
assert results[-1].daemon == "util.py"
assert results[-1].pid is None
assert (
results[-1].message
== "Cloud-init v. 1.2.3-4ubuntu5 running 'init-local' at Tue, 9 Aug 2005 11:55:21 +0000. Up 13.37 seconds." # noqa: E501
)
assert results[-1].source == "/var/log/installer/cloud-init.log.1.gz"

0 comments on commit cb69261

Please sign in to comment.