diff --git a/dissect/target/plugins/apps/container/docker.py b/dissect/target/plugins/apps/container/docker.py index ff360ed7c..a33dce5f9 100644 --- a/dissect/target/plugins/apps/container/docker.py +++ b/dissect/target/plugins/apps/container/docker.py @@ -4,7 +4,7 @@ import logging import re from pathlib import Path -from typing import Iterator, Optional +from typing import Iterator from dissect.cstruct import cstruct from dissect.util import ts @@ -128,12 +128,17 @@ def images(self) -> Iterator[DockerImageRecord]: for data_root in self.installs: images_path = data_root.joinpath("image/overlay2/repositories.json") - if images_path.exists(): - repositories = json.loads(images_path.read_text()).get("Repositories") - else: + if not images_path.exists(): self.target.log.debug("No docker images found, file %s does not exist.", images_path) continue + try: + repositories = json.loads(images_path.read_text()).get("Repositories", {}) + except (json.JSONDecodeError, UnicodeDecodeError) as e: + self.target.log.warning("Unable to parse JSON in: %s", images_path) + self.target.log.debug("", exc_info=e) + continue + for name, tags in repositories.items(): for tag, hash in tags.items(): image_metadata_path = data_root.joinpath( @@ -142,8 +147,12 @@ def images(self) -> Iterator[DockerImageRecord]: created = None if image_metadata_path.exists(): - image_metadata = json.loads(image_metadata_path.read_text()) - created = convert_timestamp(image_metadata.get("created")) + try: + image_metadata = json.loads(image_metadata_path.read_text()) + created = convert_timestamp(image_metadata.get("created")) + except (json.JSONDecodeError, UnicodeDecodeError) as e: + self.target.log.warning("Unable to parse JSON in: %s", image_metadata_path) + self.target.log.debug("", exc_info=e) yield DockerImageRecord( name=name, @@ -160,47 +169,51 @@ def containers(self) -> Iterator[DockerContainerRecord]: for data_root in self.installs: for config_path in 
data_root.joinpath("containers").glob("**/config.v2.json"): - config = json.loads(config_path.read_text()) + try: + config = json.loads(config_path.read_text()) + except (json.JSONDecodeError, UnicodeDecodeError) as e: + self.target.log.warning("Unable to parse JSON in file: %s", config_path) + self.target.log.debug("", exc_info=e) + continue + container_id = config.get("ID") # determine state - running = config.get("State").get("Running") + running = config.get("State", {}).get("Running") if running: - ports = config.get("NetworkSettings").get("Ports", {}) - pid = config.get("Pid") - else: - ports = config.get("Config").get("ExposedPorts", {}) - pid = None + ports = config.get("NetworkSettings", {}).get("Ports", {}) + + if not running or not ports: + ports = config.get("Config", {}).get("ExposedPorts", {}) # parse volumes volumes = [] - if mount_points := config.get("MountPoints"): - for mp in mount_points: - mount_point = mount_points[mp] + if mount_points := config.get("MountPoints", {}): + for mount_point in mount_points.values(): volumes.append(f"{mount_point.get('Source')}:{mount_point.get('Destination')}") # determine mount point mount_path = None - if config.get("Driver") == "overlay2": + if container_id and config.get("Driver") == "overlay2": mount_path = data_root.joinpath("image/overlay2/layerdb/mounts", container_id) if not mount_path.exists(): - self.target.log.warning("Overlay2 mount path for container %s does not exist!", container_id) + self.target.log.warning("Overlay2 mount path does not exist for container: %s", container_id) else: - self.target.log.warning("Encountered unsupported container filesystem %s", config.get("Driver")) + self.target.log.warning("Encountered unsupported container filesystem: %s", config.get("Driver")) yield DockerContainerRecord( container_id=container_id, - image=config.get("Config").get("Image"), - image_id=config.get("Image").split(":")[-1], - command=config.get("Config").get("Cmd"), + image=config.get("Config", 
def get_data_path(path: Path) -> str | None:
    """Return the ``data-root`` value from a Docker daemon JSON configuration file.

    Args:
        path: Location of the JSON configuration file to read.

    Returns:
        The value stored under the ``data-root`` key, or ``None`` when the key is
        absent or the file content cannot be decoded as JSON text.
    """
    try:
        # Use a context manager so the file handle is closed after reading.
        with path.open("rt") as fh:
            config = json.load(fh)
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        log.warning("Could not read JSON file: %s", path)
        # ``Logger.debug`` requires a message argument; an empty one keeps the
        # traceback-only debug record used by the other handlers in this file.
        log.debug("", exc_info=e)
        # ``config`` was never bound on this path, so stop here instead of
        # falling through to the lookup below.
        return None

    return config.get("data-root")
@@ -350,6 +363,9 @@ def convert_timestamp(timestamp: str) -> str: compatbility with the 6 digit %f microsecond directive. """ + if not timestamp: + return + timestamp_nanoseconds_plus_postfix = timestamp[19:] match = RE_DOCKER_NS.match(timestamp_nanoseconds_plus_postfix) diff --git a/tests/plugins/apps/container/test_docker.py b/tests/plugins/apps/container/test_docker.py index dd2b33be4..9ca209f51 100644 --- a/tests/plugins/apps/container/test_docker.py +++ b/tests/plugins/apps/container/test_docker.py @@ -1,4 +1,5 @@ import datetime +import json import operator from io import BytesIO from typing import Iterator @@ -14,6 +15,7 @@ find_installs, strip_log, ) +from dissect.target.plugins.os.unix._os import UnixPlugin from tests._utils import absolute_path @@ -134,3 +136,67 @@ def test_backspace_interpretation() -> None: '~ # \x1b[6necho \'\x08\x1b[J"ths \x08\x1b[J\x08\x1b[Js \x08\x1b[J\x08\x1b[Jis is a secret!" > secret.txt\r\n' ) assert strip_log(input, exc_backspace=True) == '~ # echo "this is a secret!" 
def test_regression_running_container_parsing(target_unix: Target, fs_unix: VirtualFilesystem) -> None:
    """Regression test for port and command reconstruction on a running container.

    The mapped ``config.v2.json`` marks the container as running but leaves
    ``NetworkSettings.Ports`` empty; the expected ports mirror the keys of
    ``Config.ExposedPorts`` and the expected command is ``Path`` followed by ``Args``.
    """
    # Named ``container_id`` rather than ``id`` so the builtin is not shadowed.
    container_id = "deadbeef"
    config = {
        "ID": container_id,
        "Image": "sha256:blabla",
        "Name": "/foo",
        "Path": "/bin/some-binary",
        "Driver": "overlay2",
        "Args": [
            "--some-argument 1",
            "--another-argument 2",
        ],
        "State": {
            "Running": True,
            "Pid": 1337,
            "StartedAt": "2024-12-31T13:37:00.123456789Z",
        },
        "Config": {
            "Hostname": "foo",
            "ExposedPorts": {
                "1337/tcp": {},
                "1337/udp": {},
            },
            "Image": "docker.io/debian",
        },
        "NetworkSettings": {
            "Ports": {},
        },
        "MountPoints": {
            "/dest/file.txt": {
                "Source": "/somewhere/on/host/file.txt",
                "Destination": "/dest/file.txt",
            },
        },
    }

    fs_unix.map_file_fh(
        f"/var/lib/docker/containers/{container_id}/config.v2.json",
        BytesIO(json.dumps(config).encode()),
    )
    fs_unix.map_file_fh("/etc/hostname", BytesIO(b"hostname"))

    target_unix.add_plugin(UnixPlugin)
    target_unix.add_plugin(DockerPlugin)

    results = list(target_unix.docker.containers())
    assert len(results) == 1

    # Single record expected; bind it once instead of indexing in every assertion.
    record = results[0]

    assert record.hostname == "hostname"
    assert record.container_id == container_id
    assert record.image == "docker.io/debian"
    assert record.image_id == "blabla"
    assert record.command == "/bin/some-binary --some-argument 1 --another-argument 2"
    assert record.created is None
    assert record.running
    assert record.pid == 1337
    assert record.started == datetime.datetime(2024, 12, 31, 13, 37, 0, 123456, tzinfo=datetime.timezone.utc)
    assert record.finished is None
    assert record.ports == str({"1337/tcp": "0.0.0.0:1337", "1337/udp": "0.0.0.0:1337"})
    assert record.names == "foo"
    assert record.volumes == ["/somewhere/on/host/file.txt:/dest/file.txt"]
    assert record.mount_path == "/var/lib/docker/image/overlay2/layerdb/mounts/deadbeef"
    assert record.config_path == "/var/lib/docker/containers/deadbeef/config.v2.json"