var (
	// selfPid is the PID of this process; a variable so tests can fake it.
	selfPid = os.Getpid()

	// Paths probed by the individual checks; variables so tests can
	// redirect them.
	pid2ProcPath   = "/proc/2/status"
	rockPath       = "/.rock/metadata.yaml"
	lxdPath        = "/proc/1/environ"
	dockerEnvPath  = "/.dockerenv"
	dockerInitPath = "/.dockerinit"

	// Confined may be initialised by a binary derived from this
	// repository. This provides an override mechanism to bypass
	// (speed up) detection if it is not needed.
	Confined *bool

	// once guards the single detection run performed by checks.
	once sync.Once
	// failure is the error produced by that detection run, if any. It is
	// only ever written by checks.
	failure error
)

// IsInit returns true if the system manager is the first process
// started by the Linux kernel, that is if its PID is 1.
func IsInit() bool {
	return selfPid == 1
}

// IsConfined works out if we are running inside a container.
// If the error is set, the detection result is meaningless.
func IsConfined() (bool, error) {
	// This will not only force a single detection, but also block additional
	// concurrent calls until the primary is complete.
	once.Do(checks)

	if Confined == nil {
		// Somebody cleared the override after detection ran. Report it,
		// but do not store the error in the shared failure variable:
		// writing it here would race with concurrent callers and would
		// keep failing every later call even once Confined is set again.
		return false, fmt.Errorf("confined state was globally set to nil")
	}

	if failure != nil {
		return false, failure
	}

	return *Confined, nil
}

// checks is a curated list of checks for speedy detection of
// confined environments. The quickest most obvious checks should be
// performed first. The list is iterated until a confinement check returns
// true. If all the checks fail to detect a confined runtime, we can
// assume it's an unconfined virtual/real machine.
//
// If the system manager is used as a library for derived projects where
// it's certain that no confinement exists, use the Confined global to
// bypass this check.
func checks() {
	// Was Confined globally set already?
	if Confined != nil {
		return
	}

	var res bool
	var err error

	// If any check encounters an error, or if we reach a
	// conclusion, we update the globals and return.
	defer func() {
		Confined = &res
		failure = err
	}()

	for _, check := range checkList {
		res, err = check()
		if err != nil || res {
			return
		}
	}
}

// checkList holds the detection checks in the order checks runs them.
var checkList = []func() (bool, error){
	isRock,
	isLxd,
	isDocker,
	noKernel,
}

// isRock checks if the rock metadata file (rockPath, by default
// /.rock/metadata.yaml) exists. A missing file is not an error; any
// other stat failure is returned wrapped.
func isRock() (bool, error) {
	_, err := os.Stat(rockPath)
	switch {
	case err == nil:
		return true, nil
	case os.IsNotExist(err):
		return false, nil
	default:
		return false, fmt.Errorf("rock detection file stat returned an error: %w", err)
	}
}

// isLxd checks if the environment file of PID 1 (lxdPath, by default
// /proc/1/environ) contains a "container" variable, as in "container=xxx".
// A missing file is not an error; any other read failure is returned wrapped.
func isLxd() (bool, error) {
	// Read directly instead of stat-then-read: the file may disappear
	// between the two calls, which must count as "not found".
	data, err := ioutil.ReadFile(lxdPath)
	if os.IsNotExist(err) {
		return false, nil
	}
	if err != nil {
		return false, fmt.Errorf("lxd/oci detection file read returned an error: %w", err)
	}

	// Entries are NUL separated KEY=VALUE pairs.
	for _, entry := range strings.Split(string(data), "\000") {
		key := entry
		if i := strings.IndexByte(entry, '='); i >= 0 {
			key = entry[:i]
		}
		if key == "container" {
			return true, nil
		}
	}
	return false, nil
}

// isDocker checks for /.dockerenv or /.dockerinit. Either file existing
// is enough, even if the other one could not be inspected. An error is
// returned only when neither exists and at least one stat failed for a
// reason other than the file being absent.
func isDocker() (bool, error) {
	var statErr error
	for _, path := range []string{dockerInitPath, dockerEnvPath} {
		_, err := os.Stat(path)
		if err == nil {
			return true, nil
		}
		if !os.IsNotExist(err) && statErr == nil {
			statErr = err
		}
	}
	if statErr != nil {
		return false, fmt.Errorf("docker detection file stat returned an error: %w", statErr)
	}
	return false, nil
}

// noKernel returns true if a kernel is not visible. The check will inspect
// the PPID of PID2 if it exists. If the PPID is zero it's kernel owned, which
// strongly suggests we have complete PID visibility, and are not confined.
//
// This check can be used to confirm the service manager is run inside of
// a container runtime. The following two known situations will result in
// invalid results:
//
//  1. If /proc is not mounted, it will return true
//  2. If docker passes through host pids, "docker run --pid host", it will
//     detect the kernel, even though it's inside a container.
//
// This is used as a last best effort test for container runtime cases not
// picked up by earlier tests. It is also very useful to verify that indeed
// the environment appears like a normal machine with unconfined access, as
// this is what the assumption will be.
func noKernel() (bool, error) {
	// This path may not exist in a specific userspace, so we
	// will not report any file not found errors.
	data, err := ioutil.ReadFile(pid2ProcPath)
	if os.IsNotExist(err) {
		return true, nil
	}
	if err != nil {
		return false, err
	}

	// Each line of the status file has the form "Key:\tvalue".
	for _, line := range strings.Split(string(data), "\n") {
		kv := strings.Split(line, "\t")
		if len(kv) != 2 || kv[0] != "PPid:" {
			continue
		}
		ppid, err := strconv.Atoi(kv[1])
		if err != nil {
			return false, err
		}
		if ppid == 0 {
			// PID 2 is parented by the kernel: the kernel is visible.
			return false, nil
		}
	}
	return true, nil
}
+ // As stated in the function description, this is one + // of the expected cases, because we want to support + // systems without /proc mounted. + c.Assert(v, Equals, true) + c.Assert(err, IsNil) +} + +func (s *cmdTestSuite) TestNoKernelPathError(c *C) { + path := createProcPid2Status(c, "", 0o000) + defer cmd.MockPid2ProcPath(path)() + v, err := cmd.NoKernel() + c.Assert(v, Equals, false) + c.Assert(err, ErrorMatches, "*permission denied") +} + +func (s *cmdTestSuite) TestNoKernelValidPath(c *C) { + + for _, d := range []struct { + status string + container bool + err string + }{ + // Note the /proc//status format is: + // :\t + // The delimiter is a tab, not spaces. + {` +Pid: 2 +PPid: 0 +Something: 32`, false, ""}, + {` +Pid: 2 +PPid: 1 +Something: 32`, true, ""}, + {` +Pid: 2 +PPid: str +Something: 32`, false, "*invalid syntax*"}, + {` +something +1 2 3 4`, true, ""}, + } { + cmd.ResetContainerInit() + path := createProcPid2Status(c, d.status, 0o644) + defer cmd.MockPid2ProcPath(path)() + v, err := cmd.NoKernel() + c.Assert(v, Equals, d.container) + if d.err == "" { + c.Assert(err, IsNil) + } else { + c.Assert(err, ErrorMatches, d.err) + } + } +} diff --git a/cmd/export_test.go b/cmd/export_test.go new file mode 100644 index 00000000..526e8431 --- /dev/null +++ b/cmd/export_test.go @@ -0,0 +1,57 @@ +// Copyright (c) 2014-2023 Canonical Ltd +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License version 3 as +// published by the Free Software Foundation. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . 
+ +package cmd + +import ( + "sync" +) + +var ( + IsRock = isRock + IsDocker = isDocker + IsLxd = isLxd + NoKernel = noKernel +) + +// MockPid2ProcPath assigns a temporary path to where the PID2 +// status can be found. +func MockPid2ProcPath(path string) (restore func()) { + orig := pid2ProcPath + pid2ProcPath = path + return func() { pid2ProcPath = orig } +} + +// MockPid allows faking the pid of this process +func MockPid(pid int) (restore func()) { + orig := selfPid + selfPid = pid + return func() { selfPid = orig } +} + +// MockVersion allows mocking the version which would +// otherwise only be real once the generator script +// has run. +func MockVersion(version string) (restore func()) { + old := Version + Version = version + return func() { Version = old } +} + +// ResetContainerInit forces the container runtime check +// to retry with globals reset +func ResetContainerInit() { + once = sync.Once{} + Confined = nil +} diff --git a/cmd/version.go b/cmd/version.go index 95d43d4f..c19e7136 100644 --- a/cmd/version.go +++ b/cmd/version.go @@ -18,9 +18,3 @@ package cmd // Version will be overwritten at build-time via mkversion.sh var Version = "unknown" - -func MockVersion(version string) (restore func()) { - old := Version - Version = version - return func() { Version = old } -} diff --git a/internals/overlord/overlord.go b/internals/overlord/overlord.go index 771a670d..89778dac 100644 --- a/internals/overlord/overlord.go +++ b/internals/overlord/overlord.go @@ -27,6 +27,7 @@ import ( "github.com/canonical/x-go/randutil" "gopkg.in/tomb.v2" + "github.com/canonical/pebble/cmd" "github.com/canonical/pebble/internals/osutil" "github.com/canonical/pebble/internals/overlord/checkstate" "github.com/canonical/pebble/internals/overlord/cmdstate" @@ -147,12 +148,17 @@ func loadState(statePath string, restartHandler restart.Handler, backend state.B if err != nil { return nil, fmt.Errorf("fatal: cannot find current boot ID: %v", err) } - // If pebble is PID 1 we don't 
care about /proc/sys/kernel/random/boot_id - // as we are most likely running in a container. LXD mounts it's own boot_id - // to correctly emulate the boot_id behaviour of non-containerized systems. - // Within containerd/docker, boot_id is consistent with the host, which provides - // us no context of restarts, so instead fallback to /proc/sys/kernel/random/uuid. - if os.Getpid() == 1 { + + confined, err := cmd.IsConfined() + if err != nil { + return nil, fmt.Errorf("fatal: confinement detection returned an error: %v", err) + } + if confined { + // We need a unique boot id to support failed reboot detection logic in the + // overlord. This is not guaranteed for a container runtime because not + // all implementations (e.g. Docker) updates the boot id on restart of the + // container. In this case we always return a different id on request, + // which will disable reboot failure detection for container runtimes. curBootID, err = randutil.RandomKernelUUID() if err != nil { return nil, fmt.Errorf("fatal: cannot generate psuedo boot-id: %v", err)