From a8b169adb23fa26e67475c550f417c9adae1db0f Mon Sep 17 00:00:00 2001 From: Danny Canter Date: Sat, 31 Dec 2022 04:35:12 -0800 Subject: [PATCH] cgroup2: Add Kill method to manager This adds support for killing all of the processes in a cgroup. On kernels 5.14+ this is very simple: a cgroup.kill file exists, and all you need to do is write "1" to it (see https://lwn.net/Articles/855924/). On prior kernels, or, to be more pedantic, whenever the file doesn't exist (to account for potential backports), I've taken the approach runc currently uses, which is a manual process of freezing the cgroup -> sending a signal to all of the processes -> thawing the cgroup. This also adds a simple test for this that should work on 5.15+ as well as on prior kernels. Signed-off-by: Danny Canter --- README.md | 13 +++++++ cgroup2/manager.go | 84 ++++++++++++++++++++++++++++++++++++++++- cgroup2/manager_test.go | 48 +++++++++++++++++++++++ cgroup2/utils.go | 10 +++++ 4 files changed, 154 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index ed823120..a90b8726 100644 --- a/README.md +++ b/README.md @@ -188,6 +188,19 @@ if err != nil { } ``` +### Kill all processes in a cgroup + +```go +m, err := cgroup2.LoadSystemd("/", "my-cgroup-abc.slice") +if err != nil { + return err +} +err = m.Kill() +if err != nil { + return err +} +``` + ### Attention All static path should not include `/sys/fs/cgroup/` prefix, it should start with your own cgroups name diff --git a/cgroup2/manager.go b/cgroup2/manager.go index cef8cd9b..f64d482c 100644 --- a/cgroup2/manager.go +++ b/cgroup2/manager.go @@ -42,6 +42,7 @@ import ( const ( subtreeControl = "cgroup.subtree_control" controllersFile = "cgroup.controllers" + killFile = "cgroup.kill" defaultCgroup2Path = "/sys/fs/cgroup" defaultSlice = "system.slice" ) @@ -366,6 +367,86 @@ func (c *Manager) AddThread(tid uint64) error { return writeValues(c.path, []Value{v}) } +// Kill will try to forcibly exit all of the processes in the cgroup. 
This is +// equivalent to sending a SIGKILL to every process. On kernels 5.14 and greater +// this will use the cgroup.kill file, on anything that doesn't have the cgroup.kill +// file, a manual process of freezing -> sending a SIGKILL to every process -> thawing +// will be used. +func (c *Manager) Kill() error { + v := Value{ + filename: killFile, + value: "1", + } + err := writeValues(c.path, []Value{v}) + if err == nil { + return nil + } + logrus.Warnf("falling back to slower kill implementation: %s", err) + // Fallback to slow method. + return c.fallbackKill() +} + +// fallbackKill is a slower fallback to the more modern (kernels 5.14+) +// approach of writing to the cgroup.kill file. This is heavily pulled +// from runc's same approach (in signalAllProcesses), with the only differences +// being this is just tailored to the API exposed in this library, and we don't +// need to care about signals other than SIGKILL. +// +// https://github.com/opencontainers/runc/blob/8da0a0b5675764feaaaaad466f6567a9983fcd08/libcontainer/init_linux.go#L523-L529 +func (c *Manager) fallbackKill() error { + if err := c.Freeze(); err != nil { + logrus.Warn(err) + } + pids, err := c.Procs(true) + if err != nil { + if err := c.Thaw(); err != nil { + logrus.Warn(err) + } + return err + } + var procs []*os.Process + for _, pid := range pids { + p, err := os.FindProcess(int(pid)) + if err != nil { + logrus.Warn(err) + continue + } + procs = append(procs, p) + if err := p.Signal(unix.SIGKILL); err != nil { + logrus.Warn(err) + } + } + if err := c.Thaw(); err != nil { + logrus.Warn(err) + } + + subreaper, err := getSubreaper() + if err != nil { + // The error here means that PR_GET_CHILD_SUBREAPER is not + // supported because this code might run on a kernel older + // than 3.4. We don't want to throw an error in that case, + // and we simplify things, considering there is no subreaper + // set. 
+ subreaper = 0 + } + + for _, p := range procs { + // In case a subreaper has been setup, this code must not + // wait for the process. Otherwise, we cannot be sure the + // current process will be reaped by the subreaper, while + // the subreaper might be waiting for this process in order + // to retrieve its exit code. + if subreaper == 0 { + if _, err := p.Wait(); err != nil { + if !errors.Is(err, unix.ECHILD) { + logrus.Warnf("wait on pid %d failed: %s", p.Pid, err) + } + } + } + } + return nil +} + func (c *Manager) Delete() error { // kernel prevents cgroups with running process from being removed, check the tree is empty processes, err := c.Procs(true) @@ -763,7 +844,8 @@ func setDevices(path string, devices []specs.LinuxDeviceCgroup) error { // the reason this is necessary is because the "-" character has a special meaning in // systemd slice. For example, when creating a slice called "my-group-112233.slice", // systemd will create a hierarchy like this: -// /sys/fs/cgroup/my.slice/my-group.slice/my-group-112233.slice +// +// /sys/fs/cgroup/my.slice/my-group.slice/my-group-112233.slice func getSystemdFullPath(slice, group string) string { return filepath.Join(defaultCgroup2Path, dashesToPath(slice), dashesToPath(group)) } diff --git a/cgroup2/manager_test.go b/cgroup2/manager_test.go index 667aa573..d243d579 100644 --- a/cgroup2/manager_test.go +++ b/cgroup2/manager_test.go @@ -142,6 +142,54 @@ func TestSystemdFullPath(t *testing.T) { } } +func TestKill(t *testing.T) { + checkCgroupMode(t) + manager, err := NewManager(defaultCgroup2Path, "/test1", ToResources(&specs.LinuxResources{})) + if err != nil { + t.Fatal(err) + } + var procs []*exec.Cmd + for i := 0; i < 5; i++ { + cmd := exec.Command("sleep", "infinity") + if err := cmd.Start(); err != nil { + t.Fatal(err) + } + if cmd.Process == nil { + t.Fatal("Process is nil") + } + if err := manager.AddProc(uint64(cmd.Process.Pid)); err != nil { + t.Fatal(err) + } + procs = append(procs, cmd) + } + // Verify 
we have 5 pids before beginning Kill below. + pids, err := manager.Procs(true) + if err != nil { + t.Fatal(err) + } + if len(pids) != 5 { + t.Fatalf("expected 5 pids, got %d", len(pids)) + } + // Now run kill, and check that nothing is running after. + if err := manager.Kill(); err != nil { + t.Fatal(err) + } + + done := make(chan struct{}) + go func() { + for _, proc := range procs { + _ = proc.Wait() + } + done <- struct{}{} + }() + + select { + case <-time.After(time.Second * 3): + t.Fatal("timed out waiting for processes to exit") + case <-done: + } +} + func TestMoveTo(t *testing.T) { checkCgroupMode(t) manager, err := NewManager(defaultCgroup2Path, "/test1", ToResources(&specs.LinuxResources{})) diff --git a/cgroup2/utils.go b/cgroup2/utils.go index 74bef4ac..77650183 100644 --- a/cgroup2/utils.go +++ b/cgroup2/utils.go @@ -26,12 +26,14 @@ import ( "strconv" "strings" "time" + "unsafe" "github.com/containerd/cgroups/v3/cgroup2/stats" "github.com/godbus/dbus/v5" "github.com/opencontainers/runtime-spec/specs-go" "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" ) const ( @@ -434,3 +436,11 @@ func readHugeTlbStats(path string) []*stats.HugeTlbStat { } return usage } + +func getSubreaper() (int, error) { + var i uintptr + if err := unix.Prctl(unix.PR_GET_CHILD_SUBREAPER, uintptr(unsafe.Pointer(&i)), 0, 0, 0); err != nil { + return -1, err + } + return int(i), nil +}