1
0
mirror of https://github.com/opencontainers/runc.git synced 2025-11-09 13:00:56 +03:00

runc exec: use CLONE_INTO_CGROUP when available

It makes sense to make runc exec benefit from clone3(CLONE_INTO_CGROUP),
if it is available. Since it requires a recent kernel and might not work,
implement a fallback to the older way of joining the cgroup.

Based on:
 - https://go-review.googlesource.com/c/go/+/417695
 - https://github.com/coreos/go-systemd/pull/458
 - https://github.com/opencontainers/cgroups/pull/26
 - https://github.com/opencontainers/runc/pull/4822

Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
This commit is contained in:
Kir Kolyshkin
2025-07-15 16:31:49 -07:00
parent 7d81b21c1a
commit 5af4dd4e64
2 changed files with 63 additions and 2 deletions

View File

@@ -16,6 +16,7 @@ import (
"strconv" "strconv"
"strings" "strings"
"sync" "sync"
"syscall"
"time" "time"
"github.com/opencontainers/runtime-spec/specs-go" "github.com/opencontainers/runtime-spec/specs-go"
@@ -310,18 +311,78 @@ func (p *setnsProcess) addIntoCgroupV2() error {
} }
func (p *setnsProcess) addIntoCgroup() error { func (p *setnsProcess) addIntoCgroup() error {
if p.cmd.SysProcAttr.UseCgroupFD {
// We've used cgroupfd successfully, so the process is
// already in the proper cgroup, nothing to do here.
return nil
}
if cgroups.IsCgroup2UnifiedMode() { if cgroups.IsCgroup2UnifiedMode() {
return p.addIntoCgroupV2() return p.addIntoCgroupV2()
} }
return p.addIntoCgroupV1() return p.addIntoCgroupV1()
} }
// prepareCgroupFD configures p.cmd so that p.cmd.Start uses clone3 with
// CLONE_INTO_CGROUP, putting the new process into its cgroup at creation.
//
// When a non-nil *os.File is returned, the caller must close it after
// p.cmd.Start returns. A nil file together with a nil error means the
// cgroup fd is not being used: cgroups.IsCgroup2UnifiedMode reported false,
// the manager has no cgroup path, or opening the cgroup failed while
// p.rootlessCgroups is set. A sub-cgroup path that escapes the base path,
// or an open failure in the non-rootless case, is returned as an error.
func (p *setnsProcess) prepareCgroupFD() (*os.File, error) {
	if !cgroups.IsCgroup2UnifiedMode() {
		return nil, nil
	}

	root := p.manager.Path("")
	if root == "" {
		// No cgroup to join.
		return nil, nil
	}

	// Indexing a nil map yields the zero value, so a missing or nil
	// SubCgroupPaths simply gives an empty sub-path.
	subPath := p.process.SubCgroupPaths[""]

	target := path.Join(root, subPath)
	if !strings.HasPrefix(target, root) {
		return nil, fmt.Errorf("bad sub cgroup path: %s", subPath)
	}

	dir, err := cgroups.OpenFile(root, subPath, unix.O_PATH|unix.O_DIRECTORY|unix.O_CLOEXEC)
	if err != nil {
		if !p.rootlessCgroups {
			return nil, fmt.Errorf("can't open cgroup: %w", err)
		}
		// Rootless: silently skip the cgroup fd.
		return nil, nil
	}

	logrus.Debugf("using CLONE_INTO_CGROUP %q", target)

	attr := p.cmd.SysProcAttr
	if attr == nil {
		attr = &syscall.SysProcAttr{}
		p.cmd.SysProcAttr = attr
	}
	attr.UseCgroupFD = true
	attr.CgroupFD = int(dir.Fd())

	return dir, nil
}
func (p *setnsProcess) start() (retErr error) { func (p *setnsProcess) start() (retErr error) {
defer p.comm.closeParent() defer p.comm.closeParent()
fd, err := p.prepareCgroupFD()
if err != nil {
return err
}
// Get the "before" value of oom kill count. // Get the "before" value of oom kill count.
oom, _ := p.manager.OOMKillCount() oom, _ := p.manager.OOMKillCount()
err := p.startWithCPUAffinity()
err = p.startWithCPUAffinity()
if fd != nil {
fd.Close()
}
if err != nil && p.cmd.SysProcAttr.UseCgroupFD {
logrus.Debugf("exec with CLONE_INTO_CGROUP failed: %v; retrying without", err)
// SysProcAttr.CgroupFD is never used when UseCgroupFD is unset.
p.cmd.SysProcAttr.UseCgroupFD = false
err = p.startWithCPUAffinity()
}
// Close the child-side of the pipes (controlled by child). // Close the child-side of the pipes (controlled by child).
p.comm.closeChild() p.comm.closeChild()
if err != nil { if err != nil {

View File

@@ -282,7 +282,7 @@ function check_exec_debug() {
# Check we can't join non-existing subcgroup. # Check we can't join non-existing subcgroup.
runc exec --cgroup nonexistent test_busybox cat /proc/self/cgroup runc exec --cgroup nonexistent test_busybox cat /proc/self/cgroup
[ "$status" -ne 0 ] [ "$status" -ne 0 ]
[[ "$output" == *" adding pid "*"o such file or directory"* ]] [[ "$output" == *" cgroup"*"o such file or directory"* ]]
# Check we can join top-level cgroup (implicit). # Check we can join top-level cgroup (implicit).
runc exec test_busybox grep '^0::/$' /proc/self/cgroup runc exec test_busybox grep '^0::/$' /proc/self/cgroup