You've already forked runc
mirror of
https://github.com/opencontainers/runc.git
synced 2025-11-09 13:00:56 +03:00
runc exec: use CLONE_INTO_CGROUP when available
It makes sense to make runc exec benefit from clone3(CLONE_INTO_CGROUP), if it is available. Since it requires a recent kernel and might not work, implement a fallback to the older way of joining the cgroup. Based on: - https://go-review.googlesource.com/c/go/+/417695 - https://github.com/coreos/go-systemd/pull/458 - https://github.com/opencontainers/cgroups/pull/26 - https://github.com/opencontainers/runc/pull/4822 Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
This commit is contained in:
@@ -16,6 +16,7 @@ import (
|
|||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
|
"syscall"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/opencontainers/runtime-spec/specs-go"
|
"github.com/opencontainers/runtime-spec/specs-go"
|
||||||
@@ -310,18 +311,78 @@ func (p *setnsProcess) addIntoCgroupV2() error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (p *setnsProcess) addIntoCgroup() error {
|
func (p *setnsProcess) addIntoCgroup() error {
|
||||||
|
if p.cmd.SysProcAttr.UseCgroupFD {
|
||||||
|
// We've used cgroupfd successfully, so the process is
|
||||||
|
// already in the proper cgroup, nothing to do here.
|
||||||
|
return nil
|
||||||
|
}
|
||||||
if cgroups.IsCgroup2UnifiedMode() {
|
if cgroups.IsCgroup2UnifiedMode() {
|
||||||
return p.addIntoCgroupV2()
|
return p.addIntoCgroupV2()
|
||||||
}
|
}
|
||||||
return p.addIntoCgroupV1()
|
return p.addIntoCgroupV1()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// prepareCgroupFD sets up p.cmd to use clone3 with CLONE_INTO_CGROUP
|
||||||
|
// to join cgroup early, in p.cmd.Start. Returns an *os.File which
|
||||||
|
// must be closed by the caller after p.cmd.Start returns.
|
||||||
|
func (p *setnsProcess) prepareCgroupFD() (*os.File, error) {
|
||||||
|
if !cgroups.IsCgroup2UnifiedMode() {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
base := p.manager.Path("")
|
||||||
|
if base == "" { // No cgroup to join.
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
sub := ""
|
||||||
|
if p.process.SubCgroupPaths != nil {
|
||||||
|
sub = p.process.SubCgroupPaths[""]
|
||||||
|
}
|
||||||
|
cgroup := path.Join(base, sub)
|
||||||
|
if !strings.HasPrefix(cgroup, base) {
|
||||||
|
return nil, fmt.Errorf("bad sub cgroup path: %s", sub)
|
||||||
|
}
|
||||||
|
|
||||||
|
fd, err := cgroups.OpenFile(base, sub, unix.O_PATH|unix.O_DIRECTORY|unix.O_CLOEXEC)
|
||||||
|
if err != nil {
|
||||||
|
if p.rootlessCgroups {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
return nil, fmt.Errorf("can't open cgroup: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
logrus.Debugf("using CLONE_INTO_CGROUP %q", cgroup)
|
||||||
|
if p.cmd.SysProcAttr == nil {
|
||||||
|
p.cmd.SysProcAttr = &syscall.SysProcAttr{}
|
||||||
|
}
|
||||||
|
p.cmd.SysProcAttr.UseCgroupFD = true
|
||||||
|
p.cmd.SysProcAttr.CgroupFD = int(fd.Fd())
|
||||||
|
|
||||||
|
return fd, nil
|
||||||
|
}
|
||||||
|
|
||||||
func (p *setnsProcess) start() (retErr error) {
|
func (p *setnsProcess) start() (retErr error) {
|
||||||
defer p.comm.closeParent()
|
defer p.comm.closeParent()
|
||||||
|
|
||||||
|
fd, err := p.prepareCgroupFD()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
// Get the "before" value of oom kill count.
|
// Get the "before" value of oom kill count.
|
||||||
oom, _ := p.manager.OOMKillCount()
|
oom, _ := p.manager.OOMKillCount()
|
||||||
err := p.startWithCPUAffinity()
|
|
||||||
|
err = p.startWithCPUAffinity()
|
||||||
|
if fd != nil {
|
||||||
|
fd.Close()
|
||||||
|
}
|
||||||
|
if err != nil && p.cmd.SysProcAttr.UseCgroupFD {
|
||||||
|
logrus.Debugf("exec with CLONE_INTO_CGROUP failed: %v; retrying without", err)
|
||||||
|
// SysProcAttr.CgroupFD is never used when UseCgroupFD is unset.
|
||||||
|
p.cmd.SysProcAttr.UseCgroupFD = false
|
||||||
|
err = p.startWithCPUAffinity()
|
||||||
|
}
|
||||||
|
|
||||||
// Close the child-side of the pipes (controlled by child).
|
// Close the child-side of the pipes (controlled by child).
|
||||||
p.comm.closeChild()
|
p.comm.closeChild()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@@ -282,7 +282,7 @@ function check_exec_debug() {
|
|||||||
# Check we can't join non-existing subcgroup.
|
# Check we can't join non-existing subcgroup.
|
||||||
runc exec --cgroup nonexistent test_busybox cat /proc/self/cgroup
|
runc exec --cgroup nonexistent test_busybox cat /proc/self/cgroup
|
||||||
[ "$status" -ne 0 ]
|
[ "$status" -ne 0 ]
|
||||||
[[ "$output" == *" adding pid "*"o such file or directory"* ]]
|
[[ "$output" == *" cgroup"*"o such file or directory"* ]]
|
||||||
|
|
||||||
# Check we can join top-level cgroup (implicit).
|
# Check we can join top-level cgroup (implicit).
|
||||||
runc exec test_busybox grep '^0::/$' /proc/self/cgroup
|
runc exec test_busybox grep '^0::/$' /proc/self/cgroup
|
||||||
|
|||||||
Reference in New Issue
Block a user