From 3834222d88beae5b40e04c3359e76156758ec5e7 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Thu, 14 May 2020 19:12:54 -0700 Subject: [PATCH 01/12] libct/cgroups/utils: getControllerPath return err for v2 This function is not used and was never used in any cgroupv2 code. To have it stay that way, let it return error in case it's called for v2. Signed-off-by: Kir Kolyshkin --- libcontainer/cgroups/utils.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libcontainer/cgroups/utils.go b/libcontainer/cgroups/utils.go index a9486818..f54ac64e 100644 --- a/libcontainer/cgroups/utils.go +++ b/libcontainer/cgroups/utils.go @@ -418,7 +418,7 @@ func parseCgroupFromReader(r io.Reader) (map[string]string, error) { func getControllerPath(subsystem string, cgroups map[string]string) (string, error) { if IsCgroup2UnifiedMode() { - return "/", nil + return "", errUnified } if p, ok := cgroups[subsystem]; ok { From 44b75e760ece4fe09a7cd8c56540aff00cad732f Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Tue, 16 Jun 2020 10:20:31 -0700 Subject: [PATCH 02/12] libct/cgroups: separate getCgroupMountsV1 This function should not really be used for cgroupv2 code. Currently it is used in kubernetes code, so we can't remove the v2 case yet. Add a TODO item to remove v2 code once kubernetes is converted to not use it, and separate out v1 code. Signed-off-by: Kir Kolyshkin --- libcontainer/cgroups/utils.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/libcontainer/cgroups/utils.go b/libcontainer/cgroups/utils.go index f54ac64e..cb3e9a12 100644 --- a/libcontainer/cgroups/utils.go +++ b/libcontainer/cgroups/utils.go @@ -219,8 +219,11 @@ func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount, // GetCgroupMounts returns the mounts for the cgroup subsystems. // all indicates whether to return just the first instance or all the mounts. 
+// This function should not be used from cgroupv2 code, as in this case +// all the controllers are available under the constant unifiedMountpoint. func GetCgroupMounts(all bool) ([]Mount, error) { if IsCgroup2UnifiedMode() { + // TODO: remove cgroupv2 case once all external users are converted availableControllers, err := GetAllSubsystems() if err != nil { return nil, err @@ -233,6 +236,10 @@ func GetCgroupMounts(all bool) ([]Mount, error) { return []Mount{m}, nil } + return getCgroupMountsV1(all) +} + +func getCgroupMountsV1(all bool) ([]Mount, error) { f, err := os.Open("/proc/self/mountinfo") if err != nil { return nil, err From 142d0f2d5d75a2848beefb373708dab78f6d9329 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Thu, 14 May 2020 19:38:11 -0700 Subject: [PATCH 03/12] libct/cgroups/utils: make FindCgroupMountpoint* v1-specific It's bad and wrong to use these functions for any cgroupv2 code, and there are no existing users (in runc, at least). Make them return an error in such case. Also, remove the cgroupv2-specific handling from findCgroupMountpointAndRootFromReader(). 
Signed-off-by: Kir Kolyshkin --- libcontainer/cgroups/utils.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/libcontainer/cgroups/utils.go b/libcontainer/cgroups/utils.go index cb3e9a12..8f77522e 100644 --- a/libcontainer/cgroups/utils.go +++ b/libcontainer/cgroups/utils.go @@ -54,13 +54,17 @@ func IsCgroup2UnifiedMode() bool { // https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt func FindCgroupMountpoint(cgroupPath, subsystem string) (string, error) { if IsCgroup2UnifiedMode() { - return unifiedMountpoint, nil + return "", errUnified } mnt, _, err := FindCgroupMountpointAndRoot(cgroupPath, subsystem) return mnt, err } func FindCgroupMountpointAndRoot(cgroupPath, subsystem string) (string, string, error) { + if IsCgroup2UnifiedMode() { + return "", "", errUnified + } + // We are not using mount.GetMounts() because it's super-inefficient, // parsing it directly sped up x10 times because of not using Sscanf. // It was one of two major performance drawbacks in container start. @@ -74,10 +78,6 @@ func FindCgroupMountpointAndRoot(cgroupPath, subsystem string) (string, string, } defer f.Close() - if IsCgroup2UnifiedMode() { - subsystem = "" - } - return findCgroupMountpointAndRootFromReader(f, cgroupPath, subsystem) } @@ -91,7 +91,7 @@ func findCgroupMountpointAndRootFromReader(reader io.Reader, cgroupPath, subsyst } if strings.HasPrefix(fields[4], cgroupPath) { for _, opt := range strings.Split(fields[len(fields)-1], ",") { - if (subsystem == "" && fields[9] == "cgroup2") || opt == subsystem { + if opt == subsystem { return fields[4], fields[3], nil } } From 52b56bc28eb74443d2a249eb7d706759067abd12 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Fri, 15 May 2020 12:03:16 -0700 Subject: [PATCH 04/12] libc/criuSwrk: remove applyCgroups param Its value can be easily deduced from the request type. While at it, simplify the call logic. 
Signed-off-by: Kir Kolyshkin --- libcontainer/container_linux.go | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 2c7420ef..ca1d439d 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -703,7 +703,7 @@ func (c *linuxContainer) checkCriuFeatures(criuOpts *CriuOpts, rpcOpts *criurpc. Features: criuFeat, } - err := c.criuSwrk(nil, req, criuOpts, false, nil) + err := c.criuSwrk(nil, req, criuOpts, nil) if err != nil { logrus.Debugf("%s", err) return errors.New("CRIU feature check failed") @@ -1045,7 +1045,7 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error { } } - err = c.criuSwrk(nil, req, criuOpts, false, nil) + err = c.criuSwrk(nil, req, criuOpts, nil) if err != nil { return err } @@ -1339,10 +1339,15 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error { req.Opts.InheritFd = append(req.Opts.InheritFd, inheritFd) } } - return c.criuSwrk(process, req, criuOpts, true, extraFiles) + return c.criuSwrk(process, req, criuOpts, extraFiles) } func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error { + // need to apply cgroups only on restore + if req.GetType() != criurpc.CriuReqType_RESTORE { + return nil + } + // XXX: Do we need to deal with this case? AFAIK criu still requires root. 
if err := c.cgroupManager.Apply(pid); err != nil { return err @@ -1369,7 +1374,7 @@ func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error { return nil } -func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *CriuOpts, applyCgroups bool, extraFiles []*os.File) error { +func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *CriuOpts, extraFiles []*os.File) error { fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_SEQPACKET|unix.SOCK_CLOEXEC, 0) if err != nil { return err @@ -1433,11 +1438,8 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts * } }() - if applyCgroups { - err := c.criuApplyCgroups(criuProcess.Pid, req) - if err != nil { - return err - } + if err := c.criuApplyCgroups(criuProcess.Pid, req); err != nil { + return err } var extFds []string From d5c57dcea6d8b19c0f87fc0a212d3020215af06e Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Fri, 15 May 2020 12:22:27 -0700 Subject: [PATCH 05/12] libct/criuApplyCgroups: don't set cgroup paths for v2 There is no need to have cgroupv1-specific controller paths on restore in case of cgroupv2. 
Signed-off-by: Kir Kolyshkin --- libcontainer/container_linux.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index ca1d439d..e958d159 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -1357,6 +1357,11 @@ func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error { return newSystemError(err) } + if cgroups.IsCgroup2UnifiedMode() { + return nil + } + // the stuff below is cgroupv1-specific + path := fmt.Sprintf("/proc/%d/cgroup", pid) cgroupsPaths, err := cgroups.ParseCgroupFile(path) if err != nil { From 5785aabc13b857edcb9c38ee92ccbf3719abdc92 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Fri, 15 May 2020 12:26:42 -0700 Subject: [PATCH 06/12] libct/cgroups: make isSubsystemAvailable v1-specific This function is only called from cgroupv1 code, so there is no need for it to implement cgroupv2 stuff. Make it v1-specific, and panic if it is called from v2 code (since this is an internal function, the panic would mean incorrect runc code). 
Signed-off-by: Kir Kolyshkin --- libcontainer/cgroups/utils.go | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/libcontainer/cgroups/utils.go b/libcontainer/cgroups/utils.go index 8f77522e..a6b96d9f 100644 --- a/libcontainer/cgroups/utils.go +++ b/libcontainer/cgroups/utils.go @@ -106,16 +106,7 @@ func findCgroupMountpointAndRootFromReader(reader io.Reader, cgroupPath, subsyst func isSubsystemAvailable(subsystem string) bool { if IsCgroup2UnifiedMode() { - controllers, err := GetAllSubsystems() - if err != nil { - return false - } - for _, c := range controllers { - if c == subsystem { - return true - } - } - return false + panic("don't call isSubsystemAvailable from cgroupv2 code") } cgroups, err := ParseCgroupFile("/proc/self/cgroup") From d244b4058ee81ca43182bffe536d63ec70326904 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Thu, 28 May 2020 15:23:18 -0700 Subject: [PATCH 07/12] libct/cgroups: improve ParseCgroupFile docs In particular, state that for cgroup v2 the result is very different. Signed-off-by: Kir Kolyshkin --- libcontainer/cgroups/utils.go | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/libcontainer/cgroups/utils.go b/libcontainer/cgroups/utils.go index a6b96d9f..eba4379b 100644 --- a/libcontainer/cgroups/utils.go +++ b/libcontainer/cgroups/utils.go @@ -373,8 +373,15 @@ func readProcsFile(file string) ([]int, error) { return out, nil } -// ParseCgroupFile parses the given cgroup file, typically from -// /proc/<pid>/cgroup, into a map of subgroups to cgroup names. +// ParseCgroupFile parses the given cgroup file, typically /proc/self/cgroup +// or /proc/<pid>/cgroup, into a map of subsystems to cgroup paths, e.g. +// "cpu": "/user.slice/user-1000.slice" +// "pids": "/user.slice/user-1000.slice" +// etc. 
+// +// Note that for cgroup v2 unified hierarchy, there are no per-controller +// cgroup paths, so the resulting map will have a single element where the key +// is empty string ("") and the value is the cgroup path the <pid> is in. func ParseCgroupFile(path string) (map[string]string, error) { f, err := os.Open(path) if err != nil { From 7db2d3e146d532f7d9eb4dbc690ca8c07d89749f Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Fri, 15 May 2020 14:13:25 -0700 Subject: [PATCH 08/12] libcontainer/cgroups: rm FindCgroupMountpointDir This function is cgroupv1-specific, is only used once, and its name is very close to the name of another function, FindCgroupMountpoint. Inline it into the (only) caller. Signed-off-by: Kir Kolyshkin --- libcontainer/cgroups/fs/apply_raw.go | 37 +++++++++++++++++++++++++++- libcontainer/cgroups/utils.go | 37 ---------------------------- 2 files changed, 36 insertions(+), 38 deletions(-) diff --git a/libcontainer/cgroups/fs/apply_raw.go b/libcontainer/cgroups/fs/apply_raw.go index 2093e0f8..245ce84f 100644 --- a/libcontainer/cgroups/fs/apply_raw.go +++ b/libcontainer/cgroups/fs/apply_raw.go @@ -3,9 +3,11 @@ package fs import ( + "bufio" "fmt" "os" "path/filepath" + "strings" "sync" "github.com/opencontainers/runc/libcontainer/cgroups" @@ -88,10 +90,43 @@ func getCgroupRoot() (string, error) { return cgroupRoot, nil } - root, err := cgroups.FindCgroupMountpointDir() + f, err := os.Open("/proc/self/mountinfo") if err != nil { return "", err } + defer f.Close() + + var root string + scanner := bufio.NewScanner(f) + for scanner.Scan() { + text := scanner.Text() + fields := strings.Split(text, " ") + // Safe as mountinfo encodes mountpoints with spaces as \040. 
+ index := strings.Index(text, " - ") + postSeparatorFields := strings.Fields(text[index+3:]) + numPostFields := len(postSeparatorFields) + + // This is an error as we can't detect if the mount is for "cgroup" + if numPostFields == 0 { + return "", fmt.Errorf("mountinfo: found no fields post '-' in %q", text) + } + + if postSeparatorFields[0] == "cgroup" { + // Check that the mount is properly formatted. + if numPostFields < 3 { + return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text) + } + + root = filepath.Dir(fields[4]) + break + } + } + if err := scanner.Err(); err != nil { + return "", err + } + if root == "" { + return "", errors.New("no cgroup mount found in mountinfo") + } if _, err := os.Stat(root); err != nil { return "", err diff --git a/libcontainer/cgroups/utils.go b/libcontainer/cgroups/utils.go index eba4379b..b1171d8d 100644 --- a/libcontainer/cgroups/utils.go +++ b/libcontainer/cgroups/utils.go @@ -117,43 +117,6 @@ func isSubsystemAvailable(subsystem string) bool { return avail } -func FindCgroupMountpointDir() (string, error) { - f, err := os.Open("/proc/self/mountinfo") - if err != nil { - return "", err - } - defer f.Close() - - scanner := bufio.NewScanner(f) - for scanner.Scan() { - text := scanner.Text() - fields := strings.Split(text, " ") - // Safe as mountinfo encodes mountpoints with spaces as \040. - index := strings.Index(text, " - ") - postSeparatorFields := strings.Fields(text[index+3:]) - numPostFields := len(postSeparatorFields) - - // This is an error as we can't detect if the mount is for "cgroup" - if numPostFields == 0 { - return "", fmt.Errorf("Found no fields post '-' in %q", text) - } - - if postSeparatorFields[0] == "cgroup" || postSeparatorFields[0] == "cgroup2" { - // Check that the mount is properly formatted. 
- if numPostFields < 3 { - return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text) - } - - return filepath.Dir(fields[4]), nil - } - } - if err := scanner.Err(); err != nil { - return "", err - } - - return "", NewNotFoundError("cgroup") -} - type Mount struct { Mountpoint string Root string From 0681d456fc201e3979298e4fddee0b1c2fe1bb97 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Fri, 15 May 2020 14:49:58 -0700 Subject: [PATCH 09/12] libct/cgroups/utils: move cgroup v1 code to separate file In most projects, "utils" is a big mess, and this is not an exception. Try to clean it up a bit by moving cgroup v1 specific code to a separate source file. There are no code changes in this commit, just moving it from one file to another. Signed-off-by: Kir Kolyshkin --- libcontainer/cgroups/cgroups.go | 24 --- libcontainer/cgroups/utils.go | 211 -------------------------- libcontainer/cgroups/v1_utils.go | 252 +++++++++++++++++++++++++++++++ 3 files changed, 252 insertions(+), 235 deletions(-) create mode 100644 libcontainer/cgroups/v1_utils.go diff --git a/libcontainer/cgroups/cgroups.go b/libcontainer/cgroups/cgroups.go index 071f8f2e..a16a68e9 100644 --- a/libcontainer/cgroups/cgroups.go +++ b/libcontainer/cgroups/cgroups.go @@ -3,8 +3,6 @@ package cgroups import ( - "fmt" - "github.com/opencontainers/runc/libcontainer/configs" ) @@ -51,25 +49,3 @@ type Manager interface { // Whether the cgroup path exists or not Exists() bool } - -type NotFoundError struct { - Subsystem string -} - -func (e *NotFoundError) Error() string { - return fmt.Sprintf("mountpoint for %s not found", e.Subsystem) -} - -func NewNotFoundError(sub string) error { - return &NotFoundError{ - Subsystem: sub, - } -} - -func IsNotFound(err error) bool { - if err == nil { - return false - } - _, ok := err.(*NotFoundError) - return ok -} diff --git a/libcontainer/cgroups/utils.go b/libcontainer/cgroups/utils.go index b1171d8d..b86c4151 100644 --- a/libcontainer/cgroups/utils.go +++ 
b/libcontainer/cgroups/utils.go @@ -20,7 +20,6 @@ import ( ) const ( - CgroupNamePrefix = "name=" CgroupProcesses = "cgroup.procs" unifiedMountpoint = "/sys/fs/cgroup" ) @@ -28,8 +27,6 @@ const ( var ( isUnifiedOnce sync.Once isUnified bool - - errUnified = errors.New("not implemented for cgroup v2 unified hierarchy") ) // HugePageSizeUnitList is a list of the units used by the linux kernel when @@ -51,126 +48,12 @@ func IsCgroup2UnifiedMode() bool { return isUnified } -// https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt -func FindCgroupMountpoint(cgroupPath, subsystem string) (string, error) { - if IsCgroup2UnifiedMode() { - return "", errUnified - } - mnt, _, err := FindCgroupMountpointAndRoot(cgroupPath, subsystem) - return mnt, err -} - -func FindCgroupMountpointAndRoot(cgroupPath, subsystem string) (string, string, error) { - if IsCgroup2UnifiedMode() { - return "", "", errUnified - } - - // We are not using mount.GetMounts() because it's super-inefficient, - // parsing it directly sped up x10 times because of not using Sscanf. - // It was one of two major performance drawbacks in container start. 
- if !isSubsystemAvailable(subsystem) { - return "", "", NewNotFoundError(subsystem) - } - - f, err := os.Open("/proc/self/mountinfo") - if err != nil { - return "", "", err - } - defer f.Close() - - return findCgroupMountpointAndRootFromReader(f, cgroupPath, subsystem) -} - -func findCgroupMountpointAndRootFromReader(reader io.Reader, cgroupPath, subsystem string) (string, string, error) { - scanner := bufio.NewScanner(reader) - for scanner.Scan() { - txt := scanner.Text() - fields := strings.Fields(txt) - if len(fields) < 9 { - continue - } - if strings.HasPrefix(fields[4], cgroupPath) { - for _, opt := range strings.Split(fields[len(fields)-1], ",") { - if opt == subsystem { - return fields[4], fields[3], nil - } - } - } - } - if err := scanner.Err(); err != nil { - return "", "", err - } - - return "", "", NewNotFoundError(subsystem) -} - -func isSubsystemAvailable(subsystem string) bool { - if IsCgroup2UnifiedMode() { - panic("don't call isSubsystemAvailable from cgroupv2 code") - } - - cgroups, err := ParseCgroupFile("/proc/self/cgroup") - if err != nil { - return false - } - _, avail := cgroups[subsystem] - return avail -} - type Mount struct { Mountpoint string Root string Subsystems []string } -func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) { - if len(m.Subsystems) == 0 { - return "", fmt.Errorf("no subsystem for mount") - } - - return getControllerPath(m.Subsystems[0], cgroups) -} - -func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount, error) { - res := make([]Mount, 0, len(ss)) - scanner := bufio.NewScanner(mi) - numFound := 0 - for scanner.Scan() && numFound < len(ss) { - txt := scanner.Text() - sepIdx := strings.Index(txt, " - ") - if sepIdx == -1 { - return nil, fmt.Errorf("invalid mountinfo format") - } - if txt[sepIdx+3:sepIdx+10] == "cgroup2" || txt[sepIdx+3:sepIdx+9] != "cgroup" { - continue - } - fields := strings.Split(txt, " ") - m := Mount{ - Mountpoint: fields[4], - Root: fields[3], - } - 
for _, opt := range strings.Split(fields[len(fields)-1], ",") { - seen, known := ss[opt] - if !known || (!all && seen) { - continue - } - ss[opt] = true - if strings.HasPrefix(opt, CgroupNamePrefix) { - opt = opt[len(CgroupNamePrefix):] - } - m.Subsystems = append(m.Subsystems, opt) - numFound++ - } - if len(m.Subsystems) > 0 || all { - res = append(res, m) - } - } - if err := scanner.Err(); err != nil { - return nil, err - } - return res, nil -} - // GetCgroupMounts returns the mounts for the cgroup subsystems. // all indicates whether to return just the first instance or all the mounts. // This function should not be used from cgroupv2 code, as in this case @@ -193,25 +76,6 @@ func GetCgroupMounts(all bool) ([]Mount, error) { return getCgroupMountsV1(all) } -func getCgroupMountsV1(all bool) ([]Mount, error) { - f, err := os.Open("/proc/self/mountinfo") - if err != nil { - return nil, err - } - defer f.Close() - - allSubsystems, err := ParseCgroupFile("/proc/self/cgroup") - if err != nil { - return nil, err - } - - allMap := make(map[string]bool) - for s := range allSubsystems { - allMap[s] = false - } - return getCgroupMountsHelper(allMap, f, all) -} - // GetAllSubsystems returns all the cgroup subsystems supported by the kernel func GetAllSubsystems() ([]string, error) { // /proc/cgroups is meaningless for v2 @@ -253,65 +117,6 @@ func GetAllSubsystems() ([]string, error) { return subsystems, nil } -// GetOwnCgroup returns the relative path to the cgroup docker is running in. 
-func GetOwnCgroup(subsystem string) (string, error) { - if IsCgroup2UnifiedMode() { - return "", errUnified - } - cgroups, err := ParseCgroupFile("/proc/self/cgroup") - if err != nil { - return "", err - } - - return getControllerPath(subsystem, cgroups) -} - -func GetOwnCgroupPath(subsystem string) (string, error) { - cgroup, err := GetOwnCgroup(subsystem) - if err != nil { - return "", err - } - - return getCgroupPathHelper(subsystem, cgroup) -} - -func GetInitCgroup(subsystem string) (string, error) { - if IsCgroup2UnifiedMode() { - return "", errUnified - } - cgroups, err := ParseCgroupFile("/proc/1/cgroup") - if err != nil { - return "", err - } - - return getControllerPath(subsystem, cgroups) -} - -func GetInitCgroupPath(subsystem string) (string, error) { - cgroup, err := GetInitCgroup(subsystem) - if err != nil { - return "", err - } - - return getCgroupPathHelper(subsystem, cgroup) -} - -func getCgroupPathHelper(subsystem, cgroup string) (string, error) { - mnt, root, err := FindCgroupMountpointAndRoot("", subsystem) - if err != nil { - return "", err - } - - // This is needed for nested containers, because in /proc/self/cgroup we - // see paths from host, which don't exist in container. 
- relCgroup, err := filepath.Rel(root, cgroup) - if err != nil { - return "", err - } - - return filepath.Join(mnt, relCgroup), nil -} - func readProcsFile(file string) ([]int, error) { f, err := os.Open(file) if err != nil { @@ -384,22 +189,6 @@ func parseCgroupFromReader(r io.Reader) (map[string]string, error) { return cgroups, nil } -func getControllerPath(subsystem string, cgroups map[string]string) (string, error) { - if IsCgroup2UnifiedMode() { - return "", errUnified - } - - if p, ok := cgroups[subsystem]; ok { - return p, nil - } - - if p, ok := cgroups[CgroupNamePrefix+subsystem]; ok { - return p, nil - } - - return "", NewNotFoundError(subsystem) -} - func PathExists(path string) bool { if _, err := os.Stat(path); err != nil { return false diff --git a/libcontainer/cgroups/v1_utils.go b/libcontainer/cgroups/v1_utils.go new file mode 100644 index 00000000..e32c48a5 --- /dev/null +++ b/libcontainer/cgroups/v1_utils.go @@ -0,0 +1,252 @@ +package cgroups + +import ( + "bufio" + "errors" + "fmt" + "io" + "os" + "path/filepath" + "strings" +) + +// Code in this source file is specific to cgroup v1, +// and must not be used from any cgroup v2 code. 
+ +const ( + CgroupNamePrefix = "name=" +) + +var ( + errUnified = errors.New("not implemented for cgroup v2 unified hierarchy") +) + +type NotFoundError struct { + Subsystem string +} + +func (e *NotFoundError) Error() string { + return fmt.Sprintf("mountpoint for %s not found", e.Subsystem) +} + +func NewNotFoundError(sub string) error { + return &NotFoundError{ + Subsystem: sub, + } +} + +func IsNotFound(err error) bool { + if err == nil { + return false + } + _, ok := err.(*NotFoundError) + return ok +} + +// https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt +func FindCgroupMountpoint(cgroupPath, subsystem string) (string, error) { + if IsCgroup2UnifiedMode() { + return "", errUnified + } + mnt, _, err := FindCgroupMountpointAndRoot(cgroupPath, subsystem) + return mnt, err +} + +func FindCgroupMountpointAndRoot(cgroupPath, subsystem string) (string, string, error) { + if IsCgroup2UnifiedMode() { + return "", "", errUnified + } + + // We are not using mount.GetMounts() because it's super-inefficient, + // parsing it directly sped up x10 times because of not using Sscanf. + // It was one of two major performance drawbacks in container start. 
+ if !isSubsystemAvailable(subsystem) { + return "", "", NewNotFoundError(subsystem) + } + + f, err := os.Open("/proc/self/mountinfo") + if err != nil { + return "", "", err + } + defer f.Close() + + return findCgroupMountpointAndRootFromReader(f, cgroupPath, subsystem) +} + +func findCgroupMountpointAndRootFromReader(reader io.Reader, cgroupPath, subsystem string) (string, string, error) { + scanner := bufio.NewScanner(reader) + for scanner.Scan() { + txt := scanner.Text() + fields := strings.Fields(txt) + if len(fields) < 9 { + continue + } + if strings.HasPrefix(fields[4], cgroupPath) { + for _, opt := range strings.Split(fields[len(fields)-1], ",") { + if opt == subsystem { + return fields[4], fields[3], nil + } + } + } + } + if err := scanner.Err(); err != nil { + return "", "", err + } + + return "", "", NewNotFoundError(subsystem) +} + +func isSubsystemAvailable(subsystem string) bool { + if IsCgroup2UnifiedMode() { + panic("don't call isSubsystemAvailable from cgroupv2 code") + } + + cgroups, err := ParseCgroupFile("/proc/self/cgroup") + if err != nil { + return false + } + _, avail := cgroups[subsystem] + return avail +} + +func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) { + if len(m.Subsystems) == 0 { + return "", fmt.Errorf("no subsystem for mount") + } + + return getControllerPath(m.Subsystems[0], cgroups) +} + +func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount, error) { + res := make([]Mount, 0, len(ss)) + scanner := bufio.NewScanner(mi) + numFound := 0 + for scanner.Scan() && numFound < len(ss) { + txt := scanner.Text() + sepIdx := strings.Index(txt, " - ") + if sepIdx == -1 { + return nil, fmt.Errorf("invalid mountinfo format") + } + if txt[sepIdx+3:sepIdx+10] == "cgroup2" || txt[sepIdx+3:sepIdx+9] != "cgroup" { + continue + } + fields := strings.Split(txt, " ") + m := Mount{ + Mountpoint: fields[4], + Root: fields[3], + } + for _, opt := range strings.Split(fields[len(fields)-1], ",") { + seen, 
known := ss[opt] + if !known || (!all && seen) { + continue + } + ss[opt] = true + if strings.HasPrefix(opt, CgroupNamePrefix) { + opt = opt[len(CgroupNamePrefix):] + } + m.Subsystems = append(m.Subsystems, opt) + numFound++ + } + if len(m.Subsystems) > 0 || all { + res = append(res, m) + } + } + if err := scanner.Err(); err != nil { + return nil, err + } + return res, nil +} + +func getCgroupMountsV1(all bool) ([]Mount, error) { + f, err := os.Open("/proc/self/mountinfo") + if err != nil { + return nil, err + } + defer f.Close() + + allSubsystems, err := ParseCgroupFile("/proc/self/cgroup") + if err != nil { + return nil, err + } + + allMap := make(map[string]bool) + for s := range allSubsystems { + allMap[s] = false + } + return getCgroupMountsHelper(allMap, f, all) +} + +// GetOwnCgroup returns the relative path to the cgroup docker is running in. +func GetOwnCgroup(subsystem string) (string, error) { + if IsCgroup2UnifiedMode() { + return "", errUnified + } + cgroups, err := ParseCgroupFile("/proc/self/cgroup") + if err != nil { + return "", err + } + + return getControllerPath(subsystem, cgroups) +} + +func GetOwnCgroupPath(subsystem string) (string, error) { + cgroup, err := GetOwnCgroup(subsystem) + if err != nil { + return "", err + } + + return getCgroupPathHelper(subsystem, cgroup) +} + +func GetInitCgroup(subsystem string) (string, error) { + if IsCgroup2UnifiedMode() { + return "", errUnified + } + cgroups, err := ParseCgroupFile("/proc/1/cgroup") + if err != nil { + return "", err + } + + return getControllerPath(subsystem, cgroups) +} + +func GetInitCgroupPath(subsystem string) (string, error) { + cgroup, err := GetInitCgroup(subsystem) + if err != nil { + return "", err + } + + return getCgroupPathHelper(subsystem, cgroup) +} + +func getCgroupPathHelper(subsystem, cgroup string) (string, error) { + mnt, root, err := FindCgroupMountpointAndRoot("", subsystem) + if err != nil { + return "", err + } + + // This is needed for nested containers, because 
in /proc/self/cgroup we + // see paths from host, which don't exist in container. + relCgroup, err := filepath.Rel(root, cgroup) + if err != nil { + return "", err + } + + return filepath.Join(mnt, relCgroup), nil +} + +func getControllerPath(subsystem string, cgroups map[string]string) (string, error) { + if IsCgroup2UnifiedMode() { + return "", errUnified + } + + if p, ok := cgroups[subsystem]; ok { + return p, nil + } + + if p, ok := cgroups[CgroupNamePrefix+subsystem]; ok { + return p, nil + } + + return "", NewNotFoundError(subsystem) +} From 0626c150c14c3575b44add72c53675c026c1f929 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Tue, 19 May 2020 14:41:26 -0700 Subject: [PATCH 10/12] libct/cgroupv1: fix TestGetCgroupMounts test cases When testing GetCgroupMounts, the map data is supposed to be obtained from /proc/self/cgroup, but since we're mocking things, we provide our own map. Unfortunately, not all controllers existing in mountinfos were listed. Also, "name=systemd" needs special handling, so add it. 
The controllers added were: * for fedoraMountinfo case: name=systemd * for systemdMountinfo case: name=systemd, net_prio * for bedrockMountinfo case: name=systemd, net_prio, pids Signed-off-by: Kir Kolyshkin --- libcontainer/cgroups/utils_test.go | 63 +++++++++++++++++------------- 1 file changed, 35 insertions(+), 28 deletions(-) diff --git a/libcontainer/cgroups/utils_test.go b/libcontainer/cgroups/utils_test.go index 4cd213aa..ebc38f25 100644 --- a/libcontainer/cgroups/utils_test.go +++ b/libcontainer/cgroups/utils_test.go @@ -189,46 +189,52 @@ func TestGetCgroupMounts(t *testing.T) { mountInfo: fedoraMountinfo, root: "/", subsystems: map[string]bool{ - "cpuset": false, - "cpu": false, - "cpuacct": false, - "memory": false, - "devices": false, - "freezer": false, - "net_cls": false, - "blkio": false, - "perf_event": false, - "hugetlb": false, + "name=systemd": false, + "cpuset": false, + "cpu": false, + "cpuacct": false, + "memory": false, + "devices": false, + "freezer": false, + "net_cls": false, + "blkio": false, + "perf_event": false, + "hugetlb": false, }, }, { mountInfo: systemdMountinfo, root: "/system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope", subsystems: map[string]bool{ - "cpuset": false, - "cpu": false, - "cpuacct": false, - "memory": false, - "devices": false, - "freezer": false, - "net_cls": false, - "blkio": false, - "perf_event": false, + "name=systemd": false, + "cpuset": false, + "cpu": false, + "cpuacct": false, + "memory": false, + "devices": false, + "freezer": false, + "net_cls": false, + "net_prio": false, + "blkio": false, + "perf_event": false, }, }, { mountInfo: bedrockMountinfo, root: "/", subsystems: map[string]bool{ - "cpuset": false, - "cpu": false, - "cpuacct": false, - "memory": false, - "devices": false, - "freezer": false, - "net_cls": false, - "blkio": false, - "perf_event": false, + "name=systemd": false, + "cpuset": false, + "cpu": false, + "cpuacct": false, + "memory": false, + 
"devices": false, + "freezer": false, + "net_cls": false, + "net_prio": false, + "blkio": false, + "perf_event": false, + "pids": false, }, }, } @@ -245,6 +251,7 @@ func TestGetCgroupMounts(t *testing.T) { } } for ss := range td.subsystems { + ss = strings.TrimPrefix(ss, CgroupNamePrefix) m, ok := cgMap[ss] if !ok { t.Fatalf("%s not found", ss) From cec5ae7c2d1dbba7e92fbb87040119f30c3f3bc1 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Tue, 19 May 2020 15:09:04 -0700 Subject: [PATCH 11/12] libct/cgroupv1/getCgroupMountsHelper: minor nit It is easy to just use TrimPrefix which does nothing in case the prefix does not exist. Signed-off-by: Kir Kolyshkin --- libcontainer/cgroups/v1_utils.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/libcontainer/cgroups/v1_utils.go b/libcontainer/cgroups/v1_utils.go index e32c48a5..f8487b0a 100644 --- a/libcontainer/cgroups/v1_utils.go +++ b/libcontainer/cgroups/v1_utils.go @@ -141,9 +141,7 @@ func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount, continue } ss[opt] = true - if strings.HasPrefix(opt, CgroupNamePrefix) { - opt = opt[len(CgroupNamePrefix):] - } + opt = strings.TrimPrefix(opt, CgroupNamePrefix) m.Subsystems = append(m.Subsystems, opt) numFound++ } From 8c5a19f79baa72e70e1fb2ed65ad687fc221cb97 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Wed, 20 May 2020 14:54:08 -0700 Subject: [PATCH 12/12] libct/cgroups/fs: rename some files no changes, just a few git renames Signed-off-by: Kir Kolyshkin --- libcontainer/cgroups/fs/{apply_raw.go => fs.go} | 0 libcontainer/cgroups/fs/{apply_raw_test.go => fs_test.go} | 0 libcontainer/cgroups/fs/{fs_unsupported.go => unsupported.go} | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename libcontainer/cgroups/fs/{apply_raw.go => fs.go} (100%) rename libcontainer/cgroups/fs/{apply_raw_test.go => fs_test.go} (100%) rename libcontainer/cgroups/fs/{fs_unsupported.go => unsupported.go} (100%) diff --git 
a/libcontainer/cgroups/fs/apply_raw.go b/libcontainer/cgroups/fs/fs.go similarity index 100% rename from libcontainer/cgroups/fs/apply_raw.go rename to libcontainer/cgroups/fs/fs.go diff --git a/libcontainer/cgroups/fs/apply_raw_test.go b/libcontainer/cgroups/fs/fs_test.go similarity index 100% rename from libcontainer/cgroups/fs/apply_raw_test.go rename to libcontainer/cgroups/fs/fs_test.go diff --git a/libcontainer/cgroups/fs/fs_unsupported.go b/libcontainer/cgroups/fs/unsupported.go similarity index 100% rename from libcontainer/cgroups/fs/fs_unsupported.go rename to libcontainer/cgroups/fs/unsupported.go