1
0
mirror of https://github.com/opencontainers/runc.git synced 2025-04-18 19:44:09 +03:00

Merge pull request #4045 from fuweid/support-pidfd-socket

[feature request] *: introduce pidfd-socket flag
This commit is contained in:
lfbzhm 2023-11-22 09:13:55 +08:00 committed by GitHub
commit 95a93c132c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 371 additions and 5 deletions

1
.gitignore vendored
View File

@ -6,6 +6,7 @@ vendor/pkg
/contrib/cmd/seccompagent/seccompagent
/contrib/cmd/fs-idmap/fs-idmap
/contrib/cmd/memfd-bind/memfd-bind
/contrib/cmd/pidfd-kill/pidfd-kill
man/man8
release
Vagrantfile

View File

@ -71,10 +71,10 @@ runc-bin: runc-dmz
$(GO_BUILD) -o runc .
.PHONY: all
all: runc recvtty sd-helper seccompagent fs-idmap memfd-bind
all: runc recvtty sd-helper seccompagent fs-idmap memfd-bind pidfd-kill
.PHONY: recvtty sd-helper seccompagent fs-idmap memfd-bind
recvtty sd-helper seccompagent fs-idmap memfd-bind:
.PHONY: recvtty sd-helper seccompagent fs-idmap memfd-bind pidfd-kill
recvtty sd-helper seccompagent fs-idmap memfd-bind pidfd-kill:
$(GO_BUILD) -o contrib/cmd/$@/$@ ./contrib/cmd/$@
.PHONY: static
@ -194,6 +194,7 @@ clean:
rm -f contrib/cmd/sd-helper/sd-helper
rm -f contrib/cmd/seccompagent/seccompagent
rm -f contrib/cmd/memfd-bind/memfd-bind
rm -f contrib/cmd/pidfd-kill/pidfd-kill
sudo rm -rf release
rm -rf man/man8

View File

@ -0,0 +1,114 @@
package main
import (
	"errors"
	"fmt"
	"net"
	"os"
	"os/signal"
	"strings"

	"github.com/urfave/cli"
	"golang.org/x/sys/unix"

	"github.com/opencontainers/runc/libcontainer/utils"
)
const (
	// usage is the help text shown by the pidfd-kill CLI. The example spells
	// the signal with its "SIG" prefix so that it agrees with the --signal
	// flag's default value ("SIGKILL").
	usage = `Open Container Initiative contrib/cmd/pidfd-kill
pidfd-kill is an implementation of a consumer of runC's --pidfd-socket API.
After received SIGTERM, pidfd-kill sends the given signal to init process by
pidfd received from --pidfd-socket.
To use pidfd-kill, just specify a socket path at which you want to receive
pidfd:
$ pidfd-kill [--signal SIGKILL] socket.sock
`
)
// main wires up and runs the pidfd-kill command line.
//
// The command takes exactly one argument, a unix socket path. It listens on
// that path, receives a pidfd over the first connection, then blocks until it
// gets SIGTERM and forwards the configured signal to the process referenced by
// the pidfd. Any error is printed to stderr and the process exits with
// status 1.
func main() {
	app := cli.NewApp()
	app.Name = "pidfd-kill"
	app.Usage = usage
	app.Flags = []cli.Flag{
		cli.StringFlag{
			Name:  "signal",
			Value: "SIGKILL",
			Usage: "Signal to send to the init process",
		},
		cli.StringFlag{
			Name:  "pid-file",
			Value: "",
			Usage: "Path to write the pidfd-kill process ID to",
		},
	}
	app.Action = func(ctx *cli.Context) error {
		args := ctx.Args()
		if len(args) != 1 {
			return errors.New("required a single socket path")
		}
		socketFile := args[0]

		// Resolve the signal before doing any work. unix.SignalNum returns 0
		// for names it does not know (including names without the "SIG"
		// prefix), and signal 0 would make PidfdSendSignal deliver nothing,
		// so normalize the name and reject unknown signals explicitly.
		rawSig := ctx.String("signal")
		if rawSig == "" {
			rawSig = "SIGKILL"
		}
		sigName := strings.ToUpper(rawSig)
		if !strings.HasPrefix(sigName, "SIG") {
			sigName = "SIG" + sigName
		}
		sig := unix.SignalNum(sigName)
		if sig == 0 {
			return fmt.Errorf("unknown signal %q", rawSig)
		}

		if pidFile := ctx.String("pid-file"); pidFile != "" {
			pid := fmt.Sprintf("%d\n", os.Getpid())
			if err := os.WriteFile(pidFile, []byte(pid), 0o644); err != nil {
				return err
			}
			// Best-effort cleanup once the action returns.
			defer os.Remove(pidFile)
		}

		pidfdFile, err := recvPidfd(socketFile)
		if err != nil {
			return err
		}
		defer pidfdFile.Close()

		// SIGTERM is only intercepted from this point on; a single pending
		// notification is all that is needed, so a buffer of one suffices.
		signalCh := make(chan os.Signal, 1)
		signal.Notify(signalCh, unix.SIGTERM)
		<-signalCh

		return unix.PidfdSendSignal(int(pidfdFile.Fd()), sig, nil, 0)
	}

	if err := app.Run(os.Args); err != nil {
		fmt.Fprintln(os.Stderr, "fatal error:", err)
		os.Exit(1)
	}
}
// recvPidfd listens on the unix socket at socketFile, accepts one connection
// and returns the file that utils.RecvFile reads from it.
//
// The listener, the accepted connection and the duplicated socket file are
// all closed before the function returns; only the received file is handed
// to the caller.
func recvPidfd(socketFile string) (*os.File, error) {
	listener, err := net.Listen("unix", socketFile)
	if err != nil {
		return nil, err
	}
	defer listener.Close()

	accepted, err := listener.Accept()
	if err != nil {
		return nil, err
	}
	defer accepted.Close()

	var sockFile *os.File
	switch c := accepted.(type) {
	case *net.UnixConn:
		// File() yields a duplicate of the connection usable by RecvFile.
		if sockFile, err = c.File(); err != nil {
			return nil, err
		}
	default:
		return nil, errors.New("failed to cast to unixconn")
	}
	defer sockFile.Close()

	return utils.RecvFile(sockFile)
}

View File

@ -34,6 +34,10 @@ command(s) that get executed on start, edit the args parameter of the spec. See
Value: "",
Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal",
},
cli.StringFlag{
Name: "pidfd-socket",
Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the init process",
},
cli.StringFlag{
Name: "pid-file",
Value: "",

View File

@ -33,6 +33,10 @@ following will output a list of processes running in the container:
Name: "console-socket",
Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal",
},
cli.StringFlag{
Name: "pidfd-socket",
Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the exec process",
},
cli.StringFlag{
Name: "cwd",
Usage: "current working directory in the container",
@ -181,6 +185,7 @@ func execProcess(context *cli.Context) (int, error) {
shouldDestroy: false,
container: container,
consoleSocket: context.String("console-socket"),
pidfdSocket: context.String("pidfd-socket"),
detach: context.Bool("detach"),
pidFile: context.String("pid-file"),
action: CT_ACT_RUN,

View File

@ -590,6 +590,13 @@ func (c *Container) newParentProcess(p *Process) (parentProcess, error) {
cmd.Env = append(cmd.Env, "_LIBCONTAINER_LOGLEVEL="+p.LogLevel)
}
if p.PidfdSocket != nil {
cmd.ExtraFiles = append(cmd.ExtraFiles, p.PidfdSocket)
cmd.Env = append(cmd.Env,
"_LIBCONTAINER_PIDFD_SOCK="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1),
)
}
if safeExe != nil {
// Due to a Go stdlib bug, we need to add safeExe to the set of
// ExtraFiles otherwise it is possible for the stdlib to clobber the fd

View File

@ -179,6 +179,16 @@ func startInitialization() (retErr error) {
defer consoleSocket.Close()
}
var pidfdSocket *os.File
if envSockFd := os.Getenv("_LIBCONTAINER_PIDFD_SOCK"); envSockFd != "" {
sockFd, err := strconv.Atoi(envSockFd)
if err != nil {
return fmt.Errorf("unable to convert _LIBCONTAINER_PIDFD_SOCK: %w", err)
}
pidfdSocket = os.NewFile(uintptr(sockFd), "pidfd-socket")
defer pidfdSocket.Close()
}
// Get mount files (O_PATH).
mountSrcFds, err := parseFdsFromEnv("_LIBCONTAINER_MOUNT_FDS")
if err != nil {
@ -222,10 +232,10 @@ func startInitialization() (retErr error) {
}
// If init succeeds, it will not return, hence none of the defers will be called.
return containerInit(it, &config, syncPipe, consoleSocket, fifofd, logFD, dmzExe, mountFds{sourceFds: mountSrcFds, idmapFds: idmapFds})
return containerInit(it, &config, syncPipe, consoleSocket, pidfdSocket, fifofd, logFD, dmzExe, mountFds{sourceFds: mountSrcFds, idmapFds: idmapFds})
}
func containerInit(t initType, config *initConfig, pipe *syncSocket, consoleSocket *os.File, fifoFd, logFd int, dmzExe *os.File, mountFds mountFds) error {
func containerInit(t initType, config *initConfig, pipe *syncSocket, consoleSocket, pidfdSocket *os.File, fifoFd, logFd int, dmzExe *os.File, mountFds mountFds) error {
if err := populateProcessEnvironment(config.Env); err != nil {
return err
}
@ -240,6 +250,7 @@ func containerInit(t initType, config *initConfig, pipe *syncSocket, consoleSock
i := &linuxSetnsInit{
pipe: pipe,
consoleSocket: consoleSocket,
pidfdSocket: pidfdSocket,
config: config,
logFd: logFd,
dmzExe: dmzExe,
@ -249,6 +260,7 @@ func containerInit(t initType, config *initConfig, pipe *syncSocket, consoleSock
i := &linuxStandardInit{
pipe: pipe,
consoleSocket: consoleSocket,
pidfdSocket: pidfdSocket,
parentPid: unix.Getppid(),
config: config,
fifoFd: fifoFd,
@ -694,3 +706,20 @@ func signalAllProcesses(m cgroups.Manager, s unix.Signal) error {
return nil
}
// setupPidfd opens a pidfd for the calling process (os.Getpid) and sends it
// over socket using utils.SendRawFd, passing initType through as the
// accompanying name.
//
// socket is always closed before returning, and the local pidfd is closed
// whether or not the send succeeded. A send failure takes precedence over a
// close failure in the returned error.
func setupPidfd(socket *os.File, initType string) error {
	defer socket.Close()

	fd, err := unix.PidfdOpen(os.Getpid(), 0)
	if err != nil {
		return fmt.Errorf("failed to pidfd_open: %w", err)
	}

	// The local copy is no longer needed once the send attempt is over.
	sendErr := utils.SendRawFd(socket, initType, uintptr(fd))
	closeErr := unix.Close(fd)

	if sendErr != nil {
		return fmt.Errorf("failed to send pidfd on socket: %w", sendErr)
	}
	return closeErr
}

View File

@ -77,6 +77,9 @@ type Process struct {
// ConsoleSocket provides the masterfd console.
ConsoleSocket *os.File
// PidfdSocket is the socket over which the process sends a pidfd referring to itself.
PidfdSocket *os.File
// Init specifies whether the process is the first process in the container.
Init bool

View File

@ -22,6 +22,7 @@ import (
type linuxSetnsInit struct {
pipe *syncSocket
consoleSocket *os.File
pidfdSocket *os.File
config *initConfig
logFd int
dmzExe *os.File
@ -56,6 +57,11 @@ func (l *linuxSetnsInit) Init() error {
return err
}
}
if l.pidfdSocket != nil {
if err := setupPidfd(l.pidfdSocket, "setns"); err != nil {
return fmt.Errorf("failed to setup pidfd: %w", err)
}
}
if l.config.NoNewPrivileges {
if err := unix.Prctl(unix.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil {
return err

View File

@ -22,6 +22,7 @@ import (
type linuxStandardInit struct {
pipe *syncSocket
consoleSocket *os.File
pidfdSocket *os.File
parentPid int
fifoFd int
logFd int
@ -114,6 +115,12 @@ func (l *linuxStandardInit) Init() error {
}
}
if l.pidfdSocket != nil {
if err := setupPidfd(l.pidfdSocket, "standard"); err != nil {
return fmt.Errorf("failed to setup pidfd: %w", err)
}
}
// Finish the rootfs setup.
if l.config.Config.Namespaces.Contains(configs.NEWNS) {
if err := finalizeRootfs(l.config.Config); err != nil {

4
run.go
View File

@ -35,6 +35,10 @@ command(s) that get executed on start, edit the args parameter of the spec. See
Value: "",
Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal",
},
cli.StringFlag{
Name: "pidfd-socket",
Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the init process",
},
cli.BoolFlag{
Name: "detach, d",
Usage: "detach from the container's process",

View File

@ -17,6 +17,7 @@ RECVTTY="${INTEGRATION_ROOT}/../../contrib/cmd/recvtty/recvtty"
SD_HELPER="${INTEGRATION_ROOT}/../../contrib/cmd/sd-helper/sd-helper"
SECCOMP_AGENT="${INTEGRATION_ROOT}/../../contrib/cmd/seccompagent/seccompagent"
FS_IDMAP="${INTEGRATION_ROOT}/../../contrib/cmd/fs-idmap/fs-idmap"
PIDFD_KILL="${INTEGRATION_ROOT}/../../contrib/cmd/pidfd-kill/pidfd-kill"
# Some variables may not always be set. Set those to empty value,
# if unset, to avoid "unbound variable" error.
@ -707,3 +708,44 @@ function requires_idmap_fs() {
esac
# If we have another error, the integration test will fail and report it.
}
# setup_pidfd_kill starts a detached pidfd-kill process that listens on a
# socket under ${ROOT}/pidfd and exports that path as PIDFD_SOCKET. The
# argument is the signal name handed to pidfd-kill's --signal flag.
# Returns 1 when ROOT is unset.
function setup_pidfd_kill() {
	local signal=$1

	if [ ! -v ROOT ]; then
		return 1
	fi

	local dir="${ROOT}/pidfd"
	mkdir "${dir}"
	export PIDFD_SOCKET="${dir}/sock"

	("${PIDFD_KILL}" --pid-file "${dir}/pid" --signal "${signal}" "${PIDFD_SOCKET}" &) &

	# ensure socket is ready
	retry 10 1 stat "${PIDFD_SOCKET}"
}
# teardown_pidfd_kill force-kills the pidfd-kill process recorded in
# ${ROOT}/pidfd/pid (if that pid file exists) and removes the whole
# ${ROOT}/pidfd directory. It is a no-op when ROOT is unset.
function teardown_pidfd_kill() {
	if [ ! -v ROOT ]; then
		return 0
	fi

	local dir="${ROOT}/pidfd"
	local pid_file="${dir}/pid"

	if [ -f "${pid_file}" ]; then
		kill -9 "$(cat "${pid_file}")"
	fi

	rm -rf "${dir}"
}
# pidfd_kill sends the default `kill` signal to the pidfd-kill process recorded
# in ${ROOT}/pidfd/pid, if that pid file exists. It is a no-op when ROOT is
# unset.
function pidfd_kill() {
	if [ ! -v ROOT ]; then
		return 0
	fi

	local pid_file="${ROOT}/pidfd/pid"
	if [ -f "${pid_file}" ]; then
		kill "$(cat "${pid_file}")"
	fi
}

View File

@ -0,0 +1,99 @@
#!/usr/bin/env bats
load helpers
# setup runs before each test: it declares the root and kernel 5.3
# prerequisites, prepares a busybox bundle and makes the container's process
# a long sleep so that it stays alive until a test signals it.
# NOTE(review): `requires` / `requires_kernel` presumably skip the test when
# the prerequisite is not met — confirm against the helpers.
function setup() {
requires root
requires_kernel 5.3
setup_busybox
update_config '.process.args = ["/bin/sleep", "1d"]'
}
# teardown runs after each test: it cleans up the pidfd-kill helper first,
# then the test bundle.
function teardown() {
teardown_pidfd_kill
teardown_bundle
}
# Creates a container with --pidfd-socket, checks that it is in the "created"
# state, then triggers pidfd-kill (configured with SIGTERM) and waits for the
# container to become "stopped".
@test "runc create [ --pidfd-socket ] " {
setup_pidfd_kill "SIGTERM"
runc create --console-socket "$CONSOLE_SOCKET" --pidfd-socket "${PIDFD_SOCKET}" test_pidfd
[ "$status" -eq 0 ]
testcontainer test_pidfd created
# Ask pidfd-kill to forward its configured signal through the received pidfd.
pidfd_kill
wait_for_container 10 1 test_pidfd stopped
}
# Runs a detached container with --pidfd-socket, checks that it is "running",
# then triggers pidfd-kill (configured with SIGKILL) and waits for the
# container to become "stopped".
@test "runc run [ --pidfd-socket ] " {
setup_pidfd_kill "SIGKILL"
runc run -d --console-socket "$CONSOLE_SOCKET" --pidfd-socket "${PIDFD_SOCKET}" test_pidfd
[ "$status" -eq 0 ]
testcontainer test_pidfd running
# Ask pidfd-kill to forward its configured signal through the received pidfd.
pidfd_kill
wait_for_container 10 1 test_pidfd stopped
}
# Starts a container, execs a long-running process into a dedicated pids
# sub-cgroup with --pidfd-socket, kills that process through pidfd-kill and
# verifies that the sub-cgroup empties (rmdir succeeds) while the container
# itself keeps running.
@test "runc exec [ --pidfd-socket ] [cgroups_v1] " {
	requires cgroups_v1

	set_cgroups_path

	runc run -d --console-socket "$CONSOLE_SOCKET" test_pidfd
	[ "$status" -eq 0 ]
	testcontainer test_pidfd running

	# Use sub-cgroup to ensure that exec process has been killed
	test_pidfd_cgroup_path=$(get_cgroup_path "pids")
	# mkdir is not invoked through bats' `run`, so it leaves $status untouched;
	# a failure here aborts the test on its own.
	mkdir "${test_pidfd_cgroup_path}/exec_pidfd"

	setup_pidfd_kill "SIGKILL"

	__runc exec -d --cgroup "pids:exec_pidfd" --pid-file "exec_pid.txt" --pidfd-socket "${PIDFD_SOCKET}" test_pidfd sleep 1d
	# NOTE(review): this check is only meaningful if __runc sets $status — confirm against the helpers.
	[ "$status" -eq 0 ]

	# The exec'd process must be the one placed in the sub-cgroup.
	exec_pid=$(cat exec_pid.txt)
	exec_pid_in_cgroup=$(cat "${test_pidfd_cgroup_path}/exec_pidfd/cgroup.procs")
	[ "${exec_pid}" -eq "${exec_pid_in_cgroup}" ]

	pidfd_kill

	# ensure exec process has been reaped
	retry 10 1 rmdir "${test_pidfd_cgroup_path}/exec_pidfd"

	testcontainer test_pidfd running
}
# Starts a container, execs a long-running process into a dedicated
# sub-cgroup with --pidfd-socket, kills that process through pidfd-kill and
# verifies that the sub-cgroup empties (rmdir succeeds) while the container
# itself keeps running.
@test "runc exec [ --pidfd-socket ] [cgroups_v2] " {
	requires cgroups_v2

	set_cgroups_path

	runc run -d --console-socket "$CONSOLE_SOCKET" test_pidfd
	[ "$status" -eq 0 ]
	testcontainer test_pidfd running

	# Use sub-cgroup to ensure that exec process has been killed
	test_pidfd_cgroup_path=$(get_cgroup_path "pids")
	# mkdir is not invoked through bats' `run`, so it leaves $status untouched;
	# a failure here aborts the test on its own.
	mkdir "${test_pidfd_cgroup_path}/exec_pidfd"

	setup_pidfd_kill "SIGKILL"

	__runc exec -d --cgroup "exec_pidfd" --pid-file "exec_pid.txt" --pidfd-socket "${PIDFD_SOCKET}" test_pidfd sleep 1d
	# NOTE(review): this check is only meaningful if __runc sets $status — confirm against the helpers.
	[ "$status" -eq 0 ]

	# The exec'd process must be the one placed in the sub-cgroup.
	exec_pid=$(cat exec_pid.txt)
	exec_pid_in_cgroup=$(cat "${test_pidfd_cgroup_path}/exec_pidfd/cgroup.procs")
	[ "${exec_pid}" -eq "${exec_pid_in_cgroup}" ]

	pidfd_kill

	# ensure exec process has been reaped
	retry 10 1 rmdir "${test_pidfd_cgroup_path}/exec_pidfd"

	testcontainer test_pidfd running
}

View File

@ -18,6 +18,7 @@ import (
"github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/specconv"
"github.com/opencontainers/runc/libcontainer/system/kernelversion"
"github.com/opencontainers/runc/libcontainer/utils"
)
@ -198,6 +199,7 @@ type runner struct {
preserveFDs int
pidFile string
consoleSocket string
pidfdSocket string
container *libcontainer.Container
action CtAct
notifySocket *notifySocket
@ -254,6 +256,14 @@ func (r *runner) run(config *specs.Process) (int, error) {
}
defer tty.Close()
if r.pidfdSocket != "" {
connClose, err := setupPidfdSocket(process, r.pidfdSocket)
if err != nil {
return -1, err
}
defer connClose()
}
switch r.action {
case CT_ACT_CREATE:
err = r.container.Start(process)
@ -389,6 +399,7 @@ func startContainer(context *cli.Context, action CtAct, criuOpts *libcontainer.C
listenFDs: listenFDs,
notifySocket: notifySocket,
consoleSocket: context.String("console-socket"),
pidfdSocket: context.String("pidfd-socket"),
detach: context.Bool("detach"),
pidFile: context.String("pid-file"),
preserveFDs: context.Int("preserve-fds"),
@ -398,3 +409,36 @@ func startContainer(context *cli.Context, action CtAct, criuOpts *libcontainer.C
}
return r.run(spec.Process)
}
// setupPidfdSocket connects to the AF_UNIX socket at sockpath and stores a
// duplicate of the connection's file in process.PidfdSocket.
//
// It fails if the running kernel is older than 5.3, if the socket cannot be
// dialed, or if the connection cannot be duplicated into an *os.File. On
// success it returns a cleanup function that closes both the connection and
// the duplicated file; the caller is expected to invoke it once the process
// has been started.
func setupPidfdSocket(process *libcontainer.Process, sockpath string) (_clean func(), _ error) {
	linux530 := kernelversion.KernelVersion{Kernel: 5, Major: 3}
	ok, err := kernelversion.GreaterEqualThan(linux530)
	if err != nil {
		return nil, err
	}
	if !ok {
		return nil, errors.New("--pidfd-socket requires >= v5.3 kernel")
	}

	conn, err := net.Dial("unix", sockpath)
	if err != nil {
		return nil, fmt.Errorf("failed to dial %s: %w", sockpath, err)
	}

	uc, ok := conn.(*net.UnixConn)
	if !ok {
		conn.Close()
		return nil, errors.New("failed to cast to UnixConn")
	}

	socket, err := uc.File()
	if err != nil {
		conn.Close()
		return nil, fmt.Errorf("failed to dup socket: %w", err)
	}

	process.PidfdSocket = socket
	return func() {
		// File() hands back a descriptor separate from the connection's, so
		// both have to be closed. Closing an already-closed *os.File only
		// returns an error, which is safe to ignore here.
		socket.Close()
		conn.Close()
	}, nil
}