nsenter: improve debug logging

In order to make 'runc --debug' actually useful for debugging nsexec bugs, provide information about all the internal operations when in debug mode. [@kolyshkin: rebasing; fix formatting via indent for make validate to pass] Signed-off-by: Aleksa Sarai <asarai@suse.de> Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2025-11-09 13:00:56 +03:00 · 2020-06-08 01:24:30 +10:00
parent bb28c44f12
commit 64bb59f592
4 changed files with 186 additions and 114 deletions
--- a/libcontainer/init_linux.go
+++ b/libcontainer/init_linux.go
@@ -35,8 +35,8 @@ const (
 )

 type pid struct {
-	Pid           int `json:"pid"`
-	PidFirstChild int `json:"pid_first"`
+	Pid           int `json:"stage2_pid"`
+	PidFirstChild int `json:"stage1_pid"`
 }

 // network is an internal struct used to setup container networks.
--- a/libcontainer/nsenter/nsexec.c
+++ b/libcontainer/nsenter/nsexec.c
@@ -36,7 +36,7 @@ enum sync_t {
 	SYNC_RECVPID_PLS = 0x42,	/* Tell parent we're sending the PID. */
 	SYNC_RECVPID_ACK = 0x43,	/* PID was correctly received by parent. */
 	SYNC_GRANDCHILD = 0x44,	/* The grandchild is ready to run. */
-	SYNC_CHILD_READY = 0x45,	/* The child or grandchild is ready to return. */
+	SYNC_CHILD_FINISH = 0x45,	/* The child or grandchild has finished. */
 };

 /*
@@ -45,10 +45,14 @@ enum sync_t {
 */
 #define CREATECGROUPNS 0x80

+#define STAGE_SETUP  -1
 /* longjmp() arguments. */
-#define JUMP_PARENT 0x00
-#define JUMP_CHILD  0xA0
-#define JUMP_INIT   0xA1
+#define STAGE_PARENT  0
+#define STAGE_CHILD   1
+#define STAGE_INIT    2
+
+/* Stores the current stage of nsexec. */
+int current_stage = STAGE_SETUP;

 /* Assume the stack grows down, so arguments should be above it. */
 struct clone_t {
@@ -134,33 +138,41 @@ int setns(int fd, int nstype)
 }
 #endif

-static void write_log_with_info(const char *level, const char *function, int line, const char *format, ...)
+static void write_log(const char *level, const char *format, ...)
 {
-	char message[1024] = { };
-
+	char *message = NULL, *stage = NULL;
 	va_list args;
+	int ret;

 	if (logfd < 0 || level == NULL)
-		return;
+		goto out;

 	va_start(args, format);
-	if (vsnprintf(message, sizeof(message), format, args) < 0)
-		goto done;
-
-	dprintf(logfd, "{\"level\":\"%s\", \"msg\": \"%s:%d %s\"}\n", level, function, line, message);
-done:
+	ret = vasprintf(&message, format, args);
 	va_end(args);
-}
+	if (ret < 0)
+		goto out;

-#define write_log(level, fmt, ...) \
-	write_log_with_info((level), __FUNCTION__, __LINE__, (fmt), ##__VA_ARGS__)
+	if (current_stage == STAGE_SETUP)
+		stage = strdup("nsexec");
+	else
+		ret = asprintf(&stage, "nsexec-%d", current_stage);
+	if (ret < 0)
+		goto out;
+
+	dprintf(logfd, "{\"level\":\"%s\", \"msg\": \"%s[%d]: %s\"}\n", level, stage, getpid(), message);
+
+out:
+	free(message);
+	free(stage);
+}

 /* XXX: This is ugly. */
 static int syncfd = -1;

 #define bail(fmt, ...)                                       \
 	do {                                                       \
-		write_log(FATAL, "nsenter: " fmt ": %m", ##__VA_ARGS__); \
+		write_log(FATAL, fmt ": %m", ##__VA_ARGS__); \
 		exit(1);                                                 \
 	} while(0)

@@ -297,9 +309,11 @@ static void update_uidmap(const char *path, int pid, char *map, size_t map_len)
 	if (map == NULL || map_len <= 0)
 		return;

+	write_log(DEBUG, "update /proc/%d/uid_map to '%s'", pid, map);
 	if (write_file(map, map_len, "/proc/%d/uid_map", pid) < 0) {
 		if (errno != EPERM)
 			bail("failed to update /proc/%d/uid_map", pid);
+		write_log(DEBUG, "update /proc/%d/uid_map got -EPERM (trying %s)", pid, path);
 		if (try_mapping_tool(path, pid, map, map_len))
 			bail("failed to use newuid map on %d", pid);
 	}
@@ -310,9 +324,11 @@ static void update_gidmap(const char *path, int pid, char *map, size_t map_len)
 	if (map == NULL || map_len <= 0)
 		return;

+	write_log(DEBUG, "update /proc/%d/gid_map to '%s'", pid, map);
 	if (write_file(map, map_len, "/proc/%d/gid_map", pid) < 0) {
 		if (errno != EPERM)
 			bail("failed to update /proc/%d/gid_map", pid);
+		write_log(DEBUG, "update /proc/%d/gid_map got -EPERM (trying %s)", pid, path);
 		if (try_mapping_tool(path, pid, map, map_len))
 			bail("failed to use newgid map on %d", pid);
 	}
@@ -323,6 +339,7 @@ static void update_oom_score_adj(char *data, size_t len)
 	if (data == NULL || len <= 0)
 		return;

+	write_log(DEBUG, "update /proc/self/oom_score_adj to '%s'", data);
 	if (write_file(data, len, "/proc/self/oom_score_adj") < 0)
 		bail("failed to update /proc/self/oom_score_adj");
 }
@@ -507,7 +524,6 @@ void join_namespaces(char *nslist)
 	char *namespace = strtok_r(nslist, ",", &saveptr);
 	struct namespace_t {
 		int fd;
-		int ns;
 		char type[PATH_MAX];
 		char path[PATH_MAX];
 	} *namespaces = NULL;
@@ -542,7 +558,7 @@ void join_namespaces(char *nslist)
 			bail("failed to open %s", path);

 		ns->fd = fd;
-		ns->ns = nsflag(namespace);
+		strncpy(ns->type, namespace, PATH_MAX - 1);
 		strncpy(ns->path, path, PATH_MAX - 1);
 		ns->path[PATH_MAX - 1] = '\0';
 	} while ((namespace = strtok_r(NULL, ",", &saveptr)) != NULL);
@@ -555,12 +571,14 @@ void join_namespaces(char *nslist)
 	 */

 	for (i = 0; i < num; i++) {
-		struct namespace_t ns = namespaces[i];
+		struct namespace_t *ns = &namespaces[i];
+		int flag = nsflag(ns->type);

-		if (setns(ns.fd, ns.ns) < 0)
-			bail("failed to setns to %s", ns.path);
+		write_log(DEBUG, "setns(%#x) into %s namespace (with path %s)", flag, ns->type, ns->path);
+		if (setns(ns->fd, flag) < 0)
+			bail("failed to setns into %s namespace", ns->type);

-		close(ns.fd);
+		close(ns->fd);
 	}

 	free(namespaces);
@@ -569,6 +587,14 @@ void join_namespaces(char *nslist)
 /* Defined in cloned_binary.c. */
 extern int ensure_cloned_binary(void);

+static inline int sane_kill(pid_t pid, int signum)
+{
+	if (pid > 0)
+		return kill(pid, signum);
+	else
+		return 0;
+}
+
 void nsexec(void)
 {
 	int pipenum;
@@ -605,7 +631,7 @@ void nsexec(void)
 	if (write(pipenum, "", 1) != 1)
 		bail("could not inform the parent we are past initial setup");

-	write_log(DEBUG, "nsexec started");
+	write_log(DEBUG, "=> nsexec container setup");

 	/* Parse all of the netlink configuration. */
 	nl_parse(pipenum, &config);
@@ -629,6 +655,7 @@ void nsexec(void)
 	 * containers), which is the recommendation from the kernel folks.
 	 */
 	if (config.namespaces) {
+		write_log(DEBUG, "set process as non-dumpable");
 		if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < 0)
 			bail("failed to set process as non-dumpable");
 	}
@@ -693,45 +720,49 @@ void nsexec(void)
 	 * -- Aleksa "what has my life come to?" Sarai
 	 */

-	switch (setjmp(env)) {
+	current_stage = setjmp(env);
+	switch (current_stage) {
 		/*
 		 * Stage 0: We're in the parent. Our job is just to create a new child
-		 *          (stage 1: JUMP_CHILD) process and write its uid_map and
+		 *          (stage 1: STAGE_CHILD) process and write its uid_map and
 		 *          gid_map. That process will go on to create a new process, then
 		 *          it will send us its PID which we will send to the bootstrap
 		 *          process.
 		 */
-	case JUMP_PARENT:{
+	case STAGE_PARENT:{
 			int len;
-			pid_t child, first_child = -1;
-			bool ready = false;
+			pid_t stage1_pid = -1, stage2_pid = -1;
+			bool stage1_complete, stage2_complete;

 			/* For debugging. */
 			prctl(PR_SET_NAME, (unsigned long)"runc:[0:PARENT]", 0, 0, 0);
+			write_log(DEBUG, "~> nsexec stage-0");

 			/* Start the process of getting a container. */
-			child = clone_parent(&env, JUMP_CHILD);
-			if (child < 0)
-				bail("unable to fork: child_func");
+			write_log(DEBUG, "spawn stage-1");
+			stage1_pid = clone_parent(&env, STAGE_CHILD);
+			if (stage1_pid < 0)
+				bail("unable to spawn stage-1");

-			/*
-			 * State machine for synchronisation with the children.
-			 *
-			 * Father only return when both child and grandchild are
-			 * ready, so we can receive all possible error codes
-			 * generated by children.
-			 */
 			syncfd = sync_child_pipe[1];
 			close(sync_child_pipe[0]);

-			while (!ready) {
+			/*
+			 * State machine for synchronisation with the children. We only
+			 * return once both the child and grandchild are ready.
+			 */
+			write_log(DEBUG, "-> stage-1 synchronisation loop");
+			stage1_complete = false;
+			while (!stage1_complete) {
 				enum sync_t s;

 				if (read(syncfd, &s, sizeof(s)) != sizeof(s))
-					bail("failed to sync with child: next state");
+					bail("failed to sync with stage-1: next state");

 				switch (s) {
 				case SYNC_USERMAP_PLS:
+					write_log(DEBUG, "stage-1 requested userns mappings");
+
 					/*
 					 * Enable setgroups(2) if we've been asked to. But we also
 					 * have to explicitly disable setgroups(2) if we're
@@ -742,70 +773,78 @@ void nsexec(void)
 					 * For rootless multi-entry mapping, config.is_setgroup shall be true and
 					 * newuidmap/newgidmap shall be used.
 					 */
-
 					if (config.is_rootless_euid && !config.is_setgroup)
-						update_setgroups(child, SETGROUPS_DENY);
+						update_setgroups(stage1_pid, SETGROUPS_DENY);

 					/* Set up mappings. */
-					update_uidmap(config.uidmappath, child, config.uidmap, config.uidmap_len);
-					update_gidmap(config.gidmappath, child, config.gidmap, config.gidmap_len);
+					update_uidmap(config.uidmappath, stage1_pid, config.uidmap, config.uidmap_len);
+					update_gidmap(config.gidmappath, stage1_pid, config.gidmap, config.gidmap_len);

 					s = SYNC_USERMAP_ACK;
 					if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
-						kill(child, SIGKILL);
-						bail("failed to sync with child: write(SYNC_USERMAP_ACK)");
+						sane_kill(stage1_pid, SIGKILL);
+						sane_kill(stage2_pid, SIGKILL);
+						bail("failed to sync with stage-1: write(SYNC_USERMAP_ACK)");
 					}
 					break;
 				case SYNC_RECVPID_PLS:
-					first_child = child;
+					write_log(DEBUG, "stage-1 requested pid to be forwarded");

-					/* Get the init_func pid. */
-					if (read(syncfd, &child, sizeof(child)) != sizeof(child)) {
-						kill(first_child, SIGKILL);
-						bail("failed to sync with child: read(childpid)");
+					/* Get the stage-2 pid. */
+					if (read(syncfd, &stage2_pid, sizeof(stage2_pid)) != sizeof(stage2_pid)) {
+						sane_kill(stage1_pid, SIGKILL);
+						sane_kill(stage2_pid, SIGKILL);
+						bail("failed to sync with stage-1: read(stage2_pid)");
 					}

 					/* Send ACK. */
 					s = SYNC_RECVPID_ACK;
 					if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
-						kill(first_child, SIGKILL);
-						kill(child, SIGKILL);
-						bail("failed to sync with child: write(SYNC_RECVPID_ACK)");
+						sane_kill(stage1_pid, SIGKILL);
+						sane_kill(stage2_pid, SIGKILL);
+						bail("failed to sync with stage-1: write(SYNC_RECVPID_ACK)");
 					}

-					/* Send the init_func pid back to our parent.
-					 *
-					 * Send the init_func pid and the pid of the first child back to our parent.
-					 * We need to send both back because we can't reap the first child we created (CLONE_PARENT).
-					 * It becomes the responsibility of our parent to reap the first child.
+					/*
+					 * Send both the stage-1 and stage-2 pids back to runc.
+					 * runc needs the stage-2 to continue process management,
+					 * but because stage-1 was spawned with CLONE_PARENT we
+					 * cannot reap it within stage-0 and thus we need to ask
+					 * runc to reap the zombie for us.
 					 */
-					len = dprintf(pipenum, "{\"pid\": %d, \"pid_first\": %d}\n",
-						      child, first_child);
+					write_log(DEBUG, "forward stage-1 (%d) and stage-2 (%d) pids to runc",
+						  stage1_pid, stage2_pid);
+					len =
+					    dprintf(pipenum, "{\"stage1_pid\":%d,\"stage2_pid\":%d}\n", stage1_pid,
+						    stage2_pid);
 					if (len < 0) {
-						kill(child, SIGKILL);
-						bail("unable to generate JSON for child pid");
+						sane_kill(stage1_pid, SIGKILL);
+						sane_kill(stage2_pid, SIGKILL);
+						bail("failed to sync with runc: write(pid-JSON)");
 					}
 					break;
-				case SYNC_CHILD_READY:
-					ready = true;
+				case SYNC_CHILD_FINISH:
+					write_log(DEBUG, "stage-1 complete");
+					stage1_complete = true;
 					break;
 				default:
 					bail("unexpected sync value: %u", s);
 				}
 			}
+			write_log(DEBUG, "<- stage-1 synchronisation loop");

 			/* Now sync with grandchild. */
-
 			syncfd = sync_grandchild_pipe[1];
 			close(sync_grandchild_pipe[0]);
-
-			ready = false;
-			while (!ready) {
+			write_log(DEBUG, "-> stage-2 synchronisation loop");
+			stage2_complete = false;
+			while (!stage2_complete) {
 				enum sync_t s;

+				write_log(DEBUG, "signalling stage-2 to run");
 				s = SYNC_GRANDCHILD;
 				if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
-					kill(child, SIGKILL);
+					sane_kill(stage2_pid, SIGKILL);
 					bail("failed to sync with child: write(SYNC_GRANDCHILD)");
 				}

@@ -813,27 +852,31 @@ void nsexec(void)
 					bail("failed to sync with child: next state");

 				switch (s) {
-				case SYNC_CHILD_READY:
-					ready = true;
+				case SYNC_CHILD_FINISH:
+					write_log(DEBUG, "stage-2 complete");
+					stage2_complete = true;
 					break;
 				default:
 					bail("unexpected sync value: %u", s);
 				}
 			}
+			write_log(DEBUG, "<- stage-2 synchronisation loop");
+			write_log(DEBUG, "<~ nsexec stage-0");
 			exit(0);
 		}
+		break;

 		/*
 		 * Stage 1: We're in the first child process. Our job is to join any
-		 *          provided namespaces in the netlink payload and unshare all
-		 *          of the requested namespaces. If we've been asked to
-		 *          CLONE_NEWUSER, we will ask our parent (stage 0) to set up
-		 *          our user mappings for us. Then, we create a new child
-		 *          (stage 2: JUMP_INIT) for PID namespace. We then send the
-		 *          child's PID to our parent (stage 0).
+		 *          provided namespaces in the netlink payload and unshare all of
+		 *          the requested namespaces. If we've been asked to CLONE_NEWUSER,
+		 *          we will ask our parent (stage 0) to set up our user mappings
+		 *          for us. Then, we create a new child (stage 2: STAGE_INIT) for
+		 *          PID namespace. We then send the child's PID to our parent
+		 *          (stage 0).
 		 */
-	case JUMP_CHILD:{
-			pid_t child;
+	case STAGE_CHILD:{
+			pid_t stage2_pid = -1;
 			enum sync_t s;

 			/* We're in a child and thus need to tell the parent if we die. */
@@ -842,11 +885,12 @@ void nsexec(void)

 			/* For debugging. */
 			prctl(PR_SET_NAME, (unsigned long)"runc:[1:CHILD]", 0, 0, 0);
+			write_log(DEBUG, "~> nsexec stage-1");

 			/*
 			 * We need to setns first. We cannot do this earlier (in stage 0)
 			 * because of the fact that we forked to get here (the PID of
-			 * [stage 2: JUMP_INIT]) would be meaningless). We could send it
+			 * [stage 2: STAGE_INIT]) would be meaningless). We could send it
 			 * using cmsg(3) but that's just annoying.
 			 */
 			if (config.namespaces)
@@ -872,40 +916,50 @@ void nsexec(void)
 			 * problem.
 			 */
 			if (config.cloneflags & CLONE_NEWUSER) {
+				write_log(DEBUG, "unshare user namespace");
 				if (unshare(CLONE_NEWUSER) < 0)
 					bail("failed to unshare user namespace");
 				config.cloneflags &= ~CLONE_NEWUSER;

 				/*
-				 * We don't have the privileges to do any mapping here (see the
-				 * clone_parent rant). So signal our parent to hook us up.
+				 * We need to set ourselves as dumpable temporarily so that the
+				 * parent process can write to our procfs files.
 				 */
-
-				/* Switching is only necessary if we joined namespaces. */
 				if (config.namespaces) {
+					write_log(DEBUG, "temporarily set process as dumpable");
 					if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) < 0)
-						bail("failed to set process as dumpable");
+						bail("failed to temporarily set process as dumpable");
 				}
+
+				/*
+				 * We don't have the privileges to do any mapping here (see the
+				 * clone_parent rant). So signal stage-0 to do the mapping for
+				 * us.
+				 */
+				write_log(DEBUG, "request stage-0 to map user namespace");
 				s = SYNC_USERMAP_PLS;
 				if (write(syncfd, &s, sizeof(s)) != sizeof(s))
 					bail("failed to sync with parent: write(SYNC_USERMAP_PLS)");

 				/* ... wait for mapping ... */
-
+				write_log(DEBUG, "request stage-0 to map user namespace");
 				if (read(syncfd, &s, sizeof(s)) != sizeof(s))
 					bail("failed to sync with parent: read(SYNC_USERMAP_ACK)");
 				if (s != SYNC_USERMAP_ACK)
 					bail("failed to sync with parent: SYNC_USERMAP_ACK: got %u", s);
-				/* Switching is only necessary if we joined namespaces. */
+
+				/* Revert temporary re-dumpable setting. */
 				if (config.namespaces) {
+					write_log(DEBUG, "re-set process as non-dumpable");
 					if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < 0)
-						bail("failed to set process as dumpable");
+						bail("failed to re-set process as non-dumpable");
 				}

 				/* Become root in the namespace proper. */
 				if (setresuid(0, 0, 0) < 0)
 					bail("failed to become root in user namespace");
 			}
+
 			/*
 			 * Unshare all of the namespaces. Now, it should be noted that this
 			 * ordering might break in the future (especially with rootless
@@ -916,8 +970,9 @@ void nsexec(void)
 			 * some old kernel versions where clone(CLONE_PARENT | CLONE_NEWPID)
 			 * was broken, so we'll just do it the long way anyway.
 			 */
+			write_log(DEBUG, "unshare remaining namespace (except cgroupns)");
 			if (unshare(config.cloneflags & ~CLONE_NEWCGROUP) < 0)
-				bail("failed to unshare namespaces");
+				bail("failed to unshare remaining namespaces (except cgroupns)");

 			/*
 			 * TODO: What about non-namespace clone flags that we're dropping here?
@@ -928,41 +983,45 @@ void nsexec(void)
 			 * which would break many applications and libraries, so we must fork
 			 * to actually enter the new PID namespace.
 			 */
-			child = clone_parent(&env, JUMP_INIT);
-			if (child < 0)
-				bail("unable to fork: init_func");
+			write_log(DEBUG, "spawn stage-2");
+			stage2_pid = clone_parent(&env, STAGE_INIT);
+			if (stage2_pid < 0)
+				bail("unable to spawn stage-2");

 			/* Send the child to our parent, which knows what it's doing. */
+			write_log(DEBUG, "request stage-0 to forward stage-2 pid (%d)", stage2_pid);
 			s = SYNC_RECVPID_PLS;
 			if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
-				kill(child, SIGKILL);
+				sane_kill(stage2_pid, SIGKILL);
 				bail("failed to sync with parent: write(SYNC_RECVPID_PLS)");
 			}
-			if (write(syncfd, &child, sizeof(child)) != sizeof(child)) {
-				kill(child, SIGKILL);
-				bail("failed to sync with parent: write(childpid)");
+			if (write(syncfd, &stage2_pid, sizeof(stage2_pid)) != sizeof(stage2_pid)) {
+				sane_kill(stage2_pid, SIGKILL);
+				bail("failed to sync with parent: write(stage2_pid)");
 			}

 			/* ... wait for parent to get the pid ... */
-
 			if (read(syncfd, &s, sizeof(s)) != sizeof(s)) {
-				kill(child, SIGKILL);
+				sane_kill(stage2_pid, SIGKILL);
 				bail("failed to sync with parent: read(SYNC_RECVPID_ACK)");
 			}
 			if (s != SYNC_RECVPID_ACK) {
-				kill(child, SIGKILL);
+				sane_kill(stage2_pid, SIGKILL);
 				bail("failed to sync with parent: SYNC_RECVPID_ACK: got %u", s);
 			}

-			s = SYNC_CHILD_READY;
+			write_log(DEBUG, "signal completion to stage-0");
+			s = SYNC_CHILD_FINISH;
 			if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
-				kill(child, SIGKILL);
-				bail("failed to sync with parent: write(SYNC_CHILD_READY)");
+				sane_kill(stage2_pid, SIGKILL);
+				bail("failed to sync with parent: write(SYNC_CHILD_FINISH)");
 			}

-			/* Our work is done. [Stage 2: JUMP_INIT] is doing the rest of the work. */
+			/* Our work is done. [Stage 2: STAGE_INIT] is doing the rest of the work. */
+			write_log(DEBUG, "<~ nsexec stage-1");
 			exit(0);
 		}
+		break;

 		/*
 		 * Stage 2: We're the final child process, and the only process that will
@@ -970,7 +1029,7 @@ void nsexec(void)
 		 *          final cleanup steps and then return to the Go runtime to allow
 		 *          init_linux.go to run.
 		 */
-	case JUMP_INIT:{
+	case STAGE_INIT:{
 			/*
 			 * We're inside the child now, having jumped from the
 			 * start_child() code after forking in the parent.
@@ -985,6 +1044,7 @@ void nsexec(void)

 			/* For debugging. */
 			prctl(PR_SET_NAME, (unsigned long)"runc:[2:INIT]", 0, 0, 0);
+			write_log(DEBUG, "~> nsexec stage-2");

 			if (read(syncfd, &s, sizeof(s)) != sizeof(s))
 				bail("failed to sync with parent: read(SYNC_GRANDCHILD)");
@@ -1005,21 +1065,30 @@ void nsexec(void)
 					bail("setgroups failed");
 			}

-			/* ... wait until our topmost parent has finished cgroup setup in p.manager.Apply() ... */
+			/*
+			 * Wait until our topmost parent has finished cgroup setup in
+			 * p.manager.Apply().
+			 *
+			 * TODO(cyphar): Check if this code is actually needed because we
+			 *               should be in the cgroup even from stage-0, so
+			 *               waiting until now might not make sense.
+			 */
 			if (config.cloneflags & CLONE_NEWCGROUP) {
 				uint8_t value;
 				if (read(pipenum, &value, sizeof(value)) != sizeof(value))
 					bail("read synchronisation value failed");
 				if (value == CREATECGROUPNS) {
+					write_log(DEBUG, "unshare cgroup namespace");
 					if (unshare(CLONE_NEWCGROUP) < 0)
 						bail("failed to unshare cgroup namespace");
 				} else
 					bail("received unknown synchronisation value");
 			}

-			s = SYNC_CHILD_READY;
+			write_log(DEBUG, "signal completion to stage-0");
+			s = SYNC_CHILD_FINISH;
 			if (write(syncfd, &s, sizeof(s)) != sizeof(s))
-				bail("failed to sync with patent: write(SYNC_CHILD_READY)");
+				bail("failed to sync with patent: write(SYNC_CHILD_FINISH)");

 			/* Close sync pipes. */
 			close(sync_grandchild_pipe[0]);
@@ -1028,10 +1097,13 @@ void nsexec(void)
 			nl_free(&config);

 			/* Finish executing, let the Go runtime take over. */
+			write_log(DEBUG, "<= nsexec container setup");
+			write_log(DEBUG, "booting up go runtime ...");
 			return;
 		}
+		break;
 	default:
-		bail("unexpected jump value");
+		bail("unknown stage '%d' for jump value", current_stage);
 	}

 	/* Should never be reached. */
--- a/tests/integration/debug.bats
+++ b/tests/integration/debug.bats
@@ -11,7 +11,7 @@ function teardown() {
 }

 function check_debug() {
-	[[ "$*" == *"nsexec started"* ]]
+	[[ "$*" == *"nsexec container setup"* ]]
 	[[ "$*" == *"child process in init()"* ]]
 	[[ "$*" == *"init: closing the pipe to signal completion"* ]]
 }
--- a/tests/integration/exec.bats
+++ b/tests/integration/exec.bats
@@ -138,7 +138,7 @@ function teardown() {
 }

 function check_exec_debug() {
-	[[ "$*" == *"nsexec started"* ]]
+	[[ "$*" == *"nsexec container setup"* ]]
 	[[ "$*" == *"child process in init()"* ]]
 	[[ "$*" == *"setns_init: about to exec"* ]]
 }