From d1f332b911a97c91c20c280a0e9757ddba054920 Mon Sep 17 00:00:00 2001 From: Robert Swiecki Date: Tue, 18 May 2021 14:38:01 +0200 Subject: [PATCH] Enable support for clone3() and for CLONE_NEWTIME --- cmdline.cc | 8 +++--- config.proto | 2 +- mnt.cc | 2 +- pid.cc | 2 +- subproc.cc | 71 +++++++++++++++++++++++++++++++++++++--------------- subproc.h | 2 +- 6 files changed, 60 insertions(+), 27 deletions(-) diff --git a/cmdline.cc b/cmdline.cc index 1bb470b..fc04c11 100644 --- a/cmdline.cc +++ b/cmdline.cc @@ -231,7 +231,8 @@ void logParams(nsjconf_t* nsjconf) { "max_conns:%u, max_conns_per_ip:%u, time_limit:%" PRId64 ", personality:%#lx, daemonize:%s, clone_newnet:%s, " "clone_newuser:%s, clone_newns:%s, clone_newpid:%s, clone_newipc:%s, clone_newuts:%s, " - "clone_newcgroup:%s, clone_newtime:%s, keep_caps:%s, disable_no_new_privs:%s, max_cpus:%zu", + "clone_newcgroup:%s, clone_newtime:%s, keep_caps:%s, disable_no_new_privs:%s, " + "max_cpus:%zu", nsjconf->hostname.c_str(), nsjconf->chroot.c_str(), nsjconf->exec_file.empty() ? nsjconf->argv[0].c_str() : nsjconf->exec_file.c_str(), nsjconf->bindhost.c_str(), nsjconf->port, nsjconf->max_conns, nsjconf->max_conns_per_ip, @@ -239,8 +240,9 @@ void logParams(nsjconf_t* nsjconf) { logYesNo(nsjconf->clone_newnet), logYesNo(nsjconf->clone_newuser), logYesNo(nsjconf->clone_newns), logYesNo(nsjconf->clone_newpid), logYesNo(nsjconf->clone_newipc), logYesNo(nsjconf->clone_newuts), - logYesNo(nsjconf->clone_newcgroup), logYesNo(nsjconf->clone_newtime), logYesNo(nsjconf->keep_caps), - logYesNo(nsjconf->disable_no_new_privs), nsjconf->max_cpus); + logYesNo(nsjconf->clone_newcgroup), logYesNo(nsjconf->clone_newtime), + logYesNo(nsjconf->keep_caps), logYesNo(nsjconf->disable_no_new_privs), + nsjconf->max_cpus); for (const auto& p : nsjconf->mountpts) { LOG_I( diff --git a/config.proto b/config.proto index 128d383..25d6ee1 100644 --- a/config.proto +++ b/config.proto @@ -177,7 +177,7 @@ message NsJailConfig { optional bool clone_newuts = 52 [default = true]; /* Disable for kernel versions < 4.6 as it's not supported there */ optional bool clone_newcgroup = 53 [default = true]; - /* Supported with kernel versions >= 5.3 */ + /* Supported with kernel versions >= 5.3 */ optional bool clone_newtime = 86 [default = false]; /* Mappings for UIDs and GIDs. See the description for 'msg IdMap' diff --git a/mnt.cc b/mnt.cc index 70440b9..ef2dbd7 100644 --- a/mnt.cc +++ b/mnt.cc @@ -453,7 +453,7 @@ bool initNs(nsjconf_t* nsjconf) { return initNsInternal(nsjconf); } - pid_t pid = subproc::cloneProc(CLONE_FS | SIGCHLD); + pid_t pid = subproc::cloneProc(CLONE_FS, SIGCHLD); if (pid == -1) { return false; } diff --git a/pid.cc b/pid.cc index 593018b..8165c03 100644 --- a/pid.cc +++ b/pid.cc @@ -48,7 +48,7 @@ bool initNs(nsjconf_t* nsjconf) { * first clone/fork will work, and the rest will fail with ENOMEM (see 'man pid_namespaces' * for details on this behavior) */ - pid_t pid = subproc::cloneProc(CLONE_FS); + pid_t pid = subproc::cloneProc(CLONE_FS, 0); if (pid == -1) { PLOG_E("Couldn't create a dummy init process"); return false; diff --git a/subproc.cc b/subproc.cc index c1d9c41..e573472 100644 --- a/subproc.cc +++ b/subproc.cc @@ -100,18 +100,20 @@ static const std::string cloneFlagsToStr(uintptr_t flags) { NS_VALSTR_STRUCT(CLONE_IO), }; - uintptr_t knownFlagMask = CSIGNAL; + uintptr_t knownFlagMask = 0; for (const auto& i : cloneFlags) { if (flags & i.flag) { - res.append(i.name).append("|"); + if (!res.empty()) { + res.append("|"); + } + res.append(i.name); } knownFlagMask |= i.flag; } if (flags & ~(knownFlagMask)) { - util::StrAppend(&res, "%#tx|", flags & ~(knownFlagMask)); + util::StrAppend(&res, "|%#tx", flags & ~(knownFlagMask)); } - res.append(util::sigName(flags & CSIGNAL).c_str()); return res; } @@ -444,8 +446,8 @@ pid_t runChild(nsjconf_t* nsjconf, int netfd, int fd_in, int fd_out, int fd_err) LOG_F("Launching new process failed"); } - flags |= SIGCHLD; - LOG_D("Creating new process with clone flags:%s", cloneFlagsToStr(flags).c_str()); + LOG_D("Creating new process with clone flags:%s and exit_signal:SIGCHLD", + cloneFlagsToStr(flags).c_str()); int sv[2]; if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, sv) == -1) { @@ -455,7 +457,7 @@ pid_t runChild(nsjconf_t* nsjconf, int netfd, int fd_in, int fd_out, int fd_err) int child_fd = sv[0]; int parent_fd = sv[1]; - pid_t pid = cloneProc(flags); + pid_t pid = cloneProc(flags, SIGCHLD); if (pid == 0) { close(parent_fd); subprocNewProc(nsjconf, netfd, fd_in, fd_out, fd_err, child_fd); @@ -464,21 +466,20 @@ pid_t runChild(nsjconf_t* nsjconf, int netfd, int fd_in, int fd_out, int fd_err) } close(child_fd); if (pid == -1) { + auto saved_errno = errno; + PLOG_W("clone(flags=%s) failed", cloneFlagsToStr(flags).c_str()); if (flags & CLONE_NEWCGROUP) { - auto saved_errno = errno; - PLOG_E( + LOG_W( "nsjail tried to use the CLONE_NEWCGROUP clone flag, which is " - "supported under kernel versions >= 4.6 only. Try disabling this flag"); - errno = saved_errno; + "supported under kernel versions >= 4.6 only"); + } else if (flags & CLONE_NEWTIME) { + LOG_W( + "nsjail tried to use the CLONE_NEWTIME clone flag, which is " + "supported under kernel versions >= 5.13 only"); } - PLOG_E( - "clone(flags=%s) failed. You probably need root privileges if your system " - "doesn't support CLONE_NEWUSER. Alternatively, you might want to recompile " - "your kernel with support for namespaces or check the current value of the " - "kernel.unprivileged_userns_clone sysctl", - cloneFlagsToStr(flags).c_str()); close(parent_fd); - return -1; + errno = saved_errno; + return pid; } addProc(nsjconf, pid, netfd); @@ -517,9 +518,39 @@ static int cloneFunc(void* arg __attribute__((unused))) { * update the internal PID/TID caches, what can lead to invalid values being returned by getpid() * or incorrect PID/TIDs used in raise()/abort() functions */ -pid_t cloneProc(uintptr_t flags) { +pid_t cloneProc(uintptr_t flags, int exit_signal) { + exit_signal &= CSIGNAL; + if (flags & CLONE_VM) { LOG_E("Cannot use clone(flags & CLONE_VM)"); + errno = 0; + return -1; + } + +#if defined(__NR_clone3) + struct clone_args ca = { + .flags = (uint64_t)flags, + .pidfd = 0, + .child_tid = 0, + .parent_tid = 0, + .exit_signal = (uint64_t)exit_signal, + .stack = 0, + .stack_size = 0, + .tls = 0, + .set_tid = 0, + .set_tid_size = 0, + .cgroup = 0, + }; + + pid_t ret = util::syscall(__NR_clone3, (uintptr_t)&ca, sizeof(ca)); + if (ret != -1 || errno != ENOSYS) { + return ret; + } +#endif /* defined(__NR_clone3) */ + + if (flags & CLONE_NEWTIME) { + LOG_E("CLONE_NEWTIME was requested but clone3() is not supported"); + errno = 0; return -1; } @@ -532,7 +563,7 @@ pid_t cloneProc(uintptr_t flags) { */ void* stack = &cloneStack[sizeof(cloneStack) / 2]; /* Parent */ - return clone(cloneFunc, stack, flags, NULL, NULL, NULL); + return clone(cloneFunc, stack, flags | exit_signal, NULL, NULL, NULL); } /* Child */ return 0; diff --git a/subproc.h b/subproc.h index 5497abd..d3e1696 100644 --- a/subproc.h +++ b/subproc.h @@ -41,7 +41,7 @@ void killAndReapAll(nsjconf_t* nsjconf); /* Returns the exit code of the first failing subprocess, or 0 if none fail */ int reapProc(nsjconf_t* nsjconf); int systemExe(const std::vector& args, char** env); -pid_t cloneProc(uintptr_t flags); +pid_t cloneProc(uintptr_t flags, int exit_signal); } // namespace subproc -- 2.34.1