Enable support for clone3() and for CLONE_NEWTIME
author Robert Swiecki <robert@swiecki.net>
Tue, 18 May 2021 12:38:01 +0000 (14:38 +0200)
committer Robert Swiecki <robert@swiecki.net>
Tue, 18 May 2021 12:38:01 +0000 (14:38 +0200)
cmdline.cc
config.proto
mnt.cc
pid.cc
subproc.cc
subproc.h

diff --git a/cmdline.cc b/cmdline.cc
index 1bb470be76e4882091430ce81496ef1c8fa78eb3..fc04c1150d2efe0b912be69be03fbe63a1c3a42b 100644 (file)
--- a/cmdline.cc
+++ b/cmdline.cc
@@ -231,7 +231,8 @@ void logParams(nsjconf_t* nsjconf) {
            "max_conns:%u, max_conns_per_ip:%u, time_limit:%" PRId64
            ", personality:%#lx, daemonize:%s, clone_newnet:%s, "
            "clone_newuser:%s, clone_newns:%s, clone_newpid:%s, clone_newipc:%s, clone_newuts:%s, "
-           "clone_newcgroup:%s, clone_newtime:%s, keep_caps:%s, disable_no_new_privs:%s, max_cpus:%zu",
+           "clone_newcgroup:%s, clone_newtime:%s, keep_caps:%s, disable_no_new_privs:%s, "
+           "max_cpus:%zu",
            nsjconf->hostname.c_str(), nsjconf->chroot.c_str(),
            nsjconf->exec_file.empty() ? nsjconf->argv[0].c_str() : nsjconf->exec_file.c_str(),
            nsjconf->bindhost.c_str(), nsjconf->port, nsjconf->max_conns, nsjconf->max_conns_per_ip,
@@ -239,8 +240,9 @@ void logParams(nsjconf_t* nsjconf) {
            logYesNo(nsjconf->clone_newnet), logYesNo(nsjconf->clone_newuser),
            logYesNo(nsjconf->clone_newns), logYesNo(nsjconf->clone_newpid),
            logYesNo(nsjconf->clone_newipc), logYesNo(nsjconf->clone_newuts),
-           logYesNo(nsjconf->clone_newcgroup), logYesNo(nsjconf->clone_newtime), logYesNo(nsjconf->keep_caps),
-           logYesNo(nsjconf->disable_no_new_privs), nsjconf->max_cpus);
+           logYesNo(nsjconf->clone_newcgroup), logYesNo(nsjconf->clone_newtime),
+           logYesNo(nsjconf->keep_caps), logYesNo(nsjconf->disable_no_new_privs),
+           nsjconf->max_cpus);
 
        for (const auto& p : nsjconf->mountpts) {
                LOG_I(
diff --git a/config.proto b/config.proto
index 128d383ec61b6fee55e65e54958f373fe1d4a9b8..25d6ee17ef45b5b71c828fd40d4fe242d4875e1f 100644 (file)
--- a/config.proto
+++ b/config.proto
@@ -177,7 +177,7 @@ message NsJailConfig {
     optional bool clone_newuts = 52 [default = true];
     /* Disable for kernel versions < 4.6 as it's not supported there */
     optional bool clone_newcgroup = 53 [default = true];
-       /* Supported with kernel versions >= 5.3 */
+    /* Supported with kernel versions >= 5.3 */
     optional bool clone_newtime = 86 [default = false];
 
     /* Mappings for UIDs and GIDs. See the description for 'msg IdMap'
diff --git a/mnt.cc b/mnt.cc
index 70440b97a47b93d905153a33c4637d21b38cb782..ef2dbd73f250f7bc9832765f7df0f4fbc5ab2576 100644 (file)
--- a/mnt.cc
+++ b/mnt.cc
@@ -453,7 +453,7 @@ bool initNs(nsjconf_t* nsjconf) {
                return initNsInternal(nsjconf);
        }
 
-       pid_t pid = subproc::cloneProc(CLONE_FS | SIGCHLD);
+       pid_t pid = subproc::cloneProc(CLONE_FS, SIGCHLD);
        if (pid == -1) {
                return false;
        }
diff --git a/pid.cc b/pid.cc
index 593018b0dc28419b18f08f0390cb1a327a641a6c..8165c03e9f7187ee113e1a649dd1d1044e524f44 100644 (file)
--- a/pid.cc
+++ b/pid.cc
@@ -48,7 +48,7 @@ bool initNs(nsjconf_t* nsjconf) {
         * first clone/fork will work, and the rest will fail with ENOMEM (see 'man pid_namespaces'
         * for details on this behavior)
         */
-       pid_t pid = subproc::cloneProc(CLONE_FS);
+       pid_t pid = subproc::cloneProc(CLONE_FS, 0);
        if (pid == -1) {
                PLOG_E("Couldn't create a dummy init process");
                return false;
diff --git a/subproc.cc b/subproc.cc
index c1d9c418fd73c72307f1cf5b6266d9f7cdefc1b4..e573472508bf0d0db4913beec3c87d10a858248f 100644 (file)
--- a/subproc.cc
+++ b/subproc.cc
@@ -100,18 +100,20 @@ static const std::string cloneFlagsToStr(uintptr_t flags) {
                NS_VALSTR_STRUCT(CLONE_IO),
        };
 
-       uintptr_t knownFlagMask = CSIGNAL;
+       uintptr_t knownFlagMask = 0;
        for (const auto& i : cloneFlags) {
                if (flags & i.flag) {
-                       res.append(i.name).append("|");
+                       if (!res.empty()) {
+                               res.append("|");
+                       }
+                       res.append(i.name);
                }
                knownFlagMask |= i.flag;
        }
 
        if (flags & ~(knownFlagMask)) {
-               util::StrAppend(&res, "%#tx|", flags & ~(knownFlagMask));
+               util::StrAppend(&res, "|%#tx", flags & ~(knownFlagMask));
        }
-       res.append(util::sigName(flags & CSIGNAL).c_str());
        return res;
 }
 
@@ -444,8 +446,8 @@ pid_t runChild(nsjconf_t* nsjconf, int netfd, int fd_in, int fd_out, int fd_err)
                LOG_F("Launching new process failed");
        }
 
-       flags |= SIGCHLD;
-       LOG_D("Creating new process with clone flags:%s", cloneFlagsToStr(flags).c_str());
+       LOG_D("Creating new process with clone flags:%s and exit_signal:SIGCHLD",
+           cloneFlagsToStr(flags).c_str());
 
        int sv[2];
        if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, sv) == -1) {
@@ -455,7 +457,7 @@ pid_t runChild(nsjconf_t* nsjconf, int netfd, int fd_in, int fd_out, int fd_err)
        int child_fd = sv[0];
        int parent_fd = sv[1];
 
-       pid_t pid = cloneProc(flags);
+       pid_t pid = cloneProc(flags, SIGCHLD);
        if (pid == 0) {
                close(parent_fd);
                subprocNewProc(nsjconf, netfd, fd_in, fd_out, fd_err, child_fd);
@@ -464,21 +466,20 @@ pid_t runChild(nsjconf_t* nsjconf, int netfd, int fd_in, int fd_out, int fd_err)
        }
        close(child_fd);
        if (pid == -1) {
+               auto saved_errno = errno;
+               PLOG_W("clone(flags=%s) failed", cloneFlagsToStr(flags).c_str());
                if (flags & CLONE_NEWCGROUP) {
-                       auto saved_errno = errno;
-                       PLOG_E(
+                       LOG_W(
                            "nsjail tried to use the CLONE_NEWCGROUP clone flag, which is "
-                           "supported under kernel versions >= 4.6 only. Try disabling this flag");
-                       errno = saved_errno;
+                           "supported under kernel versions >= 4.6 only");
+               } else if (flags & CLONE_NEWTIME) {
+                       LOG_W(
+                           "nsjail tried to use the CLONE_NEWTIME clone flag, which is "
+                           "supported under kernel versions >= 5.13 only");
                }
-               PLOG_E(
-                   "clone(flags=%s) failed. You probably need root privileges if your system "
-                   "doesn't support CLONE_NEWUSER. Alternatively, you might want to recompile "
-                   "your kernel with support for namespaces or check the current value of the "
-                   "kernel.unprivileged_userns_clone sysctl",
-                   cloneFlagsToStr(flags).c_str());
                close(parent_fd);
-               return -1;
+               errno = saved_errno;
+               return pid;
        }
        addProc(nsjconf, pid, netfd);
 
@@ -517,9 +518,39 @@ static int cloneFunc(void* arg __attribute__((unused))) {
  * update the internal PID/TID caches, what can lead to invalid values being returned by getpid()
  * or incorrect PID/TIDs used in raise()/abort() functions
  */
-pid_t cloneProc(uintptr_t flags) {
+pid_t cloneProc(uintptr_t flags, int exit_signal) {
+       exit_signal &= CSIGNAL;
+
        if (flags & CLONE_VM) {
                LOG_E("Cannot use clone(flags & CLONE_VM)");
+               errno = 0;
+               return -1;
+       }
+
+#if defined(__NR_clone3)
+       struct clone_args ca = {
+           .flags = (uint64_t)flags,
+           .pidfd = 0,
+           .child_tid = 0,
+           .parent_tid = 0,
+           .exit_signal = (uint64_t)exit_signal,
+           .stack = 0,
+           .stack_size = 0,
+           .tls = 0,
+           .set_tid = 0,
+           .set_tid_size = 0,
+           .cgroup = 0,
+       };
+
+       pid_t ret = util::syscall(__NR_clone3, (uintptr_t)&ca, sizeof(ca));
+       if (ret != -1 || errno != ENOSYS) {
+               return ret;
+       }
+#endif /* defined(__NR_clone3) */
+
+       if (flags & CLONE_NEWTIME) {
+               LOG_E("CLONE_NEWTIME was requested but clone3() is not supported");
+               errno = 0;
                return -1;
        }
 
@@ -532,7 +563,7 @@ pid_t cloneProc(uintptr_t flags) {
                 */
                void* stack = &cloneStack[sizeof(cloneStack) / 2];
                /* Parent */
-               return clone(cloneFunc, stack, flags, NULL, NULL, NULL);
+               return clone(cloneFunc, stack, flags | exit_signal, NULL, NULL, NULL);
        }
        /* Child */
        return 0;
diff --git a/subproc.h b/subproc.h
index 5497abdf87c514d09842c3430af5d781973f8968..d3e1696ead960221a25e83064ca89cd5d135f014 100644 (file)
--- a/subproc.h
+++ b/subproc.h
@@ -41,7 +41,7 @@ void killAndReapAll(nsjconf_t* nsjconf);
 /* Returns the exit code of the first failing subprocess, or 0 if none fail */
 int reapProc(nsjconf_t* nsjconf);
 int systemExe(const std::vector<std::string>& args, char** env);
-pid_t cloneProc(uintptr_t flags);
+pid_t cloneProc(uintptr_t flags, int exit_signal);
 
 }  // namespace subproc