1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // http://code.google.com/p/chromium/wiki/LinuxSUIDSandbox
7 #include "sandbox/linux/suid/common/sandbox.h"
10 #include <asm/unistd.h>
22 #include <sys/prctl.h>
23 #include <sys/resource.h>
24 #include <sys/socket.h>
27 #include <sys/types.h>
32 #include "sandbox/linux/suid/common/suid_unsafe_environment_variables.h"
33 #include "sandbox/linux/suid/linux_util.h"
34 #include "sandbox/linux/suid/process_util.h"
36 #if !defined(CLONE_NEWPID)
37 #define CLONE_NEWPID 0x20000000
39 #if !defined(CLONE_NEWNET)
40 #define CLONE_NEWNET 0x40000000
43 static bool DropRoot();
45 #define HANDLE_EINTR(x) TEMP_FAILURE_RETRY(x)
47 static void FatalError(const char* msg, ...)
48 __attribute__((noreturn, format(printf, 1, 2)));
50 static void FatalError(const char* msg, ...) {
54 vfprintf(stderr, msg, ap);
55 fprintf(stderr, ": %s\n", strerror(errno));
61 static void ExitWithErrorSignalHandler(int signal) {
62 const char msg[] = "\nThe setuid sandbox got signaled, exiting.\n";
63 if (-1 == write(2, msg, sizeof(msg) - 1)) {
70 // We will chroot() to the helper's /proc/self directory. Anything there will
71 // not exist anymore if we make sure to wait() for the helper.
73 // /proc/self/fdinfo or /proc/self/fd are especially safe and will be empty
74 // even if the helper survives as a zombie.
76 // There is very little reason to use fdinfo/ instead of fd/ but we are
77 // paranoid. fdinfo/ only exists since 2.6.22 so we allow fallback to fd/
78 #define SAFE_DIR "/proc/self/fdinfo"
79 #define SAFE_DIR2 "/proc/self/fd"
81 static bool SpawnChrootHelper() {
83 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == -1) {
89 struct stat sdir_stat;
90 if (!stat(SAFE_DIR, &sdir_stat) && S_ISDIR(sdir_stat.st_mode)) {
92 } else if (!stat(SAFE_DIR2, &sdir_stat) && S_ISDIR(sdir_stat.st_mode)) {
95 fprintf(stderr, "Could not find %s\n", SAFE_DIR2);
99 const pid_t pid = syscall(__NR_clone, CLONE_FS | SIGCHLD, 0, 0, 0);
109 // We share our files structure with an untrusted process. As a security in
110 // depth measure, we make sure that we can't open anything by mistake.
111 // TODO(agl): drop CAP_SYS_RESOURCE / use SECURE_NOROOT
113 const struct rlimit nofile = {0, 0};
114 if (setrlimit(RLIMIT_NOFILE, &nofile))
115 FatalError("Setting RLIMIT_NOFILE");
124 bytes = read(sv[0], &msg, 1);
125 } while (bytes == -1 && errno == EINTR);
133 if (msg != kMsgChrootMe)
134 FatalError("Unknown message from sandboxed process");
138 FatalError("Cannot chdir into /proc/ directory");
141 FatalError("Cannot chroot into /proc/ directory");
144 FatalError("Cannot chdir to / after chroot");
146 const char reply = kMsgChrootSuccessful;
148 bytes = write(sv[0], &reply, 1);
149 } while (bytes == -1 && errno == EINTR);
152 FatalError("Writing reply");
155 // We now become a zombie. /proc/self/fd(info) is now an empty dir and we
156 // are chrooted there.
157 // Our (unprivileged) parent should not even be able to open "." or "/"
158 // since they would need to pass the ptrace() check. If our parent wait()s
159 // for us, our root directory will completely disappear.
168 // In the parent process, we install an environment variable containing the
169 // number of the file descriptor.
171 int printed = snprintf(desc_str, sizeof(desc_str), "%u", sv[1]);
172 if (printed < 0 || printed >= (int)sizeof(desc_str)) {
173 fprintf(stderr, "Failed to snprintf\n");
177 if (setenv(kSandboxDescriptorEnvironmentVarName, desc_str, 1)) {
183 // We also install an environment variable containing the pid of the child
184 char helper_pid_str[64];
185 printed = snprintf(helper_pid_str, sizeof(helper_pid_str), "%u", pid);
186 if (printed < 0 || printed >= (int)sizeof(helper_pid_str)) {
187 fprintf(stderr, "Failed to snprintf\n");
191 if (setenv(kSandboxHelperPidEnvironmentVarName, helper_pid_str, 1)) {
200 // Block until child_pid exits, then exit. Try to preserve the exit code.
201 static void WaitForChildAndExit(pid_t child_pid) {
203 siginfo_t reaped_child_info;
205 // Don't "Core" on SIGABRT. SIGABRT is sent by the Chrome OS session manager
206 // when things are hanging.
207 // Here, the current process is going to waitid() and _exit(), so there is no
208 // point in generating a crash report. The child process is the one
210 if (signal(SIGABRT, ExitWithErrorSignalHandler) == SIG_ERR) {
211 FatalError("Failed to change signal handler");
215 HANDLE_EINTR(waitid(P_PID, child_pid, &reaped_child_info, WEXITED));
217 if (!wait_ret && reaped_child_info.si_pid == child_pid) {
218 if (reaped_child_info.si_code == CLD_EXITED) {
219 exit_code = reaped_child_info.si_status;
221 // Exit with code 0 if the child got signaled.
228 static bool MoveToNewNamespaces() {
229 // These are the sets of flags which we'll try, in order.
230 const int kCloneExtraFlags[] = {CLONE_NEWPID | CLONE_NEWNET, CLONE_NEWPID, };
232 // We need to close kZygoteIdFd before the child can continue. We use this
233 // socketpair to tell the child when to continue.
235 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sync_fds)) {
236 FatalError("Failed to create a socketpair");
239 for (size_t i = 0; i < sizeof(kCloneExtraFlags) / sizeof(kCloneExtraFlags[0]);
241 pid_t pid = syscall(__NR_clone, SIGCHLD | kCloneExtraFlags[i], 0, 0, 0);
242 const int clone_errno = errno;
246 FatalError("Could not drop privileges");
248 if (close(sync_fds[0]) || shutdown(sync_fds[1], SHUT_RD))
249 FatalError("Could not close socketpair");
250 // The kZygoteIdFd needs to be closed in the parent before
251 // Zygote gets started.
252 if (close(kZygoteIdFd))
254 // Tell our child to continue
255 if (HANDLE_EINTR(send(sync_fds[1], "C", 1, MSG_NOSIGNAL)) != 1)
257 if (close(sync_fds[1]))
259 // We want to keep a full process tree and we don't want our children to
260 // be reparented to (the outer PID namespace) init. So we wait for it.
261 WaitForChildAndExit(pid);
264 FatalError("Not reached");
268 if (close(sync_fds[1]) || shutdown(sync_fds[0], SHUT_WR))
269 FatalError("Could not close socketpair");
271 // Wait for the parent to confirm it closed kZygoteIdFd before we
273 char should_continue;
274 if (HANDLE_EINTR(read(sync_fds[0], &should_continue, 1)) != 1)
275 FatalError("Read on socketpair");
276 if (close(sync_fds[0]))
279 if (kCloneExtraFlags[i] & CLONE_NEWPID) {
280 setenv(kSandboxPIDNSEnvironmentVarName, "", 1 /* overwrite */);
282 unsetenv(kSandboxPIDNSEnvironmentVarName);
285 if (kCloneExtraFlags[i] & CLONE_NEWNET) {
286 setenv(kSandboxNETNSEnvironmentVarName, "", 1 /* overwrite */);
288 unsetenv(kSandboxNETNSEnvironmentVarName);
294 // If EINVAL then the system doesn't support the requested flags, so
295 // continue to try a different set.
296 // On any other errno value the system *does* support these flags but
297 // something went wrong, hence we bail with an error message rather than
298 // provide less security.
299 if (errno != EINVAL) {
300 fprintf(stderr, "Failed to move to new namespace:");
301 if (kCloneExtraFlags[i] & CLONE_NEWPID) {
302 fprintf(stderr, " PID namespaces supported,");
304 if (kCloneExtraFlags[i] & CLONE_NEWNET) {
305 fprintf(stderr, " Network namespace supported,");
307 fprintf(stderr, " but failed: errno = %s\n", strerror(clone_errno));
312 // If the system doesn't support NEWPID then we carry on anyway.
316 static bool DropRoot() {
317 if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0)) {
318 perror("prctl(PR_SET_DUMPABLE)");
322 if (prctl(PR_GET_DUMPABLE, 0, 0, 0, 0)) {
323 perror("Still dumpable after prctl(PR_SET_DUMPABLE)");
327 gid_t rgid, egid, sgid;
328 if (getresgid(&rgid, &egid, &sgid)) {
333 if (setresgid(rgid, rgid, rgid)) {
338 uid_t ruid, euid, suid;
339 if (getresuid(&ruid, &euid, &suid)) {
344 if (setresuid(ruid, ruid, ruid)) {
352 static bool SetupChildEnvironment() {
355 // ld.so may have cleared several environment variables because we are SUID.
356 // However, the child process might need them so zygote_host_linux.cc saves a
357 // copy in SANDBOX_$x. This is safe because we have dropped root by this
358 // point, so we can only exec a binary with the permissions of the user who
359 // ran us in the first place.
361 for (i = 0; kSUIDUnsafeEnvironmentVariables[i]; ++i) {
362 const char* const envvar = kSUIDUnsafeEnvironmentVariables[i];
363 char* const saved_envvar = SandboxSavedEnvironmentVariable(envvar);
367 const char* const value = getenv(saved_envvar);
369 setenv(envvar, value, 1 /* overwrite */);
370 unsetenv(saved_envvar);
379 bool CheckAndExportApiVersion() {
380 // Check the environment to see if a specific API version was requested.
381 // Assume version 0 if none.
382 long api_number = -1;
383 char* api_string = getenv(kSandboxEnvironmentApiRequest);
389 api_number = strtol(api_string, &endptr, 10);
390 if (!endptr || *endptr || errno != 0)
394 // Warn only for now.
395 if (api_number != kSUIDSandboxApiNumber) {
398 "The setuid sandbox provides API version %ld, "
401 "https://code.google.com/p/chromium/wiki/LinuxSUIDSandboxDevelopment."
403 kSUIDSandboxApiNumber,
407 // Export our version so that the sandboxed process can verify it did not
408 // use an old sandbox.
409 char version_string[64];
411 version_string, sizeof(version_string), "%ld", kSUIDSandboxApiNumber);
412 if (setenv(kSandboxEnvironmentApiProvides, version_string, 1)) {
420 int main(int argc, char** argv) {
426 fprintf(stderr, "Usage: %s <renderer process> <args...>\n", argv[0]);
430 // Allow someone to query our API version
431 if (argc == 2 && 0 == strcmp(argv[1], kSuidSandboxGetApiSwitch)) {
432 printf("%ld\n", kSUIDSandboxApiNumber);
436 // In the SUID sandbox, if we succeed in calling MoveToNewNamespaces()
437 // below, then the zygote and all the renderers are in an alternate PID
438 // namespace and do not know their real PIDs. As such, they report the wrong
439 // PIDs to the task manager.
441 // To fix this, when the zygote spawns a new renderer, it gives the renderer
442 // a dummy socket, which has a unique inode number. Then it asks the sandbox
443 // host to find the PID of the process holding that fd by searching /proc.
445 // Since the zygote and renderers are all spawned by this setuid executable,
446 // their entries in /proc are owned by root and only readable by root. In
447 // order to search /proc for the fd we want, this setuid executable has to
448 // double as a helper and perform the search. The code block below does this
449 // when you call it with --find-inode INODE_NUMBER.
450 if (argc == 3 && (0 == strcmp(argv[1], kFindInodeSwitch))) {
454 ino_t inode = strtoull(argv[2], &endptr, 10);
455 if (inode == ULLONG_MAX || !endptr || *endptr || errno != 0)
457 if (!FindProcessHoldingSocket(&pid, inode))
462 // Likewise, we cannot adjust /proc/pid/oom_adj for sandboxed renderers
463 // because those files are owned by root. So we need another helper here.
464 if (argc == 4 && (0 == strcmp(argv[1], kAdjustOOMScoreSwitch))) {
468 unsigned long pid_ul = strtoul(argv[2], &endptr, 10);
469 if (pid_ul == ULONG_MAX || !endptr || *endptr || errno != 0)
474 score = strtol(argv[3], &endptr, 10);
475 if (score == LONG_MAX || score == LONG_MIN || !endptr || *endptr ||
479 return AdjustOOMScore(pid, score);
482 // Protect the core setuid sandbox functionality with an API version
483 if (!CheckAndExportApiVersion()) {
487 if (geteuid() != 0) {
489 "The setuid sandbox is not running as root. Common causes:\n"
490 " * An unprivileged process using ptrace on it, like a debugger.\n"
491 " * A parent process set prctl(PR_SET_NO_NEW_PRIVS, ...)\n");
494 if (!MoveToNewNamespaces())
496 if (!SpawnChrootHelper())
500 if (!SetupChildEnvironment())
503 execv(argv[1], &argv[1]);
504 FatalError("execv failed");