tizen 2.4 release
[external/systemd.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41 #include <glob.h>
42 #include <sys/personality.h>
43 #include <libgen.h>
44 #undef basename
45
46 #ifdef HAVE_PAM
47 #include <security/pam_appl.h>
48 #endif
49
50 #ifdef HAVE_SELINUX
51 #include <selinux/selinux.h>
52 #endif
53
54 #ifdef HAVE_SECCOMP
55 #include <seccomp.h>
56 #endif
57
58 #ifdef HAVE_APPARMOR
59 #include <sys/apparmor.h>
60 #endif
61
62 #include "execute.h"
63 #include "strv.h"
64 #include "macro.h"
65 #include "capability.h"
66 #include "util.h"
67 #include "log.h"
68 #include "sd-messages.h"
69 #include "ioprio.h"
70 #include "securebits.h"
71 #include "namespace.h"
72 #include "exit-status.h"
73 #include "missing.h"
74 #include "utmp-wtmp.h"
75 #include "def.h"
76 #include "path-util.h"
77 #include "env-util.h"
78 #include "fileio.h"
79 #include "unit.h"
80 #include "async.h"
81 #include "selinux-util.h"
82 #include "errno-list.h"
83 #include "af-list.h"
84 #include "mkdir.h"
85 #include "apparmor-util.h"
86 #include "smack-util.h"
87
88 #ifdef HAVE_SECCOMP
89 #include "seccomp-util.h"
90 #endif
91
92 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
93 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
94
95 /* This assumes there is a 'tty' group */
96 #define TTY_MODE 0620
97
98 #define SNDBUF_SIZE (8*1024*1024)
99
/* Moves the passed fds so that fds[i] ends up at descriptor number i+3.
 * The array is updated in place. Returns 0 on success or a negative
 * errno if duplicating a descriptor fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0, retry_at;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int idx;

                retry_at = -1;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int want = idx + 3, dup_fd;

                        /* Nothing to do if it already sits in its slot */
                        if (fds[idx] == want)
                                continue;

                        dup_fd = fcntl(fds[idx], F_DUPFD, want);
                        if (dup_fd < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = dup_fd;

                        /* F_DUPFD hands out the lowest free fd >= want.
                         * If the slot was still taken, remember the first
                         * such index and make another pass from there. */
                        if (dup_fd != want && retry_at < 0)
                                retry_at = idx;
                }

                first = retry_at;
        } while (retry_at >= 0);

        return 0;
}
143
/* Applies the requested O_NONBLOCK state to every passed fd and clears
 * FD_CLOEXEC on each of them. Returns 0 or the first negative error
 * reported by fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        const int *cur, *end;

        if (n_fds == 0)
                return 0;

        assert(fds);

        for (cur = fds, end = fds + n_fds; cur < end; cur++) {
                int r;

                r = fd_nonblock(*cur, nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is always dropped: these fds are meant to
                 * be passed on to our children */
                r = fd_cloexec(*cur, false);
                if (r < 0)
                        return r;
        }

        return 0;
}
170
171 _pure_ static const char *tty_path(const ExecContext *context) {
172         assert(context);
173
174         if (context->tty_path)
175                 return context->tty_path;
176
177         return "/dev/console";
178 }
179
180 static void exec_context_tty_reset(const ExecContext *context) {
181         assert(context);
182
183         if (context->tty_vhangup)
184                 terminal_vhangup(tty_path(context));
185
186         if (context->tty_reset)
187                 reset_terminal(tty_path(context));
188
189         if (context->tty_vt_disallocate && context->tty_path)
190                 vt_disallocate(context->tty_path);
191 }
192
193 static bool is_terminal_output(ExecOutput o) {
194         return
195                 o == EXEC_OUTPUT_TTY ||
196                 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
197                 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
198                 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
199 }
200
/* Opens /dev/null with the given flags (plus O_NOCTTY) and makes it
 * available as descriptor nfd. Returns nfd on success, a negative errno
 * otherwise. */
static int open_null_as(int flags, int nfd) {
        int fd, r;

        assert(nfd >= 0);

        fd = open("/dev/null", flags|O_NOCTTY);
        if (fd < 0)
                return -errno;

        /* Already landed on the requested number? Then we are done. */
        if (fd == nfd)
                return nfd;

        r = dup2(fd, nfd) < 0 ? -errno : nfd;
        safe_close(fd);

        return r;
}
218
219 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
220         int fd, r;
221         union sockaddr_union sa = {
222                 .un.sun_family = AF_UNIX,
223                 .un.sun_path = "/run/systemd/journal/stdout",
224         };
225
226         assert(context);
227         assert(output < _EXEC_OUTPUT_MAX);
228         assert(ident);
229         assert(nfd >= 0);
230
231         fd = socket(AF_UNIX, SOCK_STREAM, 0);
232         if (fd < 0)
233                 return -errno;
234
235         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
236         if (r < 0) {
237                 safe_close(fd);
238                 return -errno;
239         }
240
241         if (shutdown(fd, SHUT_RD) < 0) {
242                 safe_close(fd);
243                 return -errno;
244         }
245
246         fd_inc_sndbuf(fd, SNDBUF_SIZE);
247
248         dprintf(fd,
249                 "%s\n"
250                 "%s\n"
251                 "%i\n"
252                 "%i\n"
253                 "%i\n"
254                 "%i\n"
255                 "%i\n",
256                 context->syslog_identifier ? context->syslog_identifier : ident,
257                 unit_id,
258                 context->syslog_priority,
259                 !!context->syslog_level_prefix,
260                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
261                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
262                 is_terminal_output(output));
263
264         if (fd != nfd) {
265                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
266                 safe_close(fd);
267         } else
268                 r = nfd;
269
270         return r;
271 }
/* Opens the terminal at path (mode plus O_NOCTTY) and makes it available
 * as descriptor nfd. Returns nfd on success; on failure returns the
 * negative value from open_terminal() or a negative errno from dup2(). */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int fd, r;

        assert(path);
        assert(nfd >= 0);

        fd = open_terminal(path, mode | O_NOCTTY);
        if (fd < 0)
                return fd;

        if (fd == nfd)
                return nfd;

        r = dup2(fd, nfd) < 0 ? -errno : nfd;
        safe_close(fd);

        return r;
}
289
290 static bool is_terminal_input(ExecInput i) {
291         return
292                 i == EXEC_INPUT_TTY ||
293                 i == EXEC_INPUT_TTY_FORCE ||
294                 i == EXEC_INPUT_TTY_FAIL;
295 }
296
297 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
298
299         if (is_terminal_input(std_input) && !apply_tty_stdin)
300                 return EXEC_INPUT_NULL;
301
302         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
303                 return EXEC_INPUT_NULL;
304
305         return std_input;
306 }
307
308 static int fixup_output(ExecOutput std_output, int socket_fd) {
309
310         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
311                 return EXEC_OUTPUT_INHERIT;
312
313         return std_output;
314 }
315
316 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
317         ExecInput i;
318
319         assert(context);
320
321         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
322
323         switch (i) {
324
325         case EXEC_INPUT_NULL:
326                 return open_null_as(O_RDONLY, STDIN_FILENO);
327
328         case EXEC_INPUT_TTY:
329         case EXEC_INPUT_TTY_FORCE:
330         case EXEC_INPUT_TTY_FAIL: {
331                 int fd, r;
332
333                 fd = acquire_terminal(tty_path(context),
334                                       i == EXEC_INPUT_TTY_FAIL,
335                                       i == EXEC_INPUT_TTY_FORCE,
336                                       false,
337                                       USEC_INFINITY);
338                 if (fd < 0)
339                         return fd;
340
341                 if (fd != STDIN_FILENO) {
342                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
343                         safe_close(fd);
344                 } else
345                         r = STDIN_FILENO;
346
347                 return r;
348         }
349
350         case EXEC_INPUT_SOCKET:
351                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
352
353         default:
354                 assert_not_reached("Unknown input type");
355         }
356 }
357
358 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
359         ExecOutput o;
360         ExecInput i;
361         int r;
362
363         assert(context);
364         assert(ident);
365
366         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
367         o = fixup_output(context->std_output, socket_fd);
368
369         if (fileno == STDERR_FILENO) {
370                 ExecOutput e;
371                 e = fixup_output(context->std_error, socket_fd);
372
373                 /* This expects the input and output are already set up */
374
375                 /* Don't change the stderr file descriptor if we inherit all
376                  * the way and are not on a tty */
377                 if (e == EXEC_OUTPUT_INHERIT &&
378                     o == EXEC_OUTPUT_INHERIT &&
379                     i == EXEC_INPUT_NULL &&
380                     !is_terminal_input(context->std_input) &&
381                     getppid () != 1)
382                         return fileno;
383
384                 /* Duplicate from stdout if possible */
385                 if (e == o || e == EXEC_OUTPUT_INHERIT)
386                         return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
387
388                 o = e;
389
390         } else if (o == EXEC_OUTPUT_INHERIT) {
391                 /* If input got downgraded, inherit the original value */
392                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
393                         return open_terminal_as(tty_path(context), O_WRONLY, fileno);
394
395                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
396                 if (i != EXEC_INPUT_NULL)
397                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
398
399                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
400                 if (getppid() != 1)
401                         return fileno;
402
403                 /* We need to open /dev/null here anew, to get the right access mode. */
404                 return open_null_as(O_WRONLY, fileno);
405         }
406
407         switch (o) {
408
409         case EXEC_OUTPUT_NULL:
410                 return open_null_as(O_WRONLY, fileno);
411
412         case EXEC_OUTPUT_TTY:
413                 if (is_terminal_input(i))
414                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
415
416                 /* We don't reset the terminal if this is just about output */
417                 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
418
419         case EXEC_OUTPUT_SYSLOG:
420         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
421         case EXEC_OUTPUT_KMSG:
422         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
423         case EXEC_OUTPUT_JOURNAL:
424         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
425                 r = connect_logger_as(context, o, ident, unit_id, fileno);
426                 if (r < 0) {
427                         log_struct_unit(LOG_CRIT, unit_id,
428                                 "MESSAGE=Failed to connect std%s of %s to the journal socket: %s",
429                                 fileno == STDOUT_FILENO ? "out" : "err",
430                                 unit_id, strerror(-r),
431                                 "ERRNO=%d", -r,
432                                 NULL);
433                         r = open_null_as(O_WRONLY, fileno);
434                 }
435                 return r;
436
437         case EXEC_OUTPUT_SOCKET:
438                 assert(socket_fd >= 0);
439                 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
440
441         default:
442                 assert_not_reached("Unknown error type");
443         }
444 }
445
446 static int chown_terminal(int fd, uid_t uid) {
447         struct stat st;
448
449         assert(fd >= 0);
450
451         /* This might fail. What matters are the results. */
452         (void) fchown(fd, uid, -1);
453         (void) fchmod(fd, TTY_MODE);
454
455         if (fstat(fd, &st) < 0)
456                 return -errno;
457
458         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
459                 return -EPERM;
460
461         return 0;
462 }
463
464 static int setup_confirm_stdio(int *_saved_stdin,
465                                int *_saved_stdout) {
466         int fd = -1, saved_stdin, saved_stdout = -1, r;
467
468         assert(_saved_stdin);
469         assert(_saved_stdout);
470
471         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
472         if (saved_stdin < 0)
473                 return -errno;
474
475         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
476         if (saved_stdout < 0) {
477                 r = errno;
478                 goto fail;
479         }
480
481         fd = acquire_terminal(
482                         "/dev/console",
483                         false,
484                         false,
485                         false,
486                         DEFAULT_CONFIRM_USEC);
487         if (fd < 0) {
488                 r = fd;
489                 goto fail;
490         }
491
492         r = chown_terminal(fd, getuid());
493         if (r < 0)
494                 goto fail;
495
496         if (dup2(fd, STDIN_FILENO) < 0) {
497                 r = -errno;
498                 goto fail;
499         }
500
501         if (dup2(fd, STDOUT_FILENO) < 0) {
502                 r = -errno;
503                 goto fail;
504         }
505
506         if (fd >= 2)
507                 safe_close(fd);
508
509         *_saved_stdin = saved_stdin;
510         *_saved_stdout = saved_stdout;
511
512         return 0;
513
514 fail:
515         safe_close(saved_stdout);
516         safe_close(saved_stdin);
517         safe_close(fd);
518
519         return r;
520 }
521
522 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
523         _cleanup_close_ int fd = -1;
524         va_list ap;
525
526         assert(format);
527
528         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
529         if (fd < 0)
530                 return fd;
531
532         va_start(ap, format);
533         vdprintf(fd, format, ap);
534         va_end(ap);
535
536         return 0;
537 }
538
/* Releases the terminal and puts the saved stdin/stdout descriptors back
 * in place, then closes the saved copies. Returns 0, or the negative
 * errno of the last dup2() that failed. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int r = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                r = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                r = -errno;

        safe_close(*saved_stdin);
        safe_close(*saved_stdout);

        return r;
}
562
563 static int ask_for_confirmation(char *response, char **argv) {
564         int saved_stdout = -1, saved_stdin = -1, r;
565         _cleanup_free_ char *line = NULL;
566
567         r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
568         if (r < 0)
569                 return r;
570
571         line = exec_command_line(argv);
572         if (!line)
573                 return -ENOMEM;
574
575         r = ask_char(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
576
577         restore_confirm_stdio(&saved_stdin, &saved_stdout);
578
579         return r;
580 }
581
582 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
583         bool keep_groups = false;
584         int r;
585
586         assert(context);
587
588         /* Lookup and set GID and supplementary group list. Here too
589          * we avoid NSS lookups for gid=0. */
590
591         if (context->group || username) {
592
593                 if (context->group) {
594                         const char *g = context->group;
595
596                         if ((r = get_group_creds(&g, &gid)) < 0)
597                                 return r;
598                 }
599
600                 /* First step, initialize groups from /etc/groups */
601                 if (username && gid != 0) {
602                         if (initgroups(username, gid) < 0)
603                                 return -errno;
604
605                         keep_groups = true;
606                 }
607
608                 /* Second step, set our gids */
609                 if (setresgid(gid, gid, gid) < 0)
610                         return -errno;
611         }
612
613         if (context->supplementary_groups) {
614                 int ngroups_max, k;
615                 gid_t *gids;
616                 char **i;
617
618                 /* Final step, initialize any manually set supplementary groups */
619                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
620
621                 if (!(gids = new(gid_t, ngroups_max)))
622                         return -ENOMEM;
623
624                 if (keep_groups) {
625                         if ((k = getgroups(ngroups_max, gids)) < 0) {
626                                 free(gids);
627                                 return -errno;
628                         }
629                 } else
630                         k = 0;
631
632                 STRV_FOREACH(i, context->supplementary_groups) {
633                         const char *g;
634
635                         if (k >= ngroups_max) {
636                                 free(gids);
637                                 return -E2BIG;
638                         }
639
640                         g = *i;
641                         r = get_group_creds(&g, gids+k);
642                         if (r < 0) {
643                                 free(gids);
644                                 return r;
645                         }
646
647                         k++;
648                 }
649
650                 if (setgroups(k, gids) < 0) {
651                         free(gids);
652                         return -errno;
653                 }
654
655                 free(gids);
656         }
657
658         return 0;
659 }
660
661 static int enforce_user(const ExecContext *context, uid_t uid) {
662         assert(context);
663
664         /* Sets (but doesn't lookup) the uid and make sure we keep the
665          * capabilities while doing so. */
666
667         if (context->capabilities) {
668                 _cleanup_cap_free_ cap_t d = NULL;
669                 static const cap_value_t bits[] = {
670                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
671                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
672                 };
673
674                 /* First step: If we need to keep capabilities but
675                  * drop privileges we need to make sure we keep our
676                  * caps, while we drop privileges. */
677                 if (uid != 0) {
678                         int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
679
680                         if (prctl(PR_GET_SECUREBITS) != sb)
681                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
682                                         return -errno;
683                 }
684
685                 /* Second step: set the capabilities. This will reduce
686                  * the capabilities to the minimum we need. */
687
688                 d = cap_dup(context->capabilities);
689                 if (!d)
690                         return -errno;
691
692                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
693                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
694                         return -errno;
695
696                 if (cap_set_proc(d) < 0)
697                         return -errno;
698         }
699
700         /* Third step: actually set the uids */
701         if (setresuid(uid, uid, uid) < 0)
702                 return -errno;
703
704         /* At this point we should have all necessary capabilities but
705            are otherwise a normal user. However, the caps might got
706            corrupted due to the setresuid() so we need clean them up
707            later. This is done outside of this call. */
708
709         return 0;
710 }
711
712 #ifdef HAVE_PAM
713
714 static int null_conv(
715                 int num_msg,
716                 const struct pam_message **msg,
717                 struct pam_response **resp,
718                 void *appdata_ptr) {
719
720         /* We don't support conversations */
721
722         return PAM_CONV_ERR;
723 }
724
725 static int setup_pam(
726                 const char *name,
727                 const char *user,
728                 uid_t uid,
729                 const char *tty,
730                 char ***pam_env,
731                 int fds[], unsigned n_fds) {
732
733         static const struct pam_conv conv = {
734                 .conv = null_conv,
735                 .appdata_ptr = NULL
736         };
737
738         pam_handle_t *handle = NULL;
739         sigset_t ss, old_ss;
740         int pam_code = PAM_SUCCESS;
741         int err;
742         char **e = NULL;
743         bool close_session = false;
744         pid_t pam_pid = 0, parent_pid;
745         int flags = 0;
746
747         assert(name);
748         assert(user);
749         assert(pam_env);
750
751         /* We set up PAM in the parent process, then fork. The child
752          * will then stay around until killed via PR_GET_PDEATHSIG or
753          * systemd via the cgroup logic. It will then remove the PAM
754          * session again. The parent process will exec() the actual
755          * daemon. We do things this way to ensure that the main PID
756          * of the daemon is the one we initially fork()ed. */
757
758         if (log_get_max_level() < LOG_PRI(LOG_DEBUG))
759                 flags |= PAM_SILENT;
760
761         pam_code = pam_start(name, user, &conv, &handle);
762         if (pam_code != PAM_SUCCESS) {
763                 handle = NULL;
764                 goto fail;
765         }
766
767         if (tty) {
768                 pam_code = pam_set_item(handle, PAM_TTY, tty);
769                 if (pam_code != PAM_SUCCESS)
770                         goto fail;
771         }
772
773         pam_code = pam_acct_mgmt(handle, flags);
774         if (pam_code != PAM_SUCCESS)
775                 goto fail;
776
777         pam_code = pam_open_session(handle, flags);
778         if (pam_code != PAM_SUCCESS)
779                 goto fail;
780
781         close_session = true;
782
783         e = pam_getenvlist(handle);
784         if (!e) {
785                 pam_code = PAM_BUF_ERR;
786                 goto fail;
787         }
788
789         /* Block SIGTERM, so that we know that it won't get lost in
790          * the child */
791         if (sigemptyset(&ss) < 0 ||
792             sigaddset(&ss, SIGTERM) < 0 ||
793             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
794                 goto fail;
795
796         parent_pid = getpid();
797
798         pam_pid = fork();
799         if (pam_pid < 0)
800                 goto fail;
801
802         if (pam_pid == 0) {
803                 int sig;
804                 int r = EXIT_PAM;
805
806                 /* The child's job is to reset the PAM session on
807                  * termination */
808
809                 /* This string must fit in 10 chars (i.e. the length
810                  * of "/sbin/init"), to look pretty in /bin/ps */
811                 rename_process("(sd-pam)");
812
813                 /* Make sure we don't keep open the passed fds in this
814                 child. We assume that otherwise only those fds are
815                 open here that have been opened by PAM. */
816                 close_many(fds, n_fds);
817
818                 /* Drop privileges - we don't need any to pam_close_session
819                  * and this will make PR_SET_PDEATHSIG work in most cases.
820                  * If this fails, ignore the error - but expect sd-pam threads
821                  * to fail to exit normally */
822                 if (setresuid(uid, uid, uid) < 0)
823                         log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
824
825                 /* Wait until our parent died. This will only work if
826                  * the above setresuid() succeeds, otherwise the kernel
827                  * will not allow unprivileged parents kill their privileged
828                  * children this way. We rely on the control groups kill logic
829                  * to do the rest for us. */
830                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
831                         goto child_finish;
832
833                 /* Check if our parent process might already have
834                  * died? */
835                 if (getppid() == parent_pid) {
836                         for (;;) {
837                                 if (sigwait(&ss, &sig) < 0) {
838                                         if (errno == EINTR)
839                                                 continue;
840
841                                         goto child_finish;
842                                 }
843
844                                 assert(sig == SIGTERM);
845                                 break;
846                         }
847                 }
848
849                 /* If our parent died we'll end the session */
850                 if (getppid() != parent_pid) {
851                         pam_code = pam_close_session(handle, flags);
852                         if (pam_code != PAM_SUCCESS)
853                                 goto child_finish;
854                 }
855
856                 r = 0;
857
858         child_finish:
859                 pam_end(handle, pam_code | flags);
860                 _exit(r);
861         }
862
863         /* If the child was forked off successfully it will do all the
864          * cleanups, so forget about the handle here. */
865         handle = NULL;
866
867         /* Unblock SIGTERM again in the parent */
868         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
869                 goto fail;
870
871         /* We close the log explicitly here, since the PAM modules
872          * might have opened it, but we don't want this fd around. */
873         closelog();
874
875         *pam_env = e;
876         e = NULL;
877
878         return 0;
879
880 fail:
881         if (pam_code != PAM_SUCCESS) {
882                 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
883                 err = -EPERM;  /* PAM errors do not map to errno */
884         } else {
885                 log_error("PAM failed: %m");
886                 err = -errno;
887         }
888
889         if (handle) {
890                 if (close_session)
891                         pam_code = pam_close_session(handle, flags);
892
893                 pam_end(handle, pam_code | flags);
894         }
895
896         strv_free(e);
897
898         closelog();
899
900         if (pam_pid > 1) {
901                 kill(pam_pid, SIGTERM);
902                 kill(pam_pid, SIGCONT);
903         }
904
905         return err;
906 }
907 #endif
908
/* Renames the current process to "(<name>)", where <name> is the file
 * name component of path, cut down to its last 8 characters if longer.
 * An empty file name yields "(...)". */
static void rename_process_from_path(const char *path) {
        enum { TAIL_MAX = 8 };
        /* '(' + up to 8 chars + ')' + NUL: the visible part stays within
         * 10 chars (the length of "/sbin/init"), to look pretty in
         * /bin/ps */
        char buf[1 + TAIL_MAX + 1 + 1];
        const char *base, *tail;
        char *w = buf;
        size_t n;

        base = basename(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        n = strlen(base);
        tail = base;

        /* Keep the end of long names: it is usually the more interesting
         * part, as the beginning might just be "systemd-" */
        if (n > TAIL_MAX) {
                tail = base + (n - TAIL_MAX);
                n = TAIL_MAX;
        }

        *w++ = '(';
        memcpy(w, tail, n);
        w += n;
        *w++ = ')';
        *w = 0;

        rename_process(buf);
}
939
940 #ifdef HAVE_SECCOMP
941
942 static int apply_seccomp(ExecContext *c) {
943         uint32_t negative_action, action;
944         scmp_filter_ctx *seccomp;
945         Iterator i;
946         void *id;
947         int r;
948
949         assert(c);
950
951         negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
952
953         seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
954         if (!seccomp)
955                 return -ENOMEM;
956
957         if (c->syscall_archs) {
958
959                 SET_FOREACH(id, c->syscall_archs, i) {
960                         r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
961                         if (r == -EEXIST)
962                                 continue;
963                         if (r < 0)
964                                 goto finish;
965                 }
966
967         } else {
968                 r = seccomp_add_secondary_archs(seccomp);
969                 if (r < 0)
970                         goto finish;
971         }
972
973         action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
974         SET_FOREACH(id, c->syscall_filter, i) {
975                 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
976                 if (r < 0)
977                         goto finish;
978         }
979
980         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
981         if (r < 0)
982                 goto finish;
983
984         r = seccomp_load(seccomp);
985
986 finish:
987         seccomp_release(seccomp);
988         return r;
989 }
990
991 static int apply_address_families(ExecContext *c) {
992         scmp_filter_ctx *seccomp;
993         Iterator i;
994         int r;
995
996         assert(c);
997
998         seccomp = seccomp_init(SCMP_ACT_ALLOW);
999         if (!seccomp)
1000                 return -ENOMEM;
1001
1002         r = seccomp_add_secondary_archs(seccomp);
1003         if (r < 0)
1004                 goto finish;
1005
1006         if (c->address_families_whitelist) {
1007                 int af, first = 0, last = 0;
1008                 void *afp;
1009
1010                 /* If this is a whitelist, we first block the address
1011                  * families that are out of range and then everything
1012                  * that is not in the set. First, we find the lowest
1013                  * and highest address family in the set. */
1014
1015                 SET_FOREACH(afp, c->address_families, i) {
1016                         af = PTR_TO_INT(afp);
1017
1018                         if (af <= 0 || af >= af_max())
1019                                 continue;
1020
1021                         if (first == 0 || af < first)
1022                                 first = af;
1023
1024                         if (last == 0 || af > last)
1025                                 last = af;
1026                 }
1027
1028                 assert((first == 0) == (last == 0));
1029
1030                 if (first == 0) {
1031
1032                         /* No entries in the valid range, block everything */
1033                         r = seccomp_rule_add(
1034                                         seccomp,
1035                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1036                                         SCMP_SYS(socket),
1037                                         0);
1038                         if (r < 0)
1039                                 goto finish;
1040
1041                 } else {
1042
1043                         /* Block everything below the first entry */
1044                         r = seccomp_rule_add(
1045                                         seccomp,
1046                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1047                                         SCMP_SYS(socket),
1048                                         1,
1049                                         SCMP_A0(SCMP_CMP_LT, first));
1050                         if (r < 0)
1051                                 goto finish;
1052
1053                         /* Block everything above the last entry */
1054                         r = seccomp_rule_add(
1055                                         seccomp,
1056                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1057                                         SCMP_SYS(socket),
1058                                         1,
1059                                         SCMP_A0(SCMP_CMP_GT, last));
1060                         if (r < 0)
1061                                 goto finish;
1062
1063                         /* Block everything between the first and last
1064                          * entry */
1065                         for (af = 1; af < af_max(); af++) {
1066
1067                                 if (set_contains(c->address_families, INT_TO_PTR(af)))
1068                                         continue;
1069
1070                                 r = seccomp_rule_add(
1071                                                 seccomp,
1072                                                 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1073                                                 SCMP_SYS(socket),
1074                                                 1,
1075                                                 SCMP_A0(SCMP_CMP_EQ, af));
1076                                 if (r < 0)
1077                                         goto finish;
1078                         }
1079                 }
1080
1081         } else {
1082                 void *af;
1083
1084                 /* If this is a blacklist, then generate one rule for
1085                  * each address family that are then combined in OR
1086                  * checks. */
1087
1088                 SET_FOREACH(af, c->address_families, i) {
1089
1090                         r = seccomp_rule_add(
1091                                         seccomp,
1092                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1093                                         SCMP_SYS(socket),
1094                                         1,
1095                                         SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1096                         if (r < 0)
1097                                 goto finish;
1098                 }
1099         }
1100
1101         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1102         if (r < 0)
1103                 goto finish;
1104
1105         r = seccomp_load(seccomp);
1106
1107 finish:
1108         seccomp_release(seccomp);
1109         return r;
1110 }
1111
1112 #endif
1113
1114 static void do_idle_pipe_dance(int idle_pipe[4]) {
1115         assert(idle_pipe);
1116
1117
1118         safe_close(idle_pipe[1]);
1119         safe_close(idle_pipe[2]);
1120
1121         if (idle_pipe[0] >= 0) {
1122                 int r;
1123
1124                 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1125
1126                 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1127                         /* Signal systemd that we are bored and want to continue. */
1128                         write(idle_pipe[3], "x", 1);
1129
1130                         /* Wait for systemd to react to the signal above. */
1131                         fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1132                 }
1133
1134                 safe_close(idle_pipe[0]);
1135
1136         }
1137
1138         safe_close(idle_pipe[3]);
1139 }
1140
1141 static int build_environment(
1142                 ExecContext *c,
1143                 unsigned n_fds,
1144                 usec_t watchdog_usec,
1145                 const char *home,
1146                 const char *username,
1147                 const char *shell,
1148                 char ***ret) {
1149
1150         _cleanup_strv_free_ char **our_env = NULL;
1151         unsigned n_env = 0;
1152         char *x;
1153
1154         assert(c);
1155         assert(ret);
1156
1157         our_env = new0(char*, 10);
1158         if (!our_env)
1159                 return -ENOMEM;
1160
1161         if (n_fds > 0) {
1162                 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1163                         return -ENOMEM;
1164                 our_env[n_env++] = x;
1165
1166                 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1167                         return -ENOMEM;
1168                 our_env[n_env++] = x;
1169         }
1170
1171         if (watchdog_usec > 0) {
1172                 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1173                         return -ENOMEM;
1174                 our_env[n_env++] = x;
1175
1176                 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, watchdog_usec) < 0)
1177                         return -ENOMEM;
1178                 our_env[n_env++] = x;
1179         }
1180
1181         if (home) {
1182                 x = strappend("HOME=", home);
1183                 if (!x)
1184                         return -ENOMEM;
1185                 our_env[n_env++] = x;
1186         }
1187
1188         if (username) {
1189                 x = strappend("LOGNAME=", username);
1190                 if (!x)
1191                         return -ENOMEM;
1192                 our_env[n_env++] = x;
1193
1194                 x = strappend("USER=", username);
1195                 if (!x)
1196                         return -ENOMEM;
1197                 our_env[n_env++] = x;
1198         }
1199
1200         if (shell) {
1201                 x = strappend("SHELL=", shell);
1202                 if (!x)
1203                         return -ENOMEM;
1204                 our_env[n_env++] = x;
1205         }
1206
1207         if (is_terminal_input(c->std_input) ||
1208             c->std_output == EXEC_OUTPUT_TTY ||
1209             c->std_error == EXEC_OUTPUT_TTY ||
1210             c->tty_path) {
1211
1212                 x = strdup(default_term_for_tty(tty_path(c)));
1213                 if (!x)
1214                         return -ENOMEM;
1215                 our_env[n_env++] = x;
1216         }
1217
1218         our_env[n_env++] = NULL;
1219         assert(n_env <= 10);
1220
1221         *ret = our_env;
1222         our_env = NULL;
1223
1224         return 0;
1225 }
1226
1227 int exec_spawn(ExecCommand *command,
1228                char **argv,
1229                ExecContext *context,
1230                int fds[], unsigned n_fds,
1231                char **environment,
1232                bool apply_permissions,
1233                bool apply_chroot,
1234                bool apply_tty_stdin,
1235                bool confirm_spawn,
1236                CGroupControllerMask cgroup_supported,
1237                const char *cgroup_path,
1238                const char *runtime_prefix,
1239                const char *unit_id,
1240                usec_t watchdog_usec,
1241                int idle_pipe[4],
1242                ExecRuntime *runtime,
1243                pid_t *ret) {
1244
1245         _cleanup_strv_free_ char **files_env = NULL;
1246         int socket_fd;
1247         char *line;
1248         pid_t pid;
1249         int r;
1250
1251         assert(command);
1252         assert(context);
1253         assert(ret);
1254         assert(fds || n_fds <= 0);
1255
1256         if (context->std_input == EXEC_INPUT_SOCKET ||
1257             context->std_output == EXEC_OUTPUT_SOCKET ||
1258             context->std_error == EXEC_OUTPUT_SOCKET) {
1259
1260                 if (n_fds != 1)
1261                         return -EINVAL;
1262
1263                 socket_fd = fds[0];
1264
1265                 fds = NULL;
1266                 n_fds = 0;
1267         } else
1268                 socket_fd = -1;
1269
1270         r = exec_context_load_environment(context, &files_env);
1271         if (r < 0) {
1272                 log_struct_unit(LOG_ERR,
1273                            unit_id,
1274                            "MESSAGE=Failed to load environment files: %s", strerror(-r),
1275                            "ERRNO=%d", -r,
1276                            NULL);
1277                 return r;
1278         }
1279
1280         if (!argv)
1281                 argv = command->argv;
1282
1283         line = exec_command_line(argv);
1284         if (!line)
1285                 return log_oom();
1286
1287         log_struct_unit(LOG_DEBUG,
1288                         unit_id,
1289                         "EXECUTABLE=%s", command->path,
1290                         "MESSAGE=About to execute: %s", line,
1291                         NULL);
1292         free(line);
1293
1294         pid = fork();
1295         if (pid < 0)
1296                 return -errno;
1297
1298         if (pid == 0) {
1299                 _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1300                 const char *username = NULL, *home = NULL, *shell = NULL;
1301                 unsigned n_dont_close = 0;
1302                 int dont_close[n_fds + 3];
1303                 uid_t uid = (uid_t) -1;
1304                 gid_t gid = (gid_t) -1;
1305                 sigset_t ss;
1306                 int i, err;
1307
1308                 /* child */
1309
1310                 rename_process_from_path(command->path);
1311
1312                 /* We reset exactly these signals, since they are the
1313                  * only ones we set to SIG_IGN in the main daemon. All
1314                  * others we leave untouched because we set them to
1315                  * SIG_DFL or a valid handler initially, both of which
1316                  * will be demoted to SIG_DFL. */
1317                 default_signals(SIGNALS_CRASH_HANDLER,
1318                                 SIGNALS_IGNORE, -1);
1319
1320                 if (context->ignore_sigpipe)
1321                         ignore_signals(SIGPIPE, -1);
1322
1323                 assert_se(sigemptyset(&ss) == 0);
1324                 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1325                         err = -errno;
1326                         r = EXIT_SIGNAL_MASK;
1327                         goto fail_child;
1328                 }
1329
1330                 if (idle_pipe)
1331                         do_idle_pipe_dance(idle_pipe);
1332
1333                 /* Close sockets very early to make sure we don't
1334                  * block init reexecution because it cannot bind its
1335                  * sockets */
1336                 log_forget_fds();
1337
1338                 if (socket_fd >= 0)
1339                         dont_close[n_dont_close++] = socket_fd;
1340                 if (n_fds > 0) {
1341                         memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1342                         n_dont_close += n_fds;
1343                 }
1344                 if (runtime) {
1345                         if (runtime->netns_storage_socket[0] >= 0)
1346                                 dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1347                         if (runtime->netns_storage_socket[1] >= 0)
1348                                 dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1349                 }
1350
1351                 err = close_all_fds(dont_close, n_dont_close);
1352                 if (err < 0) {
1353                         r = EXIT_FDS;
1354                         goto fail_child;
1355                 }
1356
1357                 if (!context->same_pgrp)
1358                         if (setsid() < 0) {
1359                                 err = -errno;
1360                                 r = EXIT_SETSID;
1361                                 goto fail_child;
1362                         }
1363
1364                 exec_context_tty_reset(context);
1365
1366                 if (confirm_spawn) {
1367                         char response;
1368
1369                         err = ask_for_confirmation(&response, argv);
1370                         if (err == -ETIMEDOUT)
1371                                 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1372                         else if (err < 0)
1373                                 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1374                         else if (response == 's') {
1375                                 write_confirm_message("Skipping execution.\n");
1376                                 err = -ECANCELED;
1377                                 r = EXIT_CONFIRM;
1378                                 goto fail_child;
1379                         } else if (response == 'n') {
1380                                 write_confirm_message("Failing execution.\n");
1381                                 err = r = 0;
1382                                 goto fail_child;
1383                         }
1384                 }
1385
1386                 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1387                  * must sure to drop O_NONBLOCK */
1388                 if (socket_fd >= 0)
1389                         fd_nonblock(socket_fd, false);
1390
1391                 err = setup_input(context, socket_fd, apply_tty_stdin);
1392                 if (err < 0) {
1393                         r = EXIT_STDIN;
1394                         goto fail_child;
1395                 }
1396
1397                 err = setup_output(context, STDOUT_FILENO, socket_fd, basename(command->path), unit_id, apply_tty_stdin);
1398                 if (err < 0) {
1399                         r = EXIT_STDOUT;
1400                         goto fail_child;
1401                 }
1402
1403                 err = setup_output(context, STDERR_FILENO, socket_fd, basename(command->path), unit_id, apply_tty_stdin);
1404                 if (err < 0) {
1405                         r = EXIT_STDERR;
1406                         goto fail_child;
1407                 }
1408
1409                 if (cgroup_path) {
1410                         err = cg_attach_everywhere(cgroup_supported, cgroup_path, 0);
1411                         if (err < 0) {
1412                                 r = EXIT_CGROUP;
1413                                 goto fail_child;
1414                         }
1415                 }
1416
1417                 if (context->oom_score_adjust_set) {
1418                         char t[16];
1419
1420                         snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1421                         char_array_0(t);
1422
1423                         if (write_string_file("/proc/self/oom_score_adj", t) < 0) {
1424                                 err = -errno;
1425                                 r = EXIT_OOM_ADJUST;
1426                                 goto fail_child;
1427                         }
1428                 }
1429
1430                 if (context->nice_set)
1431                         if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1432                                 err = -errno;
1433                                 r = EXIT_NICE;
1434                                 goto fail_child;
1435                         }
1436
1437                 if (context->cpu_sched_set) {
1438                         struct sched_param param = {
1439                                 .sched_priority = context->cpu_sched_priority,
1440                         };
1441
1442                         r = sched_setscheduler(0,
1443                                                context->cpu_sched_policy |
1444                                                (context->cpu_sched_reset_on_fork ?
1445                                                 SCHED_RESET_ON_FORK : 0),
1446                                                &param);
1447                         if (r < 0) {
1448                                 err = -errno;
1449                                 r = EXIT_SETSCHEDULER;
1450                                 goto fail_child;
1451                         }
1452                 }
1453
1454                 if (context->cpuset)
1455                         if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1456                                 err = -errno;
1457                                 r = EXIT_CPUAFFINITY;
1458                                 goto fail_child;
1459                         }
1460
1461                 if (context->ioprio_set)
1462                         if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1463                                 err = -errno;
1464                                 r = EXIT_IOPRIO;
1465                                 goto fail_child;
1466                         }
1467
1468                 if (context->timer_slack_nsec != NSEC_INFINITY)
1469                         if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1470                                 err = -errno;
1471                                 r = EXIT_TIMERSLACK;
1472                                 goto fail_child;
1473                         }
1474
1475                 if (context->personality != 0xffffffffUL)
1476                         if (personality(context->personality) < 0) {
1477                                 err = -errno;
1478                                 r = EXIT_PERSONALITY;
1479                                 goto fail_child;
1480                         }
1481
1482                 if (context->utmp_id)
1483                         utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1484
1485                 if (context->user) {
1486                         username = context->user;
1487                         err = get_user_creds(&username, &uid, &gid, &home, &shell);
1488                         if (err < 0) {
1489                                 r = EXIT_USER;
1490                                 goto fail_child;
1491                         }
1492
1493                         if (is_terminal_input(context->std_input)) {
1494                                 err = chown_terminal(STDIN_FILENO, uid);
1495                                 if (err < 0) {
1496                                         r = EXIT_STDIN;
1497                                         goto fail_child;
1498                                 }
1499                         }
1500                 }
1501
1502 #ifdef HAVE_PAM
1503                 if (cgroup_path && context->user && context->pam_name) {
1504                         err = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, 0644, uid, gid);
1505                         if (err < 0) {
1506                                 r = EXIT_CGROUP;
1507                                 goto fail_child;
1508                         }
1509
1510
1511                         err = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, 0755, uid, gid);
1512                         if (err < 0) {
1513                                 r = EXIT_CGROUP;
1514                                 goto fail_child;
1515                         }
1516                 }
1517 #endif
1518
1519                 if (!strv_isempty(context->runtime_directory) && runtime_prefix) {
1520                         char **rt;
1521
1522                         STRV_FOREACH(rt, context->runtime_directory) {
1523                                 _cleanup_free_ char *p;
1524
1525                                 p = strjoin(runtime_prefix, "/", *rt, NULL);
1526                                 if (!p) {
1527                                         r = EXIT_RUNTIME_DIRECTORY;
1528                                         err = -ENOMEM;
1529                                         goto fail_child;
1530                                 }
1531
1532                                 err = mkdir_safe(p, context->runtime_directory_mode, uid, gid);
1533                                 if (err < 0) {
1534                                         r = EXIT_RUNTIME_DIRECTORY;
1535                                         goto fail_child;
1536                                 }
1537                         }
1538                 }
1539
1540                 if (apply_permissions) {
1541                         err = enforce_groups(context, username, gid);
1542                         if (err < 0) {
1543                                 r = EXIT_GROUP;
1544                                 goto fail_child;
1545                         }
1546                 }
1547
1548                 umask(context->umask);
1549
1550 #ifdef HAVE_PAM
1551                 if (apply_permissions && context->pam_name && username) {
1552                         err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1553                         if (err < 0) {
1554                                 r = EXIT_PAM;
1555                                 goto fail_child;
1556                         }
1557                 }
1558 #endif
1559                 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1560                         err = setup_netns(runtime->netns_storage_socket);
1561                         if (err < 0) {
1562                                 r = EXIT_NETWORK;
1563                                 goto fail_child;
1564                         }
1565                 }
1566
1567                 if (!strv_isempty(context->read_write_dirs) ||
1568                     !strv_isempty(context->read_only_dirs) ||
1569                     !strv_isempty(context->inaccessible_dirs) ||
1570                     context->mount_flags != 0 ||
1571                     (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
1572                     context->private_devices ||
1573                     context->protect_system != PROTECT_SYSTEM_NO ||
1574                     context->protect_home != PROTECT_HOME_NO) {
1575
1576                         char *tmp = NULL, *var = NULL;
1577
1578                         /* The runtime struct only contains the parent
1579                          * of the private /tmp, which is
1580                          * non-accessible to world users. Inside of it
1581                          * there's a /tmp that is sticky, and that's
1582                          * the one we want to use here. */
1583
1584                         if (context->private_tmp && runtime) {
1585                                 if (runtime->tmp_dir)
1586                                         tmp = strappenda(runtime->tmp_dir, "/tmp");
1587                                 if (runtime->var_tmp_dir)
1588                                         var = strappenda(runtime->var_tmp_dir, "/tmp");
1589                         }
1590
1591                         err = setup_namespace(
1592                                         context->read_write_dirs,
1593                                         context->read_only_dirs,
1594                                         context->inaccessible_dirs,
1595                                         tmp,
1596                                         var,
1597                                         context->private_devices,
1598                                         context->protect_home,
1599                                         context->protect_system,
1600                                         context->mount_flags);
1601                         if (err < 0) {
1602                                 r = EXIT_NAMESPACE;
1603                                 goto fail_child;
1604                         }
1605                 }
1606
1607                 if (apply_chroot) {
1608                         if (context->root_directory)
1609                                 if (chroot(context->root_directory) < 0) {
1610                                         err = -errno;
1611                                         r = EXIT_CHROOT;
1612                                         goto fail_child;
1613                                 }
1614
1615                         if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1616                                 err = -errno;
1617                                 r = EXIT_CHDIR;
1618                                 goto fail_child;
1619                         }
1620                 } else {
1621                         _cleanup_free_ char *d = NULL;
1622
1623                         if (asprintf(&d, "%s/%s",
1624                                      context->root_directory ? context->root_directory : "",
1625                                      context->working_directory ? context->working_directory : "") < 0) {
1626                                 err = -ENOMEM;
1627                                 r = EXIT_MEMORY;
1628                                 goto fail_child;
1629                         }
1630
1631                         if (chdir(d) < 0) {
1632                                 err = -errno;
1633                                 r = EXIT_CHDIR;
1634                                 goto fail_child;
1635                         }
1636                 }
1637
1638                 /* We repeat the fd closing here, to make sure that
1639                  * nothing is leaked from the PAM modules */
1640                 err = close_all_fds(fds, n_fds);
1641                 if (err >= 0)
1642                         err = shift_fds(fds, n_fds);
1643                 if (err >= 0)
1644                         err = flags_fds(fds, n_fds, context->non_blocking);
1645                 if (err < 0) {
1646                         r = EXIT_FDS;
1647                         goto fail_child;
1648                 }
1649
1650                 if (apply_permissions) {
1651
1652                         for (i = 0; i < _RLIMIT_MAX; i++) {
1653                                 if (!context->rlimit[i])
1654                                         continue;
1655
1656                                 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1657                                         err = -errno;
1658                                         r = EXIT_LIMITS;
1659                                         goto fail_child;
1660                                 }
1661                         }
1662
1663                         if (context->capability_bounding_set_drop) {
1664                                 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1665                                 if (err < 0) {
1666                                         r = EXIT_CAPABILITIES;
1667                                         goto fail_child;
1668                                 }
1669                         }
1670
1671 #ifdef HAVE_SMACK
1672                         if (context->smack_process_label) {
1673                                 err = smack_label_apply_pid(0, context->smack_process_label);
1674                                 if (err < 0) {
1675                                         r = EXIT_SMACK_PROCESS_LABEL;
1676                                         goto fail_child;
1677                                 }
1678                         }
1679 #ifdef SMACK_DEFAULT_PROCESS_LABEL
1680                         else {
1681                                 err = smack_label_apply_pid(0, SMACK_DEFAULT_PROCESS_LABEL);
1682                                 if (err < 0) {
1683                                         r = EXIT_SMACK_PROCESS_LABEL;
1684                                         goto fail_child;
1685                                 }
1686                         }
1687 #endif
1688 #endif
1689
1690                         if (context->user) {
1691                                 err = enforce_user(context, uid);
1692                                 if (err < 0) {
1693                                         r = EXIT_USER;
1694                                         goto fail_child;
1695                                 }
1696                         }
1697
1698                         /* PR_GET_SECUREBITS is not privileged, while
1699                          * PR_SET_SECUREBITS is. So to suppress
1700                          * potential EPERMs we'll try not to call
1701                          * PR_SET_SECUREBITS unless necessary. */
1702                         if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1703                                 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1704                                         err = -errno;
1705                                         r = EXIT_SECUREBITS;
1706                                         goto fail_child;
1707                                 }
1708
1709                         if (context->capabilities)
1710                                 if (cap_set_proc(context->capabilities) < 0) {
1711                                         err = -errno;
1712                                         r = EXIT_CAPABILITIES;
1713                                         goto fail_child;
1714                                 }
1715
1716                         if (context->no_new_privileges)
1717                                 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1718                                         err = -errno;
1719                                         r = EXIT_NO_NEW_PRIVILEGES;
1720                                         goto fail_child;
1721                                 }
1722
1723 #ifdef HAVE_SECCOMP
1724                         if (context->address_families_whitelist ||
1725                             !set_isempty(context->address_families)) {
1726                                 err = apply_address_families(context);
1727                                 if (err < 0) {
1728                                         r = EXIT_ADDRESS_FAMILIES;
1729                                         goto fail_child;
1730                                 }
1731                         }
1732
1733                         if (context->syscall_whitelist ||
1734                             !set_isempty(context->syscall_filter) ||
1735                             !set_isempty(context->syscall_archs)) {
1736                                 err = apply_seccomp(context);
1737                                 if (err < 0) {
1738                                         r = EXIT_SECCOMP;
1739                                         goto fail_child;
1740                                 }
1741                         }
1742 #endif
1743
1744 #ifdef HAVE_SELINUX
1745                         if (context->selinux_context && use_selinux()) {
1746                                 err = setexeccon(context->selinux_context);
1747                                 if (err < 0 && !context->selinux_context_ignore) {
1748                                         r = EXIT_SELINUX_CONTEXT;
1749                                         goto fail_child;
1750                                 }
1751                         }
1752 #endif
1753
1754 #ifdef HAVE_APPARMOR
1755                         if (context->apparmor_profile && use_apparmor()) {
1756                                 err = aa_change_onexec(context->apparmor_profile);
1757                                 if (err < 0 && !context->apparmor_profile_ignore) {
1758                                         r = EXIT_APPARMOR_PROFILE;
1759                                         goto fail_child;
1760                                 }
1761                         }
1762 #endif
1763                 }
1764
1765                 err = build_environment(context, n_fds, watchdog_usec, home, username, shell, &our_env);
1766                 if (r < 0) {
1767                         r = EXIT_MEMORY;
1768                         goto fail_child;
1769                 }
1770
1771                 final_env = strv_env_merge(5,
1772                                            environment,
1773                                            our_env,
1774                                            context->environment,
1775                                            files_env,
1776                                            pam_env,
1777                                            NULL);
1778                 if (!final_env) {
1779                         err = -ENOMEM;
1780                         r = EXIT_MEMORY;
1781                         goto fail_child;
1782                 }
1783
1784                 final_argv = replace_env_argv(argv, final_env);
1785                 if (!final_argv) {
1786                         err = -ENOMEM;
1787                         r = EXIT_MEMORY;
1788                         goto fail_child;
1789                 }
1790
1791                 final_env = strv_env_clean(final_env);
1792
1793                 if (_unlikely_(log_get_max_level() >= LOG_PRI(LOG_DEBUG))) {
1794                         line = exec_command_line(final_argv);
1795                         if (line) {
1796                                 log_open();
1797                                 log_struct_unit(LOG_DEBUG,
1798                                                 unit_id,
1799                                                 "EXECUTABLE=%s", command->path,
1800                                                 "MESSAGE=Executing: %s", line,
1801                                                 NULL);
1802                                 log_close();
1803                                 free(line);
1804                                 line = NULL;
1805                         }
1806                 }
1807                 execve(command->path, final_argv, final_env);
1808                 err = -errno;
1809                 r = EXIT_EXEC;
1810
1811         fail_child:
1812                 if (r != 0) {
1813                         log_open();
1814                         log_struct(LOG_ERR, MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1815                                    "EXECUTABLE=%s", command->path,
1816                                    "MESSAGE=Failed at step %s spawning %s: %s",
1817                                           exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1818                                           command->path, strerror(-err),
1819                                    "ERRNO=%d", -err,
1820                                    NULL);
1821                         log_close();
1822                 }
1823
1824                 _exit(r);
1825         }
1826
1827         log_struct_unit(LOG_DEBUG,
1828                         unit_id,
1829                         "MESSAGE=Forked %s as "PID_FMT,
1830                         command->path, pid,
1831                         NULL);
1832
1833         /* We add the new process to the cgroup both in the child (so
1834          * that we can be sure that no user code is ever executed
1835          * outside of the cgroup) and in the parent (so that we can be
1836          * sure that when we kill the cgroup the process will be
1837          * killed too). */
1838         if (cgroup_path)
1839                 cg_attach(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, pid);
1840
1841         exec_status_start(&command->exec_status, pid);
1842
1843         *ret = pid;
1844         return 0;
1845 }
1846
1847 void exec_context_init(ExecContext *c) {
1848         assert(c);
1849
1850         c->umask = 0022;
1851         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1852         c->cpu_sched_policy = SCHED_OTHER;
1853         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1854         c->syslog_level_prefix = true;
1855         c->ignore_sigpipe = true;
1856         c->timer_slack_nsec = NSEC_INFINITY;
1857         c->personality = 0xffffffffUL;
1858         c->runtime_directory_mode = 0755;
1859 }
1860
1861 void exec_context_done(ExecContext *c) {
1862         unsigned l;
1863
1864         assert(c);
1865
1866         strv_free(c->environment);
1867         c->environment = NULL;
1868
1869         strv_free(c->environment_files);
1870         c->environment_files = NULL;
1871
1872         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1873                 free(c->rlimit[l]);
1874                 c->rlimit[l] = NULL;
1875         }
1876
1877         free(c->working_directory);
1878         c->working_directory = NULL;
1879         free(c->root_directory);
1880         c->root_directory = NULL;
1881
1882         free(c->tty_path);
1883         c->tty_path = NULL;
1884
1885         free(c->syslog_identifier);
1886         c->syslog_identifier = NULL;
1887
1888         free(c->user);
1889         c->user = NULL;
1890
1891         free(c->group);
1892         c->group = NULL;
1893
1894         strv_free(c->supplementary_groups);
1895         c->supplementary_groups = NULL;
1896
1897         free(c->pam_name);
1898         c->pam_name = NULL;
1899
1900         if (c->capabilities) {
1901                 cap_free(c->capabilities);
1902                 c->capabilities = NULL;
1903         }
1904
1905         strv_free(c->read_only_dirs);
1906         c->read_only_dirs = NULL;
1907
1908         strv_free(c->read_write_dirs);
1909         c->read_write_dirs = NULL;
1910
1911         strv_free(c->inaccessible_dirs);
1912         c->inaccessible_dirs = NULL;
1913
1914         if (c->cpuset)
1915                 CPU_FREE(c->cpuset);
1916
1917         free(c->utmp_id);
1918         c->utmp_id = NULL;
1919
1920         free(c->selinux_context);
1921         c->selinux_context = NULL;
1922
1923         free(c->apparmor_profile);
1924         c->apparmor_profile = NULL;
1925
1926         set_free(c->syscall_filter);
1927         c->syscall_filter = NULL;
1928
1929         set_free(c->syscall_archs);
1930         c->syscall_archs = NULL;
1931
1932         set_free(c->address_families);
1933         c->address_families = NULL;
1934
1935         strv_free(c->runtime_directory);
1936         c->runtime_directory = NULL;
1937 }
1938
1939 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
1940         char **i;
1941
1942         assert(c);
1943
1944         if (!runtime_prefix)
1945                 return 0;
1946
1947         STRV_FOREACH(i, c->runtime_directory) {
1948                 _cleanup_free_ char *p;
1949
1950                 p = strjoin(runtime_prefix, "/", *i, NULL);
1951                 if (!p)
1952                         return -ENOMEM;
1953
1954                 /* We execute this synchronously, since we need to be
1955                  * sure this is gone when we start the service
1956                  * next. */
1957                 rm_rf_dangerous(p, false, true, false);
1958         }
1959
1960         return 0;
1961 }
1962
1963 void exec_command_done(ExecCommand *c) {
1964         assert(c);
1965
1966         free(c->path);
1967         c->path = NULL;
1968
1969         strv_free(c->argv);
1970         c->argv = NULL;
1971 }
1972
1973 void exec_command_done_array(ExecCommand *c, unsigned n) {
1974         unsigned i;
1975
1976         for (i = 0; i < n; i++)
1977                 exec_command_done(c+i);
1978 }
1979
1980 void exec_command_free_list(ExecCommand *c) {
1981         ExecCommand *i;
1982
1983         while ((i = c)) {
1984                 LIST_REMOVE(command, c, i);
1985                 exec_command_done(i);
1986                 free(i);
1987         }
1988 }
1989
1990 void exec_command_free_array(ExecCommand **c, unsigned n) {
1991         unsigned i;
1992
1993         for (i = 0; i < n; i++) {
1994                 exec_command_free_list(c[i]);
1995                 c[i] = NULL;
1996         }
1997 }
1998
1999 int exec_context_load_environment(const ExecContext *c, char ***l) {
2000         char **i, **r = NULL;
2001
2002         assert(c);
2003         assert(l);
2004
2005         STRV_FOREACH(i, c->environment_files) {
2006                 char *fn;
2007                 int k;
2008                 bool ignore = false;
2009                 char **p;
2010                 _cleanup_globfree_ glob_t pglob = {};
2011                 int count, n;
2012
2013                 fn = *i;
2014
2015                 if (fn[0] == '-') {
2016                         ignore = true;
2017                         fn ++;
2018                 }
2019
2020                 if (!path_is_absolute(fn)) {
2021                         if (ignore)
2022                                 continue;
2023
2024                         strv_free(r);
2025                         return -EINVAL;
2026                 }
2027
2028                 /* Filename supports globbing, take all matching files */
2029                 errno = 0;
2030                 if (glob(fn, 0, NULL, &pglob) != 0) {
2031                         if (ignore)
2032                                 continue;
2033
2034                         strv_free(r);
2035                         return errno ? -errno : -EINVAL;
2036                 }
2037                 count = pglob.gl_pathc;
2038                 if (count == 0) {
2039                         if (ignore)
2040                                 continue;
2041
2042                         strv_free(r);
2043                         return -EINVAL;
2044                 }
2045                 for (n = 0; n < count; n++) {
2046                         k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2047                         if (k < 0) {
2048                                 if (ignore)
2049                                         continue;
2050
2051                                 strv_free(r);
2052                                 return k;
2053                         }
2054                         /* Log invalid environment variables with filename */
2055                         if (p)
2056                                 p = strv_env_clean_log(p, pglob.gl_pathv[n]);
2057
2058                         if (r == NULL)
2059                                 r = p;
2060                         else {
2061                                 char **m;
2062
2063                                 m = strv_env_merge(2, r, p);
2064                                 strv_free(r);
2065                                 strv_free(p);
2066                                 if (!m)
2067                                         return -ENOMEM;
2068
2069                                 r = m;
2070                         }
2071                 }
2072         }
2073
2074         *l = r;
2075
2076         return 0;
2077 }
2078
2079 static bool tty_may_match_dev_console(const char *tty) {
2080         _cleanup_free_ char *active = NULL;
2081        char *console;
2082
2083         if (startswith(tty, "/dev/"))
2084                 tty += 5;
2085
2086         /* trivial identity? */
2087         if (streq(tty, "console"))
2088                 return true;
2089
2090         console = resolve_dev_console(&active);
2091         /* if we could not resolve, assume it may */
2092         if (!console)
2093                 return true;
2094
2095         /* "tty0" means the active VC, so it may be the same sometimes */
2096         return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
2097 }
2098
2099 bool exec_context_may_touch_console(ExecContext *ec) {
2100         return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
2101                 is_terminal_input(ec->std_input) ||
2102                 is_terminal_output(ec->std_output) ||
2103                 is_terminal_output(ec->std_error)) &&
2104                tty_may_match_dev_console(tty_path(ec));
2105 }
2106
2107 static void strv_fprintf(FILE *f, char **l) {
2108         char **g;
2109
2110         assert(f);
2111
2112         STRV_FOREACH(g, l)
2113                 fprintf(f, " %s", *g);
2114 }
2115
2116 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
2117         char **e;
2118         unsigned i;
2119
2120         assert(c);
2121         assert(f);
2122
2123         prefix = strempty(prefix);
2124
2125         fprintf(f,
2126                 "%sUMask: %04o\n"
2127                 "%sWorkingDirectory: %s\n"
2128                 "%sRootDirectory: %s\n"
2129                 "%sNonBlocking: %s\n"
2130                 "%sPrivateTmp: %s\n"
2131                 "%sPrivateNetwork: %s\n"
2132                 "%sPrivateDevices: %s\n"
2133                 "%sProtectHome: %s\n"
2134                 "%sProtectSystem: %s\n"
2135                 "%sIgnoreSIGPIPE: %s\n",
2136                 prefix, c->umask,
2137                 prefix, c->working_directory ? c->working_directory : "/",
2138                 prefix, c->root_directory ? c->root_directory : "/",
2139                 prefix, yes_no(c->non_blocking),
2140                 prefix, yes_no(c->private_tmp),
2141                 prefix, yes_no(c->private_network),
2142                 prefix, yes_no(c->private_devices),
2143                 prefix, protect_home_to_string(c->protect_home),
2144                 prefix, protect_system_to_string(c->protect_system),
2145                 prefix, yes_no(c->ignore_sigpipe));
2146
2147         STRV_FOREACH(e, c->environment)
2148                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
2149
2150         STRV_FOREACH(e, c->environment_files)
2151                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
2152
2153         if (c->nice_set)
2154                 fprintf(f,
2155                         "%sNice: %i\n",
2156                         prefix, c->nice);
2157
2158         if (c->oom_score_adjust_set)
2159                 fprintf(f,
2160                         "%sOOMScoreAdjust: %i\n",
2161                         prefix, c->oom_score_adjust);
2162
2163         for (i = 0; i < RLIM_NLIMITS; i++)
2164                 if (c->rlimit[i])
2165                         fprintf(f, "%s%s: "RLIM_FMT"\n",
2166                                 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
2167
2168         if (c->ioprio_set) {
2169                 _cleanup_free_ char *class_str = NULL;
2170
2171                 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
2172                 fprintf(f,
2173                         "%sIOSchedulingClass: %s\n"
2174                         "%sIOPriority: %i\n",
2175                         prefix, strna(class_str),
2176                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
2177         }
2178
2179         if (c->cpu_sched_set) {
2180                 _cleanup_free_ char *policy_str = NULL;
2181
2182                 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
2183                 fprintf(f,
2184                         "%sCPUSchedulingPolicy: %s\n"
2185                         "%sCPUSchedulingPriority: %i\n"
2186                         "%sCPUSchedulingResetOnFork: %s\n",
2187                         prefix, strna(policy_str),
2188                         prefix, c->cpu_sched_priority,
2189                         prefix, yes_no(c->cpu_sched_reset_on_fork));
2190         }
2191
2192         if (c->cpuset) {
2193                 fprintf(f, "%sCPUAffinity:", prefix);
2194                 for (i = 0; i < c->cpuset_ncpus; i++)
2195                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2196                                 fprintf(f, " %u", i);
2197                 fputs("\n", f);
2198         }
2199
2200         if (c->timer_slack_nsec != NSEC_INFINITY)
2201                 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2202
2203         fprintf(f,
2204                 "%sStandardInput: %s\n"
2205                 "%sStandardOutput: %s\n"
2206                 "%sStandardError: %s\n",
2207                 prefix, exec_input_to_string(c->std_input),
2208                 prefix, exec_output_to_string(c->std_output),
2209                 prefix, exec_output_to_string(c->std_error));
2210
2211         if (c->tty_path)
2212                 fprintf(f,
2213                         "%sTTYPath: %s\n"
2214                         "%sTTYReset: %s\n"
2215                         "%sTTYVHangup: %s\n"
2216                         "%sTTYVTDisallocate: %s\n",
2217                         prefix, c->tty_path,
2218                         prefix, yes_no(c->tty_reset),
2219                         prefix, yes_no(c->tty_vhangup),
2220                         prefix, yes_no(c->tty_vt_disallocate));
2221
2222         if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2223             c->std_output == EXEC_OUTPUT_KMSG ||
2224             c->std_output == EXEC_OUTPUT_JOURNAL ||
2225             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2226             c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2227             c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2228             c->std_error == EXEC_OUTPUT_SYSLOG ||
2229             c->std_error == EXEC_OUTPUT_KMSG ||
2230             c->std_error == EXEC_OUTPUT_JOURNAL ||
2231             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2232             c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2233             c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2234
2235                 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2236
2237                 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2238                 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2239
2240                 fprintf(f,
2241                         "%sSyslogFacility: %s\n"
2242                         "%sSyslogLevel: %s\n",
2243                         prefix, strna(fac_str),
2244                         prefix, strna(lvl_str));
2245         }
2246
2247         if (c->capabilities) {
2248                 _cleanup_cap_free_charp_ char *t;
2249
2250                 t = cap_to_text(c->capabilities, NULL);
2251                 if (t)
2252                         fprintf(f, "%sCapabilities: %s\n", prefix, t);
2253         }
2254
2255         if (c->secure_bits)
2256                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2257                         prefix,
2258                         (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2259                         (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2260                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2261                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2262                         (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2263                         (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
2264
2265         if (c->capability_bounding_set_drop) {
2266                 unsigned long l;
2267                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2268
2269                 for (l = 0; l <= cap_last_cap(); l++)
2270                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
2271                                 _cleanup_cap_free_charp_ char *t;
2272
2273                                 t = cap_to_name(l);
2274                                 if (t)
2275                                         fprintf(f, " %s", t);
2276                         }
2277
2278                 fputs("\n", f);
2279         }
2280
2281         if (c->user)
2282                 fprintf(f, "%sUser: %s\n", prefix, c->user);
2283         if (c->group)
2284                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2285
2286         if (strv_length(c->supplementary_groups) > 0) {
2287                 fprintf(f, "%sSupplementaryGroups:", prefix);
2288                 strv_fprintf(f, c->supplementary_groups);
2289                 fputs("\n", f);
2290         }
2291
2292         if (c->pam_name)
2293                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2294
2295         if (strv_length(c->read_write_dirs) > 0) {
2296                 fprintf(f, "%sReadWriteDirs:", prefix);
2297                 strv_fprintf(f, c->read_write_dirs);
2298                 fputs("\n", f);
2299         }
2300
2301         if (strv_length(c->read_only_dirs) > 0) {
2302                 fprintf(f, "%sReadOnlyDirs:", prefix);
2303                 strv_fprintf(f, c->read_only_dirs);
2304                 fputs("\n", f);
2305         }
2306
2307         if (strv_length(c->inaccessible_dirs) > 0) {
2308                 fprintf(f, "%sInaccessibleDirs:", prefix);
2309                 strv_fprintf(f, c->inaccessible_dirs);
2310                 fputs("\n", f);
2311         }
2312
2313         if (c->utmp_id)
2314                 fprintf(f,
2315                         "%sUtmpIdentifier: %s\n",
2316                         prefix, c->utmp_id);
2317
2318         if (c->selinux_context)
2319                 fprintf(f,
2320                         "%sSELinuxContext: %s%s\n",
2321                         prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
2322
2323         if (c->personality != 0xffffffffUL)
2324                 fprintf(f,
2325                         "%sPersonality: %s\n",
2326                         prefix, strna(personality_to_string(c->personality)));
2327
2328         if (c->syscall_filter) {
2329 #ifdef HAVE_SECCOMP
2330                 Iterator j;
2331                 void *id;
2332                 bool first = true;
2333 #endif
2334
2335                 fprintf(f,
2336                         "%sSystemCallFilter: ",
2337                         prefix);
2338
2339                 if (!c->syscall_whitelist)
2340                         fputc('~', f);
2341
2342 #ifdef HAVE_SECCOMP
2343                 SET_FOREACH(id, c->syscall_filter, j) {
2344                         _cleanup_free_ char *name = NULL;
2345
2346                         if (first)
2347                                 first = false;
2348                         else
2349                                 fputc(' ', f);
2350
2351                         name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2352                         fputs(strna(name), f);
2353                 }
2354 #endif
2355
2356                 fputc('\n', f);
2357         }
2358
2359         if (c->syscall_archs) {
2360 #ifdef HAVE_SECCOMP
2361                 Iterator j;
2362                 void *id;
2363 #endif
2364
2365                 fprintf(f,
2366                         "%sSystemCallArchitectures:",
2367                         prefix);
2368
2369 #ifdef HAVE_SECCOMP
2370                 SET_FOREACH(id, c->syscall_archs, j)
2371                         fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2372 #endif
2373                 fputc('\n', f);
2374         }
2375
2376         if (c->syscall_errno != 0)
2377                 fprintf(f,
2378                         "%sSystemCallErrorNumber: %s\n",
2379                         prefix, strna(errno_to_name(c->syscall_errno)));
2380
2381         if (c->apparmor_profile)
2382                 fprintf(f,
2383                         "%sAppArmorProfile: %s%s\n",
2384                         prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
2385 }
2386
2387 void exec_status_start(ExecStatus *s, pid_t pid) {
2388         assert(s);
2389
2390         zero(*s);
2391         s->pid = pid;
2392         dual_timestamp_get(&s->start_timestamp);
2393 }
2394
2395 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2396         assert(s);
2397
2398         if (s->pid && s->pid != pid)
2399                 zero(*s);
2400
2401         s->pid = pid;
2402         dual_timestamp_get(&s->exit_timestamp);
2403
2404         s->code = code;
2405         s->status = status;
2406
2407         if (context) {
2408                 if (context->utmp_id)
2409                         utmp_put_dead_process(context->utmp_id, pid, code, status);
2410
2411                 exec_context_tty_reset(context);
2412         }
2413 }
2414
2415 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2416         char buf[FORMAT_TIMESTAMP_MAX];
2417
2418         assert(s);
2419         assert(f);
2420
2421         if (!prefix)
2422                 prefix = "";
2423
2424         if (s->pid <= 0)
2425                 return;
2426
2427         fprintf(f,
2428                 "%sPID: "PID_FMT"\n",
2429                 prefix, s->pid);
2430
2431         if (s->start_timestamp.realtime > 0)
2432                 fprintf(f,
2433                         "%sStart Timestamp: %s\n",
2434                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2435
2436         if (s->exit_timestamp.realtime > 0)
2437                 fprintf(f,
2438                         "%sExit Timestamp: %s\n"
2439                         "%sExit Code: %s\n"
2440                         "%sExit Status: %i\n",
2441                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2442                         prefix, sigchld_code_to_string(s->code),
2443                         prefix, s->status);
2444 }
2445
2446 char *exec_command_line(char **argv) {
2447         size_t k;
2448         char *n, *p, **a;
2449         bool first = true;
2450
2451         assert(argv);
2452
2453         k = 1;
2454         STRV_FOREACH(a, argv)
2455                 k += strlen(*a)+3;
2456
2457         if (!(n = new(char, k)))
2458                 return NULL;
2459
2460         p = n;
2461         STRV_FOREACH(a, argv) {
2462
2463                 if (!first)
2464                         *(p++) = ' ';
2465                 else
2466                         first = false;
2467
2468                 if (strpbrk(*a, WHITESPACE)) {
2469                         *(p++) = '\'';
2470                         p = stpcpy(p, *a);
2471                         *(p++) = '\'';
2472                 } else
2473                         p = stpcpy(p, *a);
2474
2475         }
2476
2477         *p = 0;
2478
2479         /* FIXME: this doesn't really handle arguments that have
2480          * spaces and ticks in them */
2481
2482         return n;
2483 }
2484
2485 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2486         _cleanup_free_ char *p2 = NULL;
2487         const char *prefix2;
2488
2489         _cleanup_free_ char *cmd = NULL;
2490
2491         assert(c);
2492         assert(f);
2493
2494         if (!prefix)
2495                 prefix = "";
2496         p2 = strappend(prefix, "\t");
2497         prefix2 = p2 ? p2 : prefix;
2498
2499         cmd = exec_command_line(c->argv);
2500
2501         fprintf(f,
2502                 "%sCommand Line: %s\n",
2503                 prefix, cmd ? cmd : strerror(ENOMEM));
2504
2505         exec_status_dump(&c->exec_status, f, prefix2);
2506 }
2507
2508 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2509         assert(f);
2510
2511         if (!prefix)
2512                 prefix = "";
2513
2514         LIST_FOREACH(command, c, c)
2515                 exec_command_dump(c, f, prefix);
2516 }
2517
2518 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2519         ExecCommand *end;
2520
2521         assert(l);
2522         assert(e);
2523
2524         if (*l) {
2525                 /* It's kind of important, that we keep the order here */
2526                 LIST_FIND_TAIL(command, *l, end);
2527                 LIST_INSERT_AFTER(command, *l, end, e);
2528         } else
2529               *l = e;
2530 }
2531
2532 int exec_command_set(ExecCommand *c, const char *path, ...) {
2533         va_list ap;
2534         char **l, *p;
2535
2536         assert(c);
2537         assert(path);
2538
2539         va_start(ap, path);
2540         l = strv_new_ap(path, ap);
2541         va_end(ap);
2542
2543         if (!l)
2544                 return -ENOMEM;
2545
2546         p = strdup(path);
2547         if (!p) {
2548                 strv_free(l);
2549                 return -ENOMEM;
2550         }
2551
2552         free(c->path);
2553         c->path = p;
2554
2555         strv_free(c->argv);
2556         c->argv = l;
2557
2558         return 0;
2559 }
2560
2561 static int exec_runtime_allocate(ExecRuntime **rt) {
2562
2563         if (*rt)
2564                 return 0;
2565
2566         *rt = new0(ExecRuntime, 1);
2567         if (!*rt)
2568                 return -ENOMEM;
2569
2570         (*rt)->n_ref = 1;
2571         (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
2572
2573         return 0;
2574 }
2575
2576 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2577         int r;
2578
2579         assert(rt);
2580         assert(c);
2581         assert(id);
2582
2583         if (*rt)
2584                 return 1;
2585
2586         if (!c->private_network && !c->private_tmp)
2587                 return 0;
2588
2589         r = exec_runtime_allocate(rt);
2590         if (r < 0)
2591                 return r;
2592
2593         if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2594                 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2595                         return -errno;
2596         }
2597
2598         if (c->private_tmp && !(*rt)->tmp_dir) {
2599                 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
2600                 if (r < 0)
2601                         return r;
2602         }
2603
2604         return 1;
2605 }
2606
2607 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2608         assert(r);
2609         assert(r->n_ref > 0);
2610
2611         r->n_ref++;
2612         return r;
2613 }
2614
2615 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2616
2617         if (!r)
2618                 return NULL;
2619
2620         assert(r->n_ref > 0);
2621
2622         r->n_ref--;
2623         if (r->n_ref <= 0) {
2624                 free(r->tmp_dir);
2625                 free(r->var_tmp_dir);
2626                 safe_close_pair(r->netns_storage_socket);
2627                 free(r);
2628         }
2629
2630         return NULL;
2631 }
2632
2633 int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) {
2634         assert(u);
2635         assert(f);
2636         assert(fds);
2637
2638         if (!rt)
2639                 return 0;
2640
2641         if (rt->tmp_dir)
2642                 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2643
2644         if (rt->var_tmp_dir)
2645                 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2646
2647         if (rt->netns_storage_socket[0] >= 0) {
2648                 int copy;
2649
2650                 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2651                 if (copy < 0)
2652                         return copy;
2653
2654                 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2655         }
2656
2657         if (rt->netns_storage_socket[1] >= 0) {
2658                 int copy;
2659
2660                 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2661                 if (copy < 0)
2662                         return copy;
2663
2664                 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
2665         }
2666
2667         return 0;
2668 }
2669
2670 int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) {
2671         int r;
2672
2673         assert(rt);
2674         assert(key);
2675         assert(value);
2676
2677         if (streq(key, "tmp-dir")) {
2678                 char *copy;
2679
2680                 r = exec_runtime_allocate(rt);
2681                 if (r < 0)
2682                         return r;
2683
2684                 copy = strdup(value);
2685                 if (!copy)
2686                         return log_oom();
2687
2688                 free((*rt)->tmp_dir);
2689                 (*rt)->tmp_dir = copy;
2690
2691         } else if (streq(key, "var-tmp-dir")) {
2692                 char *copy;
2693
2694                 r = exec_runtime_allocate(rt);
2695                 if (r < 0)
2696                         return r;
2697
2698                 copy = strdup(value);
2699                 if (!copy)
2700                         return log_oom();
2701
2702                 free((*rt)->var_tmp_dir);
2703                 (*rt)->var_tmp_dir = copy;
2704
2705         } else if (streq(key, "netns-socket-0")) {
2706                 int fd;
2707
2708                 r = exec_runtime_allocate(rt);
2709                 if (r < 0)
2710                         return r;
2711
2712                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2713                         log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2714                 else {
2715                         safe_close((*rt)->netns_storage_socket[0]);
2716                         (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2717                 }
2718         } else if (streq(key, "netns-socket-1")) {
2719                 int fd;
2720
2721                 r = exec_runtime_allocate(rt);
2722                 if (r < 0)
2723                         return r;
2724
2725                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2726                         log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2727                 else {
2728                         safe_close((*rt)->netns_storage_socket[1]);
2729                         (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
2730                 }
2731         } else
2732                 return 0;
2733
2734         return 1;
2735 }
2736
/* Thread entry point handed to asynchronous_job(): p is a heap-allocated path
 * string which this function takes ownership of. The path is passed to
 * rm_rf_dangerous() and then freed. Always returns NULL. */
static void *remove_tmpdir_thread(void *p) {
        char *dir = p;

        rm_rf_dangerous(dir, false, true, false);
        free(dir);

        return NULL;
}
2743
2744 void exec_runtime_destroy(ExecRuntime *rt) {
2745         int r;
2746
2747         if (!rt)
2748                 return;
2749
2750         /* If there are multiple users of this, let's leave the stuff around */
2751         if (rt->n_ref > 1)
2752                 return;
2753
2754         if (rt->tmp_dir) {
2755                 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2756
2757                 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2758                 if (r < 0) {
2759                         log_warning("Failed to nuke %s: %s", rt->tmp_dir, strerror(-r));
2760                         free(rt->tmp_dir);
2761                 }
2762
2763                 rt->tmp_dir = NULL;
2764         }
2765
2766         if (rt->var_tmp_dir) {
2767                 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2768
2769                 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2770                 if (r < 0) {
2771                         log_warning("Failed to nuke %s: %s", rt->var_tmp_dir, strerror(-r));
2772                         free(rt->var_tmp_dir);
2773                 }
2774
2775                 rt->var_tmp_dir = NULL;
2776         }
2777
2778         safe_close_pair(rt->netns_storage_socket);
2779 }
2780
/* String names of the ExecInput enum values, indexed by enum constant. The
 * table is consumed by the DEFINE_STRING_TABLE_LOOKUP() invocation below
 * (presumably generating the exec_input string<->enum lookup helpers -- see
 * the macro definition to confirm). */
static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
        [EXEC_INPUT_NULL] = "null",
        [EXEC_INPUT_TTY] = "tty",
        [EXEC_INPUT_TTY_FORCE] = "tty-force",
        [EXEC_INPUT_TTY_FAIL] = "tty-fail",
        [EXEC_INPUT_SOCKET] = "socket"
};

DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2790
/* String names of the ExecOutput enum values, indexed by enum constant. The
 * table is consumed by the DEFINE_STRING_TABLE_LOOKUP() invocation below
 * (presumably generating the exec_output string<->enum lookup helpers -- see
 * the macro definition to confirm). */
static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
        [EXEC_OUTPUT_INHERIT] = "inherit",
        [EXEC_OUTPUT_NULL] = "null",
        [EXEC_OUTPUT_TTY] = "tty",
        [EXEC_OUTPUT_SYSLOG] = "syslog",
        [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
        [EXEC_OUTPUT_KMSG] = "kmsg",
        [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
        [EXEC_OUTPUT_JOURNAL] = "journal",
        [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
        [EXEC_OUTPUT_SOCKET] = "socket"
};

DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);