From 8cfa775f4f116c5f56a140da268ea7b6072534e6 Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Sat, 11 Nov 2017 21:35:49 +0900 Subject: [PATCH] core: add support to specify errno in SystemCallFilter= This makes each system call in SystemCallFilter= blacklist optionally take an errno name or number after a colon. The errno takes precedence over the one given by SystemCallErrorNumber=. Cf. #7173. Closes #7169. --- man/systemd.exec.xml | 7 ++++++- src/core/dbus-execute.c | 48 ++++++++++++++++++++++++++++++++++++----------- src/core/execute.c | 18 ++++++++++++++---- src/core/execute.h | 2 +- src/core/load-fragment.c | 26 ++++++++++++++++--------- src/shared/seccomp-util.c | 22 ++++++++++++++-------- src/shared/seccomp-util.h | 4 ++-- 7 files changed, 91 insertions(+), 36 deletions(-) diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml index 6843c20..0aaccb9 100644 --- a/man/systemd.exec.xml +++ b/man/systemd.exec.xml @@ -1438,7 +1438,12 @@ CapabilityBoundingSet=~CAP_B CAP_C executed by the unit processes except for the listed ones will result in immediate process termination with the SIGSYS signal (whitelisting). If the first character of the list is ~, the effect is inverted: only the listed system calls will result in immediate process termination - (blacklisting). If running in user mode, or in system mode, but without the CAP_SYS_ADMIN + (blacklisting). Blacklisted system calls and system call groups may optionally be suffixed with a colon + (:) and errno error number (between 0 and 4095) or errno name such as + EPERM, EACCES or EUCLEAN. This value will be + returned when a blacklisted system call is triggered, instead of terminating the processes immediately. + This value takes precedence over the one given in SystemCallErrorNumber=. + If running in user mode, or in system mode, but without the CAP_SYS_ADMIN capability (e.g. setting User=nobody), NoNewPrivileges=yes is implied. 
This feature makes use of the Secure Computing Mode 2 interfaces of the kernel ('seccomp filtering') and is useful for enforcing a minimal sandboxing environment. Note that the execve, diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c index 4dbd6b0..e480bac 100644 --- a/src/core/dbus-execute.c +++ b/src/core/dbus-execute.c @@ -378,7 +378,7 @@ static int property_get_syscall_filter( #if HAVE_SECCOMP Iterator i; - void *id; + void *id, *val; #endif assert(bus); @@ -394,14 +394,33 @@ static int property_get_syscall_filter( return r; #if HAVE_SECCOMP - SET_FOREACH(id, c->syscall_filter, i) { - char *name; + HASHMAP_FOREACH_KEY(val, id, c->syscall_filter, i) { + _cleanup_free_ char *name = NULL; + const char *e = NULL; + char *s; + int num = PTR_TO_INT(val); name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1); if (!name) continue; - r = strv_consume(&l, name); + if (num >= 0) { + e = errno_to_name(num); + if (e) { + s = strjoin(name, ":", e); + if (!s) + return -ENOMEM; + } else { + r = asprintf(&s, "%s:%d", name, num); + if (r < 0) + return -ENOMEM; + } + } else { + s = name; + name = NULL; + } + + r = strv_consume(&l, s); if (r < 0) return r; } @@ -1210,22 +1229,29 @@ int bus_exec_context_set_transient_property( if (strv_length(l) == 0) { c->syscall_whitelist = false; - c->syscall_filter = set_free(c->syscall_filter); + c->syscall_filter = hashmap_free(c->syscall_filter); } else { char **s; c->syscall_whitelist = whitelist; - r = set_ensure_allocated(&c->syscall_filter, NULL); + r = hashmap_ensure_allocated(&c->syscall_filter, NULL); if (r < 0) return r; STRV_FOREACH(s, l) { - if (**s == '@') { + _cleanup_free_ char *n = NULL; + int e; + + r = parse_syscall_and_errno(*s, &n, &e); + if (r < 0) + return r; + + if (*n == '@') { const SyscallFilterSet *set; const char *i; - set = syscall_filter_set_find(*s); + set = syscall_filter_set_find(n); if (!set) return -EINVAL; @@ -1236,7 +1262,7 @@ int 
bus_exec_context_set_transient_property( if (id == __NR_SCMP_ERROR) return -EINVAL; - r = set_put(c->syscall_filter, INT_TO_PTR(id + 1)); + r = hashmap_put(c->syscall_filter, INT_TO_PTR(id + 1), INT_TO_PTR(e)); if (r < 0) return r; } @@ -1244,11 +1270,11 @@ int bus_exec_context_set_transient_property( } else { int id; - id = seccomp_syscall_resolve_name(*s); + id = seccomp_syscall_resolve_name(n); if (id == __NR_SCMP_ERROR) return -EINVAL; - r = set_put(c->syscall_filter, INT_TO_PTR(id + 1)); + r = hashmap_put(c->syscall_filter, INT_TO_PTR(id + 1), INT_TO_PTR(e)); if (r < 0) return r; } diff --git a/src/core/execute.c b/src/core/execute.c index 5187ad2..fdd57cd 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -1290,7 +1290,7 @@ static bool context_has_syscall_filters(const ExecContext *c) { assert(c); return c->syscall_whitelist || - !set_isempty(c->syscall_filter); + !hashmap_isempty(c->syscall_filter); } static bool context_has_no_new_privileges(const ExecContext *c) { @@ -3528,7 +3528,7 @@ void exec_context_done(ExecContext *c) { c->apparmor_profile = mfree(c->apparmor_profile); c->smack_process_label = mfree(c->smack_process_label); - c->syscall_filter = set_free(c->syscall_filter); + c->syscall_filter = hashmap_free(c->syscall_filter); c->syscall_archs = set_free(c->syscall_archs); c->address_families = set_free(c->address_families); @@ -4065,7 +4065,7 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { if (c->syscall_filter) { #if HAVE_SECCOMP Iterator j; - void *id; + void *id, *val; bool first = true; #endif @@ -4077,8 +4077,10 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { fputc('~', f); #if HAVE_SECCOMP - SET_FOREACH(id, c->syscall_filter, j) { + HASHMAP_FOREACH_KEY(val, id, c->syscall_filter, j) { _cleanup_free_ char *name = NULL; + const char *errno_name = NULL; + int num = PTR_TO_INT(val); if (first) first = false; @@ -4087,6 +4089,14 @@ void exec_context_dump(ExecContext *c, FILE* f, const char 
*prefix) { name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1); fputs(strna(name), f); + + if (num >= 0) { + errno_name = errno_to_name(num); + if (errno_name) + fprintf(f, ":%s", errno_name); + else + fprintf(f, ":%d", num); + } } #endif diff --git a/src/core/execute.h b/src/core/execute.h index 4d41990..23abdd4 100644 --- a/src/core/execute.h +++ b/src/core/execute.h @@ -242,7 +242,7 @@ struct ExecContext { unsigned long restrict_namespaces; /* The CLONE_NEWxyz flags permitted to the unit's processes */ - Set *syscall_filter; + Hashmap *syscall_filter; Set *syscall_archs; int syscall_errno; bool syscall_whitelist:1; diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index a4e39a8..33f8ca9 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -2632,7 +2632,8 @@ static int syscall_filter_parse_one( ExecContext *c, bool invert, const char *t, - bool warn) { + bool warn, + int errno_num) { int r; if (t[0] == '@') { @@ -2647,7 +2648,7 @@ static int syscall_filter_parse_one( } NULSTR_FOREACH(i, set->value) { - r = syscall_filter_parse_one(unit, filename, line, c, invert, i, false); + r = syscall_filter_parse_one(unit, filename, line, c, invert, i, false, errno_num); if (r < 0) return r; } @@ -2665,13 +2666,13 @@ static int syscall_filter_parse_one( * we want to allow it, then remove it from the list */ if (!invert == c->syscall_whitelist) { - r = set_put(c->syscall_filter, INT_TO_PTR(id + 1)); + r = hashmap_put(c->syscall_filter, INT_TO_PTR(id + 1), INT_TO_PTR(errno_num)); if (r == 0) return 0; if (r < 0) return log_oom(); } else - (void) set_remove(c->syscall_filter, INT_TO_PTR(id + 1)); + (void) hashmap_remove(c->syscall_filter, INT_TO_PTR(id + 1)); } return 0; @@ -2702,7 +2703,7 @@ int config_parse_syscall_filter( if (isempty(rvalue)) { /* Empty assignment resets the list */ - c->syscall_filter = set_free(c->syscall_filter); + c->syscall_filter = hashmap_free(c->syscall_filter); c->syscall_whitelist = false; 
return 0; } @@ -2713,7 +2714,7 @@ int config_parse_syscall_filter( } if (!c->syscall_filter) { - c->syscall_filter = set_new(NULL); + c->syscall_filter = hashmap_new(NULL); if (!c->syscall_filter) return log_oom(); @@ -2725,7 +2726,7 @@ int config_parse_syscall_filter( c->syscall_whitelist = true; /* Accept default syscalls if we are on a whitelist */ - r = syscall_filter_parse_one(unit, filename, line, c, false, "@default", false); + r = syscall_filter_parse_one(unit, filename, line, c, false, "@default", false, -1); if (r < 0) return r; } @@ -2733,7 +2734,8 @@ int config_parse_syscall_filter( p = rvalue; for (;;) { - _cleanup_free_ char *word = NULL; + _cleanup_free_ char *word = NULL, *name = NULL; + int num; r = extract_first_word(&p, &word, NULL, 0); if (r == 0) @@ -2745,7 +2747,13 @@ int config_parse_syscall_filter( break; } - r = syscall_filter_parse_one(unit, filename, line, c, invert, word, true); + r = parse_syscall_and_errno(word, &name, &num); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse syscall:errno, ignoring: %s", word); + continue; + } + + r = syscall_filter_parse_one(unit, filename, line, c, invert, name, true, num); if (r < 0) return r; } diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c index f877016..d60ac91 100644 --- a/src/shared/seccomp-util.c +++ b/src/shared/seccomp-util.c @@ -900,20 +900,20 @@ int seccomp_load_syscall_filter_set(uint32_t default_action, const SyscallFilter return 0; } -int seccomp_load_syscall_filter_set_raw(uint32_t default_action, Set* set, uint32_t action) { +int seccomp_load_syscall_filter_set_raw(uint32_t default_action, Hashmap* set, uint32_t action) { uint32_t arch; int r; /* Similar to seccomp_load_syscall_filter_set(), but takes a raw Set* of syscalls, instead of a * SyscallFilterSet* table. 
*/ - if (set_isempty(set) && default_action == SCMP_ACT_ALLOW) + if (hashmap_isempty(set) && default_action == SCMP_ACT_ALLOW) return 0; SECCOMP_FOREACH_LOCAL_ARCH(arch) { _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL; Iterator i; - void *id; + void *id, *val; log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch)); @@ -921,8 +921,14 @@ int seccomp_load_syscall_filter_set_raw(uint32_t default_action, Set* set, uint3 if (r < 0) return r; - SET_FOREACH(id, set, i) { - r = seccomp_rule_add_exact(seccomp, action, PTR_TO_INT(id) - 1, 0); + HASHMAP_FOREACH_KEY(val, id, set, i) { + uint32_t a = action; + int e = PTR_TO_INT(val); + + if (action != SCMP_ACT_ALLOW && e >= 0) + a = SCMP_ACT_ERRNO(e); + + r = seccomp_rule_add_exact(seccomp, a, PTR_TO_INT(id) - 1, 0); if (r < 0) { /* If the system call is not known on this architecture, then that's fine, let's ignore it */ _cleanup_free_ char *n = NULL; @@ -1515,7 +1521,7 @@ int parse_syscall_archs(char **l, Set **archs) { return 0; } -int seccomp_filter_set_add(Set *filter, bool add, const SyscallFilterSet *set) { +int seccomp_filter_set_add(Hashmap *filter, bool add, const SyscallFilterSet *set) { const char *i; int r; @@ -1543,11 +1549,11 @@ int seccomp_filter_set_add(Set *filter, bool add, const SyscallFilterSet *set) { } if (add) { - r = set_put(filter, INT_TO_PTR(id + 1)); + r = hashmap_put(filter, INT_TO_PTR(id + 1), INT_TO_PTR(-1)); if (r < 0) return r; } else - (void) set_remove(filter, INT_TO_PTR(id + 1)); + (void) hashmap_remove(filter, INT_TO_PTR(id + 1)); } } diff --git a/src/shared/seccomp-util.h b/src/shared/seccomp-util.h index 6dfa465..a3c360c 100644 --- a/src/shared/seccomp-util.h +++ b/src/shared/seccomp-util.h @@ -73,12 +73,12 @@ extern const SyscallFilterSet syscall_filter_sets[]; const SyscallFilterSet *syscall_filter_set_find(const char *name); -int seccomp_filter_set_add(Set *s, bool b, const SyscallFilterSet *set); +int seccomp_filter_set_add(Hashmap *s, bool b, const 
SyscallFilterSet *set); int seccomp_add_syscall_filter_item(scmp_filter_ctx *ctx, const char *name, uint32_t action, char **exclude); int seccomp_load_syscall_filter_set(uint32_t default_action, const SyscallFilterSet *set, uint32_t action); -int seccomp_load_syscall_filter_set_raw(uint32_t default_action, Set* set, uint32_t action); +int seccomp_load_syscall_filter_set_raw(uint32_t default_action, Hashmap* set, uint32_t action); int seccomp_restrict_archs(Set *archs); int seccomp_restrict_namespaces(unsigned long retain); -- 2.7.4