bpf-firewall: custom BPF programs through IP(Ingress|Egress)FilterPath=
authorKai Lüke <kailueke@riseup.net>
Tue, 23 Apr 2019 10:14:20 +0000 (12:14 +0200)
committerLennart Poettering <lennart@poettering.net>
Tue, 25 Jun 2019 07:56:16 +0000 (09:56 +0200)
Takes a single /sys/fs/bpf/pinned_prog string as argument, but may be
specified multiple times. An empty assignment resets all previous filters.

Closes https://github.com/systemd/systemd/issues/10227

16 files changed:
man/systemd.resource-control.xml
src/analyze/analyze-security.c
src/core/bpf-firewall.c
src/core/bpf-firewall.h
src/core/cgroup.c
src/core/cgroup.h
src/core/dbus-cgroup.c
src/core/load-fragment-gperf.gperf.m4
src/core/load-fragment.c
src/core/load-fragment.h
src/core/unit.c
src/core/unit.h
src/shared/bpf-program.c
src/shared/bpf-program.h
src/shared/bus-unit-util.c
src/test/test-bpf.c

index 95209a8..e7b5dfb 100644 (file)
       </varlistentry>
 
       <varlistentry>
+        <term><varname>IPIngressFilterPath=<replaceable>BPF_FS_PROGRAMM_PATH</replaceable></varname></term>
+        <term><varname>IPEgressFilterPath=<replaceable>BPF_FS_PROGRAMM_PATH</replaceable></varname></term>
+
+        <listitem>
+          <para>Add custom network traffic filters implemented as BPF programs, applying to all IP packets
+          sent and received over <constant>AF_INET</constant> and <constant>AF_INET6</constant> sockets.
+          Takes an absolute path to a pinned BPF program in the BPF virtual filesystem (<filename>/sys/fs/bpf/</filename>).
+          </para>
+
+          <para>The filters configured with this option are applied to all sockets created by processes
+          of this unit (or in the case of socket units, associated with it). The filters are loaded in addition
+          to filters any of the parent slice units this unit might be a member of as well as any
+          <varname>IPAddressAllow=</varname> and <varname>IPAddressDeny=</varname> filters in any of these units.
+          By default there are no filters specified.</para>
+
+          <para>If these settings are used multiple times in the same unit all the specified programs are attached. If an
+          empty string is assigned to these settings the program list is reset and all previous specified programs ignored.</para>
+
+          <para>Note that for socket-activated services, the IP filter programs configured on the socket unit apply to
+          all sockets associated with it directly, but not to any sockets created by the ultimately activated services
+          for it. Conversely, the IP filter programs configured for the service are not applied to any sockets passed into
+          the service via socket activation. Thus, it is usually a good idea, to replicate the IP filter programs on both
+          the socket and the service unit, however it often makes sense to maintain one configuration more open and the other
+          one more restricted, depending on the usecase.</para>
+
+          <para>Note that these settings might not be supported on some systems (for example if eBPF control group
+          support is not enabled in the underlying kernel or container manager). These settings will fail the service in
+          that case. If compatibility with such systems is desired it is hence recommended to attach your filter manually
+          (requires <varname>Delegate=</varname><constant>yes</constant>) instead of using this setting.</para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
         <term><varname>DeviceAllow=</varname></term>
 
         <listitem>
index 0962950..3cf6515 100644 (file)
@@ -45,6 +45,9 @@ struct security_info {
         bool ip_address_allow_localhost;
         bool ip_address_allow_other;
 
+        bool ip_filters_custom_ingress;
+        bool ip_filters_custom_egress;
+
         char *keyring_mode;
         bool lock_personality;
         bool memory_deny_write_execute;
@@ -590,7 +593,10 @@ static int assess_ip_address_allow(
         assert(ret_badness);
         assert(ret_description);
 
-        if (!info->ip_address_deny_all) {
+        if (info->ip_filters_custom_ingress || info->ip_filters_custom_egress) {
+                d = strdup("Service defines custom ingress/egress IP filters with BPF programs");
+                b = 0;
+        } else if (!info->ip_address_deny_all) {
                 d = strdup("Service does not define an IP address whitelist");
                 b = 10;
         } else if (info->ip_address_allow_other) {
@@ -1824,6 +1830,33 @@ static int property_read_ip_address_allow(
         return sd_bus_message_exit_container(m);
 }
 
+static int property_read_ip_filters(
+                sd_bus *bus,
+                const char *member,
+                sd_bus_message *m,
+                sd_bus_error *error,
+                void *userdata) {
+
+        struct security_info *info = userdata;
+        _cleanup_(strv_freep) char **l = NULL;
+        int r;
+
+        assert(bus);
+        assert(member);
+        assert(m);
+
+        r = sd_bus_message_read_strv(m, &l);
+        if (r < 0)
+                return r;
+
+        if (streq(member, "IPIngressFilterPath"))
+                info->ip_filters_custom_ingress = !strv_isempty(l);
+        else if (streq(member, "IPEgressFilterPath"))
+                info->ip_filters_custom_ingress = !strv_isempty(l);
+
+        return 0;
+}
+
 static int property_read_device_allow(
                 sd_bus *bus,
                 const char *member,
@@ -1873,6 +1906,8 @@ static int acquire_security_info(sd_bus *bus, const char *name, struct security_
                 { "FragmentPath",            "s",       NULL,                                    offsetof(struct security_info, fragment_path)             },
                 { "IPAddressAllow",          "a(iayu)", property_read_ip_address_allow,          0                                                         },
                 { "IPAddressDeny",           "a(iayu)", property_read_ip_address_allow,          0                                                         },
+                { "IPIngressFilterPath",     "as",      property_read_ip_filters,                0                                                         },
+                { "IPEgressFilterPath",      "as",      property_read_ip_filters,                0                                                         },
                 { "Id",                      "s",       NULL,                                    offsetof(struct security_info, id)                        },
                 { "KeyringMode",             "s",       NULL,                                    offsetof(struct security_info, keyring_mode)              },
                 { "LoadState",               "s",       NULL,                                    offsetof(struct security_info, load_state)                },
index 8163db2..7a8b848 100644 (file)
@@ -587,6 +587,95 @@ int bpf_firewall_compile(Unit *u) {
         return 0;
 }
 
+DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(filter_prog_hash_ops, void, trivial_hash_func, trivial_compare_func, BPFProgram, bpf_program_unref);
+
+static int load_bpf_progs_from_fs_to_set(Unit *u, char **filter_paths, Set **set) {
+        char **bpf_fs_path;
+
+        set_clear(*set);
+
+        STRV_FOREACH(bpf_fs_path, filter_paths) {
+                _cleanup_free_ BPFProgram *prog = NULL;
+                int r;
+
+                r = bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB, &prog);
+                if (r < 0)
+                        return log_unit_error_errno(u, r, "Can't allocate CGROUP SKB BPF program: %m");
+
+                r = bpf_program_load_from_bpf_fs(prog, *bpf_fs_path);
+                if (r < 0)
+                        return log_unit_error_errno(u, r, "Loading of ingress BPF program %s failed: %m", *bpf_fs_path);
+
+                r = set_ensure_allocated(set, &filter_prog_hash_ops);
+                if (r < 0)
+                        return log_unit_error_errno(u, r, "Can't allocate BPF program set: %m");
+
+                r = set_put(*set, prog);
+                if (r < 0)
+                        return log_unit_error_errno(u, r, "Can't add program to BPF program set: %m");
+                TAKE_PTR(prog);
+        }
+
+        return 0;
+}
+
+int bpf_firewall_load_custom(Unit *u) {
+        CGroupContext *cc;
+        int r, supported;
+
+        assert(u);
+
+        cc = unit_get_cgroup_context(u);
+        if (!cc)
+                return 0;
+
+        if (!(cc->ip_filters_ingress || cc->ip_filters_egress))
+                return 0;
+
+        supported = bpf_firewall_supported();
+        if (supported < 0)
+                return supported;
+
+        if (supported != BPF_FIREWALL_SUPPORTED_WITH_MULTI)
+                return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP), "BPF_F_ALLOW_MULTI not supported on this manager, cannot attach custom BPF programs.");
+
+        r = load_bpf_progs_from_fs_to_set(u, cc->ip_filters_ingress, &u->ip_bpf_custom_ingress);
+        if (r < 0)
+                return r;
+        r = load_bpf_progs_from_fs_to_set(u, cc->ip_filters_egress, &u->ip_bpf_custom_egress);
+        if (r < 0)
+                return r;
+
+        return 0;
+}
+
+static int attach_custom_bpf_progs(Unit *u, const char *path, int attach_type, Set **set, Set **set_installed) {
+        BPFProgram *prog;
+        Iterator i;
+        int r;
+
+        assert(u);
+
+        set_clear(*set_installed);
+
+        SET_FOREACH(prog, *set, i) {
+                r = bpf_program_cgroup_attach(prog, attach_type, path, BPF_F_ALLOW_MULTI);
+                if (r < 0)
+                        return log_unit_error_errno(u, r, "Attaching custom egress BPF program to cgroup %s failed: %m", path);
+                /* Remember that these BPF programs are installed now. */
+                r = set_ensure_allocated(set_installed, &filter_prog_hash_ops);
+                if (r < 0)
+                        return log_unit_error_errno(u, r, "Can't allocate BPF program set: %m");
+
+                r = set_put(*set_installed, prog);
+                if (r < 0)
+                        return log_unit_error_errno(u, r, "Can't add program to BPF program set: %m");
+                bpf_program_ref(prog);
+        }
+
+        return 0;
+}
+
 int bpf_firewall_install(Unit *u) {
         _cleanup_free_ char *path = NULL;
         CGroupContext *cc;
@@ -614,6 +703,9 @@ int bpf_firewall_install(Unit *u) {
                 log_unit_debug(u, "BPF_F_ALLOW_MULTI is not supported on this manager, not doing BPF firewall on slice units.");
                 return -EOPNOTSUPP;
         }
+        if (supported != BPF_FIREWALL_SUPPORTED_WITH_MULTI &&
+            (!set_isempty(u->ip_bpf_custom_ingress) || !set_isempty(u->ip_bpf_custom_egress)))
+                return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP), "BPF_F_ALLOW_MULTI not supported on this manager, cannot attach custom BPF programs.");
 
         r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, NULL, &path);
         if (r < 0)
@@ -628,7 +720,8 @@ int bpf_firewall_install(Unit *u) {
         u->ip_bpf_ingress_installed = bpf_program_unref(u->ip_bpf_ingress_installed);
 
         if (u->ip_bpf_egress) {
-                r = bpf_program_cgroup_attach(u->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, path, flags);
+                r = bpf_program_cgroup_attach(u->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, path,
+                                              flags | (set_isempty(u->ip_bpf_custom_egress) ? 0 : BPF_F_ALLOW_MULTI));
                 if (r < 0)
                         return log_unit_error_errno(u, r, "Attaching egress BPF program to cgroup %s failed: %m", path);
 
@@ -637,13 +730,22 @@ int bpf_firewall_install(Unit *u) {
         }
 
         if (u->ip_bpf_ingress) {
-                r = bpf_program_cgroup_attach(u->ip_bpf_ingress, BPF_CGROUP_INET_INGRESS, path, flags);
+                r = bpf_program_cgroup_attach(u->ip_bpf_ingress, BPF_CGROUP_INET_INGRESS, path,
+                                              flags | (set_isempty(u->ip_bpf_custom_ingress) ? 0 : BPF_F_ALLOW_MULTI));
                 if (r < 0)
                         return log_unit_error_errno(u, r, "Attaching ingress BPF program to cgroup %s failed: %m", path);
 
                 u->ip_bpf_ingress_installed = bpf_program_ref(u->ip_bpf_ingress);
         }
 
+        r = attach_custom_bpf_progs(u, path, BPF_CGROUP_INET_EGRESS, &u->ip_bpf_custom_egress, &u->ip_bpf_custom_egress_installed);
+        if (r < 0)
+                return r;
+
+        r = attach_custom_bpf_progs(u, path, BPF_CGROUP_INET_INGRESS, &u->ip_bpf_custom_ingress, &u->ip_bpf_custom_ingress_installed);
+        if (r < 0)
+                return r;
+
         return 0;
 }
 
index 10cafcc..f1460d9 100644 (file)
@@ -15,6 +15,7 @@ int bpf_firewall_supported(void);
 
 int bpf_firewall_compile(Unit *u);
 int bpf_firewall_install(Unit *u);
+int bpf_firewall_load_custom(Unit *u);
 
 int bpf_firewall_read_accounting(int map_fd, uint64_t *ret_bytes, uint64_t *ret_packets);
 int bpf_firewall_reset_accounting(int map_fd);
index 1ed5723..0428f62 100644 (file)
@@ -199,6 +199,9 @@ void cgroup_context_done(CGroupContext *c) {
 
         c->ip_address_allow = ip_address_access_free_all(c->ip_address_allow);
         c->ip_address_deny = ip_address_access_free_all(c->ip_address_deny);
+
+        c->ip_filters_ingress = strv_free(c->ip_filters_ingress);
+        c->ip_filters_egress = strv_free(c->ip_filters_egress);
 }
 
 void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
@@ -210,6 +213,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
         CGroupBlockIODeviceWeight *w;
         CGroupDeviceAllow *a;
         IPAddressAccessItem *iaai;
+        char **path;
         char u[FORMAT_TIMESPAN_MAX];
         char v[FORMAT_TIMESPAN_MAX];
 
@@ -360,6 +364,12 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
                 (void) in_addr_to_string(iaai->family, &iaai->address, &k);
                 fprintf(f, "%sIPAddressDeny=%s/%u\n", prefix, strnull(k), iaai->prefixlen);
         }
+
+        STRV_FOREACH(path, c->ip_filters_ingress)
+                fprintf(f, "%sIPIngressFilterPath=%s\n", prefix, *path);
+
+        STRV_FOREACH(path, c->ip_filters_egress)
+                fprintf(f, "%sIPEgressFilterPath=%s\n", prefix, *path);
 }
 
 int cgroup_add_device_allow(CGroupContext *c, const char *dev, const char *mode) {
@@ -945,6 +955,7 @@ static void cgroup_apply_firewall(Unit *u) {
         if (bpf_firewall_compile(u) < 0)
                 return;
 
+        (void) bpf_firewall_load_custom(u);
         (void) bpf_firewall_install(u);
 }
 
@@ -1353,7 +1364,9 @@ static bool unit_get_needs_bpf_firewall(Unit *u) {
 
         if (c->ip_accounting ||
             c->ip_address_allow ||
-            c->ip_address_deny)
+            c->ip_address_deny ||
+            c->ip_filters_ingress ||
+            c->ip_filters_egress)
                 return true;
 
         /* If any parent slice has an IP access list defined, it applies too */
@@ -1919,6 +1932,12 @@ int unit_attach_pids_to_cgroup(Unit *u, Set *pids, const char *suffix_path) {
         if (set_isempty(pids))
                 return 0;
 
+        /* Load any custom firewall BPF programs here once to test if they are existing and actually loadable.
+         * Fail here early since later errors in the call chain unit_realize_cgroup to cgroup_context_apply are ignored. */
+        r = bpf_firewall_load_custom(u);
+        if (r < 0)
+                return r;
+
         r = unit_realize_cgroup(u);
         if (r < 0)
                 return r;
index fe347ea..d1537c5 100644 (file)
@@ -114,6 +114,9 @@ struct CGroupContext {
         LIST_HEAD(IPAddressAccessItem, ip_address_allow);
         LIST_HEAD(IPAddressAccessItem, ip_address_deny);
 
+        char **ip_filters_ingress;
+        char **ip_filters_egress;
+
         /* For legacy hierarchies */
         uint64_t cpu_shares;
         uint64_t startup_cpu_shares;
index 9f4fd06..f70e6c8 100644 (file)
@@ -362,6 +362,8 @@ const sd_bus_vtable bus_cgroup_vtable[] = {
         SD_BUS_PROPERTY("IPAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, ip_accounting), 0),
         SD_BUS_PROPERTY("IPAddressAllow", "a(iayu)", property_get_ip_address_access, offsetof(CGroupContext, ip_address_allow), 0),
         SD_BUS_PROPERTY("IPAddressDeny", "a(iayu)", property_get_ip_address_access, offsetof(CGroupContext, ip_address_deny), 0),
+        SD_BUS_PROPERTY("IPIngressFilterPath", "as", NULL, offsetof(CGroupContext, ip_filters_ingress), 0),
+        SD_BUS_PROPERTY("IPEgressFilterPath", "as", NULL, offsetof(CGroupContext, ip_filters_egress), 0),
         SD_BUS_PROPERTY("DisableControllers", "as", property_get_cgroup_mask, offsetof(CGroupContext, disable_controllers), 0),
         SD_BUS_VTABLE_END
 };
@@ -462,6 +464,80 @@ static int bus_cgroup_set_transient_property(
                 }
 
                 return 1;
+        } else if (STR_IN_SET(name, "IPIngressFilterPath", "IPEgressFilterPath")) {
+                char ***filters;
+                size_t n = 0;
+
+                filters = streq(name, "IPIngressFilterPath") ? &c->ip_filters_ingress : &c->ip_filters_egress;
+                r = sd_bus_message_enter_container(message, 'a', "s");
+                if (r < 0)
+                        return r;
+
+                for (;;) {
+                        const char *path;
+
+                        r = sd_bus_message_read(message, "s", &path);
+                        if (r < 0)
+                                return r;
+                        if (r == 0)
+                                break;
+
+                        if (!path_is_normalized(path) || !path_is_absolute(path))
+                                return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "%s= expects a normalized absolute path.", name);
+
+                        if (!UNIT_WRITE_FLAGS_NOOP(flags) && !strv_contains(*filters, path)) {
+                                r = strv_extend(filters, path);
+                                if (r < 0)
+                                        return log_oom();
+                        }
+                        n++;
+                }
+                r = sd_bus_message_exit_container(message);
+                if (r < 0)
+                        return r;
+
+                if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+                        _cleanup_free_ char *buf = NULL;
+                        _cleanup_fclose_ FILE *f = NULL;
+                        char **entry;
+                        size_t size = 0;
+
+                        if (n == 0)
+                                *filters = strv_free(*filters);
+
+                        unit_invalidate_cgroup_bpf(u);
+                        f = open_memstream_unlocked(&buf, &size);
+                        if (!f)
+                                return -ENOMEM;
+
+                        fputs(name, f);
+                        fputs("=\n", f);
+
+                        STRV_FOREACH(entry, *filters)
+                                fprintf(f, "%s=%s\n", name, *entry);
+
+                        r = fflush_and_check(f);
+                        if (r < 0)
+                                return r;
+
+                        unit_write_setting(u, flags, name, buf);
+
+                        if (*filters) {
+                                r = bpf_firewall_supported();
+                                if (r < 0)
+                                        return r;
+                                if (r != BPF_FIREWALL_SUPPORTED_WITH_MULTI) {
+                                        static bool warned = false;
+
+                                        log_full(warned ? LOG_DEBUG : LOG_WARNING,
+                                                 "Transient unit %s configures an IP firewall with BPF, but the local system does not support BPF/cgroup firewalling with mulitiple filters.\n"
+                                                 "Starting this unit will fail! (This warning is only shown for the first started transient unit using IP firewalling.)", u->id);
+                                        warned = true;
+                                }
+                        }
+                }
+
+                return 1;
         }
 
         return 0;
index 5e6fb64..f7906b3 100644 (file)
@@ -205,6 +205,8 @@ $1.DisableControllers,           config_parse_disable_controllers,   0,
 $1.IPAccounting,                 config_parse_bool,                  0,                             offsetof($1, cgroup_context.ip_accounting)
 $1.IPAddressAllow,               config_parse_ip_address_access,     0,                             offsetof($1, cgroup_context.ip_address_allow)
 $1.IPAddressDeny,                config_parse_ip_address_access,     0,                             offsetof($1, cgroup_context.ip_address_deny)
+$1.IPIngressFilterPath,          config_parse_ip_filter_bpf_progs,   0,                             offsetof($1, cgroup_context.ip_filters_ingress)
+$1.IPEgressFilterPath,           config_parse_ip_filter_bpf_progs,   0,                             offsetof($1, cgroup_context.ip_filters_egress)
 $1.NetClass,                     config_parse_warn_compat,           DISABLED_LEGACY,               0'
 )m4_dnl
 Unit.Description,                config_parse_unit_string_printf,    0,                             offsetof(Unit, description)
index 274d9d2..ba41f8e 100644 (file)
@@ -18,6 +18,7 @@
 #include "af-list.h"
 #include "alloc-util.h"
 #include "all-units.h"
+#include "bpf-firewall.h"
 #include "bus-error.h"
 #include "bus-internal.h"
 #include "bus-util.h"
@@ -4456,6 +4457,66 @@ int config_parse_disable_controllers(
         return 0;
 }
 
+int config_parse_ip_filter_bpf_progs(
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        _cleanup_free_ char *resolved = NULL;
+        Unit *u = userdata;
+        char ***paths = data;
+        int r;
+
+        assert(filename);
+        assert(lvalue);
+        assert(rvalue);
+        assert(paths);
+
+        if (isempty(rvalue)) {
+                *paths = strv_free(*paths);
+                return 0;
+        }
+
+        r = unit_full_printf(u, rvalue, &resolved);
+        if (r < 0) {
+                log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in '%s', ignoring: %m", rvalue);
+                return 0;
+        }
+
+        r = path_simplify_and_warn(resolved, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue);
+        if (r < 0)
+                return 0;
+
+        if (strv_contains(*paths, resolved))
+                return 0;
+
+        r = strv_extend(paths, resolved);
+        if (r < 0)
+                return log_oom();
+
+        r = bpf_firewall_supported();
+        if (r < 0)
+                return r;
+        if (r != BPF_FIREWALL_SUPPORTED_WITH_MULTI) {
+                static bool warned = false;
+
+                log_full(warned ? LOG_DEBUG : LOG_WARNING,
+                         "File %s:%u configures an IP firewall with BPF programs (%s=%s), but the local system does not support BPF/cgroup based firewalling with multiple filters.\n"
+                         "Starting this unit will fail! (This warning is only shown for the first loaded unit using IP firewalling.)", filename, line, lvalue, rvalue);
+
+                warned = true;
+        }
+
+        return 0;
+}
+
 #define FOLLOW_MAX 8
 
 static int open_follow(char **filename, FILE **_f, Set *names, char **_final) {
index ddcc8d2..8d5f701 100644 (file)
@@ -110,6 +110,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_disable_controllers);
 CONFIG_PARSER_PROTOTYPE(config_parse_oom_policy);
 CONFIG_PARSER_PROTOTYPE(config_parse_numa_policy);
 CONFIG_PARSER_PROTOTYPE(config_parse_numa_mask);
+CONFIG_PARSER_PROTOTYPE(config_parse_ip_filter_bpf_progs);
 
 /* gperf prototypes */
 const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, GPERF_LEN_TYPE length);
index 4d777b4..463db73 100644 (file)
@@ -12,6 +12,7 @@
 
 #include "all-units.h"
 #include "alloc-util.h"
+#include "bpf-firewall.h"
 #include "bus-common-errors.h"
 #include "bus-util.h"
 #include "cgroup-util.h"
@@ -682,6 +683,11 @@ void unit_free(Unit *u) {
         bpf_program_unref(u->ip_bpf_egress);
         bpf_program_unref(u->ip_bpf_egress_installed);
 
+        set_free(u->ip_bpf_custom_ingress);
+        set_free(u->ip_bpf_custom_egress);
+        set_free(u->ip_bpf_custom_ingress_installed);
+        set_free(u->ip_bpf_custom_egress_installed);
+
         bpf_program_unref(u->bpf_device_control_installed);
 
         condition_free_list(u->conditions);
@@ -5500,6 +5506,12 @@ int unit_prepare_exec(Unit *u) {
 
         assert(u);
 
+        /* Load any custom firewall BPF programs here once to test if they are existing and actually loadable.
+         * Fail here early since later errors in the call chain unit_realize_cgroup to cgroup_context_apply are ignored. */
+        r = bpf_firewall_load_custom(u);
+        if (r < 0)
+                return r;
+
         /* Prepares everything so that we can fork of a process for this unit */
 
         (void) unit_realize_cgroup(u);
index 007c4ae..ef495f8 100644 (file)
@@ -10,6 +10,7 @@
 #include "emergency-action.h"
 #include "install.h"
 #include "list.h"
+#include "set.h"
 #include "unit-name.h"
 #include "cgroup.h"
 
@@ -281,6 +282,10 @@ typedef struct Unit {
 
         BPFProgram *ip_bpf_ingress, *ip_bpf_ingress_installed;
         BPFProgram *ip_bpf_egress, *ip_bpf_egress_installed;
+        Set *ip_bpf_custom_ingress;
+        Set *ip_bpf_custom_ingress_installed;
+        Set *ip_bpf_custom_egress;
+        Set *ip_bpf_custom_egress_installed;
 
         uint64_t ip_accounting_extra[_CGROUP_IP_ACCOUNTING_METRIC_MAX];
 
index 40bc964..93f8db3 100644 (file)
@@ -94,6 +94,25 @@ int bpf_program_load_kernel(BPFProgram *p, char *log_buf, size_t log_size) {
         return 0;
 }
 
+int bpf_program_load_from_bpf_fs(BPFProgram *p, const char *path) {
+        union bpf_attr attr;
+
+        assert(p);
+
+        if (p->kernel_fd >= 0) /* don't overwrite an assembled or loaded program */
+                return -EBUSY;
+
+        attr = (union bpf_attr) {
+                .pathname = PTR_TO_UINT64(path),
+        };
+
+        p->kernel_fd = bpf(BPF_OBJ_GET, &attr, sizeof(attr));
+        if (p->kernel_fd < 0)
+                return -errno;
+
+        return 0;
+}
+
 int bpf_program_cgroup_attach(BPFProgram *p, int type, const char *path, uint32_t flags) {
         _cleanup_free_ char *copy = NULL;
         _cleanup_close_ int fd = -1;
index c21eb2f..a21589e 100644 (file)
@@ -31,6 +31,7 @@ BPFProgram *bpf_program_ref(BPFProgram *p);
 
 int bpf_program_add_instructions(BPFProgram *p, const struct bpf_insn *insn, size_t count);
 int bpf_program_load_kernel(BPFProgram *p, char *log_buf, size_t log_size);
+int bpf_program_load_from_bpf_fs(BPFProgram *p, const char *path);
 
 int bpf_program_cgroup_attach(BPFProgram *p, int type, const char *path, uint32_t flags);
 int bpf_program_cgroup_detach(BPFProgram *p);
index bb30e8f..2ea25d8 100644 (file)
@@ -758,6 +758,18 @@ static int bus_append_cgroup_property(sd_bus_message *m, const char *field, cons
                 return 1;
         }
 
+        if (STR_IN_SET(field, "IPIngressFilterPath", "IPEgressFilterPath")) {
+                if (isempty(eq))
+                        r = sd_bus_message_append(m, "(sv)", field, "as", 0);
+                else
+                        r = sd_bus_message_append(m, "(sv)", field, "as", 1, eq);
+
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                return 1;
+        }
+
         return 0;
 }
 
index 90ab15c..6a75221 100644 (file)
@@ -9,6 +9,7 @@
 #include "bpf-program.h"
 #include "load-fragment.h"
 #include "manager.h"
+#include "missing.h"
 #include "rm-rf.h"
 #include "service.h"
 #include "test-helper.h"
@@ -42,7 +43,7 @@ static bool can_memlock(void) {
 
 int main(int argc, char *argv[]) {
         struct bpf_insn exit_insn[] = {
-                BPF_MOV64_IMM(BPF_REG_0, 1),
+                BPF_MOV64_IMM(BPF_REG_0, 0), /* drop */
                 BPF_EXIT_INSN()
         };
 
@@ -54,6 +55,9 @@ int main(int argc, char *argv[]) {
         char log_buf[65535];
         struct rlimit rl;
         int r;
+        union bpf_attr attr;
+        bool test_custom_filter = false;
+        const char *test_prog = "/sys/fs/bpf/test-dropper";
 
         test_setup_logging(LOG_DEBUG);
 
@@ -88,14 +92,31 @@ int main(int argc, char *argv[]) {
                 return log_tests_skipped("BPF firewalling not supported");
         assert_se(r > 0);
 
-        if (r == BPF_FIREWALL_SUPPORTED_WITH_MULTI)
+        if (r == BPF_FIREWALL_SUPPORTED_WITH_MULTI) {
                 log_notice("BPF firewalling with BPF_F_ALLOW_MULTI supported. Yay!");
-        else
+                test_custom_filter = true;
+        } else
                 log_notice("BPF firewalling (though without BPF_F_ALLOW_MULTI) supported. Good.");
 
         r = bpf_program_load_kernel(p, log_buf, ELEMENTSOF(log_buf));
         assert(r >= 0);
 
+        if (test_custom_filter) {
+                attr = (union bpf_attr) {
+                        .pathname = PTR_TO_UINT64(test_prog),
+                        .bpf_fd = p->kernel_fd,
+                        .file_flags = 0,
+                };
+
+                (void) unlink(test_prog);
+
+                r = bpf(BPF_OBJ_PIN, &attr, sizeof(attr));
+                if (r < 0) {
+                        log_warning_errno(errno, "BPF object pinning failed, will not run custom filter test: %m");
+                        test_custom_filter = false;
+                }
+        }
+
         p = bpf_program_unref(p);
 
         /* The simple tests succeeded. Now let's try full unit-based use-case. */
@@ -175,5 +196,31 @@ int main(int argc, char *argv[]) {
         assert_se(SERVICE(u)->exec_command[SERVICE_EXEC_START]->command_next->exec_status.code != CLD_EXITED ||
                   SERVICE(u)->exec_command[SERVICE_EXEC_START]->command_next->exec_status.status != EXIT_SUCCESS);
 
+        if (test_custom_filter) {
+                assert_se(u = unit_new(m, sizeof(Service)));
+                assert_se(unit_add_name(u, "custom-filter.service") == 0);
+                assert_se(cc = unit_get_cgroup_context(u));
+                u->perpetual = true;
+
+                cc->ip_accounting = true;
+
+                assert_se(config_parse_ip_filter_bpf_progs(u->id, "filename", 1, "Service", 1, "IPIngressFilterPath", 0, test_prog, &cc->ip_filters_ingress, u) == 0);
+                assert_se(config_parse_exec(u->id, "filename", 1, "Service", 1, "ExecStart", SERVICE_EXEC_START, "-/bin/ping -c 1 127.0.0.1 -W 5", SERVICE(u)->exec_command, u) == 0);
+
+                SERVICE(u)->type = SERVICE_ONESHOT;
+                u->load_state = UNIT_LOADED;
+
+                assert_se(unit_start(u) >= 0);
+
+                while (!IN_SET(SERVICE(u)->state, SERVICE_DEAD, SERVICE_FAILED))
+                        assert_se(sd_event_run(m->event, UINT64_MAX) >= 0);
+
+                assert_se(SERVICE(u)->exec_command[SERVICE_EXEC_START]->exec_status.code != CLD_EXITED ||
+                          SERVICE(u)->exec_command[SERVICE_EXEC_START]->exec_status.status != EXIT_SUCCESS);
+
+                (void) unlink(test_prog);
+                assert_se(SERVICE(u)->state == SERVICE_DEAD);
+        }
+
         return 0;
 }