kernels. It is recomended to turn this on for most services that do not need special
file systems or extra kernel modules to work. Default to off. Enabling this option
removes <constant>CAP_SYS_MODULE</constant> from the capability bounding set for
- the unit, and installs a system call filter to block module system calls.
+ the unit, and installs a system call filter to block module system calls,
+ also <filename>/usr/lib/modules</filename> is made inaccessible. For this
+ setting the same restrictions regarding mount propagation and privileges
+ apply as for <varname>ReadOnlyPaths=</varname> and related calls, see above.
Note that limited automatic module loading due to user configuration or kernel
mapping tables might still happen as side effect of requested user operations,
both privileged and unprivileged. To disable module auto-load feature please see
context->protect_system != PROTECT_SYSTEM_NO ||
context->protect_home != PROTECT_HOME_NO ||
context->protect_kernel_tunables ||
+ context->protect_kernel_modules ||
context->protect_control_groups)
return true;
if (needs_mount_namespace) {
_cleanup_free_ char **rw = NULL;
char *tmp = NULL, *var = NULL;
+ NameSpaceInfo ns_info = {
+ .private_dev = context->private_devices,
+ .protect_control_groups = context->protect_control_groups,
+ .protect_kernel_tunables = context->protect_kernel_tunables,
+ .protect_kernel_modules = context->protect_kernel_modules,
+ };
/* The runtime struct only contains the parent
* of the private /tmp, which is
r = setup_namespace(
(params->flags & EXEC_APPLY_CHROOT) ? context->root_directory : NULL,
+ &ns_info,
rw,
context->read_only_paths,
context->inaccessible_paths,
tmp,
var,
- context->private_devices,
- context->protect_kernel_tunables,
- context->protect_control_groups,
context->protect_home,
context->protect_system,
context->mount_flags);
{ "/sys/fs/cgroup", READWRITE, false }, /* READONLY is set by ProtectControlGroups= option */
};
+/* ProtectKernelModules= option */
+static const TargetMount protect_kernel_modules_table[] = {
+#ifdef HAVE_SPLIT_USR
+ { "/lib/modules", INACCESSIBLE, true },
+#endif
+ { "/usr/lib/modules", INACCESSIBLE, true },
+};
+
/*
* ProtectHome=read-only table, protect $HOME and $XDG_RUNTIME_DIR and rest of
* system should be protected by ProtectSystem=
ELEMENTSOF(protect_kernel_tunables_table));
}
+static int append_protect_kernel_modules(BindMount **p, const char *root_directory) {
+ assert(p);
+
+ return append_target_mounts(p, root_directory, protect_kernel_modules_table,
+ ELEMENTSOF(protect_kernel_modules_table));
+}
+
static int append_protect_home(BindMount **p, const char *root_directory, ProtectHome protect_home) {
int r = 0;
}
static unsigned namespace_calculate_mounts(
+ const NameSpaceInfo *ns_info,
char** read_write_paths,
char** read_only_paths,
char** inaccessible_paths,
const char* tmp_dir,
const char* var_tmp_dir,
- bool private_dev,
- bool protect_sysctl,
- bool protect_cgroups,
ProtectHome protect_home,
ProtectSystem protect_system) {
strv_length(read_write_paths) +
strv_length(read_only_paths) +
strv_length(inaccessible_paths) +
- private_dev +
- (protect_sysctl ? ELEMENTSOF(protect_kernel_tunables_table) : 0) +
- (protect_cgroups ? 1 : 0) +
+ ns_info->private_dev +
+ (ns_info->protect_kernel_tunables ? ELEMENTSOF(protect_kernel_tunables_table) : 0) +
+ (ns_info->protect_control_groups ? 1 : 0) +
+ (ns_info->protect_kernel_modules ? ELEMENTSOF(protect_kernel_modules_table) : 0) +
protect_home_cnt + protect_system_cnt;
}
int setup_namespace(
const char* root_directory,
+ const NameSpaceInfo *ns_info,
char** read_write_paths,
char** read_only_paths,
char** inaccessible_paths,
const char* tmp_dir,
const char* var_tmp_dir,
- bool private_dev,
- bool protect_sysctl,
- bool protect_cgroups,
ProtectHome protect_home,
ProtectSystem protect_system,
unsigned long mount_flags) {
if (mount_flags == 0)
mount_flags = MS_SHARED;
- n = namespace_calculate_mounts(read_write_paths,
+ n = namespace_calculate_mounts(ns_info,
+ read_write_paths,
read_only_paths,
inaccessible_paths,
tmp_dir, var_tmp_dir,
- private_dev, protect_sysctl,
- protect_cgroups, protect_home,
- protect_system);
+ protect_home, protect_system);
/* Set mount slave mode */
if (root_directory || n > 0)
m++;
}
- if (private_dev) {
+ if (ns_info->private_dev) {
m->path = prefix_roota(root_directory, "/dev");
m->mode = PRIVATE_DEV;
m++;
}
- if (protect_sysctl)
- append_protect_kernel_tunables(&m, root_directory);
+ if (ns_info->protect_kernel_tunables) {
+ r = append_protect_kernel_tunables(&m, root_directory);
+ if (r < 0)
+ return r;
+ }
+
+ if (ns_info->protect_kernel_modules) {
+ r = append_protect_kernel_modules(&m, root_directory);
+ if (r < 0)
+ return r;
+ }
- if (protect_cgroups) {
+ if (ns_info->protect_control_groups) {
m->path = prefix_roota(root_directory, "/sys/fs/cgroup");
m->mode = READONLY;
m++;
This file is part of systemd.
Copyright 2010 Lennart Poettering
+ Copyright 2016 Djalal Harouni
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
+typedef struct NameSpaceInfo NameSpaceInfo;
+
#include <stdbool.h>
#include "macro.h"
_PROTECT_SYSTEM_INVALID = -1
} ProtectSystem;
+struct NameSpaceInfo {
+ bool private_dev:1;
+ bool protect_control_groups:1;
+ bool protect_kernel_tunables:1;
+ bool protect_kernel_modules:1;
+};
+
int setup_namespace(const char *chroot,
+ const NameSpaceInfo *ns_info,
char **read_write_paths,
char **read_only_paths,
char **inaccessible_paths,
const char *tmp_dir,
const char *var_tmp_dir,
- bool private_dev,
- bool protect_sysctl,
- bool protect_cgroups,
ProtectHome protect_home,
ProtectSystem protect_system,
unsigned long mount_flags);
"/home/lennart/projects",
NULL
};
+
+ static const NameSpaceInfo ns_info = {
+ .private_dev = true,
+ .protect_control_groups = true,
+ .protect_kernel_tunables = true,
+ .protect_kernel_modules = true,
+ };
+
char *root_directory;
char *projects_directory;
int r;
log_info("Not chrooted");
r = setup_namespace(root_directory,
+ &ns_info,
(char **) writable,
(char **) readonly,
(char **) inaccessible,
tmp_dir,
var_tmp_dir,
- true,
- true,
- true,
PROTECT_HOME_NO,
PROTECT_SYSTEM_NO,
0);