From cd2dfc6faea2a1e2370ae52460247bb6c8c553b1 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 28 Jun 2017 19:22:46 +0200 Subject: [PATCH] nspawn: register a scope for the unit if --register=no is specified (#6166) MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Previously, only when --register=yes was set (the default) the invoked container would get its own scope, created by machined on behalf of nspawn. With this change if --register=no is set nspawn will still get its own scope (which is a good thing, so that --slice= and --property= take effect), but this is not done through machined but by registering a scope unit directly in PID 1. Summary: --register=yes → allocate a new scope through machined (the default) --register=yes --keep-unit → use the unit we are already running in an register with machined --register=no → allocate a new scope directly, but no machined --register=no --keep-unit → do not allocate nor register anything Fixes: #5823 --- man/systemd-nspawn.xml | 45 ++++----- src/nspawn/nspawn-register.c | 222 ++++++++++++++++++++++++++++++++----------- src/nspawn/nspawn-register.h | 2 + src/nspawn/nspawn.c | 18 +++- 4 files changed, 202 insertions(+), 85 deletions(-) diff --git a/man/systemd-nspawn.xml b/man/systemd-nspawn.xml index ae70827..7014443 100644 --- a/man/systemd-nspawn.xml +++ b/man/systemd-nspawn.xml @@ -398,24 +398,19 @@ - Make the container part of the specified - slice, instead of the default - machine.slice. This is only applies if - the machine is run in its own scope unit, i.e. if - is not used. + Make the container part of the specified slice, instead of the default + machine.slice. This applies only if the machine is run in its own scope unit, i.e. if + isn't used. - Set a unit property on the scope unit to - register for the machine. This only applies if the machine is - run in its own scope unit, i.e. if - is not used. Takes unit property - assignments in the same format as systemctl - set-property. This is useful to set memory limits - and similar for machines. + Set a unit property on the scope unit to register for the machine. This applies only if the + machine is run in its own scope unit, i.e. if isn't used. Takes unit property + assignments in the same format as systemctl set-property. This is useful to set memory + limits and similar for container. @@ -888,18 +883,16 @@ - Controls whether the container is registered - with - systemd-machined8. - Takes a boolean argument, which defaults to yes. - This option should be enabled when the container runs a full - Operating System (more specifically: an init system), and is - useful to ensure that the container is accessible via - machinectl1 - and shown by tools such as - ps1. - If the container does not run an init system, it is - recommended to set this option to no. + Controls whether the container is registered with + systemd-machined8. Takes a + boolean argument, which defaults to yes. This option should be enabled when the container + runs a full Operating System (more specifically: a system and service manager as PID 1), and is useful to + ensure that the container is accessible via + machinectl1 and shown by + tools such as ps1. If the container + does not run a service manager, it is recommended to set this option to + no. @@ -916,7 +909,9 @@ service unit, and the service unit's sole purpose is to run a single systemd-nspawn container. This option is not available if run from a user - session. + session. + Note that passing disables the effect of and + . diff --git a/src/nspawn/nspawn-register.c b/src/nspawn/nspawn-register.c index e3ab39f..ffdf123 100644 --- a/src/nspawn/nspawn-register.c +++ b/src/nspawn/nspawn-register.c @@ -27,6 +27,77 @@ #include "strv.h" #include "util.h" +static int append_machine_properties( + sd_bus_message *m, + CustomMount *mounts, + unsigned n_mounts, + int kill_signal, + char **properties) { + + unsigned j; + int r; + + assert(m); + + r = sd_bus_message_append(m, "(sv)", "DevicePolicy", "s", "closed"); + if (r < 0) + return bus_log_create_error(r); + + /* If you make changes here, also make sure to update systemd-nspawn@.service, to keep the device policies in + * sync regardless if we are run with or without the --keep-unit switch. */ + r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 2, + /* Allow the container to + * access and create the API + * device nodes, so that + * PrivateDevices= in the + * container can work + * fine */ + "/dev/net/tun", "rwm", + /* Allow the container + * access to ptys. However, + * do not permit the + * container to ever create + * these device nodes. */ + "char-pts", "rw"); + if (r < 0) + return bus_log_create_error(r); + + for (j = 0; j < n_mounts; j++) { + CustomMount *cm = mounts + j; + + if (cm->type != CUSTOM_MOUNT_BIND) + continue; + + r = is_device_node(cm->source); + if (r == -ENOENT) { + /* The bind source might only appear as the image is put together, hence don't complain */ + log_debug_errno(r, "Bind mount source %s not found, ignoring: %m", cm->source); + continue; + } + if (r < 0) + return log_error_errno(r, "Failed to stat %s: %m", cm->source); + + if (r) { + r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 1, + cm->source, cm->read_only ? "r" : "rw"); + if (r < 0) + return log_error_errno(r, "Failed to append message arguments: %m"); + } + } + + if (kill_signal != 0) { + r = sd_bus_message_append(m, "(sv)", "KillSignal", "i", kill_signal); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append(m, "(sv)", "KillMode", "s", "mixed"); + if (r < 0) + return bus_log_create_error(r); + } + + return 0; +} + int register_machine( const char *machine_name, pid_t pid, @@ -68,7 +139,6 @@ int register_machine( local_ifindex > 0 ? 1 : 0, local_ifindex); } else { _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL; - unsigned j; r = sd_bus_message_new_method_call( bus, @@ -103,63 +173,14 @@ int register_machine( return bus_log_create_error(r); } - r = sd_bus_message_append(m, "(sv)", "DevicePolicy", "s", "closed"); - if (r < 0) - return bus_log_create_error(r); - - /* If you make changes here, also make sure to update - * systemd-nspawn@.service, to keep the device - * policies in sync regardless if we are run with or - * without the --keep-unit switch. */ - r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 2, - /* Allow the container to - * access and create the API - * device nodes, so that - * PrivateDevices= in the - * container can work - * fine */ - "/dev/net/tun", "rwm", - /* Allow the container - * access to ptys. However, - * do not permit the - * container to ever create - * these device nodes. */ - "char-pts", "rw"); + r = append_machine_properties( + m, + mounts, + n_mounts, + kill_signal, + properties); if (r < 0) - return bus_log_create_error(r); - - for (j = 0; j < n_mounts; j++) { - CustomMount *cm = mounts + j; - - if (cm->type != CUSTOM_MOUNT_BIND) - continue; - - r = is_device_node(cm->source); - if (r == -ENOENT) { - /* The bind source might only appear as the image is put together, hence don't complain */ - log_debug_errno(r, "Bind mount source %s not found, ignoring: %m", cm->source); - continue; - } - if (r < 0) - return log_error_errno(r, "Failed to stat %s: %m", cm->source); - - if (r) { - r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 1, - cm->source, cm->read_only ? "r" : "rw"); - if (r < 0) - return log_error_errno(r, "Failed to append message arguments: %m"); - } - } - - if (kill_signal != 0) { - r = sd_bus_message_append(m, "(sv)", "KillSignal", "i", kill_signal); - if (r < 0) - return bus_log_create_error(r); - - r = sd_bus_message_append(m, "(sv)", "KillMode", "s", "mixed"); - if (r < 0) - return bus_log_create_error(r); - } + return r; r = bus_append_unit_property_assignment_many(m, properties); if (r < 0) @@ -229,3 +250,90 @@ int terminate_machine(pid_t pid) { return 0; } + +int allocate_scope( + const char *machine_name, + pid_t pid, + const char *slice, + CustomMount *mounts, + unsigned n_mounts, + int kill_signal, + char **properties) { + + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL; + _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; + _cleanup_free_ char *scope = NULL; + const char *description; + int r; + + r = sd_bus_default_system(&bus); + if (r < 0) + return log_error_errno(r, "Failed to open system bus: %m"); + + r = unit_name_mangle_with_suffix(machine_name, UNIT_NAME_NOGLOB, ".scope", &scope); + if (r < 0) + return log_error_errno(r, "Failed to mangle scope name: %m"); + + r = sd_bus_message_new_method_call( + bus, + &m, + "org.freedesktop.systemd1", + "/org/freedesktop/systemd1", + "org.freedesktop.systemd1.Manager", + "StartTransientUnit"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append(m, "ss", scope, "fail"); + if (r < 0) + return bus_log_create_error(r); + + /* Properties */ + r = sd_bus_message_open_container(m, 'a', "(sv)"); + if (r < 0) + return bus_log_create_error(r); + + description = strjoina("Container ", machine_name); + + r = sd_bus_message_append(m, "(sv)(sv)(sv)(sv)", + "PIDs", "au", 1, pid, + "Description", "s", description, + "Delegate", "b", 1, + "Slice", "s", isempty(slice) ? "machine.slice" : slice); + if (r < 0) + return bus_log_create_error(r); + + r = append_machine_properties( + m, + mounts, + n_mounts, + kill_signal, + properties); + if (r < 0) + return r; + + r = bus_append_unit_property_assignment_many(m, properties); + if (r < 0) + return r; + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + /* No auxiliary units */ + r = sd_bus_message_append( + m, + "a(sa(sv))", + 0); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_call(bus, m, 0, &error, NULL); + if (r < 0) { + log_error("Failed to allocate scope: %s", bus_error_message(&error, r)); + return r; + } + + return 0; +} diff --git a/src/nspawn/nspawn-register.h b/src/nspawn/nspawn-register.h index 304c5a4..6694b3f 100644 --- a/src/nspawn/nspawn-register.h +++ b/src/nspawn/nspawn-register.h @@ -27,3 +27,5 @@ int register_machine(const char *machine_name, pid_t pid, const char *directory, sd_id128_t uuid, int local_ifindex, const char *slice, CustomMount *mounts, unsigned n_mounts, int kill_signal, char **properties, bool keep_unit, const char *service); int terminate_machine(pid_t pid); + +int allocate_scope(const char *machine_name, pid_t pid, const char *slice, CustomMount *mounts, unsigned n_mounts, int kill_signal, char **properties); diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index 767d992..8a5fedd 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -1083,8 +1083,8 @@ static int parse_argv(int argc, char *argv[]) { if (arg_userns_mode == USER_NAMESPACE_PICK) arg_userns_chown = true; - if (arg_keep_unit && cg_pid_get_owner_uid(0, NULL) >= 0) { - log_error("--keep-unit may not be used when invoked from a user session."); + if (arg_keep_unit && arg_register && cg_pid_get_owner_uid(0, NULL) >= 0) { + log_error("--keep-unit --register=yes may not be used when invoked from a user session."); return -EINVAL; } @@ -3387,7 +3387,19 @@ static int run(int master, arg_container_service_name); if (r < 0) return r; - } + } else if (!arg_keep_unit) { + r = allocate_scope( + arg_machine, + *pid, + arg_slice, + arg_custom_mounts, arg_n_custom_mounts, + arg_kill_signal, + arg_property); + if (r < 0) + return r; + + } else if (arg_slice || arg_property) + log_notice("Machine and scope registration turned off, --slice= and --property= settings will have no effect."); r = sync_cgroup(*pid, arg_unified_cgroup_hierarchy, arg_uid_shift); if (r < 0) -- 2.7.4