nspawn: register a scope for the unit if --register=no is specified (#6166)
authorLennart Poettering <lennart@poettering.net>
Wed, 28 Jun 2017 17:22:46 +0000 (19:22 +0200)
committerZbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Wed, 28 Jun 2017 17:22:46 +0000 (13:22 -0400)
Previously, only when --register=yes was set (the default) the invoked
container would get its own scope, created by machined on behalf of
nspawn. With this change if --register=no is set nspawn will still get
its own scope (which is a good thing, so that --slice= and --property=
take effect), but this is not done through machined but by registering a
scope unit directly in PID 1.

Summary:

--register=yes             → allocate a new scope through machined (the default)
--register=yes --keep-unit → use the unit we are already running in an register with machined
--register=no              → allocate a new scope directly, but no machined
--register=no --keep-unit  → do not allocate nor register anything

Fixes: #5823

man/systemd-nspawn.xml
src/nspawn/nspawn-register.c
src/nspawn/nspawn-register.h
src/nspawn/nspawn.c

index ae70827..7014443 100644 (file)
       <varlistentry>
         <term><option>--slice=</option></term>
 
-        <listitem><para>Make the container part of the specified
-        slice, instead of the default
-        <filename>machine.slice</filename>. This is only applies if
-        the machine is run in its own scope unit, i.e. if
-        <option>--keep-unit</option> is not used.</para>
+        <listitem><para>Make the container part of the specified slice, instead of the default
+        <filename>machine.slice</filename>. This applies only if the machine is run in its own scope unit, i.e. if
+        <option>--keep-unit</option> isn't used.</para>
         </listitem>
       </varlistentry>
 
       <varlistentry>
         <term><option>--property=</option></term>
 
-        <listitem><para>Set a unit property on the scope unit to
-        register for the machine. This only applies if the machine is
-        run in its own scope unit, i.e. if
-        <option>--keep-unit</option> is not used. Takes unit property
-        assignments in the same format as <command>systemctl
-        set-property</command>. This is useful to set memory limits
-        and similar for machines.</para>
+        <listitem><para>Set a unit property on the scope unit to register for the machine. This applies only if the
+        machine is run in its own scope unit, i.e. if <option>--keep-unit</option> isn't used. Takes unit property
+        assignments in the same format as <command>systemctl set-property</command>. This is useful to set memory
+        limits and similar for container.</para>
         </listitem>
       </varlistentry>
 
       <varlistentry>
         <term><option>--register=</option></term>
 
-        <listitem><para>Controls whether the container is registered
-        with
-        <citerefentry><refentrytitle>systemd-machined</refentrytitle><manvolnum>8</manvolnum></citerefentry>.
-        Takes a boolean argument, which defaults to <literal>yes</literal>.
-        This option should be enabled when the container runs a full
-        Operating System (more specifically: an init system), and is
-        useful to ensure that the container is accessible via
-        <citerefentry><refentrytitle>machinectl</refentrytitle><manvolnum>1</manvolnum></citerefentry>
-        and shown by tools such as
-        <citerefentry project='man-pages'><refentrytitle>ps</refentrytitle><manvolnum>1</manvolnum></citerefentry>.
-        If the container does not run an init system, it is
-        recommended to set this option to <literal>no</literal>.</para></listitem>
+        <listitem><para>Controls whether the container is registered with
+        <citerefentry><refentrytitle>systemd-machined</refentrytitle><manvolnum>8</manvolnum></citerefentry>.  Takes a
+        boolean argument, which defaults to <literal>yes</literal>.  This option should be enabled when the container
+        runs a full Operating System (more specifically: a system and service manager as PID 1), and is useful to
+        ensure that the container is accessible via
+        <citerefentry><refentrytitle>machinectl</refentrytitle><manvolnum>1</manvolnum></citerefentry> and shown by
+        tools such as <citerefentry
+        project='man-pages'><refentrytitle>ps</refentrytitle><manvolnum>1</manvolnum></citerefentry>.  If the container
+        does not run a service manager, it is recommended to set this option to
+        <literal>no</literal>.</para></listitem>
       </varlistentry>
 
       <varlistentry>
         service unit, and the service unit's sole purpose is to run a
         single <command>systemd-nspawn</command> container. This
         option is not available if run from a user
-        session.</para></listitem>
+        session.</para>
+        <para>Note that passing <option>--keep-unit</option> disables the effect of <option>--slice=</option> and
+        <option>--property=</option>.</para></listitem>
       </varlistentry>
 
       <varlistentry>
index e3ab39f..ffdf123 100644 (file)
 #include "strv.h"
 #include "util.h"
 
+static int append_machine_properties(
+                sd_bus_message *m,
+                CustomMount *mounts,
+                unsigned n_mounts,
+                int kill_signal,
+                char **properties) {
+
+        unsigned j;
+        int r;
+
+        assert(m);
+
+        r = sd_bus_message_append(m, "(sv)", "DevicePolicy", "s", "closed");
+        if (r < 0)
+                return bus_log_create_error(r);
+
+        /* If you make changes here, also make sure to update systemd-nspawn@.service, to keep the device policies in
+         * sync regardless if we are run with or without the --keep-unit switch. */
+        r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 2,
+                                  /* Allow the container to
+                                   * access and create the API
+                                   * device nodes, so that
+                                   * PrivateDevices= in the
+                                   * container can work
+                                   * fine */
+                                  "/dev/net/tun", "rwm",
+                                  /* Allow the container
+                                   * access to ptys. However,
+                                   * do not permit the
+                                   * container to ever create
+                                   * these device nodes. */
+                                  "char-pts", "rw");
+        if (r < 0)
+                return bus_log_create_error(r);
+
+        for (j = 0; j < n_mounts; j++) {
+                CustomMount *cm = mounts + j;
+
+                if (cm->type != CUSTOM_MOUNT_BIND)
+                        continue;
+
+                r = is_device_node(cm->source);
+                if (r == -ENOENT) {
+                        /* The bind source might only appear as the image is put together, hence don't complain */
+                        log_debug_errno(r, "Bind mount source %s not found, ignoring: %m", cm->source);
+                        continue;
+                }
+                if (r < 0)
+                        return log_error_errno(r, "Failed to stat %s: %m", cm->source);
+
+                if (r) {
+                        r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 1,
+                                                  cm->source, cm->read_only ? "r" : "rw");
+                        if (r < 0)
+                                return log_error_errno(r, "Failed to append message arguments: %m");
+                }
+        }
+
+        if (kill_signal != 0) {
+                r = sd_bus_message_append(m, "(sv)", "KillSignal", "i", kill_signal);
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                r = sd_bus_message_append(m, "(sv)", "KillMode", "s", "mixed");
+                if (r < 0)
+                        return bus_log_create_error(r);
+        }
+
+        return 0;
+}
+
 int register_machine(
                 const char *machine_name,
                 pid_t pid,
@@ -68,7 +139,6 @@ int register_machine(
                                 local_ifindex > 0 ? 1 : 0, local_ifindex);
         } else {
                 _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
-                unsigned j;
 
                 r = sd_bus_message_new_method_call(
                                 bus,
@@ -103,63 +173,14 @@ int register_machine(
                                 return bus_log_create_error(r);
                 }
 
-                r = sd_bus_message_append(m, "(sv)", "DevicePolicy", "s", "closed");
-                if (r < 0)
-                        return bus_log_create_error(r);
-
-                /* If you make changes here, also make sure to update
-                 * systemd-nspawn@.service, to keep the device
-                 * policies in sync regardless if we are run with or
-                 * without the --keep-unit switch. */
-                r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 2,
-                                          /* Allow the container to
-                                           * access and create the API
-                                           * device nodes, so that
-                                           * PrivateDevices= in the
-                                           * container can work
-                                           * fine */
-                                          "/dev/net/tun", "rwm",
-                                          /* Allow the container
-                                           * access to ptys. However,
-                                           * do not permit the
-                                           * container to ever create
-                                           * these device nodes. */
-                                          "char-pts", "rw");
+                r = append_machine_properties(
+                                m,
+                                mounts,
+                                n_mounts,
+                                kill_signal,
+                                properties);
                 if (r < 0)
-                        return bus_log_create_error(r);
-
-                for (j = 0; j < n_mounts; j++) {
-                        CustomMount *cm = mounts + j;
-
-                        if (cm->type != CUSTOM_MOUNT_BIND)
-                                continue;
-
-                        r = is_device_node(cm->source);
-                        if (r == -ENOENT) {
-                                /* The bind source might only appear as the image is put together, hence don't complain */
-                                log_debug_errno(r, "Bind mount source %s not found, ignoring: %m", cm->source);
-                                continue;
-                        }
-                        if (r < 0)
-                                return log_error_errno(r, "Failed to stat %s: %m", cm->source);
-
-                        if (r) {
-                                r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 1,
-                                        cm->source, cm->read_only ? "r" : "rw");
-                                if (r < 0)
-                                        return log_error_errno(r, "Failed to append message arguments: %m");
-                        }
-                }
-
-                if (kill_signal != 0) {
-                        r = sd_bus_message_append(m, "(sv)", "KillSignal", "i", kill_signal);
-                        if (r < 0)
-                                return bus_log_create_error(r);
-
-                        r = sd_bus_message_append(m, "(sv)", "KillMode", "s", "mixed");
-                        if (r < 0)
-                                return bus_log_create_error(r);
-                }
+                        return r;
 
                 r = bus_append_unit_property_assignment_many(m, properties);
                 if (r < 0)
@@ -229,3 +250,90 @@ int terminate_machine(pid_t pid) {
 
         return 0;
 }
+
+int allocate_scope(
+                const char *machine_name,
+                pid_t pid,
+                const char *slice,
+                CustomMount *mounts,
+                unsigned n_mounts,
+                int kill_signal,
+                char **properties) {
+
+        _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+        _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+        _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+        _cleanup_free_ char *scope = NULL;
+        const char *description;
+        int r;
+
+        r = sd_bus_default_system(&bus);
+        if (r < 0)
+                return log_error_errno(r, "Failed to open system bus: %m");
+
+        r = unit_name_mangle_with_suffix(machine_name, UNIT_NAME_NOGLOB, ".scope", &scope);
+        if (r < 0)
+                return log_error_errno(r, "Failed to mangle scope name: %m");
+
+        r = sd_bus_message_new_method_call(
+                        bus,
+                        &m,
+                        "org.freedesktop.systemd1",
+                        "/org/freedesktop/systemd1",
+                        "org.freedesktop.systemd1.Manager",
+                        "StartTransientUnit");
+        if (r < 0)
+                return bus_log_create_error(r);
+
+        r = sd_bus_message_append(m, "ss", scope, "fail");
+        if (r < 0)
+                return bus_log_create_error(r);
+
+        /* Properties */
+        r = sd_bus_message_open_container(m, 'a', "(sv)");
+        if (r < 0)
+                return bus_log_create_error(r);
+
+        description = strjoina("Container ", machine_name);
+
+        r = sd_bus_message_append(m, "(sv)(sv)(sv)(sv)",
+                                  "PIDs", "au", 1, pid,
+                                  "Description", "s", description,
+                                  "Delegate", "b", 1,
+                                  "Slice", "s", isempty(slice) ? "machine.slice" : slice);
+        if (r < 0)
+                return bus_log_create_error(r);
+
+        r = append_machine_properties(
+                        m,
+                        mounts,
+                        n_mounts,
+                        kill_signal,
+                        properties);
+        if (r < 0)
+                return r;
+
+        r = bus_append_unit_property_assignment_many(m, properties);
+        if (r < 0)
+                return r;
+
+        r = sd_bus_message_close_container(m);
+        if (r < 0)
+                return bus_log_create_error(r);
+
+        /* No auxiliary units */
+        r = sd_bus_message_append(
+                        m,
+                        "a(sa(sv))",
+                        0);
+        if (r < 0)
+                return bus_log_create_error(r);
+
+        r = sd_bus_call(bus, m, 0, &error, NULL);
+        if (r < 0) {
+                log_error("Failed to allocate scope: %s", bus_error_message(&error, r));
+                return r;
+        }
+
+        return 0;
+}
index 304c5a4..6694b3f 100644 (file)
@@ -27,3 +27,5 @@
 
 int register_machine(const char *machine_name, pid_t pid, const char *directory, sd_id128_t uuid, int local_ifindex, const char *slice, CustomMount *mounts, unsigned n_mounts, int kill_signal, char **properties, bool keep_unit, const char *service);
 int terminate_machine(pid_t pid);
+
+int allocate_scope(const char *machine_name, pid_t pid, const char *slice, CustomMount *mounts, unsigned n_mounts, int kill_signal, char **properties);
index 767d992..8a5fedd 100644 (file)
@@ -1083,8 +1083,8 @@ static int parse_argv(int argc, char *argv[]) {
         if (arg_userns_mode == USER_NAMESPACE_PICK)
                 arg_userns_chown = true;
 
-        if (arg_keep_unit && cg_pid_get_owner_uid(0, NULL) >= 0) {
-                log_error("--keep-unit may not be used when invoked from a user session.");
+        if (arg_keep_unit && arg_register && cg_pid_get_owner_uid(0, NULL) >= 0) {
+                log_error("--keep-unit --register=yes may not be used when invoked from a user session.");
                 return -EINVAL;
         }
 
@@ -3387,7 +3387,19 @@ static int run(int master,
                                 arg_container_service_name);
                 if (r < 0)
                         return r;
-        }
+        } else if (!arg_keep_unit) {
+                r = allocate_scope(
+                                arg_machine,
+                                *pid,
+                                arg_slice,
+                                arg_custom_mounts, arg_n_custom_mounts,
+                                arg_kill_signal,
+                                arg_property);
+                if (r < 0)
+                        return r;
+
+        } else if (arg_slice || arg_property)
+                log_notice("Machine and scope registration turned off, --slice= and --property= settings will have no effect.");
 
         r = sync_cgroup(*pid, arg_unified_cgroup_hierarchy, arg_uid_shift);
         if (r < 0)