kdbus: Upgrade driver to newer upstream version
author Karol Lewandowski <k.lewandowsk@samsung.com>
Thu, 6 Oct 2016 12:20:24 +0000 (14:20 +0200)
committer Seung-Woo Kim <sw0312.kim@samsung.com>
Wed, 14 Dec 2016 04:53:39 +0000 (13:53 +0900)
This commit upgrades the kdbus IPC driver from the v4 patchset, as posted
on LKML for review by Greg Kroah-Hartman on Mar 09 2015, to commit
0c05fbdc82f from the new upstream kdbus repository
(git://github.com/systemd/kdbus).

Summary of major changes:

 * message importer rewritten - considerably reduces internal message
   processing overhead,

 * name registration reworked to follow DBus Specification precisely,

 * attached metadata now follows /proc access checks,

 * reduced in-kernel stack buffer to 256 bytes for small messages.

Change-Id: I6d849173b4289e1b684ed1a9b48e6e0b361e5d53

32 files changed:
include/uapi/linux/kdbus.h
ipc/kdbus/bus.c
ipc/kdbus/bus.h
ipc/kdbus/connection.c
ipc/kdbus/connection.h
ipc/kdbus/endpoint.c
ipc/kdbus/endpoint.h
ipc/kdbus/fs.c
ipc/kdbus/handle.c
ipc/kdbus/handle.h
ipc/kdbus/item.c
ipc/kdbus/item.h
ipc/kdbus/limits.h
ipc/kdbus/main.c
ipc/kdbus/match.c
ipc/kdbus/match.h
ipc/kdbus/message.c
ipc/kdbus/message.h
ipc/kdbus/metadata.c
ipc/kdbus/metadata.h
ipc/kdbus/names.c
ipc/kdbus/names.h
ipc/kdbus/node.c
ipc/kdbus/node.h
ipc/kdbus/notify.c
ipc/kdbus/policy.c
ipc/kdbus/pool.c
ipc/kdbus/queue.c
ipc/kdbus/queue.h
ipc/kdbus/reply.c
ipc/kdbus/util.c
ipc/kdbus/util.h

index 00a6e14..4fc44cb 100644 (file)
@@ -374,6 +374,7 @@ enum kdbus_item_type {
        KDBUS_ITEM_ATTACH_FLAGS_RECV,
        KDBUS_ITEM_ID,
        KDBUS_ITEM_NAME,
+       KDBUS_ITEM_DST_ID,
 
        /* keep these item types in sync with KDBUS_ATTACH_* flags */
        _KDBUS_ITEM_ATTACH_BASE = 0x1000,
@@ -853,6 +854,8 @@ enum kdbus_make_flags {
  * @KDBUS_NAME_QUEUE:                  Name should be queued if busy
  * @KDBUS_NAME_IN_QUEUE:               Name is queued
  * @KDBUS_NAME_ACTIVATOR:              Name is owned by a activator connection
+ * @KDBUS_NAME_PRIMARY:                        Primary owner of the name
+ * @KDBUS_NAME_ACQUIRED:               Name was acquired/queued _now_
  */
 enum kdbus_name_flags {
        KDBUS_NAME_REPLACE_EXISTING     = 1ULL <<  0,
@@ -860,6 +863,8 @@ enum kdbus_name_flags {
        KDBUS_NAME_QUEUE                = 1ULL <<  2,
        KDBUS_NAME_IN_QUEUE             = 1ULL <<  3,
        KDBUS_NAME_ACTIVATOR            = 1ULL <<  4,
+       KDBUS_NAME_PRIMARY              = 1ULL <<  5,
+       KDBUS_NAME_ACQUIRED             = 1ULL <<  6,
 };
 
 /**
index cfe997f..a67f825 100644 (file)
@@ -66,23 +66,16 @@ static struct kdbus_bus *kdbus_bus_new(struct kdbus_domain *domain,
                                       const char *name,
                                       struct kdbus_bloom_parameter *bloom,
                                       const u64 *pattach_owner,
-                                      const u64 *pattach_recv,
                                       u64 flags, kuid_t uid, kgid_t gid)
 {
        struct kdbus_bus *b;
        u64 attach_owner;
-       u64 attach_recv;
        int ret;
 
        if (bloom->size < 8 || bloom->size > KDBUS_BUS_BLOOM_MAX_SIZE ||
            !KDBUS_IS_ALIGNED8(bloom->size) || bloom->n_hash < 1)
                return ERR_PTR(-EINVAL);
 
-       ret = kdbus_sanitize_attach_flags(pattach_recv ? *pattach_recv : 0,
-                                         &attach_recv);
-       if (ret < 0)
-               return ERR_PTR(ret);
-
        ret = kdbus_sanitize_attach_flags(pattach_owner ? *pattach_owner : 0,
                                          &attach_owner);
        if (ret < 0)
@@ -111,7 +104,6 @@ static struct kdbus_bus *kdbus_bus_new(struct kdbus_domain *domain,
 
        b->id = atomic64_inc_return(&domain->last_id);
        b->bus_flags = flags;
-       b->attach_flags_req = attach_recv;
        b->attach_flags_owner = attach_owner;
        generate_random_uuid(b->id128);
        b->bloom = *bloom;
@@ -240,9 +232,9 @@ struct kdbus_conn *kdbus_bus_find_conn_by_id(struct kdbus_bus *bus, u64 id)
  * kdbus_bus_broadcast() - send a message to all subscribed connections
  * @bus:       The bus the connections are connected to
  * @conn_src:  The source connection, may be %NULL for kernel notifications
- * @kmsg:      The message to send.
+ * @staging:   Staging object containing the message to send
  *
- * Send @kmsg to all connections that are currently active on the bus.
+ * Send message to all connections that are currently active on the bus.
  * Connections must still have matches installed in order to let the message
  * pass.
  *
@@ -250,7 +242,7 @@ struct kdbus_conn *kdbus_bus_find_conn_by_id(struct kdbus_bus *bus, u64 id)
  */
 void kdbus_bus_broadcast(struct kdbus_bus *bus,
                         struct kdbus_conn *conn_src,
-                        struct kdbus_kmsg *kmsg)
+                        struct kdbus_staging *staging)
 {
        struct kdbus_conn *conn_dst;
        unsigned int i;
@@ -267,7 +259,7 @@ void kdbus_bus_broadcast(struct kdbus_bus *bus,
         * can re-construct order via sequence numbers), but we should at least
         * try to avoid re-ordering for monitors.
         */
-       kdbus_bus_eavesdrop(bus, conn_src, kmsg);
+       kdbus_bus_eavesdrop(bus, conn_src, staging);
 
        down_read(&bus->conn_rwlock);
        hash_for_each(bus->conn_hash, i, conn_dst, hentry) {
@@ -278,13 +270,11 @@ void kdbus_bus_broadcast(struct kdbus_bus *bus,
                 * Check if there is a match for the kmsg object in
                 * the destination connection match db
                 */
-               if (!kdbus_match_db_match_kmsg(conn_dst->match_db, conn_src,
-                                              kmsg))
+               if (!kdbus_match_db_match_msg(conn_dst->match_db, conn_src,
+                                             staging))
                        continue;
 
                if (conn_src) {
-                       u64 attach_flags;
-
                        /*
                         * Anyone can send broadcasts, as they have no
                         * destination. But a receiver needs TALK access to
@@ -292,20 +282,6 @@ void kdbus_bus_broadcast(struct kdbus_bus *bus,
                         */
                        if (!kdbus_conn_policy_talk(conn_dst, NULL, conn_src))
                                continue;
-
-                       attach_flags = kdbus_meta_calc_attach_flags(conn_src,
-                                                                   conn_dst);
-
-                       /*
-                        * Keep sending messages even if we cannot acquire the
-                        * requested metadata. It's up to the receiver to drop
-                        * messages that lack expected metadata.
-                        */
-                       if (!conn_src->faked_meta)
-                               kdbus_meta_proc_collect(kmsg->proc_meta,
-                                                       attach_flags);
-                       kdbus_meta_conn_collect(kmsg->conn_meta, kmsg, conn_src,
-                                               attach_flags);
                } else {
                        /*
                         * Check if there is a policy db that prevents the
@@ -313,11 +289,12 @@ void kdbus_bus_broadcast(struct kdbus_bus *bus,
                         * notification
                         */
                        if (!kdbus_conn_policy_see_notification(conn_dst, NULL,
-                                                               kmsg))
+                                                               staging->msg))
                                continue;
                }
 
-               ret = kdbus_conn_entry_insert(conn_src, conn_dst, kmsg, NULL);
+               ret = kdbus_conn_entry_insert(conn_src, conn_dst, staging,
+                                             NULL, NULL);
                if (ret < 0)
                        kdbus_conn_lost_message(conn_dst);
        }
@@ -328,16 +305,16 @@ void kdbus_bus_broadcast(struct kdbus_bus *bus,
  * kdbus_bus_eavesdrop() - send a message to all subscribed monitors
  * @bus:       The bus the monitors are connected to
  * @conn_src:  The source connection, may be %NULL for kernel notifications
- * @kmsg:      The message to send.
+ * @staging:   Staging object containing the message to send
  *
- * Send @kmsg to all monitors that are currently active on the bus. Monitors
+ * Send message to all monitors that are currently active on the bus. Monitors
  * must still have matches installed in order to let the message pass.
  *
  * The caller must hold the name-registry lock of @bus.
  */
 void kdbus_bus_eavesdrop(struct kdbus_bus *bus,
                         struct kdbus_conn *conn_src,
-                        struct kdbus_kmsg *kmsg)
+                        struct kdbus_staging *staging)
 {
        struct kdbus_conn *conn_dst;
        int ret;
@@ -351,25 +328,8 @@ void kdbus_bus_eavesdrop(struct kdbus_bus *bus,
 
        down_read(&bus->conn_rwlock);
        list_for_each_entry(conn_dst, &bus->monitors_list, monitor_entry) {
-               /*
-                * Collect metadata requested by the destination connection.
-                * Ignore errors, as receivers need to check metadata
-                * availability, anyway. So it's still better to send messages
-                * that lack data, than to skip it entirely.
-                */
-               if (conn_src) {
-                       u64 attach_flags;
-
-                       attach_flags = kdbus_meta_calc_attach_flags(conn_src,
-                                                                   conn_dst);
-                       if (!conn_src->faked_meta)
-                               kdbus_meta_proc_collect(kmsg->proc_meta,
-                                                       attach_flags);
-                       kdbus_meta_conn_collect(kmsg->conn_meta, kmsg, conn_src,
-                                               attach_flags);
-               }
-
-               ret = kdbus_conn_entry_insert(conn_src, conn_dst, kmsg, NULL);
+               ret = kdbus_conn_entry_insert(conn_src, conn_dst, staging,
+                                             NULL, NULL);
                if (ret < 0)
                        kdbus_conn_lost_message(conn_dst);
        }
@@ -381,7 +341,7 @@ void kdbus_bus_eavesdrop(struct kdbus_bus *bus,
  * @domain:            domain to operate on
  * @argp:              command payload
  *
- * Return: Newly created bus on success, ERR_PTR on failure.
+ * Return: NULL or newly created bus on success, ERR_PTR on failure.
  */
 struct kdbus_bus *kdbus_cmd_bus_make(struct kdbus_domain *domain,
                                     void __user *argp)
@@ -396,7 +356,6 @@ struct kdbus_bus *kdbus_cmd_bus_make(struct kdbus_domain *domain,
                { .type = KDBUS_ITEM_MAKE_NAME, .mandatory = true },
                { .type = KDBUS_ITEM_BLOOM_PARAMETER, .mandatory = true },
                { .type = KDBUS_ITEM_ATTACH_FLAGS_SEND },
-               { .type = KDBUS_ITEM_ATTACH_FLAGS_RECV },
        };
        struct kdbus_args args = {
                .allowed_flags = KDBUS_FLAG_NEGOTIATE |
@@ -415,7 +374,6 @@ struct kdbus_bus *kdbus_cmd_bus_make(struct kdbus_domain *domain,
        bus = kdbus_bus_new(domain,
                            argv[1].item->str, &argv[2].item->bloom_parameter,
                            argv[3].item ? argv[3].item->data64 : NULL,
-                           argv[4].item ? argv[4].item->data64 : NULL,
                            cmd->flags, current_euid(), current_egid());
        if (IS_ERR(bus)) {
                ret = PTR_ERR(bus);
@@ -475,20 +433,19 @@ exit:
  * @conn:              connection to operate on
  * @argp:              command payload
  *
- * Return: 0 on success, negative error code on failure.
+ * Return: >=0 on success, negative error code on failure.
  */
 int kdbus_cmd_bus_creator_info(struct kdbus_conn *conn, void __user *argp)
 {
        struct kdbus_cmd_info *cmd;
        struct kdbus_bus *bus = conn->ep->bus;
        struct kdbus_pool_slice *slice = NULL;
+       struct kdbus_item *meta_items = NULL;
        struct kdbus_item_header item_hdr;
        struct kdbus_info info = {};
-       size_t meta_size, name_len;
-       struct kvec kvec[5];
-       u64 hdr_size = 0;
-       u64 attach_flags;
-       size_t cnt = 0;
+       size_t meta_size, name_len, cnt = 0;
+       struct kvec kvec[6];
+       u64 attach_flags, size = 0;
        int ret;
 
        struct kdbus_arg argv[] = {
@@ -510,8 +467,8 @@ int kdbus_cmd_bus_creator_info(struct kdbus_conn *conn, void __user *argp)
 
        attach_flags &= bus->attach_flags_owner;
 
-       ret = kdbus_meta_export_prepare(bus->creator_meta, NULL,
-                                       &attach_flags, &meta_size);
+       ret = kdbus_meta_emit(bus->creator_meta, NULL, NULL, conn,
+                             attach_flags, &meta_items, &meta_size);
        if (ret < 0)
                goto exit;
 
@@ -521,26 +478,25 @@ int kdbus_cmd_bus_creator_info(struct kdbus_conn *conn, void __user *argp)
        item_hdr.type = KDBUS_ITEM_MAKE_NAME;
        item_hdr.size = KDBUS_ITEM_HEADER_SIZE + name_len;
 
-       kdbus_kvec_set(&kvec[cnt++], &info, sizeof(info), &hdr_size);
-       kdbus_kvec_set(&kvec[cnt++], &item_hdr, sizeof(item_hdr), &hdr_size);
-       kdbus_kvec_set(&kvec[cnt++], bus->node.name, name_len, &hdr_size);
-       cnt += !!kdbus_kvec_pad(&kvec[cnt], &hdr_size);
+       kdbus_kvec_set(&kvec[cnt++], &info, sizeof(info), &size);
+       kdbus_kvec_set(&kvec[cnt++], &item_hdr, sizeof(item_hdr), &size);
+       kdbus_kvec_set(&kvec[cnt++], bus->node.name, name_len, &size);
+       cnt += !!kdbus_kvec_pad(&kvec[cnt], &size);
+       if (meta_size > 0) {
+               kdbus_kvec_set(&kvec[cnt++], meta_items, meta_size, &size);
+               cnt += !!kdbus_kvec_pad(&kvec[cnt], &size);
+       }
+
+       info.size = size;
 
-       slice = kdbus_pool_slice_alloc(conn->pool, hdr_size + meta_size, false);
+       slice = kdbus_pool_slice_alloc(conn->pool, size, false);
        if (IS_ERR(slice)) {
                ret = PTR_ERR(slice);
                slice = NULL;
                goto exit;
        }
 
-       ret = kdbus_meta_export(bus->creator_meta, NULL, attach_flags,
-                               slice, hdr_size, &meta_size);
-       if (ret < 0)
-               goto exit;
-
-       info.size = hdr_size + meta_size;
-
-       ret = kdbus_pool_slice_copy_kvec(slice, 0, kvec, cnt, hdr_size);
+       ret = kdbus_pool_slice_copy_kvec(slice, 0, kvec, cnt, size);
        if (ret < 0)
                goto exit;
 
@@ -553,6 +509,6 @@ int kdbus_cmd_bus_creator_info(struct kdbus_conn *conn, void __user *argp)
 
 exit:
        kdbus_pool_slice_release(slice);
-
+       kfree(meta_items);
        return kdbus_args_clear(&args, ret);
 }
index 5bea5ef..8c2acae 100644 (file)
@@ -29,7 +29,7 @@
 
 struct kdbus_conn;
 struct kdbus_domain;
-struct kdbus_kmsg;
+struct kdbus_staging;
 struct kdbus_user;
 
 /**
@@ -37,7 +37,6 @@ struct kdbus_user;
  * @node:              kdbus_node
  * @id:                        ID of this bus in the domain
  * @bus_flags:         Simple pass-through flags from userspace to userspace
- * @attach_flags_req:  KDBUS_ATTACH_* flags required by connecting peers
  * @attach_flags_owner:        KDBUS_ATTACH_* flags of bus creator that other
  *                     connections can see or query
  * @id128:             Unique random 128 bit ID of this bus
@@ -45,6 +44,7 @@ struct kdbus_user;
  * @domain:            Domain of this bus
  * @creator:           Creator of the bus
  * @creator_meta:      Meta information about the bus creator
+ * @last_message_id:   Last used message id
  * @policy_db:         Policy database for this bus
  * @name_registry:     Name registry of this bus
  * @conn_rwlock:       Read/Write lock for all lists of child connections
@@ -60,7 +60,6 @@ struct kdbus_bus {
        /* static */
        u64 id;
        u64 bus_flags;
-       u64 attach_flags_req;
        u64 attach_flags_owner;
        u8 id128[16];
        struct kdbus_bloom_parameter bloom;
@@ -69,6 +68,7 @@ struct kdbus_bus {
        struct kdbus_meta_proc *creator_meta;
 
        /* protected by own locks */
+       atomic64_t last_message_id;
        struct kdbus_policy_db policy_db;
        struct kdbus_name_registry *name_registry;
 
@@ -89,10 +89,10 @@ struct kdbus_bus *kdbus_bus_unref(struct kdbus_bus *bus);
 struct kdbus_conn *kdbus_bus_find_conn_by_id(struct kdbus_bus *bus, u64 id);
 void kdbus_bus_broadcast(struct kdbus_bus *bus,
                         struct kdbus_conn *conn_src,
-                        struct kdbus_kmsg *kmsg);
+                        struct kdbus_staging *staging);
 void kdbus_bus_eavesdrop(struct kdbus_bus *bus,
                         struct kdbus_conn *conn_src,
-                        struct kdbus_kmsg *kmsg);
+                        struct kdbus_staging *staging);
 
 struct kdbus_bus *kdbus_cmd_bus_make(struct kdbus_domain *domain,
                                     void __user *argp);
index 8e994ea..f80191e 100644 (file)
 #define KDBUS_CONN_ACTIVE_NEW  (INT_MIN + 1)
 
 /* Disable internal kdbus policy - possibilities of connections to own, see and
- * talk to names are restricted by libdbuspolicy library
+ * talk to names are restricted by libdbuspolicy library and LSM hooks
  */
 #define DISABLE_KDBUS_POLICY
 
-static struct kdbus_conn *kdbus_conn_new(struct kdbus_ep *ep, bool privileged,
+static struct kdbus_conn *kdbus_conn_new(struct kdbus_ep *ep,
+                                        struct file *file,
                                         struct kdbus_cmd_hello *hello,
                                         const char *name,
                                         const struct kdbus_creds *creds,
@@ -77,6 +78,8 @@ static struct kdbus_conn *kdbus_conn_new(struct kdbus_ep *ep, bool privileged,
        bool is_policy_holder;
        bool is_activator;
        bool is_monitor;
+       bool privileged;
+       bool owner;
        struct kvec kvec;
        int ret;
 
@@ -86,6 +89,9 @@ static struct kdbus_conn *kdbus_conn_new(struct kdbus_ep *ep, bool privileged,
                struct kdbus_bloom_parameter bloom;
        } bloom_item;
 
+       privileged = kdbus_ep_is_privileged(ep, file);
+       owner = kdbus_ep_is_owner(ep, file);
+
        is_monitor = hello->flags & KDBUS_HELLO_MONITOR;
        is_activator = hello->flags & KDBUS_HELLO_ACTIVATOR;
        is_policy_holder = hello->flags & KDBUS_HELLO_POLICY_HOLDER;
@@ -102,9 +108,9 @@ static struct kdbus_conn *kdbus_conn_new(struct kdbus_ep *ep, bool privileged,
                return ERR_PTR(-EINVAL);
        if (is_monitor && ep->user)
                return ERR_PTR(-EOPNOTSUPP);
-       if (!privileged && (is_activator || is_policy_holder || is_monitor))
+       if (!owner && (is_activator || is_policy_holder || is_monitor))
                return ERR_PTR(-EPERM);
-       if ((creds || pids || seclabel) && !privileged)
+       if (!owner && (creds || pids || seclabel))
                return ERR_PTR(-EPERM);
 
        ret = kdbus_sanitize_attach_flags(hello->attach_flags_send,
@@ -117,10 +123,6 @@ static struct kdbus_conn *kdbus_conn_new(struct kdbus_ep *ep, bool privileged,
        if (ret < 0)
                return ERR_PTR(ret);
 
-       /* The attach flags must always satisfy the bus requirements. */
-       if (bus->attach_flags_req & ~attach_flags_send)
-               return ERR_PTR(-ECONNREFUSED);
-
        conn = kzalloc(sizeof(*conn), GFP_KERNEL);
        if (!conn)
                return ERR_PTR(-ENOMEM);
@@ -132,16 +134,17 @@ static struct kdbus_conn *kdbus_conn_new(struct kdbus_ep *ep, bool privileged,
 #endif
        mutex_init(&conn->lock);
        INIT_LIST_HEAD(&conn->names_list);
-       INIT_LIST_HEAD(&conn->names_queue_list);
        INIT_LIST_HEAD(&conn->reply_list);
-       atomic_set(&conn->name_count, 0);
        atomic_set(&conn->request_count, 0);
        atomic_set(&conn->lost_count, 0);
        INIT_DELAYED_WORK(&conn->work, kdbus_reply_list_scan_work);
-       conn->cred = get_current_cred();
+       conn->cred = get_cred(file->f_cred);
+       conn->pid = get_pid(task_pid(current));
+       get_fs_root(current->fs, &conn->root_path);
        init_waitqueue_head(&conn->wait);
        kdbus_queue_init(&conn->queue);
        conn->privileged = privileged;
+       conn->owner = owner;
        conn->ep = kdbus_ep_ref(ep);
        conn->id = atomic64_inc_return(&bus->domain->last_id);
        conn->flags = hello->flags;
@@ -178,22 +181,28 @@ static struct kdbus_conn *kdbus_conn_new(struct kdbus_ep *ep, bool privileged,
        BUILD_BUG_ON(sizeof(bus->id128) != sizeof(hello->id128));
        memcpy(hello->id128, bus->id128, sizeof(hello->id128));
 
-       conn->meta = kdbus_meta_proc_new();
-       if (IS_ERR(conn->meta)) {
-               ret = PTR_ERR(conn->meta);
-               conn->meta = NULL;
-               goto exit_unref;
-       }
-
        /* privileged processes can impersonate somebody else */
        if (creds || pids || seclabel) {
-               ret = kdbus_meta_proc_fake(conn->meta, creds, pids, seclabel);
-               if (ret < 0)
+               conn->meta_fake = kdbus_meta_fake_new();
+               if (IS_ERR(conn->meta_fake)) {
+                       ret = PTR_ERR(conn->meta_fake);
+                       conn->meta_fake = NULL;
                        goto exit_unref;
+               }
 
-               conn->faked_meta = true;
+               ret = kdbus_meta_fake_collect(conn->meta_fake,
+                                             creds, pids, seclabel);
+               if (ret < 0)
+                       goto exit_unref;
        } else {
-               ret = kdbus_meta_proc_collect(conn->meta,
+               conn->meta_proc = kdbus_meta_proc_new();
+               if (IS_ERR(conn->meta_proc)) {
+                       ret = PTR_ERR(conn->meta_proc);
+                       conn->meta_proc = NULL;
+                       goto exit_unref;
+               }
+
+               ret = kdbus_meta_proc_collect(conn->meta_proc,
                                              KDBUS_ATTACH_CREDS |
                                              KDBUS_ATTACH_PIDS |
                                              KDBUS_ATTACH_AUXGROUPS |
@@ -215,11 +224,21 @@ static struct kdbus_conn *kdbus_conn_new(struct kdbus_ep *ep, bool privileged,
         * Note that limits are always accounted against the real UID, not
         * the effective UID (cred->user always points to the accounting of
         * cred->uid, not cred->euid).
+        * In case the caller is privileged, we allow changing the accounting
+        * to the faked user.
         */
        if (ep->user) {
                conn->user = kdbus_user_ref(ep->user);
        } else {
-               conn->user = kdbus_user_lookup(ep->bus->domain, current_uid());
+               kuid_t uid;
+
+               if (conn->meta_fake && uid_valid(conn->meta_fake->uid) &&
+                   conn->privileged)
+                       uid = conn->meta_fake->uid;
+               else
+                       uid = conn->cred->uid;
+
+               conn->user = kdbus_user_lookup(ep->bus->domain, uid);
                if (IS_ERR(conn->user)) {
                        ret = PTR_ERR(conn->user);
                        conn->user = NULL;
@@ -268,7 +287,6 @@ static void __kdbus_conn_free(struct kref *kref)
        WARN_ON(delayed_work_pending(&conn->work));
        WARN_ON(!list_empty(&conn->queue.msg_list));
        WARN_ON(!list_empty(&conn->names_list));
-       WARN_ON(!list_empty(&conn->names_queue_list));
        WARN_ON(!list_empty(&conn->reply_list));
 
        if (conn->user) {
@@ -276,10 +294,13 @@ static void __kdbus_conn_free(struct kref *kref)
                kdbus_user_unref(conn->user);
        }
 
-       kdbus_meta_proc_unref(conn->meta);
+       kdbus_meta_fake_free(conn->meta_fake);
+       kdbus_meta_proc_unref(conn->meta_proc);
        kdbus_match_db_free(conn->match_db);
        kdbus_pool_free(conn->pool);
        kdbus_ep_unref(conn->ep);
+       path_put(&conn->root_path);
+       put_pid(conn->pid);
        put_cred(conn->cred);
        kfree(conn->description);
        kfree(conn->quota);
@@ -437,7 +458,7 @@ static int kdbus_conn_connect(struct kdbus_conn *conn, const char *name)
         * directly, and won't cause any notifications.
         */
        if (!kdbus_conn_is_monitor(conn)) {
-               ret = kdbus_notify_id_change(conn->ep->bus, KDBUS_ITEM_ID_ADD,
+               ret = kdbus_notify_id_change(bus, KDBUS_ITEM_ID_ADD,
                                             conn->id, conn->flags);
                if (ret < 0)
                        goto exit_disconnect;
@@ -564,17 +585,16 @@ int kdbus_conn_disconnect(struct kdbus_conn *conn, bool ensure_queue_empty)
        hash_for_each(bus->conn_hash, i, c, hentry) {
                mutex_lock(&c->lock);
                list_for_each_entry_safe(r, r_tmp, &c->reply_list, entry) {
-                       if (r->reply_src == conn) {
-                               if (r->sync) {
-                                       kdbus_sync_reply_wakeup(r, -EPIPE);
-                                       kdbus_reply_unlink(r);
-                                       continue;
-                               }
+                       if (r->reply_src != conn)
+                               continue;
 
+                       if (r->sync)
+                               kdbus_sync_reply_wakeup(r, -EPIPE);
+                       else
                                /* send a 'connection dead' notification */
                                kdbus_notify_reply_dead(bus, c->id, r->cookie);
-                               kdbus_reply_unlink(r);
-                       }
+
+                       kdbus_reply_unlink(r);
                }
                mutex_unlock(&c->lock);
        }
@@ -600,21 +620,22 @@ int kdbus_conn_disconnect(struct kdbus_conn *conn, bool ensure_queue_empty)
  */
 bool kdbus_conn_has_name(struct kdbus_conn *conn, const char *name)
 {
-       struct kdbus_name_entry *e;
+       struct kdbus_name_owner *owner;
 
        lockdep_assert_held(&conn->ep->bus->name_registry->rwlock);
 
-       list_for_each_entry(e, &conn->names_list, conn_entry)
-               if (strcmp(e->name, name) == 0)
+       list_for_each_entry(owner, &conn->names_list, conn_entry)
+               if (!(owner->flags & KDBUS_NAME_IN_QUEUE) &&
+                   !strcmp(name, owner->name->name))
                        return true;
 
        return false;
 }
 
 struct kdbus_quota {
-       uint32_t memory;
-       uint16_t msgs;
-       uint8_t fds;
+       u32 memory;
+       u16 msgs;
+       u8 fds;
 };
 
 /**
@@ -652,7 +673,7 @@ int kdbus_conn_quota_inc(struct kdbus_conn *c, struct kdbus_user *u,
         * allocation schemes. Furthermore, resource utilization should be
         * maximized, so only minimal resources stay reserved. However, we need
         * to adapt to a dynamic number of users, as we cannot know how many
-        * users will talk to a connection. Therefore, the current allocations
+        * users will talk to a connection. Therefore, the current allocation
         * works like this:
         * We limit the number of bytes in a destination's pool per sending
         * user. The space available for a user is 33% of the unused pool space
@@ -694,7 +715,7 @@ int kdbus_conn_quota_inc(struct kdbus_conn *c, struct kdbus_user *u,
 
        /*
         * Pool owner slices are un-accounted slices; they can claim more
-        * than 50% of the queue. However, the slice we're dealing with here
+        * than 50% of the queue. However, the slices we're dealing with here
         * belong to the incoming queue, hence they are 'accounted' slices
         * to which the 50%-limit applies.
         */
@@ -758,7 +779,7 @@ void kdbus_conn_quota_dec(struct kdbus_conn *c, struct kdbus_user *u,
  *
  * kdbus is reliable. That means, we try hard to never lose messages. However,
  * memory is limited, so we cannot rely on transmissions to never fail.
- * Therefore, we use quota-limits to let callers know if there unicast message
+ * Therefore, we use quota-limits to let callers know if their unicast message
  * cannot be transmitted to a peer. This works fine for unicasts, but for
  * broadcasts we cannot make the caller handle the transmission failure.
  * Instead, we must let the destination know that it couldn't receive a
@@ -776,12 +797,10 @@ void kdbus_conn_lost_message(struct kdbus_conn *c)
 
 /* Callers should take the conn_dst lock */
 static struct kdbus_queue_entry *
-kdbus_conn_entry_make(struct kdbus_conn *conn_dst,
-                     const struct kdbus_kmsg *kmsg,
-                     struct kdbus_user *user)
+kdbus_conn_entry_make(struct kdbus_conn *conn_src,
+                     struct kdbus_conn *conn_dst,
+                     struct kdbus_staging *staging)
 {
-       struct kdbus_queue_entry *entry;
-
        /* The remote connection was disconnected */
        if (!kdbus_conn_active(conn_dst))
                return ERR_PTR(-ECONNRESET);
@@ -795,14 +814,10 @@ kdbus_conn_entry_make(struct kdbus_conn *conn_dst,
         */
        if (!kdbus_conn_is_monitor(conn_dst) &&
            !(conn_dst->flags & KDBUS_HELLO_ACCEPT_FD) &&
-           kmsg->res && kmsg->res->fds_count > 0)
+           staging->gaps && staging->gaps->n_fds > 0)
                return ERR_PTR(-ECOMM);
 
-       entry = kdbus_queue_entry_new(conn_dst, kmsg, user);
-       if (IS_ERR(entry))
-               return entry;
-
-       return entry;
+       return kdbus_queue_entry_new(conn_src, conn_dst, staging);
 }
 
 /*
@@ -811,12 +826,11 @@ kdbus_conn_entry_make(struct kdbus_conn *conn_dst,
  * The connection's queue will never get to see it.
  */
 static int kdbus_conn_entry_sync_attach(struct kdbus_conn *conn_dst,
-                                       const struct kdbus_kmsg *kmsg,
+                                       struct kdbus_staging *staging,
                                        struct kdbus_reply *reply_wake)
 {
        struct kdbus_queue_entry *entry;
-       int remote_ret;
-       int ret = 0;
+       int remote_ret, ret = 0;
 
        mutex_lock(&reply_wake->reply_dst->lock);
 
@@ -825,8 +839,8 @@ static int kdbus_conn_entry_sync_attach(struct kdbus_conn *conn_dst,
         * entry and attach it to the reply object
         */
        if (reply_wake->waiting) {
-               entry = kdbus_conn_entry_make(conn_dst, kmsg,
-                                             reply_wake->reply_src->user);
+               entry = kdbus_conn_entry_make(reply_wake->reply_src, conn_dst,
+                                             staging);
                if (IS_ERR(entry))
                        ret = PTR_ERR(entry);
                else
@@ -867,23 +881,24 @@ static int kdbus_conn_entry_sync_attach(struct kdbus_conn *conn_dst,
  * kdbus_conn_entry_insert() - enqueue a message into the receiver's pool
  * @conn_src:          The sending connection
  * @conn_dst:          The connection to queue into
- * @kmsg:              The kmsg to queue
+ * @staging:           Message to send
  * @reply:             The reply tracker to attach to the queue entry
+ * @name:              Destination name this msg is sent to, or NULL
  *
  * Return: 0 on success. negative error otherwise.
  */
 int kdbus_conn_entry_insert(struct kdbus_conn *conn_src,
                            struct kdbus_conn *conn_dst,
-                           const struct kdbus_kmsg *kmsg,
-                           struct kdbus_reply *reply)
+                           struct kdbus_staging *staging,
+                           struct kdbus_reply *reply,
+                           const struct kdbus_name_entry *name)
 {
        struct kdbus_queue_entry *entry;
        int ret;
 
        kdbus_conn_lock2(conn_src, conn_dst);
 
-       entry = kdbus_conn_entry_make(conn_dst, kmsg,
-                                     conn_src ? conn_src->user : NULL);
+       entry = kdbus_conn_entry_make(conn_src, conn_dst, staging);
        if (IS_ERR(entry)) {
                ret = PTR_ERR(entry);
                goto exit_unlock;
@@ -895,6 +910,14 @@ int kdbus_conn_entry_insert(struct kdbus_conn *conn_src,
                        schedule_delayed_work(&conn_src->work, 0);
        }
 
+       /*
+        * Record the sequence number of the registered name; it will
+        * be remembered by the queue, in case messages addressed to a
+        * name need to be moved from or to an activator.
+        */
+       if (name)
+               entry->dst_name_id = name->name_id;
+
        kdbus_queue_entry_enqueue(entry, reply);
        wake_up_interruptible(&conn_dst->wait);
 
@@ -1027,22 +1050,19 @@ static int kdbus_conn_wait_reply(struct kdbus_conn *conn_src,
 }
 
 static int kdbus_pin_dst(struct kdbus_bus *bus,
-                        struct kdbus_kmsg *kmsg,
+                        struct kdbus_staging *staging,
                         struct kdbus_name_entry **out_name,
                         struct kdbus_conn **out_dst)
 {
-       struct kdbus_msg_resources *res = kmsg->res;
+       const struct kdbus_msg *msg = staging->msg;
+       struct kdbus_name_owner *owner = NULL;
        struct kdbus_name_entry *name = NULL;
        struct kdbus_conn *dst = NULL;
-       struct kdbus_msg *msg = &kmsg->msg;
        int ret;
 
-       if (WARN_ON(!res))
-               return -EINVAL;
-
        lockdep_assert_held(&bus->name_registry->rwlock);
 
-       if (!res->dst_name) {
+       if (!staging->dst_name) {
                dst = kdbus_bus_find_conn_by_id(bus, msg->dst_id);
                if (!dst)
                        return -ENXIO;
@@ -1053,8 +1073,10 @@ static int kdbus_pin_dst(struct kdbus_bus *bus,
                }
        } else {
                name = kdbus_name_lookup_unlocked(bus->name_registry,
-                                                 res->dst_name);
-               if (!name)
+                                                 staging->dst_name);
+               if (name)
+                       owner = kdbus_name_get_owner(name);
+               if (!owner)
                        return -ESRCH;
 
                /*
@@ -1066,26 +1088,14 @@ static int kdbus_pin_dst(struct kdbus_bus *bus,
                 * owns the given name.
                 */
                if (msg->dst_id != KDBUS_DST_ID_NAME &&
-                   msg->dst_id != name->conn->id)
+                   msg->dst_id != owner->conn->id)
                        return -EREMCHG;
 
-               if (!name->conn && name->activator)
-                       dst = kdbus_conn_ref(name->activator);
-               else
-                       dst = kdbus_conn_ref(name->conn);
-
                if ((msg->flags & KDBUS_MSG_NO_AUTO_START) &&
-                   kdbus_conn_is_activator(dst)) {
-                       ret = -EADDRNOTAVAIL;
-                       goto error;
-               }
+                   kdbus_conn_is_activator(owner->conn))
+                       return -EADDRNOTAVAIL;
 
-               /*
-                * Record the sequence number of the registered name; it will
-                * be passed on to the queue, in case messages addressed to a
-                * name need to be moved from or to an activator.
-                */
-               kmsg->dst_name_id = name->name_id;
+               dst = kdbus_conn_ref(owner->conn);
        }
 
        *out_name = name;
@@ -1097,18 +1107,19 @@ error:
        return ret;
 }
 
-static int kdbus_conn_reply(struct kdbus_conn *src, struct kdbus_kmsg *kmsg)
+static int kdbus_conn_reply(struct kdbus_conn *src,
+                           struct kdbus_staging *staging)
 {
+       const struct kdbus_msg *msg = staging->msg;
        struct kdbus_name_entry *name = NULL;
        struct kdbus_reply *reply, *wake = NULL;
        struct kdbus_conn *dst = NULL;
        struct kdbus_bus *bus = src->ep->bus;
-       u64 attach;
        int ret;
 
-       if (WARN_ON(kmsg->msg.dst_id == KDBUS_DST_ID_BROADCAST) ||
-           WARN_ON(kmsg->msg.flags & KDBUS_MSG_EXPECT_REPLY) ||
-           WARN_ON(kmsg->msg.flags & KDBUS_MSG_SIGNAL))
+       if (WARN_ON(msg->dst_id == KDBUS_DST_ID_BROADCAST) ||
+           WARN_ON(msg->flags & KDBUS_MSG_EXPECT_REPLY) ||
+           WARN_ON(msg->flags & KDBUS_MSG_SIGNAL))
                return -EINVAL;
 
        /* name-registry must be locked for lookup *and* collecting data */
@@ -1116,12 +1127,12 @@ static int kdbus_conn_reply(struct kdbus_conn *src, struct kdbus_kmsg *kmsg)
 
        /* find and pin destination */
 
-       ret = kdbus_pin_dst(bus, kmsg, &name, &dst);
+       ret = kdbus_pin_dst(bus, staging, &name, &dst);
        if (ret < 0)
                goto exit;
 
        mutex_lock(&dst->lock);
-       reply = kdbus_reply_find(src, dst, kmsg->msg.cookie_reply);
+       reply = kdbus_reply_find(src, dst, msg->cookie_reply);
        if (reply) {
                if (reply->sync)
                        wake = kdbus_reply_ref(reply);
@@ -1129,28 +1140,19 @@ static int kdbus_conn_reply(struct kdbus_conn *src, struct kdbus_kmsg *kmsg)
        }
        mutex_unlock(&dst->lock);
 
-       /* attach metadata */
-
-       attach = kdbus_meta_calc_attach_flags(src, dst);
-
-       if (!src->faked_meta) {
-               ret = kdbus_meta_proc_collect(kmsg->proc_meta, attach);
-               if (ret < 0)
-                       goto exit;
-       }
-
-       ret = kdbus_meta_conn_collect(kmsg->conn_meta, kmsg, src, attach);
-       if (ret < 0)
+       if (!reply) {
+               ret = -EBADSLT;
                goto exit;
+       }
 
        /* send message */
 
-       kdbus_bus_eavesdrop(bus, src, kmsg);
+       kdbus_bus_eavesdrop(bus, src, staging);
 
        if (wake)
-               ret = kdbus_conn_entry_sync_attach(dst, kmsg, wake);
+               ret = kdbus_conn_entry_sync_attach(dst, staging, wake);
        else
-               ret = kdbus_conn_entry_insert(src, dst, kmsg, NULL);
+               ret = kdbus_conn_entry_insert(src, dst, staging, NULL, name);
 
 exit:
        up_read(&bus->name_registry->rwlock);
@@ -1160,25 +1162,25 @@ exit:
 }
 
 static struct kdbus_reply *kdbus_conn_call(struct kdbus_conn *src,
-                                          struct kdbus_kmsg *kmsg,
+                                          struct kdbus_staging *staging,
                                           ktime_t exp)
 {
+       const struct kdbus_msg *msg = staging->msg;
        struct kdbus_name_entry *name = NULL;
        struct kdbus_reply *wait = NULL;
        struct kdbus_conn *dst = NULL;
        struct kdbus_bus *bus = src->ep->bus;
-       u64 attach;
        int ret;
 
-       if (WARN_ON(kmsg->msg.dst_id == KDBUS_DST_ID_BROADCAST) ||
-           WARN_ON(kmsg->msg.flags & KDBUS_MSG_SIGNAL) ||
-           WARN_ON(!(kmsg->msg.flags & KDBUS_MSG_EXPECT_REPLY)))
+       if (WARN_ON(msg->dst_id == KDBUS_DST_ID_BROADCAST) ||
+           WARN_ON(msg->flags & KDBUS_MSG_SIGNAL) ||
+           WARN_ON(!(msg->flags & KDBUS_MSG_EXPECT_REPLY)))
                return ERR_PTR(-EINVAL);
 
        /* resume previous wait-context, if available */
 
        mutex_lock(&src->lock);
-       wait = kdbus_reply_find(NULL, src, kmsg->msg.cookie);
+       wait = kdbus_reply_find(NULL, src, msg->cookie);
        if (wait) {
                if (wait->interrupted) {
                        kdbus_reply_ref(wait);
@@ -1200,44 +1202,27 @@ static struct kdbus_reply *kdbus_conn_call(struct kdbus_conn *src,
 
        /* find and pin destination */
 
-       ret = kdbus_pin_dst(bus, kmsg, &name, &dst);
+       ret = kdbus_pin_dst(bus, staging, &name, &dst);
        if (ret < 0)
                goto exit;
 
-       /* Disable internal kdbus policy - possibilities of connections to own,
-        * see and talk to well-known names are restricted by libdbuspolicy
        if (!kdbus_conn_policy_talk(src, current_cred(), dst)) {
                ret = -EPERM;
                goto exit;
        }
-       */
 
-       wait = kdbus_reply_new(dst, src, &kmsg->msg, name, true);
+       wait = kdbus_reply_new(dst, src, msg, name, true);
        if (IS_ERR(wait)) {
                ret = PTR_ERR(wait);
                wait = NULL;
                goto exit;
        }
 
-       /* attach metadata */
-
-       attach = kdbus_meta_calc_attach_flags(src, dst);
-
-       if (!src->faked_meta) {
-               ret = kdbus_meta_proc_collect(kmsg->proc_meta, attach);
-               if (ret < 0)
-                       goto exit;
-       }
-
-       ret = kdbus_meta_conn_collect(kmsg->conn_meta, kmsg, src, attach);
-       if (ret < 0)
-               goto exit;
-
        /* send message */
 
-       kdbus_bus_eavesdrop(bus, src, kmsg);
+       kdbus_bus_eavesdrop(bus, src, staging);
 
-       ret = kdbus_conn_entry_insert(src, dst, kmsg, wait);
+       ret = kdbus_conn_entry_insert(src, dst, staging, wait, name);
        if (ret < 0)
                goto exit;
 
@@ -1253,19 +1238,20 @@ exit:
        return wait;
 }
 
-static int kdbus_conn_unicast(struct kdbus_conn *src, struct kdbus_kmsg *kmsg)
+static int kdbus_conn_unicast(struct kdbus_conn *src,
+                             struct kdbus_staging *staging)
 {
+       const struct kdbus_msg *msg = staging->msg;
        struct kdbus_name_entry *name = NULL;
        struct kdbus_reply *wait = NULL;
        struct kdbus_conn *dst = NULL;
        struct kdbus_bus *bus = src->ep->bus;
-       bool is_signal = (kmsg->msg.flags & KDBUS_MSG_SIGNAL);
-       u64 attach;
+       bool is_signal = (msg->flags & KDBUS_MSG_SIGNAL);
        int ret = 0;
 
-       if (WARN_ON(kmsg->msg.dst_id == KDBUS_DST_ID_BROADCAST) ||
-           WARN_ON(!(kmsg->msg.flags & KDBUS_MSG_EXPECT_REPLY) &&
-                   kmsg->msg.cookie_reply != 0))
+       if (WARN_ON(msg->dst_id == KDBUS_DST_ID_BROADCAST) ||
+           WARN_ON(!(msg->flags & KDBUS_MSG_EXPECT_REPLY) &&
+                   msg->cookie_reply != 0))
                return -EINVAL;
 
        /* name-registry must be locked for lookup *and* collecting data */
@@ -1273,23 +1259,23 @@ static int kdbus_conn_unicast(struct kdbus_conn *src, struct kdbus_kmsg *kmsg)
 
        /* find and pin destination */
 
-       ret = kdbus_pin_dst(bus, kmsg, &name, &dst);
+       ret = kdbus_pin_dst(bus, staging, &name, &dst);
        if (ret < 0)
                goto exit;
 
        if (is_signal) {
                /* like broadcasts we eavesdrop even if the msg is dropped */
-               kdbus_bus_eavesdrop(bus, src, kmsg);
+               kdbus_bus_eavesdrop(bus, src, staging);
 
                /* drop silently if peer is not interested or not privileged */
-               if (!kdbus_match_db_match_kmsg(dst->match_db, src, kmsg) ||
+               if (!kdbus_match_db_match_msg(dst->match_db, src, staging) ||
                    !kdbus_conn_policy_talk(dst, NULL, src))
                        goto exit;
        } else if (!kdbus_conn_policy_talk(src, current_cred(), dst)) {
                ret = -EPERM;
                goto exit;
-       } else if (kmsg->msg.flags & KDBUS_MSG_EXPECT_REPLY) {
-               wait = kdbus_reply_new(dst, src, &kmsg->msg, name, false);
+       } else if (msg->flags & KDBUS_MSG_EXPECT_REPLY) {
+               wait = kdbus_reply_new(dst, src, msg, name, false);
                if (IS_ERR(wait)) {
                        ret = PTR_ERR(wait);
                        wait = NULL;
@@ -1297,26 +1283,12 @@ static int kdbus_conn_unicast(struct kdbus_conn *src, struct kdbus_kmsg *kmsg)
                }
        }
 
-       /* attach metadata */
-
-       attach = kdbus_meta_calc_attach_flags(src, dst);
-
-       if (!src->faked_meta) {
-               ret = kdbus_meta_proc_collect(kmsg->proc_meta, attach);
-               if (ret < 0 && !is_signal)
-                       goto exit;
-       }
-
-       ret = kdbus_meta_conn_collect(kmsg->conn_meta, kmsg, src, attach);
-       if (ret < 0 && !is_signal)
-               goto exit;
-
        /* send message */
 
        if (!is_signal)
-               kdbus_bus_eavesdrop(bus, src, kmsg);
+               kdbus_bus_eavesdrop(bus, src, staging);
 
-       ret = kdbus_conn_entry_insert(src, dst, kmsg, wait);
+       ret = kdbus_conn_entry_insert(src, dst, staging, wait, name);
        if (ret < 0 && !is_signal)
                goto exit;
 
@@ -1386,7 +1358,7 @@ void kdbus_conn_move_messages(struct kdbus_conn *conn_dst,
                        continue;
 
                if (!(conn_dst->flags & KDBUS_HELLO_ACCEPT_FD) &&
-                   e->msg_res && e->msg_res->fds_count > 0) {
+                   e->gaps && e->gaps->n_fds > 0) {
                        kdbus_conn_lost_message(conn_dst);
                        kdbus_queue_entry_free(e);
                        continue;
@@ -1412,7 +1384,7 @@ static bool kdbus_conn_policy_query_all(struct kdbus_conn *conn,
                                        struct kdbus_conn *whom,
                                        unsigned int access)
 {
-       struct kdbus_name_entry *ne;
+       struct kdbus_name_owner *owner;
        bool pass = false;
        int res;
 
@@ -1421,10 +1393,14 @@ static bool kdbus_conn_policy_query_all(struct kdbus_conn *conn,
        down_read(&db->entries_rwlock);
        mutex_lock(&whom->lock);
 
-       list_for_each_entry(ne, &whom->names_list, conn_entry) {
-               res = kdbus_policy_query_unlocked(db, conn_creds ? : conn->cred,
-                                                 ne->name,
-                                                 kdbus_strhash(ne->name));
+       list_for_each_entry(owner, &whom->names_list, conn_entry) {
+               if (owner->flags & KDBUS_NAME_IN_QUEUE)
+                       continue;
+
+               res = kdbus_policy_query_unlocked(db,
+                                       conn_creds ? : conn->cred,
+                                       owner->name->name,
+                                       kdbus_strhash(owner->name->name));
                if (res >= (int)access) {
                        pass = true;
                        break;
@@ -1468,7 +1444,7 @@ bool kdbus_conn_policy_own_name(struct kdbus_conn *conn,
                        return false;
        }
 
-       if (conn->privileged)
+       if (conn->owner)
                return true;
 
        res = kdbus_policy_query(&conn->ep->bus->policy_db, conn_creds,
@@ -1503,7 +1479,7 @@ bool kdbus_conn_policy_talk(struct kdbus_conn *conn,
                                         to, KDBUS_POLICY_TALK))
                return false;
 
-       if (conn->privileged)
+       if (conn->owner)
                return true;
        if (uid_eq(conn_creds->euid, to->cred->uid))
                return true;
@@ -1588,19 +1564,16 @@ static bool kdbus_conn_policy_see(struct kdbus_conn *conn,
  *                                       receive a given kernel notification
  * @conn:              Connection
  * @conn_creds:                Credentials of @conn to use for policy check
- * @kmsg:              The message carrying the notification
+ * @msg:               Notification message
  *
- * This checks whether @conn is allowed to see the kernel notification @kmsg.
+ * This checks whether @conn is allowed to see the kernel notification.
  *
  * Return: true if allowed, false if not.
  */
 bool kdbus_conn_policy_see_notification(struct kdbus_conn *conn,
                                        const struct cred *conn_creds,
-                                       const struct kdbus_kmsg *kmsg)
+                                       const struct kdbus_msg *msg)
 {
-       if (WARN_ON(kmsg->msg.src_id != KDBUS_SRC_ID_KERNEL))
-               return false;
-
        /*
         * Depending on the notification type, broadcasted kernel notifications
         * have to be filtered:
@@ -1609,26 +1582,24 @@ bool kdbus_conn_policy_see_notification(struct kdbus_conn *conn,
         *     to a peer if, and only if, that peer can see the name this
         *     notification is for.
         *
-        * KDBUS_ITEM_ID_{ADD,REMOVE}: As new peers cannot have names, and all
-        *     names are dropped before a peer is removed, those notifications
-        *     cannot be seen on custom endpoints. Thus, we only pass them
-        *     through on default endpoints.
+        * KDBUS_ITEM_ID_{ADD,REMOVE}: Notifications for ID changes are
+        *     broadcast to everyone, to allow tracking peers.
         */
 
-       switch (kmsg->notify_type) {
+       switch (msg->items[0].type) {
        case KDBUS_ITEM_NAME_ADD:
        case KDBUS_ITEM_NAME_REMOVE:
        case KDBUS_ITEM_NAME_CHANGE:
                return kdbus_conn_policy_see_name(conn, conn_creds,
-                                                 kmsg->notify_name);
+                                       msg->items[0].name_change.name);
 
        case KDBUS_ITEM_ID_ADD:
        case KDBUS_ITEM_ID_REMOVE:
-               return !conn->ep->user;
+               return true;
 
        default:
                WARN(1, "Invalid type for notification broadcast: %llu\n",
-                    (unsigned long long)kmsg->notify_type);
+                    (unsigned long long)msg->items[0].type);
                return false;
        }
 }
@@ -1636,12 +1607,12 @@ bool kdbus_conn_policy_see_notification(struct kdbus_conn *conn,
 /**
  * kdbus_cmd_hello() - handle KDBUS_CMD_HELLO
  * @ep:                        Endpoint to operate on
- * @privileged:                Whether the caller is privileged
+ * @file:              File this connection is opened on
  * @argp:              Command payload
  *
- * Return: Newly created connection on success, ERR_PTR on failure.
+ * Return: NULL or newly created connection on success, ERR_PTR on failure.
  */
-struct kdbus_conn *kdbus_cmd_hello(struct kdbus_ep *ep, bool privileged,
+struct kdbus_conn *kdbus_cmd_hello(struct kdbus_ep *ep, struct file *file,
                                   void __user *argp)
 {
        struct kdbus_cmd_hello *cmd;
@@ -1676,7 +1647,7 @@ struct kdbus_conn *kdbus_cmd_hello(struct kdbus_ep *ep, bool privileged,
 
        item_name = argv[1].item ? argv[1].item->str : NULL;
 
-       c = kdbus_conn_new(ep, privileged, cmd, item_name,
+       c = kdbus_conn_new(ep, file, cmd, item_name,
                           argv[2].item ? &argv[2].item->creds : NULL,
                           argv[3].item ? &argv[3].item->pids : NULL,
                           argv[4].item ? argv[4].item->str : NULL,
@@ -1726,7 +1697,7 @@ exit:
  *
  * The caller must not hold any active reference to @conn or this will deadlock.
  *
- * Return: 0 on success, negative error code on failure.
+ * Return: >=0 on success, negative error code on failure.
  */
 int kdbus_cmd_byebye_unlocked(struct kdbus_conn *conn, void __user *argp)
 {
@@ -1758,21 +1729,23 @@ int kdbus_cmd_byebye_unlocked(struct kdbus_conn *conn, void __user *argp)
  * @conn:              connection to operate on
  * @argp:              command payload
  *
- * Return: 0 on success, negative error code on failure.
+ * Return: >=0 on success, negative error code on failure.
  */
 int kdbus_cmd_conn_info(struct kdbus_conn *conn, void __user *argp)
 {
        struct kdbus_meta_conn *conn_meta = NULL;
        struct kdbus_pool_slice *slice = NULL;
        struct kdbus_name_entry *entry = NULL;
+       struct kdbus_name_owner *owner = NULL;
        struct kdbus_conn *owner_conn = NULL;
+       struct kdbus_item *meta_items = NULL;
        struct kdbus_info info = {};
        struct kdbus_cmd_info *cmd;
        struct kdbus_bus *bus = conn->ep->bus;
-       struct kvec kvec;
-       size_t meta_size;
+       struct kvec kvec[3];
+       size_t meta_size, cnt = 0;
        const char *name;
-       u64 attach_flags;
+       u64 attach_flags, size = 0;
        int ret;
 
        struct kdbus_arg argv[] = {
@@ -1800,15 +1773,17 @@ int kdbus_cmd_conn_info(struct kdbus_conn *conn, void __user *argp)
 
        if (name) {
                entry = kdbus_name_lookup_unlocked(bus->name_registry, name);
-               if (!entry || !entry->conn ||
+               if (entry)
+                       owner = kdbus_name_get_owner(entry);
+               if (!owner ||
                    !kdbus_conn_policy_see_name(conn, current_cred(), name) ||
-                   (cmd->id != 0 && entry->conn->id != cmd->id)) {
+                   (cmd->id != 0 && owner->conn->id != cmd->id)) {
                        /* pretend a name doesn't exist if you cannot see it */
                        ret = -ESRCH;
                        goto exit;
                }
 
-               owner_conn = kdbus_conn_ref(entry->conn);
+               owner_conn = kdbus_conn_ref(owner->conn);
        } else if (cmd->id > 0) {
                owner_conn = kdbus_bus_find_conn_by_id(bus, cmd->id);
                if (!owner_conn || !kdbus_conn_policy_see(conn, current_cred(),
@@ -1822,10 +1797,6 @@ int kdbus_cmd_conn_info(struct kdbus_conn *conn, void __user *argp)
                goto exit;
        }
 
-       info.id = owner_conn->id;
-       info.flags = owner_conn->flags;
-       kdbus_kvec_set(&kvec, &info, sizeof(info), &info.size);
-
        attach_flags &= atomic64_read(&owner_conn->attach_flags_send);
 
        conn_meta = kdbus_meta_conn_new();
@@ -1835,32 +1806,35 @@ int kdbus_cmd_conn_info(struct kdbus_conn *conn, void __user *argp)
                goto exit;
        }
 
-       ret = kdbus_meta_conn_collect(conn_meta, NULL, owner_conn,
-                                     attach_flags);
+       ret = kdbus_meta_conn_collect(conn_meta, owner_conn, 0, attach_flags);
        if (ret < 0)
                goto exit;
 
-       ret = kdbus_meta_export_prepare(owner_conn->meta, conn_meta,
-                                       &attach_flags, &meta_size);
+       ret = kdbus_meta_emit(owner_conn->meta_proc, owner_conn->meta_fake,
+                             conn_meta, conn, attach_flags,
+                             &meta_items, &meta_size);
        if (ret < 0)
                goto exit;
 
-       slice = kdbus_pool_slice_alloc(conn->pool,
-                                      info.size + meta_size, false);
+       info.id = owner_conn->id;
+       info.flags = owner_conn->flags;
+
+       kdbus_kvec_set(&kvec[cnt++], &info, sizeof(info), &size);
+       if (meta_size > 0) {
+               kdbus_kvec_set(&kvec[cnt++], meta_items, meta_size, &size);
+               cnt += !!kdbus_kvec_pad(&kvec[cnt], &size);
+       }
+
+       info.size = size;
+
+       slice = kdbus_pool_slice_alloc(conn->pool, size, false);
        if (IS_ERR(slice)) {
                ret = PTR_ERR(slice);
                slice = NULL;
                goto exit;
        }
 
-       ret = kdbus_meta_export(owner_conn->meta, conn_meta, attach_flags,
-                               slice, sizeof(info), &meta_size);
-       if (ret < 0)
-               goto exit;
-
-       info.size += meta_size;
-
-       ret = kdbus_pool_slice_copy_kvec(slice, 0, &kvec, 1, sizeof(info));
+       ret = kdbus_pool_slice_copy_kvec(slice, 0, kvec, cnt, size);
        if (ret < 0)
                goto exit;
 
@@ -1878,6 +1852,7 @@ int kdbus_cmd_conn_info(struct kdbus_conn *conn, void __user *argp)
 exit:
        up_read(&bus->name_registry->rwlock);
        kdbus_pool_slice_release(slice);
+       kfree(meta_items);
        kdbus_meta_conn_unref(conn_meta);
        kdbus_conn_unref(owner_conn);
        return kdbus_args_clear(&args, ret);
@@ -1888,11 +1863,10 @@ exit:
  * @conn:              connection to operate on
  * @argp:              command payload
  *
- * Return: 0 on success, negative error code on failure.
+ * Return: >=0 on success, negative error code on failure.
  */
 int kdbus_cmd_update(struct kdbus_conn *conn, void __user *argp)
 {
-       struct kdbus_bus *bus = conn->ep->bus;
        struct kdbus_item *item_policy;
        u64 *item_attach_send = NULL;
        u64 *item_attach_recv = NULL;
@@ -1933,11 +1907,6 @@ int kdbus_cmd_update(struct kdbus_conn *conn, void __user *argp)
                                                  &attach_send);
                if (ret < 0)
                        goto exit;
-
-               if (bus->attach_flags_req & ~attach_send) {
-                       ret = -EINVAL;
-                       goto exit;
-               }
        }
 
        if (item_attach_recv) {
@@ -1985,15 +1954,17 @@ exit:
  * @f:                 file this command was called on
  * @argp:              command payload
  *
- * Return: 0 on success, negative error code on failure.
+ * Return: >=0 on success, negative error code on failure.
  */
 int kdbus_cmd_send(struct kdbus_conn *conn, struct file *f, void __user *argp)
 {
        struct kdbus_cmd_send *cmd;
-       struct kdbus_kmsg *kmsg = NULL;
+       struct kdbus_staging *staging = NULL;
+       struct kdbus_msg *msg = NULL;
        struct file *cancel_fd = NULL;
-       int ret;
+       int ret, ret2;
 
+       /* command arguments */
        struct kdbus_arg argv[] = {
                { .type = KDBUS_ITEM_NEGOTIATE },
                { .type = KDBUS_ITEM_CANCEL_FD },
@@ -2005,12 +1976,48 @@ int kdbus_cmd_send(struct kdbus_conn *conn, struct file *f, void __user *argp)
                .argc = ARRAY_SIZE(argv),
        };
 
+       /* message arguments */
+       struct kdbus_arg msg_argv[] = {
+               { .type = KDBUS_ITEM_NEGOTIATE },
+               { .type = KDBUS_ITEM_PAYLOAD_VEC, .multiple = true },
+               { .type = KDBUS_ITEM_PAYLOAD_MEMFD, .multiple = true },
+               { .type = KDBUS_ITEM_FDS },
+               { .type = KDBUS_ITEM_BLOOM_FILTER },
+               { .type = KDBUS_ITEM_DST_NAME },
+       };
+       struct kdbus_args msg_args = {
+               .allowed_flags = KDBUS_FLAG_NEGOTIATE |
+                                KDBUS_MSG_EXPECT_REPLY |
+                                KDBUS_MSG_NO_AUTO_START |
+                                KDBUS_MSG_SIGNAL,
+               .argv = msg_argv,
+               .argc = ARRAY_SIZE(msg_argv),
+       };
+
        if (!kdbus_conn_is_ordinary(conn))
                return -EOPNOTSUPP;
 
+       /* make sure to parse both, @cmd and @msg on negotiation */
+
        ret = kdbus_args_parse(&args, argp, &cmd);
-       if (ret != 0)
-               return ret;
+       if (ret < 0)
+               goto exit;
+       else if (ret > 0 && !cmd->msg_address) /* negotiation without msg */
+               goto exit;
+
+       ret2 = kdbus_args_parse_msg(&msg_args, KDBUS_PTR(cmd->msg_address),
+                                   &msg);
+       if (ret2 < 0) { /* cannot parse message */
+               ret = ret2;
+               goto exit;
+       } else if (ret2 > 0 && !ret) { /* msg-negot implies cmd-negot */
+               ret = -EINVAL;
+               goto exit;
+       } else if (ret > 0) { /* negotiation */
+               goto exit;
+       }
+
+       /* here we parsed both, @cmd and @msg, and neither wants negotiation */
 
        cmd->reply.return_flags = 0;
        kdbus_pool_publish_empty(conn->pool, &cmd->reply.offset,
@@ -2029,23 +2036,30 @@ int kdbus_cmd_send(struct kdbus_conn *conn, struct file *f, void __user *argp)
                }
        }
 
-       kmsg = kdbus_kmsg_new_from_cmd(conn, cmd);
-       if (IS_ERR(kmsg)) {
-               ret = PTR_ERR(kmsg);
-               kmsg = NULL;
+       /* patch-in the source of this message */
+       if (msg->src_id > 0 && msg->src_id != conn->id) {
+               ret = -EINVAL;
+               goto exit;
+       }
+       msg->src_id = conn->id;
+
+       staging = kdbus_staging_new_user(conn->ep->bus, cmd, msg);
+       if (IS_ERR(staging)) {
+               ret = PTR_ERR(staging);
+               staging = NULL;
                goto exit;
        }
 
-       if (kmsg->msg.dst_id == KDBUS_DST_ID_BROADCAST) {
+       if (msg->dst_id == KDBUS_DST_ID_BROADCAST) {
                down_read(&conn->ep->bus->name_registry->rwlock);
-               kdbus_bus_broadcast(conn->ep->bus, conn, kmsg);
+               kdbus_bus_broadcast(conn->ep->bus, conn, staging);
                up_read(&conn->ep->bus->name_registry->rwlock);
        } else if (cmd->flags & KDBUS_SEND_SYNC_REPLY) {
                struct kdbus_reply *r;
                ktime_t exp;
 
-               exp = ns_to_ktime(kmsg->msg.timeout_ns);
-               r = kdbus_conn_call(conn, kmsg, exp);
+               exp = ns_to_ktime(msg->timeout_ns);
+               r = kdbus_conn_call(conn, staging, exp);
                if (IS_ERR(r)) {
                        ret = PTR_ERR(r);
                        goto exit;
@@ -2055,13 +2069,13 @@ int kdbus_cmd_send(struct kdbus_conn *conn, struct file *f, void __user *argp)
                kdbus_reply_unref(r);
                if (ret < 0)
                        goto exit;
-       } else if ((kmsg->msg.flags & KDBUS_MSG_EXPECT_REPLY) ||
-                  kmsg->msg.cookie_reply == 0) {
-               ret = kdbus_conn_unicast(conn, kmsg);
+       } else if ((msg->flags & KDBUS_MSG_EXPECT_REPLY) ||
+                  msg->cookie_reply == 0) {
+               ret = kdbus_conn_unicast(conn, staging);
                if (ret < 0)
                        goto exit;
        } else {
-               ret = kdbus_conn_reply(conn, kmsg);
+               ret = kdbus_conn_reply(conn, staging);
                if (ret < 0)
                        goto exit;
        }
@@ -2072,7 +2086,8 @@ int kdbus_cmd_send(struct kdbus_conn *conn, struct file *f, void __user *argp)
 exit:
        if (cancel_fd)
                fput(cancel_fd);
-       kdbus_kmsg_free(kmsg);
+       kdbus_staging_free(staging);
+       ret = kdbus_args_clear(&msg_args, ret);
        return kdbus_args_clear(&args, ret);
 }
 
@@ -2081,7 +2096,7 @@ exit:
  * @conn:              connection to operate on
  * @argp:              command payload
  *
- * Return: 0 on success, negative error code on failure.
+ * Return: >=0 on success, negative error code on failure.
  */
 int kdbus_cmd_recv(struct kdbus_conn *conn, void __user *argp)
 {
@@ -2204,7 +2219,7 @@ exit:
  * @conn:              connection to operate on
  * @argp:              command payload
  *
- * Return: 0 on success, negative error code on failure.
+ * Return: >=0 on success, negative error code on failure.
  */
 int kdbus_cmd_free(struct kdbus_conn *conn, void __user *argp)
 {
index d1ffe90..1ad0820 100644 (file)
@@ -30,8 +30,9 @@
                                         KDBUS_HELLO_POLICY_HOLDER | \
                                         KDBUS_HELLO_MONITOR)
 
+struct kdbus_name_entry;
 struct kdbus_quota;
-struct kdbus_kmsg;
+struct kdbus_staging;
 
 /**
  * struct kdbus_conn - connection to a bus
@@ -54,12 +55,13 @@ struct kdbus_kmsg;
  * @work:              Delayed work to handle timeouts
  *                     activator for
  * @match_db:          Subscription filter to broadcast messages
- * @meta:              Active connection creator's metadata/credentials,
- *                     either from the handle or from HELLO
+ * @meta_proc:         Process metadata of connection creator, or NULL
+ * @meta_fake:         Faked metadata, or NULL
  * @pool:              The user's buffer to receive messages
  * @user:              Owner of the connection
  * @cred:              The credentials of the connection at creation time
- * @name_count:                Number of owned well-known names
+ * @pid:               Pid at creation time
+ * @root_path:         Root path at creation time
  * @request_count:     Number of pending requests issued by this
  *                     connection that are waiting for replies from
  *                     other peers
@@ -68,11 +70,10 @@ struct kdbus_kmsg;
  * @queue:             The message queue associated with this connection
  * @quota:             Array of per-user quota indexed by user->id
  * @n_quota:           Number of elements in quota array
- * @activator_of:      Well-known name entry this connection acts as an
  * @names_list:                List of well-known names
- * @names_queue_list:  Well-known names this connection waits for
- * @privileged:                Whether this connection is privileged on the bus
- * @faked_meta:                Whether the metadata was faked on HELLO
+ * @name_count:                Number of owned well-known names
+ * @privileged:                Whether this connection is privileged on the domain
+ * @owner:             Owned by the same user as the bus owner
  */
 struct kdbus_conn {
        struct kref kref;
@@ -93,11 +94,13 @@ struct kdbus_conn {
        struct list_head reply_list;
        struct delayed_work work;
        struct kdbus_match_db *match_db;
-       struct kdbus_meta_proc *meta;
+       struct kdbus_meta_proc *meta_proc;
+       struct kdbus_meta_fake *meta_fake;
        struct kdbus_pool *pool;
        struct kdbus_user *user;
        const struct cred *cred;
-       atomic_t name_count;
+       struct pid *pid;
+       struct path root_path;
        atomic_t request_count;
        atomic_t lost_count;
        wait_queue_head_t wait;
@@ -107,12 +110,11 @@ struct kdbus_conn {
        unsigned int n_quota;
 
        /* protected by registry->rwlock */
-       struct kdbus_name_entry *activator_of;
        struct list_head names_list;
-       struct list_head names_queue_list;
+       unsigned int name_count;
 
        bool privileged:1;
-       bool faked_meta:1;
+       bool owner:1;
 };
 
 struct kdbus_conn *kdbus_conn_ref(struct kdbus_conn *conn);
@@ -129,8 +131,9 @@ void kdbus_conn_quota_dec(struct kdbus_conn *c, struct kdbus_user *u,
 void kdbus_conn_lost_message(struct kdbus_conn *c);
 int kdbus_conn_entry_insert(struct kdbus_conn *conn_src,
                            struct kdbus_conn *conn_dst,
-                           const struct kdbus_kmsg *kmsg,
-                           struct kdbus_reply *reply);
+                           struct kdbus_staging *staging,
+                           struct kdbus_reply *reply,
+                           const struct kdbus_name_entry *name);
 void kdbus_conn_move_messages(struct kdbus_conn *conn_dst,
                              struct kdbus_conn *conn_src,
                              u64 name_id);
@@ -147,10 +150,10 @@ bool kdbus_conn_policy_see_name_unlocked(struct kdbus_conn *conn,
                                         const char *name);
 bool kdbus_conn_policy_see_notification(struct kdbus_conn *conn,
                                        const struct cred *curr_creds,
-                                       const struct kdbus_kmsg *kmsg);
+                                       const struct kdbus_msg *msg);
 
 /* command dispatcher */
-struct kdbus_conn *kdbus_cmd_hello(struct kdbus_ep *ep, bool privileged,
+struct kdbus_conn *kdbus_cmd_hello(struct kdbus_ep *ep, struct file *file,
                                   void __user *argp);
 int kdbus_cmd_byebye_unlocked(struct kdbus_conn *conn, void __user *argp);
 int kdbus_cmd_conn_info(struct kdbus_conn *conn, void __user *argp);
index 174d274..44e7a20 100644 (file)
@@ -78,7 +78,7 @@ static void kdbus_ep_release(struct kdbus_node *node, bool was_active)
  * @gid:               The gid of the node
  * @is_custom:         Whether this is a custom endpoint
  *
- * This function will create a new enpoint with the given
+ * This function will create a new endpoint with the given
  * name and properties for a given bus.
  *
  * Return: a new kdbus_ep on success, ERR_PTR on failure.
@@ -184,11 +184,39 @@ struct kdbus_ep *kdbus_ep_unref(struct kdbus_ep *ep)
 }
 
 /**
+ * kdbus_ep_is_privileged() - check whether a file is privileged
+ * @ep:                endpoint to operate on
+ * @file:      file to test
+ *
+ * Return: True if @file is privileged in the domain of @ep.
+ */
+bool kdbus_ep_is_privileged(struct kdbus_ep *ep, struct file *file)
+{
+       return !ep->user &&
+               file_ns_capable(file, ep->bus->domain->user_namespace,
+                               CAP_IPC_OWNER);
+}
+
+/**
+ * kdbus_ep_is_owner() - check whether a file should be treated as bus owner
+ * @ep:                endpoint to operate on
+ * @file:      file to test
+ *
+ * Return: True if @file should be treated as bus owner on @ep
+ */
+bool kdbus_ep_is_owner(struct kdbus_ep *ep, struct file *file)
+{
+       return !ep->user &&
+               (uid_eq(file->f_cred->euid, ep->bus->node.uid) ||
+                kdbus_ep_is_privileged(ep, file));
+}
+
+/**
  * kdbus_cmd_ep_make() - handle KDBUS_CMD_ENDPOINT_MAKE
  * @bus:               bus to operate on
  * @argp:              command payload
  *
- * Return: Newly created endpoint on success, ERR_PTR on failure.
+ * Return: NULL or newly created endpoint on success, ERR_PTR on failure.
  */
 struct kdbus_ep *kdbus_cmd_ep_make(struct kdbus_bus *bus, void __user *argp)
 {
@@ -247,7 +275,7 @@ exit:
  * @ep:                        endpoint to operate on
  * @argp:              command payload
  *
- * Return: Newly created endpoint on success, ERR_PTR on failure.
+ * Return: >=0 on success, negative error code on failure.
  */
 int kdbus_cmd_ep_update(struct kdbus_ep *ep, void __user *argp)
 {
index d31954b..e0da59f 100644 (file)
@@ -25,7 +25,7 @@ struct kdbus_bus;
 struct kdbus_user;
 
 /**
- * struct kdbus_ep - enpoint to access a bus
+ * struct kdbus_ep - endpoint to access a bus
  * @node:              The kdbus node
  * @lock:              Endpoint data lock
  * @bus:               Bus behind this endpoint
@@ -33,7 +33,7 @@ struct kdbus_user;
  * @policy_db:         Uploaded policy
  * @conn_list:         Connections of this endpoint
  *
- * An enpoint offers access to a bus; the default endpoint node name is "bus".
+ * An endpoint offers access to a bus; the default endpoint node name is "bus".
  * Additional custom endpoints to the same bus can be created and they can
  * carry their own policies/filters.
  */
@@ -61,6 +61,9 @@ struct kdbus_ep *kdbus_ep_new(struct kdbus_bus *bus, const char *name,
 struct kdbus_ep *kdbus_ep_ref(struct kdbus_ep *ep);
 struct kdbus_ep *kdbus_ep_unref(struct kdbus_ep *ep);
 
+bool kdbus_ep_is_privileged(struct kdbus_ep *ep, struct file *file);
+bool kdbus_ep_is_owner(struct kdbus_ep *ep, struct file *file);
+
 struct kdbus_ep *kdbus_cmd_ep_make(struct kdbus_bus *bus, void __user *argp);
 int kdbus_cmd_ep_update(struct kdbus_ep *ep, void __user *argp);
 
index d01f33b..09c4809 100644 (file)
@@ -74,7 +74,7 @@ static int fs_dir_fop_iterate(struct file *file, struct dir_context *ctx)
         * closest node to that position and cannot use our node pointer. This
         * means iterating the rb-tree to find the closest match and start over
         * from there.
-        * Note that hash values are not neccessarily unique. Therefore, llseek
+        * Note that hash values are not necessarily unique. Therefore, llseek
         * is not guaranteed to seek to the same node that you got when you
         * retrieved the position. Seeking to 0, 1, 2 and >=INT_MAX is safe,
         * though. We could use the inode-number as position, but this would
@@ -325,9 +325,7 @@ static void fs_super_kill(struct super_block *sb)
        }
 
        kill_anon_super(sb);
-
-       if (domain)
-               kdbus_domain_unref(domain);
+       kdbus_domain_unref(domain);
 }
 
 static int fs_super_set(struct super_block *sb, void *data)
index f72dbe5..fc60932 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/init.h>
 #include <linux/kdev_t.h>
 #include <linux/module.h>
+#include <linux/mutex.h>
 #include <linux/poll.h>
 #include <linux/rwsem.h>
 #include <linux/sched.h>
@@ -71,10 +72,6 @@ static int kdbus_args_verify(struct kdbus_args *args)
        if (!KDBUS_ITEMS_END(item, args->items, args->items_size))
                return -EINVAL;
 
-       for (i = 0; i < args->argc; ++i)
-               if (args->argv[i].mandatory && !args->argv[i].item)
-                       return -EINVAL;
-
        return 0;
 }
 
@@ -129,6 +126,7 @@ static int kdbus_args_negotiate(struct kdbus_args *args)
 /**
  * __kdbus_args_parse() - parse payload of kdbus command
  * @args:              object to parse data into
+ * @is_cmd:            whether this is a command or msg payload
  * @argp:              user-space location of command payload to parse
  * @type_size:         overall size of command payload to parse
  * @items_offset:      offset of items array in command payload
@@ -143,22 +141,49 @@ static int kdbus_args_negotiate(struct kdbus_args *args)
  * If this function succeeded, you must call kdbus_args_clear() to release
  * allocated resources before destroying @args.
  *
+ * This can also be used to import kdbus_msg objects. In that case, @is_cmd must
+ * be set to 'false' and the 'return_flags' field will not be touched (as it
+ * doesn't exist on kdbus_msg).
+ *
  * Return: On failure a negative error code is returned. Otherwise, 1 is
  * returned if negotiation was requested, 0 if not.
  */
-int __kdbus_args_parse(struct kdbus_args *args, void __user *argp,
+int __kdbus_args_parse(struct kdbus_args *args, bool is_cmd, void __user *argp,
                       size_t type_size, size_t items_offset, void **out)
 {
-       int ret;
+       u64 user_size;
+       int ret, i;
+
+       ret = kdbus_copy_from_user(&user_size, argp, sizeof(user_size));
+       if (ret < 0)
+               return ret;
 
-       args->cmd = kdbus_memdup_user(argp, type_size, KDBUS_CMD_MAX_SIZE);
-       if (IS_ERR(args->cmd))
-               return PTR_ERR(args->cmd);
+       if (user_size < type_size)
+               return -EINVAL;
+       if (user_size > KDBUS_CMD_MAX_SIZE)
+               return -EMSGSIZE;
+
+       if (user_size <= sizeof(args->cmd_buf)) {
+               if (copy_from_user(args->cmd_buf, argp, user_size))
+                       return -EFAULT;
+               args->cmd = (void*)args->cmd_buf;
+       } else {
+               args->cmd = memdup_user(argp, user_size);
+               if (IS_ERR(args->cmd))
+                       return PTR_ERR(args->cmd);
+       }
+
+       if (args->cmd->size != user_size) {
+               ret = -EINVAL;
+               goto error;
+       }
 
-       args->cmd->return_flags = 0;
+       if (is_cmd)
+               args->cmd->return_flags = 0;
        args->user = argp;
        args->items = (void *)((u8 *)args->cmd + items_offset);
        args->items_size = args->cmd->size - items_offset;
+       args->is_cmd = is_cmd;
 
        if (args->cmd->flags & ~args->allowed_flags) {
                ret = -EINVAL;
@@ -173,6 +198,15 @@ int __kdbus_args_parse(struct kdbus_args *args, void __user *argp,
        if (ret < 0)
                goto error;
 
+       /* mandatory items must be given (but not on negotiation) */
+       if (!(args->cmd->flags & KDBUS_FLAG_NEGOTIATE)) {
+               for (i = 0; i < args->argc; ++i)
+                       if (args->argv[i].mandatory && !args->argv[i].item) {
+                               ret = -EINVAL;
+                               goto error;
+                       }
+       }
+
        *out = args->cmd;
        return !!(args->cmd->flags & KDBUS_FLAG_NEGOTIATE);
 
@@ -198,10 +232,11 @@ int kdbus_args_clear(struct kdbus_args *args, int ret)
                return ret;
 
        if (!IS_ERR_OR_NULL(args->cmd)) {
-               if (put_user(args->cmd->return_flags,
-                            &args->user->return_flags))
+               if (args->is_cmd && put_user(args->cmd->return_flags,
+                                            &args->user->return_flags))
                        ret = -EFAULT;
-               kfree(args->cmd);
+               if (args->cmd != (void*)args->cmd_buf)
+                       kfree(args->cmd);
                args->cmd = NULL;
        }
 
@@ -224,15 +259,14 @@ enum kdbus_handle_type {
 
 /**
  * struct kdbus_handle - handle to the kdbus system
- * @rwlock:            handle lock
+ * @lock:              handle lock
  * @type:              type of this handle (KDBUS_HANDLE_*)
  * @bus_owner:         bus this handle owns
  * @ep_owner:          endpoint this handle owns
  * @conn:              connection this handle owns
- * @privileged:                Flag to mark a handle as privileged
  */
 struct kdbus_handle {
-       struct rw_semaphore rwlock;
+       struct mutex lock;
 
        enum kdbus_handle_type type;
        union {
@@ -240,8 +274,6 @@ struct kdbus_handle {
                struct kdbus_ep *ep_owner;
                struct kdbus_conn *conn;
        };
-
-       bool privileged:1;
 };
 
 static int kdbus_handle_open(struct inode *inode, struct file *file)
@@ -260,26 +292,9 @@ static int kdbus_handle_open(struct inode *inode, struct file *file)
                goto exit;
        }
 
-       init_rwsem(&handle->rwlock);
+       mutex_init(&handle->lock);
        handle->type = KDBUS_HANDLE_NONE;
 
-       if (node->type == KDBUS_NODE_ENDPOINT) {
-               struct kdbus_ep *ep = kdbus_ep_from_node(node);
-               struct kdbus_bus *bus = ep->bus;
-
-               /*
-                * A connection is privileged if it is opened on an endpoint
-                * without custom policy and either:
-                *   * the user has CAP_IPC_OWNER in the domain user namespace
-                * or
-                *   * the callers euid matches the uid of the bus creator
-                */
-               if (!ep->user &&
-                   (ns_capable(bus->domain->user_namespace, CAP_IPC_OWNER) ||
-                    uid_eq(file->f_cred->euid, bus->node.uid)))
-                       handle->privileged = true;
-       }
-
        file->private_data = handle;
        ret = 0;
 
@@ -350,8 +365,8 @@ static long kdbus_handle_ioctl_control(struct file *file, unsigned int cmd,
                        break;
                }
 
-               handle->type = KDBUS_HANDLE_BUS_OWNER;
                handle->bus_owner = bus;
+               ret = KDBUS_HANDLE_BUS_OWNER;
                break;
        }
 
@@ -371,6 +386,7 @@ static long kdbus_handle_ioctl_ep(struct file *file, unsigned int cmd,
        struct kdbus_handle *handle = file->private_data;
        struct kdbus_node *node = file_inode(file)->i_private;
        struct kdbus_ep *ep, *file_ep = kdbus_ep_from_node(node);
+       struct kdbus_bus *bus = file_ep->bus;
        struct kdbus_conn *conn;
        int ret = 0;
 
@@ -378,32 +394,33 @@ static long kdbus_handle_ioctl_ep(struct file *file, unsigned int cmd,
                return -ESHUTDOWN;
 
        switch (cmd) {
-       case KDBUS_CMD_ENDPOINT_MAKE:
+       case KDBUS_CMD_ENDPOINT_MAKE: {
                /* creating custom endpoints is a privileged operation */
-               if (!handle->privileged) {
+               if (!kdbus_ep_is_owner(file_ep, file)) {
                        ret = -EPERM;
                        break;
                }
 
-               ep = kdbus_cmd_ep_make(file_ep->bus, buf);
+               ep = kdbus_cmd_ep_make(bus, buf);
                if (IS_ERR_OR_NULL(ep)) {
                        ret = PTR_ERR_OR_ZERO(ep);
                        break;
                }
 
-               handle->type = KDBUS_HANDLE_EP_OWNER;
                handle->ep_owner = ep;
+               ret = KDBUS_HANDLE_EP_OWNER;
                break;
+       }
 
        case KDBUS_CMD_HELLO:
-               conn = kdbus_cmd_hello(file_ep, handle->privileged, buf);
+               conn = kdbus_cmd_hello(file_ep, file, buf);
                if (IS_ERR_OR_NULL(conn)) {
                        ret = PTR_ERR_OR_ZERO(conn);
                        break;
                }
 
-               handle->type = KDBUS_HANDLE_CONNECTED;
                handle->conn = conn;
+               ret = KDBUS_HANDLE_CONNECTED;
                break;
 
        default:
@@ -517,19 +534,41 @@ static long kdbus_handle_ioctl(struct file *file, unsigned int cmd,
        case KDBUS_CMD_BUS_MAKE:
        case KDBUS_CMD_ENDPOINT_MAKE:
        case KDBUS_CMD_HELLO:
-               /* bail out early if already typed */
-               if (handle->type != KDBUS_HANDLE_NONE)
-                       break;
-
-               down_write(&handle->rwlock);
+               mutex_lock(&handle->lock);
                if (handle->type == KDBUS_HANDLE_NONE) {
                        if (node->type == KDBUS_NODE_CONTROL)
                                ret = kdbus_handle_ioctl_control(file, cmd,
                                                                 argp);
                        else if (node->type == KDBUS_NODE_ENDPOINT)
                                ret = kdbus_handle_ioctl_ep(file, cmd, argp);
+
+                       if (ret > 0) {
+                               /*
+                                * The data given via open() is not sufficient
+                                * to setup a kdbus handle. Hence, we require
+                                * the user to perform a setup ioctl. This setup
+                                * can only be performed once and defines the
+                                * type of the handle. The different setup
+                                * ioctls are locked against each other so they
+                                * cannot race. Once the handle type is set,
+                                * the type-dependent ioctls are enabled. To
+                                * improve performance, we don't lock those via
+                                * handle->lock. Instead, we issue a
+                                * write-barrier before performing the
+                                * type-change, which pairs with smp_rmb() in
+                                * all handlers that access the type field. This
+                                * guarantees the handle is fully setup, if
+                                * handle->type is set. If handle->type is
+                                * unset, you must not make any assumptions
+                                * without taking handle->lock.
+                                * Note that handle->type is only set once. It
+                                * will never change afterwards.
+                                */
+                               smp_wmb();
+                               handle->type = ret;
+                       }
                }
-               up_write(&handle->rwlock);
+               mutex_unlock(&handle->lock);
                break;
 
        case KDBUS_CMD_ENDPOINT_UPDATE:
@@ -544,14 +583,30 @@ static long kdbus_handle_ioctl(struct file *file, unsigned int cmd,
        case KDBUS_CMD_MATCH_REMOVE:
        case KDBUS_CMD_SEND:
        case KDBUS_CMD_RECV:
-       case KDBUS_CMD_FREE:
-               down_read(&handle->rwlock);
-               if (handle->type == KDBUS_HANDLE_EP_OWNER)
+       case KDBUS_CMD_FREE: {
+               enum kdbus_handle_type type;
+
+               /*
+                * This read-barrier pairs with smp_wmb() of the handle setup.
+                * It guarantees the handle is fully written, in case the
+                * type has been set. It allows us to access the handle without
+                * taking handle->lock, given the guarantee that the type is
+                * only ever set once, and stays constant afterwards.
+                * Furthermore, the handle object itself is not modified in any
+                * way after the type is set. That is, the type-field is the
+                * last field that is written on any handle. If it has not been
+                * set, we must not access the handle here.
+                */
+               type = handle->type;
+               smp_rmb();
+
+               if (type == KDBUS_HANDLE_EP_OWNER)
                        ret = kdbus_handle_ioctl_ep_owner(file, cmd, argp);
-               else if (handle->type == KDBUS_HANDLE_CONNECTED)
+               else if (type == KDBUS_HANDLE_CONNECTED)
                        ret = kdbus_handle_ioctl_connected(file, cmd, argp);
-               up_read(&handle->rwlock);
+
                break;
+       }
        default:
                ret = -ENOTTY;
                break;
@@ -564,42 +619,61 @@ static unsigned int kdbus_handle_poll(struct file *file,
                                      struct poll_table_struct *wait)
 {
        struct kdbus_handle *handle = file->private_data;
+       enum kdbus_handle_type type;
        unsigned int mask = POLLOUT | POLLWRNORM;
-       int ret;
 
-       /* Only a connected endpoint can read/write data */
-       down_read(&handle->rwlock);
-       if (handle->type != KDBUS_HANDLE_CONNECTED) {
-               up_read(&handle->rwlock);
-               return POLLERR | POLLHUP;
-       }
-       up_read(&handle->rwlock);
+       /*
+        * This pairs with smp_wmb() during handle setup. It guarantees that
+        * _iff_ the handle type is set, handle->conn is valid. Furthermore,
+        * _iff_ the type is set, the handle object is constant and never
+        * changed again. If it's not set, we must not access the handle but
+        * bail out. We also must assume no setup has taken place, yet.
+        */
+       type = handle->type;
+       smp_rmb();
 
-       ret = kdbus_conn_acquire(handle->conn);
-       if (ret < 0)
+       /* Only a connected endpoint can read/write data */
+       if (type != KDBUS_HANDLE_CONNECTED)
                return POLLERR | POLLHUP;
 
        poll_wait(file, &handle->conn->wait, wait);
 
+       /*
+        * Verify the connection hasn't been deactivated _after_ adding the
+        * wait-queue. This guarantees, that if the connection is deactivated
+        * after we checked it, the waitqueue is signaled and we're called
+        * again.
+        */
+       if (!kdbus_conn_active(handle->conn))
+               return POLLERR | POLLHUP;
+
        if (!list_empty(&handle->conn->queue.msg_list) ||
            atomic_read(&handle->conn->lost_count) > 0)
                mask |= POLLIN | POLLRDNORM;
 
-       kdbus_conn_release(handle->conn);
-
        return mask;
 }
 
 static int kdbus_handle_mmap(struct file *file, struct vm_area_struct *vma)
 {
        struct kdbus_handle *handle = file->private_data;
+       enum kdbus_handle_type type;
        int ret = -EBADFD;
 
-       if (down_read_trylock(&handle->rwlock)) {
-               if (handle->type == KDBUS_HANDLE_CONNECTED)
-                       ret = kdbus_pool_mmap(handle->conn->pool, vma);
-               up_read(&handle->rwlock);
-       }
+       /*
+        * This pairs with smp_wmb() during handle setup. It guarantees that
+        * _iff_ the handle type is set, handle->conn is valid. Furthermore,
+        * _iff_ the type is set, the handle object is constant and never
+        * changed again. If it's not set, we must not access the handle but
+        * bail out. We also must assume no setup has taken place, yet.
+        */
+       type = handle->type;
+       smp_rmb();
+
+       /* Only connected handles have a pool we can map */
+       if (type == KDBUS_HANDLE_CONNECTED)
+               ret = kdbus_pool_mmap(handle->conn->pool, vma);
+
        return ret;
 }
 
index 93a372d..5dde2c1 100644 (file)
@@ -45,13 +45,18 @@ struct kdbus_arg {
  * @argv:              array of items this command supports
  * @user:              set by parser to user-space location of current command
  * @cmd:               set by parser to kernel copy of command payload
+ * @cmd_buf:           inline buf to avoid kmalloc() on small cmds
  * @items:             points to item array in @cmd
  * @items_size:                size of @items in bytes
+ * @is_cmd:            whether this is a command-payload or msg-payload
  *
  * This structure is used to parse ioctl command payloads on each invocation.
  * The ioctl handler has to pre-fill the flags and allowed items before passing
  * the object to kdbus_args_parse(). The parser will copy the command payload
  * into kernel-space and verify the correctness of the data.
+ *
+ * We use a 256-byte buffer for small command payloads, to be allocated on
+ * stack on syscall entrance.
  */
 struct kdbus_args {
        u64 allowed_flags;
@@ -60,12 +65,14 @@ struct kdbus_args {
 
        struct kdbus_cmd __user *user;
        struct kdbus_cmd *cmd;
+       u8 cmd_buf[256];
 
        struct kdbus_item *items;
        size_t items_size;
+       bool is_cmd : 1;
 };
 
-int __kdbus_args_parse(struct kdbus_args *args, void __user *argp,
+int __kdbus_args_parse(struct kdbus_args *args, bool is_cmd, void __user *argp,
                       size_t type_size, size_t items_offset, void **out);
 int kdbus_args_clear(struct kdbus_args *args, int ret);
 
@@ -77,7 +84,18 @@ int kdbus_args_clear(struct kdbus_args *args, int ret);
                             offsetof(struct kdbus_cmd, flags));        \
                BUILD_BUG_ON(offsetof(typeof(**(_v)), return_flags) !=  \
                             offsetof(struct kdbus_cmd, return_flags)); \
-               __kdbus_args_parse((_args), (_argp), sizeof(**(_v)),    \
+               __kdbus_args_parse((_args), 1, (_argp), sizeof(**(_v)), \
+                                  offsetof(typeof(**(_v)), items),     \
+                                  (void **)(_v));                      \
+       })
+
+#define kdbus_args_parse_msg(_args, _argp, _v)                          \
+       ({                                                              \
+               BUILD_BUG_ON(offsetof(typeof(**(_v)), size) !=          \
+                            offsetof(struct kdbus_cmd, size));         \
+               BUILD_BUG_ON(offsetof(typeof(**(_v)), flags) !=         \
+                            offsetof(struct kdbus_cmd, flags));        \
+               __kdbus_args_parse((_args), 0, (_argp), sizeof(**(_v)), \
                                   offsetof(typeof(**(_v)), items),     \
                                   (void **)(_v));                      \
        })
index 745ad54..ce78dba 100644 (file)
@@ -96,12 +96,6 @@ int kdbus_item_validate(const struct kdbus_item *item)
                break;
 
        case KDBUS_ITEM_PAYLOAD_VEC:
-               if (payload_size != sizeof(struct kdbus_vec))
-                       return -EINVAL;
-               if (item->vec.size == 0 || item->vec.size > SIZE_MAX)
-                       return -EINVAL;
-               break;
-
        case KDBUS_ITEM_PAYLOAD_OFF:
                if (payload_size != sizeof(struct kdbus_vec))
                        return -EINVAL;
@@ -153,6 +147,7 @@ int kdbus_item_validate(const struct kdbus_item *item)
        case KDBUS_ITEM_ATTACH_FLAGS_SEND:
        case KDBUS_ITEM_ATTACH_FLAGS_RECV:
        case KDBUS_ITEM_ID:
+       case KDBUS_ITEM_DST_ID:
                if (payload_size != sizeof(u64))
                        return -EINVAL;
                break;
@@ -266,47 +261,6 @@ int kdbus_items_validate(const struct kdbus_item *items, size_t items_size)
        return 0;
 }
 
-static struct kdbus_item *kdbus_items_get(const struct kdbus_item *items,
-                                         size_t items_size,
-                                         unsigned int item_type)
-{
-       const struct kdbus_item *iter, *found = NULL;
-
-       KDBUS_ITEMS_FOREACH(iter, items, items_size) {
-               if (iter->type == item_type) {
-                       if (found)
-                               return ERR_PTR(-EEXIST);
-                       found = iter;
-               }
-       }
-
-       return (struct kdbus_item *)found ? : ERR_PTR(-EBADMSG);
-}
-
-/**
- * kdbus_items_get_str() - get string from a list of items
- * @items:             The items to walk
- * @items_size:                The size of all items
- * @item_type:         The item type to look for
- *
- * This function walks a list of items and searches for items of type
- * @item_type. If it finds exactly one such item, @str_ret will be set to
- * the .str member of the item.
- *
- * Return: the string, if the item was found exactly once, ERR_PTR(-EEXIST)
- * if the item was found more than once, and ERR_PTR(-EBADMSG) if there was
- * no item of the given type.
- */
-const char *kdbus_items_get_str(const struct kdbus_item *items,
-                               size_t items_size,
-                               unsigned int item_type)
-{
-       const struct kdbus_item *item;
-
-       item = kdbus_items_get(items, items_size, item_type);
-       return IS_ERR(item) ? ERR_CAST(item) : item->str;
-}
-
 /**
  * kdbus_item_set() - Set item content
  * @item:      The item to modify
index eeefd8b..3a7e6cc 100644 (file)
 #include "util.h"
 
 /* generic access and iterators over a stream of items */
-#define KDBUS_ITEM_NEXT(_i) (typeof(_i))(((u8 *)_i) + KDBUS_ALIGN8((_i)->size))
-#define KDBUS_ITEMS_SIZE(_h, _is) ((_h)->size - offsetof(typeof(*_h), _is))
+#define KDBUS_ITEM_NEXT(_i) (typeof(_i))((u8 *)(_i) + KDBUS_ALIGN8((_i)->size))
+#define KDBUS_ITEMS_SIZE(_h, _is) ((_h)->size - offsetof(typeof(*(_h)), _is))
 #define KDBUS_ITEM_HEADER_SIZE offsetof(struct kdbus_item, data)
 #define KDBUS_ITEM_SIZE(_s) KDBUS_ALIGN8(KDBUS_ITEM_HEADER_SIZE + (_s))
 #define KDBUS_ITEM_PAYLOAD_SIZE(_i) ((_i)->size - KDBUS_ITEM_HEADER_SIZE)
 
 #define KDBUS_ITEMS_FOREACH(_i, _is, _s)                               \
-       for (_i = _is;                                                  \
+       for ((_i) = (_is);                                              \
             ((u8 *)(_i) < (u8 *)(_is) + (_s)) &&                       \
               ((u8 *)(_i) >= (u8 *)(_is));                             \
-            _i = KDBUS_ITEM_NEXT(_i))
+            (_i) = KDBUS_ITEM_NEXT(_i))
 
 #define KDBUS_ITEM_VALID(_i, _is, _s)                                  \
        ((_i)->size >= KDBUS_ITEM_HEADER_SIZE &&                        \
@@ -40,7 +40,7 @@
         (u8 *)(_i) >= (u8 *)(_is))
 
 #define KDBUS_ITEMS_END(_i, _is, _s)                                   \
-       ((u8 *)_i == ((u8 *)(_is) + KDBUS_ALIGN8(_s)))
+       ((u8 *)(_i) == ((u8 *)(_is) + KDBUS_ALIGN8(_s)))
 
 /**
  * struct kdbus_item_header - Describes the fix part of an item
@@ -55,9 +55,6 @@ struct kdbus_item_header {
 int kdbus_item_validate_name(const struct kdbus_item *item);
 int kdbus_item_validate(const struct kdbus_item *item);
 int kdbus_items_validate(const struct kdbus_item *items, size_t items_size);
-const char *kdbus_items_get_str(const struct kdbus_item *items,
-                               size_t items_size,
-                               unsigned int item_type);
 struct kdbus_item *kdbus_item_set(struct kdbus_item *item, u64 type,
                                  const void *data, size_t len);
 
index 3ea2f0e..59e3608 100644 (file)
@@ -19,9 +19,6 @@
 /* maximum size of message header and items */
 #define KDBUS_MSG_MAX_SIZE             SZ_8K
 
-/* maximum number of message items */
-#define KDBUS_MSG_MAX_ITEMS            128
-
 /* maximum number of memfd items per message */
 #define KDBUS_MSG_MAX_MEMFD_ITEMS      16
 
@@ -53,7 +50,7 @@
 #define KDBUS_CONN_MAX_NAMES                   256
 
 /* maximum number of queued requests waiting for a reply */
-#define KDBUS_CONN_MAX_REQUESTS_PENDING                1024    /* 128 -> 1024 */
+#define KDBUS_CONN_MAX_REQUESTS_PENDING                1024
 
 /* maximum number of connections per user in one domain */
 #define KDBUS_USER_MAX_CONN                    1024
index 785f529..1ad4dc8 100644 (file)
@@ -15,7 +15,6 @@
 #include <linux/fs.h>
 #include <linux/init.h>
 #include <linux/module.h>
-#include <linux/moduleparam.h>
 
 #include "util.h"
 #include "fs.h"
 /* kdbus mount-point /sys/fs/kdbus */
 static struct kobject *kdbus_dir;
 
-/* global module option to apply a mask to exported metadata */
-unsigned long long kdbus_meta_attach_mask = KDBUS_ATTACH_TIMESTAMP |
-                                           KDBUS_ATTACH_CREDS |
-                                           KDBUS_ATTACH_PIDS |
-                                           KDBUS_ATTACH_AUXGROUPS |
-                                           KDBUS_ATTACH_NAMES |
-                                           KDBUS_ATTACH_SECLABEL |
-                                           KDBUS_ATTACH_CONN_DESCRIPTION;
-MODULE_PARM_DESC(attach_flags_mask, "Attach-flags mask for exported metadata");
-module_param_named(attach_flags_mask, kdbus_meta_attach_mask, ullong, 0644);
-
 static int __init kdbus_init(void)
 {
        int ret;
@@ -116,6 +104,7 @@ static void __exit kdbus_exit(void)
 {
        kdbus_fs_exit();
        kobject_put(kdbus_dir);
+       ida_destroy(&kdbus_node_ida);
 }
 
 module_init(kdbus_init);
index 30cec1c..4ee6a1f 100644 (file)
@@ -66,7 +66,7 @@ struct kdbus_bloom_mask {
 
 /**
  * struct kdbus_match_rule - a rule appended to a match entry
- * @type:              An item type to match agains
+ * @type:              An item type to match against
  * @bloom_mask:                Bloom mask to match a message's filter against, used
  *                     with KDBUS_ITEM_BLOOM_MASK
  * @name:              Name to match against, used with KDBUS_ITEM_NAME,
@@ -78,6 +78,7 @@ struct kdbus_bloom_mask {
  *                     KDBUS_ITEM_NAME_{ADD,REMOVE,CHANGE},
  *                     KDBUS_ITEM_ID_REMOVE
  * @src_id:            ID to match against, used with KDBUS_ITEM_ID
+ * @dst_id:            Message destination ID, used with KDBUS_ITEM_DST_ID
  * @rules_entry:       Entry in the entry's rules list
  */
 struct kdbus_match_rule {
@@ -90,6 +91,7 @@ struct kdbus_match_rule {
                        u64 new_id;
                };
                u64 src_id;
+               u64 dst_id;
        };
        struct list_head rules_entry;
 };
@@ -112,6 +114,7 @@ static void kdbus_match_rule_free(struct kdbus_match_rule *rule)
                break;
 
        case KDBUS_ITEM_ID:
+       case KDBUS_ITEM_DST_ID:
        case KDBUS_ITEM_ID_ADD:
        case KDBUS_ITEM_ID_REMOVE:
                break;
@@ -204,96 +207,74 @@ static bool kdbus_match_bloom(const struct kdbus_bloom_filter *filter,
        return true;
 }
 
-static bool kdbus_match_rules(const struct kdbus_match_entry *entry,
-                             struct kdbus_conn *conn_src,
-                             struct kdbus_kmsg *kmsg)
+static bool kdbus_match_rule_conn(const struct kdbus_match_rule *r,
+                                 struct kdbus_conn *c,
+                                 const struct kdbus_staging *s)
 {
-       struct kdbus_match_rule *r;
-
-       if (conn_src)
-               lockdep_assert_held(&conn_src->ep->bus->name_registry->rwlock);
-
-       /*
-        * Walk all the rules and bail out immediately
-        * if any of them is unsatisfied.
-        */
-
-       list_for_each_entry(r, &entry->rules_list, rules_entry) {
-               if (conn_src) {
-                       /* messages from userspace */
-
-                       switch (r->type) {
-                       case KDBUS_ITEM_BLOOM_MASK:
-                               if (!kdbus_match_bloom(kmsg->bloom_filter,
-                                                      &r->bloom_mask,
-                                                      conn_src))
-                                       return false;
-                               break;
-
-                       case KDBUS_ITEM_ID:
-                               if (r->src_id != conn_src->id &&
-                                   r->src_id != KDBUS_MATCH_ID_ANY)
-                                       return false;
-
-                               break;
-
-                       case KDBUS_ITEM_NAME:
-                               if (!kdbus_conn_has_name(conn_src, r->name))
-                                       return false;
-
-                               break;
-
-                       default:
-                               return false;
-                       }
-               } else {
-                       /* kernel notifications */
+       lockdep_assert_held(&c->ep->bus->name_registry->rwlock);
 
-                       if (kmsg->notify_type != r->type)
-                               return false;
-
-                       switch (r->type) {
-                       case KDBUS_ITEM_ID_ADD:
-                               if (r->new_id != KDBUS_MATCH_ID_ANY &&
-                                   r->new_id != kmsg->notify_new_id)
-                                       return false;
-
-                               break;
+       switch (r->type) {
+       case KDBUS_ITEM_BLOOM_MASK:
+               return kdbus_match_bloom(s->bloom_filter, &r->bloom_mask, c);
+       case KDBUS_ITEM_ID:
+               return r->src_id == c->id || r->src_id == KDBUS_MATCH_ID_ANY;
+       case KDBUS_ITEM_DST_ID:
+               return r->dst_id == s->msg->dst_id ||
+                      r->dst_id == KDBUS_MATCH_ID_ANY;
+       case KDBUS_ITEM_NAME:
+               return kdbus_conn_has_name(c, r->name);
+       default:
+               return false;
+       }
+}
 
-                       case KDBUS_ITEM_ID_REMOVE:
-                               if (r->old_id != KDBUS_MATCH_ID_ANY &&
-                                   r->old_id != kmsg->notify_old_id)
-                                       return false;
+static bool kdbus_match_rule_kernel(const struct kdbus_match_rule *r,
+                                   const struct kdbus_staging *s)
+{
+       struct kdbus_item *n = s->notify;
 
-                               break;
+       if (WARN_ON(!n) || n->type != r->type)
+               return false;
 
-                       case KDBUS_ITEM_NAME_ADD:
-                       case KDBUS_ITEM_NAME_CHANGE:
-                       case KDBUS_ITEM_NAME_REMOVE:
-                               if ((r->old_id != KDBUS_MATCH_ID_ANY &&
-                                    r->old_id != kmsg->notify_old_id) ||
-                                   (r->new_id != KDBUS_MATCH_ID_ANY &&
-                                    r->new_id != kmsg->notify_new_id) ||
-                                   (r->name && kmsg->notify_name &&
-                                    strcmp(r->name, kmsg->notify_name) != 0))
-                                       return false;
+       switch (r->type) {
+       case KDBUS_ITEM_ID_ADD:
+               return r->new_id == KDBUS_MATCH_ID_ANY ||
+                      r->new_id == n->id_change.id;
+       case KDBUS_ITEM_ID_REMOVE:
+               return r->old_id == KDBUS_MATCH_ID_ANY ||
+                      r->old_id == n->id_change.id;
+       case KDBUS_ITEM_NAME_ADD:
+       case KDBUS_ITEM_NAME_CHANGE:
+       case KDBUS_ITEM_NAME_REMOVE:
+               return (r->old_id == KDBUS_MATCH_ID_ANY ||
+                       r->old_id == n->name_change.old_id.id) &&
+                      (r->new_id == KDBUS_MATCH_ID_ANY ||
+                       r->new_id == n->name_change.new_id.id) &&
+                      (!r->name || !strcmp(r->name, n->name_change.name));
+       default:
+               return false;
+       }
+}
 
-                               break;
+static bool kdbus_match_rules(const struct kdbus_match_entry *entry,
+                             struct kdbus_conn *c,
+                             const struct kdbus_staging *s)
+{
+       struct kdbus_match_rule *r;
 
-                       default:
-                               return false;
-                       }
-               }
-       }
+       list_for_each_entry(r, &entry->rules_list, rules_entry)
+               if ((c && !kdbus_match_rule_conn(r, c, s)) ||
+                   (!c && !kdbus_match_rule_kernel(r, s)))
+                       return false;
 
        return true;
 }
 
 /**
- * kdbus_match_db_match_kmsg() - match a kmsg object agains the database entries
+ * kdbus_match_db_match_msg() - match a msg object against the database entries
  * @mdb:               The match database
  * @conn_src:          The connection object originating the message
- * @kmsg:              The kmsg to perform the match on
+ * @staging:           Staging object containing the message to match against
  *
  * This function will walk through all the database entries previously uploaded
  * with kdbus_match_db_add(). As soon as any of them has an all-satisfied rule
@@ -304,16 +285,16 @@ static bool kdbus_match_rules(const struct kdbus_match_entry *entry,
  *
  * Return: true if there was a matching database entry, false otherwise.
  */
-bool kdbus_match_db_match_kmsg(struct kdbus_match_db *mdb,
-                              struct kdbus_conn *conn_src,
-                              struct kdbus_kmsg *kmsg)
+bool kdbus_match_db_match_msg(struct kdbus_match_db *mdb,
+                             struct kdbus_conn *conn_src,
+                             const struct kdbus_staging *staging)
 {
        struct kdbus_match_entry *entry;
        bool matched = false;
 
        down_read(&mdb->mdb_rwlock);
        list_for_each_entry(entry, &mdb->entries_list, list_entry) {
-               matched = kdbus_match_rules(entry, conn_src, kmsg);
+               matched = kdbus_match_rules(entry, conn_src, staging);
                if (matched)
                        break;
        }
@@ -353,6 +334,7 @@ static int kdbus_match_db_remove_unlocked(struct kdbus_match_db *mdb,
  * KDBUS_ITEM_BLOOM_MASK:      A bloom mask
  * KDBUS_ITEM_NAME:            A connection's source name
  * KDBUS_ITEM_ID:              A connection ID
+ * KDBUS_ITEM_DST_ID:          A message's destination connection ID
  * KDBUS_ITEM_NAME_ADD:
  * KDBUS_ITEM_NAME_REMOVE:
  * KDBUS_ITEM_NAME_CHANGE:     Well-known name changes, carry
@@ -364,11 +346,11 @@ static int kdbus_match_db_remove_unlocked(struct kdbus_match_db *mdb,
  * For kdbus_notify_{id,name}_change structs, only the ID and name fields
  * are looked at when adding an entry. The flags are unused.
  *
- * Also note that KDBUS_ITEM_BLOOM_MASK, KDBUS_ITEM_NAME and KDBUS_ITEM_ID
- * are used to match messages from userspace, while the others apply to
- * kernel-generated notifications.
+ * Also note that KDBUS_ITEM_BLOOM_MASK, KDBUS_ITEM_NAME, KDBUS_ITEM_ID,
+ * and KDBUS_ITEM_DST_ID are used to match messages from userspace, while the
+ * others apply to kernel-generated notifications.
  *
- * Return: 0 on success, negative error code on failure.
+ * Return: >=0 on success, negative error code on failure.
  */
 int kdbus_cmd_match_add(struct kdbus_conn *conn, void __user *argp)
 {
@@ -383,6 +365,7 @@ int kdbus_cmd_match_add(struct kdbus_conn *conn, void __user *argp)
                { .type = KDBUS_ITEM_BLOOM_MASK, .multiple = true },
                { .type = KDBUS_ITEM_NAME, .multiple = true },
                { .type = KDBUS_ITEM_ID, .multiple = true },
+               { .type = KDBUS_ITEM_DST_ID, .multiple = true },
                { .type = KDBUS_ITEM_NAME_ADD, .multiple = true },
                { .type = KDBUS_ITEM_NAME_REMOVE, .multiple = true },
                { .type = KDBUS_ITEM_NAME_CHANGE, .multiple = true },
@@ -465,6 +448,10 @@ int kdbus_cmd_match_add(struct kdbus_conn *conn, void __user *argp)
                        rule->src_id = item->id;
                        break;
 
+               case KDBUS_ITEM_DST_ID:
+                       rule->dst_id = item->id;
+                       break;
+
                case KDBUS_ITEM_NAME_ADD:
                case KDBUS_ITEM_NAME_REMOVE:
                case KDBUS_ITEM_NAME_CHANGE:
@@ -528,7 +515,7 @@ exit:
  * @conn:              connection to operate on
  * @argp:              command payload
  *
- * Return: 0 on success, negative error code on failure.
+ * Return: >=0 on success, negative error code on failure.
  */
 int kdbus_cmd_match_remove(struct kdbus_conn *conn, void __user *argp)
 {
index ea42929..ceb492f 100644 (file)
@@ -16,8 +16,8 @@
 #define __KDBUS_MATCH_H
 
 struct kdbus_conn;
-struct kdbus_kmsg;
 struct kdbus_match_db;
+struct kdbus_staging;
 
 struct kdbus_match_db *kdbus_match_db_new(void);
 void kdbus_match_db_free(struct kdbus_match_db *db);
@@ -25,9 +25,9 @@ int kdbus_match_db_add(struct kdbus_conn *conn,
                       struct kdbus_cmd_match *cmd);
 int kdbus_match_db_remove(struct kdbus_conn *conn,
                          struct kdbus_cmd_match *cmd);
-bool kdbus_match_db_match_kmsg(struct kdbus_match_db *db,
-                              struct kdbus_conn *conn_src,
-                              struct kdbus_kmsg *kmsg);
+bool kdbus_match_db_match_msg(struct kdbus_match_db *db,
+                             struct kdbus_conn *conn_src,
+                             const struct kdbus_staging *staging);
 
 int kdbus_cmd_match_add(struct kdbus_conn *conn, void __user *argp);
 int kdbus_cmd_match_remove(struct kdbus_conn *conn, void __user *argp);
index 4b1487a..ae565cd 100644 (file)
 #include "names.h"
 #include "policy.h"
 
-#define KDBUS_KMSG_HEADER_SIZE offsetof(struct kdbus_kmsg, msg)
+static const char * const zeros = "\0\0\0\0\0\0\0";
 
-static struct kdbus_msg_resources *kdbus_msg_resources_new(void)
+static struct kdbus_gaps *kdbus_gaps_new(size_t n_memfds, size_t n_fds)
 {
-       struct kdbus_msg_resources *r;
+       size_t size_offsets, size_memfds, size_fds, size;
+       struct kdbus_gaps *gaps;
+
+       size_offsets = n_memfds * sizeof(*gaps->memfd_offsets);
+       size_memfds = n_memfds * sizeof(*gaps->memfd_files);
+       size_fds = n_fds * sizeof(*gaps->fd_files);
+       size = sizeof(*gaps) + size_offsets + size_memfds + size_fds;
 
-       r = kzalloc(sizeof(*r), GFP_KERNEL);
-       if (!r)
+       gaps = kzalloc(size, GFP_KERNEL);
+       if (!gaps)
                return ERR_PTR(-ENOMEM);
 
-       kref_init(&r->kref);
+       kref_init(&gaps->kref);
+       gaps->n_memfds = 0; /* we reserve n_memfds, but don't enforce them */
+       gaps->memfd_offsets = (void *)(gaps + 1);
+       gaps->memfd_files = (void *)((u8 *)gaps->memfd_offsets + size_offsets);
+       gaps->n_fds = 0; /* we reserve n_fds, but don't enforce them */
+       gaps->fd_files = (void *)((u8 *)gaps->memfd_files + size_memfds);
 
-       return r;
+       return gaps;
 }
 
-static void __kdbus_msg_resources_free(struct kref *kref)
+static void kdbus_gaps_free(struct kref *kref)
 {
-       struct kdbus_msg_resources *r =
-               container_of(kref, struct kdbus_msg_resources, kref);
+       struct kdbus_gaps *gaps = container_of(kref, struct kdbus_gaps, kref);
        size_t i;
 
-       for (i = 0; i < r->data_count; ++i) {
-               switch (r->data[i].type) {
-               case KDBUS_MSG_DATA_VEC:
-                       /* nothing to do */
-                       break;
-               case KDBUS_MSG_DATA_MEMFD:
-                       if (r->data[i].memfd.file)
-                               fput(r->data[i].memfd.file);
-                       break;
-               }
-       }
-
-       for (i = 0; i < r->fds_count; i++)
-               if (r->fds[i])
-                       fput(r->fds[i]);
+       for (i = 0; i < gaps->n_fds; ++i)
+               if (gaps->fd_files[i])
+                       fput(gaps->fd_files[i]);
+       for (i = 0; i < gaps->n_memfds; ++i)
+               if (gaps->memfd_files[i])
+                       fput(gaps->memfd_files[i]);
 
-       kfree(r->dst_name);
-       kfree(r->data);
-       kfree(r->fds);
-       kfree(r);
+       kfree(gaps);
 }
 
 /**
- * kdbus_msg_resources_ref() - Acquire reference to msg resources
- * @r:         resources to acquire ref to
+ * kdbus_gaps_ref() - gain reference
+ * @gaps:      gaps object
  *
- * Return: The acquired resource
+ * Return: @gaps is returned
  */
-struct kdbus_msg_resources *
-kdbus_msg_resources_ref(struct kdbus_msg_resources *r)
+struct kdbus_gaps *kdbus_gaps_ref(struct kdbus_gaps *gaps)
 {
-       if (r)
-               kref_get(&r->kref);
-       return r;
+       if (gaps)
+               kref_get(&gaps->kref);
+       return gaps;
 }
 
 /**
- * kdbus_msg_resources_unref() - Drop reference to msg resources
- * @r:         resources to drop reference of
+ * kdbus_gaps_unref() - drop reference
+ * @gaps:      gaps object
  *
  * Return: NULL
  */
-struct kdbus_msg_resources *
-kdbus_msg_resources_unref(struct kdbus_msg_resources *r)
+struct kdbus_gaps *kdbus_gaps_unref(struct kdbus_gaps *gaps)
 {
-       if (r)
-               kref_put(&r->kref, __kdbus_msg_resources_free);
+       if (gaps)
+               kref_put(&gaps->kref, kdbus_gaps_free);
        return NULL;
 }
 
 /**
- * kdbus_kmsg_free() - free allocated message
- * @kmsg:              Message
- */
-void kdbus_kmsg_free(struct kdbus_kmsg *kmsg)
-{
-       if (!kmsg)
-               return;
-
-       kdbus_msg_resources_unref(kmsg->res);
-       kdbus_meta_conn_unref(kmsg->conn_meta);
-       kdbus_meta_proc_unref(kmsg->proc_meta);
-       kfree(kmsg->iov);
-       kfree(kmsg);
-}
-
-/**
- * kdbus_kmsg_new() - allocate message
- * @bus:               Bus this message is allocated on
- * @extra_size:                Additional size to reserve for data
+ * kdbus_gaps_install() - install file-descriptors
+ * @gaps:              gaps object, or NULL
+ * @slice:             pool slice that contains the message
+ * @out_incomplete:    output variable to note incomplete fds
+ *
+ * This function installs all file-descriptors of @gaps into the current
+ * process and copies the file-descriptor numbers into the target pool slice.
  *
- * Return: new kdbus_kmsg on success, ERR_PTR on failure.
+ * If the file-descriptors were only partially installed, then @out_incomplete
+ * will be set to true. Otherwise, it's set to false.
+ *
+ * Return: 0 on success, negative error code on failure
  */
-struct kdbus_kmsg *kdbus_kmsg_new(struct kdbus_bus *bus, size_t extra_size)
+int kdbus_gaps_install(struct kdbus_gaps *gaps, struct kdbus_pool_slice *slice,
+                      bool *out_incomplete)
 {
-       struct kdbus_kmsg *m;
-       size_t size;
-       int ret;
+       bool incomplete_fds = false;
+       struct kvec kvec;
+       size_t i, n_fds;
+       int ret, *fds;
+
+       if (!gaps) {
+               /* nothing to do */
+               *out_incomplete = incomplete_fds;
+               return 0;
+       }
 
-       size = sizeof(struct kdbus_kmsg) + KDBUS_ITEM_SIZE(extra_size);
-       m = kzalloc(size, GFP_KERNEL);
-       if (!m)
-               return ERR_PTR(-ENOMEM);
+       n_fds = gaps->n_fds + gaps->n_memfds;
+       if (n_fds < 1) {
+               /* nothing to do */
+               *out_incomplete = incomplete_fds;
+               return 0;
+       }
 
-       m->seq = atomic64_inc_return(&bus->domain->last_id);
-       m->msg.size = size - KDBUS_KMSG_HEADER_SIZE;
-       m->msg.items[0].size = KDBUS_ITEM_SIZE(extra_size);
+       fds = kmalloc_array(n_fds, sizeof(*fds), GFP_TEMPORARY);
+       n_fds = 0;
+       if (!fds)
+               return -ENOMEM;
 
-       m->proc_meta = kdbus_meta_proc_new();
-       if (IS_ERR(m->proc_meta)) {
-               ret = PTR_ERR(m->proc_meta);
-               m->proc_meta = NULL;
-               goto exit;
+       /* 1) allocate fds and copy them over */
+
+       if (gaps->n_fds > 0) {
+               for (i = 0; i < gaps->n_fds; ++i) {
+                       int fd;
+
+                       fd = get_unused_fd_flags(O_CLOEXEC);
+                       if (fd < 0)
+                               incomplete_fds = true;
+
+                       WARN_ON(!gaps->fd_files[i]);
+
+                       fds[n_fds++] = fd < 0 ? -1 : fd;
+               }
+
+               /*
+                * The file-descriptor array can only be present once per
+                * message. Hence, prepare all fds and then copy them over with
+                * a single kvec.
+                */
+
+               WARN_ON(!gaps->fd_offset);
+
+               kvec.iov_base = fds;
+               kvec.iov_len = gaps->n_fds * sizeof(*fds);
+               ret = kdbus_pool_slice_copy_kvec(slice, gaps->fd_offset,
+                                                &kvec, 1, kvec.iov_len);
+               if (ret < 0)
+                       goto exit;
        }
 
-       m->conn_meta = kdbus_meta_conn_new();
-       if (IS_ERR(m->conn_meta)) {
-               ret = PTR_ERR(m->conn_meta);
-               m->conn_meta = NULL;
-               goto exit;
+       for (i = 0; i < gaps->n_memfds; ++i) {
+               int memfd;
+
+               memfd = get_unused_fd_flags(O_CLOEXEC);
+               /* always record a slot: fds[gaps->n_fds + i] is memfd i */
+               fds[n_fds++] = memfd < 0 ? -1 : memfd;
+               if (memfd < 0) {
+                       incomplete_fds = true;
+                       /* memfds are initialized to -1, skip copying it */
+                       continue;
+               }
+
+               /*
+                * memfds have to be copied individually as they each are put
+                * into a separate item. This should not be an issue, though,
+                * as usually there is no need to send more than one memfd per
+                * message.
+                */
+
+               WARN_ON(!gaps->memfd_offsets[i]);
+               WARN_ON(!gaps->memfd_files[i]);
+
+               kvec.iov_base = &memfd;
+               kvec.iov_len = sizeof(memfd);
+               ret = kdbus_pool_slice_copy_kvec(slice, gaps->memfd_offsets[i],
+                                                &kvec, 1, kvec.iov_len);
+               if (ret < 0)
+                       goto exit;
        }
 
-       return m;
+       /* 2) install fds now that everything was successful */
+
+       for (i = 0; i < gaps->n_fds; ++i)
+               if (fds[i] >= 0)
+                       fd_install(fds[i], get_file(gaps->fd_files[i]));
+       for (i = 0; i < gaps->n_memfds; ++i)
+               if (fds[gaps->n_fds + i] >= 0)
+                       fd_install(fds[gaps->n_fds + i],
+                                  get_file(gaps->memfd_files[i]));
+
+       ret = 0;
 
 exit:
-       kdbus_kmsg_free(m);
-       return ERR_PTR(ret);
+       for (i = 0; ret < 0 && i < n_fds; ++i)
+               if (fds[i] >= 0) /* -1 marks a slot whose allocation failed */
+                       put_unused_fd(fds[i]);
+       kfree(fds);
+       *out_incomplete = incomplete_fds;
+       return ret;
 }
 
-static int kdbus_handle_check_file(struct file *file)
+static struct file *kdbus_get_fd(int fd)
 {
-       /*
-        * Don't allow file descriptors in the transport that themselves allow
-        * file descriptor queueing. This will eventually be allowed once both
-        * unix domain sockets and kdbus share a generic garbage collector.
-        */
+       struct file *f, *ret;
+       struct inode *inode;
+       struct socket *sock;
 
-       if (file->f_op == &kdbus_handle_ops)
-               return -EOPNOTSUPP;
+       if (fd < 0)
+               return ERR_PTR(-EBADF);
 
-       return 0;
+       f = fget_raw(fd);
+       if (!f)
+               return ERR_PTR(-EBADF);
+
+       inode = file_inode(f);
+       sock = S_ISSOCK(inode->i_mode) ? SOCKET_I(inode) : NULL;
+
+       if (f->f_mode & FMODE_PATH)
+               ret = f; /* O_PATH is always allowed */
+       else if (f->f_op == &kdbus_handle_ops)
+               ret = ERR_PTR(-EOPNOTSUPP); /* disallow kdbus-fd over kdbus */
+       else if (sock && sock->sk && sock->ops && sock->ops->family == PF_UNIX)
+               ret = ERR_PTR(-EOPNOTSUPP); /* disallow UDS over kdbus */
+       else
+               ret = f; /* all other are allowed */
+
+       if (f != ret)
+               fput(f);
+
+       return ret;
 }
 
-static const char * const zeros = "\0\0\0\0\0\0\0";
+static struct file *kdbus_get_memfd(const struct kdbus_memfd *memfd)
+{
+       const int m = F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE | F_SEAL_SEAL;
+       struct file *f, *ret;
+       int s;
+
+       if (memfd->fd < 0)
+               return ERR_PTR(-EBADF);
+
+       f = fget(memfd->fd);
+       if (!f)
+               return ERR_PTR(-EBADF);
+
+       s = shmem_get_seals(f);
+       if (s < 0)
+               ret = ERR_PTR(-EMEDIUMTYPE);
+       else if ((s & m) != m)
+               ret = ERR_PTR(-ETXTBSY);
+       else if (memfd->start + memfd->size > (u64)i_size_read(file_inode(f)))
+               ret = ERR_PTR(-EFAULT);
+       else
+               ret = f;
+
+       if (f != ret)
+               fput(f);
+
+       return ret;
+}
 
-/*
- * kdbus_msg_scan_items() - validate incoming data and prepare parsing
- * @kmsg:              Message
- * @bus:               Bus the message is sent over
- *
- * Return: 0 on success, negative errno on failure.
- *
- * Files references in MEMFD or FDS items are pinned.
- *
- * On errors, the caller should drop any taken reference with
- * kdbus_kmsg_free()
- */
-static int kdbus_msg_scan_items(struct kdbus_kmsg *kmsg,
-                               struct kdbus_bus *bus)
+static int kdbus_msg_examine(struct kdbus_msg *msg, struct kdbus_bus *bus,
+                            struct kdbus_cmd_send *cmd, size_t *out_n_memfds,
+                            size_t *out_n_fds, size_t *out_n_parts)
 {
-       struct kdbus_msg_resources *res = kmsg->res;
-       const struct kdbus_msg *msg = &kmsg->msg;
-       const struct kdbus_item *item;
-       size_t n, n_vecs, n_memfds;
-       bool has_bloom = false;
-       bool has_name = false;
-       bool has_fds = false;
-       bool is_broadcast;
-       bool is_signal;
-       u64 vec_size;
-
-       is_broadcast = (msg->dst_id == KDBUS_DST_ID_BROADCAST);
-       is_signal = !!(msg->flags & KDBUS_MSG_SIGNAL);
-
-       /* count data payloads */
-       n_vecs = 0;
-       n_memfds = 0;
-       KDBUS_ITEMS_FOREACH(item, msg->items, KDBUS_ITEMS_SIZE(msg, items)) {
-               switch (item->type) {
-               case KDBUS_ITEM_PAYLOAD_VEC:
-                       ++n_vecs;
-                       break;
-               case KDBUS_ITEM_PAYLOAD_MEMFD:
-                       ++n_memfds;
-                       if (item->memfd.size % 8)
-                               ++n_vecs;
-                       break;
-               default:
-                       break;
-               }
-       }
+       struct kdbus_item *item, *fds = NULL, *bloom = NULL, *dstname = NULL;
+       u64 n_parts, n_memfds, n_fds, vec_size;
 
-       n = n_vecs + n_memfds;
-       if (n > 0) {
-               res->data = kcalloc(n, sizeof(*res->data), GFP_KERNEL);
-               if (!res->data)
-                       return -ENOMEM;
+       /*
+        * Step 1:
+        * Validate the message and command parameters.
+        */
+
+       /* KDBUS_PAYLOAD_KERNEL is reserved to kernel messages */
+       if (msg->payload_type == KDBUS_PAYLOAD_KERNEL)
+               return -EINVAL;
+
+       if (msg->dst_id == KDBUS_DST_ID_BROADCAST) {
+               /* broadcasts must be marked as signals */
+               if (!(msg->flags & KDBUS_MSG_SIGNAL))
+                       return -EBADMSG;
+               /* broadcasts cannot have timeouts */
+               if (msg->timeout_ns > 0)
+                       return -ENOTUNIQ;
        }
 
-       if (n_vecs > 0) {
-               kmsg->iov = kcalloc(n_vecs, sizeof(*kmsg->iov), GFP_KERNEL);
-               if (!kmsg->iov)
-                       return -ENOMEM;
+       if (msg->flags & KDBUS_MSG_EXPECT_REPLY) {
+               /* if you expect a reply, you must specify a timeout */
+               if (msg->timeout_ns == 0)
+                       return -EINVAL;
+               /* signals cannot have replies */
+               if (msg->flags & KDBUS_MSG_SIGNAL)
+                       return -ENOTUNIQ;
+       } else {
+               /* must expect reply if sent as synchronous call */
+               if (cmd->flags & KDBUS_SEND_SYNC_REPLY)
+                       return -EINVAL;
+               /* cannot mark replies as signal */
+               if (msg->cookie_reply && (msg->flags & KDBUS_MSG_SIGNAL))
+                       return -EINVAL;
        }
 
-       /* import data payloads */
-       n = 0;
-       vec_size = 0;
-       KDBUS_ITEMS_FOREACH(item, msg->items, KDBUS_ITEMS_SIZE(msg, items)) {
-               size_t payload_size = KDBUS_ITEM_PAYLOAD_SIZE(item);
-               struct iovec *iov = kmsg->iov + kmsg->iov_count;
+       /*
+        * Step 2:
+        * Validate all passed items. While at it, select some statistics that
+        * are required to allocate state objects later on.
+        *
+        * Generic item validation has already been done via
+        * kdbus_item_validate(). Furthermore, the number of items is naturally
+        * limited by the maximum message size. Hence, only non-generic item
+        * checks are performed here (mainly integer overflow tests).
+        */
 
-               if (++n > KDBUS_MSG_MAX_ITEMS)
-                       return -E2BIG;
+       n_parts = 0;
+       n_memfds = 0;
+       n_fds = 0;
+       vec_size = 0;
 
+       KDBUS_ITEMS_FOREACH(item, msg->items, KDBUS_ITEMS_SIZE(msg, items)) {
                switch (item->type) {
                case KDBUS_ITEM_PAYLOAD_VEC: {
-                       struct kdbus_msg_data *d = res->data + res->data_count;
                        void __force __user *ptr = KDBUS_PTR(item->vec.address);
-                       size_t size = item->vec.size;
+                       u64 size = item->vec.size;
 
                        if (vec_size + size < vec_size)
                                return -EMSGSIZE;
                        if (vec_size + size > KDBUS_MSG_MAX_PAYLOAD_VEC_SIZE)
                                return -EMSGSIZE;
+                       if (ptr && unlikely(!access_ok(VERIFY_READ, ptr, size)))
+                               return -EFAULT;
 
-                       d->type = KDBUS_MSG_DATA_VEC;
-                       d->size = size;
-
-                       if (ptr) {
-                               if (unlikely(!access_ok(VERIFY_READ, ptr,
-                                                       size)))
-                                       return -EFAULT;
-
-                               d->vec.off = kmsg->pool_size;
-                               iov->iov_base = ptr;
-                               iov->iov_len = size;
-                       } else {
-                               d->vec.off = ~0ULL;
-                               iov->iov_base = (char __user *)zeros;
-                               iov->iov_len = size % 8;
-                       }
-
-                       if (kmsg->pool_size + iov->iov_len < kmsg->pool_size)
-                               return -EMSGSIZE;
-
-                       kmsg->pool_size += iov->iov_len;
-                       ++kmsg->iov_count;
-                       ++res->vec_count;
-                       ++res->data_count;
-                       vec_size += size;
-
+                       if (ptr || size % 8) /* data or padding */
+                               ++n_parts;
                        break;
                }
-
                case KDBUS_ITEM_PAYLOAD_MEMFD: {
-                       struct kdbus_msg_data *d = res->data + res->data_count;
                        u64 start = item->memfd.start;
                        u64 size = item->memfd.size;
-                       size_t pad = size % 8;
-                       int seals, mask;
-                       struct file *f;
 
-                       if (kmsg->pool_size + size % 8 < kmsg->pool_size)
-                               return -EMSGSIZE;
                        if (start + size < start)
                                return -EMSGSIZE;
-
-                       if (item->memfd.fd < 0)
-                               return -EBADF;
-
-                       if (res->memfd_count >= KDBUS_MSG_MAX_MEMFD_ITEMS)
+                       if (n_memfds >= KDBUS_MSG_MAX_MEMFD_ITEMS)
                                return -E2BIG;
 
-                       f = fget(item->memfd.fd);
-                       if (!f)
-                               return -EBADF;
-
-                       if (pad) {
-                               iov->iov_base = (char __user *)zeros;
-                               iov->iov_len = pad;
+                       ++n_memfds;
+                       if (size % 8) /* vec-padding required */
+                               ++n_parts;
+                       break;
+               }
+               case KDBUS_ITEM_FDS: {
+                       if (fds)
+                               return -EEXIST;
 
-                               kmsg->pool_size += pad;
-                               ++kmsg->iov_count;
-                       }
+                       fds = item;
+                       n_fds = KDBUS_ITEM_PAYLOAD_SIZE(item) / sizeof(int);
+                       if (n_fds > KDBUS_CONN_MAX_FDS_PER_USER)
+                               return -EMFILE;
 
-                       ++res->data_count;
-                       ++res->memfd_count;
+                       break;
+               }
+               case KDBUS_ITEM_BLOOM_FILTER: {
+                       u64 bloom_size;
 
-                       d->type = KDBUS_MSG_DATA_MEMFD;
-                       d->size = size;
-                       d->memfd.start = start;
-                       d->memfd.file = f;
+                       if (bloom)
+                               return -EEXIST;
 
-                       /*
-                        * We only accept a sealed memfd file whose content
-                        * cannot be altered by the sender or anybody else
-                        * while it is shared or in-flight. Other files need
-                        * to be passed with KDBUS_MSG_FDS.
-                        */
-                       seals = shmem_get_seals(f);
-                       if (seals < 0)
-                               return -EMEDIUMTYPE;
+                       bloom = item;
+                       bloom_size = KDBUS_ITEM_PAYLOAD_SIZE(item) -
+                                    offsetof(struct kdbus_bloom_filter, data);
+                       if (!KDBUS_IS_ALIGNED8(bloom_size))
+                               return -EFAULT;
+                       if (bloom_size != bus->bloom.size)
+                               return -EDOM;
 
-                       mask = F_SEAL_SHRINK | F_SEAL_GROW |
-                               F_SEAL_WRITE | F_SEAL_SEAL;
-                       if ((seals & mask) != mask)
-                               return -ETXTBSY;
+                       break;
+               }
+               case KDBUS_ITEM_DST_NAME: {
+                       if (dstname)
+                               return -EEXIST;
 
-                       if (start + size > (u64)i_size_read(file_inode(f)))
-                               return -EBADF;
+                       dstname = item;
+                       if (!kdbus_name_is_valid(item->str, false))
+                               return -EINVAL;
+                       if (msg->dst_id == KDBUS_DST_ID_BROADCAST)
+                               return -EBADMSG;
 
                        break;
                }
+               default:
+                       return -EINVAL;
+               }
+       }
 
-               case KDBUS_ITEM_FDS: {
-                       unsigned int i;
-                       unsigned int fds_count = payload_size / sizeof(int);
+       /*
+        * Step 3:
+        * Validate that required items were actually passed, and that no item
+        * contradicts the message flags.
+        */
 
-                       /* do not allow multiple fd arrays */
-                       if (has_fds)
-                               return -EEXIST;
-                       has_fds = true;
+       /* bloom filters must be attached _iff_ it's a signal */
+       if (!(msg->flags & KDBUS_MSG_SIGNAL) != !bloom)
+               return -EBADMSG;
+       /* destination name is required if no ID is given */
+       if (msg->dst_id == KDBUS_DST_ID_NAME && !dstname)
+               return -EDESTADDRREQ;
+       /* cannot send file-descriptors attached to broadcasts */
+       if (msg->dst_id == KDBUS_DST_ID_BROADCAST && fds)
+               return -ENOTUNIQ;
 
-                       /* Do not allow to broadcast file descriptors */
-                       if (is_broadcast)
-                               return -ENOTUNIQ;
+       *out_n_memfds = n_memfds;
+       *out_n_fds = n_fds;
+       *out_n_parts = n_parts;
 
-                       if (fds_count > KDBUS_CONN_MAX_FDS_PER_USER)
-                               return -EMFILE;
+       return 0;
+}
+
+static bool kdbus_staging_merge_vecs(struct kdbus_staging *staging,
+                                    struct kdbus_item **prev_item,
+                                    struct iovec **prev_vec,
+                                    const struct kdbus_item *merge)
+{
+       void __user *ptr = (void __user *)KDBUS_PTR(merge->vec.address);
+       u64 padding = merge->vec.size % 8;
+       struct kdbus_item *prev = *prev_item;
+       struct iovec *vec = *prev_vec;
+
+       /* XXX: merging is disabled so far */
+       if (0 && prev && prev->type == KDBUS_ITEM_PAYLOAD_OFF &&
+           !merge->vec.address == !prev->vec.address) {
+               /*
+                * If we merge two VECs, we can always drop the second
+                * PAYLOAD_VEC item. Hence, include its size in the previous
+                * one.
+                */
+               prev->vec.size += merge->vec.size;
+
+               if (ptr) {
+                       /*
+                        * If we merge two data VECs, we need two iovecs to copy
+                        * the data. But the items can be easily merged by
+                        * summing their lengths.
+                        */
+                       vec = &staging->parts[staging->n_parts++];
+                       vec->iov_len = merge->vec.size;
+                       vec->iov_base = ptr;
+                       staging->n_payload += vec->iov_len;
+               } else if (padding) {
+                       /*
+                        * If we merge two 0-vecs with the second 0-vec
+                        * requiring padding, we need to insert an iovec to copy
+                        * the 0-padding. We try merging it with the previous
+                        * 0-padding iovec. This might end up with an
+                        * iov_len==0, in which case we simply drop the iovec.
+                        */
+                       if (vec) {
+                               staging->n_payload -= vec->iov_len;
+                               vec->iov_len = prev->vec.size % 8;
+                               if (!vec->iov_len) {
+                                       --staging->n_parts;
+                                       vec = NULL;
+                               } else {
+                                       staging->n_payload += vec->iov_len;
+                               }
+                       } else {
+                               vec = &staging->parts[staging->n_parts++];
+                               vec->iov_len = padding;
+                               vec->iov_base = (char __user *)zeros;
+                               staging->n_payload += vec->iov_len;
+                       }
+               } else {
+                       /*
+                        * If we merge two 0-vecs with the second 0-vec having
+                        * no padding, we know the padding of the first stays
+                        * the same. Hence, @vec needs no adjustment.
+                        */
+               }
 
-                       res->fds = kcalloc(fds_count, sizeof(struct file *),
-                                          GFP_KERNEL);
-                       if (!res->fds)
-                               return -ENOMEM;
+               /* successfully merged with previous item */
+               merge = prev;
+       } else {
+               /*
+                * If we cannot merge the payload item with the previous one,
+                * we simply insert a new iovec for the data/padding.
+                */
+               if (ptr) {
+                       vec = &staging->parts[staging->n_parts++];
+                       vec->iov_len = merge->vec.size;
+                       vec->iov_base = ptr;
+                       staging->n_payload += vec->iov_len;
+               } else if (padding) {
+                       vec = &staging->parts[staging->n_parts++];
+                       vec->iov_len = padding;
+                       vec->iov_base = (char __user *)zeros;
+                       staging->n_payload += vec->iov_len;
+               } else {
+                       vec = NULL;
+               }
+       }
 
-                       for (i = 0; i < fds_count; i++) {
-                               int fd = item->fds[i];
-                               int ret;
+       *prev_item = (struct kdbus_item *)merge;
+       *prev_vec = vec;
 
-                               /*
-                                * Verify the fd and increment the usage count.
-                                * Use fget_raw() to allow passing O_PATH fds.
-                                */
-                               if (fd < 0)
-                                       return -EBADF;
+       return merge == prev;
+}
 
-                               res->fds[i] = fget_raw(fd);
-                               if (!res->fds[i])
-                                       return -EBADF;
+static int kdbus_staging_import(struct kdbus_staging *staging)
+{
+       struct kdbus_item *it, *item, *last, *prev_payload;
+       struct kdbus_gaps *gaps = staging->gaps;
+       struct kdbus_msg *msg = staging->msg;
+       struct iovec *part, *prev_part;
+       bool drop_item;
 
-                               res->fds_count++;
+       drop_item = false;
+       last = NULL;
+       prev_payload = NULL;
+       prev_part = NULL;
 
-                               ret = kdbus_handle_check_file(res->fds[i]);
-                               if (ret < 0)
-                                       return ret;
+       /*
+        * We modify msg->items along the way; make sure to use @item as offset
+        * to the next item (instead of the iterator @it).
+        */
+       for (it = item = msg->items;
+            it >= msg->items &&
+                    (u8 *)it < (u8 *)msg + msg->size &&
+                    (u8 *)it + it->size <= (u8 *)msg + msg->size; ) {
+               /*
+                * If we dropped items along the way, move current item to
+                * front. We must not access @it afterwards, but use @item
+                * instead!
+                */
+               if (it != item)
+                       memmove(item, it, it->size);
+               it = (void *)((u8 *)it + KDBUS_ALIGN8(item->size));
+
+               switch (item->type) {
+               case KDBUS_ITEM_PAYLOAD_VEC: {
+                       size_t offset = staging->n_payload;
+
+                       if (kdbus_staging_merge_vecs(staging, &prev_payload,
+                                                    &prev_part, item)) {
+                               drop_item = true;
+                       } else if (item->vec.address) {
+                               /* real offset is patched later on */
+                               item->type = KDBUS_ITEM_PAYLOAD_OFF;
+                               item->vec.offset = offset;
+                       } else {
+                               item->type = KDBUS_ITEM_PAYLOAD_OFF;
+                               item->vec.offset = ~0ULL;
                        }
 
                        break;
                }
+               case KDBUS_ITEM_PAYLOAD_MEMFD: {
+                       struct file *f;
 
-               case KDBUS_ITEM_BLOOM_FILTER: {
-                       u64 bloom_size;
+                       f = kdbus_get_memfd(&item->memfd);
+                       if (IS_ERR(f))
+                               return PTR_ERR(f);
+
+                       gaps->memfd_files[gaps->n_memfds] = f;
+                       gaps->memfd_offsets[gaps->n_memfds] =
+                                       (u8 *)&item->memfd.fd - (u8 *)msg;
+                       ++gaps->n_memfds;
+
+                       /* memfds cannot be merged */
+                       prev_payload = item;
+                       prev_part = NULL;
+
+                       /* insert padding to make following VECs aligned */
+                       if (item->memfd.size % 8) {
+                               part = &staging->parts[staging->n_parts++];
+                               part->iov_len = item->memfd.size % 8;
+                               part->iov_base = (char __user *)zeros;
+                               staging->n_payload += part->iov_len;
+                       }
 
-                       /* do not allow multiple bloom filters */
-                       if (has_bloom)
-                               return -EEXIST;
-                       has_bloom = true;
+                       break;
+               }
+               case KDBUS_ITEM_FDS: {
+                       size_t i, n_fds;
 
-                       bloom_size = payload_size -
-                                    offsetof(struct kdbus_bloom_filter, data);
+                       n_fds = KDBUS_ITEM_PAYLOAD_SIZE(item) / sizeof(int);
+                       for (i = 0; i < n_fds; ++i) {
+                               struct file *f;
 
-                       /*
-                       * Allow only bloom filter sizes of a multiple of 64bit.
-                       */
-                       if (!KDBUS_IS_ALIGNED8(bloom_size))
-                               return -EFAULT;
+                               f = kdbus_get_fd(item->fds[i]);
+                               if (IS_ERR(f))
+                                       return PTR_ERR(f);
 
-                       /* do not allow mismatching bloom filter sizes */
-                       if (bloom_size != bus->bloom.size)
-                               return -EDOM;
+                               gaps->fd_files[gaps->n_fds++] = f;
+                       }
+
+                       gaps->fd_offset = (u8 *)item->fds - (u8 *)msg;
 
-                       kmsg->bloom_filter = &item->bloom_filter;
                        break;
                }
-
+               case KDBUS_ITEM_BLOOM_FILTER:
+                       staging->bloom_filter = &item->bloom_filter;
+                       break;
                case KDBUS_ITEM_DST_NAME:
-                       /* do not allow multiple names */
-                       if (has_name)
-                               return -EEXIST;
-                       has_name = true;
-
-                       if (!kdbus_name_is_valid(item->str, false))
-                               return -EINVAL;
-
-                       res->dst_name = kstrdup(item->str, GFP_KERNEL);
-                       if (!res->dst_name)
-                               return -ENOMEM;
+                       staging->dst_name = item->str;
                        break;
+               }
 
-               default:
-                       return -EINVAL;
+               /* drop item if we merged it with a previous one */
+               if (drop_item) {
+                       drop_item = false;
+               } else {
+                       last = item;
+                       item = KDBUS_ITEM_NEXT(item);
                }
        }
 
-       /* name is needed if no ID is given */
-       if (msg->dst_id == KDBUS_DST_ID_NAME && !has_name)
-               return -EDESTADDRREQ;
+       /* adjust message size regarding dropped items */
+       msg->size = offsetof(struct kdbus_msg, items);
+       if (last)
+               msg->size += ((u8 *)last - (u8 *)msg->items) + last->size;
 
-       if (is_broadcast) {
-               /* Broadcasts can't take names */
-               if (has_name)
-                       return -EBADMSG;
+       return 0;
+}
 
-               /* All broadcasts have to be signals */
-               if (!is_signal)
-                       return -EBADMSG;
+static void kdbus_staging_reserve(struct kdbus_staging *staging)
+{
+       struct iovec *part;
 
-               /* Timeouts are not allowed for broadcasts */
-               if (msg->timeout_ns > 0)
-                       return -ENOTUNIQ;
+       part = &staging->parts[staging->n_parts++];
+       part->iov_base = (void __user *)zeros;
+       part->iov_len = 0;
+}
+
+static struct kdbus_staging *kdbus_staging_new(struct kdbus_bus *bus,
+                                              size_t n_parts,
+                                              size_t msg_extra_size)
+{
+       const size_t reserved_parts = 5; /* see below for explanation */
+       struct kdbus_staging *staging;
+       int ret;
+
+       n_parts += reserved_parts;
+
+       staging = kzalloc(sizeof(*staging) + n_parts * sizeof(*staging->parts) +
+                         msg_extra_size, GFP_TEMPORARY);
+       if (!staging)
+               return ERR_PTR(-ENOMEM);
+
+       staging->msg_seqnum = atomic64_inc_return(&bus->last_message_id);
+       staging->n_parts = 0; /* we reserve n_parts, but don't enforce them */
+       staging->parts = (void *)(staging + 1);
+
+       if (msg_extra_size) /* if requested, allocate message, too */
+               staging->msg = (void *)((u8 *)staging->parts +
+                                       n_parts * sizeof(*staging->parts));
+
+       staging->meta_proc = kdbus_meta_proc_new();
+       if (IS_ERR(staging->meta_proc)) {
+               ret = PTR_ERR(staging->meta_proc);
+               staging->meta_proc = NULL;
+               goto error;
+       }
+
+       staging->meta_conn = kdbus_meta_conn_new();
+       if (IS_ERR(staging->meta_conn)) {
+               ret = PTR_ERR(staging->meta_conn);
+               staging->meta_conn = NULL;
+               goto error;
        }
 
        /*
-        * Signal messages require a bloom filter, and bloom filters are
-        * only valid with signals.
+        * Prepare iovecs to copy the message into the target pool. We use the
+        * following iovecs:
+        *   * iovec to copy "kdbus_msg.size"
+        *   * iovec to copy "struct kdbus_msg" (minus size) plus items
+        *   * iovec for possible padding after the items
+        *   * iovec for metadata items
+        *   * iovec for possible padding after the metadata items
+        *
+        * Make sure to update @reserved_parts if you add more parts here.
         */
-       if (is_signal ^ has_bloom)
-               return -EBADMSG;
 
-       return 0;
+       kdbus_staging_reserve(staging); /* msg.size */
+       kdbus_staging_reserve(staging); /* msg (minus msg.size) plus items */
+       kdbus_staging_reserve(staging); /* msg padding */
+       kdbus_staging_reserve(staging); /* meta */
+       kdbus_staging_reserve(staging); /* meta padding */
+
+       return staging;
+
+error:
+       kdbus_staging_free(staging);
+       return ERR_PTR(ret);
 }
 
-/**
- * kdbus_kmsg_new_from_cmd() - create kernel message from send payload
- * @conn:              Connection
- * @cmd_send:          Payload of KDBUS_CMD_SEND
- *
- * Return: a new kdbus_kmsg on success, ERR_PTR on failure.
- */
-struct kdbus_kmsg *kdbus_kmsg_new_from_cmd(struct kdbus_conn *conn,
-                                          struct kdbus_cmd_send *cmd_send)
+struct kdbus_staging *kdbus_staging_new_kernel(struct kdbus_bus *bus,
+                                              u64 dst, u64 cookie_timeout,
+                                              size_t it_size, size_t it_type)
+{
+       struct kdbus_staging *staging;
+       size_t size;
+
+       size = offsetof(struct kdbus_msg, items) +
+              KDBUS_ITEM_HEADER_SIZE + it_size;
+
+       staging = kdbus_staging_new(bus, 0, KDBUS_ALIGN8(size));
+       if (IS_ERR(staging))
+               return ERR_CAST(staging);
+
+       staging->msg->size = size;
+       staging->msg->flags = (dst == KDBUS_DST_ID_BROADCAST) ?
+                                                       KDBUS_MSG_SIGNAL : 0;
+       staging->msg->dst_id = dst;
+       staging->msg->src_id = KDBUS_SRC_ID_KERNEL;
+       staging->msg->payload_type = KDBUS_PAYLOAD_KERNEL;
+       staging->msg->cookie_reply = cookie_timeout;
+       staging->notify = staging->msg->items;
+       staging->notify->size = KDBUS_ITEM_HEADER_SIZE + it_size;
+       staging->notify->type = it_type;
+
+       return staging;
+}
+
+struct kdbus_staging *kdbus_staging_new_user(struct kdbus_bus *bus,
+                                            struct kdbus_cmd_send *cmd,
+                                            struct kdbus_msg *msg)
 {
-       struct kdbus_kmsg *m;
-       u64 size;
+       const size_t reserved_parts = 1; /* see below for explanation */
+       size_t n_memfds, n_fds, n_parts;
+       struct kdbus_staging *staging;
        int ret;
 
-       ret = kdbus_copy_from_user(&size, KDBUS_PTR(cmd_send->msg_address),
-                                  sizeof(size));
+       /*
+        * Examine user-supplied message and figure out how many resources we
+        * need to allocate in our staging area. This requires us to iterate
+        * the message twice, but saves us from re-allocating our resources
+        * all the time.
+        */
+
+       ret = kdbus_msg_examine(msg, bus, cmd, &n_memfds, &n_fds, &n_parts);
        if (ret < 0)
                return ERR_PTR(ret);
 
-       if (size < sizeof(struct kdbus_msg) || size > KDBUS_MSG_MAX_SIZE)
-               return ERR_PTR(-EINVAL);
+       n_parts += reserved_parts;
 
-       m = kmalloc(size + KDBUS_KMSG_HEADER_SIZE, GFP_KERNEL);
-       if (!m)
-               return ERR_PTR(-ENOMEM);
+       /*
+        * Allocate staging area with the number of required resources. Make
+        * sure that we have enough iovecs for all required parts pre-allocated
+        * so this will hopefully be the only memory allocation for this
+        * message transaction.
+        */
 
-       memset(m, 0, KDBUS_KMSG_HEADER_SIZE);
-       m->seq = atomic64_inc_return(&conn->ep->bus->domain->last_id);
+       staging = kdbus_staging_new(bus, n_parts, 0);
+       if (IS_ERR(staging))
+               return ERR_CAST(staging);
 
-       m->proc_meta = kdbus_meta_proc_new();
-       if (IS_ERR(m->proc_meta)) {
-               ret = PTR_ERR(m->proc_meta);
-               m->proc_meta = NULL;
-               goto exit_free;
-       }
+       staging->msg = msg;
 
-       m->conn_meta = kdbus_meta_conn_new();
-       if (IS_ERR(m->conn_meta)) {
-               ret = PTR_ERR(m->conn_meta);
-               m->conn_meta = NULL;
-               goto exit_free;
-       }
+       /*
+        * If the message contains memfds or fd items, we need to remember some
+        * state so we can fill in the requested information at RECV time.
+        * File-descriptors cannot be passed at SEND time. Hence, allocate a
+        * gaps-object to remember that state. That gaps object is linked to
+        * from the staging area, but will also be linked to from the message
+        * queue of each peer. Hence, each receiver owns a reference to it, and
+        * it will later be used to fill the 'gaps' in the message that couldn't
+        * be filled at SEND time.
+        * Note that the 'gaps' object is read-only once the staging-allocator
+        * returns. There might be connections receiving a queued message while
+        * the sender still broadcasts the message to other receivers.
+        */
 
-       if (copy_from_user(&m->msg, KDBUS_PTR(cmd_send->msg_address), size)) {
-               ret = -EFAULT;
-               goto exit_free;
+       if (n_memfds > 0 || n_fds > 0) {
+               staging->gaps = kdbus_gaps_new(n_memfds, n_fds);
+               if (IS_ERR(staging->gaps)) {
+                       ret = PTR_ERR(staging->gaps);
+                       staging->gaps = NULL;
+                       kdbus_staging_free(staging);
+                       return ERR_PTR(ret);
+               }
        }
 
-       if (m->msg.size != size) {
-               ret = -EINVAL;
-               goto exit_free;
-       }
+       /*
+        * kdbus_staging_new() already reserves parts for message setup. For
+        * user-supplied messages, we add the following iovecs:
+        *   ... variable number of iovecs for payload ...
+        *   * final iovec for possible padding of payload
+        *
+        * Make sure to update @reserved_parts if you add more parts here.
+        */
+
+       ret = kdbus_staging_import(staging); /* payload */
+       kdbus_staging_reserve(staging); /* payload padding */
+
+       if (ret < 0)
+               goto error;
+
+       return staging;
 
-       if (m->msg.flags & ~(KDBUS_MSG_EXPECT_REPLY |
-                            KDBUS_MSG_NO_AUTO_START |
-                            KDBUS_MSG_SIGNAL)) {
-               ret = -EINVAL;
-               goto exit_free;
+error:
+       kdbus_staging_free(staging);
+       return ERR_PTR(ret);
+}
+
+struct kdbus_staging *kdbus_staging_free(struct kdbus_staging *staging)
+{
+       if (!staging)
+               return NULL;
+
+       kdbus_meta_conn_unref(staging->meta_conn);
+       kdbus_meta_proc_unref(staging->meta_proc);
+       kdbus_gaps_unref(staging->gaps);
+       kfree(staging);
+
+       return NULL;
+}
+
+static int kdbus_staging_collect_metadata(struct kdbus_staging *staging,
+                                         struct kdbus_conn *src,
+                                         struct kdbus_conn *dst,
+                                         u64 *out_attach)
+{
+       u64 attach;
+       int ret;
+
+       if (src)
+               attach = kdbus_meta_msg_mask(src, dst);
+       else
+               attach = KDBUS_ATTACH_TIMESTAMP; /* metadata for kernel msgs */
+
+       if (src && !src->meta_fake) {
+               ret = kdbus_meta_proc_collect(staging->meta_proc, attach);
+               if (ret < 0)
+                       return ret;
        }
 
-       ret = kdbus_items_validate(m->msg.items,
-                                  KDBUS_ITEMS_SIZE(&m->msg, items));
+       ret = kdbus_meta_conn_collect(staging->meta_conn, src,
+                                     staging->msg_seqnum, attach);
        if (ret < 0)
-               goto exit_free;
+               return ret;
 
-       m->res = kdbus_msg_resources_new();
-       if (IS_ERR(m->res)) {
-               ret = PTR_ERR(m->res);
-               m->res = NULL;
-               goto exit_free;
+       *out_attach = attach;
+       return 0;
+}
+
+/**
+ * kdbus_staging_emit() - emit linearized message in target pool
+ * @staging:           staging object to create message from
+ * @src:               sender of the message (or NULL)
+ * @dst:               target connection to allocate message for
+ *
+ * This allocates a pool-slice for @dst and copies the message provided by
+ * @staging into it. The new slice is then returned to the caller for further
+ * processing. It's not linked into any queue, yet.
+ *
+ * Return: Newly allocated slice or ERR_PTR on failure.
+ */
+struct kdbus_pool_slice *kdbus_staging_emit(struct kdbus_staging *staging,
+                                           struct kdbus_conn *src,
+                                           struct kdbus_conn *dst)
+{
+       struct kdbus_item *item, *meta_items = NULL;
+       struct kdbus_pool_slice *slice = NULL;
+       size_t off, size, meta_size;
+       struct iovec *v;
+       u64 attach, msg_size;
+       int ret;
+
+       /*
+        * Step 1:
+        * Collect metadata from @src depending on the attach-flags allowed for
+        * @dst. Translate it into the namespaces pinned by @dst.
+        */
+
+       ret = kdbus_staging_collect_metadata(staging, src, dst, &attach);
+       if (ret < 0)
+               goto error;
+
+       ret = kdbus_meta_emit(staging->meta_proc, NULL, staging->meta_conn,
+                             dst, attach, &meta_items, &meta_size);
+       if (ret < 0)
+               goto error;
+
+       /*
+        * Step 2:
+        * Setup iovecs for the message. See kdbus_staging_new() for allocation
+        * of those iovecs. All reserved iovecs have been initialized with
+        * iov_len=0 + iov_base=zeros. Furthermore, the iovecs to copy the
+        * actual message payload have already been initialized and need not be
+        * touched.
+        */
+
+       v = staging->parts;
+       msg_size = staging->msg->size;
+
+       /* msg.size */
+       v->iov_len = sizeof(msg_size);
+       v->iov_base = (void __user *)&msg_size;
+       ++v;
+
+       /* msg (after msg.size) plus items */
+       v->iov_len = staging->msg->size - sizeof(staging->msg->size);
+       v->iov_base = (void __user *)((u8 *)staging->msg +
+                                     sizeof(staging->msg->size));
+       ++v;
+
+       /* padding after msg */
+       v->iov_len = KDBUS_ALIGN8(staging->msg->size) - staging->msg->size;
+       v->iov_base = (void __user *)zeros;
+       ++v;
+
+       if (meta_size > 0) {
+               /* metadata items */
+               v->iov_len = meta_size;
+               v->iov_base = (void __user *)meta_items;
+               ++v;
+
+               /* padding after metadata */
+               v->iov_len = KDBUS_ALIGN8(meta_size) - meta_size;
+               v->iov_base = (void __user *)zeros;
+               ++v;
+
+               msg_size = KDBUS_ALIGN8(msg_size) + meta_size;
+       } else {
+               /* metadata items */
+               v->iov_len = 0;
+               v->iov_base = (void __user *)zeros;
+               ++v;
+
+               /* padding after metadata */
+               v->iov_len = 0;
+               v->iov_base = (void __user *)zeros;
+               ++v;
        }
 
-       /* do not accept kernel-generated messages */
-       if (m->msg.payload_type == KDBUS_PAYLOAD_KERNEL) {
-               ret = -EINVAL;
-               goto exit_free;
+       /* ... payload iovecs are already filled in ... */
+
+       /* compute overall size and fill in padding after payload */
+       size = KDBUS_ALIGN8(msg_size);
+
+       if (staging->n_payload > 0) {
+               size += staging->n_payload;
+
+               v = &staging->parts[staging->n_parts - 1];
+               v->iov_len = KDBUS_ALIGN8(size) - size;
+               v->iov_base = (void __user *)zeros;
+
+               size = KDBUS_ALIGN8(size);
        }
 
-       if (m->msg.flags & KDBUS_MSG_EXPECT_REPLY) {
-               /* requests for replies need timeout and cookie */
-               if (m->msg.timeout_ns == 0 || m->msg.cookie == 0) {
-                       ret = -EINVAL;
-                       goto exit_free;
-               }
+       /*
+        * Step 3:
+        * The PAYLOAD_OFF items in the message contain a relative 'offset'
+        * field that tells the receiver where to find the actual payload. This
+        * offset is relative to the start of the message, and as such depends
+        * on the size of the metadata items we inserted. This size is variable
+        * and changes for each peer we send the message to. Hence, we remember
+        * the last relative offset that was used to calculate the 'offset'
+        * fields. For each message, we re-calculate it and patch all items, in
+        * case it changed.
+        */
 
-               /* replies may not be expected for broadcasts */
-               if (m->msg.dst_id == KDBUS_DST_ID_BROADCAST) {
-                       ret = -ENOTUNIQ;
-                       goto exit_free;
-               }
+       off = KDBUS_ALIGN8(msg_size);
 
-               /* replies may not be expected for signals */
-               if (m->msg.flags & KDBUS_MSG_SIGNAL) {
-                       ret = -EINVAL;
-                       goto exit_free;
-               }
-       } else {
-               /*
-                * KDBUS_SEND_SYNC_REPLY is only valid together with
-                * KDBUS_MSG_EXPECT_REPLY
-                */
-               if (cmd_send->flags & KDBUS_SEND_SYNC_REPLY) {
-                       ret = -EINVAL;
-                       goto exit_free;
-               }
+       if (off != staging->i_payload) {
+               KDBUS_ITEMS_FOREACH(item, staging->msg->items,
+                                   KDBUS_ITEMS_SIZE(staging->msg, items)) {
+                       if (item->type != KDBUS_ITEM_PAYLOAD_OFF)
+                               continue;
 
-               /* replies cannot be signals */
-               if (m->msg.cookie_reply && (m->msg.flags & KDBUS_MSG_SIGNAL)) {
-                       ret = -EINVAL;
-                       goto exit_free;
+                       item->vec.offset -= staging->i_payload;
+                       item->vec.offset += off;
                }
+
+               staging->i_payload = off;
        }
 
-       ret = kdbus_msg_scan_items(m, conn->ep->bus);
+       /*
+        * Step 4:
+        * Allocate pool slice and copy over all data. Make sure to properly
+        * account on user quota.
+        */
+
+       ret = kdbus_conn_quota_inc(dst, src ? src->user : NULL, size,
+                                  staging->gaps ? staging->gaps->n_fds : 0);
        if (ret < 0)
-               goto exit_free;
+               goto error;
 
-       /* patch-in the source of this message */
-       if (m->msg.src_id > 0 && m->msg.src_id != conn->id) {
-               ret = -EINVAL;
-               goto exit_free;
+       slice = kdbus_pool_slice_alloc(dst->pool, size, true);
+       if (IS_ERR(slice)) {
+               ret = PTR_ERR(slice);
+               slice = NULL;
+               goto error;
        }
-       m->msg.src_id = conn->id;
 
-       return m;
+       WARN_ON(kdbus_pool_slice_size(slice) != size);
 
-exit_free:
-       kdbus_kmsg_free(m);
-       return ERR_PTR(ret);
+       ret = kdbus_pool_slice_copy_iovec(slice, 0, staging->parts,
+                                         staging->n_parts, size);
+       if (ret < 0)
+               goto error;
+
+       /* all done, return slice to caller */
+       goto exit;
+
+error:
+       if (slice)
+               kdbus_conn_quota_dec(dst, src ? src->user : NULL, size,
+                                    staging->gaps ? staging->gaps->n_fds : 0);
+       kdbus_pool_slice_release(slice);
+       slice = ERR_PTR(ret);
+exit:
+       kfree(meta_items);
+       return slice;
 }
index af47758..298f9c9 100644 (file)
 #ifndef __KDBUS_MESSAGE_H
 #define __KDBUS_MESSAGE_H
 
-#include "util.h"
-#include "metadata.h"
+#include <linux/fs.h>
+#include <linux/kref.h>
+#include <uapi/linux/kdbus.h>
 
-/**
- * enum kdbus_msg_data_type - Type of kdbus_msg_data payloads
- * @KDBUS_MSG_DATA_VEC:                Data vector provided by user-space
- * @KDBUS_MSG_DATA_MEMFD:      Memfd payload
- */
-enum kdbus_msg_data_type {
-       KDBUS_MSG_DATA_VEC,
-       KDBUS_MSG_DATA_MEMFD,
-};
-
-/**
- * struct kdbus_msg_data - Data payload as stored by messages
- * @type:      Type of payload (KDBUS_MSG_DATA_*)
- * @size:      Size of the described payload
- * @off:       The offset, relative to the vec slice
- * @start:     Offset inside the memfd
- * @file:      Backing file referenced by the memfd
- */
-struct kdbus_msg_data {
-       unsigned int type;
-       u64 size;
-
-       union {
-               struct {
-                       u64 off;
-               } vec;
-               struct {
-                       u64 start;
-                       struct file *file;
-               } memfd;
-       };
-};
+struct kdbus_bus;
+struct kdbus_conn;
+struct kdbus_meta_conn;
+struct kdbus_meta_proc;
+struct kdbus_pool_slice;
 
 /**
- * struct kdbus_kmsg_resources - resources of a message
+ * struct kdbus_gaps - gaps in message to be filled later
  * @kref:              Reference counter
- * @dst_name:          Short-cut to msg for faster lookup
- * @fds:               Array of file descriptors to pass
- * @fds_count:         Number of file descriptors to pass
- * @data:              Array of data payloads
- * @vec_count:         Number of VEC entries
- * @memfd_count:       Number of MEMFD entries in @data
- * @data_count:                Sum of @vec_count + @memfd_count
+ * @n_memfds:          Number of memfds
+ * @memfd_offsets:     Offsets of kdbus_memfd items in target slice
+ * @memfd_files:       Array of memfd files
+ * @n_fds:             Number of fds
+ * @fd_files:          Array of sent fds
+ * @fd_offset:         Offset of fd-array in target slice
+ *
+ * The 'gaps' object is used to track data that is needed to fill gaps in a
+ * message at RECV time. Usually, we try to compile the whole message at SEND
+ * time. This has the advantage that we don't have to cache any information and
+ * can keep the memory consumption small. Furthermore, all copy operations can
+ * be combined into a single function call, which speeds up transactions
+ * considerably.
+ * However, things like file-descriptors can only be fully installed at RECV
+ * time. The gaps object tracks this data and pins it until a message is
+ * received. The gaps object is shared between all receivers of the same
+ * message.
  */
-struct kdbus_msg_resources {
+struct kdbus_gaps {
        struct kref kref;
-       const char *dst_name;
 
-       struct file **fds;
-       unsigned int fds_count;
+       /* state tracking for KDBUS_ITEM_PAYLOAD_MEMFD entries */
+       size_t n_memfds;
+       u64 *memfd_offsets;
+       struct file **memfd_files;
 
-       struct kdbus_msg_data *data;
-       size_t vec_count;
-       size_t memfd_count;
-       size_t data_count;
+       /* state tracking for KDBUS_ITEM_FDS */
+       size_t n_fds;
+       struct file **fd_files;
+       u64 fd_offset;
 };
 
-struct kdbus_msg_resources *
-kdbus_msg_resources_ref(struct kdbus_msg_resources *r);
-struct kdbus_msg_resources *
-kdbus_msg_resources_unref(struct kdbus_msg_resources *r);
+struct kdbus_gaps *kdbus_gaps_ref(struct kdbus_gaps *gaps);
+struct kdbus_gaps *kdbus_gaps_unref(struct kdbus_gaps *gaps);
+int kdbus_gaps_install(struct kdbus_gaps *gaps, struct kdbus_pool_slice *slice,
+                      bool *out_incomplete);
 
 /**
- * struct kdbus_kmsg - internal message handling data
- * @seq:               Domain-global message sequence number
- * @notify_type:       Short-cut for faster lookup
- * @notify_old_id:     Short-cut for faster lookup
- * @notify_new_id:     Short-cut for faster lookup
- * @notify_name:       Short-cut for faster lookup
- * @dst_name_id:       Short-cut to msg for faster lookup
- * @bloom_filter:      Bloom filter to match message properties
- * @bloom_generation:  Generation of bloom element set
- * @notify_entry:      List of kernel-generated notifications
- * @iov:               Array of iovec, describing the payload to copy
- * @iov_count:         Number of array members in @iov
- * @pool_size:         Overall size of inlined data referenced by @iov
- * @proc_meta:         Appended SCM-like metadata of the sending process
- * @conn_meta:         Appended SCM-like metadata of the sending connection
- * @res:               Message resources
- * @msg:               Message from or to userspace
+ * struct kdbus_staging - staging area to import messages
+ * @msg:               User-supplied message
+ * @gaps:              Gaps-object created during import (or NULL if empty)
+ * @msg_seqnum:                Message sequence number
+ * @notify_entry:      Entry into list of kernel-generated notifications
+ * @i_payload:         Current relative index of start of payload
+ * @n_payload:         Total number of bytes needed for payload
+ * @n_parts:           Number of parts
+ * @parts:             Array of iovecs that make up the whole message
+ * @meta_proc:         Process metadata of the sender (or NULL if empty)
+ * @meta_conn:         Connection metadata of the sender (or NULL if empty)
+ * @bloom_filter:      Pointer to the bloom-item in @msg, or NULL
+ * @dst_name:          Pointer to the dst-name-item in @msg, or NULL
+ * @notify:            Pointer to the notification item in @msg, or NULL
+ *
+ * The kdbus_staging object is a temporary staging area to import user-supplied
+ * messages into the kernel. It is only used during SEND and dropped once the
+ * message is queued. Any data that cannot be collected during SEND is
+ * collected in a kdbus_gaps object and attached to the message queue.
  */
-struct kdbus_kmsg {
-       u64 seq;
-       u64 notify_type;
-       u64 notify_old_id;
-       u64 notify_new_id;
-       const char *notify_name;
-
-       u64 dst_name_id;
-       const struct kdbus_bloom_filter *bloom_filter;
-       u64 bloom_generation;
+struct kdbus_staging {
+       struct kdbus_msg *msg;
+       struct kdbus_gaps *gaps;
+       u64 msg_seqnum;
        struct list_head notify_entry;
 
-       struct iovec *iov;
-       size_t iov_count;
-       u64 pool_size;
+       /* crafted iovecs to copy the message */
+       size_t i_payload;
+       size_t n_payload;
+       size_t n_parts;
+       struct iovec *parts;
 
-       struct kdbus_meta_proc *proc_meta;
-       struct kdbus_meta_conn *conn_meta;
-       struct kdbus_msg_resources *res;
+       /* metadata state */
+       struct kdbus_meta_proc *meta_proc;
+       struct kdbus_meta_conn *meta_conn;
 
-       /* variable size, must be the last member */
-       struct kdbus_msg msg;
+       /* cached pointers into @msg */
+       const struct kdbus_bloom_filter *bloom_filter;
+       const char *dst_name;
+       struct kdbus_item *notify;
 };
 
-struct kdbus_bus;
-struct kdbus_conn;
-
-struct kdbus_kmsg *kdbus_kmsg_new(struct kdbus_bus *bus, size_t extra_size);
-struct kdbus_kmsg *kdbus_kmsg_new_from_cmd(struct kdbus_conn *conn,
-                                          struct kdbus_cmd_send *cmd_send);
-void kdbus_kmsg_free(struct kdbus_kmsg *kmsg);
+struct kdbus_staging *kdbus_staging_new_kernel(struct kdbus_bus *bus,
+                                              u64 dst, u64 cookie_timeout,
+                                              size_t it_size, size_t it_type);
+struct kdbus_staging *kdbus_staging_new_user(struct kdbus_bus *bus,
+                                            struct kdbus_cmd_send *cmd,
+                                            struct kdbus_msg *msg);
+struct kdbus_staging *kdbus_staging_free(struct kdbus_staging *staging);
+struct kdbus_pool_slice *kdbus_staging_emit(struct kdbus_staging *staging,
+                                           struct kdbus_conn *src,
+                                           struct kdbus_conn *dst);
 
 #endif
index 501bebd..71ca475 100644 (file)
@@ -29,7 +29,6 @@
 #include <linux/uidgid.h>
 #include <linux/uio.h>
 #include <linux/user_namespace.h>
-#include <linux/version.h>
 
 #include "bus.h"
 #include "connection.h"
  * @lock:              Object lock
  * @collected:         Bitmask of collected items
  * @valid:             Bitmask of collected and valid items
- * @uid:               UID of process
- * @euid:              EUID of process
- * @suid:              SUID of process
- * @fsuid:             FSUID of process
- * @gid:               GID of process
- * @egid:              EGID of process
- * @sgid:              SGID of process
- * @fsgid:             FSGID of process
+ * @cred:              Credentials
  * @pid:               PID of process
  * @tgid:              TGID of process
  * @ppid:              PPID of process
- * @auxgrps:           Auxiliary groups
- * @n_auxgrps:         Number of items in @auxgrps
  * @tid_comm:          TID comm line
  * @pid_comm:          PID comm line
  * @exe_path:          Executable path
  * @root_path:         Root-FS path
  * @cmdline:           Command-line
  * @cgroup:            Full cgroup path
- * @caps:              Capabilities
- * @caps_namespace:    User-namespace of @caps
  * @seclabel:          Seclabel
  * @audit_loginuid:    Audit login-UID
  * @audit_sessionid:   Audit session-ID
@@ -77,18 +65,15 @@ struct kdbus_meta_proc {
        u64 valid;
 
        /* KDBUS_ITEM_CREDS */
-       kuid_t uid, euid, suid, fsuid;
-       kgid_t gid, egid, sgid, fsgid;
+       /* KDBUS_ITEM_AUXGROUPS */
+       /* KDBUS_ITEM_CAPS */
+       const struct cred *cred;
 
        /* KDBUS_ITEM_PIDS */
        struct pid *pid;
        struct pid *tgid;
        struct pid *ppid;
 
-       /* KDBUS_ITEM_AUXGROUPS */
-       kgid_t *auxgrps;
-       size_t n_auxgrps;
-
        /* KDBUS_ITEM_TID_COMM */
        char tid_comm[TASK_COMM_LEN];
        /* KDBUS_ITEM_PID_COMM */
@@ -104,16 +89,6 @@ struct kdbus_meta_proc {
        /* KDBUS_ITEM_CGROUP */
        char *cgroup;
 
-       /* KDBUS_ITEM_CAPS */
-       struct caps {
-               /* binary compatible to kdbus_caps */
-               u32 last_cap;
-               struct {
-                       u32 caps[_KERNEL_CAPABILITY_U32S];
-               } set[4];
-       } caps;
-       struct user_namespace *caps_namespace;
-
        /* KDBUS_ITEM_SECLABEL */
        char *seclabel;
 
@@ -150,6 +125,14 @@ struct kdbus_meta_conn {
        char *conn_description;
 };
 
+/*
+ * Fixed size equivalent of "kdbus_caps".
+ *
+ * NOTE(review): the struct caps that this patch removes from kdbus_meta_proc
+ * was filled as set[0] = inheritable, set[1] = permitted, set[2] = effective
+ * and set[3] = bounding set. Presumably this layout is unchanged here -
+ * confirm against the code that fills this struct.
+ */
+struct kdbus_meta_caps {
+       u32 last_cap;
+       struct {
+               u32 caps[_KERNEL_CAPABILITY_U32S];
+       } set[4];
+};
+
 /**
  * kdbus_meta_proc_new() - Create process metadata object
  *
@@ -176,13 +159,13 @@ static void kdbus_meta_proc_free(struct kref *kref)
 
        path_put(&mp->exe_path);
        path_put(&mp->root_path);
-       put_user_ns(mp->caps_namespace);
+       if (mp->cred)
+               put_cred(mp->cred);
        put_pid(mp->ppid);
        put_pid(mp->tgid);
        put_pid(mp->pid);
 
        kfree(mp->seclabel);
-       kfree(mp->auxgrps);
        kfree(mp->cmdline);
        kfree(mp->cgroup);
        kfree(mp);
@@ -214,21 +197,6 @@ struct kdbus_meta_proc *kdbus_meta_proc_unref(struct kdbus_meta_proc *mp)
        return NULL;
 }
 
-static void kdbus_meta_proc_collect_creds(struct kdbus_meta_proc *mp)
-{
-       mp->uid         = current_uid();
-       mp->euid        = current_euid();
-       mp->suid        = current_suid();
-       mp->fsuid       = current_fsuid();
-
-       mp->gid         = current_gid();
-       mp->egid        = current_egid();
-       mp->sgid        = current_sgid();
-       mp->fsgid       = current_fsgid();
-
-       mp->valid |= KDBUS_ATTACH_CREDS;
-}
-
 static void kdbus_meta_proc_collect_pids(struct kdbus_meta_proc *mp)
 {
        struct task_struct *parent;
@@ -244,32 +212,6 @@ static void kdbus_meta_proc_collect_pids(struct kdbus_meta_proc *mp)
        mp->valid |= KDBUS_ATTACH_PIDS;
 }
 
-static int kdbus_meta_proc_collect_auxgroups(struct kdbus_meta_proc *mp)
-{
-       struct group_info *info;
-       size_t i;
-
-       info = get_current_groups();
-
-       if (info->ngroups > 0) {
-               mp->auxgrps = kmalloc_array(info->ngroups, sizeof(kgid_t),
-                                           GFP_KERNEL);
-               if (!mp->auxgrps) {
-                       put_group_info(info);
-                       return -ENOMEM;
-               }
-
-               for (i = 0; i < info->ngroups; i++)
-                       mp->auxgrps[i] = GROUP_AT(info, i);
-       }
-
-       mp->n_auxgrps = info->ngroups;
-       put_group_info(info);
-       mp->valid |= KDBUS_ATTACH_AUXGROUPS;
-
-       return 0;
-}
-
 static void kdbus_meta_proc_collect_tid_comm(struct kdbus_meta_proc *mp)
 {
        get_task_comm(mp->tid_comm, current);
@@ -284,42 +226,29 @@ static void kdbus_meta_proc_collect_pid_comm(struct kdbus_meta_proc *mp)
 
 static void kdbus_meta_proc_collect_exe(struct kdbus_meta_proc *mp)
 {
-       struct mm_struct *mm;
-
-       mm = get_task_mm(current);
-       if (!mm)
-               return;
+       struct file *exe_file;
 
-       down_read(&mm->mmap_sem);
-       if (mm->exe_file) {
-               mp->exe_path = mm->exe_file->f_path;
+       rcu_read_lock();
+       exe_file = rcu_dereference(current->mm->exe_file);
+       if (exe_file) {
+               mp->exe_path = exe_file->f_path;
                path_get(&mp->exe_path);
                get_fs_root(current->fs, &mp->root_path);
                mp->valid |= KDBUS_ATTACH_EXE;
        }
-       up_read(&mm->mmap_sem);
-
-       mmput(mm);
+       rcu_read_unlock();
 }
 
 static int kdbus_meta_proc_collect_cmdline(struct kdbus_meta_proc *mp)
 {
-       struct mm_struct *mm;
+       struct mm_struct *mm = current->mm;
        char *cmdline;
 
-       mm = get_task_mm(current);
-       if (!mm)
-               return 0;
-
-       if (mm->arg_start >= mm->arg_end) {
-               mmput(mm);
+       if (!mm->arg_end)
                return 0;
-       }
 
        cmdline = strndup_user((const char __user *)mm->arg_start,
                               mm->arg_end - mm->arg_start);
-       mmput(mm);
-
        if (IS_ERR(cmdline))
                return PTR_ERR(cmdline);
 
@@ -355,30 +284,6 @@ static int kdbus_meta_proc_collect_cgroup(struct kdbus_meta_proc *mp)
        return 0;
 }
 
-static void kdbus_meta_proc_collect_caps(struct kdbus_meta_proc *mp)
-{
-       const struct cred *c = current_cred();
-       int i;
-
-       /* ABI: "last_cap" equals /proc/sys/kernel/cap_last_cap */
-       mp->caps.last_cap = CAP_LAST_CAP;
-       mp->caps_namespace = get_user_ns(current_user_ns());
-
-       CAP_FOR_EACH_U32(i) {
-               mp->caps.set[0].caps[i] = c->cap_inheritable.cap[i];
-               mp->caps.set[1].caps[i] = c->cap_permitted.cap[i];
-               mp->caps.set[2].caps[i] = c->cap_effective.cap[i];
-               mp->caps.set[3].caps[i] = c->cap_bset.cap[i];
-       }
-
-       /* clear unused bits */
-       for (i = 0; i < 4; i++)
-               mp->caps.set[i].caps[CAP_TO_INDEX(CAP_LAST_CAP)] &=
-                                               CAP_LAST_U32_VALID_MASK;
-
-       mp->valid |= KDBUS_ATTACH_CAPS;
-}
-
 static int kdbus_meta_proc_collect_seclabel(struct kdbus_meta_proc *mp)
 {
 #ifdef CONFIG_SECURITY
@@ -445,10 +350,17 @@ int kdbus_meta_proc_collect(struct kdbus_meta_proc *mp, u64 what)
 
        mutex_lock(&mp->lock);
 
-       if ((what & KDBUS_ATTACH_CREDS) &&
-           !(mp->collected & KDBUS_ATTACH_CREDS)) {
-               kdbus_meta_proc_collect_creds(mp);
-               mp->collected |= KDBUS_ATTACH_CREDS;
+       /* creds, auxgrps and caps share "struct cred" as context */
+       {
+               const u64 m_cred = KDBUS_ATTACH_CREDS |
+                                  KDBUS_ATTACH_AUXGROUPS |
+                                  KDBUS_ATTACH_CAPS;
+
+               if ((what & m_cred) && !(mp->collected & m_cred)) {
+                       mp->cred = get_current_cred();
+                       mp->valid |= m_cred;
+                       mp->collected |= m_cred;
+               }
        }
 
        if ((what & KDBUS_ATTACH_PIDS) &&
@@ -457,14 +369,6 @@ int kdbus_meta_proc_collect(struct kdbus_meta_proc *mp, u64 what)
                mp->collected |= KDBUS_ATTACH_PIDS;
        }
 
-       if ((what & KDBUS_ATTACH_AUXGROUPS) &&
-           !(mp->collected & KDBUS_ATTACH_AUXGROUPS)) {
-               ret = kdbus_meta_proc_collect_auxgroups(mp);
-               if (ret < 0)
-                       goto exit_unlock;
-               mp->collected |= KDBUS_ATTACH_AUXGROUPS;
-       }
-
        if ((what & KDBUS_ATTACH_TID_COMM) &&
            !(mp->collected & KDBUS_ATTACH_TID_COMM)) {
                kdbus_meta_proc_collect_tid_comm(mp);
@@ -499,12 +403,6 @@ int kdbus_meta_proc_collect(struct kdbus_meta_proc *mp, u64 what)
                mp->collected |= KDBUS_ATTACH_CGROUP;
        }
 
-       if ((what & KDBUS_ATTACH_CAPS) &&
-           !(mp->collected & KDBUS_ATTACH_CAPS)) {
-               kdbus_meta_proc_collect_caps(mp);
-               mp->collected |= KDBUS_ATTACH_CAPS;
-       }
-
        if ((what & KDBUS_ATTACH_SECLABEL) &&
            !(mp->collected & KDBUS_ATTACH_SECLABEL)) {
                ret = kdbus_meta_proc_collect_seclabel(mp);
@@ -527,101 +425,116 @@ exit_unlock:
 }
 
 /**
- * kdbus_meta_proc_fake() - Fill process metadata from faked credentials
- * @mp:                Metadata
+ * kdbus_meta_fake_new() - Create fake metadata object
+ *
+ * The object is allocated zero-initialized, so its valid mask is empty until
+ * kdbus_meta_fake_collect() fills it in. Release the object with
+ * kdbus_meta_fake_free().
+ *
+ * Return: Pointer to new object on success, ERR_PTR on failure.
+ */
+struct kdbus_meta_fake *kdbus_meta_fake_new(void)
+{
+       struct kdbus_meta_fake *mf;
+
+       mf = kzalloc(sizeof(*mf), GFP_KERNEL);
+       if (!mf)
+               return ERR_PTR(-ENOMEM);
+
+       return mf;
+}
+
+/**
+ * kdbus_meta_fake_free() - Free fake metadata object
+ * @mf:                Fake metadata object
+ *
+ * Drops the references on the ppid, tgid and pid stored in @mf, then frees
+ * the seclabel string and the object itself. Passing NULL is a no-op.
+ *
+ * Return: NULL
+ */
+struct kdbus_meta_fake *kdbus_meta_fake_free(struct kdbus_meta_fake *mf)
+{
+       if (mf) {
+               put_pid(mf->ppid);
+               put_pid(mf->tgid);
+               put_pid(mf->pid);
+               kfree(mf->seclabel);
+               kfree(mf);
+       }
+
+       return NULL;
+}
+
+/**
+ * kdbus_meta_fake_collect() - Fill fake metadata from faked credentials
+ * @mf:                Fake metadata object
  * @creds:     Creds to set, may be %NULL
  * @pids:      PIDs to set, may be %NULL
  * @seclabel:  Seclabel to set, may be %NULL
  *
  * This function takes information stored in @creds, @pids and @seclabel and
- * resolves them to kernel-representations, if possible. A call to this function
- * is considered an alternative to calling kdbus_meta_add_current(), which
- * derives the same information from the 'current' task.
- *
- * This call uses the current task's namespaces to resolve the given
- * information.
+ * resolves them to kernel-representations, if possible. This call uses the
+ * current task's namespaces to resolve the given information.
  *
- * Return: 0 on success, negative error number otherwise.
+ * Return: 0 on success, negative error code on failure.
  */
-int kdbus_meta_proc_fake(struct kdbus_meta_proc *mp,
-                        const struct kdbus_creds *creds,
-                        const struct kdbus_pids *pids,
-                        const char *seclabel)
+int kdbus_meta_fake_collect(struct kdbus_meta_fake *mf,
+                           const struct kdbus_creds *creds,
+                           const struct kdbus_pids *pids,
+                           const char *seclabel)
 {
-       int ret;
+       if (mf->valid)
+               return -EALREADY;
 
-       if (!mp)
-               return 0;
-
-       mutex_lock(&mp->lock);
-
-       if (creds && !(mp->collected & KDBUS_ATTACH_CREDS)) {
+       if (creds) {
                struct user_namespace *ns = current_user_ns();
 
-               mp->uid         = make_kuid(ns, creds->uid);
-               mp->euid        = make_kuid(ns, creds->euid);
-               mp->suid        = make_kuid(ns, creds->suid);
-               mp->fsuid       = make_kuid(ns, creds->fsuid);
-
-               mp->gid         = make_kgid(ns, creds->gid);
-               mp->egid        = make_kgid(ns, creds->egid);
-               mp->sgid        = make_kgid(ns, creds->sgid);
-               mp->fsgid       = make_kgid(ns, creds->fsgid);
-
-               if ((creds->uid   != (uid_t)-1 && !uid_valid(mp->uid))   ||
-                   (creds->euid  != (uid_t)-1 && !uid_valid(mp->euid))  ||
-                   (creds->suid  != (uid_t)-1 && !uid_valid(mp->suid))  ||
-                   (creds->fsuid != (uid_t)-1 && !uid_valid(mp->fsuid)) ||
-                   (creds->gid   != (gid_t)-1 && !gid_valid(mp->gid))   ||
-                   (creds->egid  != (gid_t)-1 && !gid_valid(mp->egid))  ||
-                   (creds->sgid  != (gid_t)-1 && !gid_valid(mp->sgid))  ||
-                   (creds->fsgid != (gid_t)-1 && !gid_valid(mp->fsgid))) {
-                       ret = -EINVAL;
-                       goto exit_unlock;
-               }
-
-               mp->valid |= KDBUS_ATTACH_CREDS;
-               mp->collected |= KDBUS_ATTACH_CREDS;
+               mf->uid         = make_kuid(ns, creds->uid);
+               mf->euid        = make_kuid(ns, creds->euid);
+               mf->suid        = make_kuid(ns, creds->suid);
+               mf->fsuid       = make_kuid(ns, creds->fsuid);
+
+               mf->gid         = make_kgid(ns, creds->gid);
+               mf->egid        = make_kgid(ns, creds->egid);
+               mf->sgid        = make_kgid(ns, creds->sgid);
+               mf->fsgid       = make_kgid(ns, creds->fsgid);
+
+               if ((creds->uid   != (uid_t)-1 && !uid_valid(mf->uid))   ||
+                   (creds->euid  != (uid_t)-1 && !uid_valid(mf->euid))  ||
+                   (creds->suid  != (uid_t)-1 && !uid_valid(mf->suid))  ||
+                   (creds->fsuid != (uid_t)-1 && !uid_valid(mf->fsuid)) ||
+                   (creds->gid   != (gid_t)-1 && !gid_valid(mf->gid))   ||
+                   (creds->egid  != (gid_t)-1 && !gid_valid(mf->egid))  ||
+                   (creds->sgid  != (gid_t)-1 && !gid_valid(mf->sgid))  ||
+                   (creds->fsgid != (gid_t)-1 && !gid_valid(mf->fsgid)))
+                       return -EINVAL;
+
+               mf->valid |= KDBUS_ATTACH_CREDS;
        }
 
-       if (pids && !(mp->collected & KDBUS_ATTACH_PIDS)) {
-               mp->pid = get_pid(find_vpid(pids->tid));
-               mp->tgid = get_pid(find_vpid(pids->pid));
-               mp->ppid = get_pid(find_vpid(pids->ppid));
-
-               if ((pids->tid != 0 && !mp->pid) ||
-                   (pids->pid != 0 && !mp->tgid) ||
-                   (pids->ppid != 0 && !mp->ppid)) {
-                       put_pid(mp->pid);
-                       put_pid(mp->tgid);
-                       put_pid(mp->ppid);
-                       mp->pid = NULL;
-                       mp->tgid = NULL;
-                       mp->ppid = NULL;
-                       ret = -EINVAL;
-                       goto exit_unlock;
+       if (pids) {
+               mf->pid = get_pid(find_vpid(pids->tid));
+               mf->tgid = get_pid(find_vpid(pids->pid));
+               mf->ppid = get_pid(find_vpid(pids->ppid));
+
+               if ((pids->tid != 0 && !mf->pid) ||
+                   (pids->pid != 0 && !mf->tgid) ||
+                   (pids->ppid != 0 && !mf->ppid)) {
+                       put_pid(mf->pid);
+                       put_pid(mf->tgid);
+                       put_pid(mf->ppid);
+                       mf->pid = NULL;
+                       mf->tgid = NULL;
+                       mf->ppid = NULL;
+                       return -EINVAL;
                }
 
-               mp->valid |= KDBUS_ATTACH_PIDS;
-               mp->collected |= KDBUS_ATTACH_PIDS;
+               mf->valid |= KDBUS_ATTACH_PIDS;
        }
 
-       if (seclabel && !(mp->collected & KDBUS_ATTACH_SECLABEL)) {
-               mp->seclabel = kstrdup(seclabel, GFP_KERNEL);
-               if (!mp->seclabel) {
-                       ret = -ENOMEM;
-                       goto exit_unlock;
-               }
+       if (seclabel) {
+               mf->seclabel = kstrdup(seclabel, GFP_KERNEL);
+               if (!mf->seclabel)
+                       return -ENOMEM;
 
-               mp->valid |= KDBUS_ATTACH_SECLABEL;
-               mp->collected |= KDBUS_ATTACH_SECLABEL;
+               mf->valid |= KDBUS_ATTACH_SECLABEL;
        }
 
-       ret = 0;
-
-exit_unlock:
-       mutex_unlock(&mp->lock);
-       return ret;
+       return 0;
 }
 
 /**
@@ -676,13 +589,13 @@ struct kdbus_meta_conn *kdbus_meta_conn_unref(struct kdbus_meta_conn *mc)
 }
 
 static void kdbus_meta_conn_collect_timestamp(struct kdbus_meta_conn *mc,
-                                             struct kdbus_kmsg *kmsg)
+                                             u64 msg_seqnum)
 {
        mc->ts.monotonic_ns = ktime_get_ns();
        mc->ts.realtime_ns = ktime_get_real_ns();
 
-       if (kmsg)
-               mc->ts.seqnum = kmsg->seq;
+       if (msg_seqnum)
+               mc->ts.seqnum = msg_seqnum;
 
        mc->valid |= KDBUS_ATTACH_TIMESTAMP;
 }
@@ -690,38 +603,46 @@ static void kdbus_meta_conn_collect_timestamp(struct kdbus_meta_conn *mc,
 static int kdbus_meta_conn_collect_names(struct kdbus_meta_conn *mc,
                                         struct kdbus_conn *conn)
 {
-       const struct kdbus_name_entry *e;
+       const struct kdbus_name_owner *owner;
        struct kdbus_item *item;
        size_t slen, size;
 
        lockdep_assert_held(&conn->ep->bus->name_registry->rwlock);
 
        size = 0;
-       list_for_each_entry(e, &conn->names_list, conn_entry)
-               size += KDBUS_ITEM_SIZE(sizeof(struct kdbus_name) +
-                                       strlen(e->name) + 1);
+       /* open-code length calculation to avoid final padding */
+       list_for_each_entry(owner, &conn->names_list, conn_entry)
+               if (!(owner->flags & KDBUS_NAME_IN_QUEUE))
+                       size = KDBUS_ALIGN8(size) + KDBUS_ITEM_HEADER_SIZE +
+                               sizeof(struct kdbus_name) +
+                               strlen(owner->name->name) + 1;
 
        if (!size)
                return 0;
 
-       item = kmalloc(size, GFP_KERNEL);
+       /* make sure we include zeroed padding for convenience helpers */
+       item = kmalloc(KDBUS_ALIGN8(size), GFP_KERNEL);
        if (!item)
                return -ENOMEM;
 
        mc->owned_names_items = item;
        mc->owned_names_size = size;
 
-       list_for_each_entry(e, &conn->names_list, conn_entry) {
-               slen = strlen(e->name) + 1;
+       list_for_each_entry(owner, &conn->names_list, conn_entry) {
+               if (owner->flags & KDBUS_NAME_IN_QUEUE)
+                       continue;
+
+               slen = strlen(owner->name->name) + 1;
                kdbus_item_set(item, KDBUS_ITEM_OWNED_NAME, NULL,
                               sizeof(struct kdbus_name) + slen);
-               item->name.flags = e->flags;
-               memcpy(item->name.name, e->name, slen);
+               item->name.flags = owner->flags;
+               memcpy(item->name.name, owner->name->name, slen);
                item = KDBUS_ITEM_NEXT(item);
        }
 
        /* sanity check: the buffer should be completely written now */
-       WARN_ON((u8 *)item != (u8 *)mc->owned_names_items + size);
+       WARN_ON((u8 *)item !=
+                       (u8 *)mc->owned_names_items + KDBUS_ALIGN8(size));
 
        mc->valid |= KDBUS_ATTACH_NAMES;
        return 0;
@@ -744,11 +665,12 @@ static int kdbus_meta_conn_collect_description(struct kdbus_meta_conn *mc,
 /**
  * kdbus_meta_conn_collect() - Collect connection metadata
  * @mc:                Message metadata object
- * @kmsg:      Kmsg to collect data from
  * @conn:      Connection to collect data from
+ * @msg_seqnum:        Sequence number of the message to send
  * @what:      Attach flags to collect
  *
- * This collects connection metadata from @kmsg and @conn and saves it in @mc.
+ * This collects connection metadata from @msg_seqnum and @conn and saves it
+ * in @mc.
  *
  * If KDBUS_ATTACH_NAMES is set in @what and @conn is non-NULL, the caller must
  * hold the name-registry read-lock of conn->ep->bus->registry.
@@ -756,9 +678,8 @@ static int kdbus_meta_conn_collect_description(struct kdbus_meta_conn *mc,
  * Return: 0 on success, negative error code on failure.
  */
 int kdbus_meta_conn_collect(struct kdbus_meta_conn *mc,
-                           struct kdbus_kmsg *kmsg,
                            struct kdbus_conn *conn,
-                           u64 what)
+                           u64 msg_seqnum, u64 what)
 {
        int ret;
 
@@ -769,9 +690,9 @@ int kdbus_meta_conn_collect(struct kdbus_meta_conn *mc,
 
        mutex_lock(&mc->lock);
 
-       if (kmsg && (what & KDBUS_ATTACH_TIMESTAMP) &&
+       if (msg_seqnum && (what & KDBUS_ATTACH_TIMESTAMP) &&
            !(mc->collected & KDBUS_ATTACH_TIMESTAMP)) {
-               kdbus_meta_conn_collect_timestamp(mc, kmsg);
+               kdbus_meta_conn_collect_timestamp(mc, msg_seqnum);
                mc->collected |= KDBUS_ATTACH_TIMESTAMP;
        }
 
@@ -798,258 +719,392 @@ exit_unlock:
        return ret;
 }
 
-/*
- * kdbus_meta_export_prepare() - Prepare metadata for export
- * @mp:                Process metadata, or NULL
- * @mc:                Connection metadata, or NULL
- * @mask:      Pointer to mask of KDBUS_ATTACH_* flags to export
- * @sz:                Pointer to return the size needed by the metadata
- *
- * Does a conservative calculation of how much space metadata information
- * will take up during export. It is 'conservative' because for string
- * translations in namespaces, it will use the kernel namespaces, which is
- * the longest possible version.
- *
- * The actual size consumed by kdbus_meta_export() may hence vary from the
- * one reported here, but it is guaranteed never to be greater.
- *
- * Return: 0 on success, negative error number otherwise.
- */
-int kdbus_meta_export_prepare(struct kdbus_meta_proc *mp,
-                             struct kdbus_meta_conn *mc,
-                             u64 *mask, size_t *sz)
+static void kdbus_meta_export_caps(struct kdbus_meta_caps *out,
+                                  const struct kdbus_meta_proc *mp,
+                                  struct user_namespace *user_ns)
 {
-       char *exe_pathname = NULL;
-       void *exe_page = NULL;
-       size_t size = 0;
-       u64 valid = 0;
-       int ret = 0;
+       struct user_namespace *iter;
+       const struct cred *cred = mp->cred;
+       bool parent = false, owner = false;
+       int i;
 
-       if (mp) {
-               mutex_lock(&mp->lock);
-               valid |= mp->valid;
-               mutex_unlock(&mp->lock);
+       /*
+        * This translates the effective capabilities of 'cred' into the given
+        * user-namespace. If the given user-namespace is a child-namespace of
+        * the user-namespace of 'cred', the mask can be copied verbatim. If
+        * not, the mask is cleared.
+        * There's one exception: If 'cred' is the owner of any user-namespace
+        * in the path between the given user-namespace and the user-namespace
+        * of 'cred', then it has all effective capabilities set. This means,
+        * the user who created a user-namespace always has all effective
+        * capabilities in any child namespaces. Note that this is based on the
+        * uid of the namespace creator, not the task hierarchy.
+        */
+       for (iter = user_ns; iter; iter = iter->parent) {
+               if (iter == cred->user_ns) {
+                       parent = true;
+                       break;
+               }
+
+               if (iter == &init_user_ns)
+                       break;
+
+               if ((iter->parent == cred->user_ns) &&
+                   uid_eq(iter->owner, cred->euid)) {
+                       owner = true;
+                       break;
+               }
        }
 
-       if (mc) {
-               mutex_lock(&mc->lock);
-               valid |= mc->valid;
-               mutex_unlock(&mc->lock);
+       out->last_cap = CAP_LAST_CAP;
+
+       CAP_FOR_EACH_U32(i) {
+               if (parent) {
+                       out->set[0].caps[i] = cred->cap_inheritable.cap[i];
+                       out->set[1].caps[i] = cred->cap_permitted.cap[i];
+                       out->set[2].caps[i] = cred->cap_effective.cap[i];
+                       out->set[3].caps[i] = cred->cap_bset.cap[i];
+               } else if (owner) {
+                       out->set[0].caps[i] = 0U;
+                       out->set[1].caps[i] = ~0U;
+                       out->set[2].caps[i] = ~0U;
+                       out->set[3].caps[i] = ~0U;
+               } else {
+                       out->set[0].caps[i] = 0U;
+                       out->set[1].caps[i] = 0U;
+                       out->set[2].caps[i] = 0U;
+                       out->set[3].caps[i] = 0U;
+               }
        }
 
-       *mask &= valid;
-       *mask &= kdbus_meta_attach_mask;
+       /* clear unused bits */
+       for (i = 0; i < 4; i++)
+               out->set[i].caps[CAP_TO_INDEX(CAP_LAST_CAP)] &=
+                                       CAP_LAST_U32_VALID_MASK;
+}
 
-       if (!*mask)
-               goto exit;
+/* This is equivalent to from_kuid_munged(), but maps INVALID_UID to itself */
+static uid_t kdbus_from_kuid_keep(struct user_namespace *ns, kuid_t uid)
+{
+       return uid_valid(uid) ? from_kuid_munged(ns, uid) : ((uid_t)-1);
+}
+
+/* This is equivalent to from_kgid_munged(), but maps INVALID_GID to itself */
+static gid_t kdbus_from_kgid_keep(struct user_namespace *ns, kgid_t gid)
+{
+       return gid_valid(gid) ? from_kgid_munged(ns, gid) : ((gid_t)-1);
+}
+
+struct kdbus_meta_staging {
+       const struct kdbus_meta_proc *mp;
+       const struct kdbus_meta_fake *mf;
+       const struct kdbus_meta_conn *mc;
+       const struct kdbus_conn *conn;
+       u64 mask;
+
+       void *exe;
+       const char *exe_path;
+};
+
+static size_t kdbus_meta_measure(struct kdbus_meta_staging *staging)
+{
+       const struct kdbus_meta_proc *mp = staging->mp;
+       const struct kdbus_meta_fake *mf = staging->mf;
+       const struct kdbus_meta_conn *mc = staging->mc;
+       const u64 mask = staging->mask;
+       size_t size = 0;
 
        /* process metadata */
 
-       if (mp && (*mask & KDBUS_ATTACH_CREDS))
+       if (mf && (mask & KDBUS_ATTACH_CREDS))
+               size += KDBUS_ITEM_SIZE(sizeof(struct kdbus_creds));
+       else if (mp && (mask & KDBUS_ATTACH_CREDS))
                size += KDBUS_ITEM_SIZE(sizeof(struct kdbus_creds));
 
-       if (mp && (*mask & KDBUS_ATTACH_PIDS))
+       if (mf && (mask & KDBUS_ATTACH_PIDS))
+               size += KDBUS_ITEM_SIZE(sizeof(struct kdbus_pids));
+       else if (mp && (mask & KDBUS_ATTACH_PIDS))
                size += KDBUS_ITEM_SIZE(sizeof(struct kdbus_pids));
 
-       if (mp && (*mask & KDBUS_ATTACH_AUXGROUPS))
-               size += KDBUS_ITEM_SIZE(mp->n_auxgrps * sizeof(u64));
+       if (mp && (mask & KDBUS_ATTACH_AUXGROUPS))
+               size += KDBUS_ITEM_SIZE(mp->cred->group_info->ngroups *
+                                       sizeof(u64));
 
-       if (mp && (*mask & KDBUS_ATTACH_TID_COMM))
+       if (mp && (mask & KDBUS_ATTACH_TID_COMM))
                size += KDBUS_ITEM_SIZE(strlen(mp->tid_comm) + 1);
 
-       if (mp && (*mask & KDBUS_ATTACH_PID_COMM))
+       if (mp && (mask & KDBUS_ATTACH_PID_COMM))
                size += KDBUS_ITEM_SIZE(strlen(mp->pid_comm) + 1);
 
-       if (mp && (*mask & KDBUS_ATTACH_EXE)) {
-               exe_page = (void *)__get_free_page(GFP_TEMPORARY);
-               if (!exe_page) {
-                       ret = -ENOMEM;
-                       goto exit;
-               }
-
-               exe_pathname = d_path(&mp->exe_path, exe_page, PAGE_SIZE);
-               if (IS_ERR(exe_pathname)) {
-                       ret = PTR_ERR(exe_pathname);
-                       goto exit;
-               }
+       if (staging->exe_path && (mask & KDBUS_ATTACH_EXE))
+               size += KDBUS_ITEM_SIZE(strlen(staging->exe_path) + 1);
 
-               size += KDBUS_ITEM_SIZE(strlen(exe_pathname) + 1);
-               free_page((unsigned long)exe_page);
-       }
-
-       if (mp && (*mask & KDBUS_ATTACH_CMDLINE))
+       if (mp && (mask & KDBUS_ATTACH_CMDLINE))
                size += KDBUS_ITEM_SIZE(strlen(mp->cmdline) + 1);
 
-       if (mp && (*mask & KDBUS_ATTACH_CGROUP))
+       if (mp && (mask & KDBUS_ATTACH_CGROUP))
                size += KDBUS_ITEM_SIZE(strlen(mp->cgroup) + 1);
 
-       if (mp && (*mask & KDBUS_ATTACH_CAPS))
-               size += KDBUS_ITEM_SIZE(sizeof(mp->caps));
+       if (mp && (mask & KDBUS_ATTACH_CAPS))
+               size += KDBUS_ITEM_SIZE(sizeof(struct kdbus_meta_caps));
 
-       if (mp && (*mask & KDBUS_ATTACH_SECLABEL))
+       if (mf && (mask & KDBUS_ATTACH_SECLABEL))
+               size += KDBUS_ITEM_SIZE(strlen(mf->seclabel) + 1);
+       else if (mp && (mask & KDBUS_ATTACH_SECLABEL))
                size += KDBUS_ITEM_SIZE(strlen(mp->seclabel) + 1);
 
-       if (mp && (*mask & KDBUS_ATTACH_AUDIT))
+       if (mp && (mask & KDBUS_ATTACH_AUDIT))
                size += KDBUS_ITEM_SIZE(sizeof(struct kdbus_audit));
 
        /* connection metadata */
 
-       if (mc && (*mask & KDBUS_ATTACH_NAMES))
-               size += mc->owned_names_size;
+       if (mc && (mask & KDBUS_ATTACH_NAMES))
+               size += KDBUS_ALIGN8(mc->owned_names_size);
 
-       if (mc && (*mask & KDBUS_ATTACH_CONN_DESCRIPTION))
+       if (mc && (mask & KDBUS_ATTACH_CONN_DESCRIPTION))
                size += KDBUS_ITEM_SIZE(strlen(mc->conn_description) + 1);
 
-       if (mc && (*mask & KDBUS_ATTACH_TIMESTAMP))
+       if (mc && (mask & KDBUS_ATTACH_TIMESTAMP))
                size += KDBUS_ITEM_SIZE(sizeof(struct kdbus_timestamp));
 
-exit:
-       *sz = size;
-
-       return ret;
+       return size;
 }
 
-static int kdbus_meta_push_kvec(struct kvec *kvec,
-                               struct kdbus_item_header *hdr,
-                               u64 type, void *payload,
-                               size_t payload_size, u64 *size)
+static struct kdbus_item *kdbus_write_head(struct kdbus_item **iter,
+                                          u64 type, u64 size)
 {
-       hdr->type = type;
-       hdr->size = KDBUS_ITEM_HEADER_SIZE + payload_size;
-       kdbus_kvec_set(kvec++, hdr, sizeof(*hdr), size);
-       kdbus_kvec_set(kvec++, payload, payload_size, size);
-       return 2 + !!kdbus_kvec_pad(kvec++, size);
-}
+       struct kdbus_item *item = *iter;
+       size_t padding;
 
-/* This is equivalent to from_kuid_munged(), but maps INVALID_UID to itself */
-static uid_t kdbus_from_kuid_keep(kuid_t uid)
-{
-       return uid_valid(uid) ?
-               from_kuid_munged(current_user_ns(), uid) : ((uid_t)-1);
+       item->type = type;
+       item->size = KDBUS_ITEM_HEADER_SIZE + size;
+
+       /* clear padding */
+       padding = KDBUS_ALIGN8(item->size) - item->size;
+       if (padding)
+               memset(item->data + size, 0, padding);
+
+       *iter = KDBUS_ITEM_NEXT(item);
+       return item;
 }
 
-/* This is equivalent to from_kgid_munged(), but maps INVALID_GID to itself */
-static gid_t kdbus_from_kgid_keep(kgid_t gid)
+static struct kdbus_item *kdbus_write_full(struct kdbus_item **iter,
+                                          u64 type, u64 size, const void *data)
 {
-       return gid_valid(gid) ?
-               from_kgid_munged(current_user_ns(), gid) : ((gid_t)-1);
+       struct kdbus_item *item;
+
+       item = kdbus_write_head(iter, type, size);
+       memcpy(item->data, data, size);
+       return item;
 }
 
-/**
- * kdbus_meta_export() - export information from metadata into a slice
- * @mp:                Process metadata, or NULL
- * @mc:                Connection metadata, or NULL
- * @mask:      Mask of KDBUS_ATTACH_* flags to export
- * @slice:     The slice to export to
- * @offset:    The offset inside @slice to write to
- * @real_size: The real size the metadata consumed
- *
- * This function exports information from metadata into @slice at offset
- * @offset inside that slice. Only information that is requested in @mask
- * and that has been collected before is exported.
- *
- * In order to make sure not to write out of bounds, @mask must be the same
- * value that was previously returned from kdbus_meta_export_prepare(). The
- * function will, however, not necessarily write as many bytes as returned by
- * kdbus_meta_export_prepare(); depending on the namespaces in question, it
- * might use up less than that.
- *
- * All information will be translated using the current namespaces.
- *
- * Return: 0 on success, negative error number otherwise.
- */
-int kdbus_meta_export(struct kdbus_meta_proc *mp,
-                     struct kdbus_meta_conn *mc,
-                     u64 mask,
-                     struct kdbus_pool_slice *slice,
-                     off_t offset,
-                     size_t *real_size)
+static size_t kdbus_meta_write(struct kdbus_meta_staging *staging, void *mem,
+                              size_t size)
 {
-       struct user_namespace *user_ns = current_user_ns();
-       struct kdbus_item_header item_hdr[13], *hdr;
-       char *exe_pathname = NULL;
-       struct kdbus_creds creds;
-       struct kdbus_pids pids;
-       void *exe_page = NULL;
-       struct kvec kvec[40];
-       u64 *auxgrps = NULL;
-       size_t cnt = 0;
-       u64 size = 0;
-       int ret = 0;
-
-       hdr = &item_hdr[0];
+       struct user_namespace *user_ns = staging->conn->cred->user_ns;
+       struct pid_namespace *pid_ns = ns_of_pid(staging->conn->pid);
+       struct kdbus_item *item = NULL, *items = mem;
+       u8 *end, *owned_names_end = NULL;
 
-       /*
-        * TODO: We currently have no sane way of translating a set of caps
-        * between different user namespaces. Until that changes, we have
-        * to drop such items.
-        */
-       if (mp && mp->caps_namespace != user_ns)
-               mask &= ~KDBUS_ATTACH_CAPS;
+       /* process metadata */
 
-       if (mask == 0) {
-               *real_size = 0;
-               return 0;
+       if (staging->mf && (staging->mask & KDBUS_ATTACH_CREDS)) {
+               const struct kdbus_meta_fake *mf = staging->mf;
+
+               item = kdbus_write_head(&items, KDBUS_ITEM_CREDS,
+                                       sizeof(struct kdbus_creds));
+               item->creds = (struct kdbus_creds){
+                       .uid    = kdbus_from_kuid_keep(user_ns, mf->uid),
+                       .euid   = kdbus_from_kuid_keep(user_ns, mf->euid),
+                       .suid   = kdbus_from_kuid_keep(user_ns, mf->suid),
+                       .fsuid  = kdbus_from_kuid_keep(user_ns, mf->fsuid),
+                       .gid    = kdbus_from_kgid_keep(user_ns, mf->gid),
+                       .egid   = kdbus_from_kgid_keep(user_ns, mf->egid),
+                       .sgid   = kdbus_from_kgid_keep(user_ns, mf->sgid),
+                       .fsgid  = kdbus_from_kgid_keep(user_ns, mf->fsgid),
+               };
+       } else if (staging->mp && (staging->mask & KDBUS_ATTACH_CREDS)) {
+               const struct cred *c = staging->mp->cred;
+
+               item = kdbus_write_head(&items, KDBUS_ITEM_CREDS,
+                                       sizeof(struct kdbus_creds));
+               item->creds = (struct kdbus_creds){
+                       .uid    = kdbus_from_kuid_keep(user_ns, c->uid),
+                       .euid   = kdbus_from_kuid_keep(user_ns, c->euid),
+                       .suid   = kdbus_from_kuid_keep(user_ns, c->suid),
+                       .fsuid  = kdbus_from_kuid_keep(user_ns, c->fsuid),
+                       .gid    = kdbus_from_kgid_keep(user_ns, c->gid),
+                       .egid   = kdbus_from_kgid_keep(user_ns, c->egid),
+                       .sgid   = kdbus_from_kgid_keep(user_ns, c->sgid),
+                       .fsgid  = kdbus_from_kgid_keep(user_ns, c->fsgid),
+               };
        }
 
-       /* process metadata */
+       if (staging->mf && (staging->mask & KDBUS_ATTACH_PIDS)) {
+               item = kdbus_write_head(&items, KDBUS_ITEM_PIDS,
+                                       sizeof(struct kdbus_pids));
+               item->pids = (struct kdbus_pids){
+                       .pid = pid_nr_ns(staging->mf->tgid, pid_ns),
+                       .tid = pid_nr_ns(staging->mf->pid, pid_ns),
+                       .ppid = pid_nr_ns(staging->mf->ppid, pid_ns),
+               };
+       } else if (staging->mp && (staging->mask & KDBUS_ATTACH_PIDS)) {
+               item = kdbus_write_head(&items, KDBUS_ITEM_PIDS,
+                                       sizeof(struct kdbus_pids));
+               item->pids = (struct kdbus_pids){
+                       .pid = pid_nr_ns(staging->mp->tgid, pid_ns),
+                       .tid = pid_nr_ns(staging->mp->pid, pid_ns),
+                       .ppid = pid_nr_ns(staging->mp->ppid, pid_ns),
+               };
+       }
+
+       if (staging->mp && (staging->mask & KDBUS_ATTACH_AUXGROUPS)) {
+               const struct group_info *info = staging->mp->cred->group_info;
+               size_t i;
 
-       if (mp && (mask & KDBUS_ATTACH_CREDS)) {
-               creds.uid       = kdbus_from_kuid_keep(mp->uid);
-               creds.euid      = kdbus_from_kuid_keep(mp->euid);
-               creds.suid      = kdbus_from_kuid_keep(mp->suid);
-               creds.fsuid     = kdbus_from_kuid_keep(mp->fsuid);
-               creds.gid       = kdbus_from_kgid_keep(mp->gid);
-               creds.egid      = kdbus_from_kgid_keep(mp->egid);
-               creds.sgid      = kdbus_from_kgid_keep(mp->sgid);
-               creds.fsgid     = kdbus_from_kgid_keep(mp->fsgid);
-
-               cnt += kdbus_meta_push_kvec(kvec + cnt, hdr++, KDBUS_ITEM_CREDS,
-                                           &creds, sizeof(creds), &size);
+               item = kdbus_write_head(&items, KDBUS_ITEM_AUXGROUPS,
+                                       info->ngroups * sizeof(u64));
+               for (i = 0; i < info->ngroups; ++i)
+                       item->data64[i] = from_kgid_munged(user_ns,
+                                                          GROUP_AT(info, i));
        }
 
-       if (mp && (mask & KDBUS_ATTACH_PIDS)) {
-               pids.pid = pid_vnr(mp->tgid);
-               pids.tid = pid_vnr(mp->pid);
-               pids.ppid = pid_vnr(mp->ppid);
+       if (staging->mp && (staging->mask & KDBUS_ATTACH_TID_COMM))
+               item = kdbus_write_full(&items, KDBUS_ITEM_TID_COMM,
+                                       strlen(staging->mp->tid_comm) + 1,
+                                       staging->mp->tid_comm);
+
+       if (staging->mp && (staging->mask & KDBUS_ATTACH_PID_COMM))
+               item = kdbus_write_full(&items, KDBUS_ITEM_PID_COMM,
+                                       strlen(staging->mp->pid_comm) + 1,
+                                       staging->mp->pid_comm);
+
+       if (staging->exe_path && (staging->mask & KDBUS_ATTACH_EXE))
+               item = kdbus_write_full(&items, KDBUS_ITEM_EXE,
+                                       strlen(staging->exe_path) + 1,
+                                       staging->exe_path);
+
+       if (staging->mp && (staging->mask & KDBUS_ATTACH_CMDLINE))
+               item = kdbus_write_full(&items, KDBUS_ITEM_CMDLINE,
+                                       strlen(staging->mp->cmdline) + 1,
+                                       staging->mp->cmdline);
+
+       if (staging->mp && (staging->mask & KDBUS_ATTACH_CGROUP))
+               item = kdbus_write_full(&items, KDBUS_ITEM_CGROUP,
+                                       strlen(staging->mp->cgroup) + 1,
+                                       staging->mp->cgroup);
+
+       if (staging->mp && (staging->mask & KDBUS_ATTACH_CAPS)) {
+               item = kdbus_write_head(&items, KDBUS_ITEM_CAPS,
+                                       sizeof(struct kdbus_meta_caps));
+               kdbus_meta_export_caps((void*)&item->caps, staging->mp,
+                                      user_ns);
+       }
 
-               cnt += kdbus_meta_push_kvec(kvec + cnt, hdr++, KDBUS_ITEM_PIDS,
-                                           &pids, sizeof(pids), &size);
+       if (staging->mf && (staging->mask & KDBUS_ATTACH_SECLABEL))
+               item = kdbus_write_full(&items, KDBUS_ITEM_SECLABEL,
+                                       strlen(staging->mf->seclabel) + 1,
+                                       staging->mf->seclabel);
+       else if (staging->mp && (staging->mask & KDBUS_ATTACH_SECLABEL))
+               item = kdbus_write_full(&items, KDBUS_ITEM_SECLABEL,
+                                       strlen(staging->mp->seclabel) + 1,
+                                       staging->mp->seclabel);
+
+       if (staging->mp && (staging->mask & KDBUS_ATTACH_AUDIT)) {
+               item = kdbus_write_head(&items, KDBUS_ITEM_AUDIT,
+                                       sizeof(struct kdbus_audit));
+               item->audit = (struct kdbus_audit){
+                       .loginuid = from_kuid(user_ns,
+                                             staging->mp->audit_loginuid),
+                       .sessionid = staging->mp->audit_sessionid,
+               };
        }
 
-       if (mp && (mask & KDBUS_ATTACH_AUXGROUPS)) {
-               size_t payload_size = mp->n_auxgrps * sizeof(u64);
-               int i;
+       /* connection metadata */
 
-               auxgrps = kmalloc(payload_size, GFP_KERNEL);
-               if (!auxgrps) {
-                       ret = -ENOMEM;
-                       goto exit;
-               }
+       if (staging->mc && (staging->mask & KDBUS_ATTACH_NAMES)) {
+               memcpy(items, staging->mc->owned_names_items,
+                      KDBUS_ALIGN8(staging->mc->owned_names_size));
+               owned_names_end = (u8 *)items + staging->mc->owned_names_size;
+               items = (void *)KDBUS_ALIGN8((unsigned long)owned_names_end);
+       }
+
+       if (staging->mc && (staging->mask & KDBUS_ATTACH_CONN_DESCRIPTION))
+               item = kdbus_write_full(&items, KDBUS_ITEM_CONN_DESCRIPTION,
+                               strlen(staging->mc->conn_description) + 1,
+                               staging->mc->conn_description);
 
-               for (i = 0; i < mp->n_auxgrps; i++)
-                       auxgrps[i] = from_kgid_munged(user_ns, mp->auxgrps[i]);
+       if (staging->mc && (staging->mask & KDBUS_ATTACH_TIMESTAMP))
+               item = kdbus_write_full(&items, KDBUS_ITEM_TIMESTAMP,
+                                       sizeof(staging->mc->ts),
+                                       &staging->mc->ts);
 
-               cnt += kdbus_meta_push_kvec(kvec + cnt, hdr++,
-                                           KDBUS_ITEM_AUXGROUPS,
-                                           auxgrps, payload_size, &size);
+       /*
+        * Return real size (minus trailing padding). In case of 'owned_names'
+        * we cannot deduce it from item->size, so treat it specially.
+        */
+
+       if (items == (void *)KDBUS_ALIGN8((unsigned long)owned_names_end))
+               end = owned_names_end;
+       else if (item)
+               end = (u8 *)item + item->size;
+       else
+               end = mem;
+
+       WARN_ON((u8 *)items - (u8 *)mem != size);
+       WARN_ON((void *)KDBUS_ALIGN8((unsigned long)end) != (void *)items);
+
+       return end - (u8 *)mem;
+}
+
+int kdbus_meta_emit(struct kdbus_meta_proc *mp,
+                   struct kdbus_meta_fake *mf,
+                   struct kdbus_meta_conn *mc,
+                   struct kdbus_conn *conn,
+                   u64 mask,
+                   struct kdbus_item **out_items,
+                   size_t *out_size)
+{
+       struct kdbus_meta_staging staging = {};
+       struct kdbus_item *items = NULL;
+       size_t size = 0;
+       int ret;
+
+       if (WARN_ON(mf && mp))
+               mp = NULL;
+
+       staging.mp = mp;
+       staging.mf = mf;
+       staging.mc = mc;
+       staging.conn = conn;
+
+       /* get mask of valid items */
+       if (mf)
+               staging.mask |= mf->valid;
+       if (mp) {
+               mutex_lock(&mp->lock);
+               staging.mask |= mp->valid;
+               mutex_unlock(&mp->lock);
+       }
+       if (mc) {
+               mutex_lock(&mc->lock);
+               staging.mask |= mc->valid;
+               mutex_unlock(&mc->lock);
        }
 
-       if (mp && (mask & KDBUS_ATTACH_TID_COMM))
-               cnt += kdbus_meta_push_kvec(kvec + cnt, hdr++,
-                                           KDBUS_ITEM_TID_COMM, mp->tid_comm,
-                                           strlen(mp->tid_comm) + 1, &size);
+       staging.mask &= mask;
 
-       if (mp && (mask & KDBUS_ATTACH_PID_COMM))
-               cnt += kdbus_meta_push_kvec(kvec + cnt, hdr++,
-                                           KDBUS_ITEM_PID_COMM, mp->pid_comm,
-                                           strlen(mp->pid_comm) + 1, &size);
+       if (!staging.mask) { /* bail out if nothing to do */
+               ret = 0;
+               goto exit;
+       }
 
-       if (mp && (mask & KDBUS_ATTACH_EXE)) {
+       /* EXE is special as it needs a temporary page to assemble */
+       if (mp && (staging.mask & KDBUS_ATTACH_EXE)) {
                struct path p;
 
                /*
-                * TODO: We need access to __d_path() so we can write the path
+                * XXX: We need access to __d_path() so we can write the path
                 * relative to conn->root_path. Once upstream, we need
                 * EXPORT_SYMBOL(__d_path) or an equivalent of d_path() that
                 * takes the root path directly. Until then, we drop this item
@@ -1057,103 +1112,236 @@ int kdbus_meta_export(struct kdbus_meta_proc *mp,
                 */
 
                get_fs_root(current->fs, &p);
-               if (path_equal(&p, &mp->root_path)) {
-                       exe_page = (void *)__get_free_page(GFP_TEMPORARY);
-                       if (!exe_page) {
+               if (path_equal(&p, &conn->root_path)) {
+                       staging.exe = (void *)__get_free_page(GFP_TEMPORARY);
+                       if (!staging.exe) {
                                path_put(&p);
                                ret = -ENOMEM;
                                goto exit;
                        }
 
-                       exe_pathname = d_path(&mp->exe_path, exe_page,
-                                             PAGE_SIZE);
-                       if (IS_ERR(exe_pathname)) {
+                       staging.exe_path = d_path(&mp->exe_path, staging.exe,
+                                                 PAGE_SIZE);
+                       if (IS_ERR(staging.exe_path)) {
                                path_put(&p);
-                               ret = PTR_ERR(exe_pathname);
+                               ret = PTR_ERR(staging.exe_path);
                                goto exit;
                        }
-
-                       cnt += kdbus_meta_push_kvec(kvec + cnt, hdr++,
-                                                   KDBUS_ITEM_EXE,
-                                                   exe_pathname,
-                                                   strlen(exe_pathname) + 1,
-                                                   &size);
                }
                path_put(&p);
        }
 
-       if (mp && (mask & KDBUS_ATTACH_CMDLINE))
-               cnt += kdbus_meta_push_kvec(kvec + cnt, hdr++,
-                                           KDBUS_ITEM_CMDLINE, mp->cmdline,
-                                           strlen(mp->cmdline) + 1, &size);
+       size = kdbus_meta_measure(&staging);
+       if (!size) { /* bail out if nothing to do */
+               ret = 0;
+               goto exit;
+       }
 
-       if (mp && (mask & KDBUS_ATTACH_CGROUP))
-               cnt += kdbus_meta_push_kvec(kvec + cnt, hdr++,
-                                           KDBUS_ITEM_CGROUP, mp->cgroup,
-                                           strlen(mp->cgroup) + 1, &size);
+       items = kmalloc(size, GFP_KERNEL);
+       if (!items) {
+               ret = -ENOMEM;
+               goto exit;
+       }
 
-       if (mp && (mask & KDBUS_ATTACH_CAPS))
-               cnt += kdbus_meta_push_kvec(kvec + cnt, hdr++,
-                                           KDBUS_ITEM_CAPS, &mp->caps,
-                                           sizeof(mp->caps), &size);
-
-       if (mp && (mask & KDBUS_ATTACH_SECLABEL))
-               cnt += kdbus_meta_push_kvec(kvec + cnt, hdr++,
-                                           KDBUS_ITEM_SECLABEL, mp->seclabel,
-                                           strlen(mp->seclabel) + 1, &size);
-
-       if (mp && (mask & KDBUS_ATTACH_AUDIT)) {
-               struct kdbus_audit a = {
-                       .loginuid = from_kuid(user_ns, mp->audit_loginuid),
-                       .sessionid = mp->audit_sessionid,
-               };
+       size = kdbus_meta_write(&staging, items, size);
+       if (!size) {
+               kfree(items);
+               items = NULL;
+       }
+
+       ret = 0;
 
-               cnt += kdbus_meta_push_kvec(kvec + cnt, hdr++, KDBUS_ITEM_AUDIT,
-                                           &a, sizeof(a), &size);
+exit:
+       if (staging.exe)
+               free_page((unsigned long)staging.exe);
+       if (ret >= 0) {
+               *out_items = items;
+               *out_size = size;
        }
+       return ret;
+}
 
-       /* connection metadata */
+enum {
+       KDBUS_META_PROC_NONE,
+       KDBUS_META_PROC_NORMAL,
+};
 
-       if (mc && (mask & KDBUS_ATTACH_NAMES))
-               kdbus_kvec_set(&kvec[cnt++], mc->owned_names_items,
-                              mc->owned_names_size, &size);
+/**
+ * kdbus_proc_permission() - check /proc permissions on target pid
+ * @pid_ns:            namespace we operate in
+ * @cred:              credentials of requestor
+ * @target:            target process
+ *
+ * This checks whether a process with credentials @cred can access information
+ * of @target in the namespace @pid_ns. This tries to follow /proc permissions,
+ * but is slightly more restrictive.
+ *
+ * Return: The /proc access level (KDBUS_META_PROC_*) is returned.
+ */
+static unsigned int kdbus_proc_permission(const struct pid_namespace *pid_ns,
+                                         const struct cred *cred,
+                                         struct pid *target)
+{
+       if (pid_ns->hide_pid < 1)
+               return KDBUS_META_PROC_NORMAL;
 
-       if (mc && (mask & KDBUS_ATTACH_CONN_DESCRIPTION))
-               cnt += kdbus_meta_push_kvec(kvec + cnt, hdr++,
-                                           KDBUS_ITEM_CONN_DESCRIPTION,
-                                           mc->conn_description,
-                                           strlen(mc->conn_description) + 1,
-                                           &size);
+       /* XXX: we need groups_search() exported for aux-groups */
+       if (gid_eq(cred->egid, pid_ns->pid_gid))
+               return KDBUS_META_PROC_NORMAL;
 
-       if (mc && (mask & KDBUS_ATTACH_TIMESTAMP))
-               cnt += kdbus_meta_push_kvec(kvec + cnt, hdr++,
-                                           KDBUS_ITEM_TIMESTAMP, &mc->ts,
-                                           sizeof(mc->ts), &size);
+       /*
+        * XXX: If ptrace_may_access(PTRACE_MODE_READ) is granted, you can
+        * override hide_pid. However, ptrace_may_access() only supports
+        * checking 'current', hence, we cannot use this here. But we
+        * simply decide to not support this override, so no need to worry.
+        */
 
-       ret = kdbus_pool_slice_copy_kvec(slice, offset, kvec, cnt, size);
-       *real_size = size;
+       return KDBUS_META_PROC_NONE;
+}
 
-exit:
-       kfree(auxgrps);
+/**
+ * kdbus_meta_proc_mask() - calculate which metadata would be visible to
+ *                         a connection via /proc
+ * @prv_pid:           pid of metadata provider
+ * @req_pid:           pid of metadata requestor
+ * @req_cred:          credentials of metadata requestor
+ * @wanted:            metadata that is requested
+ *
+ * This checks which metadata items of @prv_pid can be read via /proc by the
+ * requestor @req_pid.
+ *
+ * Return: Set of metadata flags the requestor can see (limited by @wanted).
+ */
+static u64 kdbus_meta_proc_mask(struct pid *prv_pid,
+                               struct pid *req_pid,
+                               const struct cred *req_cred,
+                               u64 wanted)
+{
+       struct pid_namespace *prv_ns, *req_ns;
+       unsigned int proc;
 
-       if (exe_page)
-               free_page((unsigned long)exe_page);
+       prv_ns = ns_of_pid(prv_pid);
+       req_ns = ns_of_pid(req_pid);
 
-       return ret;
+       /*
+        * If the sender is not visible in the receiver namespace, then the
+        * receiver cannot access the sender via its own procfs. Hence, we do
+        * not attach any additional metadata.
+        */
+       if (!pid_nr_ns(prv_pid, req_ns))
+               return 0;
+
+       /*
+        * If the pid-namespace of the receiver has hide_pid set, it cannot see
+        * any process but its own. We shortcut this /proc permission check if
+        * provider and requestor are the same. If not, we perform rather
+        * expensive /proc permission checks.
+        */
+       if (prv_pid == req_pid)
+               proc = KDBUS_META_PROC_NORMAL;
+       else
+               proc = kdbus_proc_permission(req_ns, req_cred, prv_pid);
+
+       /* you need /proc access to read standard process attributes */
+       if (proc < KDBUS_META_PROC_NORMAL)
+               wanted &= ~(KDBUS_ATTACH_TID_COMM |
+                           KDBUS_ATTACH_PID_COMM |
+                           KDBUS_ATTACH_SECLABEL |
+                           KDBUS_ATTACH_CMDLINE |
+                           KDBUS_ATTACH_CGROUP |
+                           KDBUS_ATTACH_AUDIT |
+                           KDBUS_ATTACH_CAPS |
+                           KDBUS_ATTACH_EXE);
+
+       /* clear all non-/proc flags */
+       return wanted & (KDBUS_ATTACH_TID_COMM |
+                        KDBUS_ATTACH_PID_COMM |
+                        KDBUS_ATTACH_SECLABEL |
+                        KDBUS_ATTACH_CMDLINE |
+                        KDBUS_ATTACH_CGROUP |
+                        KDBUS_ATTACH_AUDIT |
+                        KDBUS_ATTACH_CAPS |
+                        KDBUS_ATTACH_EXE);
 }
 
 /**
- * kdbus_meta_calc_attach_flags() - calculate attach flags for a sender
- *                                 and a receiver
- * @sender:            Sending connection
- * @receiver:          Receiving connection
+ * kdbus_meta_get_mask() - calculate attach flags mask for metadata request
+ * @prv_pid:           pid of metadata provider
+ * @prv_mask:          mask of metadata the provider grants unchecked
+ * @req_pid:           pid of metadata requestor
+ * @req_cred:          credentials of metadata requestor
+ * @req_mask:          mask of metadata that is requested
  *
- * Return: the attach flags both the sender and the receiver have opted-in
- * for.
+ * This calculates the metadata items that the requestor @req_pid can access
+ * from the metadata provider @prv_pid. This permission check consists of
+ * several different parts:
+ *  - Providers can grant metadata items unchecked. Regardless of their type,
+ *    they're always granted to the requestor. This mask is passed as @prv_mask.
+ *  - Basic items (credentials and connection metadata) are granted implicitly
+ *    to everyone. They're publicly available to any bus-user that can see the
+ *    provider.
+ *  - Process credentials that are not granted implicitly follow the same
+ *    permission checks as /proc. This means, we always assume a requestor
+ *    process has access to their *own* /proc mount, if they have access to
+ *    kdbusfs.
+ *
+ * Return: Mask of metadata that is granted.
+ */
+static u64 kdbus_meta_get_mask(struct pid *prv_pid, u64 prv_mask,
+                              struct pid *req_pid,
+                              const struct cred *req_cred, u64 req_mask)
+{
+       u64 missing, impl_mask, proc_mask = 0;
+
+       /*
+        * Connection metadata and basic unix process credentials are
+        * transmitted implicitly, and cannot be suppressed. Both are required
+        * to perform user-space policies on the receiver-side. Furthermore,
+        * connection metadata is public state, anyway, and unix credentials
+        * are needed for UDS-compatibility. We extend them slightly by
+        * auxiliary groups and additional uids/gids/pids.
+        */
+       impl_mask = /* connection metadata */
+                   KDBUS_ATTACH_CONN_DESCRIPTION |
+                   KDBUS_ATTACH_TIMESTAMP |
+                   KDBUS_ATTACH_NAMES |
+                   /* credentials and pids */
+                   KDBUS_ATTACH_AUXGROUPS |
+                   KDBUS_ATTACH_CREDS |
+                   KDBUS_ATTACH_PIDS;
+
+       /*
+        * Calculate the set of metadata that is not granted implicitly nor by
+        * the sender, but still requested by the receiver. If any are left,
+        * perform rather expensive /proc access checks for them.
+        */
+       missing = req_mask & ~((prv_mask | impl_mask) & req_mask);
+       if (missing)
+               proc_mask = kdbus_meta_proc_mask(prv_pid, req_pid, req_cred,
+                                                missing);
+
+       return (prv_mask | impl_mask | proc_mask) & req_mask;
+}
+
+/**
+ */
+u64 kdbus_meta_info_mask(const struct kdbus_conn *conn, u64 mask)
+{
+       return kdbus_meta_get_mask(conn->pid,
+                                  atomic64_read(&conn->attach_flags_send),
+                                  task_pid(current),
+                                  current_cred(),
+                                  mask);
+}
+
+/**
  */
-u64 kdbus_meta_calc_attach_flags(const struct kdbus_conn *sender,
-                                const struct kdbus_conn *receiver)
+u64 kdbus_meta_msg_mask(const struct kdbus_conn *snd,
+                       const struct kdbus_conn *rcv)
 {
-       return atomic64_read(&sender->attach_flags_send) &
-              atomic64_read(&receiver->attach_flags_recv);
+       return kdbus_meta_get_mask(task_pid(current),
+                                  atomic64_read(&snd->attach_flags_send),
+                                  rcv->pid,
+                                  rcv->cred,
+                                  atomic64_read(&rcv->attach_flags_recv));
 }
index 0c977cd..dba7cc7 100644 (file)
 #define __KDBUS_METADATA_H
 
 #include <linux/kernel.h>
-#include <uapi/linux/kdbus.h>
 
 struct kdbus_conn;
-struct kdbus_kmsg;
 struct kdbus_pool_slice;
 
 struct kdbus_meta_proc;
 struct kdbus_meta_conn;
 
-extern unsigned long long kdbus_meta_attach_mask;
+/**
+ * struct kdbus_meta_fake - Fake metadata
+ * @valid:             Bitmask of collected and valid items
+ * @uid:               UID of process
+ * @euid:              EUID of process
+ * @suid:              SUID of process
+ * @fsuid:             FSUID of process
+ * @gid:               GID of process
+ * @egid:              EGID of process
+ * @sgid:              SGID of process
+ * @fsgid:             FSGID of process
+ * @pid:               PID of process
+ * @tgid:              TGID of process
+ * @ppid:              PPID of process
+ * @seclabel:          Seclabel
+ */
+struct kdbus_meta_fake {
+       u64 valid;
+
+       /* KDBUS_ITEM_CREDS */
+       kuid_t uid, euid, suid, fsuid;
+       kgid_t gid, egid, sgid, fsgid;
+
+       /* KDBUS_ITEM_PIDS */
+       struct pid *pid, *tgid, *ppid;
+
+       /* KDBUS_ITEM_SECLABEL */
+       char *seclabel;
+};
 
 struct kdbus_meta_proc *kdbus_meta_proc_new(void);
 struct kdbus_meta_proc *kdbus_meta_proc_ref(struct kdbus_meta_proc *mp);
 struct kdbus_meta_proc *kdbus_meta_proc_unref(struct kdbus_meta_proc *mp);
 int kdbus_meta_proc_collect(struct kdbus_meta_proc *mp, u64 what);
-int kdbus_meta_proc_fake(struct kdbus_meta_proc *mp,
-                        const struct kdbus_creds *creds,
-                        const struct kdbus_pids *pids,
-                        const char *seclabel);
+
+struct kdbus_meta_fake *kdbus_meta_fake_new(void);
+struct kdbus_meta_fake *kdbus_meta_fake_free(struct kdbus_meta_fake *mf);
+int kdbus_meta_fake_collect(struct kdbus_meta_fake *mf,
+                           const struct kdbus_creds *creds,
+                           const struct kdbus_pids *pids,
+                           const char *seclabel);
 
 struct kdbus_meta_conn *kdbus_meta_conn_new(void);
 struct kdbus_meta_conn *kdbus_meta_conn_ref(struct kdbus_meta_conn *mc);
 struct kdbus_meta_conn *kdbus_meta_conn_unref(struct kdbus_meta_conn *mc);
 int kdbus_meta_conn_collect(struct kdbus_meta_conn *mc,
-                           struct kdbus_kmsg *kmsg,
                            struct kdbus_conn *conn,
-                           u64 what);
-
-int kdbus_meta_export_prepare(struct kdbus_meta_proc *mp,
-                             struct kdbus_meta_conn *mc,
-                             u64 *mask, size_t *sz);
-int kdbus_meta_export(struct kdbus_meta_proc *mp,
-                     struct kdbus_meta_conn *mc,
-                     u64 mask,
-                     struct kdbus_pool_slice *slice,
-                     off_t offset, size_t *real_size);
-u64 kdbus_meta_calc_attach_flags(const struct kdbus_conn *sender,
-                                const struct kdbus_conn *receiver);
+                           u64 msg_seqnum, u64 what);
+
+int kdbus_meta_emit(struct kdbus_meta_proc *mp,
+                   struct kdbus_meta_fake *mf,
+                   struct kdbus_meta_conn *mc,
+                   struct kdbus_conn *conn,
+                   u64 mask,
+                   struct kdbus_item **out_items,
+                   size_t *out_size);
+u64 kdbus_meta_info_mask(const struct kdbus_conn *conn, u64 mask);
+u64 kdbus_meta_msg_mask(const struct kdbus_conn *snd,
+                       const struct kdbus_conn *rcv);
 
 #endif
index df99e4d..bf44ca3 100644 (file)
 #include "notify.h"
 #include "policy.h"
 
-struct kdbus_name_pending {
-       u64 flags;
-       struct kdbus_conn *conn;
-       struct kdbus_name_entry *name;
-       struct list_head conn_entry;
-       struct list_head name_entry;
-};
+#define KDBUS_NAME_SAVED_MASK (KDBUS_NAME_ALLOW_REPLACEMENT |  \
+                              KDBUS_NAME_QUEUE)
 
-static int kdbus_name_pending_new(struct kdbus_name_entry *e,
-                                 struct kdbus_conn *conn, u64 flags)
+static bool kdbus_name_owner_is_used(struct kdbus_name_owner *owner)
 {
-       struct kdbus_name_pending *p;
-
-       kdbus_conn_assert_active(conn);
-
-       p = kmalloc(sizeof(*p), GFP_KERNEL);
-       if (!p)
-               return -ENOMEM;
-
-       p->flags = flags;
-       p->conn = conn;
-       p->name = e;
-       list_add_tail(&p->conn_entry, &conn->names_queue_list);
-       list_add_tail(&p->name_entry, &e->queue);
-
-       return 0;
+       return !list_empty(&owner->name_entry) ||
+              owner == owner->name->activator;
 }
 
-static void kdbus_name_pending_free(struct kdbus_name_pending *p)
+static struct kdbus_name_owner *
+kdbus_name_owner_new(struct kdbus_conn *conn, struct kdbus_name_entry *name,
+                    u64 flags)
 {
-       if (!p)
-               return;
+       struct kdbus_name_owner *owner;
 
-       list_del(&p->name_entry);
-       list_del(&p->conn_entry);
-       kfree(p);
-}
-
-static struct kdbus_name_entry *
-kdbus_name_entry_new(struct kdbus_name_registry *r, u32 hash, const char *name)
-{
-       struct kdbus_name_entry *e;
-       size_t namelen;
+       kdbus_conn_assert_active(conn);
 
-       namelen = strlen(name);
+       if (conn->name_count >= KDBUS_CONN_MAX_NAMES)
+               return ERR_PTR(-E2BIG);
 
-       e = kmalloc(sizeof(*e) + namelen + 1, GFP_KERNEL);
-       if (!e)
+       owner = kmalloc(sizeof(*owner), GFP_KERNEL);
+       if (!owner)
                return ERR_PTR(-ENOMEM);
 
-       e->name_id = ++r->name_seq_last;
-       e->flags = 0;
-       e->conn = NULL;
-       e->activator = NULL;
-       INIT_LIST_HEAD(&e->queue);
-       INIT_LIST_HEAD(&e->conn_entry);
-       hash_add(r->entries_hash, &e->hentry, hash);
-       memcpy(e->name, name, namelen + 1);
+       owner->flags = flags & KDBUS_NAME_SAVED_MASK;
+       owner->conn = conn;
+       owner->name = name;
+       list_add_tail(&owner->conn_entry, &conn->names_list);
+       INIT_LIST_HEAD(&owner->name_entry);
 
-       return e;
+       ++conn->name_count;
+       return owner;
 }
 
-static void kdbus_name_entry_free(struct kdbus_name_entry *e)
+static void kdbus_name_owner_free(struct kdbus_name_owner *owner)
 {
-       if (!e)
+       if (!owner)
                return;
 
-       WARN_ON(!list_empty(&e->conn_entry));
-       WARN_ON(!list_empty(&e->queue));
-       WARN_ON(e->activator);
-       WARN_ON(e->conn);
-
-       hash_del(&e->hentry);
-       kfree(e);
+       WARN_ON(kdbus_name_owner_is_used(owner));
+       --owner->conn->name_count;
+       list_del(&owner->conn_entry);
+       kfree(owner);
 }
 
-static void kdbus_name_entry_set_owner(struct kdbus_name_entry *e,
-                                      struct kdbus_conn *conn, u64 flags)
+static struct kdbus_name_owner *
+kdbus_name_owner_find(struct kdbus_name_entry *name, struct kdbus_conn *conn)
 {
-       WARN_ON(e->conn);
+       struct kdbus_name_owner *owner;
 
-       e->conn = kdbus_conn_ref(conn);
-       e->flags = flags;
-       atomic_inc(&conn->name_count);
-       list_add_tail(&e->conn_entry, &e->conn->names_list);
+       /*
+        * Use conn->names_list over name->queue to make sure boundaries of
+        * this linear search are controlled by the connection itself.
+        * Furthermore, this will find normal owners as well as activators
+        * without any additional code.
+        */
+       list_for_each_entry(owner, &conn->names_list, conn_entry)
+               if (owner->name == name)
+                       return owner;
+
+       return NULL;
 }
 
-static void kdbus_name_entry_remove_owner(struct kdbus_name_entry *e)
+static bool kdbus_name_entry_is_used(struct kdbus_name_entry *name)
 {
-       WARN_ON(!e->conn);
-
-       list_del_init(&e->conn_entry);
-       atomic_dec(&e->conn->name_count);
-       e->flags = 0;
-       e->conn = kdbus_conn_unref(e->conn);
+       return !list_empty(&name->queue) || name->activator;
 }
 
-static void kdbus_name_entry_replace_owner(struct kdbus_name_entry *e,
-                                          struct kdbus_conn *conn, u64 flags)
+static struct kdbus_name_owner *
+kdbus_name_entry_first(struct kdbus_name_entry *name)
 {
-       if (WARN_ON(!e->conn) || WARN_ON(conn == e->conn))
-               return;
-
-       kdbus_notify_name_change(conn->ep->bus, KDBUS_ITEM_NAME_CHANGE,
-                                e->conn->id, conn->id,
-                                e->flags, flags, e->name);
-       kdbus_name_entry_remove_owner(e);
-       kdbus_name_entry_set_owner(e, conn, flags);
+       return list_first_entry_or_null(&name->queue, struct kdbus_name_owner,
+                                       name_entry);
 }
 
-/**
- * kdbus_name_is_valid() - check if a name is valid
- * @p:                 The name to check
- * @allow_wildcard:    Whether or not to allow a wildcard name
- *
- * A name is valid if all of the following criterias are met:
- *
- *  - The name has two or more elements separated by a period ('.') character.
- *  - All elements must contain at least one character.
- *  - Each element must only contain the ASCII characters "[A-Z][a-z][0-9]_-"
- *    and must not begin with a digit.
- *  - The name must not exceed KDBUS_NAME_MAX_LEN.
- *  - If @allow_wildcard is true, the name may end on '.*'
- */
-bool kdbus_name_is_valid(const char *p, bool allow_wildcard)
+static struct kdbus_name_entry *
+kdbus_name_entry_new(struct kdbus_name_registry *r, u32 hash,
+                    const char *name_str)
 {
-       bool dot, found_dot = false;
-       const char *q;
+       struct kdbus_name_entry *name;
+       size_t namelen;
 
-       for (dot = true, q = p; *q; q++) {
-               if (*q == '.') {
-                       if (dot)
-                               return false;
+       lockdep_assert_held(&r->rwlock);
 
-                       found_dot = true;
-                       dot = true;
-               } else {
-                       bool good;
+       namelen = strlen(name_str);
 
-                       good = isalpha(*q) || (!dot && isdigit(*q)) ||
-                               *q == '_' || *q == '-' ||
-                               (allow_wildcard && dot &&
-                                       *q == '*' && *(q + 1) == '\0');
+       name = kmalloc(sizeof(*name) + namelen + 1, GFP_KERNEL);
+       if (!name)
+               return ERR_PTR(-ENOMEM);
 
-                       if (!good)
-                               return false;
+       name->name_id = ++r->name_seq_last;
+       name->activator = NULL;
+       INIT_LIST_HEAD(&name->queue);
+       hash_add(r->entries_hash, &name->hentry, hash);
+       memcpy(name->name, name_str, namelen + 1);
 
-                       dot = false;
-               }
-       }
+       return name;
+}
 
-       if (q - p > KDBUS_NAME_MAX_LEN)
-               return false;
+static void kdbus_name_entry_free(struct kdbus_name_entry *name)
+{
+       if (!name)
+               return;
 
-       if (dot)
-               return false;
+       WARN_ON(kdbus_name_entry_is_used(name));
+       hash_del(&name->hentry);
+       kfree(name);
+}
 
-       if (!found_dot)
-               return false;
+static struct kdbus_name_entry *
+kdbus_name_entry_find(struct kdbus_name_registry *r, u32 hash,
+                     const char *name_str)
+{
+       struct kdbus_name_entry *name;
 
-       return true;
+       lockdep_assert_held(&r->rwlock);
+
+       hash_for_each_possible(r->entries_hash, name, hentry, hash)
+               if (!strcmp(name->name, name_str))
+                       return name;
+
+       return NULL;
 }
 
 /**
@@ -218,32 +179,19 @@ struct kdbus_name_registry *kdbus_name_registry_new(void)
 }
 
 /**
- * kdbus_name_registry_free() - drop a name reg's reference
- * @reg:               The name registry, may be %NULL
+ * kdbus_name_registry_free() - free name registry
+ * @r:         name registry to free, or NULL
  *
- * Cleanup the name registry's internal structures.
+ * Free a name registry and cleanup all internal objects. This is a no-op if
+ * you pass NULL as registry.
  */
-void kdbus_name_registry_free(struct kdbus_name_registry *reg)
+void kdbus_name_registry_free(struct kdbus_name_registry *r)
 {
-       if (!reg)
+       if (!r)
                return;
 
-       WARN_ON(!hash_empty(reg->entries_hash));
-       kfree(reg);
-}
-
-static struct kdbus_name_entry *
-kdbus_name_find(struct kdbus_name_registry *reg, u32 hash, const char *name)
-{
-       struct kdbus_name_entry *e;
-
-       lockdep_assert_held(&reg->rwlock);
-
-       hash_for_each_possible(reg->entries_hash, e, hentry, hash)
-               if (strcmp(e->name, name) == 0)
-                       return e;
-
-       return NULL;
+       WARN_ON(!hash_empty(r->entries_hash));
+       kfree(r);
 }
 
 /**
@@ -260,183 +208,286 @@ kdbus_name_find(struct kdbus_name_registry *reg, u32 hash, const char *name)
 struct kdbus_name_entry *
 kdbus_name_lookup_unlocked(struct kdbus_name_registry *reg, const char *name)
 {
-       return kdbus_name_find(reg, kdbus_strhash(name), name);
+       return kdbus_name_entry_find(reg, kdbus_strhash(name), name);
 }
 
-/**
- * kdbus_name_acquire() - acquire a name
- * @reg:               The name registry
- * @conn:              The connection to pin this entry to
- * @name:              The name to acquire
- * @flags:             Acquisition flags (KDBUS_NAME_*)
- * @return_flags:      Pointer to return flags for the acquired name
- *                     (KDBUS_NAME_*), may be %NULL
- *
- * Callers must ensure that @conn is either a privileged bus user or has
- * sufficient privileges in the policy-db to own the well-known name @name.
- *
- * Return: 0 success, negative error number on failure.
- */
-int kdbus_name_acquire(struct kdbus_name_registry *reg,
-                      struct kdbus_conn *conn, const char *name,
-                      u64 flags, u64 *return_flags)
+static int kdbus_name_become_activator(struct kdbus_name_owner *owner,
+                                      u64 *return_flags)
 {
-       struct kdbus_name_entry *e;
-       u64 rflags = 0;
-       int ret = 0;
-       u32 hash;
+       if (kdbus_name_owner_is_used(owner))
+               return -EALREADY;
+       if (owner->name->activator)
+               return -EEXIST;
 
-       kdbus_conn_assert_active(conn);
+       owner->name->activator = owner;
+       owner->flags |= KDBUS_NAME_ACTIVATOR;
 
-       down_write(&reg->rwlock);
-
-       if (!kdbus_conn_policy_own_name(conn, current_cred(), name)) {
-               ret = -EPERM;
-               goto exit_unlock;
+       if (kdbus_name_entry_first(owner->name)) {
+               owner->flags |= KDBUS_NAME_IN_QUEUE;
+       } else {
+               owner->flags |= KDBUS_NAME_PRIMARY;
+               kdbus_notify_name_change(owner->conn->ep->bus,
+                                        KDBUS_ITEM_NAME_ADD,
+                                        0, owner->conn->id,
+                                        0, owner->flags,
+                                        owner->name->name);
        }
 
-       hash = kdbus_strhash(name);
-       e = kdbus_name_find(reg, hash, name);
-       if (!e) {
-               /* claim new name */
+       if (return_flags)
+               *return_flags = owner->flags | KDBUS_NAME_ACQUIRED;
 
-               if (conn->activator_of) {
-                       ret = -EINVAL;
-                       goto exit_unlock;
-               }
+       return 0;
+}
 
-               e = kdbus_name_entry_new(reg, hash, name);
-               if (IS_ERR(e)) {
-                       ret = PTR_ERR(e);
-                       goto exit_unlock;
-               }
+static int kdbus_name_update(struct kdbus_name_owner *owner, u64 flags,
+                            u64 *return_flags)
+{
+       struct kdbus_name_owner *primary, *activator;
+       struct kdbus_name_entry *name;
+       struct kdbus_bus *bus;
+       u64 nflags = 0;
+       int ret = 0;
 
-               if (kdbus_conn_is_activator(conn)) {
-                       e->activator = kdbus_conn_ref(conn);
-                       conn->activator_of = e;
+       name = owner->name;
+       bus = owner->conn->ep->bus;
+       primary = kdbus_name_entry_first(name);
+       activator = name->activator;
+
+       /* cannot be activator and acquire a name */
+       if (owner == activator)
+               return -EUCLEAN;
+
+       /* update saved flags */
+       owner->flags = flags & KDBUS_NAME_SAVED_MASK;
+
+       if (!primary) {
+               /*
+                * No primary owner (but maybe an activator). Take over the
+                * name.
+                */
+
+               list_add(&owner->name_entry, &name->queue);
+               owner->flags |= KDBUS_NAME_PRIMARY;
+               nflags |= KDBUS_NAME_ACQUIRED;
+
+               /* move messages to new owner on activation */
+               if (activator) {
+                       kdbus_conn_move_messages(owner->conn, activator->conn,
+                                                name->name_id);
+                       kdbus_notify_name_change(bus, KDBUS_ITEM_NAME_CHANGE,
+                                       activator->conn->id, owner->conn->id,
+                                       activator->flags, owner->flags,
+                                       name->name);
+                       activator->flags &= ~KDBUS_NAME_PRIMARY;
+                       activator->flags |= KDBUS_NAME_IN_QUEUE;
+               } else {
+                       kdbus_notify_name_change(bus, KDBUS_ITEM_NAME_ADD,
+                                                0, owner->conn->id,
+                                                0, owner->flags,
+                                                name->name);
                }
 
-               kdbus_name_entry_set_owner(e, conn, flags);
-               kdbus_notify_name_change(e->conn->ep->bus, KDBUS_ITEM_NAME_ADD,
-                                        0, e->conn->id, 0, e->flags, e->name);
-       } else if (e->conn == conn || e == conn->activator_of) {
-               /* connection already owns that name */
-               ret = -EALREADY;
-       } else if (kdbus_conn_is_activator(conn)) {
-               /* activator claims existing name */
-
-               if (conn->activator_of) {
-                       ret = -EINVAL; /* multiple names not allowed */
-               } else if (e->activator) {
-                       ret = -EEXIST; /* only one activator per name */
+       } else if (owner == primary) {
+               /*
+                * Already the primary owner of the name, flags were already
+                * updated. Nothing to do.
+                */
+
+               owner->flags |= KDBUS_NAME_PRIMARY;
+
+       } else if ((primary->flags & KDBUS_NAME_ALLOW_REPLACEMENT) &&
+                  (flags & KDBUS_NAME_REPLACE_EXISTING)) {
+               /*
+                * We're not the primary owner but can replace it. Move us
+                * ahead of the primary owner and acquire the name (possibly
+                * skipping queued owners ahead of us).
+                */
+
+               list_del_init(&owner->name_entry);
+               list_add(&owner->name_entry, &name->queue);
+               owner->flags |= KDBUS_NAME_PRIMARY;
+               nflags |= KDBUS_NAME_ACQUIRED;
+
+               kdbus_notify_name_change(bus, KDBUS_ITEM_NAME_CHANGE,
+                                        primary->conn->id, owner->conn->id,
+                                        primary->flags, owner->flags,
+                                        name->name);
+
+               /* requeue old primary, or drop if queueing not wanted */
+               if (primary->flags & KDBUS_NAME_QUEUE) {
+                       primary->flags &= ~KDBUS_NAME_PRIMARY;
+                       primary->flags |= KDBUS_NAME_IN_QUEUE;
                } else {
-                       e->activator = kdbus_conn_ref(conn);
-                       conn->activator_of = e;
-               }
-       } else if (e->flags & KDBUS_NAME_ACTIVATOR) {
-               /* claim name of an activator */
-
-               kdbus_conn_move_messages(conn, e->activator, 0);
-               kdbus_name_entry_replace_owner(e, conn, flags);
-       } else if ((flags & KDBUS_NAME_REPLACE_EXISTING) &&
-                  (e->flags & KDBUS_NAME_ALLOW_REPLACEMENT)) {
-               /* claim name of a previous owner */
-
-               if (e->flags & KDBUS_NAME_QUEUE) {
-                       /* move owner back to queue if they asked for it */
-                       ret = kdbus_name_pending_new(e, e->conn, e->flags);
-                       if (ret < 0)
-                               goto exit_unlock;
+                       list_del_init(&primary->name_entry);
+                       kdbus_name_owner_free(primary);
                }
 
-               kdbus_name_entry_replace_owner(e, conn, flags);
        } else if (flags & KDBUS_NAME_QUEUE) {
-               /* add to waiting-queue of the name */
+               /*
+                * Name is already occupied and we cannot take it over, but
+                * queuing is allowed. Put us silently on the queue, if not
+                * already there.
+                */
+
+               owner->flags |= KDBUS_NAME_IN_QUEUE;
+               if (!kdbus_name_owner_is_used(owner)) {
+                       list_add_tail(&owner->name_entry, &name->queue);
+                       nflags |= KDBUS_NAME_ACQUIRED;
+               }
+       } else if (kdbus_name_owner_is_used(owner)) {
+               /*
+                * Already queued on name, but re-queueing was not requested.
+                * Make sure to unlink it from the name, the caller is
+                * responsible for releasing it.
+                */
+
+               list_del_init(&owner->name_entry);
+       } else {
+               /*
+                * Name is already claimed and queueing is not requested.
+                * Return error to the caller.
+                */
+
+               ret = -EEXIST;
+       }
 
-               struct kdbus_name_pending *p;
-               bool in_queue = false;
+       if (return_flags)
+               *return_flags = owner->flags | nflags;
 
-               list_for_each_entry(p, &e->queue, name_entry) {
-                       if (p->conn == conn) {
-                               /* connection is already queued */
-                               rflags |= KDBUS_NAME_IN_QUEUE;
-                               in_queue = true;
-                               break;
-                       }
+       return ret;
+}
+
+int kdbus_name_acquire(struct kdbus_name_registry *reg,
+                      struct kdbus_conn *conn, const char *name_str,
+                      u64 flags, u64 *return_flags)
+{
+       struct kdbus_name_entry *name = NULL;
+       struct kdbus_name_owner *owner = NULL;
+       u32 hash;
+       int ret;
+
+       kdbus_conn_assert_active(conn);
+
+       down_write(&reg->rwlock);
+
+       /*
+        * Verify the connection has access to the name. Do this before testing
+        * for double-acquisitions and other errors to make sure we do not leak
+        * information about this name through possible custom endpoints.
+        */
+       if (!kdbus_conn_policy_own_name(conn, current_cred(), name_str)) {
+               ret = -EPERM;
+               goto exit;
+       }
+
+       /*
+        * Lookup the name entry. If it already exists, search for an owner
+        * entry as we might already own that name. If either does not exist,
+        * we will allocate a fresh one.
+        */
+       hash = kdbus_strhash(name_str);
+       name = kdbus_name_entry_find(reg, hash, name_str);
+       if (name) {
+               owner = kdbus_name_owner_find(name, conn);
+       } else {
+               name = kdbus_name_entry_new(reg, hash, name_str);
+               if (IS_ERR(name)) {
+                       ret = PTR_ERR(name);
+                       name = NULL;
+                       goto exit;
                }
+       }
 
-               if (!in_queue) {
-                       ret = kdbus_name_pending_new(e, conn, flags);
-                       if (ret >= 0)
-                               /* tell the caller that we queued it */
-                               rflags |= KDBUS_NAME_IN_QUEUE;
+       /* create name owner object if not already queued */
+       if (!owner) {
+               owner = kdbus_name_owner_new(conn, name, flags);
+               if (IS_ERR(owner)) {
+                       ret = PTR_ERR(owner);
+                       owner = NULL;
+                       goto exit;
                }
-       } else {
-               /* the name is busy, return a failure */
-               ret = -EEXIST;
        }
 
-       if (ret == 0 && return_flags)
-               *return_flags = rflags;
+       if (flags & KDBUS_NAME_ACTIVATOR)
+               ret = kdbus_name_become_activator(owner, return_flags);
+       else
+               ret = kdbus_name_update(owner, flags, return_flags);
+       if (ret < 0)
+               goto exit;
 
-exit_unlock:
+exit:
+       if (owner && !kdbus_name_owner_is_used(owner))
+               kdbus_name_owner_free(owner);
+       if (name && !kdbus_name_entry_is_used(name))
+               kdbus_name_entry_free(name);
        up_write(&reg->rwlock);
        kdbus_notify_flush(conn->ep->bus);
        return ret;
 }
 
-static void kdbus_name_release_unlocked(struct kdbus_name_registry *reg,
-                                       struct kdbus_name_entry *e)
+static void kdbus_name_release_unlocked(struct kdbus_name_owner *owner)
 {
-       struct kdbus_name_pending *p;
-
-       lockdep_assert_held(&reg->rwlock);
-
-       p = list_first_entry_or_null(&e->queue, struct kdbus_name_pending,
-                                    name_entry);
-
-       if (p) {
-               /* give it to first active waiter in the queue */
-               kdbus_name_entry_replace_owner(e, p->conn, p->flags);
-               kdbus_name_pending_free(p);
-       } else if (e->activator && e->activator != e->conn) {
-               /* hand it back to an active activator connection */
-               kdbus_conn_move_messages(e->activator, e->conn, e->name_id);
-               kdbus_name_entry_replace_owner(e, e->activator,
-                                              KDBUS_NAME_ACTIVATOR);
-       } else {
-               /* release the name */
-               kdbus_notify_name_change(e->conn->ep->bus,
-                                        KDBUS_ITEM_NAME_REMOVE,
-                                        e->conn->id, 0, e->flags, 0, e->name);
-               kdbus_name_entry_remove_owner(e);
-               kdbus_name_entry_free(e);
+       struct kdbus_name_owner *primary, *next;
+       struct kdbus_name_entry *name;
+
+       name = owner->name;
+       primary = kdbus_name_entry_first(name);
+
+       list_del_init(&owner->name_entry);
+       if (owner == name->activator)
+               name->activator = NULL;
+
+       if (!primary || owner == primary) {
+               next = kdbus_name_entry_first(name);
+               if (!next)
+                       next = name->activator;
+
+               if (next) {
+                       /* hand to next in queue */
+                       next->flags &= ~KDBUS_NAME_IN_QUEUE;
+                       next->flags |= KDBUS_NAME_PRIMARY;
+                       if (next == name->activator)
+                               kdbus_conn_move_messages(next->conn,
+                                                        owner->conn,
+                                                        name->name_id);
+
+                       kdbus_notify_name_change(owner->conn->ep->bus,
+                                       KDBUS_ITEM_NAME_CHANGE,
+                                       owner->conn->id, next->conn->id,
+                                       owner->flags, next->flags,
+                                       name->name);
+               } else {
+                       kdbus_notify_name_change(owner->conn->ep->bus,
+                                                KDBUS_ITEM_NAME_REMOVE,
+                                                owner->conn->id, 0,
+                                                owner->flags, 0,
+                                                name->name);
+               }
        }
+
+       kdbus_name_owner_free(owner);
+       if (!kdbus_name_entry_is_used(name))
+               kdbus_name_entry_free(name);
 }
 
 static int kdbus_name_release(struct kdbus_name_registry *reg,
                              struct kdbus_conn *conn,
-                             const char *name)
+                             const char *name_str)
 {
-       struct kdbus_name_pending *p;
-       struct kdbus_name_entry *e;
+       struct kdbus_name_owner *owner;
+       struct kdbus_name_entry *name;
        int ret = 0;
 
        down_write(&reg->rwlock);
-       e = kdbus_name_find(reg, kdbus_strhash(name), name);
-       if (!e) {
-               ret = -ESRCH;
-       } else if (e->conn == conn) {
-               kdbus_name_release_unlocked(reg, e);
+       name = kdbus_name_entry_find(reg, kdbus_strhash(name_str), name_str);
+       if (name) {
+               owner = kdbus_name_owner_find(name, conn);
+               if (owner)
+                       kdbus_name_release_unlocked(owner);
+               else
+                       ret = -EADDRINUSE;
        } else {
-               ret = -EADDRINUSE;
-               list_for_each_entry(p, &e->queue, name_entry) {
-                       if (p->conn == conn) {
-                               kdbus_name_pending_free(p);
-                               ret = 0;
-                               break;
-                       }
-               }
+               ret = -ESRCH;
        }
        up_write(&reg->rwlock);
 
@@ -452,38 +503,79 @@ static int kdbus_name_release(struct kdbus_name_registry *reg,
 void kdbus_name_release_all(struct kdbus_name_registry *reg,
                            struct kdbus_conn *conn)
 {
-       struct kdbus_name_pending *p;
-       struct kdbus_conn *activator = NULL;
-       struct kdbus_name_entry *e;
+       struct kdbus_name_owner *owner;
 
        down_write(&reg->rwlock);
 
-       if (kdbus_conn_is_activator(conn)) {
-               activator = conn->activator_of->activator;
-               conn->activator_of->activator = NULL;
-       }
-
-       while ((p = list_first_entry_or_null(&conn->names_queue_list,
-                                            struct kdbus_name_pending,
-                                            conn_entry)))
-               kdbus_name_pending_free(p);
-       while ((e = list_first_entry_or_null(&conn->names_list,
-                                            struct kdbus_name_entry,
-                                            conn_entry)))
-               kdbus_name_release_unlocked(reg, e);
+       while ((owner = list_first_entry_or_null(&conn->names_list,
+                                                struct kdbus_name_owner,
+                                                conn_entry)))
+               kdbus_name_release_unlocked(owner);
 
        up_write(&reg->rwlock);
 
-       kdbus_conn_unref(activator);
        kdbus_notify_flush(conn->ep->bus);
 }
 
 /**
+ * kdbus_name_is_valid() - check if a name is valid
+ * @p:                 The name to check
+ * @allow_wildcard:    Whether or not to allow a wildcard name
+ *
+ * A name is valid if all of the following criteria are met:
+ *
+ *  - The name has two or more elements separated by a period ('.') character.
+ *  - All elements must contain at least one character.
+ *  - Each element must only contain the ASCII characters "[A-Z][a-z][0-9]_-"
+ *    and must not begin with a digit.
+ *  - The name must not exceed KDBUS_NAME_MAX_LEN.
+ *  - If @allow_wildcard is true, the name may end in '.*'
+ */
+bool kdbus_name_is_valid(const char *p, bool allow_wildcard)
+{
+       bool dot, found_dot = false;
+       const char *q;
+
+       for (dot = true, q = p; *q; q++) {
+               if (*q == '.') {
+                       if (dot)
+                               return false;
+
+                       found_dot = true;
+                       dot = true;
+               } else {
+                       bool good;
+
+                       good = isalpha(*q) || (!dot && isdigit(*q)) ||
+                               *q == '_' || *q == '-' ||
+                               (allow_wildcard && dot &&
+                                       *q == '*' && *(q + 1) == '\0');
+
+                       if (!good)
+                               return false;
+
+                       dot = false;
+               }
+       }
+
+       if (q - p > KDBUS_NAME_MAX_LEN)
+               return false;
+
+       if (dot)
+               return false;
+
+       if (!found_dot)
+               return false;
+
+       return true;
+}
+
+/**
  * kdbus_cmd_name_acquire() - handle KDBUS_CMD_NAME_ACQUIRE
  * @conn:              connection to operate on
  * @argp:              command payload
  *
- * Return: 0 on success, negative error code on failure.
+ * Return: >=0 on success, negative error code on failure.
  */
 int kdbus_cmd_name_acquire(struct kdbus_conn *conn, void __user *argp)
 {
@@ -517,22 +609,9 @@ int kdbus_cmd_name_acquire(struct kdbus_conn *conn, void __user *argp)
                goto exit;
        }
 
-       /*
-        * Do atomic_inc_return here to reserve our slot, then decrement
-        * it before returning.
-        */
-       if (atomic_inc_return(&conn->name_count) > KDBUS_CONN_MAX_NAMES) {
-               ret = -E2BIG;
-               goto exit_dec;
-       }
-
        ret = kdbus_name_acquire(conn->ep->bus->name_registry, conn, item_name,
                                 cmd->flags, &cmd->return_flags);
-       if (ret < 0)
-               goto exit_dec;
 
-exit_dec:
-       atomic_dec(&conn->name_count);
 exit:
        return kdbus_args_clear(&args, ret);
 }
@@ -542,7 +621,7 @@ exit:
  * @conn:              connection to operate on
  * @argp:              command payload
  *
- * Return: 0 on success, negative error code on failure.
+ * Return: >=0 on success, negative error code on failure.
  */
 int kdbus_cmd_name_release(struct kdbus_conn *conn, void __user *argp)
 {
@@ -575,7 +654,7 @@ static int kdbus_list_write(struct kdbus_conn *conn,
                            struct kdbus_conn *c,
                            struct kdbus_pool_slice *slice,
                            size_t *pos,
-                           struct kdbus_name_entry *e,
+                           struct kdbus_name_owner *o,
                            bool write)
 {
        struct kvec kvec[4];
@@ -596,22 +675,22 @@ static int kdbus_list_write(struct kdbus_conn *conn,
                u64 flags;
        } h = {};
 
-       if (e && !kdbus_conn_policy_see_name_unlocked(conn, current_cred(),
-                                                     e->name))
+       if (o && !kdbus_conn_policy_see_name_unlocked(conn, current_cred(),
+                                                     o->name->name))
                return 0;
 
        kdbus_kvec_set(&kvec[cnt++], &info, sizeof(info), &info.size);
 
        /* append name */
-       if (e) {
-               size_t slen = strlen(e->name) + 1;
+       if (o) {
+               size_t slen = strlen(o->name->name) + 1;
 
                h.size = offsetof(struct kdbus_item, name.name) + slen;
                h.type = KDBUS_ITEM_OWNED_NAME;
-               h.flags = e->flags;
+               h.flags = o->flags;
 
                kdbus_kvec_set(&kvec[cnt++], &h, sizeof(h), &info.size);
-               kdbus_kvec_set(&kvec[cnt++], e->name, slen, &info.size);
+               kdbus_kvec_set(&kvec[cnt++], o->name->name, slen, &info.size);
                cnt += !!kdbus_kvec_pad(&kvec[cnt], &info.size);
        }
 
@@ -641,63 +720,52 @@ static int kdbus_list_all(struct kdbus_conn *conn, u64 flags,
                if (kdbus_conn_is_monitor(c))
                        continue;
 
-               /* skip activators */
-               if (!(flags & KDBUS_LIST_ACTIVATORS) &&
-                   kdbus_conn_is_activator(c))
-                       continue;
-
                /* all names the connection owns */
-               if (flags & (KDBUS_LIST_NAMES | KDBUS_LIST_ACTIVATORS)) {
-                       struct kdbus_name_entry *e;
+               if (flags & (KDBUS_LIST_NAMES |
+                            KDBUS_LIST_ACTIVATORS |
+                            KDBUS_LIST_QUEUED)) {
+                       struct kdbus_name_owner *o;
 
-                       list_for_each_entry(e, &c->names_list, conn_entry) {
-                               struct kdbus_conn *a = e->activator;
+                       list_for_each_entry(o, &c->names_list, conn_entry) {
+                               if (o->flags & KDBUS_NAME_ACTIVATOR) {
+                                       if (!(flags & KDBUS_LIST_ACTIVATORS))
+                                               continue;
 
-                               if ((flags & KDBUS_LIST_ACTIVATORS) &&
-                                   a && a != c) {
-                                       ret = kdbus_list_write(conn, a, slice,
-                                                              &p, e, write);
+                                       ret = kdbus_list_write(conn, c, slice,
+                                                              &p, o, write);
                                        if (ret < 0) {
                                                mutex_unlock(&c->lock);
                                                return ret;
                                        }
 
                                        added = true;
-                               }
+                               } else if (o->flags & KDBUS_NAME_IN_QUEUE) {
+                                       if (!(flags & KDBUS_LIST_QUEUED))
+                                               continue;
 
-                               if (flags & KDBUS_LIST_NAMES ||
-                                   kdbus_conn_is_activator(c)) {
                                        ret = kdbus_list_write(conn, c, slice,
-                                                              &p, e, write);
+                                                              &p, o, write);
                                        if (ret < 0) {
                                                mutex_unlock(&c->lock);
                                                return ret;
                                        }
 
                                        added = true;
-                               }
-                       }
-               }
+                               } else if (flags & KDBUS_LIST_NAMES) {
+                                       ret = kdbus_list_write(conn, c, slice,
+                                                              &p, o, write);
+                                       if (ret < 0) {
+                                               mutex_unlock(&c->lock);
+                                               return ret;
+                                       }
 
-               /* queue of names the connection is currently waiting for */
-               if (flags & KDBUS_LIST_QUEUED) {
-                       struct kdbus_name_pending *q;
-
-                       list_for_each_entry(q, &c->names_queue_list,
-                                           conn_entry) {
-                               ret = kdbus_list_write(conn, c, slice, &p,
-                                                      q->name, write);
-                               if (ret < 0) {
-                                       mutex_unlock(&c->lock);
-                                       return ret;
+                                       added = true;
                                }
-
-                               added = true;
                        }
                }
 
                /* nothing added so far, just add the unique ID */
-               if (!added && flags & KDBUS_LIST_UNIQUE) {
+               if (!added && (flags & KDBUS_LIST_UNIQUE)) {
                        ret = kdbus_list_write(conn, c, slice, &p, NULL, write);
                        if (ret < 0)
                                return ret;
@@ -713,7 +781,7 @@ static int kdbus_list_all(struct kdbus_conn *conn, u64 flags,
  * @conn:              connection to operate on
  * @argp:              command payload
  *
- * Return: 0 on success, negative error code on failure.
+ * Return: >=0 on success, negative error code on failure.
  */
 int kdbus_cmd_list(struct kdbus_conn *conn, void __user *argp)
 {
index 3dd2589..edac59d 100644 (file)
 #include <linux/hashtable.h>
 #include <linux/rwsem.h>
 
+struct kdbus_name_entry;
+struct kdbus_name_owner;
+struct kdbus_name_registry;
+
 /**
  * struct kdbus_name_registry - names registered for a bus
  * @entries_hash:      Map of entries
@@ -32,27 +36,37 @@ struct kdbus_name_registry {
 
 /**
  * struct kdbus_name_entry - well-know name entry
- * @name_id:           Sequence number of name entry to be able to uniquely
+ * @name_id:           sequence number of name entry to be able to uniquely
  *                     identify a name over its registration lifetime
- * @flags:             KDBUS_NAME_* flags
- * @conn:              Connection owning the name
- * @activator:         Connection of the activator queuing incoming messages
- * @queue:             List of queued connections
- * @conn_entry:                Entry in connection
- * @hentry:            Entry in registry map
- * @name:              The well-known name
+ * @activator:         activator of this name, or NULL
+ * @queue:             list of queued owners
+ * @hentry:            entry in registry map
+ * @name:              well-known name
  */
 struct kdbus_name_entry {
        u64 name_id;
-       u64 flags;
-       struct kdbus_conn *conn;
-       struct kdbus_conn *activator;
+       struct kdbus_name_owner *activator;
        struct list_head queue;
-       struct list_head conn_entry;
        struct hlist_node hentry;
        char name[];
 };
 
+/**
+ * struct kdbus_name_owner - owner of a well-known name
+ * @flags:             KDBUS_NAME_* flags of this owner
+ * @conn:              connection owning the name
+ * @name:              name that is owned
+ * @conn_entry:                link into @conn
+ * @name_entry:                link into @name
+ */
+struct kdbus_name_owner {
+       u64 flags;
+       struct kdbus_conn *conn;
+       struct kdbus_name_entry *name;
+       struct list_head conn_entry;
+       struct list_head name_entry;
+};
+
 bool kdbus_name_is_valid(const char *p, bool allow_wildcard);
 
 struct kdbus_name_registry *kdbus_name_registry_new(void);
@@ -71,4 +85,21 @@ int kdbus_cmd_name_acquire(struct kdbus_conn *conn, void __user *argp);
 int kdbus_cmd_name_release(struct kdbus_conn *conn, void __user *argp);
 int kdbus_cmd_list(struct kdbus_conn *conn, void __user *argp);
 
+/**
+ * kdbus_name_get_owner() - get current owner of a name
+ * @name:      name to get current owner of
+ *
+ * This returns a pointer to the current owner of a name (or its activator if
+ * there is no owner). The caller must make sure @name is valid and does not
+ * vanish.
+ *
+ * Return: Pointer to current owner or NULL if there is none.
+ */
+static inline struct kdbus_name_owner *
+kdbus_name_get_owner(struct kdbus_name_entry *name)
+{
+       return list_first_entry_or_null(&name->queue, struct kdbus_name_owner,
+                                       name_entry) ? : name->activator;
+}
+
 #endif
index 520df00..89f58bc 100644 (file)
  * new active references can be acquired.
  * Once all active references are dropped, the node is considered 'drained'. Now
  * kdbus_node_deactivate() is called on each child of the node before we
- * continue deactvating our node. That is, once all children are entirely
+ * continue deactivating our node. That is, once all children are entirely
  * deactivated, we call ->release_cb() of our node. ->release_cb() can release
  * any resources on that node which are bound to the "active" state of a node.
  * When done, we unlink the node from its parent rb-tree, mark it as
  *                   accessed by other callers to properly initialize
  *                   filesystem nodes.
  *
- *     * node->id: This is an unsigned 32bit integer allocated by an IDR. It is
+ *     * node->id: This is an unsigned 32bit integer allocated by an IDA. It is
  *                 always kept as small as possible during allocation and is
  *                 globally unique across all nodes allocated by this module. 0
  *                 is reserved as "not assigned" and is the default.
 #define KDBUS_NODE_NEW                 (KDBUS_NODE_BIAS - 4)
 
 /* global unique ID mapping for kdbus nodes */
-static DEFINE_IDR(kdbus_node_idr);
-static DECLARE_RWSEM(kdbus_node_idr_lock);
+DEFINE_IDA(kdbus_node_ida);
 
 /**
  * kdbus_node_name_hash() - hash a name
@@ -337,15 +336,11 @@ int kdbus_node_link(struct kdbus_node *node, struct kdbus_node *parent,
                node->hash = kdbus_node_name_hash(name);
        }
 
-       down_write(&kdbus_node_idr_lock);
-       ret = idr_alloc(&kdbus_node_idr, node, 1, 0, GFP_KERNEL);
-       if (ret >= 0)
-               node->id = ret;
-       up_write(&kdbus_node_idr_lock);
-
+       ret = ida_simple_get(&kdbus_node_ida, 1, 0, GFP_KERNEL);
        if (ret < 0)
                return ret;
 
+       node->id = ret;
        ret = 0;
 
        if (parent) {
@@ -440,16 +435,8 @@ struct kdbus_node *kdbus_node_unref(struct kdbus_node *node)
 
                if (node->free_cb)
                        node->free_cb(node);
-
-               down_write(&kdbus_node_idr_lock);
                if (safe.id > 0)
-                       idr_remove(&kdbus_node_idr, safe.id);
-               /* drop caches after last node to not leak memory on unload */
-               if (idr_is_empty(&kdbus_node_idr)) {
-                       idr_destroy(&kdbus_node_idr);
-                       idr_init(&kdbus_node_idr);
-               }
-               up_write(&kdbus_node_idr_lock);
+                       ida_simple_remove(&kdbus_node_ida, safe.id);
 
                kfree(safe.name);
 
@@ -650,7 +637,7 @@ void kdbus_node_deactivate(struct kdbus_node *node)
                        kdbus_fs_flush(pos);
 
                        /*
-                        * If the node was activated and somone subtracted BIAS
+                        * If the node was activated and someone subtracted BIAS
                         * from it to deactivate it, we, and only us, are
                         * responsible to release the extra ref-count that was
                         * taken once in kdbus_node_activate().
index be125ce..970e02b 100644 (file)
@@ -58,6 +58,8 @@ struct kdbus_node {
 
 #define kdbus_node_from_rb(_node) rb_entry((_node), struct kdbus_node, rb)
 
+extern struct ida kdbus_node_ida;
+
 void kdbus_node_init(struct kdbus_node *node, unsigned int type);
 
 int kdbus_node_link(struct kdbus_node *node, struct kdbus_node *parent,
index e4a4542..375758c 100644 (file)
 #include "message.h"
 #include "notify.h"
 
-static inline void kdbus_notify_add_tail(struct kdbus_kmsg *kmsg,
+static inline void kdbus_notify_add_tail(struct kdbus_staging *staging,
                                         struct kdbus_bus *bus)
 {
        spin_lock(&bus->notify_lock);
-       list_add_tail(&kmsg->notify_entry, &bus->notify_list);
+       list_add_tail(&staging->notify_entry, &bus->notify_list);
        spin_unlock(&bus->notify_lock);
 }
 
 static int kdbus_notify_reply(struct kdbus_bus *bus, u64 id,
                              u64 cookie, u64 msg_type)
 {
-       struct kdbus_kmsg *kmsg = NULL;
+       struct kdbus_staging *s;
 
-       WARN_ON(id == 0);
-
-       kmsg = kdbus_kmsg_new(bus, 0);
-       if (IS_ERR(kmsg))
-               return PTR_ERR(kmsg);
-
-       /*
-        * a kernel-generated notification can only contain one
-        * struct kdbus_item, so make a shortcut here for
-        * faster lookup in the match db.
-        */
-       kmsg->notify_type = msg_type;
-       kmsg->msg.flags = KDBUS_MSG_SIGNAL;
-       kmsg->msg.dst_id = id;
-       kmsg->msg.src_id = KDBUS_SRC_ID_KERNEL;
-       kmsg->msg.payload_type = KDBUS_PAYLOAD_KERNEL;
-       kmsg->msg.cookie_reply = cookie;
-       kmsg->msg.items[0].type = msg_type;
-
-       kdbus_notify_add_tail(kmsg, bus);
+       s = kdbus_staging_new_kernel(bus, id, cookie, 0, msg_type);
+       if (IS_ERR(s))
+               return PTR_ERR(s);
 
+       kdbus_notify_add_tail(s, bus);
        return 0;
 }
 
@@ -115,32 +99,24 @@ int kdbus_notify_name_change(struct kdbus_bus *bus, u64 type,
                             u64 old_flags, u64 new_flags,
                             const char *name)
 {
-       struct kdbus_kmsg *kmsg = NULL;
        size_t name_len, extra_size;
+       struct kdbus_staging *s;
 
        name_len = strlen(name) + 1;
        extra_size = sizeof(struct kdbus_notify_name_change) + name_len;
-       kmsg = kdbus_kmsg_new(bus, extra_size);
-       if (IS_ERR(kmsg))
-               return PTR_ERR(kmsg);
-
-       kmsg->msg.flags = KDBUS_MSG_SIGNAL;
-       kmsg->msg.dst_id = KDBUS_DST_ID_BROADCAST;
-       kmsg->msg.src_id = KDBUS_SRC_ID_KERNEL;
-       kmsg->msg.payload_type = KDBUS_PAYLOAD_KERNEL;
-       kmsg->notify_type = type;
-       kmsg->notify_old_id = old_id;
-       kmsg->notify_new_id = new_id;
-       kmsg->msg.items[0].type = type;
-       kmsg->msg.items[0].name_change.old_id.id = old_id;
-       kmsg->msg.items[0].name_change.old_id.flags = old_flags;
-       kmsg->msg.items[0].name_change.new_id.id = new_id;
-       kmsg->msg.items[0].name_change.new_id.flags = new_flags;
-       memcpy(kmsg->msg.items[0].name_change.name, name, name_len);
-       kmsg->notify_name = kmsg->msg.items[0].name_change.name;
-
-       kdbus_notify_add_tail(kmsg, bus);
 
+       s = kdbus_staging_new_kernel(bus, KDBUS_DST_ID_BROADCAST, 0,
+                                    extra_size, type);
+       if (IS_ERR(s))
+               return PTR_ERR(s);
+
+       s->notify->name_change.old_id.id = old_id;
+       s->notify->name_change.old_id.flags = old_flags;
+       s->notify->name_change.new_id.id = new_id;
+       s->notify->name_change.new_id.flags = new_flags;
+       memcpy(s->notify->name_change.name, name, name_len);
+
+       kdbus_notify_add_tail(s, bus);
        return 0;
 }
 
@@ -156,37 +132,19 @@ int kdbus_notify_name_change(struct kdbus_bus *bus, u64 type,
  */
 int kdbus_notify_id_change(struct kdbus_bus *bus, u64 type, u64 id, u64 flags)
 {
-       struct kdbus_kmsg *kmsg = NULL;
-
-       kmsg = kdbus_kmsg_new(bus, sizeof(struct kdbus_notify_id_change));
-       if (IS_ERR(kmsg))
-               return PTR_ERR(kmsg);
+       struct kdbus_staging *s;
+       size_t extra_size;
 
-       kmsg->msg.flags = KDBUS_MSG_SIGNAL;
-       kmsg->msg.dst_id = KDBUS_DST_ID_BROADCAST;
-       kmsg->msg.src_id = KDBUS_SRC_ID_KERNEL;
-       kmsg->msg.payload_type = KDBUS_PAYLOAD_KERNEL;
-       kmsg->notify_type = type;
+       extra_size = sizeof(struct kdbus_notify_id_change);
+       s = kdbus_staging_new_kernel(bus, KDBUS_DST_ID_BROADCAST, 0,
+                                    extra_size, type);
+       if (IS_ERR(s))
+               return PTR_ERR(s);
 
-       switch (type) {
-       case KDBUS_ITEM_ID_ADD:
-               kmsg->notify_new_id = id;
-               break;
-
-       case KDBUS_ITEM_ID_REMOVE:
-               kmsg->notify_old_id = id;
-               break;
-
-       default:
-               BUG();
-       }
-
-       kmsg->msg.items[0].type = type;
-       kmsg->msg.items[0].id_change.id = id;
-       kmsg->msg.items[0].id_change.flags = flags;
-
-       kdbus_notify_add_tail(kmsg, bus);
+       s->notify->id_change.id = id;
+       s->notify->id_change.flags = flags;
 
+       kdbus_notify_add_tail(s, bus);
        return 0;
 }
 
@@ -199,7 +157,7 @@ int kdbus_notify_id_change(struct kdbus_bus *bus, u64 type, u64 id, u64 flags)
 void kdbus_notify_flush(struct kdbus_bus *bus)
 {
        LIST_HEAD(notify_list);
-       struct kdbus_kmsg *kmsg, *tmp;
+       struct kdbus_staging *s, *tmp;
 
        mutex_lock(&bus->notify_flush_lock);
        down_read(&bus->name_registry->rwlock);
@@ -208,25 +166,23 @@ void kdbus_notify_flush(struct kdbus_bus *bus)
        list_splice_init(&bus->notify_list, &notify_list);
        spin_unlock(&bus->notify_lock);
 
-       list_for_each_entry_safe(kmsg, tmp, &notify_list, notify_entry) {
-               kdbus_meta_conn_collect(kmsg->conn_meta, kmsg, NULL,
-                                       KDBUS_ATTACH_TIMESTAMP);
-
-               if (kmsg->msg.dst_id != KDBUS_DST_ID_BROADCAST) {
+       list_for_each_entry_safe(s, tmp, &notify_list, notify_entry) {
+               if (s->msg->dst_id != KDBUS_DST_ID_BROADCAST) {
                        struct kdbus_conn *conn;
 
-                       conn = kdbus_bus_find_conn_by_id(bus, kmsg->msg.dst_id);
+                       conn = kdbus_bus_find_conn_by_id(bus, s->msg->dst_id);
                        if (conn) {
-                               kdbus_bus_eavesdrop(bus, NULL, kmsg);
-                               kdbus_conn_entry_insert(NULL, conn, kmsg, NULL);
+                               kdbus_bus_eavesdrop(bus, NULL, s);
+                               kdbus_conn_entry_insert(NULL, conn, s, NULL,
+                                                       NULL);
                                kdbus_conn_unref(conn);
                        }
                } else {
-                       kdbus_bus_broadcast(bus, NULL, kmsg);
+                       kdbus_bus_broadcast(bus, NULL, s);
                }
 
-               list_del(&kmsg->notify_entry);
-               kdbus_kmsg_free(kmsg);
+               list_del(&s->notify_entry);
+               kdbus_staging_free(s);
        }
 
        up_read(&bus->name_registry->rwlock);
@@ -239,10 +195,10 @@ void kdbus_notify_flush(struct kdbus_bus *bus)
  */
 void kdbus_notify_free(struct kdbus_bus *bus)
 {
-       struct kdbus_kmsg *kmsg, *tmp;
+       struct kdbus_staging *s, *tmp;
 
-       list_for_each_entry_safe(kmsg, tmp, &bus->notify_list, notify_entry) {
-               list_del(&kmsg->notify_entry);
-               kdbus_kmsg_free(kmsg);
+       list_for_each_entry_safe(s, tmp, &bus->notify_list, notify_entry) {
+               list_del(&s->notify_entry);
+               kdbus_staging_free(s);
        }
 }
index dd7fffa..f2618e1 100644 (file)
@@ -344,7 +344,7 @@ err:
  * In order to allow atomic replacement of rules, the function first removes
  * all entries that have been created for the given owner previously.
  *
- * Callers to this function must make sur that the owner is a custom
+ * Callers to this function must make sure that the owner is a custom
  * endpoint, or if the endpoint is a default endpoint, then it must be
  * either a policy holder or an activator.
  *
index 45dcdea..63ccd55 100644 (file)
@@ -44,7 +44,7 @@
  * The receiver's buffer, managed as a pool of allocated and free
  * slices containing the queued messages.
  *
- * Messages sent with KDBUS_CMD_SEND are copied direcly by the
+ * Messages sent with KDBUS_CMD_SEND are copied directly by the
  * sending process into the receiver's pool.
  *
  * Messages received with KDBUS_CMD_RECV just return the offset
index a449464..f9c44d7 100644 (file)
@@ -171,242 +171,43 @@ static void kdbus_queue_entry_unlink(struct kdbus_queue_entry *entry)
 
 /**
  * kdbus_queue_entry_new() - allocate a queue entry
- * @conn_dst:  destination connection
- * @kmsg:      kmsg object the queue entry should track
- * @user:      user to account message on (or NULL for kernel messages)
+ * @src:       source connection, or NULL
+ * @dst:       destination connection
+ * @s:         staging object carrying the message
  *
- * Allocates a queue entry based on a given kmsg and allocate space for
+ * Allocates a queue entry based on a given msg and allocates space for
  * the message payload and the requested metadata in the connection's pool.
  * The entry is not actually added to the queue's lists at this point.
  *
  * Return: the allocated entry on success, or an ERR_PTR on failures.
  */
-struct kdbus_queue_entry *kdbus_queue_entry_new(struct kdbus_conn *conn_dst,
-                                               const struct kdbus_kmsg *kmsg,
-                                               struct kdbus_user *user)
+struct kdbus_queue_entry *kdbus_queue_entry_new(struct kdbus_conn *src,
+                                               struct kdbus_conn *dst,
+                                               struct kdbus_staging *s)
 {
-       struct kdbus_msg_resources *res = kmsg->res;
-       const struct kdbus_msg *msg = &kmsg->msg;
        struct kdbus_queue_entry *entry;
-       size_t memfd_cnt = 0;
-       struct kvec kvec[2];
-       size_t meta_size;
-       size_t msg_size;
-       u64 payload_off;
-       u64 size = 0;
-       int ret = 0;
+       int ret;
 
        entry = kzalloc(sizeof(*entry), GFP_KERNEL);
        if (!entry)
                return ERR_PTR(-ENOMEM);
 
        INIT_LIST_HEAD(&entry->entry);
-       entry->priority = msg->priority;
-       entry->dst_name_id = kmsg->dst_name_id;
-       entry->msg_res = kdbus_msg_resources_ref(res);
-       entry->proc_meta = kdbus_meta_proc_ref(kmsg->proc_meta);
-       entry->conn_meta = kdbus_meta_conn_ref(kmsg->conn_meta);
-       entry->conn = kdbus_conn_ref(conn_dst);
-
-       if (kmsg->msg.src_id == KDBUS_SRC_ID_KERNEL)
-               msg_size = msg->size;
-       else
-               msg_size = offsetof(struct kdbus_msg, items);
-
-       /* sum up the size of the needed slice */
-       size = msg_size;
-
-       if (res) {
-               size += res->vec_count *
-                       KDBUS_ITEM_SIZE(sizeof(struct kdbus_vec));
-
-               if (res->memfd_count) {
-                       entry->memfd_offset =
-                               kcalloc(res->memfd_count, sizeof(size_t),
-                                       GFP_KERNEL);
-                       if (!entry->memfd_offset) {
-                               ret = -ENOMEM;
-                               goto exit_free_entry;
-                       }
-
-                       size += res->memfd_count *
-                               KDBUS_ITEM_SIZE(sizeof(struct kdbus_memfd));
-               }
-
-               if (res->fds_count)
-                       size += KDBUS_ITEM_SIZE(sizeof(int) * res->fds_count);
-
-               if (res->dst_name)
-                       size += KDBUS_ITEM_SIZE(strlen(res->dst_name) + 1);
-       }
-
-       /*
-        * Remember the offset of the metadata part, so we can override
-        * this part later during kdbus_queue_entry_install().
-        */
-       entry->meta_offset = size;
-
-       if (entry->proc_meta || entry->conn_meta) {
-               entry->attach_flags =
-                       atomic64_read(&conn_dst->attach_flags_recv);
-
-               ret = kdbus_meta_export_prepare(entry->proc_meta,
-                                               entry->conn_meta,
-                                               &entry->attach_flags,
-                                               &meta_size);
-               if (ret < 0)
-                       goto exit_free_entry;
+       entry->priority = s->msg->priority;
+       entry->conn = kdbus_conn_ref(dst);
+       entry->gaps = kdbus_gaps_ref(s->gaps);
 
-               size += meta_size;
-       }
-
-       payload_off = size;
-       size += kmsg->pool_size;
-       size = KDBUS_ALIGN8(size);
-
-       ret = kdbus_conn_quota_inc(conn_dst, user, size,
-                                  res ? res->fds_count : 0);
-       if (ret < 0)
-               goto exit_free_entry;
-
-       entry->slice = kdbus_pool_slice_alloc(conn_dst->pool, size, true);
+       entry->slice = kdbus_staging_emit(s, src, dst);
        if (IS_ERR(entry->slice)) {
                ret = PTR_ERR(entry->slice);
                entry->slice = NULL;
-               kdbus_conn_quota_dec(conn_dst, user, size,
-                                    res ? res->fds_count : 0);
-               goto exit_free_entry;
-       }
-
-       /* we accounted for exactly 'size' bytes, make sure it didn't grow */
-       WARN_ON(kdbus_pool_slice_size(entry->slice) != size);
-       entry->user = kdbus_user_ref(user);
-
-       /* copy message header */
-       kvec[0].iov_base = (char *)msg;
-       kvec[0].iov_len = msg_size;
-
-       ret = kdbus_pool_slice_copy_kvec(entry->slice, 0, kvec, 1, msg_size);
-       if (ret < 0)
-               goto exit_free_entry;
-
-       /* 'size' will now track the write position */
-       size = msg_size;
-
-       /* create message payload items */
-       if (res) {
-               size_t dst_name_len = 0;
-               unsigned int i;
-               size_t sz = 0;
-
-               if (res->dst_name) {
-                       dst_name_len = strlen(res->dst_name) + 1;
-                       sz += KDBUS_ITEM_SIZE(dst_name_len);
-               }
-
-               for (i = 0; i < res->data_count; ++i) {
-                       struct kdbus_vec v;
-                       struct kdbus_memfd m;
-
-                       switch (res->data[i].type) {
-                       case KDBUS_MSG_DATA_VEC:
-                               sz += KDBUS_ITEM_SIZE(sizeof(v));
-                               break;
-
-                       case KDBUS_MSG_DATA_MEMFD:
-                               sz += KDBUS_ITEM_SIZE(sizeof(m));
-                               break;
-                       }
-               }
-
-               if (sz) {
-                       struct kdbus_item *items, *item;
-
-                       items = kmalloc(sz, GFP_KERNEL);
-                       if (!items) {
-                               ret = -ENOMEM;
-                               goto exit_free_entry;
-                       }
-
-                       item = items;
-
-                       if (res->dst_name)
-                               item = kdbus_item_set(item, KDBUS_ITEM_DST_NAME,
-                                                     res->dst_name,
-                                                     dst_name_len);
-
-                       for (i = 0; i < res->data_count; ++i) {
-                               struct kdbus_msg_data *d = res->data + i;
-                               struct kdbus_memfd m = {};
-                               struct kdbus_vec v = {};
-
-                               switch (d->type) {
-                               case KDBUS_MSG_DATA_VEC:
-                                       v.size = d->size;
-                                       v.offset = d->vec.off;
-                                       if (v.offset != ~0ULL)
-                                               v.offset += payload_off;
-
-                                       item = kdbus_item_set(item,
-                                                       KDBUS_ITEM_PAYLOAD_OFF,
-                                                       &v, sizeof(v));
-                                       break;
-
-                               case KDBUS_MSG_DATA_MEMFD:
-                                       /*
-                                        * Remember the location of memfds, so
-                                        * we can override the content from
-                                        * kdbus_queue_entry_install().
-                                        */
-                                       entry->memfd_offset[memfd_cnt++] =
-                                               msg_size +
-                                               (char *)item - (char *)items +
-                                               offsetof(struct kdbus_item,
-                                                        memfd);
-
-                                       item = kdbus_item_set(item,
-                                                      KDBUS_ITEM_PAYLOAD_MEMFD,
-                                                      &m, sizeof(m));
-                                       break;
-                               }
-                       }
-
-                       kvec[0].iov_base = items;
-                       kvec[0].iov_len = sz;
-
-                       ret = kdbus_pool_slice_copy_kvec(entry->slice, size,
-                                                        kvec, 1, sz);
-                       kfree(items);
-
-                       if (ret < 0)
-                               goto exit_free_entry;
-
-                       size += sz;
-               }
-
-               /*
-                * Remember the location of the FD part, so we can override the
-                * content in kdbus_queue_entry_install().
-                */
-               if (res->fds_count) {
-                       entry->fds_offset = size;
-                       size += KDBUS_ITEM_SIZE(sizeof(int) * res->fds_count);
-               }
-       }
-
-       /* finally, copy over the actual message payload */
-       if (kmsg->iov_count) {
-               ret = kdbus_pool_slice_copy_iovec(entry->slice, payload_off,
-                                                 kmsg->iov,
-                                                 kmsg->iov_count,
-                                                 kmsg->pool_size);
-               if (ret < 0)
-                       goto exit_free_entry;
+               goto error;
        }
 
+       entry->user = src ? kdbus_user_ref(src->user) : NULL;
        return entry;
 
-exit_free_entry:
+error:
        kdbus_queue_entry_free(entry);
        return ERR_PTR(ret);
 }
@@ -431,17 +232,13 @@ void kdbus_queue_entry_free(struct kdbus_queue_entry *entry)
        if (entry->slice) {
                kdbus_conn_quota_dec(entry->conn, entry->user,
                                     kdbus_pool_slice_size(entry->slice),
-                                    entry->msg_res ?
-                                               entry->msg_res->fds_count : 0);
+                                    entry->gaps ? entry->gaps->n_fds : 0);
                kdbus_pool_slice_release(entry->slice);
-               kdbus_user_unref(entry->user);
        }
 
-       kdbus_msg_resources_unref(entry->msg_res);
-       kdbus_meta_conn_unref(entry->conn_meta);
-       kdbus_meta_proc_unref(entry->proc_meta);
+       kdbus_user_unref(entry->user);
+       kdbus_gaps_unref(entry->gaps);
        kdbus_conn_unref(entry->conn);
-       kfree(entry->memfd_offset);
        kfree(entry);
 }
 
@@ -452,134 +249,22 @@ void kdbus_queue_entry_free(struct kdbus_queue_entry *entry)
  * @return_flags:      Pointer to store the return flags for userspace
  * @install_fds:       Whether or not to install associated file descriptors
  *
- * This function will create a slice to transport the message header, the
- * metadata items and other items for information stored in @entry, and
- * store it as entry->slice.
- *
- * If @install_fds is %true, file descriptors will as well be installed.
- * This function must always be called from the task context of the receiver.
- *
  * Return: 0 on success.
  */
 int kdbus_queue_entry_install(struct kdbus_queue_entry *entry,
                              u64 *return_flags, bool install_fds)
 {
-       u64 msg_size = entry->meta_offset;
-       struct kdbus_conn *conn_dst = entry->conn;
-       struct kdbus_msg_resources *res;
        bool incomplete_fds = false;
-       struct kvec kvec[2];
-       size_t memfds = 0;
-       int i, ret;
-
-       lockdep_assert_held(&conn_dst->lock);
-
-       if (entry->proc_meta || entry->conn_meta) {
-               size_t meta_size;
-
-               ret = kdbus_meta_export(entry->proc_meta,
-                                       entry->conn_meta,
-                                       entry->attach_flags,
-                                       entry->slice,
-                                       entry->meta_offset,
-                                       &meta_size);
-               if (ret < 0)
-                       return ret;
-
-               msg_size += meta_size;
-       }
+       int ret;
 
-       /* Update message size at offset 0 */
-       kvec[0].iov_base = &msg_size;
-       kvec[0].iov_len = sizeof(msg_size);
+       lockdep_assert_held(&entry->conn->lock);
 
-       ret = kdbus_pool_slice_copy_kvec(entry->slice, 0, kvec, 1,
-                                        sizeof(msg_size));
+       ret = kdbus_gaps_install(entry->gaps, entry->slice, &incomplete_fds);
        if (ret < 0)
                return ret;
 
-       res = entry->msg_res;
-
-       if (!res)
-               return 0;
-
-       if (res->fds_count) {
-               struct kdbus_item_header hdr;
-               size_t off;
-               int *fds;
-
-               fds = kmalloc_array(res->fds_count, sizeof(int), GFP_KERNEL);
-               if (!fds)
-                       return -ENOMEM;
-
-               for (i = 0; i < res->fds_count; i++) {
-                       if (install_fds) {
-                               fds[i] = get_unused_fd_flags(O_CLOEXEC);
-                               if (fds[i] >= 0)
-                                       fd_install(fds[i],
-                                                  get_file(res->fds[i]));
-                               else
-                                       incomplete_fds = true;
-                       } else {
-                               fds[i] = -1;
-                       }
-               }
-
-               off = entry->fds_offset;
-
-               hdr.type = KDBUS_ITEM_FDS;
-               hdr.size = KDBUS_ITEM_HEADER_SIZE +
-                          sizeof(int) * res->fds_count;
-
-               kvec[0].iov_base = &hdr;
-               kvec[0].iov_len = sizeof(hdr);
-
-               kvec[1].iov_base = fds;
-               kvec[1].iov_len = sizeof(int) * res->fds_count;
-
-               ret = kdbus_pool_slice_copy_kvec(entry->slice, off,
-                                                kvec, 2, hdr.size);
-               kfree(fds);
-
-               if (ret < 0)
-                       return ret;
-       }
-
-       for (i = 0; i < res->data_count; ++i) {
-               struct kdbus_msg_data *d = res->data + i;
-               struct kdbus_memfd m;
-
-               if (d->type != KDBUS_MSG_DATA_MEMFD)
-                       continue;
-
-               m.start = d->memfd.start;
-               m.size = d->size;
-               m.fd = -1;
-
-               if (install_fds) {
-                       m.fd = get_unused_fd_flags(O_CLOEXEC);
-                       if (m.fd < 0) {
-                               m.fd = -1;
-                               incomplete_fds = true;
-                       } else {
-                               fd_install(m.fd,
-                                          get_file(d->memfd.file));
-                       }
-               }
-
-               kvec[0].iov_base = &m;
-               kvec[0].iov_len = sizeof(m);
-
-               ret = kdbus_pool_slice_copy_kvec(entry->slice,
-                                                entry->memfd_offset[memfds++],
-                                                kvec, 1, sizeof(m));
-               if (ret < 0)
-                       return ret;
-       }
-
        if (incomplete_fds)
                *return_flags |= KDBUS_RECV_RETURN_INCOMPLETE_FDS;
-
        return 0;
 }
 
@@ -637,13 +322,13 @@ int kdbus_queue_entry_move(struct kdbus_queue_entry *e,
        lockdep_assert_held(&src->lock);
        lockdep_assert_held(&dst->lock);
 
-       if (WARN_ON(IS_ERR(e->user)) || WARN_ON(list_empty(&e->entry)))
+       if (WARN_ON(list_empty(&e->entry)))
                return -EINVAL;
        if (src == dst)
                return 0;
 
        size = kdbus_pool_slice_size(e->slice);
-       fds = e->msg_res ? e->msg_res->fds_count : 0;
+       fds = e->gaps ? e->gaps->n_fds : 0;
 
        ret = kdbus_conn_quota_inc(dst, e->user, size, fds);
        if (ret < 0)
index 7f2db96..bf686d1 100644 (file)
 #ifndef __KDBUS_QUEUE_H
 #define __KDBUS_QUEUE_H
 
+#include <linux/list.h>
+#include <linux/rbtree.h>
+
+struct kdbus_conn;
+struct kdbus_pool_slice;
+struct kdbus_reply;
+struct kdbus_staging;
 struct kdbus_user;
 
 /**
@@ -35,52 +42,37 @@ struct kdbus_queue {
  * @entry:             Entry in the connection's list
  * @prio_node:         Entry in the priority queue tree
  * @prio_entry:                Queue tree node entry in the list of one priority
- * @slice:             Slice in the receiver's pool for the message
- * @attach_flags:      Attach flags used during slice allocation
- * @meta_offset:       Offset of first metadata item in slice
- * @fds_offset:                Offset of FD item in slice
- * @memfd_offset:      Array of slice-offsets for all memfd items
  * @priority:          Message priority
  * @dst_name_id:       The sequence number of the name this message is
  *                     addressed to, 0 for messages sent to an ID
- * @msg_res:           Message resources
- * @proc_meta:         Process metadata, captured at message arrival
- * @conn_meta:         Connection metadata, captured at message arrival
- * @reply:             The reply block if a reply to this message is expected
+ * @conn:              Connection this entry is queued on
+ * @gaps:              Gaps object to fill message gaps at RECV time
  * @user:              User used for accounting
+ * @slice:             Slice in the receiver's pool for the message
+ * @reply:             The reply block if a reply to this message is expected
  */
 struct kdbus_queue_entry {
        struct list_head entry;
        struct rb_node prio_node;
        struct list_head prio_entry;
 
-       struct kdbus_pool_slice *slice;
-
-       u64 attach_flags;
-       size_t meta_offset;
-       size_t fds_offset;
-       size_t *memfd_offset;
-
        s64 priority;
        u64 dst_name_id;
 
-       struct kdbus_msg_resources *msg_res;
-       struct kdbus_meta_proc *proc_meta;
-       struct kdbus_meta_conn *conn_meta;
-       struct kdbus_reply *reply;
        struct kdbus_conn *conn;
+       struct kdbus_gaps *gaps;
        struct kdbus_user *user;
+       struct kdbus_pool_slice *slice;
+       struct kdbus_reply *reply;
 };
 
-struct kdbus_kmsg;
-
 void kdbus_queue_init(struct kdbus_queue *queue);
 struct kdbus_queue_entry *kdbus_queue_peek(struct kdbus_queue *queue,
                                           s64 priority, bool use_priority);
 
-struct kdbus_queue_entry *kdbus_queue_entry_new(struct kdbus_conn *conn_dst,
-                                               const struct kdbus_kmsg *kmsg,
-                                               struct kdbus_user *user);
+struct kdbus_queue_entry *kdbus_queue_entry_new(struct kdbus_conn *src,
+                                               struct kdbus_conn *dst,
+                                               struct kdbus_staging *s);
 void kdbus_queue_entry_free(struct kdbus_queue_entry *entry);
 int kdbus_queue_entry_install(struct kdbus_queue_entry *entry,
                              u64 *return_flags, bool install_fds);
index 008dca8..e6791d8 100644 (file)
@@ -37,7 +37,7 @@ struct kdbus_reply *kdbus_reply_new(struct kdbus_conn *reply_src,
                                    bool sync)
 {
        struct kdbus_reply *r;
-       int ret = 0;
+       int ret;
 
        if (atomic_inc_return(&reply_dst->request_count) >
            KDBUS_CONN_MAX_REQUESTS_PENDING) {
@@ -64,13 +64,11 @@ struct kdbus_reply *kdbus_reply_new(struct kdbus_conn *reply_src,
                r->waiting = true;
        }
 
-exit_dec_request_count:
-       if (ret < 0) {
-               atomic_dec(&reply_dst->request_count);
-               return ERR_PTR(ret);
-       }
-
        return r;
+
+exit_dec_request_count:
+       atomic_dec(&reply_dst->request_count);
+       return ERR_PTR(ret);
 }
 
 static void __kdbus_reply_free(struct kref *kref)
@@ -140,8 +138,7 @@ void kdbus_reply_unlink(struct kdbus_reply *r)
  * @reply:     The reply object
  * @err:       Error code to set on the remote side
  *
- * Remove the synchronous reply object from its connection reply_list, and
- * wake up remote peer (method origin) with the appropriate synchronous reply
+ * Wake up remote peer (method origin) with the appropriate synchronous reply
  * code.
  */
 void kdbus_sync_reply_wakeup(struct kdbus_reply *reply, int err)
@@ -172,17 +169,15 @@ struct kdbus_reply *kdbus_reply_find(struct kdbus_conn *replying,
                                     struct kdbus_conn *reply_dst,
                                     u64 cookie)
 {
-       struct kdbus_reply *r, *reply = NULL;
+       struct kdbus_reply *r;
 
        list_for_each_entry(r, &reply_dst->reply_list, entry) {
                if (r->cookie == cookie &&
-                   (!replying || r->reply_src == replying)) {
-                       reply = r;
-                       break;
-               }
+                   (!replying || r->reply_src == replying))
+                       return r;
        }
 
-       return reply;
+       return NULL;
 }
 
 /**
index eaa806a..72b1883 100644 (file)
@@ -50,51 +50,6 @@ int kdbus_copy_from_user(void *dest, void __user *user_ptr, size_t size)
 }
 
 /**
- * kdbus_memdup_user() - copy dynamically sized object from user-space
- * @user_ptr:  user-provided source buffer
- * @sz_min:    minimum object size
- * @sz_max:    maximum object size
- *
- * This copies a dynamically sized object from user-space into kernel-space. We
- * require the object to have a 64bit size field at offset 0. We read it out
- * first, allocate a suitably sized buffer and then copy all data.
- *
- * The @sz_min and @sz_max parameters define possible min and max object sizes
- * so user-space cannot trigger un-bound kernel-space allocations.
- *
- * The same alignment-restrictions as described in kdbus_copy_from_user() apply.
- *
- * Return: pointer to dynamically allocated copy, or ERR_PTR() on failure.
- */
-void *kdbus_memdup_user(void __user *user_ptr, size_t sz_min, size_t sz_max)
-{
-       void *ptr;
-       u64 size;
-       int ret;
-
-       ret = kdbus_copy_from_user(&size, user_ptr, sizeof(size));
-       if (ret < 0)
-               return ERR_PTR(ret);
-
-       if (size < sz_min)
-               return ERR_PTR(-EINVAL);
-
-       if (size > sz_max)
-               return ERR_PTR(-EMSGSIZE);
-
-       ptr = memdup_user(user_ptr, size);
-       if (IS_ERR(ptr))
-               return ptr;
-
-       if (*(u64 *)ptr != size) {
-               kfree(ptr);
-               return ERR_PTR(-EINVAL);
-       }
-
-       return ptr;
-}
-
-/**
  * kdbus_verify_uid_prefix() - verify UID prefix of a user-supplied name
  * @name:      user-supplied name to verify
  * @user_ns:   user-namespace to act in
index 740b198..5297166 100644 (file)
@@ -40,7 +40,7 @@
 ({                                                                     \
        u64 __user *_sz =                                               \
                (void __user *)((u8 __user *)(_b) + offsetof(_t, _m));  \
-       copy_to_user(_sz, _s, sizeof(((_t *)0)->_m));                   \
+       copy_to_user(_sz, _s, FIELD_SIZEOF(_t, _m));                    \
 })
 
 /**
@@ -64,7 +64,6 @@ int kdbus_verify_uid_prefix(const char *name, struct user_namespace *user_ns,
 int kdbus_sanitize_attach_flags(u64 flags, u64 *attach_flags);
 
 int kdbus_copy_from_user(void *dest, void __user *user_ptr, size_t size);
-void *kdbus_memdup_user(void __user *user_ptr, size_t sz_min, size_t sz_max);
 
 struct kvec;