/*
 * Copyright (C) 2013-2015 Kay Sievers
 * Copyright (C) 2013-2015 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
 * Copyright (C) 2013-2015 Daniel Mack <daniel@zonque.org>
 * Copyright (C) 2013-2015 David Herrmann <dh.herrmann@gmail.com>
 * Copyright (C) 2013-2015 Linux Foundation
 * Copyright (C) 2014-2015 Djalal Harouni <tixxdz@opendz.org>
 *
 * kdbus is free software; you can redistribute it and/or modify it under
 * the terms of the GNU Lesser General Public License as published by the
 * Free Software Foundation; either version 2.1 of the License, or (at
 * your option) any later version.
 */

#include <linux/aio.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/pagemap.h>
#include <linux/rbtree.h>
#include <linux/sched.h>
#include <linux/shmem_fs.h>
#include <linux/sizes.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/uio.h>

#include "pool.h"
#include "util.h"

/**
 * struct kdbus_pool - the receiver's buffer
 * @f:			The backing shmem file
 * @size:		The size of the file
 * @accounted_size:	Currently accounted memory in bytes
 * @lock:		Pool data lock
 * @slices:		All slices sorted by address
 * @slices_busy:	Tree of allocated slices
 * @slices_free:	Tree of free slices
 *
 * The receiver's buffer, managed as a pool of allocated and free
 * slices containing the queued messages.
 *
 * Messages sent with KDBUS_CMD_SEND are copied directly by the
 * sending process into the receiver's pool.
 *
 * Messages received with KDBUS_CMD_RECV just return the offset
 * to the data placed in the pool.
 *
 * The internally allocated memory needs to be returned by the receiver
 * with KDBUS_CMD_FREE.
 */
struct kdbus_pool {
        struct file *f;
        size_t size;
        size_t accounted_size;
        struct mutex lock;

        struct list_head slices;
        struct rb_root slices_busy;
        struct rb_root slices_free;
};

/**
 * struct kdbus_pool_slice - allocated element in kdbus_pool
 * @pool:		Pool this slice belongs to
 * @off:		Offset of slice in the shmem file
 * @size:		Size of slice
 * @entry:		Entry in "all slices" list
 * @rb_node:		Entry in free or busy tree
 * @free:		Unused slice
 * @accounted:		Accounted as queue slice
 * @ref_kernel:		Kernel holds a reference
 * @ref_user:		Userspace holds a reference
 *
 * The pool has one or more slices, always spanning the entire size of the
 * pool.
 *
 * Every slice is an element in a list sorted by the buffer address, to
 * provide access to the next neighbor slice.
 *
 * Every slice is a member of either the busy or the free tree. The free
 * tree is organized by slice size, the busy tree by buffer offset.
 */
struct kdbus_pool_slice {
        struct kdbus_pool *pool;
        size_t off;
        size_t size;

        struct list_head entry;
        struct rb_node rb_node;

        bool free;
        bool accounted;
        bool ref_kernel;
        bool ref_user;
};

static struct kdbus_pool_slice *kdbus_pool_slice_new(struct kdbus_pool *pool,
                                                     size_t off, size_t size)
{
        struct kdbus_pool_slice *slice;

        slice = kzalloc(sizeof(*slice), GFP_KERNEL);

/* insert a slice into the free tree */
static void kdbus_pool_add_free_slice(struct kdbus_pool *pool,
                                      struct kdbus_pool_slice *slice)
{
        struct rb_node *pn = NULL;

        n = &pool->slices_free.rb_node;
        struct kdbus_pool_slice *pslice;

        pslice = rb_entry(pn, struct kdbus_pool_slice, rb_node);
        if (slice->size < pslice->size)

        rb_link_node(&slice->rb_node, pn, n);
        rb_insert_color(&slice->rb_node, &pool->slices_free);

/* insert a slice into the busy tree */
static void kdbus_pool_add_busy_slice(struct kdbus_pool *pool,
                                      struct kdbus_pool_slice *slice)
{
        struct rb_node *pn = NULL;

        n = &pool->slices_busy.rb_node;
        struct kdbus_pool_slice *pslice;

        pslice = rb_entry(pn, struct kdbus_pool_slice, rb_node);
        if (slice->off < pslice->off)
        else if (slice->off > pslice->off)

        rb_link_node(&slice->rb_node, pn, n);
        rb_insert_color(&slice->rb_node, &pool->slices_busy);

static struct kdbus_pool_slice *kdbus_pool_find_slice(struct kdbus_pool *pool,
                                                       size_t off)
{
        n = pool->slices_busy.rb_node;
        struct kdbus_pool_slice *s;

        s = rb_entry(n, struct kdbus_pool_slice, rb_node);
        else if (off > s->off)

/**
 * kdbus_pool_slice_alloc() - allocate memory from a pool
 * @pool:		The receiver's pool
 * @size:		The number of bytes to allocate
 * @accounted:		Whether this slice should be accounted for
 *
 * The returned slice is passed to kdbus_pool_slice_release() to
 * free the allocated memory.
 *
 * Return: the allocated slice on success, ERR_PTR on failure.
 */
struct kdbus_pool_slice *kdbus_pool_slice_alloc(struct kdbus_pool *pool,
                                                size_t size, bool accounted)
{
        size_t slice_size = KDBUS_ALIGN8(size);
        struct rb_node *n, *found = NULL;
        struct kdbus_pool_slice *s;

        return ERR_PTR(-EINVAL);

        /* search for a free slice with the closest matching size */
        mutex_lock(&pool->lock);
        n = pool->slices_free.rb_node;
        s = rb_entry(n, struct kdbus_pool_slice, rb_node);
        if (slice_size < s->size) {
        } else if (slice_size > s->size) {

        /* no slice with the minimum size found in the pool */

        /* no exact match, use the closest one */
        struct kdbus_pool_slice *s_new;

        s = rb_entry(found, struct kdbus_pool_slice, rb_node);

        /* split off the remainder of the size into its own slice */
        s_new = kdbus_pool_slice_new(pool, s->off + slice_size,
                                     s->size - slice_size);

        list_add(&s_new->entry, &s->entry);
        kdbus_pool_add_free_slice(pool, s_new);

        /* adjust our size now that we split off another slice */
        s->size = slice_size;

        /* move the slice from the free to the busy tree */
        rb_erase(found, &pool->slices_free);
        kdbus_pool_add_busy_slice(pool, s);

        WARN_ON(s->ref_kernel || s->ref_user);
        s->ref_kernel = true;
        s->accounted = accounted;
        pool->accounted_size += s->size;
        mutex_unlock(&pool->lock);

        mutex_unlock(&pool->lock);

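/*
 * Illustrative sketch, not part of the original kdbus sources: a typical
 * caller-side flow around the allocator above -- reserve a slice, copy a
 * kernel buffer into it, publish it, and drop the kernel reference.  The
 * wrapper name kdbus_pool_alloc_example() and its parameters are made up;
 * only the kdbus_pool_*() helpers are defined in this file.
 */
#if 0
static int kdbus_pool_alloc_example(struct kdbus_pool *pool,
                                    const void *data, size_t len,
                                    u64 *out_off, u64 *out_size)
{
        struct kdbus_pool_slice *slice;
        struct kvec kvec = { .iov_base = (void *)data, .iov_len = len };
        ssize_t n;

        /* reserve an accounted slice large enough for the payload */
        slice = kdbus_pool_slice_alloc(pool, len, true);
        if (IS_ERR(slice))
                return PTR_ERR(slice);

        /* copy the kernel buffer into the receiver's pool */
        n = kdbus_pool_slice_copy_kvec(slice, 0, &kvec, 1, len);
        if (n < 0) {
                kdbus_pool_slice_release(slice);
                return n;
        }

        /* hand the slice to user-space; it is freed via KDBUS_CMD_FREE */
        kdbus_pool_slice_publish(slice, out_off, out_size);

        /* drop the kernel reference; the user reference keeps it busy */
        kdbus_pool_slice_release(slice);
        return 0;
}
#endif
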
static void __kdbus_pool_slice_release(struct kdbus_pool_slice *slice)
{
        struct kdbus_pool *pool = slice->pool;

        /* don't free the slice as long as kernel or user-space holds a reference */
        if (slice->ref_kernel || slice->ref_user)
                return;

        if (WARN_ON(slice->free))
                return;

        rb_erase(&slice->rb_node, &pool->slices_busy);

        /* merge with the next free slice */
        if (!list_is_last(&slice->entry, &pool->slices)) {
                struct kdbus_pool_slice *s;

                s = list_entry(slice->entry.next,
                               struct kdbus_pool_slice, entry);

                rb_erase(&s->rb_node, &pool->slices_free);
                slice->size += s->size;

        /* merge with the previous free slice */
        if (pool->slices.next != &slice->entry) {
                struct kdbus_pool_slice *s;

                s = list_entry(slice->entry.prev,
                               struct kdbus_pool_slice, entry);

                rb_erase(&s->rb_node, &pool->slices_free);
                list_del(&slice->entry);
                s->size += slice->size;

        kdbus_pool_add_free_slice(pool, slice);

/**
 * kdbus_pool_slice_release() - drop kernel-reference on allocated slice
 * @slice:		Slice allocated from the pool
 *
 * This releases the kernel-reference on the given slice. Once both the
 * kernel-reference and the user-reference on a slice have been dropped, the
 * slice is returned to the pool.
 *
 * So far, we do not implement full ref-counting on slices. The kernel and
 * user-space can each hold exactly one reference to a slice. Once both are
 * dropped, the slice is released.
 */
void kdbus_pool_slice_release(struct kdbus_pool_slice *slice)
{
        struct kdbus_pool *pool;

        /* @slice may be freed, so keep local ptr to @pool */
        pool = slice->pool;

        mutex_lock(&pool->lock);
        /* kernel must own a ref to @slice to drop it */
        WARN_ON(!slice->ref_kernel);
        slice->ref_kernel = false;
        /* no longer kernel-owned, de-account slice */
        if (slice->accounted && !WARN_ON(pool->accounted_size < slice->size))
                pool->accounted_size -= slice->size;
        __kdbus_pool_slice_release(slice);
        mutex_unlock(&pool->lock);
}

/**
 * kdbus_pool_release_offset() - release a public offset
 * @pool:		pool to operate on
 * @off:		offset to release
 *
 * This should be called whenever user-space frees a slice given to them. It
 * verifies the slice is available and public, and then drops it. It ensures
 * correct locking and barriers against queues.
 *
 * Return: 0 on success, -ENXIO if the offset is invalid or not public.
 */
int kdbus_pool_release_offset(struct kdbus_pool *pool, size_t off)
{
        struct kdbus_pool_slice *slice;

        /* 'pool->size' is used as dummy offset for empty slices */
        if (off == pool->size)
                return 0;

        mutex_lock(&pool->lock);
        slice = kdbus_pool_find_slice(pool, off);
        if (slice && slice->ref_user) {
                slice->ref_user = false;
                __kdbus_pool_slice_release(slice);
        }
        mutex_unlock(&pool->lock);

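/*
 * Illustrative sketch, not part of the original kdbus sources: a hypothetical
 * command handler that lets user-space return a slice by offset.  The struct
 * example_cmd_free type is made up; the real KDBUS_CMD_FREE handling lives
 * in the connection/handle code, not in this file.
 */
#if 0
struct example_cmd_free {
        u64 offset;
};

static int example_handle_free(struct kdbus_pool *pool,
                               const struct example_cmd_free *cmd)
{
        /* drops the user-space reference; -ENXIO if the offset is unknown */
        return kdbus_pool_release_offset(pool, cmd->offset);
}
#endif
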
/**
 * kdbus_pool_publish_empty() - publish empty slice to user-space
 * @pool:		pool to operate on
 * @off:		output storage for offset, or NULL
 * @size:		output storage for size, or NULL
 *
 * This is the same as kdbus_pool_slice_publish(), but uses a dummy slice with
 * size 0. The returned offset points to the end of the pool and is never
 * returned for real slices.
 */
void kdbus_pool_publish_empty(struct kdbus_pool *pool, u64 *off, u64 *size)

/**
 * kdbus_pool_slice_publish() - publish slice to user-space
 * @slice:		The slice to publish
 * @out_offset:		Output storage for offset, or NULL
 * @out_size:		Output storage for size, or NULL
 *
 * This prepares a slice to be published to user-space.
 *
 * This call combines the following operations:
 *   * the memory region is flushed so the user's memory view is consistent
 *   * the slice is marked as referenced by user-space, so user-space has to
 *     call KDBUS_CMD_FREE to release it
 *   * the offset and size of the slice are written to the given output
 *     arguments, if non-NULL
 */
void kdbus_pool_slice_publish(struct kdbus_pool_slice *slice,
                              u64 *out_offset, u64 *out_size)
{
        mutex_lock(&slice->pool->lock);
        /* kernel must own a ref to @slice to gain a user-space ref */
        WARN_ON(!slice->ref_kernel);
        slice->ref_user = true;
        mutex_unlock(&slice->pool->lock);

        if (out_offset)
                *out_offset = slice->off;
        if (out_size)
                *out_size = slice->size;
}

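/*
 * Illustrative sketch, not part of the original kdbus sources: filling a
 * hypothetical RECV-style reply either with a real slice or with the empty
 * dummy offset.  The helper name example_fill_reply() is made up.
 */
#if 0
static void example_fill_reply(struct kdbus_pool *pool,
                               struct kdbus_pool_slice *slice,
                               u64 *out_off, u64 *out_size)
{
        if (slice)
                kdbus_pool_slice_publish(slice, out_off, out_size);
        else
                kdbus_pool_publish_empty(pool, out_off, out_size);
}
#endif
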
/**
 * kdbus_pool_slice_offset() - get a slice's offset inside the pool
 * @slice:		Slice to return the offset of
 *
 * Return: The internal offset of @slice inside the pool.
 */
off_t kdbus_pool_slice_offset(const struct kdbus_pool_slice *slice)
{
        return slice->off;
}

/**
 * kdbus_pool_slice_size() - get size of a pool slice
 * @slice:		slice to query
 *
 * Return: size of the given slice
 */
size_t kdbus_pool_slice_size(const struct kdbus_pool_slice *slice)
{
        return slice->size;
}

/**
 * kdbus_pool_new() - create a new pool
 * @name:		Name of the (deleted) file which shows up in
 *			/proc, used for debugging
 * @size:		Maximum size of the pool
 *
 * Return: a new kdbus_pool on success, ERR_PTR on failure.
 */
struct kdbus_pool *kdbus_pool_new(const char *name, size_t size)
{
        struct kdbus_pool_slice *s;
        struct kdbus_pool *p;
        struct file *f;
        char *n;
        int ret;

        p = kzalloc(sizeof(*p), GFP_KERNEL);
        if (!p)
                return ERR_PTR(-ENOMEM);

        n = kasprintf(GFP_KERNEL, KBUILD_MODNAME "-conn:%s", name);

        f = shmem_file_setup(n ?: KBUILD_MODNAME "-conn", size, 0);

        ret = get_write_access(file_inode(f));

        /* allocate the first slice spanning the entire pool */
        s = kdbus_pool_slice_new(p, 0, size);

        p->slices_free = RB_ROOT;
        p->slices_busy = RB_ROOT;
        mutex_init(&p->lock);

        INIT_LIST_HEAD(&p->slices);
        list_add(&s->entry, &p->slices);

        kdbus_pool_add_free_slice(p, s);

        put_write_access(file_inode(f));

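/*
 * Illustrative sketch, not part of the original kdbus sources: creating a
 * per-connection pool and tearing it down again.  The connection name and
 * the 16 MiB size are assumptions made for the example.
 */
#if 0
static int kdbus_pool_new_example(void)
{
        struct kdbus_pool *pool;

        /* back the connection ":1.42" with a 16 MiB shmem pool */
        pool = kdbus_pool_new(":1.42", SZ_16M);
        if (IS_ERR(pool))
                return PTR_ERR(pool);

        /* ... queue and receive messages through the pool ... */

        kdbus_pool_free(pool);
        return 0;
}
#endif
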
/**
 * kdbus_pool_free() - destroy pool
 * @pool:		The receiver's pool
 */
void kdbus_pool_free(struct kdbus_pool *pool)
{
        struct kdbus_pool_slice *s, *tmp;

        list_for_each_entry_safe(s, tmp, &pool->slices, entry) {
                kfree(s);
        }

        put_write_access(file_inode(pool->f));

/**
 * kdbus_pool_accounted() - retrieve accounting information
 * @pool:		pool to query
 * @size:		output for overall pool size
 * @acc:		output for currently accounted size
 *
 * This returns accounting information of the pool. Note that the data might
 * change after the function returns, as the pool lock is dropped. If you need
 * reliable accounting, you must protect the data by other means.
 */
void kdbus_pool_accounted(struct kdbus_pool *pool, size_t *size, size_t *acc)
{
        mutex_lock(&pool->lock);
        if (size)
                *size = pool->size;
        if (acc)
                *acc = pool->accounted_size;
        mutex_unlock(&pool->lock);
}

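/*
 * Illustrative sketch, not part of the original kdbus sources: rejecting a
 * message when the accounted share of the pool leaves no room for it.  The
 * helper name example_pool_has_room() is made up; kdbus implements its real
 * quota logic elsewhere.  As noted above, the result is only a snapshot.
 */
#if 0
static bool example_pool_has_room(struct kdbus_pool *pool, size_t msg_size)
{
        size_t size, acc;

        kdbus_pool_accounted(pool, &size, &acc);

        /* allow the message only if it fits into the unaccounted remainder */
        return msg_size <= size - acc;
}
#endif
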
/**
 * kdbus_pool_slice_copy_iovec() - copy user memory to a slice
 * @slice:		The slice to write to
 * @off:		Offset in the slice to write to
 * @iov:		iovec array, pointing to data to copy
 * @iov_len:		Number of elements in @iov
 * @total_len:		Total number of bytes described in members of @iov
 *
 * User memory referenced by @iov will be copied into @slice at offset @off.
 *
 * Return: the number of bytes copied, negative errno on failure.
 */
ssize_t
kdbus_pool_slice_copy_iovec(const struct kdbus_pool_slice *slice, loff_t off,
                            struct iovec *iov, size_t iov_len, size_t total_len)
{
        struct iov_iter iter;
        ssize_t len;

        if (WARN_ON(off + total_len > slice->size))
                return -EFAULT;

        off += slice->off;
        iov_iter_init(&iter, WRITE, iov, iov_len, total_len);
        len = vfs_iter_write(slice->pool->f, &iter, &off);

        return (len >= 0 && len != total_len) ? -EFAULT : len;
}

/**
 * kdbus_pool_slice_copy_kvec() - copy kernel memory to a slice
 * @slice:		The slice to write to
 * @off:		Offset in the slice to write to
 * @kvec:		kvec array, pointing to data to copy
 * @kvec_len:		Number of elements in @kvec
 * @total_len:		Total number of bytes described in members of @kvec
 *
 * Kernel memory referenced by @kvec will be copied into @slice at offset @off.
 *
 * Return: the number of bytes copied, negative errno on failure.
 */
ssize_t kdbus_pool_slice_copy_kvec(const struct kdbus_pool_slice *slice,
                                   loff_t off, struct kvec *kvec,
                                   size_t kvec_len, size_t total_len)
{
        struct iov_iter iter;
        ssize_t len;

        if (WARN_ON(off + total_len > slice->size))
                return -EFAULT;

        off += slice->off;
        iov_iter_kvec(&iter, WRITE | ITER_KVEC, kvec, kvec_len, total_len);
        len = vfs_iter_write(slice->pool->f, &iter, &off);

        return (len >= 0 && len != total_len) ? -EFAULT : len;
}

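/*
 * Illustrative sketch, not part of the original kdbus sources: copying a
 * header and a payload buffer into a freshly allocated slice with a single
 * kvec array.  The struct example_hdr type and the helper name are made up.
 */
#if 0
struct example_hdr {
        u64 size;
        u64 type;
};

static ssize_t example_fill_slice(const struct kdbus_pool_slice *slice,
                                  struct example_hdr *hdr,
                                  void *payload, size_t payload_len)
{
        struct kvec kvec[2] = {
                { .iov_base = hdr,     .iov_len = sizeof(*hdr) },
                { .iov_base = payload, .iov_len = payload_len  },
        };

        /* both chunks land contiguously at offset 0 of the slice */
        return kdbus_pool_slice_copy_kvec(slice, 0, kvec, 2,
                                          sizeof(*hdr) + payload_len);
}
#endif
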
/**
 * kdbus_pool_slice_copy() - copy data from one slice into another
 * @slice_dst:		destination slice
 * @slice_src:		source slice
 *
 * Return: 0 on success, negative error number on failure.
 */
int kdbus_pool_slice_copy(const struct kdbus_pool_slice *slice_dst,
                          const struct kdbus_pool_slice *slice_src)
{
        struct file *f_src = slice_src->pool->f;
        struct file *f_dst = slice_dst->pool->f;
        struct inode *i_dst = file_inode(f_dst);
        struct address_space *mapping_dst = f_dst->f_mapping;
        const struct address_space_operations *aops = mapping_dst->a_ops;
        unsigned long len = slice_src->size;
        loff_t off_src = slice_src->off;
        loff_t off_dst = slice_dst->off;

        if (WARN_ON(slice_src->size != slice_dst->size) ||
            WARN_ON(slice_src->free || slice_dst->free))

        mutex_lock(&i_dst->i_mutex);

        unsigned long page_off;
        unsigned long copy_len;

        page_off = off_dst & (PAGE_CACHE_SIZE - 1);
        copy_len = min_t(unsigned long,
                         PAGE_CACHE_SIZE - page_off, len);

        status = aops->write_begin(f_dst, mapping_dst, off_dst,
                                   copy_len, 0, &page, &fsdata);
        if (unlikely(status < 0)) {

        kaddr = (char __force __user *)kmap(page) + page_off;
        n_read = __vfs_read(f_src, kaddr, copy_len, &off_src);
        mark_page_accessed(page);
        flush_dcache_page(page);

        if (unlikely(n_read != copy_len)) {

        status = aops->write_end(f_dst, mapping_dst, off_dst,
                                 copy_len, copy_len, page, fsdata);
        if (unlikely(status != copy_len)) {

        mutex_unlock(&i_dst->i_mutex);

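/*
 * Illustrative sketch, not part of the original kdbus sources: duplicating a
 * queued slice into another connection's pool, e.g. when a message has to be
 * delivered to a second receiver.  The helper name example_clone_slice() is
 * made up; error handling is reduced to the minimum.
 */
#if 0
static struct kdbus_pool_slice *
example_clone_slice(struct kdbus_pool *dst_pool,
                    const struct kdbus_pool_slice *src)
{
        struct kdbus_pool_slice *dst;
        int ret;

        /* the destination must have exactly the same size as the source */
        dst = kdbus_pool_slice_alloc(dst_pool, kdbus_pool_slice_size(src),
                                     true);
        if (IS_ERR(dst))
                return dst;

        ret = kdbus_pool_slice_copy(dst, src);
        if (ret < 0) {
                kdbus_pool_slice_release(dst);
                return ERR_PTR(ret);
        }

        return dst;
}
#endif
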
/**
 * kdbus_pool_mmap() - map the pool into the process
 * @pool:		The receiver's pool
 * @vma:		passed by mmap() syscall
 *
 * Return: the result of the mmap() call, negative errno on failure.
 */
int kdbus_pool_mmap(const struct kdbus_pool *pool, struct vm_area_struct *vma)
{
        /* deny write access to the pool */
        if (vma->vm_flags & VM_WRITE)
                return -EPERM;
        vma->vm_flags &= ~VM_MAYWRITE;

        /* do not allow mapping more than the size of the file */
        if ((vma->vm_end - vma->vm_start) > pool->size)
                return -EFAULT;

        /* replace the connection file with our shmem file */
        vma->vm_file = get_file(pool->f);

        return pool->f->f_op->mmap(pool->f, vma);
}
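
/*
 * Illustrative sketch, not kernel code: how the receiving process might map
 * its pool.  kdbus_pool_mmap() above rejects writable mappings, so user-space
 * maps the pool read-only and addresses received messages via the offsets it
 * gets back from the kernel.  The file descriptor being the connection fd and
 * the pool size are assumptions for this example; compile it as user-space C.
 */
#if 0
#include <stddef.h>
#include <sys/mman.h>

static void *example_map_pool(int conn_fd, size_t pool_size)
{
        /* PROT_WRITE would be refused by the kernel side above */
        void *buf = mmap(NULL, pool_size, PROT_READ, MAP_SHARED, conn_fd, 0);

        return buf == MAP_FAILED ? NULL : buf;
}
#endif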