2 * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 #include <rdma/rdma_user_ioctl.h>
34 #include <rdma/uverbs_ioctl.h>
35 #include "rdma_core.h"
/*
 * List node that heads each heap buffer obtained by _uverbs_alloc() once the
 * bundle's internal buffer is full. The nodes are chained from
 * bundle_priv.allocated_mem so that bundle_destroy() can walk them.
 */
38 struct bundle_alloc_head {
/* Next node in the singly linked list; NULL terminates the list. */
39 struct bundle_alloc_head *next;
/*
 * Members of struct bundle_priv: the private, per-ioctl wrapper around the
 * struct uverbs_attr_bundle that is handed to method handlers. Code in this
 * file gets back to the wrapper from a bundle pointer with container_of().
 */
/* List node for the wrapper itself when it was kmalloc'ed, not on stack. */
45 struct bundle_alloc_head alloc_head;
/* Head of the list of heap blocks tied to this call (NULL when empty). */
46 struct bundle_alloc_head *allocated_mem;
/* Number of bytes _uverbs_alloc() may consume starting at internal_buffer. */
47 size_t internal_avail;
/* Radix tree of the device's uapi, used for slow-path attribute lookups. */
50 struct radix_tree_root *radix;
/* Description of the method being invoked. */
51 const struct uverbs_api_ioctl_method *method_elm;
/* Slot of the method in its radix chunk plus that chunk's size, for fast lookups. */
52 void __rcu **radix_slots;
53 unsigned long radix_slots_len;
/* Attribute array in user memory, and the kernel copy made of it. */
56 struct ib_uverbs_attr __user *user_attrs;
57 struct ib_uverbs_attr *uattrs;
/* One bit per attribute bkey that bundle_destroy() has to finalize. */
59 DECLARE_BITMAP(uobj_finalize, UVERBS_API_ATTR_BKEY_LEN);
60 DECLARE_BITMAP(spec_finalize, UVERBS_API_ATTR_BKEY_LEN);
63 * Must be last. bundle ends in a flex array which overlaps
66 struct uverbs_attr_bundle bundle;
/* Small pre-allocated pool; _uverbs_alloc() carves allocations out of it. */
67 u64 internal_buffer[32];
71 * Each method has an absolute minimum amount of memory it needs to allocate,
72 * precompute that amount and determine if the onstack memory can be used or
73 * if allocation is needed.
/*
 * Records in @method_elm how much bundle memory the method needs when user
 * space passes @num_attrs attributes:
 *  - use_stack is true when the computed size fits in a struct bundle_priv;
 *  - bundle_size is the computed size plus 256 bytes of slack, rounded up to
 *    a multiple of the internal_buffer element size.
 */
75 void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm,
76 unsigned int num_attrs)
78 struct bundle_priv *pbundle;
/*
 * Fixed part of bundle_priv, one bundle attribute per key bit and one
 * kernel copy of each user attribute.
 */
80 offsetof(struct bundle_priv, internal_buffer) +
81 sizeof(*pbundle->bundle.attrs) * method_elm->key_bitmap_len +
82 sizeof(*pbundle->uattrs) * num_attrs;
84 method_elm->use_stack = bundle_size <= sizeof(*pbundle);
85 method_elm->bundle_size =
86 ALIGN(bundle_size + 256, sizeof(*pbundle->internal_buffer));
88 /* Do not want order-2 allocations for this. */
89 WARN_ON_ONCE(method_elm->bundle_size > PAGE_SIZE);
93 * uverbs_alloc() - Quickly allocate memory for use with a bundle
95 * @size: Number of bytes to allocate
96 * @flags: Allocator flags
98 * The bundle allocator is intended for allocations that are connected with
99 * processing the system call related to the bundle. The allocated memory is
100 * always freed once the system call completes, and cannot be freed any other
103 * This tries to use a small pool of pre-allocated memory for performance.
/*
 * Implementation behind the bundle allocator. The request is served from
 * the bundle's internal buffer when it still fits; otherwise a separate
 * buffer is obtained with kvmalloc() and linked into allocated_mem.
 * Failures are reported as ERR_PTR(-EOVERFLOW) or ERR_PTR(-ENOMEM).
 */
105 __malloc void *_uverbs_alloc(struct uverbs_attr_bundle *bundle, size_t size,
108 struct bundle_priv *pbundle =
109 container_of(bundle, struct bundle_priv, bundle);
/* Reject sizes that would wrap the running internal_used counter. */
113 if (check_add_overflow(size, pbundle->internal_used, &new_used))
114 return ERR_PTR(-EOVERFLOW);
/* Internal buffer exhausted: fall back to a heap block with a list header. */
116 if (new_used > pbundle->internal_avail) {
117 struct bundle_alloc_head *buf;
119 buf = kvmalloc(struct_size(buf, data, size), flags);
121 return ERR_PTR(-ENOMEM);
/* Push the new block on the front of the per-bundle list. */
122 buf->next = pbundle->allocated_mem;
123 pbundle->allocated_mem = buf;
/* Fast path: hand out the next free bytes of the internal buffer. */
127 res = (void *)pbundle->internal_buffer + pbundle->internal_used;
/* Keep the next allocation aligned to the buffer's element size. */
128 pbundle->internal_used =
129 ALIGN(new_used, sizeof(*pbundle->internal_buffer));
/* The internal buffer is reused memory, so zero it by hand on request. */
130 if (flags & __GFP_ZERO)
131 memset(res, 0, size);
134 EXPORT_SYMBOL(_uverbs_alloc);
/*
 * Checks that the attribute payload past its first len bytes is zero.
 * A payload longer than the data field is treated as a user pointer and
 * checked with ib_is_buffer_cleared(); a shorter one is stored in the data
 * field itself and is scanned in place with memchr_inv().
 */
136 static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr,
139 if (uattr->len > sizeof(((struct ib_uverbs_attr *)0)->data))
140 return ib_is_buffer_cleared(u64_to_user_ptr(uattr->data) + len,
143 return !memchr_inv((const void *)&uattr->data + len,
144 0, uattr->len - len);
/*
 * Marks an output attribute as valid for user space: takes the flags of the
 * kernel copy of the attribute, ORs in UVERBS_ATTR_F_VALID_OUTPUT and
 * targets the flags field of the same attribute in the user-space array
 * (the slot is found through attr->ptr_attr.uattr_idx).
 * NOTE(review): presumably the new flags are stored with put_user() and a
 * fault is reported as an error - confirm against the full function.
 */
147 static int uverbs_set_output(const struct uverbs_attr_bundle *bundle,
148 const struct uverbs_attr *attr)
150 struct bundle_priv *pbundle =
151 container_of(bundle, struct bundle_priv, bundle);
154 flags = pbundle->uattrs[attr->ptr_attr.uattr_idx].flags |
155 UVERBS_ATTR_F_VALID_OUTPUT;
157 &pbundle->user_attrs[attr->ptr_attr.uattr_idx].flags))
/*
 * Handles an attribute that carries an array of u32 object ids.
 *
 * The payload length must be a multiple of sizeof(u32) and the resulting
 * element count must lie within the spec's min_len..max_len. An array of
 * uobject pointers is taken from the bundle allocator, the ids are read
 * (from user memory when they do not fit in the data field, otherwise from
 * the data field itself) and each id is resolved with
 * uverbs_get_uobject_from_file(). On success the attribute's bit is set in
 * spec_finalize so that bundle_destroy() finalizes the objects.
 */
162 static int uverbs_process_idrs_array(struct bundle_priv *pbundle,
163 const struct uverbs_api_attr *attr_uapi,
164 struct uverbs_objs_arr_attr *attr,
165 struct ib_uverbs_attr *uattr,
168 const struct uverbs_attr_spec *spec = &attr_uapi->spec;
174 if (uattr->attr_data.reserved)
/* The payload has to be a whole number of u32 ids. */
177 if (uattr->len % sizeof(u32))
180 array_len = uattr->len / sizeof(u32);
181 if (array_len < spec->u2.objs_arr.min_len ||
182 array_len > spec->u2.objs_arr.max_len)
/* array_size() guards the element-count multiplication against overflow. */
186 uverbs_alloc(&pbundle->bundle,
187 array_size(array_len, sizeof(*attr->uobjects)));
188 if (IS_ERR(attr->uobjects))
189 return PTR_ERR(attr->uobjects);
192 * Since idr is 4B and *uobjects is >= 4B, we can use attr->uobjects
193 * to store idrs array and avoid additional memory allocation. The
194 * idrs array is offset to the end of the uobjects array so we will be
195 * able to read idr and replace with a pointer.
197 idr_vals = (u32 *)(attr->uobjects + array_len) - array_len;
/* Large payloads live in user memory, small ones inline in the data field. */
199 if (uattr->len > sizeof(uattr->data)) {
200 ret = copy_from_user(idr_vals, u64_to_user_ptr(uattr->data),
205 memcpy(idr_vals, &uattr->data, uattr->len);
/* Replace each id by the uobject it refers to, front to back. */
208 for (i = 0; i != array_len; i++) {
209 attr->uobjects[i] = uverbs_get_uobject_from_file(
210 spec->u2.objs_arr.obj_type, pbundle->bundle.ufile,
211 spec->u2.objs_arr.access, idr_vals[i]);
212 if (IS_ERR(attr->uobjects[i])) {
213 ret = PTR_ERR(attr->uobjects[i]);
219 __set_bit(attr_bkey, pbundle->spec_finalize);
/*
 * Finalizes every uobject referenced by an ids-array attribute by calling
 * uverbs_finalize_object() on each of the attr->len entries, using the
 * access mode from the attribute's spec and the caller's commit decision.
 */
223 static int uverbs_free_idrs_array(const struct uverbs_api_attr *attr_uapi,
224 struct uverbs_objs_arr_attr *attr,
227 const struct uverbs_attr_spec *spec = &attr_uapi->spec;
232 for (i = 0; i != attr->len; i++) {
233 current_ret = uverbs_finalize_object(
234 attr->uobjects[i], spec->u2.objs_arr.access, commit);
/*
 * Validates one user attribute against its uapi spec and fills the bundle
 * attribute slot selected by @attr_bkey. The work depends on spec->type:
 *
 *  - ENUM_IN: the element id must be below num_elems and selects the
 *    per-element spec used for the following checks; only PTR_IN element
 *    specs are accepted. The element id is stored in ptr_attr.enum_id.
 *  - PTR_IN: when user space passes more bytes than the spec knows and the
 *    spec asks for zero_trailing, the excess bytes must be zero.
 *  - PTR_OUT (and the PTR_IN/ENUM_IN paths above): the length is checked
 *    against min_len and, unless zero_trailing is set, against len. The
 *    slot records the index of the user attribute and its length; with
 *    alloc_and_copy a non-inline payload is copied into bundle memory,
 *    otherwise the raw data value is stored.
 *  - IDR / FD: the uobject is looked up, its bit is set in uobj_finalize
 *    and, for UVERBS_ACCESS_NEW, the newly allocated id is written to the
 *    data field of the user-space attribute.
 *  - IDRS_ARRAY: delegated to uverbs_process_idrs_array().
 */
242 static int uverbs_process_attr(struct bundle_priv *pbundle,
243 const struct uverbs_api_attr *attr_uapi,
244 struct ib_uverbs_attr *uattr, u32 attr_bkey)
246 const struct uverbs_attr_spec *spec = &attr_uapi->spec;
247 struct uverbs_attr *e = &pbundle->bundle.attrs[attr_bkey];
248 const struct uverbs_attr_spec *val_spec = spec;
249 struct uverbs_obj_attr *o_attr;
251 switch (spec->type) {
252 case UVERBS_ATTR_TYPE_ENUM_IN:
253 if (uattr->attr_data.enum_data.elem_id >= spec->u.enum_def.num_elems)
256 if (uattr->attr_data.enum_data.reserved)
259 val_spec = &spec->u2.enum_def.ids[uattr->attr_data.enum_data.elem_id];
261 /* Currently we only support PTR_IN based enums */
262 if (val_spec->type != UVERBS_ATTR_TYPE_PTR_IN)
265 e->ptr_attr.enum_id = uattr->attr_data.enum_data.elem_id;
267 case UVERBS_ATTR_TYPE_PTR_IN:
268 /* Ensure that any data provided by userspace beyond the known
269 * struct is zero. Userspace that knows how to use some future
270 * longer struct will fail here if used with an old kernel and
271 * non-zero content, making ABI compat/discovery simpler.
273 if (uattr->len > val_spec->u.ptr.len &&
274 val_spec->zero_trailing &&
275 !uverbs_is_attr_cleared(uattr, val_spec->u.ptr.len))
279 case UVERBS_ATTR_TYPE_PTR_OUT:
280 if (uattr->len < val_spec->u.ptr.min_len ||
281 (!val_spec->zero_trailing &&
282 uattr->len > val_spec->u.ptr.len))
285 if (spec->type != UVERBS_ATTR_TYPE_ENUM_IN &&
286 uattr->attr_data.reserved)
289 e->ptr_attr.uattr_idx = uattr - pbundle->uattrs;
290 e->ptr_attr.len = uattr->len;
292 if (val_spec->alloc_and_copy && !uverbs_attr_ptr_is_inline(e)) {
295 p = uverbs_alloc(&pbundle->bundle, uattr->len);
301 if (copy_from_user(p, u64_to_user_ptr(uattr->data),
305 e->ptr_attr.data = uattr->data;
309 case UVERBS_ATTR_TYPE_IDR:
310 case UVERBS_ATTR_TYPE_FD:
311 if (uattr->attr_data.reserved)
317 o_attr = &e->obj_attr;
318 o_attr->attr_elm = attr_uapi;
321 * The type of uattr->data is u64 for UVERBS_ATTR_TYPE_IDR and
322 * s64 for UVERBS_ATTR_TYPE_FD. We can cast the u64 to s64
323 * here without caring about truncation as we know that the
324 * IDR implementation today rejects negative IDs
326 o_attr->uobject = uverbs_get_uobject_from_file(
327 spec->u.obj.obj_type,
328 pbundle->bundle.ufile,
331 if (IS_ERR(o_attr->uobject))
332 return PTR_ERR(o_attr->uobject);
333 __set_bit(attr_bkey, pbundle->uobj_finalize);
335 if (spec->u.obj.access == UVERBS_ACCESS_NEW) {
336 unsigned int uattr_idx = uattr - pbundle->uattrs;
337 s64 id = o_attr->uobject->id;
339 /* Copy the allocated id to the user-space */
340 if (put_user(id, &pbundle->user_attrs[uattr_idx].data))
346 case UVERBS_ATTR_TYPE_IDRS_ARRAY:
347 return uverbs_process_idrs_array(pbundle, attr_uapi,
348 &e->objs_arr_attr, uattr,
358 * We search the radix tree with the method prefix and now we want to fast
359 * search the suffix bits to get a particular attribute pointer. It is not
360 * totally clear to me if this breaks the radix tree encapsulation or not, but
361 * it uses the iter data to determine if the method iter points at the same
362 * chunk that will store the attribute, if so it just derefs it directly. By
363 * construction in most kernel configs the method and attrs will all fit in a
364 * single radix chunk, so in most cases this will have no search. In other
365 * cases this falls back to a full search.
/*
 * Returns the radix tree slot of an attribute of the current method.
 * When attr_key lies inside the chunk cached in radix_slots, the slot is
 * addressed directly and used if it holds a non-NULL entry that is not an
 * internal node; in every other case the slot is searched for in the tree
 * under the key (method_key | attr_key).
 */
367 static void __rcu **uapi_get_attr_for_method(struct bundle_priv *pbundle,
372 if (likely(attr_key < pbundle->radix_slots_len)) {
375 slot = pbundle->radix_slots + attr_key;
376 entry = rcu_dereference_raw(*slot);
377 if (likely(!radix_tree_is_internal_node(entry) && entry))
/* Slow path: regular lookup with the full method + attribute key. */
381 return radix_tree_lookup_slot(pbundle->radix,
382 pbundle->method_key | attr_key);
/*
 * Processes a single attribute supplied by user space: finds its uapi
 * description for the current method, refuses an attribute id that was
 * already seen in this call, runs uverbs_process_attr() on it and finally
 * marks it in the bundle's attr_present bitmap. An attribute the kernel
 * does not know yields -EPROTONOSUPPORT when it is flagged
 * UVERBS_ATTR_F_MANDATORY.
 */
385 static int uverbs_set_attr(struct bundle_priv *pbundle,
386 struct ib_uverbs_attr *uattr)
388 u32 attr_key = uapi_key_attr(uattr->attr_id);
389 u32 attr_bkey = uapi_bkey_attr(attr_key);
390 const struct uverbs_api_attr *attr;
394 slot = uapi_get_attr_for_method(pbundle, attr_key);
397 * Kernel does not support the attribute but user-space says it
400 if (uattr->flags & UVERBS_ATTR_F_MANDATORY)
401 return -EPROTONOSUPPORT;
404 attr = rcu_dereference_protected(*slot, true);
406 /* Reject duplicate attributes from user-space */
407 if (test_bit(attr_bkey, pbundle->bundle.attr_present))
410 ret = uverbs_process_attr(pbundle, attr, uattr, attr_bkey);
/* Only attributes that were processed successfully count as present. */
414 __set_bit(attr_bkey, pbundle->bundle.attr_present);
/*
 * Runs one ioctl method on a prepared bundle:
 *  1. fetches the handler under the device's disassociate SRCU;
 *  2. copies the @num_attrs user attributes into bundle memory and
 *     processes them one by one with uverbs_set_attr();
 *  3. checks that every mandatory attribute of the method is present;
 *  4. fills bundle.driver_udata when the method has udata;
 *  5. calls the handler. When the method has a destroy attribute the object
 *     is destroyed first with uobj_destroy(), its bit is cleared from
 *     uobj_finalize, and uobj_put_destroy() follows the handler call;
 *  6. on success of a method with udata, flags UVERBS_ATTR_UHW_OUT as
 *     written through uverbs_set_output().
 * A handler result of -EPROTONOSUPPORT triggers a one-time warning.
 */
419 static int ib_uverbs_run_method(struct bundle_priv *pbundle,
420 unsigned int num_attrs)
422 int (*handler)(struct uverbs_attr_bundle *attrs);
423 size_t uattrs_size = array_size(sizeof(*pbundle->uattrs), num_attrs);
424 unsigned int destroy_bkey = pbundle->method_elm->destroy_bkey;
428 /* See uverbs_disassociate_api() */
429 handler = srcu_dereference(
430 pbundle->method_elm->handler,
431 &pbundle->bundle.ufile->device->disassociate_srcu);
/* Work on a kernel copy of the attribute array from here on. */
435 pbundle->uattrs = uverbs_alloc(&pbundle->bundle, uattrs_size);
436 if (IS_ERR(pbundle->uattrs))
437 return PTR_ERR(pbundle->uattrs);
438 if (copy_from_user(pbundle->uattrs, pbundle->user_attrs, uattrs_size))
441 for (i = 0; i != num_attrs; i++) {
442 ret = uverbs_set_attr(pbundle, &pbundle->uattrs[i]);
447 /* User space did not provide all the mandatory attributes */
448 if (unlikely(!bitmap_subset(pbundle->method_elm->attr_mandatory,
449 pbundle->bundle.attr_present,
450 pbundle->method_elm->key_bitmap_len)))
453 if (pbundle->method_elm->has_udata)
454 uverbs_fill_udata(&pbundle->bundle,
455 &pbundle->bundle.driver_udata,
456 UVERBS_ATTR_UHW_IN, UVERBS_ATTR_UHW_OUT);
/* UVERBS_API_ATTR_BKEY_LEN serves as the "no destroy attribute" marker. */
458 if (destroy_bkey != UVERBS_API_ATTR_BKEY_LEN) {
459 struct uverbs_obj_attr *destroy_attr =
460 &pbundle->bundle.attrs[destroy_bkey].obj_attr;
462 ret = uobj_destroy(destroy_attr->uobject);
/* The object is handled here, so bundle_destroy() must skip it. */
465 __clear_bit(destroy_bkey, pbundle->uobj_finalize);
467 ret = handler(&pbundle->bundle);
468 uobj_put_destroy(destroy_attr->uobject);
470 ret = handler(&pbundle->bundle);
474 * Until the drivers are revised to use the bundle directly we have to
475 * assume that the driver wrote to its UHW_OUT and flag userspace
478 if (!ret && pbundle->method_elm->has_udata) {
479 const struct uverbs_attr *attr =
480 uverbs_attr_get(&pbundle->bundle, UVERBS_ATTR_UHW_OUT);
483 ret = uverbs_set_output(&pbundle->bundle, attr);
487 * EPROTONOSUPPORT is ONLY to be returned if the ioctl framework can
488 * not invoke the method because the request is not supported. No
489 * other cases should return this code.
491 if (WARN_ON_ONCE(ret == -EPROTONOSUPPORT))
/*
 * Releases everything a bundle collected while its method ran:
 *  - every attribute marked in uobj_finalize has its uobject finalized
 *    with the access mode of its spec;
 *  - every attribute marked in spec_finalize is looked up again and, for
 *    UVERBS_ATTR_TYPE_IDRS_ARRAY, finalized via uverbs_free_idrs_array();
 *  - the list of heap blocks starting at allocated_mem is walked
 *    (NOTE(review): presumably each block is freed there - confirm).
 * @commit is forwarded to the finalize helpers; the caller passes whether
 * the method succeeded.
 */
497 static int bundle_destroy(struct bundle_priv *pbundle, bool commit)
499 unsigned int key_bitmap_len = pbundle->method_elm->key_bitmap_len;
500 struct bundle_alloc_head *memblock;
504 /* fast path for simple uobjects */
506 while ((i = find_next_bit(pbundle->uobj_finalize, key_bitmap_len,
507 i + 1)) < key_bitmap_len) {
508 struct uverbs_attr *attr = &pbundle->bundle.attrs[i];
511 current_ret = uverbs_finalize_object(
512 attr->obj_attr.uobject,
513 attr->obj_attr.attr_elm->spec.u.obj.access, commit);
/* Attributes whose cleanup depends on their spec type. */
519 while ((i = find_next_bit(pbundle->spec_finalize, key_bitmap_len,
520 i + 1)) < key_bitmap_len) {
521 struct uverbs_attr *attr = &pbundle->bundle.attrs[i];
522 const struct uverbs_api_attr *attr_uapi;
526 slot = uapi_get_attr_for_method(
528 pbundle->method_key | uapi_bkey_to_key_attr(i));
532 attr_uapi = rcu_dereference_protected(*slot, true);
534 if (attr_uapi->spec.type == UVERBS_ATTR_TYPE_IDRS_ARRAY) {
535 current_ret = uverbs_free_idrs_array(
536 attr_uapi, &attr->objs_arr_attr, commit);
/* Advance before the current node is dealt with, so next stays readable. */
542 for (memblock = pbundle->allocated_mem; memblock;) {
543 struct bundle_alloc_head *tmp = memblock;
545 memblock = memblock->next;
/*
 * Executes the method named by an already validated ioctl header.
 *
 * The header's driver_id is compared with the device's uapi, the method is
 * looked up in the uapi radix tree by object and method id, and a
 * bundle_priv is set up: heap allocated when the method cannot use the
 * stack (the block is then linked into allocated_mem through its own
 * alloc_head), otherwise using the on-stack instance. The method is run
 * with ib_uverbs_run_method() and the bundle is torn down with
 * bundle_destroy(), which is told to commit only when the method returned 0.
 * -EPROTONOSUPPORT is returned right after the method lookup (presumably
 * when no such method exists).
 */
552 static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile,
553 struct ib_uverbs_ioctl_hdr *hdr,
554 struct ib_uverbs_attr __user *user_attrs)
556 const struct uverbs_api_ioctl_method *method_elm;
557 struct uverbs_api *uapi = ufile->device->uapi;
558 struct radix_tree_iter attrs_iter;
559 struct bundle_priv *pbundle;
560 struct bundle_priv onstack;
565 if (unlikely(hdr->driver_id != uapi->driver_id))
568 slot = radix_tree_iter_lookup(
569 &uapi->radix, &attrs_iter,
570 uapi_key_obj(hdr->object_id) |
571 uapi_key_ioctl_method(hdr->method_id));
573 return -EPROTONOSUPPORT;
574 method_elm = rcu_dereference_protected(*slot, true);
576 if (!method_elm->use_stack) {
577 pbundle = kmalloc(method_elm->bundle_size, GFP_KERNEL);
/* Everything behind the fixed header is available to the allocator. */
580 pbundle->internal_avail =
581 method_elm->bundle_size -
582 offsetof(struct bundle_priv, internal_buffer);
/* Queue the bundle itself so the allocation list walk covers it too. */
583 pbundle->alloc_head.next = NULL;
584 pbundle->allocated_mem = &pbundle->alloc_head;
587 pbundle->internal_avail = sizeof(pbundle->internal_buffer);
588 pbundle->allocated_mem = NULL;
591 /* Space for the pbundle->bundle.attrs flex array */
592 pbundle->method_elm = method_elm;
593 pbundle->method_key = attrs_iter.index;
594 pbundle->bundle.ufile = ufile;
595 pbundle->radix = &uapi->radix;
596 pbundle->radix_slots = slot;
597 pbundle->radix_slots_len = radix_tree_chunk_size(&attrs_iter);
598 pbundle->user_attrs = user_attrs;
/* The attrs array occupies the start of the buffer; allocations follow it. */
600 pbundle->internal_used = ALIGN(pbundle->method_elm->key_bitmap_len *
601 sizeof(*pbundle->bundle.attrs),
602 sizeof(*pbundle->internal_buffer));
603 memset(pbundle->bundle.attr_present, 0,
604 sizeof(pbundle->bundle.attr_present));
605 memset(pbundle->uobj_finalize, 0, sizeof(pbundle->uobj_finalize));
606 memset(pbundle->spec_finalize, 0, sizeof(pbundle->spec_finalize));
608 ret = ib_uverbs_run_method(pbundle, hdr->num_attrs);
/* Commit the touched objects only if the method itself succeeded. */
609 destroy_ret = bundle_destroy(pbundle, ret == 0);
610 if (unlikely(destroy_ret && !ret))
/*
 * ioctl entry point of a uverbs file. Only RDMA_VERBS_IOCTL is handled.
 * The header is copied from user space and validated: its length may not
 * exceed PAGE_SIZE and has to equal the size of a header followed by
 * num_attrs attributes, and non-zero reserved fields are answered with
 * -EPROTONOSUPPORT. The command is then executed by ib_uverbs_cmd_verbs()
 * inside an SRCU read-side section on the device's disassociate_srcu.
 */
616 long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
618 struct ib_uverbs_file *file = filp->private_data;
619 struct ib_uverbs_ioctl_hdr __user *user_hdr =
620 (struct ib_uverbs_ioctl_hdr __user *)arg;
621 struct ib_uverbs_ioctl_hdr hdr;
625 if (unlikely(cmd != RDMA_VERBS_IOCTL))
628 err = copy_from_user(&hdr, user_hdr, sizeof(hdr));
/* The claimed length must match the attribute count exactly. */
632 if (hdr.length > PAGE_SIZE ||
633 hdr.length != struct_size(&hdr, attrs, hdr.num_attrs))
636 if (hdr.reserved1 || hdr.reserved2)
637 return -EPROTONOSUPPORT;
639 srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
/* The attributes stay in user memory; only their address is passed on. */
640 err = ib_uverbs_cmd_verbs(file, &hdr, user_hdr->attrs);
641 srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
/*
 * Reads a flags attribute as a 64-bit value. The attribute @idx is looked
 * up in @attrs_bundle; a payload of 8 bytes is taken as is, a payload of
 * 4 bytes is read as a u32. The value is then checked against
 * @allowed_bits, i.e. bits outside that mask are detected.
 * NOTE(review): the exact error codes and the store to @to are not covered
 * by this description - confirm against the full function.
 */
645 int uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle,
646 size_t idx, u64 allowed_bits)
648 const struct uverbs_attr *attr;
651 attr = uverbs_attr_get(attrs_bundle, idx);
652 /* Missing attribute means 0 flags */
659 * New userspace code should use 8 bytes to pass flags, but we
660 * transparently support old userspaces that were using 4 bytes as
663 if (attr->ptr_attr.len == 8)
664 flags = attr->ptr_attr.data;
665 else if (attr->ptr_attr.len == 4)
666 flags = *(u32 *)&attr->ptr_attr.data;
/* Any bit the caller did not allow is a problem. */
670 if (flags & ~allowed_bits)
676 EXPORT_SYMBOL(uverbs_get_flags64);
/*
 * 32-bit variant of uverbs_get_flags64(): the flags are obtained through
 * uverbs_get_flags64() with the same @idx and @allowed_bits.
 * NOTE(review): presumably the result is range-checked before it is stored
 * in the u32 at @to - confirm against the full function.
 */
678 int uverbs_get_flags32(u32 *to, const struct uverbs_attr_bundle *attrs_bundle,
679 size_t idx, u64 allowed_bits)
684 ret = uverbs_get_flags64(&flags, attrs_bundle, idx, allowed_bits);
694 EXPORT_SYMBOL(uverbs_get_flags32);
697 * Fill a ib_udata struct (core or uhw) using the given attribute IDs.
698 * This is primarily used to convert the UVERBS_ATTR_UHW() into the
699 * ib_udata format used by the drivers.
/*
 * Sets up @udata from the attributes @attr_in and @attr_out of @bundle.
 * For the input side, inlen is the attribute length and inbuf is either a
 * location inside the user-space attribute array (inline payload) or the
 * user pointer stored in the attribute. For the output side, outbuf and
 * outlen come from the output attribute, or outbuf is NULL.
 * NOTE(review): presumably the NULL case applies when the attribute is
 * absent - confirm against the full function.
 */
701 void uverbs_fill_udata(struct uverbs_attr_bundle *bundle,
702 struct ib_udata *udata, unsigned int attr_in,
703 unsigned int attr_out)
705 struct bundle_priv *pbundle =
706 container_of(bundle, struct bundle_priv, bundle);
707 const struct uverbs_attr *in =
708 uverbs_attr_get(&pbundle->bundle, attr_in);
709 const struct uverbs_attr *out =
710 uverbs_attr_get(&pbundle->bundle, attr_out);
713 udata->inlen = in->ptr_attr.len;
/* Inline data sits in the user attribute itself, not behind a pointer. */
714 if (uverbs_attr_ptr_is_inline(in))
716 &pbundle->user_attrs[in->ptr_attr.uattr_idx]
719 udata->inbuf = u64_to_user_ptr(in->ptr_attr.data);
726 udata->outbuf = u64_to_user_ptr(out->ptr_attr.data);
727 udata->outlen = out->ptr_attr.len;
729 udata->outbuf = NULL;
/*
 * Copies kernel data into the user buffer of output attribute @idx. At
 * most min(attribute length, @size) bytes from @from are written with
 * copy_to_user(); afterwards the attribute is flagged as valid output via
 * uverbs_set_output(). A failed attribute lookup is returned as
 * PTR_ERR(attr).
 */
734 int uverbs_copy_to(const struct uverbs_attr_bundle *bundle, size_t idx,
735 const void *from, size_t size)
737 const struct uverbs_attr *attr = uverbs_attr_get(bundle, idx);
741 return PTR_ERR(attr);
/* Never write more than the user buffer can hold. */
743 min_size = min_t(size_t, attr->ptr_attr.len, size);
744 if (copy_to_user(u64_to_user_ptr(attr->ptr_attr.data), from, min_size))
747 return uverbs_set_output(bundle, attr);
749 EXPORT_SYMBOL(uverbs_copy_to);
753 * This is only used if the caller has directly used copy_to_user to write the
754 * data. It signals to user space that the buffer is filled in.
/*
 * Flags output attribute @idx as valid without copying any data: the
 * attribute is looked up and passed to uverbs_set_output(). A failed
 * lookup is returned as PTR_ERR(attr).
 */
756 int uverbs_output_written(const struct uverbs_attr_bundle *bundle, size_t idx)
758 const struct uverbs_attr *attr = uverbs_attr_get(bundle, idx);
761 return PTR_ERR(attr);
763 return uverbs_set_output(bundle, attr);
/*
 * Reads the constant stored in attribute @idx into @to and checks it
 * against [@lower_bound, @upper_bound]. A lookup error is returned to the
 * caller unless it is -ENOENT and a default value (def_val) was supplied.
 * NOTE(review): presumably the default is used for a missing attribute -
 * confirm against the full function.
 */
766 int _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle,
767 size_t idx, s64 lower_bound, u64 upper_bound,
770 const struct uverbs_attr *attr;
772 attr = uverbs_attr_get(attrs_bundle, idx);
774 if ((PTR_ERR(attr) != -ENOENT) || !def_val)
775 return PTR_ERR(attr);
779 *to = attr->ptr_attr.data;
/* upper_bound is unsigned, so it is only compared with positive values. */
782 if (*to < lower_bound || (*to > 0 && (u64)*to > upper_bound))
787 EXPORT_SYMBOL(_uverbs_get_const);
789 int uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle,
790 size_t idx, const void *from, size_t size)
792 const struct uverbs_attr *attr = uverbs_attr_get(bundle, idx);
794 if (size < attr->ptr_attr.len) {
795 if (clear_user(u64_to_user_ptr(attr->ptr_attr.data) + size,
796 attr->ptr_attr.len - size))
799 return uverbs_copy_to(bundle, idx, from, size);