// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/poll.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "opdef.h"
#include "kbuf.h"

#define IO_BUFFER_LIST_BUF_PER_PAGE (PAGE_SIZE / sizeof(struct io_uring_buf))

#define BGID_ARRAY	64

/* BIDs are addressed by a 16-bit field in a CQE */
#define MAX_BIDS_PER_BGID (1 << 16)

struct io_provide_buf {
	struct file			*file;
	__u64				addr;
	__u32				len;
	__u32				bgid;
	__u32				nbufs;
	__u16				bid;
};
static struct io_buffer_list *__io_buffer_get_list(struct io_ring_ctx *ctx,
						   struct io_buffer_list *bl,
						   unsigned int bgid)
{
	if (bl && bgid < BGID_ARRAY)
		return &bl[bgid];

	return xa_load(&ctx->io_bl_xa, bgid);
}
static inline struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx,
							unsigned int bgid)
{
	lockdep_assert_held(&ctx->uring_lock);

	return __io_buffer_get_list(ctx, ctx->io_bl, bgid);
}
static int io_buffer_add_list(struct io_ring_ctx *ctx,
			      struct io_buffer_list *bl, unsigned int bgid)
{
	/*
	 * Store buffer group ID and finally mark the list as visible.
	 * The normal lookup doesn't care about the visibility as we're
	 * always under the ->uring_lock, but the RCU lookup from mmap does.
	 */
	bl->bgid = bgid;
	smp_store_release(&bl->is_ready, 1);

	if (bgid < BGID_ARRAY)
		return 0;

	return xa_err(xa_store(&ctx->io_bl_xa, bgid, bl, GFP_KERNEL));
}
void io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_buffer_list *bl;
	struct io_buffer *buf;

	/*
	 * For legacy provided buffer mode, don't recycle if we already did
	 * IO to this buffer. For ring-mapped provided buffer mode, we should
	 * increment ring->head to explicitly monopolize the buffer to avoid
	 * multiple use.
	 */
	if (req->flags & REQ_F_PARTIAL_IO)
		return;

	io_ring_submit_lock(ctx, issue_flags);

	buf = req->kbuf;
	bl = io_buffer_get_list(ctx, buf->bgid);
	list_add(&buf->list, &bl->buf_list);
	req->flags &= ~REQ_F_BUFFER_SELECTED;
	req->buf_index = buf->bgid;

	io_ring_submit_unlock(ctx, issue_flags);
}
unsigned int __io_put_kbuf(struct io_kiocb *req, unsigned issue_flags)
{
	unsigned int cflags;

	/*
	 * We can add this buffer back to two lists:
	 *
	 * 1) The io_buffers_cache list. This one is protected by the
	 *    ctx->uring_lock. If we already hold this lock, add back to this
	 *    list as we can grab it from issue as well.
	 * 2) The io_buffers_comp list. This one is protected by the
	 *    ctx->completion_lock.
	 *
	 * We migrate buffers from the comp_list to the issue cache list
	 * when we need one.
	 */
	if (req->flags & REQ_F_BUFFER_RING) {
		/* no buffers to recycle for this case */
		cflags = __io_put_kbuf_list(req, NULL);
	} else if (issue_flags & IO_URING_F_UNLOCKED) {
		struct io_ring_ctx *ctx = req->ctx;

		spin_lock(&ctx->completion_lock);
		cflags = __io_put_kbuf_list(req, &ctx->io_buffers_comp);
		spin_unlock(&ctx->completion_lock);
	} else {
		lockdep_assert_held(&req->ctx->uring_lock);

		cflags = __io_put_kbuf_list(req, &req->ctx->io_buffers_cache);
	}
	return cflags;
}
static void __user *io_provided_buffer_select(struct io_kiocb *req, size_t *len,
					      struct io_buffer_list *bl)
{
	if (!list_empty(&bl->buf_list)) {
		struct io_buffer *kbuf;

		kbuf = list_first_entry(&bl->buf_list, struct io_buffer, list);
		list_del(&kbuf->list);
		if (*len == 0 || *len > kbuf->len)
			*len = kbuf->len;
		req->flags |= REQ_F_BUFFER_SELECTED;
		req->kbuf = kbuf;
		req->buf_index = kbuf->bid;
		return u64_to_user_ptr(kbuf->addr);
	}

	return NULL;
}
static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len,
					  struct io_buffer_list *bl,
					  unsigned int issue_flags)
{
	struct io_uring_buf_ring *br = bl->buf_ring;
	struct io_uring_buf *buf;
	__u16 head = bl->head;

	if (unlikely(smp_load_acquire(&br->tail) == head))
		return NULL;

	head &= bl->mask;
	/* mmaped buffers are always contig */
	if (bl->is_mmap || head < IO_BUFFER_LIST_BUF_PER_PAGE) {
		buf = &br->bufs[head];
	} else {
		int off = head & (IO_BUFFER_LIST_BUF_PER_PAGE - 1);
		int index = head / IO_BUFFER_LIST_BUF_PER_PAGE;
		buf = page_address(bl->buf_pages[index]);
		buf += off;
	}
	if (*len == 0 || *len > buf->len)
		*len = buf->len;
	req->flags |= REQ_F_BUFFER_RING;
	req->buf_list = bl;
	req->buf_index = buf->bid;

	if (issue_flags & IO_URING_F_UNLOCKED || !file_can_poll(req->file)) {
		/*
		 * If we came in unlocked, we have no choice but to consume the
		 * buffer here, otherwise nothing ensures that the buffer won't
		 * get used by others. This does mean it'll be pinned until the
		 * IO completes, coming in unlocked means we're being called from
		 * io-wq context and there may be further retries in async hybrid
		 * mode. For the locked case, the caller must call commit when
		 * the transfer completes (or if we get -EAGAIN and must poll or
		 * retry).
		 */
		req->buf_list = NULL;
		bl->head++;
	}
	return u64_to_user_ptr(buf->addr);
}
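
/*
 * Illustrative userspace counterpart (a liburing sketch, not part of
 * this file): the application is the producer side of the same ring,
 * publishing buffers at the tail while io_ring_buffer_select() above
 * consumes them from the head. "br", "bufs", NR_BUFS and BUF_LEN are
 * placeholders.
 *
 *	int mask = io_uring_buf_ring_mask(NR_BUFS);
 *
 *	for (int i = 0; i < NR_BUFS; i++)
 *		io_uring_buf_ring_add(br, bufs[i], BUF_LEN, i, mask, i);
 *	// one release store of the new tail; pairs with the
 *	// smp_load_acquire(&br->tail) above
 *	io_uring_buf_ring_advance(br, NR_BUFS);
 */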
void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
			      unsigned int issue_flags)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_buffer_list *bl;
	void __user *ret = NULL;

	io_ring_submit_lock(req->ctx, issue_flags);

	bl = io_buffer_get_list(ctx, req->buf_index);
	if (likely(bl)) {
		if (bl->is_mapped)
			ret = io_ring_buffer_select(req, len, bl, issue_flags);
		else
			ret = io_provided_buffer_select(req, len, bl);
	}
	io_ring_submit_unlock(req->ctx, issue_flags);
	return ret;
}
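
/*
 * Illustrative userspace sketch (liburing, not part of this file): a
 * request opts into buffer selection with IOSQE_BUFFER_SELECT and names
 * the group in sqe->buf_group; io_buffer_select() above then picks the
 * buffer, and its ID is reported back in cqe->flags. "ring", "sockfd"
 * and BGID are placeholders.
 *
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *
 *	io_uring_prep_recv(sqe, sockfd, NULL, 0, 0);
 *	io_uring_sqe_set_flags(sqe, IOSQE_BUFFER_SELECT);
 *	sqe->buf_group = BGID;
 *	io_uring_submit(&ring);
 *	...
 *	if (cqe->flags & IORING_CQE_F_BUFFER)
 *		bid = cqe->flags >> IORING_CQE_BUFFER_SHIFT;
 */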
static __cold int io_init_bl_list(struct io_ring_ctx *ctx)
{
	struct io_buffer_list *bl;
	int i;

	bl = kcalloc(BGID_ARRAY, sizeof(struct io_buffer_list), GFP_KERNEL);
	if (!bl)
		return -ENOMEM;

	for (i = 0; i < BGID_ARRAY; i++) {
		INIT_LIST_HEAD(&bl[i].buf_list);
		bl[i].bgid = i;
	}

	smp_store_release(&ctx->io_bl, bl);
	return 0;
}
static int __io_remove_buffers(struct io_ring_ctx *ctx,
			       struct io_buffer_list *bl, unsigned nbufs)
{
	unsigned i = 0;

	/* shouldn't happen */
	if (!nbufs)
		return 0;

	if (bl->is_mapped) {
		i = bl->buf_ring->tail - bl->head;
		if (bl->is_mmap) {
			folio_put(virt_to_folio(bl->buf_ring));
			bl->buf_ring = NULL;
			bl->is_mmap = 0;
		} else if (bl->buf_nr_pages) {
			int j;

			for (j = 0; j < bl->buf_nr_pages; j++)
				unpin_user_page(bl->buf_pages[j]);
			kvfree(bl->buf_pages);
			bl->buf_pages = NULL;
			bl->buf_nr_pages = 0;
		}
		/* make sure it's seen as empty */
		INIT_LIST_HEAD(&bl->buf_list);
		bl->is_mapped = 0;
		return i;
	}

	/* protects io_buffers_cache */
	lockdep_assert_held(&ctx->uring_lock);

	while (!list_empty(&bl->buf_list)) {
		struct io_buffer *nxt;

		nxt = list_first_entry(&bl->buf_list, struct io_buffer, list);
		list_move(&nxt->list, &ctx->io_buffers_cache);
		if (++i == nbufs)
			return i;
		cond_resched();
	}

	return i;
}
void io_destroy_buffers(struct io_ring_ctx *ctx)
{
	struct io_buffer_list *bl;
	unsigned long index;
	int i;

	for (i = 0; i < BGID_ARRAY; i++) {
		if (!ctx->io_bl)
			break;
		__io_remove_buffers(ctx, &ctx->io_bl[i], -1U);
	}

	xa_for_each(&ctx->io_bl_xa, index, bl) {
		xa_erase(&ctx->io_bl_xa, bl->bgid);
		__io_remove_buffers(ctx, bl, -1U);
		kfree_rcu(bl, rcu);
	}

	while (!list_empty(&ctx->io_buffers_pages)) {
		struct page *page;

		page = list_first_entry(&ctx->io_buffers_pages, struct page, lru);
		list_del_init(&page->lru);
		__free_page(page);
	}
}
int io_remove_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_provide_buf *p = io_kiocb_to_cmd(req, struct io_provide_buf);
	u64 tmp;

	if (sqe->rw_flags || sqe->addr || sqe->len || sqe->off ||
	    sqe->splice_fd_in)
		return -EINVAL;

	tmp = READ_ONCE(sqe->fd);
	if (!tmp || tmp > MAX_BIDS_PER_BGID)
		return -EINVAL;

	memset(p, 0, sizeof(*p));
	p->nbufs = tmp;
	p->bgid = READ_ONCE(sqe->buf_group);
	return 0;
}
int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_provide_buf *p = io_kiocb_to_cmd(req, struct io_provide_buf);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_buffer_list *bl;
	int ret = 0;

	io_ring_submit_lock(ctx, issue_flags);

	ret = -ENOENT;
	bl = io_buffer_get_list(ctx, p->bgid);
	if (bl) {
		ret = -EINVAL;
		/* can't use provide/remove buffers command on mapped buffers */
		if (!bl->is_mapped)
			ret = __io_remove_buffers(ctx, bl, p->nbufs);
	}
	io_ring_submit_unlock(ctx, issue_flags);
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}
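
/*
 * Illustrative userspace sketch (liburing, not part of this file):
 * IORING_OP_REMOVE_BUFFERS, handled above, takes back up to "nr"
 * unconsumed classic buffers from group BGID (placeholders here).
 *
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *
 *	io_uring_prep_remove_buffers(sqe, nr, BGID);
 *	io_uring_submit(&ring);
 */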
int io_provide_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	unsigned long size, tmp_check;
	struct io_provide_buf *p = io_kiocb_to_cmd(req, struct io_provide_buf);
	u64 tmp;

	if (sqe->rw_flags || sqe->splice_fd_in)
		return -EINVAL;

	tmp = READ_ONCE(sqe->fd);
	if (!tmp || tmp > MAX_BIDS_PER_BGID)
		return -E2BIG;
	p->nbufs = tmp;
	p->addr = READ_ONCE(sqe->addr);
	p->len = READ_ONCE(sqe->len);

	if (check_mul_overflow((unsigned long)p->len, (unsigned long)p->nbufs,
				&size))
		return -EOVERFLOW;
	if (check_add_overflow((unsigned long)p->addr, size, &tmp_check))
		return -EOVERFLOW;

	size = (unsigned long)p->len * p->nbufs;
	if (!access_ok(u64_to_user_ptr(p->addr), size))
		return -EFAULT;

	p->bgid = READ_ONCE(sqe->buf_group);
	tmp = READ_ONCE(sqe->off);
	if (tmp > USHRT_MAX)
		return -E2BIG;
	if (tmp + p->nbufs > MAX_BIDS_PER_BGID)
		return -EINVAL;
	p->bid = tmp;
	return 0;
}
static int io_refill_buffer_cache(struct io_ring_ctx *ctx)
{
	struct io_buffer *buf;
	struct page *page;
	int bufs_in_page;

	/*
	 * Completions that don't happen inline (eg not under uring_lock) will
	 * add to ->io_buffers_comp. If we don't have any free buffers, check
	 * the completion list and splice those entries first.
	 */
	if (!list_empty_careful(&ctx->io_buffers_comp)) {
		spin_lock(&ctx->completion_lock);
		if (!list_empty(&ctx->io_buffers_comp)) {
			list_splice_init(&ctx->io_buffers_comp,
						&ctx->io_buffers_cache);
			spin_unlock(&ctx->completion_lock);
			return 0;
		}
		spin_unlock(&ctx->completion_lock);
	}

	/*
	 * No free buffers and no completion entries either. Allocate a new
	 * page worth of buffer entries and add those to our freelist.
	 */
	page = alloc_page(GFP_KERNEL_ACCOUNT);
	if (!page)
		return -ENOMEM;

	list_add(&page->lru, &ctx->io_buffers_pages);

	buf = page_address(page);
	bufs_in_page = PAGE_SIZE / sizeof(*buf);
	while (bufs_in_page) {
		list_add_tail(&buf->list, &ctx->io_buffers_cache);
		buf++;
		bufs_in_page--;
	}

	return 0;
}
static int io_add_buffers(struct io_ring_ctx *ctx, struct io_provide_buf *pbuf,
			  struct io_buffer_list *bl)
{
	struct io_buffer *buf;
	u64 addr = pbuf->addr;
	int i, bid = pbuf->bid;

	for (i = 0; i < pbuf->nbufs; i++) {
		if (list_empty(&ctx->io_buffers_cache) &&
		    io_refill_buffer_cache(ctx))
			break;
		buf = list_first_entry(&ctx->io_buffers_cache, struct io_buffer,
					list);
		list_move_tail(&buf->list, &bl->buf_list);
		buf->addr = addr;
		buf->len = min_t(__u32, pbuf->len, MAX_RW_COUNT);
		buf->bid = bid;
		buf->bgid = pbuf->bgid;
		addr += pbuf->len;
		bid++;
		cond_resched();
	}

	return i ? 0 : -ENOMEM;
}
int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_provide_buf *p = io_kiocb_to_cmd(req, struct io_provide_buf);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_buffer_list *bl;
	int ret = 0;

	io_ring_submit_lock(ctx, issue_flags);

	if (unlikely(p->bgid < BGID_ARRAY && !ctx->io_bl)) {
		ret = io_init_bl_list(ctx);
		if (ret)
			goto err;
	}

	bl = io_buffer_get_list(ctx, p->bgid);
	if (unlikely(!bl)) {
		bl = kzalloc(sizeof(*bl), GFP_KERNEL_ACCOUNT);
		if (!bl) {
			ret = -ENOMEM;
			goto err;
		}
		INIT_LIST_HEAD(&bl->buf_list);
		ret = io_buffer_add_list(ctx, bl, p->bgid);
		if (ret) {
			/*
			 * Doesn't need rcu free as it was never visible, but
			 * let's keep it consistent throughout. Also can't
			 * be a lower indexed array group, as adding one
			 * where lookup failed cannot happen.
			 */
			if (p->bgid >= BGID_ARRAY)
				kfree_rcu(bl, rcu);
			else
				WARN_ON_ONCE(1);
			goto err;
		}
	}
	/* can't add buffers via this command for a mapped buffer ring */
	if (bl->is_mapped) {
		ret = -EINVAL;
		goto err;
	}

	ret = io_add_buffers(ctx, p, bl);
err:
	io_ring_submit_unlock(ctx, issue_flags);

	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}
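
/*
 * Illustrative userspace sketch (liburing, not part of this file):
 * classic provided buffers reach io_provide_buffers() above via an
 * IORING_OP_PROVIDE_BUFFERS request. "ring", "base", NR_BUFS, BUF_LEN
 * and BGID are placeholders.
 *
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *
 *	// NR_BUFS buffers of BUF_LEN bytes each, group BGID, first bid 0
 *	io_uring_prep_provide_buffers(sqe, base, BUF_LEN, NR_BUFS, BGID, 0);
 *	io_uring_submit(&ring);
 */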
static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg,
			    struct io_buffer_list *bl)
{
	struct io_uring_buf_ring *br;
	struct page **pages;
	int i, nr_pages;

	pages = io_pin_pages(reg->ring_addr,
			     flex_array_size(br, bufs, reg->ring_entries),
			     &nr_pages);
	if (IS_ERR(pages))
		return PTR_ERR(pages);

	/*
	 * Apparently some 32-bit boxes (ARM) will return highmem pages,
	 * which then need to be mapped. We could support that, but it'd
	 * complicate the code and slowdown the common cases quite a bit.
	 * So just error out, returning -EINVAL just like we did on kernels
	 * that didn't support mapped buffer rings.
	 */
	for (i = 0; i < nr_pages; i++)
		if (PageHighMem(pages[i]))
			goto error_unpin;

	br = page_address(pages[0]);
#ifdef SHM_COLOUR
	/*
	 * On platforms that have specific aliasing requirements, SHM_COLOUR
	 * is set and we must guarantee that the kernel and user side align
	 * nicely. We cannot do that if IOU_PBUF_RING_MMAP isn't set and
	 * the application mmap's the provided ring buffer. Fail the request
	 * if we, by chance, don't end up with aligned addresses. The app
	 * should use IOU_PBUF_RING_MMAP instead, and liburing will handle
	 * this transparently.
	 */
	if ((reg->ring_addr | (unsigned long) br) & (SHM_COLOUR - 1))
		goto error_unpin;
#endif
	bl->buf_pages = pages;
	bl->buf_nr_pages = nr_pages;
	bl->buf_ring = br;
	bl->is_mapped = 1;
	bl->is_mmap = 0;
	return 0;

error_unpin:
	for (i = 0; i < nr_pages; i++)
		unpin_user_page(pages[i]);
	kvfree(pages);
	return -EINVAL;
}
static int io_alloc_pbuf_ring(struct io_uring_buf_reg *reg,
			      struct io_buffer_list *bl)
{
	gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP;
	size_t ring_size;
	void *ptr;

	ring_size = reg->ring_entries * sizeof(struct io_uring_buf_ring);
	ptr = (void *) __get_free_pages(gfp, get_order(ring_size));
	if (!ptr)
		return -ENOMEM;

	bl->buf_ring = ptr;
	bl->is_mapped = 1;
	bl->is_mmap = 1;
	return 0;
}
int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
{
	struct io_uring_buf_reg reg;
	struct io_buffer_list *bl, *free_bl = NULL;
	int ret;

	lockdep_assert_held(&ctx->uring_lock);

	if (copy_from_user(&reg, arg, sizeof(reg)))
		return -EFAULT;

	if (reg.resv[0] || reg.resv[1] || reg.resv[2])
		return -EINVAL;
	if (reg.flags & ~IOU_PBUF_RING_MMAP)
		return -EINVAL;
	if (!(reg.flags & IOU_PBUF_RING_MMAP)) {
		if (!reg.ring_addr)
			return -EFAULT;
		if (reg.ring_addr & ~PAGE_MASK)
			return -EINVAL;
	} else {
		if (reg.ring_addr)
			return -EINVAL;
	}

	if (!is_power_of_2(reg.ring_entries))
		return -EINVAL;

	/* cannot disambiguate full vs empty due to head/tail size */
	if (reg.ring_entries >= 65536)
		return -EINVAL;

	if (unlikely(reg.bgid < BGID_ARRAY && !ctx->io_bl)) {
		int ret = io_init_bl_list(ctx);
		if (ret)
			return ret;
	}

	bl = io_buffer_get_list(ctx, reg.bgid);
	if (bl) {
		/* if mapped buffer ring OR classic exists, don't allow */
		if (bl->is_mapped || !list_empty(&bl->buf_list))
			return -EEXIST;
	} else {
		free_bl = bl = kzalloc(sizeof(*bl), GFP_KERNEL);
		if (!bl)
			return -ENOMEM;
	}

	if (!(reg.flags & IOU_PBUF_RING_MMAP))
		ret = io_pin_pbuf_ring(&reg, bl);
	else
		ret = io_alloc_pbuf_ring(&reg, bl);

	if (!ret) {
		bl->nr_entries = reg.ring_entries;
		bl->mask = reg.ring_entries - 1;

		io_buffer_add_list(ctx, bl, reg.bgid);
		return 0;
	}

	kfree_rcu(free_bl, rcu);
	return ret;
}
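
/*
 * Illustrative userspace sketch (not part of this file): registering a
 * pinned buffer ring for group BGID lands in io_register_pbuf_ring()
 * above. liburing wraps the io_uring_register(2) call in
 * io_uring_register_buf_ring(). "ring", "br", RING_ENTRIES and BGID are
 * placeholders; "br" must be page aligned.
 *
 *	struct io_uring_buf_reg reg = {
 *		.ring_addr	= (unsigned long) br,
 *		.ring_entries	= RING_ENTRIES,	// power of 2, < 65536
 *		.bgid		= BGID,
 *	};
 *
 *	ret = io_uring_register_buf_ring(&ring, &reg, 0);
 */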
int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
{
	struct io_uring_buf_reg reg;
	struct io_buffer_list *bl;

	lockdep_assert_held(&ctx->uring_lock);

	if (copy_from_user(&reg, arg, sizeof(reg)))
		return -EFAULT;
	if (reg.resv[0] || reg.resv[1] || reg.resv[2])
		return -EINVAL;
	if (reg.flags)
		return -EINVAL;

	bl = io_buffer_get_list(ctx, reg.bgid);
	if (!bl)
		return -ENOENT;
	if (!bl->is_mapped)
		return -EINVAL;

	__io_remove_buffers(ctx, bl, -1U);
	if (bl->bgid >= BGID_ARRAY) {
		xa_erase(&ctx->io_bl_xa, bl->bgid);
		kfree_rcu(bl, rcu);
	}
	return 0;
}
void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid)
{
	struct io_buffer_list *bl;

	bl = __io_buffer_get_list(ctx, smp_load_acquire(&ctx->io_bl), bgid);

	if (!bl || !bl->is_mmap)
		return NULL;
	/*
	 * Ensure the list is fully setup. Only strictly needed for RCU lookup
	 * via mmap, and in that case only for the array indexed groups. For
	 * the xarray lookups, it's either visible and ready, or not at all.
	 */
	if (!smp_load_acquire(&bl->is_ready))
		return NULL;

	return bl->buf_ring;
}
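
/*
 * Illustrative userspace sketch (not part of this file): for rings
 * registered with IOU_PBUF_RING_MMAP, the application maps the
 * kernel-allocated ring that io_pbuf_get_address() exposes by calling
 * mmap(2) on the io_uring fd. "ring_fd", BGID and RING_ENTRIES are
 * placeholders.
 *
 *	off_t off = IORING_OFF_PBUF_RING |
 *		    ((__u64) BGID << IORING_OFF_PBUF_SHIFT);
 *	size_t len = RING_ENTRIES * sizeof(struct io_uring_buf);
 *
 *	br = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *		  MAP_SHARED | MAP_POPULATE, ring_fd, off);
 */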