+static int io_sqe_buffer_unregister(struct io_ring_ctx *ctx)
+{
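+ /*
+ * Undo everything io_sqe_buffer_register() set up: drop the pinned
+ * page references, release the locked-memory accounting, and free
+ * the per-buffer bvec tables and the user_bufs array itself.
+ */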
+ int i, j;
+
+ if (!ctx->user_bufs)
+ return -ENXIO;
+
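+ /* drop the references taken by get_user_pages_longterm() */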
+ for (i = 0; i < ctx->nr_user_bufs; i++) {
+ struct io_mapped_ubuf *imu = &ctx->user_bufs[i];
+
+ for (j = 0; j < imu->nr_bvecs; j++)
+ put_page(imu->bvec[j].bv_page);
+
+ if (ctx->account_mem)
+ io_unaccount_mem(ctx->user, imu->nr_bvecs);
+ kfree(imu->bvec);
+ imu->nr_bvecs = 0;
+ }
+
+ kfree(ctx->user_bufs);
+ ctx->user_bufs = NULL;
+ ctx->nr_user_bufs = 0;
+ return 0;
+}
+
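+ /*
+ * Copy the index'th iovec from the user-supplied array. A 32-bit task
+ * on a 64-bit kernel passes an array of compat_iovecs with a smaller
+ * layout, so those are widened into a native struct iovec here.
+ */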
+static int io_copy_iov(struct io_ring_ctx *ctx, struct iovec *dst,
+ void __user *arg, unsigned index)
+{
+ struct iovec __user *src;
+
+#ifdef CONFIG_COMPAT
+ if (ctx->compat) {
+ struct compat_iovec __user *ciovs;
+ struct compat_iovec ciov;
+
+ ciovs = (struct compat_iovec __user *) arg;
+ if (copy_from_user(&ciov, &ciovs[index], sizeof(ciov)))
+ return -EFAULT;
+
+ dst->iov_base = (void __user *) (unsigned long) ciov.iov_base;
+ dst->iov_len = ciov.iov_len;
+ return 0;
+ }
+#endif
+ src = (struct iovec __user *) arg;
+ if (copy_from_user(dst, &src[index], sizeof(*dst)))
+ return -EFAULT;
+ return 0;
+}
+
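+ /*
+ * Register up to UIO_MAXIOV user buffers. Each buffer's pages are
+ * pinned once up front and described by a bio_vec table, so IO
+ * against a registered buffer never needs a per-operation
+ * get_user_pages().
+ */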
+static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg,
+ unsigned nr_args)
+{
+ struct vm_area_struct **vmas = NULL;
+ struct page **pages = NULL;
+ int i, j, got_pages = 0;
+ int ret = -EINVAL;
+
+ if (ctx->user_bufs)
+ return -EBUSY;
+ if (!nr_args || nr_args > UIO_MAXIOV)
+ return -EINVAL;
+
+ ctx->user_bufs = kcalloc(nr_args, sizeof(struct io_mapped_ubuf),
+ GFP_KERNEL);
+ if (!ctx->user_bufs)
+ return -ENOMEM;
+
+ for (i = 0; i < nr_args; i++) {
+ struct io_mapped_ubuf *imu = &ctx->user_bufs[i];
+ unsigned long off, start, end, ubuf;
+ int pret, nr_pages;
+ struct iovec iov;
+ size_t size;
+
+ ret = io_copy_iov(ctx, &iov, arg, i);
+ if (ret)
+ break;
+
+ /*
+ * Don't impose further limits on the size and buffer
+ * constraints here; if they are wrong, IO submission
+ * will fail with -EINVAL later.
+ */
+ ret = -EFAULT;
+ if (!iov.iov_base || !iov.iov_len)
+ goto err;
+
+ /* arbitrary limit, but we need something */
+ if (iov.iov_len > SZ_1G)
+ goto err;
+
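+ /*
+ * Work out how many pages the buffer spans: round the end up and
+ * the start down to page boundaries.
+ */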
+ ubuf = (unsigned long) iov.iov_base;
+ end = (ubuf + iov.iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ start = ubuf >> PAGE_SHIFT;
+ nr_pages = end - start;
+
+ if (ctx->account_mem) {
+ ret = io_account_mem(ctx->user, nr_pages);
+ if (ret)
+ goto err;
+ }
+
+ ret = 0;
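+ /*
+ * Reuse the page and vma scratch arrays across iterations;
+ * got_pages tracks their capacity, so they are only reallocated
+ * when a buffer spans more pages than any earlier one.
+ */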
+ if (!pages || nr_pages > got_pages) {
+ kfree(vmas);
+ kfree(pages);
+ pages = kmalloc_array(nr_pages, sizeof(struct page *),
+ GFP_KERNEL);
+ vmas = kmalloc_array(nr_pages,
+ sizeof(struct vm_area_struct *),
+ GFP_KERNEL);
+ if (!pages || !vmas) {
+ ret = -ENOMEM;
+ if (ctx->account_mem)
+ io_unaccount_mem(ctx->user, nr_pages);
+ goto err;
+ }
+ got_pages = nr_pages;
+ }
+
+ imu->bvec = kmalloc_array(nr_pages, sizeof(struct bio_vec),
+ GFP_KERNEL);
+ ret = -ENOMEM;
+ if (!imu->bvec) {
+ if (ctx->account_mem)
+ io_unaccount_mem(ctx->user, nr_pages);
+ goto err;
+ }
+
+ ret = 0;
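+ /*
+ * Pin the pages under mmap_sem. The _longterm variant refuses
+ * mappings (e.g. FS DAX) whose pages must not stay pinned
+ * indefinitely.
+ */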
+ down_read(&current->mm->mmap_sem);
+ pret = get_user_pages_longterm(ubuf, nr_pages, FOLL_WRITE,
+ pages, vmas);
+ if (pret == nr_pages) {
+ /* don't support file backed memory */
+ for (j = 0; j < nr_pages; j++) {
+ struct vm_area_struct *vma = vmas[j];
+
+ if (vma->vm_file &&
+ !is_file_hugepages(vma->vm_file)) {
+ ret = -EOPNOTSUPP;
+ break;
+ }
+ }
+ } else {
+ ret = pret < 0 ? pret : -EFAULT;
+ }
+ up_read(&current->mm->mmap_sem);
+ if (ret) {
+ /*
+ * if we did partial map, or found file backed vmas,
+ * release any pages we did get
+ */
+ if (pret > 0) {
+ for (j = 0; j < pret; j++)
+ put_page(pages[j]);
+ }
+ if (ctx->account_mem)
+ io_unaccount_mem(ctx->user, nr_pages);
+ /*
+ * nr_user_bufs hasn't been bumped yet, so the unregister
+ * call in the err path won't see this buffer; free the
+ * bvec table here to avoid leaking it.
+ */
+ kfree(imu->bvec);
+ goto err;
+ }
+
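+ /*
+ * Build the bio_vec table. Only the first vector may start at a
+ * non-zero offset within its page; the rest are page-aligned.
+ */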
+ off = ubuf & ~PAGE_MASK;
+ size = iov.iov_len;
+ for (j = 0; j < nr_pages; j++) {
+ size_t vec_len;
+
+ vec_len = min_t(size_t, size, PAGE_SIZE - off);
+ imu->bvec[j].bv_page = pages[j];
+ imu->bvec[j].bv_len = vec_len;
+ imu->bvec[j].bv_offset = off;
+ off = 0;
+ size -= vec_len;
+ }
+ /* store original address for later verification */
+ imu->ubuf = ubuf;
+ imu->len = iov.iov_len;
+ imu->nr_bvecs = nr_pages;
+
+ ctx->nr_user_bufs++;
+ }
+ kfree(pages);
+ kfree(vmas);
+ return 0;
+err:
+ kfree(pages);
+ kfree(vmas);
+ io_sqe_buffer_unregister(ctx);
+ return ret;
+}
+