block: change request end_io handler to pass back a return value
[platform/kernel/linux-starfive.git] / drivers / nvme / host / ioctl.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2011-2014, Intel Corporation.
 * Copyright (c) 2017-2021 Christoph Hellwig.
 */
#include <linux/ptrace.h>       /* for force_successful_syscall_return */
#include <linux/nvme_ioctl.h>
#include <linux/io_uring.h>
#include "nvme.h"

/*
 * Convert integer values from ioctl structures to user pointers, silently
 * ignoring the upper bits in the compat case to match behaviour of 32-bit
 * kernels.
 */
static void __user *nvme_to_user_ptr(uintptr_t ptrval)
{
        if (in_compat_syscall())
                ptrval = (compat_uptr_t)ptrval;
        return (void __user *)ptrval;
}

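/*
 * Copy user-space metadata into a freshly allocated kernel bounce buffer and
 * attach it to @bio as an integrity payload.  Returns the kernel buffer on
 * success so the caller can copy it back and free it once the command
 * completes.
 */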
static void *nvme_add_user_metadata(struct bio *bio, void __user *ubuf,
                unsigned len, u32 seed, bool write)
{
        struct bio_integrity_payload *bip;
        int ret = -ENOMEM;
        void *buf;

        buf = kmalloc(len, GFP_KERNEL);
        if (!buf)
                goto out;

        ret = -EFAULT;
        if (write && copy_from_user(buf, ubuf, len))
                goto out_free_meta;

        bip = bio_integrity_alloc(bio, GFP_KERNEL, 1);
        if (IS_ERR(bip)) {
                ret = PTR_ERR(bip);
                goto out_free_meta;
        }

        bip->bip_iter.bi_size = len;
        bip->bip_iter.bi_sector = seed;
        ret = bio_integrity_add_page(bio, virt_to_page(buf), len,
                        offset_in_page(buf));
        if (ret == len)
                return buf;
        ret = -ENOMEM;
out_free_meta:
        kfree(buf);
out:
        return ERR_PTR(ret);
}

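/*
 * Copy the metadata bounce buffer back to user space for reads
 * (REQ_OP_DRV_IN) and free it.  Called once the passthrough command has
 * completed.
 */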
static int nvme_finish_user_metadata(struct request *req, void __user *ubuf,
                void *meta, unsigned len, int ret)
{
        if (!ret && req_op(req) == REQ_OP_DRV_IN &&
            copy_to_user(ubuf, meta, len))
                ret = -EFAULT;
        kfree(meta);
        return ret;
}

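/*
 * Allocate a passthrough request for @cmd and map the user data buffer
 * (optionally described by an iovec array) and metadata buffer into it.  On
 * success the metadata bounce buffer, if any, is returned through @metap.
 */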
static struct request *nvme_alloc_user_request(struct request_queue *q,
                struct nvme_command *cmd, void __user *ubuffer,
                unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
                u32 meta_seed, void **metap, unsigned timeout, bool vec,
                blk_opf_t rq_flags, blk_mq_req_flags_t blk_flags)
{
        bool write = nvme_is_write(cmd);
        struct nvme_ns *ns = q->queuedata;
        struct block_device *bdev = ns ? ns->disk->part0 : NULL;
        struct request *req;
        struct bio *bio = NULL;
        void *meta = NULL;
        int ret;

        req = blk_mq_alloc_request(q, nvme_req_op(cmd) | rq_flags, blk_flags);
        if (IS_ERR(req))
                return req;
        nvme_init_request(req, cmd);

        if (timeout)
                req->timeout = timeout;
        nvme_req(req)->flags |= NVME_REQ_USERCMD;

        if (ubuffer && bufflen) {
                if (!vec)
                        ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen,
                                GFP_KERNEL);
                else {
                        struct iovec fast_iov[UIO_FASTIOV];
                        struct iovec *iov = fast_iov;
                        struct iov_iter iter;

                        ret = import_iovec(rq_data_dir(req), ubuffer, bufflen,
                                        UIO_FASTIOV, &iov, &iter);
                        if (ret < 0)
                                goto out;
                        ret = blk_rq_map_user_iov(q, req, NULL, &iter,
                                        GFP_KERNEL);
                        kfree(iov);
                }
                if (ret)
                        goto out;
                bio = req->bio;
                if (bdev)
                        bio_set_dev(bio, bdev);
                if (bdev && meta_buffer && meta_len) {
                        meta = nvme_add_user_metadata(bio, meta_buffer, meta_len,
                                        meta_seed, write);
                        if (IS_ERR(meta)) {
                                ret = PTR_ERR(meta);
                                goto out_unmap;
                        }
                        req->cmd_flags |= REQ_INTEGRITY;
                        *metap = meta;
                }
        }

        return req;

out_unmap:
        if (bio)
                blk_rq_unmap_user(bio);
out:
        blk_mq_free_request(req);
        return ERR_PTR(ret);
}

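/*
 * Synchronous passthrough path used by the ioctl handlers: allocate and map
 * the request, execute it, copy back metadata and the command result, then
 * unmap and free everything.
 */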
static int nvme_submit_user_cmd(struct request_queue *q,
                struct nvme_command *cmd, void __user *ubuffer,
                unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
                u32 meta_seed, u64 *result, unsigned timeout, bool vec)
{
        struct nvme_ctrl *ctrl;
        struct request *req;
        void *meta = NULL;
        struct bio *bio;
        u32 effects;
        int ret;

        req = nvme_alloc_user_request(q, cmd, ubuffer, bufflen, meta_buffer,
                        meta_len, meta_seed, &meta, timeout, vec, 0, 0);
        if (IS_ERR(req))
                return PTR_ERR(req);

        bio = req->bio;
        ctrl = nvme_req(req)->ctrl;

        ret = nvme_execute_passthru_rq(req, &effects);

        if (result)
                *result = le64_to_cpu(nvme_req(req)->result.u64);
        if (meta)
                ret = nvme_finish_user_metadata(req, meta_buffer, meta,
                                                meta_len, ret);
        if (bio)
                blk_rq_unmap_user(bio);
        blk_mq_free_request(req);

        if (effects)
                nvme_passthru_end(ctrl, effects, cmd, ret);

        return ret;
}

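/*
 * NVME_IOCTL_SUBMIT_IO: translate a struct nvme_user_io into an NVMe
 * read/write/compare command and submit it synchronously.
 */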
static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
{
        struct nvme_user_io io;
        struct nvme_command c;
        unsigned length, meta_len;
        void __user *metadata;

        if (copy_from_user(&io, uio, sizeof(io)))
                return -EFAULT;
        if (io.flags)
                return -EINVAL;

        switch (io.opcode) {
        case nvme_cmd_write:
        case nvme_cmd_read:
        case nvme_cmd_compare:
                break;
        default:
                return -EINVAL;
        }

        length = (io.nblocks + 1) << ns->lba_shift;

        if ((io.control & NVME_RW_PRINFO_PRACT) &&
            ns->ms == sizeof(struct t10_pi_tuple)) {
                /*
                 * Protection information is stripped/inserted by the
                 * controller.
                 */
                if (nvme_to_user_ptr(io.metadata))
                        return -EINVAL;
                meta_len = 0;
                metadata = NULL;
        } else {
                meta_len = (io.nblocks + 1) * ns->ms;
                metadata = nvme_to_user_ptr(io.metadata);
        }

        if (ns->features & NVME_NS_EXT_LBAS) {
                length += meta_len;
                meta_len = 0;
        } else if (meta_len) {
                if ((io.metadata & 3) || !io.metadata)
                        return -EINVAL;
        }

        memset(&c, 0, sizeof(c));
        c.rw.opcode = io.opcode;
        c.rw.flags = io.flags;
        c.rw.nsid = cpu_to_le32(ns->head->ns_id);
        c.rw.slba = cpu_to_le64(io.slba);
        c.rw.length = cpu_to_le16(io.nblocks);
        c.rw.control = cpu_to_le16(io.control);
        c.rw.dsmgmt = cpu_to_le32(io.dsmgmt);
        c.rw.reftag = cpu_to_le32(io.reftag);
        c.rw.apptag = cpu_to_le16(io.apptag);
        c.rw.appmask = cpu_to_le16(io.appmask);

        return nvme_submit_user_cmd(ns->queue, &c,
                        nvme_to_user_ptr(io.addr), length,
                        metadata, meta_len, lower_32_bits(io.slba), NULL, 0,
                        false);
}

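/*
 * Reject passthrough commands whose nsid does not match the namespace the
 * ioctl was issued against.
 */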
static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl,
                                        struct nvme_ns *ns, __u32 nsid)
{
        if (ns && nsid != ns->head->ns_id) {
                dev_err(ctrl->device,
                        "%s: nsid (%u) in cmd does not match nsid (%u) of namespace\n",
                        current->comm, nsid, ns->head->ns_id);
                return false;
        }

        return true;
}

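/*
 * NVME_IOCTL_ADMIN_CMD / NVME_IOCTL_IO_CMD: passthrough with the 32-bit
 * result field of struct nvme_passthru_cmd.
 */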
static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
                        struct nvme_passthru_cmd __user *ucmd)
{
        struct nvme_passthru_cmd cmd;
        struct nvme_command c;
        unsigned timeout = 0;
        u64 result;
        int status;

        if (!capable(CAP_SYS_ADMIN))
                return -EACCES;
        if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
                return -EFAULT;
        if (cmd.flags)
                return -EINVAL;
        if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid))
                return -EINVAL;

        memset(&c, 0, sizeof(c));
        c.common.opcode = cmd.opcode;
        c.common.flags = cmd.flags;
        c.common.nsid = cpu_to_le32(cmd.nsid);
        c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
        c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
        c.common.cdw10 = cpu_to_le32(cmd.cdw10);
        c.common.cdw11 = cpu_to_le32(cmd.cdw11);
        c.common.cdw12 = cpu_to_le32(cmd.cdw12);
        c.common.cdw13 = cpu_to_le32(cmd.cdw13);
        c.common.cdw14 = cpu_to_le32(cmd.cdw14);
        c.common.cdw15 = cpu_to_le32(cmd.cdw15);

        if (cmd.timeout_ms)
                timeout = msecs_to_jiffies(cmd.timeout_ms);

        status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
                        nvme_to_user_ptr(cmd.addr), cmd.data_len,
                        nvme_to_user_ptr(cmd.metadata), cmd.metadata_len,
                        0, &result, timeout, false);

        if (status >= 0) {
                if (put_user(result, &ucmd->result))
                        return -EFAULT;
        }

        return status;
}

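/*
 * NVME_IOCTL_ADMIN64_CMD / NVME_IOCTL_IO64_CMD(_VEC): same as above, but with
 * a 64-bit result and an optionally vectored data buffer.
 */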
static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
                        struct nvme_passthru_cmd64 __user *ucmd, bool vec)
{
        struct nvme_passthru_cmd64 cmd;
        struct nvme_command c;
        unsigned timeout = 0;
        int status;

        if (!capable(CAP_SYS_ADMIN))
                return -EACCES;
        if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
                return -EFAULT;
        if (cmd.flags)
                return -EINVAL;
        if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid))
                return -EINVAL;

        memset(&c, 0, sizeof(c));
        c.common.opcode = cmd.opcode;
        c.common.flags = cmd.flags;
        c.common.nsid = cpu_to_le32(cmd.nsid);
        c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
        c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
        c.common.cdw10 = cpu_to_le32(cmd.cdw10);
        c.common.cdw11 = cpu_to_le32(cmd.cdw11);
        c.common.cdw12 = cpu_to_le32(cmd.cdw12);
        c.common.cdw13 = cpu_to_le32(cmd.cdw13);
        c.common.cdw14 = cpu_to_le32(cmd.cdw14);
        c.common.cdw15 = cpu_to_le32(cmd.cdw15);

        if (cmd.timeout_ms)
                timeout = msecs_to_jiffies(cmd.timeout_ms);

        status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
                        nvme_to_user_ptr(cmd.addr), cmd.data_len,
                        nvme_to_user_ptr(cmd.metadata), cmd.metadata_len,
                        0, &cmd.result, timeout, vec);

        if (status >= 0) {
                if (put_user(cmd.result, &ucmd->result))
                        return -EFAULT;
        }

        return status;
}

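/* Fields decoded from the io_uring passthrough SQE payload. */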
struct nvme_uring_data {
        __u64   metadata;
        __u64   addr;
        __u32   data_len;
        __u32   metadata_len;
        __u32   timeout_ms;
};

/*
 * This overlays struct io_uring_cmd pdu.
 * Expect build errors if this grows larger than that.
 */
struct nvme_uring_cmd_pdu {
        union {
                struct bio *bio;
                struct request *req;
        };
        void *meta; /* kernel-resident buffer */
        void __user *meta_buffer;
        u32 meta_len;
};

static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu(
                struct io_uring_cmd *ioucmd)
{
        return (struct nvme_uring_cmd_pdu *)&ioucmd->pdu;
}

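/*
 * Finish a uring passthrough command: pick up status and result, copy back
 * any metadata, unmap the user buffer and post the io_uring completion.
 */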
static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd)
{
        struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
        struct request *req = pdu->req;
        struct bio *bio = req->bio;
        int status;
        u64 result;

        if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
                status = -EINTR;
        else
                status = nvme_req(req)->status;

        result = le64_to_cpu(nvme_req(req)->result.u64);

        if (pdu->meta)
                status = nvme_finish_user_metadata(req, pdu->meta_buffer,
                                        pdu->meta, pdu->meta_len, status);
        if (bio)
                blk_rq_unmap_user(bio);
        blk_mq_free_request(req);

        io_uring_cmd_done(ioucmd, status, result);
}

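/*
 * Request end_io callback: stash the request in the pdu and either complete
 * inline (polled I/O) or defer the final work to task context.
 */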
static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req,
                                                blk_status_t err)
{
        struct io_uring_cmd *ioucmd = req->end_io_data;
        struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
        /* extract bio before reusing the same field for request */
        struct bio *bio = pdu->bio;
        void *cookie = READ_ONCE(ioucmd->cookie);

        pdu->req = req;
        req->bio = bio;

        /*
         * For iopoll, complete it directly.
         * Otherwise, move the completion to task work.
         */
        if (cookie != NULL && blk_rq_is_poll(req))
                nvme_uring_task_cb(ioucmd);
        else
                io_uring_cmd_complete_in_task(ioucmd, nvme_uring_task_cb);

        return RQ_END_IO_NONE;
}

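/*
 * Submission path for io_uring passthrough: build the NVMe command from the
 * big SQE, map the user buffers and fire the request off without waiting.
 */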
static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
                struct io_uring_cmd *ioucmd, unsigned int issue_flags, bool vec)
{
        struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
        const struct nvme_uring_cmd *cmd = ioucmd->cmd;
        struct request_queue *q = ns ? ns->queue : ctrl->admin_q;
        struct nvme_uring_data d;
        struct nvme_command c;
        struct request *req;
        blk_opf_t rq_flags = 0;
        blk_mq_req_flags_t blk_flags = 0;
        void *meta = NULL;

        if (!capable(CAP_SYS_ADMIN))
                return -EACCES;

        c.common.opcode = READ_ONCE(cmd->opcode);
        c.common.flags = READ_ONCE(cmd->flags);
        if (c.common.flags)
                return -EINVAL;

        c.common.command_id = 0;
        c.common.nsid = cpu_to_le32(cmd->nsid);
        if (!nvme_validate_passthru_nsid(ctrl, ns, le32_to_cpu(c.common.nsid)))
                return -EINVAL;

        c.common.cdw2[0] = cpu_to_le32(READ_ONCE(cmd->cdw2));
        c.common.cdw2[1] = cpu_to_le32(READ_ONCE(cmd->cdw3));
        c.common.metadata = 0;
        c.common.dptr.prp1 = c.common.dptr.prp2 = 0;
        c.common.cdw10 = cpu_to_le32(READ_ONCE(cmd->cdw10));
        c.common.cdw11 = cpu_to_le32(READ_ONCE(cmd->cdw11));
        c.common.cdw12 = cpu_to_le32(READ_ONCE(cmd->cdw12));
        c.common.cdw13 = cpu_to_le32(READ_ONCE(cmd->cdw13));
        c.common.cdw14 = cpu_to_le32(READ_ONCE(cmd->cdw14));
        c.common.cdw15 = cpu_to_le32(READ_ONCE(cmd->cdw15));

        d.metadata = READ_ONCE(cmd->metadata);
        d.addr = READ_ONCE(cmd->addr);
        d.data_len = READ_ONCE(cmd->data_len);
        d.metadata_len = READ_ONCE(cmd->metadata_len);
        d.timeout_ms = READ_ONCE(cmd->timeout_ms);

        if (issue_flags & IO_URING_F_NONBLOCK) {
                rq_flags = REQ_NOWAIT;
                blk_flags = BLK_MQ_REQ_NOWAIT;
        }
        if (issue_flags & IO_URING_F_IOPOLL)
                rq_flags |= REQ_POLLED;

retry:
        req = nvme_alloc_user_request(q, &c, nvme_to_user_ptr(d.addr),
                        d.data_len, nvme_to_user_ptr(d.metadata),
                        d.metadata_len, 0, &meta, d.timeout_ms ?
                        msecs_to_jiffies(d.timeout_ms) : 0, vec, rq_flags,
                        blk_flags);
        if (IS_ERR(req))
                return PTR_ERR(req);
        req->end_io = nvme_uring_cmd_end_io;
        req->end_io_data = ioucmd;

        if (issue_flags & IO_URING_F_IOPOLL && rq_flags & REQ_POLLED) {
                if (unlikely(!req->bio)) {
                        /* we can't poll this, so alloc regular req instead */
                        blk_mq_free_request(req);
                        rq_flags &= ~REQ_POLLED;
                        goto retry;
                } else {
                        WRITE_ONCE(ioucmd->cookie, req->bio);
                        req->bio->bi_opf |= REQ_POLLED;
                }
        }
        /* to free bio on completion, as req->bio will be null at that time */
        pdu->bio = req->bio;
        pdu->meta = meta;
        pdu->meta_buffer = nvme_to_user_ptr(d.metadata);
        pdu->meta_len = d.metadata_len;

        blk_execute_rq_nowait(req, false);
        return -EIOCBQUEUED;
}

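/* Admin and SED ioctls target the controller rather than a namespace. */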
static bool is_ctrl_ioctl(unsigned int cmd)
{
        if (cmd == NVME_IOCTL_ADMIN_CMD || cmd == NVME_IOCTL_ADMIN64_CMD)
                return true;
        if (is_sed_ioctl(cmd))
                return true;
        return false;
}

static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd,
                void __user *argp)
{
        switch (cmd) {
        case NVME_IOCTL_ADMIN_CMD:
                return nvme_user_cmd(ctrl, NULL, argp);
        case NVME_IOCTL_ADMIN64_CMD:
                return nvme_user_cmd64(ctrl, NULL, argp, false);
        default:
                return sed_ioctl(ctrl->opal_dev, cmd, argp);
        }
}

#ifdef COMPAT_FOR_U64_ALIGNMENT
struct nvme_user_io32 {
        __u8    opcode;
        __u8    flags;
        __u16   control;
        __u16   nblocks;
        __u16   rsvd;
        __u64   metadata;
        __u64   addr;
        __u64   slba;
        __u32   dsmgmt;
        __u32   reftag;
        __u16   apptag;
        __u16   appmask;
} __attribute__((__packed__));
#define NVME_IOCTL_SUBMIT_IO32  _IOW('N', 0x42, struct nvme_user_io32)
#endif /* COMPAT_FOR_U64_ALIGNMENT */

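/* Namespace-scoped ioctls, shared by the block and character device nodes. */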
static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd,
                void __user *argp)
{
        switch (cmd) {
        case NVME_IOCTL_ID:
                force_successful_syscall_return();
                return ns->head->ns_id;
        case NVME_IOCTL_IO_CMD:
                return nvme_user_cmd(ns->ctrl, ns, argp);
        /*
         * struct nvme_user_io can have different padding on some 32-bit ABIs.
         * Just accept the compat version as all fields that are used are the
         * same size and at the same offset.
         */
#ifdef COMPAT_FOR_U64_ALIGNMENT
        case NVME_IOCTL_SUBMIT_IO32:
#endif
        case NVME_IOCTL_SUBMIT_IO:
                return nvme_submit_io(ns, argp);
        case NVME_IOCTL_IO64_CMD:
                return nvme_user_cmd64(ns->ctrl, ns, argp, false);
        case NVME_IOCTL_IO64_CMD_VEC:
                return nvme_user_cmd64(ns->ctrl, ns, argp, true);
        default:
                return -ENOTTY;
        }
}

static int __nvme_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *arg)
{
        if (is_ctrl_ioctl(cmd))
                return nvme_ctrl_ioctl(ns->ctrl, cmd, arg);
        return nvme_ns_ioctl(ns, cmd, arg);
}

int nvme_ioctl(struct block_device *bdev, fmode_t mode,
                unsigned int cmd, unsigned long arg)
{
        struct nvme_ns *ns = bdev->bd_disk->private_data;

        return __nvme_ioctl(ns, cmd, (void __user *)arg);
}

long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
        struct nvme_ns *ns =
                container_of(file_inode(file)->i_cdev, struct nvme_ns, cdev);

        return __nvme_ioctl(ns, cmd, (void __user *)arg);
}

static int nvme_uring_cmd_checks(unsigned int issue_flags)
{
        /* NVMe passthrough requires big SQE/CQE support */
        if ((issue_flags & (IO_URING_F_SQE128|IO_URING_F_CQE32)) !=
            (IO_URING_F_SQE128|IO_URING_F_CQE32))
                return -EOPNOTSUPP;
        return 0;
}

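/* Dispatch io_uring passthrough opcodes issued against a namespace. */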
static int nvme_ns_uring_cmd(struct nvme_ns *ns, struct io_uring_cmd *ioucmd,
                             unsigned int issue_flags)
{
        struct nvme_ctrl *ctrl = ns->ctrl;
        int ret;

        BUILD_BUG_ON(sizeof(struct nvme_uring_cmd_pdu) > sizeof(ioucmd->pdu));

        ret = nvme_uring_cmd_checks(issue_flags);
        if (ret)
                return ret;

        switch (ioucmd->cmd_op) {
        case NVME_URING_CMD_IO:
                ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, false);
                break;
        case NVME_URING_CMD_IO_VEC:
                ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, true);
                break;
        default:
                ret = -ENOTTY;
        }

        return ret;
}

int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags)
{
        struct nvme_ns *ns = container_of(file_inode(ioucmd->file)->i_cdev,
                        struct nvme_ns, cdev);

        return nvme_ns_uring_cmd(ns, ioucmd, issue_flags);
}

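/* Poll for completion of a previously issued polled passthrough command. */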
int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
                                 struct io_comp_batch *iob,
                                 unsigned int poll_flags)
{
        struct bio *bio;
        int ret = 0;
        struct nvme_ns *ns;
        struct request_queue *q;

        rcu_read_lock();
        bio = READ_ONCE(ioucmd->cookie);
        ns = container_of(file_inode(ioucmd->file)->i_cdev,
                        struct nvme_ns, cdev);
        q = ns->queue;
        if (test_bit(QUEUE_FLAG_POLL, &q->queue_flags) && bio && bio->bi_bdev)
                ret = bio_poll(bio, iob, poll_flags);
        rcu_read_unlock();
        return ret;
}
#ifdef CONFIG_NVME_MULTIPATH
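/*
 * Controller ioctls on a multipath namespace head: take a controller
 * reference and drop the head SRCU lock before executing the command.
 */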
static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
                void __user *argp, struct nvme_ns_head *head, int srcu_idx)
        __releases(&head->srcu)
{
        struct nvme_ctrl *ctrl = ns->ctrl;
        int ret;

        nvme_get_ctrl(ns->ctrl);
        srcu_read_unlock(&head->srcu, srcu_idx);
        ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp);

        nvme_put_ctrl(ctrl);
        return ret;
}

int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode,
                unsigned int cmd, unsigned long arg)
{
        struct nvme_ns_head *head = bdev->bd_disk->private_data;
        void __user *argp = (void __user *)arg;
        struct nvme_ns *ns;
        int srcu_idx, ret = -EWOULDBLOCK;

        srcu_idx = srcu_read_lock(&head->srcu);
        ns = nvme_find_path(head);
        if (!ns)
                goto out_unlock;

        /*
         * Handle ioctls that apply to the controller instead of the namespace
         * separately and drop the ns SRCU reference early.  This avoids a
         * deadlock when deleting namespaces using the passthrough interface.
         */
        if (is_ctrl_ioctl(cmd))
                return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx);

        ret = nvme_ns_ioctl(ns, cmd, argp);
out_unlock:
        srcu_read_unlock(&head->srcu, srcu_idx);
        return ret;
}

long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd,
                unsigned long arg)
{
        struct cdev *cdev = file_inode(file)->i_cdev;
        struct nvme_ns_head *head =
                container_of(cdev, struct nvme_ns_head, cdev);
        void __user *argp = (void __user *)arg;
        struct nvme_ns *ns;
        int srcu_idx, ret = -EWOULDBLOCK;

        srcu_idx = srcu_read_lock(&head->srcu);
        ns = nvme_find_path(head);
        if (!ns)
                goto out_unlock;

        if (is_ctrl_ioctl(cmd))
                return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx);

        ret = nvme_ns_ioctl(ns, cmd, argp);
out_unlock:
        srcu_read_unlock(&head->srcu, srcu_idx);
        return ret;
}

int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd,
                unsigned int issue_flags)
{
        struct cdev *cdev = file_inode(ioucmd->file)->i_cdev;
        struct nvme_ns_head *head = container_of(cdev, struct nvme_ns_head, cdev);
        int srcu_idx = srcu_read_lock(&head->srcu);
        struct nvme_ns *ns = nvme_find_path(head);
        int ret = -EINVAL;

        if (ns)
                ret = nvme_ns_uring_cmd(ns, ioucmd, issue_flags);
        srcu_read_unlock(&head->srcu, srcu_idx);
        return ret;
}

int nvme_ns_head_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
                                      struct io_comp_batch *iob,
                                      unsigned int poll_flags)
{
        struct cdev *cdev = file_inode(ioucmd->file)->i_cdev;
        struct nvme_ns_head *head = container_of(cdev, struct nvme_ns_head, cdev);
        int srcu_idx = srcu_read_lock(&head->srcu);
        struct nvme_ns *ns = nvme_find_path(head);
        struct bio *bio;
        int ret = 0;
        struct request_queue *q;

        if (ns) {
                rcu_read_lock();
                bio = READ_ONCE(ioucmd->cookie);
                q = ns->queue;
                if (test_bit(QUEUE_FLAG_POLL, &q->queue_flags) && bio
                                && bio->bi_bdev)
                        ret = bio_poll(bio, iob, poll_flags);
                rcu_read_unlock();
        }
        srcu_read_unlock(&head->srcu, srcu_idx);
        return ret;
}
#endif /* CONFIG_NVME_MULTIPATH */

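/* Admin passthrough submitted through the controller character device. */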
int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags)
{
        struct nvme_ctrl *ctrl = ioucmd->file->private_data;
        int ret;

        /* IOPOLL not supported yet */
        if (issue_flags & IO_URING_F_IOPOLL)
                return -EOPNOTSUPP;

        ret = nvme_uring_cmd_checks(issue_flags);
        if (ret)
                return ret;

        switch (ioucmd->cmd_op) {
        case NVME_URING_CMD_ADMIN:
                ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, false);
                break;
        case NVME_URING_CMD_ADMIN_VEC:
                ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, true);
                break;
        default:
                ret = -ENOTTY;
        }

        return ret;
}

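/*
 * Legacy NVME_IOCTL_IO_CMD on the controller character device: only allowed
 * when the controller exposes exactly one namespace.
 */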
static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
{
        struct nvme_ns *ns;
        int ret;

        down_read(&ctrl->namespaces_rwsem);
        if (list_empty(&ctrl->namespaces)) {
                ret = -ENOTTY;
                goto out_unlock;
        }

        ns = list_first_entry(&ctrl->namespaces, struct nvme_ns, list);
        if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) {
                dev_warn(ctrl->device,
                        "NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n");
                ret = -EINVAL;
                goto out_unlock;
        }

        dev_warn(ctrl->device,
                "using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n");
        kref_get(&ns->kref);
        up_read(&ctrl->namespaces_rwsem);

        ret = nvme_user_cmd(ctrl, ns, argp);
        nvme_put_ns(ns);
        return ret;

out_unlock:
        up_read(&ctrl->namespaces_rwsem);
        return ret;
}

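/* ioctl handler for the controller character device. */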
long nvme_dev_ioctl(struct file *file, unsigned int cmd,
                unsigned long arg)
{
        struct nvme_ctrl *ctrl = file->private_data;
        void __user *argp = (void __user *)arg;

        switch (cmd) {
        case NVME_IOCTL_ADMIN_CMD:
                return nvme_user_cmd(ctrl, NULL, argp);
        case NVME_IOCTL_ADMIN64_CMD:
                return nvme_user_cmd64(ctrl, NULL, argp, false);
        case NVME_IOCTL_IO_CMD:
                return nvme_dev_user_cmd(ctrl, argp);
        case NVME_IOCTL_RESET:
                if (!capable(CAP_SYS_ADMIN))
                        return -EACCES;
                dev_warn(ctrl->device, "resetting controller\n");
                return nvme_reset_ctrl_sync(ctrl);
        case NVME_IOCTL_SUBSYS_RESET:
                if (!capable(CAP_SYS_ADMIN))
                        return -EACCES;
                return nvme_reset_subsystem(ctrl);
        case NVME_IOCTL_RESCAN:
                if (!capable(CAP_SYS_ADMIN))
                        return -EACCES;
                nvme_queue_scan(ctrl);
                return 0;
        default:
                return -ENOTTY;
        }
}