From 855b7717f44b13e0990aa5ad36bbf9aa35051516 Mon Sep 17 00:00:00 2001 From: Kanchan Joshi Date: Mon, 31 Oct 2022 21:53:50 +0530 Subject: [PATCH] nvme: fine-granular CAP_SYS_ADMIN for nvme io commands Currently both io and admin commands are kept under a coarse-granular CAP_SYS_ADMIN check, disregarding file mode completely. $ ls -l /dev/ng* crw-rw-rw- 1 root root 242, 0 Sep 9 19:20 /dev/ng0n1 crw------- 1 root root 242, 1 Sep 9 19:20 /dev/ng0n2 In the example above, ng0n1 appears as if it may allow unprivileged read/write operation but it does not and behaves same as ng0n2. This patch implements a shift from CAP_SYS_ADMIN to more fine-granular control for io-commands. If CAP_SYS_ADMIN is present, nothing else is checked as before. Otherwise, following rules are in place - any admin-cmd is not allowed - vendor-specific and fabric commmand are not allowed - io-commands that can write are allowed if matching FMODE_WRITE permission is present - io-commands that read are allowed Add a helper nvme_cmd_allowed that implements above policy. Change all the callers of CAP_SYS_ADMIN to go through nvme_cmd_allowed for any decision making. Since file open mode is counted for any approval/denial, change at various places to keep file-mode information handy. Signed-off-by: Kanchan Joshi Reviewed-by: Jens Axboe Reviewed-by: Keith Busch Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/ioctl.c | 102 +++++++++++++++++++++++++++++++--------------- include/linux/nvme.h | 1 + 2 files changed, 70 insertions(+), 33 deletions(-) diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index 81f5550..1d68f16 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -8,6 +8,34 @@ #include #include "nvme.h" +static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c, + fmode_t mode) +{ + if (capable(CAP_SYS_ADMIN)) + return true; + + /* + * Do not allow unprivileged processes to send vendor specific or fabrics + * commands as we can't be sure about their effects. + */ + if (c->common.opcode >= nvme_cmd_vendor_start || + c->common.opcode == nvme_fabrics_command) + return false; + + /* do not allow unprivileged admin commands */ + if (!ns) + return false; + + /* + * Only allow I/O commands that transfer data to the controller if the + * special file is open for writing, but always allow I/O commands that + * transfer data from the controller. + */ + if (nvme_is_write(c)) + return mode & FMODE_WRITE; + return true; +} + /* * Convert integer values from ioctl structures to user pointers, silently * ignoring the upper bits in the compat case to match behaviour of 32-bit @@ -261,7 +289,7 @@ static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl, } static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, - struct nvme_passthru_cmd __user *ucmd) + struct nvme_passthru_cmd __user *ucmd, fmode_t mode) { struct nvme_passthru_cmd cmd; struct nvme_command c; @@ -269,8 +297,6 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u64 result; int status; - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; if (copy_from_user(&cmd, ucmd, sizeof(cmd))) return -EFAULT; if (cmd.flags) @@ -291,6 +317,9 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, c.common.cdw14 = cpu_to_le32(cmd.cdw14); c.common.cdw15 = cpu_to_le32(cmd.cdw15); + if (!nvme_cmd_allowed(ns, &c, mode)) + return -EACCES; + if (cmd.timeout_ms) timeout = msecs_to_jiffies(cmd.timeout_ms); @@ -308,15 +337,14 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, } static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, - struct nvme_passthru_cmd64 __user *ucmd, bool vec) + struct nvme_passthru_cmd64 __user *ucmd, bool vec, + fmode_t mode) { struct nvme_passthru_cmd64 cmd; struct nvme_command c; unsigned timeout = 0; int status; - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; if (copy_from_user(&cmd, ucmd, sizeof(cmd))) return -EFAULT; if (cmd.flags) @@ -337,6 +365,9 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, c.common.cdw14 = cpu_to_le32(cmd.cdw14); c.common.cdw15 = cpu_to_le32(cmd.cdw15); + if (!nvme_cmd_allowed(ns, &c, mode)) + return -EACCES; + if (cmd.timeout_ms) timeout = msecs_to_jiffies(cmd.timeout_ms); @@ -483,9 +514,6 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, void *meta = NULL; int ret; - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; - c.common.opcode = READ_ONCE(cmd->opcode); c.common.flags = READ_ONCE(cmd->flags); if (c.common.flags) @@ -507,6 +535,9 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, c.common.cdw14 = cpu_to_le32(READ_ONCE(cmd->cdw14)); c.common.cdw15 = cpu_to_le32(READ_ONCE(cmd->cdw15)); + if (!nvme_cmd_allowed(ns, &c, ioucmd->file->f_mode)) + return -EACCES; + d.metadata = READ_ONCE(cmd->metadata); d.addr = READ_ONCE(cmd->addr); d.data_len = READ_ONCE(cmd->data_len); @@ -570,13 +601,13 @@ static bool is_ctrl_ioctl(unsigned int cmd) } static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd, - void __user *argp) + void __user *argp, fmode_t mode) { switch (cmd) { case NVME_IOCTL_ADMIN_CMD: - return nvme_user_cmd(ctrl, NULL, argp); + return nvme_user_cmd(ctrl, NULL, argp, mode); case NVME_IOCTL_ADMIN64_CMD: - return nvme_user_cmd64(ctrl, NULL, argp, false); + return nvme_user_cmd64(ctrl, NULL, argp, false, mode); default: return sed_ioctl(ctrl->opal_dev, cmd, argp); } @@ -601,14 +632,14 @@ struct nvme_user_io32 { #endif /* COMPAT_FOR_U64_ALIGNMENT */ static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd, - void __user *argp) + void __user *argp, fmode_t mode) { switch (cmd) { case NVME_IOCTL_ID: force_successful_syscall_return(); return ns->head->ns_id; case NVME_IOCTL_IO_CMD: - return nvme_user_cmd(ns->ctrl, ns, argp); + return nvme_user_cmd(ns->ctrl, ns, argp, mode); /* * struct nvme_user_io can have different padding on some 32-bit ABIs. * Just accept the compat version as all fields that are used are the @@ -620,19 +651,20 @@ static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd, case NVME_IOCTL_SUBMIT_IO: return nvme_submit_io(ns, argp); case NVME_IOCTL_IO64_CMD: - return nvme_user_cmd64(ns->ctrl, ns, argp, false); + return nvme_user_cmd64(ns->ctrl, ns, argp, false, mode); case NVME_IOCTL_IO64_CMD_VEC: - return nvme_user_cmd64(ns->ctrl, ns, argp, true); + return nvme_user_cmd64(ns->ctrl, ns, argp, true, mode); default: return -ENOTTY; } } -static int __nvme_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *arg) +static int __nvme_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *arg, + fmode_t mode) { - if (is_ctrl_ioctl(cmd)) - return nvme_ctrl_ioctl(ns->ctrl, cmd, arg); - return nvme_ns_ioctl(ns, cmd, arg); + if (is_ctrl_ioctl(cmd)) + return nvme_ctrl_ioctl(ns->ctrl, cmd, arg, mode); + return nvme_ns_ioctl(ns, cmd, arg, mode); } int nvme_ioctl(struct block_device *bdev, fmode_t mode, @@ -640,7 +672,7 @@ int nvme_ioctl(struct block_device *bdev, fmode_t mode, { struct nvme_ns *ns = bdev->bd_disk->private_data; - return __nvme_ioctl(ns, cmd, (void __user *)arg); + return __nvme_ioctl(ns, cmd, (void __user *)arg, mode); } long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg) @@ -648,7 +680,7 @@ long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg) struct nvme_ns *ns = container_of(file_inode(file)->i_cdev, struct nvme_ns, cdev); - return __nvme_ioctl(ns, cmd, (void __user *)arg); + return __nvme_ioctl(ns, cmd, (void __user *)arg, file->f_mode); } static int nvme_uring_cmd_checks(unsigned int issue_flags) @@ -716,7 +748,8 @@ int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd, } #ifdef CONFIG_NVME_MULTIPATH static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd, - void __user *argp, struct nvme_ns_head *head, int srcu_idx) + void __user *argp, struct nvme_ns_head *head, int srcu_idx, + fmode_t mode) __releases(&head->srcu) { struct nvme_ctrl *ctrl = ns->ctrl; @@ -724,7 +757,7 @@ static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd, nvme_get_ctrl(ns->ctrl); srcu_read_unlock(&head->srcu, srcu_idx); - ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp); + ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp, mode); nvme_put_ctrl(ctrl); return ret; @@ -749,9 +782,10 @@ int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode, * deadlock when deleting namespaces using the passthrough interface. */ if (is_ctrl_ioctl(cmd)) - return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx); + return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx, + mode); - ret = nvme_ns_ioctl(ns, cmd, argp); + ret = nvme_ns_ioctl(ns, cmd, argp, mode); out_unlock: srcu_read_unlock(&head->srcu, srcu_idx); return ret; @@ -773,9 +807,10 @@ long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd, goto out_unlock; if (is_ctrl_ioctl(cmd)) - return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx); + return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx, + file->f_mode); - ret = nvme_ns_ioctl(ns, cmd, argp); + ret = nvme_ns_ioctl(ns, cmd, argp, file->f_mode); out_unlock: srcu_read_unlock(&head->srcu, srcu_idx); return ret; @@ -849,7 +884,8 @@ int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags) return ret; } -static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp) +static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp, + fmode_t mode) { struct nvme_ns *ns; int ret; @@ -873,7 +909,7 @@ static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp) kref_get(&ns->kref); up_read(&ctrl->namespaces_rwsem); - ret = nvme_user_cmd(ctrl, ns, argp); + ret = nvme_user_cmd(ctrl, ns, argp, mode); nvme_put_ns(ns); return ret; @@ -890,11 +926,11 @@ long nvme_dev_ioctl(struct file *file, unsigned int cmd, switch (cmd) { case NVME_IOCTL_ADMIN_CMD: - return nvme_user_cmd(ctrl, NULL, argp); + return nvme_user_cmd(ctrl, NULL, argp, file->f_mode); case NVME_IOCTL_ADMIN64_CMD: - return nvme_user_cmd64(ctrl, NULL, argp, false); + return nvme_user_cmd64(ctrl, NULL, argp, false, file->f_mode); case NVME_IOCTL_IO_CMD: - return nvme_dev_user_cmd(ctrl, argp); + return nvme_dev_user_cmd(ctrl, argp, file->f_mode); case NVME_IOCTL_RESET: if (!capable(CAP_SYS_ADMIN)) return -EACCES; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 050d7d0..1d102b6 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -797,6 +797,7 @@ enum nvme_opcode { nvme_cmd_zone_mgmt_send = 0x79, nvme_cmd_zone_mgmt_recv = 0x7a, nvme_cmd_zone_append = 0x7d, + nvme_cmd_vendor_start = 0x80, }; #define nvme_opcode_name(opcode) { opcode, #opcode } -- 2.7.4