vhost/vsock: add IOTLB API support
authorStefano Garzarella <sgarzare@redhat.com>
Wed, 23 Dec 2020 14:36:38 +0000 (15:36 +0100)
committerMichael S. Tsirkin <mst@redhat.com>
Sun, 27 Dec 2020 10:49:01 +0000 (05:49 -0500)
This patch enables the IOTLB API support for vhost-vsock devices,
allowing the userspace to emulate an IOMMU for the guest.

These changes were made following vhost-net, in details this patch:
- exposes VIRTIO_F_ACCESS_PLATFORM feature and inits the iotlb
  device if the feature is acked
- implements VHOST_GET_BACKEND_FEATURES and
  VHOST_SET_BACKEND_FEATURES ioctls
- calls vq_meta_prefetch() before vq processing to prefetch vq
  metadata address in IOTLB
- provides .read_iter, .write_iter, and .poll callbacks for the
  chardev; they are used by the userspace to exchange IOTLB messages

This patch was tested specifying "intel_iommu=strict" in the guest
kernel command line. I used QEMU with a patch applied [1] to fix a
simple issue (that patch was merged in QEMU v5.2.0):
    $ qemu -M q35,accel=kvm,kernel-irqchip=split \
           -drive file=fedora.qcow2,format=qcow2,if=virtio \
           -device intel-iommu,intremap=on,device-iotlb=on \
           -device vhost-vsock-pci,guest-cid=3,iommu_platform=on,ats=on

[1] https://lists.gnu.org/archive/html/qemu-devel/2020-10/msg09077.html

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Link: https://lore.kernel.org/r/20201223143638.123417-1-sgarzare@redhat.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
drivers/vhost/vsock.c

index a483cec31d5cbd2f9d69f5caa869b28a98d3fd6c..5e78fb719602dc3888b93ec0f762b453caa4387e 100644 (file)
 #define VHOST_VSOCK_PKT_WEIGHT 256
 
 enum {
-       VHOST_VSOCK_FEATURES = VHOST_FEATURES,
+       VHOST_VSOCK_FEATURES = VHOST_FEATURES |
+                              (1ULL << VIRTIO_F_ACCESS_PLATFORM)
+};
+
+enum {
+       VHOST_VSOCK_BACKEND_FEATURES = (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2)
 };
 
 /* Used to track all the vhost_vsock instances on the system. */
@@ -94,6 +99,9 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
        if (!vhost_vq_get_backend(vq))
                goto out;
 
+       if (!vq_meta_prefetch(vq))
+               goto out;
+
        /* Avoid further vmexits, we're already processing the virtqueue */
        vhost_disable_notify(&vsock->dev, vq);
 
@@ -449,6 +457,9 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
        if (!vhost_vq_get_backend(vq))
                goto out;
 
+       if (!vq_meta_prefetch(vq))
+               goto out;
+
        vhost_disable_notify(&vsock->dev, vq);
        do {
                u32 len;
@@ -766,8 +777,12 @@ static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features)
        mutex_lock(&vsock->dev.mutex);
        if ((features & (1 << VHOST_F_LOG_ALL)) &&
            !vhost_log_access_ok(&vsock->dev)) {
-               mutex_unlock(&vsock->dev.mutex);
-               return -EFAULT;
+               goto err;
+       }
+
+       if ((features & (1ULL << VIRTIO_F_ACCESS_PLATFORM))) {
+               if (vhost_init_device_iotlb(&vsock->dev, true))
+                       goto err;
        }
 
        for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
@@ -778,6 +793,10 @@ static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features)
        }
        mutex_unlock(&vsock->dev.mutex);
        return 0;
+
+err:
+       mutex_unlock(&vsock->dev.mutex);
+       return -EFAULT;
 }
 
 static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl,
@@ -811,6 +830,18 @@ static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl,
                if (copy_from_user(&features, argp, sizeof(features)))
                        return -EFAULT;
                return vhost_vsock_set_features(vsock, features);
+       case VHOST_GET_BACKEND_FEATURES:
+               features = VHOST_VSOCK_BACKEND_FEATURES;
+               if (copy_to_user(argp, &features, sizeof(features)))
+                       return -EFAULT;
+               return 0;
+       case VHOST_SET_BACKEND_FEATURES:
+               if (copy_from_user(&features, argp, sizeof(features)))
+                       return -EFAULT;
+               if (features & ~VHOST_VSOCK_BACKEND_FEATURES)
+                       return -EOPNOTSUPP;
+               vhost_set_backend_features(&vsock->dev, features);
+               return 0;
        default:
                mutex_lock(&vsock->dev.mutex);
                r = vhost_dev_ioctl(&vsock->dev, ioctl, argp);
@@ -823,6 +854,34 @@ static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl,
        }
 }
 
+static ssize_t vhost_vsock_chr_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+       struct file *file = iocb->ki_filp;
+       struct vhost_vsock *vsock = file->private_data;
+       struct vhost_dev *dev = &vsock->dev;
+       int noblock = file->f_flags & O_NONBLOCK;
+
+       return vhost_chr_read_iter(dev, to, noblock);
+}
+
+static ssize_t vhost_vsock_chr_write_iter(struct kiocb *iocb,
+                                       struct iov_iter *from)
+{
+       struct file *file = iocb->ki_filp;
+       struct vhost_vsock *vsock = file->private_data;
+       struct vhost_dev *dev = &vsock->dev;
+
+       return vhost_chr_write_iter(dev, from);
+}
+
+static __poll_t vhost_vsock_chr_poll(struct file *file, poll_table *wait)
+{
+       struct vhost_vsock *vsock = file->private_data;
+       struct vhost_dev *dev = &vsock->dev;
+
+       return vhost_chr_poll(file, dev, wait);
+}
+
 static const struct file_operations vhost_vsock_fops = {
        .owner          = THIS_MODULE,
        .open           = vhost_vsock_dev_open,
@@ -830,6 +889,9 @@ static const struct file_operations vhost_vsock_fops = {
        .llseek         = noop_llseek,
        .unlocked_ioctl = vhost_vsock_dev_ioctl,
        .compat_ioctl   = compat_ptr_ioctl,
+       .read_iter      = vhost_vsock_chr_read_iter,
+       .write_iter     = vhost_vsock_chr_write_iter,
+       .poll           = vhost_vsock_chr_poll,
 };
 
 static struct miscdevice vhost_vsock_misc = {