vfio-pci: Avoid recursive read-lock usage
authorAlex Williamson <alex.williamson@redhat.com>
Mon, 17 Aug 2020 17:08:18 +0000 (11:08 -0600)
committerAlex Williamson <alex.williamson@redhat.com>
Mon, 17 Aug 2020 17:08:18 +0000 (11:08 -0600)
A down_read on memory_lock is held when performing read/write accesses
to MMIO BAR space, including across the copy_to/from_user() callouts
which may fault.  If the user buffer for these copies resides in an
mmap of device MMIO space, the mmap fault handler will acquire a
recursive read-lock on memory_lock.  Avoid this by reducing the lock
granularity.  Sequential accesses requiring multiple ioread/iowrite
cycles are expected to be rare, therefore typical accesses should not
see additional overhead.

VGA MMIO accesses are expected to be non-fatal regardless of the PCI
memory enable bit to allow legacy probing, this behavior remains with
a comment added.  ioeventfds are now included in memory access testing,
with writes dropped while memory space is disabled.

Fixes: abafbc551fdd ("vfio-pci: Invalidate mmaps and block MMIO access on disabled memory")
Reported-by: Zhiyi Guo <zhguo@redhat.com>
Tested-by: Zhiyi Guo <zhguo@redhat.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
drivers/vfio/pci/vfio_pci_private.h
drivers/vfio/pci/vfio_pci_rdwr.c

index 86a02af..61ca8ab 100644 (file)
 
 struct vfio_pci_ioeventfd {
        struct list_head        next;
+       struct vfio_pci_device  *vdev;
        struct virqfd           *virqfd;
        void __iomem            *addr;
        uint64_t                data;
        loff_t                  pos;
        int                     bar;
        int                     count;
+       bool                    test_mem;
 };
 
 struct vfio_pci_irq_ctx {
index 916b184..9e353c4 100644 (file)
 #define vfio_ioread8   ioread8
 #define vfio_iowrite8  iowrite8
 
+#define VFIO_IOWRITE(size) \
+static int vfio_pci_iowrite##size(struct vfio_pci_device *vdev,                \
+                       bool test_mem, u##size val, void __iomem *io)   \
+{                                                                      \
+       if (test_mem) {                                                 \
+               down_read(&vdev->memory_lock);                          \
+               if (!__vfio_pci_memory_enabled(vdev)) {                 \
+                       up_read(&vdev->memory_lock);                    \
+                       return -EIO;                                    \
+               }                                                       \
+       }                                                               \
+                                                                       \
+       vfio_iowrite##size(val, io);                                    \
+                                                                       \
+       if (test_mem)                                                   \
+               up_read(&vdev->memory_lock);                            \
+                                                                       \
+       return 0;                                                       \
+}
+
+VFIO_IOWRITE(8)
+VFIO_IOWRITE(16)
+VFIO_IOWRITE(32)
+#ifdef iowrite64
+VFIO_IOWRITE(64)
+#endif
+
+#define VFIO_IOREAD(size) \
+static int vfio_pci_ioread##size(struct vfio_pci_device *vdev,         \
+                       bool test_mem, u##size *val, void __iomem *io)  \
+{                                                                      \
+       if (test_mem) {                                                 \
+               down_read(&vdev->memory_lock);                          \
+               if (!__vfio_pci_memory_enabled(vdev)) {                 \
+                       up_read(&vdev->memory_lock);                    \
+                       return -EIO;                                    \
+               }                                                       \
+       }                                                               \
+                                                                       \
+       *val = vfio_ioread##size(io);                                   \
+                                                                       \
+       if (test_mem)                                                   \
+               up_read(&vdev->memory_lock);                            \
+                                                                       \
+       return 0;                                                       \
+}
+
+VFIO_IOREAD(8)
+VFIO_IOREAD(16)
+VFIO_IOREAD(32)
+
 /*
  * Read or write from an __iomem region (MMIO or I/O port) with an excluded
  * range which is inaccessible.  The excluded range drops writes and fills
  * reads with -1.  This is intended for handling MSI-X vector tables and
  * leftover space for ROM BARs.
  */
-static ssize_t do_io_rw(void __iomem *io, char __user *buf,
+static ssize_t do_io_rw(struct vfio_pci_device *vdev, bool test_mem,
+                       void __iomem *io, char __user *buf,
                        loff_t off, size_t count, size_t x_start,
                        size_t x_end, bool iswrite)
 {
        ssize_t done = 0;
+       int ret;
 
        while (count) {
                size_t fillable, filled;
@@ -66,9 +119,15 @@ static ssize_t do_io_rw(void __iomem *io, char __user *buf,
                                if (copy_from_user(&val, buf, 4))
                                        return -EFAULT;
 
-                               vfio_iowrite32(val, io + off);
+                               ret = vfio_pci_iowrite32(vdev, test_mem,
+                                                        val, io + off);
+                               if (ret)
+                                       return ret;
                        } else {
-                               val = vfio_ioread32(io + off);
+                               ret = vfio_pci_ioread32(vdev, test_mem,
+                                                       &val, io + off);
+                               if (ret)
+                                       return ret;
 
                                if (copy_to_user(buf, &val, 4))
                                        return -EFAULT;
@@ -82,9 +141,15 @@ static ssize_t do_io_rw(void __iomem *io, char __user *buf,
                                if (copy_from_user(&val, buf, 2))
                                        return -EFAULT;
 
-                               vfio_iowrite16(val, io + off);
+                               ret = vfio_pci_iowrite16(vdev, test_mem,
+                                                        val, io + off);
+                               if (ret)
+                                       return ret;
                        } else {
-                               val = vfio_ioread16(io + off);
+                               ret = vfio_pci_ioread16(vdev, test_mem,
+                                                       &val, io + off);
+                               if (ret)
+                                       return ret;
 
                                if (copy_to_user(buf, &val, 2))
                                        return -EFAULT;
@@ -98,9 +163,15 @@ static ssize_t do_io_rw(void __iomem *io, char __user *buf,
                                if (copy_from_user(&val, buf, 1))
                                        return -EFAULT;
 
-                               vfio_iowrite8(val, io + off);
+                               ret = vfio_pci_iowrite8(vdev, test_mem,
+                                                       val, io + off);
+                               if (ret)
+                                       return ret;
                        } else {
-                               val = vfio_ioread8(io + off);
+                               ret = vfio_pci_ioread8(vdev, test_mem,
+                                                      &val, io + off);
+                               if (ret)
+                                       return ret;
 
                                if (copy_to_user(buf, &val, 1))
                                        return -EFAULT;
@@ -178,14 +249,6 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,
 
        count = min(count, (size_t)(end - pos));
 
-       if (res->flags & IORESOURCE_MEM) {
-               down_read(&vdev->memory_lock);
-               if (!__vfio_pci_memory_enabled(vdev)) {
-                       up_read(&vdev->memory_lock);
-                       return -EIO;
-               }
-       }
-
        if (bar == PCI_ROM_RESOURCE) {
                /*
                 * The ROM can fill less space than the BAR, so we start the
@@ -213,7 +276,8 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,
                x_end = vdev->msix_offset + vdev->msix_size;
        }
 
-       done = do_io_rw(io, buf, pos, count, x_start, x_end, iswrite);
+       done = do_io_rw(vdev, res->flags & IORESOURCE_MEM, io, buf, pos,
+                       count, x_start, x_end, iswrite);
 
        if (done >= 0)
                *ppos += done;
@@ -221,9 +285,6 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,
        if (bar == PCI_ROM_RESOURCE)
                pci_unmap_rom(pdev, io);
 out:
-       if (res->flags & IORESOURCE_MEM)
-               up_read(&vdev->memory_lock);
-
        return done;
 }
 
@@ -278,7 +339,12 @@ ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf,
                return ret;
        }
 
-       done = do_io_rw(iomem, buf, off, count, 0, 0, iswrite);
+       /*
+        * VGA MMIO is a legacy, non-BAR resource that hopefully allows
+        * probing, so we don't currently worry about access in relation
+        * to the memory enable bit in the command register.
+        */
+       done = do_io_rw(vdev, false, iomem, buf, off, count, 0, 0, iswrite);
 
        vga_put(vdev->pdev, rsrc);
 
@@ -296,17 +362,21 @@ static int vfio_pci_ioeventfd_handler(void *opaque, void *unused)
 
        switch (ioeventfd->count) {
        case 1:
-               vfio_iowrite8(ioeventfd->data, ioeventfd->addr);
+               vfio_pci_iowrite8(ioeventfd->vdev, ioeventfd->test_mem,
+                                 ioeventfd->data, ioeventfd->addr);
                break;
        case 2:
-               vfio_iowrite16(ioeventfd->data, ioeventfd->addr);
+               vfio_pci_iowrite16(ioeventfd->vdev, ioeventfd->test_mem,
+                                  ioeventfd->data, ioeventfd->addr);
                break;
        case 4:
-               vfio_iowrite32(ioeventfd->data, ioeventfd->addr);
+               vfio_pci_iowrite32(ioeventfd->vdev, ioeventfd->test_mem,
+                                  ioeventfd->data, ioeventfd->addr);
                break;
 #ifdef iowrite64
        case 8:
-               vfio_iowrite64(ioeventfd->data, ioeventfd->addr);
+               vfio_pci_iowrite64(ioeventfd->vdev, ioeventfd->test_mem,
+                                  ioeventfd->data, ioeventfd->addr);
                break;
 #endif
        }
@@ -378,11 +448,13 @@ long vfio_pci_ioeventfd(struct vfio_pci_device *vdev, loff_t offset,
                goto out_unlock;
        }
 
+       ioeventfd->vdev = vdev;
        ioeventfd->addr = vdev->barmap[bar] + pos;
        ioeventfd->data = data;
        ioeventfd->pos = pos;
        ioeventfd->bar = bar;
        ioeventfd->count = count;
+       ioeventfd->test_mem = vdev->pdev->resource[bar].flags & IORESOURCE_MEM;
 
        ret = vfio_virqfd_enable(ioeventfd, vfio_pci_ioeventfd_handler,
                                 NULL, NULL, &ioeventfd->virqfd, fd);