virtio-pci: Use ioeventfd for virtqueue notify

author Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>

Fri, 17 Dec 2010 12:01:50 +0000 (12:01 +0000)

committer Michael S. Tsirkin <mst@redhat.com>

Mon, 10 Jan 2011 12:44:16 +0000 (14:44 +0200)
author Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
Fri, 17 Dec 2010 12:01:50 +0000 (12:01 +0000)
committer Michael S. Tsirkin <mst@redhat.com>
Mon, 10 Jan 2011 12:44:16 +0000 (14:44 +0200)
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c

index 13dd391..70c40ee 100644 (file)
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -83,6 +83,11 @@
  /* Flags track per-device state like workarounds for quirks in older guests. */
  #define VIRTIO_PCI_FLAG_BUS_MASTER_BUG  (1 << 0)
  
+/* For some devices, performance improves when virtqueue kick processing is
+ * decoupled from the vcpu thread by using ioeventfd. */
+#define VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT 1
+#define VIRTIO_PCI_FLAG_USE_IOEVENTFD   (1 << VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT)
+
  /* QEMU doesn't strictly need write barriers since everything runs in
   * lock-step.  We'll leave the calls to wmb() in though to make it obvious for
   * KVM or if kqemu gets SMP support.
@@ -107,6 +112,8 @@ typedef struct {
      /* Max. number of ports we can have for a the virtio-serial device */
      uint32_t max_virtserial_ports;
      virtio_net_conf net;
+    bool ioeventfd_disabled;
+    bool ioeventfd_started;
  } VirtIOPCIProxy;
  
  /* virtio device */
@@ -179,12 +186,132 @@ static int virtio_pci_load_queue(void * opaque, int n, QEMUFile *f)
      return 0;
  }
  
+/* Bind or unbind the host notifier of virtqueue n to guest writes of the
+ * VIRTIO_PCI_QUEUE_NOTIFY port, using kvm_set_ioeventfd_pio_word().
+ *
+ * assign=true:  initialise the notifier, then register its fd for port
+ *               proxy->addr + VIRTIO_PCI_QUEUE_NOTIFY with value n.
+ * assign=false: unregister the fd, process a kick that is still pending on
+ *               the notifier, then clean the notifier up.
+ *
+ * Returns a negative value as soon as one of the helpers reports one,
+ * otherwise the result of the kvm_set_ioeventfd_pio_word() call.
+ */
+static int virtio_pci_set_host_notifier_internal(VirtIOPCIProxy *proxy,
+                                                 int n, bool assign)
+{
+    VirtQueue *vq = virtio_get_queue(proxy->vdev, n);
+    EventNotifier *notifier = virtio_queue_get_host_notifier(vq);
+    int r;
+    if (assign) {
+        r = event_notifier_init(notifier, 1);
+        if (r < 0) {
+            return r;
+        }
+        r = kvm_set_ioeventfd_pio_word(event_notifier_get_fd(notifier),
+                                       proxy->addr + VIRTIO_PCI_QUEUE_NOTIFY,
+                                       n, assign);
+        if (r < 0) {
+            /* Registration failed: release the notifier again; r is returned
+             * at the end of the function. */
+            event_notifier_cleanup(notifier);
+        }
+    } else {
+        r = kvm_set_ioeventfd_pio_word(event_notifier_get_fd(notifier),
+                                       proxy->addr + VIRTIO_PCI_QUEUE_NOTIFY,
+                                       n, assign);
+        if (r < 0) {
+            /* Unregistering failed: return without cleaning the notifier up. */
+            return r;
+        }
+
+        /* Handle the race condition where the guest kicked and we deassigned
+         * before we got around to handling the kick.
+         */
+        if (event_notifier_test_and_clear(notifier)) {
+            virtio_queue_notify_vq(vq);
+        }
+
+        event_notifier_cleanup(notifier);
+    }
+    return r;
+}
+
+/* fd read handler for a virtqueue host notifier.  opaque is the VirtQueue
+ * the notifier belongs to; the queue is notified only if the notifier was
+ * actually set when it is tested and cleared. */
+static void virtio_pci_host_notifier_read(void *opaque)
+{
+    VirtQueue *vq = opaque;
+    EventNotifier *notifier = virtio_queue_get_host_notifier(vq);
+
+    if (!event_notifier_test_and_clear(notifier)) {
+        return;
+    }
+    virtio_queue_notify_vq(vq);
+}
+
+/* Install (assign=true) or remove (assign=false) the read handler on the
+ * host notifier fd of virtqueue n.  When installed, the handler is
+ * virtio_pci_host_notifier_read() with the VirtQueue as its opaque value. */
+static void virtio_pci_set_host_notifier_fd_handler(VirtIOPCIProxy *proxy,
+                                                    int n, bool assign)
+{
+    VirtQueue *vq = virtio_get_queue(proxy->vdev, n);
+    int fd = event_notifier_get_fd(virtio_queue_get_host_notifier(vq));
+
+    if (!assign) {
+        qemu_set_fd_handler(fd, NULL, NULL, NULL);
+        return;
+    }
+    qemu_set_fd_handler(fd, virtio_pci_host_notifier_read, NULL, vq);
+}
+
+/* Switch every virtqueue with a non-zero size over to ioeventfd-based kick
+ * handling.
+ *
+ * Returns 0 without doing anything unless VIRTIO_PCI_FLAG_USE_IOEVENTFD is
+ * set, ioeventfd is not disabled and it has not been started already.  If
+ * setting up any queue fails, the queues that were already switched are
+ * rolled back, ioeventfd_disabled is set (so later calls return early while
+ * it stays set) and the negative error is returned.
+ */
+static int virtio_pci_start_ioeventfd(VirtIOPCIProxy *proxy)
+{
+    int n, r;
+
+    if (!(proxy->flags & VIRTIO_PCI_FLAG_USE_IOEVENTFD) ||
+        proxy->ioeventfd_disabled ||
+        proxy->ioeventfd_started) {
+        return 0;
+    }
+
+    for (n = 0; n < VIRTIO_PCI_QUEUE_MAX; n++) {
+        /* Skip queues whose vring.num is 0. */
+        if (!virtio_queue_get_num(proxy->vdev, n)) {
+            continue;
+        }
+
+        r = virtio_pci_set_host_notifier_internal(proxy, n, true);
+        if (r < 0) {
+            goto assign_error;
+        }
+
+        virtio_pci_set_host_notifier_fd_handler(proxy, n, true);
+    }
+    proxy->ioeventfd_started = true;
+    return 0;
+
+assign_error:
+    /* n is the queue that failed; undo queues n-1 down to 0, removing the
+     * fd handler before the notifier itself is deassigned. */
+    while (--n >= 0) {
+        if (!virtio_queue_get_num(proxy->vdev, n)) {
+            continue;
+        }
+
+        virtio_pci_set_host_notifier_fd_handler(proxy, n, false);
+        virtio_pci_set_host_notifier_internal(proxy, n, false);
+    }
+    proxy->ioeventfd_started = false;
+    proxy->ioeventfd_disabled = true;
+    return r;
+}
+
+/* Take every virtqueue with a non-zero size off ioeventfd again: remove the
+ * fd handler first, then deassign the host notifier.  Does nothing when
+ * ioeventfd has not been started.  Always returns 0. */
+static int virtio_pci_stop_ioeventfd(VirtIOPCIProxy *proxy)
+{
+    int queue;
+
+    if (proxy->ioeventfd_started) {
+        for (queue = 0; queue < VIRTIO_PCI_QUEUE_MAX; queue++) {
+            if (virtio_queue_get_num(proxy->vdev, queue)) {
+                virtio_pci_set_host_notifier_fd_handler(proxy, queue, false);
+                virtio_pci_set_host_notifier_internal(proxy, queue, false);
+            }
+        }
+        proxy->ioeventfd_started = false;
+    }
+    return 0;
+}
+
  static void virtio_pci_reset(DeviceState *d)
  {
      VirtIOPCIProxy *proxy = container_of(d, VirtIOPCIProxy, pci_dev.qdev);
+    /* Stop ioeventfd before the virtio device itself is reset. */
+    virtio_pci_stop_ioeventfd(proxy);
      virtio_reset(proxy->vdev);
      msix_reset(&proxy->pci_dev);
-    proxy->flags = 0;
+    /* Clear only the bus-master quirk flag so that
+     * VIRTIO_PCI_FLAG_USE_IOEVENTFD, which lives in the same flags word,
+     * is kept across reset. */
+    proxy->flags &= ~VIRTIO_PCI_FLAG_BUS_MASTER_BUG;
  }
  
  static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
@@ -209,6 +336,7 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
      case VIRTIO_PCI_QUEUE_PFN:
          pa = (target_phys_addr_t)val << VIRTIO_PCI_QUEUE_ADDR_SHIFT;
          if (pa == 0) {
+            virtio_pci_stop_ioeventfd(proxy);
              virtio_reset(proxy->vdev);
              msix_unuse_all_vectors(&proxy->pci_dev);
          }
@@ -223,7 +351,16 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
          virtio_queue_notify(vdev, val);
          break;
      case VIRTIO_PCI_STATUS:
+        if (!(val & VIRTIO_CONFIG_S_DRIVER_OK)) {
+            virtio_pci_stop_ioeventfd(proxy);
+        }
+
          virtio_set_status(vdev, val & 0xFF);
+
+        if (val & VIRTIO_CONFIG_S_DRIVER_OK) {
+            virtio_pci_start_ioeventfd(proxy);
+        }
+
          if (vdev->status == 0) {
              virtio_reset(proxy->vdev);
              msix_unuse_all_vectors(&proxy->pci_dev);
@@ -403,6 +540,7 @@ static void virtio_write_config(PCIDevice *pci_dev, uint32_t address,
      if (PCI_COMMAND == address) {
          if (!(val & PCI_COMMAND_MASTER)) {
              if (!(proxy->flags & VIRTIO_PCI_FLAG_BUS_MASTER_BUG)) {
+                virtio_pci_stop_ioeventfd(proxy);
                  virtio_set_status(proxy->vdev,
                                    proxy->vdev->status & ~VIRTIO_CONFIG_S_DRIVER_OK);
              }
@@ -480,30 +618,30 @@ assign_error:
  static int virtio_pci_set_host_notifier(void *opaque, int n, bool assign)
  {
      VirtIOPCIProxy *proxy = opaque;
-    VirtQueue *vq = virtio_get_queue(proxy->vdev, n);
-    EventNotifier *notifier = virtio_queue_get_host_notifier(vq);
-    int r;
+
+    /* Stop using ioeventfd for virtqueue kick if the device starts using host
+     * notifiers.  This makes it easy to avoid stepping on each others' toes.
+     */
+    proxy->ioeventfd_disabled = assign;
      if (assign) {
-        r = event_notifier_init(notifier, 1);
-        if (r < 0) {
-            return r;
-        }
-        r = kvm_set_ioeventfd_pio_word(event_notifier_get_fd(notifier),
-                                       proxy->addr + VIRTIO_PCI_QUEUE_NOTIFY,
-                                       n, assign);
-        if (r < 0) {
-            event_notifier_cleanup(notifier);
-        }
+        virtio_pci_stop_ioeventfd(proxy);
+    }
+    /* There is no need to restart ioeventfd on deassign: the backend
+     * currently only stops on a status change away from ok, reset, vmstop
+     * and such.  If code to start is ever added here, it needs to check
+     * vmstate, device state etc. */
+    return virtio_pci_set_host_notifier_internal(proxy, n, assign);
+}
+
+/* Start ioeventfd when running is true and stop it otherwise; opaque is the
+ * VirtIOPCIProxy. */
+static void virtio_pci_vmstate_change(void *opaque, bool running)
+{
+    VirtIOPCIProxy *proxy = opaque;
+
+    if (running) {
+        virtio_pci_start_ioeventfd(proxy);
      } else {
-        r = kvm_set_ioeventfd_pio_word(event_notifier_get_fd(notifier),
-                                       proxy->addr + VIRTIO_PCI_QUEUE_NOTIFY,
-                                       n, assign);
-        if (r < 0) {
-            return r;
-        }
-        event_notifier_cleanup(notifier);
+        virtio_pci_stop_ioeventfd(proxy);
      }
-    return r;
  }
  
  static const VirtIOBindings virtio_pci_bindings = {
@@ -515,6 +653,7 @@ static const VirtIOBindings virtio_pci_bindings = {
      .get_features = virtio_pci_get_features,
      .set_host_notifier = virtio_pci_set_host_notifier,
      .set_guest_notifiers = virtio_pci_set_guest_notifiers,
+    .vmstate_change = virtio_pci_vmstate_change,
  };
  
  static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
@@ -559,10 +698,15 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
      pci_register_bar(&proxy->pci_dev, 0, size, PCI_BASE_ADDRESS_SPACE_IO,
                             virtio_map);
  
+    if (!kvm_has_many_ioeventfds()) {
+        proxy->flags &= ~VIRTIO_PCI_FLAG_USE_IOEVENTFD;
+    }
+
      virtio_bind_device(vdev, &virtio_pci_bindings, proxy);
      proxy->host_features |= 0x1 << VIRTIO_F_NOTIFY_ON_EMPTY;
      proxy->host_features |= 0x1 << VIRTIO_F_BAD_FEATURE;
      proxy->host_features = vdev->get_features(vdev, proxy->host_features);
+
  }
  
  static int virtio_blk_init_pci(PCIDevice *pci_dev)
@@ -597,6 +741,7 @@ static int virtio_blk_exit_pci(PCIDevice *pci_dev)
  {
      VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, pci_dev);
  
+    virtio_pci_stop_ioeventfd(proxy);
      virtio_blk_exit(proxy->vdev);
      blockdev_mark_auto_del(proxy->block.bs);
      return virtio_exit_pci(pci_dev);
@@ -658,6 +803,7 @@ static int virtio_net_exit_pci(PCIDevice *pci_dev)
  {
      VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, pci_dev);
  
+    virtio_pci_stop_ioeventfd(proxy);
      virtio_net_exit(proxy->vdev);
      return virtio_exit_pci(pci_dev);
  }
@@ -705,6 +851,8 @@ static PCIDeviceInfo virtio_info[] = {
          .qdev.props = (Property[]) {
              DEFINE_PROP_HEX32("class", VirtIOPCIProxy, class_code, 0),
              DEFINE_BLOCK_PROPERTIES(VirtIOPCIProxy, block),
+            DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags,
+                            VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true),
              DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 2),
              DEFINE_VIRTIO_BLK_FEATURES(VirtIOPCIProxy, host_features),
              DEFINE_PROP_END_OF_LIST(),
@@ -717,6 +865,8 @@ static PCIDeviceInfo virtio_info[] = {
          .exit       = virtio_net_exit_pci,
          .romfile    = "pxe-virtio.bin",
          .qdev.props = (Property[]) {
+            DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags,
+                            VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, false),
              DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 3),
              DEFINE_VIRTIO_NET_FEATURES(VirtIOPCIProxy, host_features),
              DEFINE_NIC_PROPERTIES(VirtIOPCIProxy, nic),
diff --git a/hw/virtio.c b/hw/virtio.c

index 1d20be2..31bd9e3 100644 (file)
--- a/hw/virtio.c
+++ b/hw/virtio.c
@@ -575,11 +575,19 @@ int virtio_queue_get_num(VirtIODevice *vdev, int n)
      return vdev->vq[n].vring.num;
  }
  
+/* Trace and run vq's handle_output callback, provided vq->vring.desc is
+ * non-zero; otherwise do nothing. */
+void virtio_queue_notify_vq(VirtQueue *vq)
+{
+    VirtIODevice *vdev;
+
+    if (!vq->vring.desc) {
+        return;
+    }
+    vdev = vq->vdev;
+    trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
+    vq->handle_output(vdev, vq);
+}
+
  void virtio_queue_notify(VirtIODevice *vdev, int n)
  {
-    if (n < VIRTIO_PCI_QUEUE_MAX && vdev->vq[n].vring.desc) {
-        trace_virtio_queue_notify(vdev, n, &vdev->vq[n]);
-        vdev->vq[n].handle_output(vdev, &vdev->vq[n]);
+    if (n < VIRTIO_PCI_QUEUE_MAX) {
+        virtio_queue_notify_vq(&vdev->vq[n]);
      }
  }
  
diff --git a/hw/virtio.h b/hw/virtio.h

index bd52742..d8546d5 100644 (file)
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -222,5 +222,6 @@ void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx);
  VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n);
  EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq);
  EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq);
+void virtio_queue_notify_vq(VirtQueue *vq);
  void virtio_irq(VirtQueue *vq);
  #endif
author	Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
	Fri, 17 Dec 2010 12:01:50 +0000 (12:01 +0000)
committer	Michael S. Tsirkin <mst@redhat.com>
	Mon, 10 Jan 2011 12:44:16 +0000 (14:44 +0200)
hw/virtio-pci.c		patch \| blob \| history
hw/virtio.c		patch \| blob \| history
hw/virtio.h		patch \| blob \| history