1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* Virtio ring implementation.
4 * Copyright 2007 Rusty Russell IBM Corporation
6 #include <linux/virtio.h>
7 #include <linux/virtio_ring.h>
8 #include <linux/virtio_config.h>
9 #include <linux/device.h>
10 #include <linux/slab.h>
11 #include <linux/module.h>
12 #include <linux/hrtimer.h>
13 #include <linux/dma-mapping.h>
14 #include <linux/spinlock.h>
18 /* For development, we want to crash whenever the ring is screwed. */
/*
 * Development-time helpers. BAD_RING reports through dev_err(); START_USE
 * records __LINE__ in in_use (its panic() prints the previous in_use value)
 * and END_USE BUG()s if in_use is already zero before clearing it; the
 * LAST_ADD_TIME_* macros WARN when a valid last_add_time is more than
 * 100 ms old.
 */
19 #define BAD_RING(_vq, fmt, args...) \
21 dev_err(&(_vq)->vq.vdev->dev, \
22 "%s:"fmt, (_vq)->vq.name, ##args); \
25 /* Caller is supposed to guarantee no reentry. */
26 #define START_USE(_vq) \
29 panic("%s:in_use = %i\n", \
30 (_vq)->vq.name, (_vq)->in_use); \
31 (_vq)->in_use = __LINE__; \
33 #define END_USE(_vq) \
34 do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0)
35 #define LAST_ADD_TIME_UPDATE(_vq) \
37 ktime_t now = ktime_get(); \
39 /* No kick or get, with .1 second between? Warn. */ \
40 if ((_vq)->last_add_time_valid) \
41 WARN_ON(ktime_to_ms(ktime_sub(now, \
42 (_vq)->last_add_time)) > 100); \
43 (_vq)->last_add_time = now; \
44 (_vq)->last_add_time_valid = true; \
46 #define LAST_ADD_TIME_CHECK(_vq) \
48 if ((_vq)->last_add_time_valid) { \
49 WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \
50 (_vq)->last_add_time)) > 100); \
53 #define LAST_ADD_TIME_INVALID(_vq) \
54 ((_vq)->last_add_time_valid = false)
/*
 * Alternate definitions: this BAD_RING logs through dev_err() and sets the
 * queue's broken flag, and the LAST_ADD_TIME_* macros expand to nothing.
 * NOTE(review): _vq is not parenthesized in the dev_err() argument here,
 * unlike the other BAD_RING definition.
 */
56 #define BAD_RING(_vq, fmt, args...) \
58 dev_err(&_vq->vq.vdev->dev, \
59 "%s:"fmt, (_vq)->vq.name, ##args); \
60 (_vq)->broken = true; \
64 #define LAST_ADD_TIME_UPDATE(vq)
65 #define LAST_ADD_TIME_CHECK(vq)
66 #define LAST_ADD_TIME_INVALID(vq)
/*
 * Driver-side bookkeeping for one split-ring descriptor head: the caller's
 * token and, when one is in use, the indirect descriptor table.
 */
69 struct vring_desc_state_split {
70 void *data; /* Data for callback. */
71 struct vring_desc *indir_desc; /* Indirect descriptor, if any. */
/*
 * Driver-side bookkeeping for one packed-ring buffer id: the caller's token,
 * an optional indirect table, the chain length and the last state entry of
 * the chain.
 */
74 struct vring_desc_state_packed {
75 void *data; /* Data for callback. */
76 struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
77 u16 num; /* Descriptor list length. */
78 u16 last; /* The last desc state in a list. */
/*
 * Driver-private copy of a descriptor's DMA address, length, flags and next
 * link. The unmap helpers in this file read these fields rather than the
 * ring descriptor itself.
 */
81 struct vring_desc_extra {
82 dma_addr_t addr; /* Descriptor DMA addr. */
83 u32 len; /* Descriptor length. */
84 u16 flags; /* Descriptor flags. */
85 u16 next; /* The next desc state in a list. */
/*
 * State specific to a split virtqueue: shadow copies of values written to
 * the avail ring, the per-descriptor state arrays, and the DMA address and
 * byte size of the ring allocation.
 */
88 struct vring_virtqueue_split {
89 /* Actual memory layout for this queue. */
92 /* Last written value to avail->flags */
93 u16 avail_flags_shadow;
96 * Last written value to avail->idx in
101 /* Per-descriptor state. */
102 struct vring_desc_state_split *desc_state;
103 struct vring_desc_extra *desc_extra;
105 /* DMA address and size information */
106 dma_addr_t queue_dma_addr;
107 size_t queue_size_in_bytes;
110 * The parameters for creating vrings are reserved for creating new
/*
 * State specific to a packed virtqueue: pointers to the descriptor ring and
 * the driver/device event structures, the driver's wrap counter and current
 * avail/used flag bits, a shadow of the driver event flags, per-descriptor
 * state arrays, and the DMA addresses and sizes of the allocations.
 */
117 struct vring_virtqueue_packed {
118 /* Actual memory layout for this queue. */
121 struct vring_packed_desc *desc;
122 struct vring_packed_desc_event *driver;
123 struct vring_packed_desc_event *device;
126 /* Driver ring wrap counter. */
127 bool avail_wrap_counter;
129 /* Avail used flags. */
130 u16 avail_used_flags;
132 /* Index of the next avail descriptor. */
136 * Last written value to driver->flags in
139 u16 event_flags_shadow;
141 /* Per-descriptor state. */
142 struct vring_desc_state_packed *desc_state;
143 struct vring_desc_extra *desc_extra;
145 /* DMA address and size information */
146 dma_addr_t ring_dma_addr;
147 dma_addr_t driver_event_dma_addr;
148 dma_addr_t device_event_dma_addr;
149 size_t ring_size_in_bytes;
150 size_t event_size_in_bytes;
/*
 * Driver-private virtqueue object. to_vvq() recovers it from its embedded
 * vq member. It carries the ring-type and feature flags, the free-list
 * head, the count of buffers added since the last sync, last-used tracking,
 * the split or packed ring state, and the notify hook.
 */
153 struct vring_virtqueue {
156 /* Is this a packed ring? */
159 /* Is DMA API used? */
162 /* Can we use weak barriers? */
165 /* Other side has made a mess, don't try any more. */
168 /* Host supports indirect buffers */
171 /* Host publishes avail event idx */
174 /* Head of free buffer list. */
175 unsigned int free_head;
176 /* Number we've added since last sync. */
177 unsigned int num_added;
179 /* Last used index we've seen.
180 * for split ring, it just contains last used index
182 * bits up to VRING_PACKED_EVENT_F_WRAP_CTR include the last used index.
183 * bits from VRING_PACKED_EVENT_F_WRAP_CTR include the used wrap counter.
187 /* Hint for event idx: already triggered no need to disable. */
188 bool event_triggered;
191 /* Available for split ring */
192 struct vring_virtqueue_split split;
194 /* Available for packed ring */
195 struct vring_virtqueue_packed packed;
198 /* How to notify other side. FIXME: commonalize hcalls! */
199 bool (*notify)(struct virtqueue *vq);
201 /* DMA, allocation, and size information */
205 /* They're supposed to lock for us. */
208 /* Figure out if their kicks are too delayed. */
209 bool last_add_time_valid;
210 ktime_t last_add_time;
/*
 * Forward declarations of helpers that are called further down in this file
 * before their definitions appear.
 */
214 static struct virtqueue *__vring_new_virtqueue(unsigned int index,
215 struct vring_virtqueue_split *vring_split,
216 struct virtio_device *vdev,
219 bool (*notify)(struct virtqueue *),
220 void (*callback)(struct virtqueue *),
222 static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num);
223 static void vring_free(struct virtqueue *_vq);
/* Map a struct virtqueue pointer back to the struct vring_virtqueue that embeds it as member vq. */
229 #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
/*
 * Report whether an add of total_sg entries should use an indirect table:
 * the queue must have indirect support, the request must span more than one
 * sg entry, and at least one ring descriptor must be free.
 */
231 static inline bool virtqueue_use_indirect(struct vring_virtqueue *vq,
232 unsigned int total_sg)
235 * If the host supports indirect descriptor tables, and we have multiple
236 * buffers, then go indirect. FIXME: tune this threshold
238 return (vq->indirect && total_sg > 1 && vq->vq.num_free);
242 * Modern virtio devices have feature bits to specify whether they need a
243 * quirk and bypass the IOMMU. If not there, just use the DMA API.
245 * If there, the interaction between virtio and DMA API is messy.
247 * On most systems with virtio, physical addresses match bus addresses,
248 * and it doesn't particularly matter whether we use the DMA API.
250 * On some systems, including Xen and any system with a physical device
251 * that speaks virtio behind a physical IOMMU, we must use the DMA API
252 * for virtio DMA to work at all.
254 * On other systems, including SPARC and PPC64, virtio-pci devices are
255 * enumerated as though they are behind an IOMMU, but the virtio host
256 * ignores the IOMMU, so we must either pretend that the IOMMU isn't
257 * there or somehow map everything as the identity.
259 * For the time being, we preserve historic behavior and bypass the DMA
262 * TODO: install a per-device DMA ops structure that does the right thing
263 * taking into account all the above quirks, and use the DMA API
264 * unconditionally on data path.
/*
 * Tell callers whether DMA for vdev should go through the DMA API.
 * virtio_has_dma_quirk() is tested first; when the quirk is present the
 * answer comes from the heuristic described in the comment below.
 * NOTE(review): the return statements are not visible here - confirm the
 * exact result of each branch.
 */
267 static bool vring_use_dma_api(struct virtio_device *vdev)
269 if (!virtio_has_dma_quirk(vdev))
272 /* Otherwise, we are left to guess. */
274 * In theory, it's possible to have a buggy QEMU-supposed
275 * emulated Q35 IOMMU and Xen enabled at the same time. On
276 * such a configuration, virtio has never worked and will
277 * not work without an even larger kludge. Instead, enable
278 * the DMA API if we're a Xen guest, which at least allows
279 * all of the sensible Xen configurations to work correctly.
/*
 * Return the largest DMA mapping size usable with vdev:
 * dma_max_mapping_size() of the parent device when the DMA API is in use,
 * SIZE_MAX otherwise. Exported for GPL modules.
 */
287 size_t virtio_max_dma_size(struct virtio_device *vdev)
289 size_t max_segment_size = SIZE_MAX;
291 if (vring_use_dma_api(vdev))
292 max_segment_size = dma_max_mapping_size(vdev->dev.parent);
294 return max_segment_size;
296 EXPORT_SYMBOL_GPL(virtio_max_dma_size);
/*
 * Allocate ring memory for vdev and report its device-visible address
 * through *dma_handle. With the DMA API the memory comes from
 * dma_alloc_coherent() on the parent device. Otherwise whole pages are taken
 * from alloc_pages_exact() and the physical address is used as the handle;
 * if that address does not survive the cast to dma_addr_t, a one-time
 * warning fires and the pages are freed again.
 */
298 static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
299 dma_addr_t *dma_handle, gfp_t flag)
301 if (vring_use_dma_api(vdev)) {
302 return dma_alloc_coherent(vdev->dev.parent, size,
305 void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);
308 phys_addr_t phys_addr = virt_to_phys(queue);
309 *dma_handle = (dma_addr_t)phys_addr;
312 * Sanity check: make sure we dind't truncate
313 * the address. The only arches I can find that
314 * have 64-bit phys_addr_t but 32-bit dma_addr_t
315 * are certain non-highmem MIPS and x86
316 * configurations, but these configurations
317 * should never allocate physical pages above 32
318 * bits, so this is fine. Just in case, throw a
319 * warning and abort if we end up with an
320 * unrepresentable address.
322 if (WARN_ON_ONCE(*dma_handle != phys_addr)) {
323 free_pages_exact(queue, PAGE_ALIGN(size));
/*
 * Release ring memory for vdev. Uses dma_free_coherent() on the parent
 * device when vring_use_dma_api() is true, and free_pages_exact() on the
 * page-aligned size for the other case.
 */
331 static void vring_free_queue(struct virtio_device *vdev, size_t size,
332 void *queue, dma_addr_t dma_handle)
334 if (vring_use_dma_api(vdev))
335 dma_free_coherent(vdev->dev.parent, size, queue, dma_handle);
337 free_pages_exact(queue, PAGE_ALIGN(size));
341 * The DMA ops on various arches are rather gnarly right now, and
342 * making all of the arch DMA ops work on the vring device itself
343 * is a mess. For now, we use the parent device for DMA ops.
/* Return the device used for DMA operations on this queue: the parent of the virtio device. */
345 static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq)
347 return vq->vq.vdev->dev.parent;
350 /* Map one sg entry. */
/*
 * Produce a device address for one scatterlist entry. Without the DMA API
 * this is just sg_phys(); otherwise the entry's page, offset and length are
 * mapped with dma_map_page() on vring_dma_dev().
 */
351 static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq,
352 struct scatterlist *sg,
353 enum dma_data_direction direction)
355 if (!vq->use_dma_api)
356 return (dma_addr_t)sg_phys(sg);
359 * We can't use dma_map_sg, because we don't use scatterlists in
360 * the way it expects (we don't guarantee that the scatterlist
361 * will exist for the lifetime of the mapping).
363 return dma_map_page(vring_dma_dev(vq),
364 sg_page(sg), sg->offset, sg->length,
/*
 * Produce a device address for a kernel-virtual buffer of size bytes.
 * Without the DMA API this is virt_to_phys(cpu_addr); otherwise the buffer
 * is mapped with dma_map_single() on vring_dma_dev().
 */
368 static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
369 void *cpu_addr, size_t size,
370 enum dma_data_direction direction)
372 if (!vq->use_dma_api)
373 return (dma_addr_t)virt_to_phys(cpu_addr);
375 return dma_map_single(vring_dma_dev(vq),
376 cpu_addr, size, direction);
/*
 * Check whether an address returned by the mapping helpers signals failure.
 * With the DMA API this is dma_mapping_error() on vring_dma_dev().
 * NOTE(review): the value returned when the DMA API is not in use is not
 * visible here - presumably 0, confirm.
 */
379 static int vring_mapping_error(const struct vring_virtqueue *vq,
382 if (!vq->use_dma_api)
385 return dma_mapping_error(vring_dma_dev(vq), addr);
/*
 * Reset the generic bookkeeping of vq for a ring of num entries: every
 * descriptor is free, last_used_idx restarts at 0 (one branch also sets the
 * VRING_PACKED_EVENT_F_WRAP_CTR bit - presumably the packed-ring case,
 * confirm), and event_triggered and last_add_time_valid are cleared.
 */
388 static void virtqueue_init(struct vring_virtqueue *vq, u32 num)
390 vq->vq.num_free = num;
393 vq->last_used_idx = 0 | (1 << VRING_PACKED_EVENT_F_WRAP_CTR);
395 vq->last_used_idx = 0;
397 vq->event_triggered = false;
402 vq->last_add_time_valid = false;
408 * Split ring specific functions - *_split().
/*
 * Undo the DMA mapping described by one split-ring descriptor, decoding
 * address, length and flags from the descriptor itself. VRING_DESC_F_WRITE
 * selects DMA_FROM_DEVICE, otherwise DMA_TO_DEVICE. Queues that do not use
 * the DMA API are tested for first.
 */
411 static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
412 struct vring_desc *desc)
416 if (!vq->use_dma_api)
419 flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
421 dma_unmap_page(vring_dma_dev(vq),
422 virtio64_to_cpu(vq->vq.vdev, desc->addr),
423 virtio32_to_cpu(vq->vq.vdev, desc->len),
424 (flags & VRING_DESC_F_WRITE) ?
425 DMA_FROM_DEVICE : DMA_TO_DEVICE);
/*
 * Undo the DMA mapping of ring descriptor i using the driver-side
 * desc_extra copy, and return extra[i].next so callers can walk the chain.
 * Entries flagged VRING_DESC_F_INDIRECT are released with
 * dma_unmap_single(), all others with dma_unmap_page(); the direction
 * follows VRING_DESC_F_WRITE.
 */
428 static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
431 struct vring_desc_extra *extra = vq->split.desc_extra;
434 if (!vq->use_dma_api)
437 flags = extra[i].flags;
439 if (flags & VRING_DESC_F_INDIRECT) {
440 dma_unmap_single(vring_dma_dev(vq),
443 (flags & VRING_DESC_F_WRITE) ?
444 DMA_FROM_DEVICE : DMA_TO_DEVICE);
446 dma_unmap_page(vring_dma_dev(vq),
449 (flags & VRING_DESC_F_WRITE) ?
450 DMA_FROM_DEVICE : DMA_TO_DEVICE);
454 return extra[i].next;
/*
 * Allocate an indirect descriptor table with total_sg entries using
 * kmalloc_array(), with __GFP_HIGHMEM stripped from gfp, and pre-link the
 * entries by setting each next field to i + 1.
 */
457 static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
458 unsigned int total_sg,
461 struct vring_desc *desc;
465 * We require lowmem mappings for the descriptors because
466 * otherwise virt_to_phys will give us bogus addresses in the
469 gfp &= ~__GFP_HIGHMEM;
471 desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp);
475 for (i = 0; i < total_sg; i++)
476 desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1);
/*
 * Write flags, addr and len into desc[i] in the device's byte order. The
 * visible code also mirrors addr and flags into desc_extra[i], and obtains
 * the following index either from extra[i].next (also stored into
 * desc[i].next) or by decoding desc[i].next.
 * NOTE(review): the condition selecting between those two paths and the
 * return statement are not visible here.
 */
480 static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
481 struct vring_desc *desc,
488 struct vring_virtqueue *vring = to_vvq(vq);
489 struct vring_desc_extra *extra = vring->split.desc_extra;
492 desc[i].flags = cpu_to_virtio16(vq->vdev, flags);
493 desc[i].addr = cpu_to_virtio64(vq->vdev, addr);
494 desc[i].len = cpu_to_virtio32(vq->vdev, len);
497 next = extra[i].next;
498 desc[i].next = cpu_to_virtio16(vq->vdev, next);
500 extra[i].addr = addr;
502 extra[i].flags = flags;
504 next = virtio16_to_cpu(vq->vdev, desc[i].next);
/*
 * Queue one buffer on a split ring. The buffer consists of out_sgs
 * device-readable scatterlists followed by in_sgs device-writable ones.
 *
 * An indirect table is requested when virtqueue_use_indirect() allows it;
 * otherwise descriptors come straight from the ring. Each sg entry is mapped
 * with vring_map_one_sg() and written with virtqueue_add_desc_split(); the
 * final descriptor has VRING_DESC_F_NEXT cleared. The caller's data token
 * and the indirect table or ctx are stored in desc_state[head], head is
 * placed in the avail ring, and avail->idx is advanced after a write
 * barrier. The tail of the function unmaps entries that were already mapped
 * when a mapping step fails.
 */
509 static inline int virtqueue_add_split(struct virtqueue *_vq,
510 struct scatterlist *sgs[],
511 unsigned int total_sg,
512 unsigned int out_sgs,
518 struct vring_virtqueue *vq = to_vvq(_vq);
519 struct scatterlist *sg;
520 struct vring_desc *desc;
521 unsigned int i, n, avail, descs_used, prev, err_idx;
527 BUG_ON(data == NULL);
528 BUG_ON(ctx && vq->indirect);
530 if (unlikely(vq->broken)) {
535 LAST_ADD_TIME_UPDATE(vq);
537 BUG_ON(total_sg == 0);
539 head = vq->free_head;
541 if (virtqueue_use_indirect(vq, total_sg))
542 desc = alloc_indirect_split(_vq, total_sg, gfp);
545 WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
549 /* Use a single buffer which doesn't continue */
551 /* Set up rest to use this indirect table. */
556 desc = vq->split.vring.desc;
558 descs_used = total_sg;
561 if (unlikely(vq->vq.num_free < descs_used)) {
562 pr_debug("Can't add buf len %i - avail = %i\n",
563 descs_used, vq->vq.num_free);
564 /* FIXME: for historical reasons, we force a notify here if
565 * there are outgoing parts to the buffer. Presumably the
566 * host should service the ring ASAP. */
575 for (n = 0; n < out_sgs; n++) {
576 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
577 dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
578 if (vring_mapping_error(vq, addr))
582 /* Note that we trust indirect descriptor
583 * table since it use stream DMA mapping.
585 i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length,
590 for (; n < (out_sgs + in_sgs); n++) {
591 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
592 dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
593 if (vring_mapping_error(vq, addr))
597 /* Note that we trust indirect descriptor
598 * table since it use stream DMA mapping.
600 i = virtqueue_add_desc_split(_vq, desc, i, addr,
607 /* Last one doesn't continue. */
608 desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
609 if (!indirect && vq->use_dma_api)
610 vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &=
614 /* Now that the indirect table is filled in, map it. */
615 dma_addr_t addr = vring_map_single(
616 vq, desc, total_sg * sizeof(struct vring_desc),
618 if (vring_mapping_error(vq, addr))
621 virtqueue_add_desc_split(_vq, vq->split.vring.desc,
623 total_sg * sizeof(struct vring_desc),
624 VRING_DESC_F_INDIRECT,
628 /* We're using some buffers from the free list. */
629 vq->vq.num_free -= descs_used;
631 /* Update free pointer */
633 vq->free_head = vq->split.desc_extra[head].next;
637 /* Store token and indirect buffer state. */
638 vq->split.desc_state[head].data = data;
640 vq->split.desc_state[head].indir_desc = desc;
642 vq->split.desc_state[head].indir_desc = ctx;
644 /* Put entry in available array (but don't update avail->idx until they
646 avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
647 vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);
649 /* Descriptors and available array need to be set before we expose the
650 * new available array entries. */
651 virtio_wmb(vq->weak_barriers);
652 vq->split.avail_idx_shadow++;
653 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
654 vq->split.avail_idx_shadow);
657 pr_debug("Added buffer head %i to %p\n", head, vq);
660 /* This is very unlikely, but theoretically possible. Kick
662 if (unlikely(vq->num_added == (1 << 16) - 1))
675 for (n = 0; n < total_sg; n++) {
679 vring_unmap_one_split_indirect(vq, &desc[i]);
680 i = virtio16_to_cpu(_vq->vdev, desc[i].next);
682 i = vring_unmap_one_split(vq, i);
/*
 * Work out whether the device needs a notification for buffers added since
 * the last kick. After a full barrier, old and new bracket the avail index
 * range covered by num_added. One branch asks vring_need_event() against the
 * device's avail event; the other tests VRING_USED_F_NO_NOTIFY in
 * used->flags.
 */
692 static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
694 struct vring_virtqueue *vq = to_vvq(_vq);
699 /* We need to expose available array entries before checking avail
701 virtio_mb(vq->weak_barriers);
703 old = vq->split.avail_idx_shadow - vq->num_added;
704 new = vq->split.avail_idx_shadow;
707 LAST_ADD_TIME_CHECK(vq);
708 LAST_ADD_TIME_INVALID(vq);
711 needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev,
712 vring_avail_event(&vq->split.vring)),
715 needs_kick = !(vq->split.vring.used->flags &
716 cpu_to_virtio16(_vq->vdev,
717 VRING_USED_F_NO_NOTIFY));
/*
 * Return the descriptor chain that starts at head to the free list. The data
 * token is cleared, every chained descriptor is unmapped, the tail is linked
 * to the previous free_head and head becomes the new free_head. For an
 * indirect buffer each table entry is unmapped as well (the table length is
 * taken from desc_extra[head].len) and the indir_desc slot is cleared; the
 * other visible path hands the stored indir_desc value back through *ctx.
 */
723 static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
727 __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
729 /* Clear data ptr. */
730 vq->split.desc_state[head].data = NULL;
732 /* Put back on free list: unmap first-level descriptors and find end */
735 while (vq->split.vring.desc[i].flags & nextflag) {
736 vring_unmap_one_split(vq, i);
737 i = vq->split.desc_extra[i].next;
741 vring_unmap_one_split(vq, i);
742 vq->split.desc_extra[i].next = vq->free_head;
743 vq->free_head = head;
745 /* Plus final descriptor */
749 struct vring_desc *indir_desc =
750 vq->split.desc_state[head].indir_desc;
753 /* Free the indirect table, if any, now that it's unmapped. */
757 len = vq->split.desc_extra[head].len;
759 BUG_ON(!(vq->split.desc_extra[head].flags &
760 VRING_DESC_F_INDIRECT));
761 BUG_ON(len == 0 || len % sizeof(struct vring_desc));
763 for (j = 0; j < len / sizeof(struct vring_desc); j++)
764 vring_unmap_one_split_indirect(vq, &indir_desc[j]);
767 vq->split.desc_state[head].indir_desc = NULL;
769 *ctx = vq->split.desc_state[head].indir_desc;
/* True when the device's used->idx differs from the driver's last_used_idx. */
773 static inline bool more_used_split(const struct vring_virtqueue *vq)
775 return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev,
776 vq->split.vring.used->idx);
/*
 * Take the next used buffer off a split ring. After a read barrier, the id
 * and length are read from the used-ring slot selected by last_used_idx; the
 * length is stored through *len. An id outside the ring, or one whose
 * desc_state has no data token, is reported with BAD_RING. Otherwise the
 * token is captured in ret (presumably the return value - confirm) and the
 * chain is released with detach_buf_split(). When interrupts are not
 * suppressed in the shadow flags, last_used_idx is published as the used
 * event with a store followed by a barrier.
 */
779 static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
783 struct vring_virtqueue *vq = to_vvq(_vq);
790 if (unlikely(vq->broken)) {
795 if (!more_used_split(vq)) {
796 pr_debug("No more buffers in queue\n");
801 /* Only get used array entries after they have been exposed by host. */
802 virtio_rmb(vq->weak_barriers);
804 last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
805 i = virtio32_to_cpu(_vq->vdev,
806 vq->split.vring.used->ring[last_used].id);
807 *len = virtio32_to_cpu(_vq->vdev,
808 vq->split.vring.used->ring[last_used].len);
810 if (unlikely(i >= vq->split.vring.num)) {
811 BAD_RING(vq, "id %u out of range\n", i);
814 if (unlikely(!vq->split.desc_state[i].data)) {
815 BAD_RING(vq, "id %u is not a head!\n", i);
819 /* detach_buf_split clears data, so grab it now. */
820 ret = vq->split.desc_state[i].data;
821 detach_buf_split(vq, i, ctx);
823 /* If we expect an interrupt for the next entry, tell host
824 * by writing event index and flush out the write before
825 * the read in the next get_buf call. */
826 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
827 virtio_store_mb(vq->weak_barriers,
828 &vring_used_event(&vq->split.vring),
829 cpu_to_virtio16(_vq->vdev, vq->last_used_idx));
831 LAST_ADD_TIME_INVALID(vq);
/*
 * Ask the device to stop sending callbacks. If VRING_AVAIL_F_NO_INTERRUPT is
 * not yet set in the shadow flags it is set there. The visible code also
 * writes 0 to the used event and copies the shadow flags to avail->flags.
 * NOTE(review): the conditions guarding those two stores are not visible
 * here.
 */
837 static void virtqueue_disable_cb_split(struct virtqueue *_vq)
839 struct vring_virtqueue *vq = to_vvq(_vq);
841 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
842 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
844 /* TODO: this is a hack. Figure out a cleaner value to write. */
845 vring_used_event(&vq->split.vring) = 0x0;
847 vq->split.vring.avail->flags =
848 cpu_to_virtio16(_vq->vdev,
849 vq->split.avail_flags_shadow);
/*
 * Turn callbacks back on. VRING_AVAIL_F_NO_INTERRUPT is cleared in the
 * shadow flags if it was set, the shadow is written to avail->flags, and the
 * used event is set to the current last_used_idx. That same last_used_idx
 * snapshot is returned to the caller.
 */
853 static unsigned int virtqueue_enable_cb_prepare_split(struct virtqueue *_vq)
855 struct vring_virtqueue *vq = to_vvq(_vq);
860 /* We optimistically turn back on interrupts, then check if there was
862 /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
863 * either clear the flags bit or point the event index at the next
864 * entry. Always do both to keep code simple. */
865 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
866 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
868 vq->split.vring.avail->flags =
869 cpu_to_virtio16(_vq->vdev,
870 vq->split.avail_flags_shadow);
872 vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev,
873 last_used_idx = vq->last_used_idx);
875 return last_used_idx;
/*
 * True when the device's used->idx no longer equals the given last_used_idx
 * snapshot, which is compared after truncation to 16 bits.
 */
878 static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned int last_used_idx)
880 struct vring_virtqueue *vq = to_vvq(_vq);
882 return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev,
883 vq->split.vring.used->idx);
/*
 * Turn callbacks back on, but ask for the event only after about three
 * quarters of the outstanding buffers have been used. The shadow flag is
 * cleared and written out as in the non-delayed variant, bufs is computed
 * from the distance between avail_idx_shadow and last_used_idx, and the used
 * event is set to last_used_idx + bufs with a store followed by a barrier.
 * The final test detects the device having already moved more than bufs
 * entries past last_used_idx.
 * NOTE(review): the return values are not visible here.
 */
886 static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
888 struct vring_virtqueue *vq = to_vvq(_vq);
893 /* We optimistically turn back on interrupts, then check if there was
895 /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
896 * either clear the flags bit or point the event index at the next
897 * entry. Always update the event index to keep code simple. */
898 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
899 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
901 vq->split.vring.avail->flags =
902 cpu_to_virtio16(_vq->vdev,
903 vq->split.avail_flags_shadow);
905 /* TODO: tune this threshold */
906 bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4;
908 virtio_store_mb(vq->weak_barriers,
909 &vring_used_event(&vq->split.vring),
910 cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs));
912 if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx)
913 - vq->last_used_idx) > bufs)) {
/*
 * Scan desc_state for an entry that still holds a data token. For such an
 * entry the token is captured in buf, the chain is released with
 * detach_buf_split(), and the avail index is stepped back by one both in the
 * shadow and in the ring. After the scan, BUG_ON() asserts that every
 * descriptor is free.
 */
922 static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
924 struct vring_virtqueue *vq = to_vvq(_vq);
930 for (i = 0; i < vq->split.vring.num; i++) {
931 if (!vq->split.desc_state[i].data)
933 /* detach_buf_split clears data, so grab it now. */
934 buf = vq->split.desc_state[i].data;
935 detach_buf_split(vq, i, NULL);
936 vq->split.avail_idx_shadow--;
937 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
938 vq->split.avail_idx_shadow);
942 /* That should have freed everything. */
943 BUG_ON(vq->vq.num_free != vq->split.vring.num);
/*
 * Reset the avail shadow state of vring_split to zero. When vq has no
 * callback, VRING_AVAIL_F_NO_INTERRUPT is set in the shadow flags; the
 * visible code then writes the shadow flags to avail->flags.
 * NOTE(review): any further condition on that write is not visible here.
 */
949 static void virtqueue_vring_init_split(struct vring_virtqueue_split *vring_split,
950 struct vring_virtqueue *vq)
952 struct virtio_device *vdev;
956 vring_split->avail_flags_shadow = 0;
957 vring_split->avail_idx_shadow = 0;
959 /* No callback? Tell other side not to bother us. */
960 if (!vq->vq.callback) {
961 vring_split->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
963 vring_split->vring.avail->flags = cpu_to_virtio16(vdev,
964 vring_split->avail_flags_shadow);
/*
 * Reset an existing split ring in place: zero the avail and used flags and
 * indices, zero the avail-event and used-event slots that sit just past each
 * ring array, then rerun virtqueue_init() and virtqueue_vring_init_split().
 */
968 static void virtqueue_reinit_split(struct vring_virtqueue *vq)
972 num = vq->split.vring.num;
974 vq->split.vring.avail->flags = 0;
975 vq->split.vring.avail->idx = 0;
977 /* reset avail event */
978 vq->split.vring.avail->ring[num] = 0;
980 vq->split.vring.used->flags = 0;
981 vq->split.vring.used->idx = 0;
983 /* reset used event */
984 *(__virtio16 *)&(vq->split.vring.used->ring[num]) = 0;
986 virtqueue_init(vq, num);
988 virtqueue_vring_init_split(&vq->split, vq);
/*
 * Install *vring_split as the split-ring state of vq by structure copy.
 * NOTE(review): the free-list setup announced by the trailing comment is not
 * visible here.
 */
991 static void virtqueue_vring_attach_split(struct vring_virtqueue *vq,
992 struct vring_virtqueue_split *vring_split)
994 vq->split = *vring_split;
996 /* Put everything in free lists. */
/*
 * Allocate the per-descriptor arrays for a split ring of
 * vring_split->vring.num entries: desc_state via kmalloc_array() (zeroed
 * with memset()) and desc_extra via vring_alloc_desc_extra(). Both pointers
 * are stored in vring_split.
 * NOTE(review): allocation-failure handling and return values are not
 * visible here.
 */
1000 static int vring_alloc_state_extra_split(struct vring_virtqueue_split *vring_split)
1002 struct vring_desc_state_split *state;
1003 struct vring_desc_extra *extra;
1004 u32 num = vring_split->vring.num;
1006 state = kmalloc_array(num, sizeof(struct vring_desc_state_split), GFP_KERNEL);
1010 extra = vring_alloc_desc_extra(num);
1014 memset(state, 0, num * sizeof(struct vring_desc_state_split));
1016 vring_split->desc_state = state;
1017 vring_split->desc_extra = extra;
/*
 * Release everything held by vring_split: the ring memory through
 * vring_free_queue(), and the desc_state and desc_extra arrays through
 * kfree().
 */
1026 static void vring_free_split(struct vring_virtqueue_split *vring_split,
1027 struct virtio_device *vdev)
1029 vring_free_queue(vdev, vring_split->queue_size_in_bytes,
1030 vring_split->vring.desc,
1031 vring_split->queue_dma_addr);
1033 kfree(vring_split->desc_state);
1034 kfree(vring_split->desc_extra);
/*
 * Allocate and lay out the memory of a split ring. A num that is not a power
 * of two is rejected with a warning. While the ring would be larger than one
 * page, allocation is attempted quietly (__GFP_NOWARN) and num is halved on
 * each pass; may_reduce_num is consulted inside that loop. A last attempt is
 * made without __GFP_NOWARN. The resulting memory is described with
 * vring_init(), and its DMA address, byte size, alignment and the
 * may_reduce_num setting are recorded in vring_split.
 * NOTE(review): return values and loop exits are not visible here.
 */
1037 static int vring_alloc_queue_split(struct vring_virtqueue_split *vring_split,
1038 struct virtio_device *vdev,
1040 unsigned int vring_align,
1041 bool may_reduce_num)
1044 dma_addr_t dma_addr;
1046 /* We assume num is a power of 2. */
1047 if (num & (num - 1)) {
1048 dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
1052 /* TODO: allocate each queue chunk individually */
1053 for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
1054 queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
1056 GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
1059 if (!may_reduce_num)
1067 /* Try to get a single page. You are my only hope! */
1068 queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
1069 &dma_addr, GFP_KERNEL | __GFP_ZERO);
1074 vring_init(&vring_split->vring, num, queue, vring_align);
1076 vring_split->queue_dma_addr = dma_addr;
1077 vring_split->queue_size_in_bytes = vring_size(num, vring_align);
1079 vring_split->vring_align = vring_align;
1080 vring_split->may_reduce_num = may_reduce_num;
/*
 * Create a split virtqueue whose ring memory is allocated here. The ring is
 * obtained with vring_alloc_queue_split() and wrapped by
 * __vring_new_virtqueue(); the visible vring_free_split() call presumably
 * covers failure of that second step (confirm). On the remaining path the
 * queue is marked we_own_ring.
 */
1085 static struct virtqueue *vring_create_virtqueue_split(
1088 unsigned int vring_align,
1089 struct virtio_device *vdev,
1091 bool may_reduce_num,
1093 bool (*notify)(struct virtqueue *),
1094 void (*callback)(struct virtqueue *),
1097 struct vring_virtqueue_split vring_split = {};
1098 struct virtqueue *vq;
1101 err = vring_alloc_queue_split(&vring_split, vdev, num, vring_align,
1106 vq = __vring_new_virtqueue(index, &vring_split, vdev, weak_barriers,
1107 context, notify, callback, name);
1109 vring_free_split(&vring_split, vdev);
1113 to_vvq(vq)->we_own_ring = true;
/*
 * Replace the ring of a split virtqueue with a freshly allocated one of num
 * entries, reusing the queue's stored vring_align and may_reduce_num. New
 * ring memory and state arrays are allocated first; then the old resources
 * are released with vring_free() and the new ring is initialised and
 * attached. The tail of the function frees the new ring and reinitialises
 * the existing one with virtqueue_reinit_split() - presumably the error
 * path, confirm.
 */
1118 static int virtqueue_resize_split(struct virtqueue *_vq, u32 num)
1120 struct vring_virtqueue_split vring_split = {};
1121 struct vring_virtqueue *vq = to_vvq(_vq);
1122 struct virtio_device *vdev = _vq->vdev;
1125 err = vring_alloc_queue_split(&vring_split, vdev, num,
1126 vq->split.vring_align,
1127 vq->split.may_reduce_num);
1131 err = vring_alloc_state_extra_split(&vring_split);
1133 goto err_state_extra;
1135 vring_free(&vq->vq);
1137 virtqueue_vring_init_split(&vring_split, vq);
1139 virtqueue_init(vq, vring_split.vring.num);
1140 virtqueue_vring_attach_split(vq, &vring_split);
1145 vring_free_split(&vring_split, vdev);
1147 virtqueue_reinit_split(vq);
1153 * Packed ring specific functions - *_packed().
/* Extract the used wrap counter: bit VRING_PACKED_EVENT_F_WRAP_CTR of the combined last_used_idx value. */
1155 static inline bool packed_used_wrap_counter(u16 last_used_idx)
1157 return !!(last_used_idx & (1 << VRING_PACKED_EVENT_F_WRAP_CTR));
/*
 * Extract the ring index from the combined last_used_idx value by keeping
 * only the bits below VRING_PACKED_EVENT_F_WRAP_CTR.
 */
1160 static inline u16 packed_last_used(u16 last_used_idx)
1162 return last_used_idx & ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR));
/*
 * Undo a DMA mapping described by a driver-side desc_extra entry of a packed
 * ring. Entries flagged VRING_DESC_F_INDIRECT are released with
 * dma_unmap_single(), all others with dma_unmap_page(); VRING_DESC_F_WRITE
 * selects DMA_FROM_DEVICE, otherwise DMA_TO_DEVICE.
 */
1165 static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
1166 struct vring_desc_extra *extra)
1170 if (!vq->use_dma_api)
1173 flags = extra->flags;
1175 if (flags & VRING_DESC_F_INDIRECT) {
1176 dma_unmap_single(vring_dma_dev(vq),
1177 extra->addr, extra->len,
1178 (flags & VRING_DESC_F_WRITE) ?
1179 DMA_FROM_DEVICE : DMA_TO_DEVICE);
1181 dma_unmap_page(vring_dma_dev(vq),
1182 extra->addr, extra->len,
1183 (flags & VRING_DESC_F_WRITE) ?
1184 DMA_FROM_DEVICE : DMA_TO_DEVICE);
/*
 * Undo the DMA mapping described by one packed descriptor, decoding the
 * little-endian addr, len and flags fields from the descriptor itself.
 * VRING_DESC_F_WRITE selects DMA_FROM_DEVICE, otherwise DMA_TO_DEVICE.
 */
1188 static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
1189 struct vring_packed_desc *desc)
1193 if (!vq->use_dma_api)
1196 flags = le16_to_cpu(desc->flags);
1198 dma_unmap_page(vring_dma_dev(vq),
1199 le64_to_cpu(desc->addr),
1200 le32_to_cpu(desc->len),
1201 (flags & VRING_DESC_F_WRITE) ?
1202 DMA_FROM_DEVICE : DMA_TO_DEVICE);
/*
 * Allocate an indirect table of total_sg packed descriptors with
 * kmalloc_array(), after stripping __GFP_HIGHMEM from gfp.
 */
1205 static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg,
1208 struct vring_packed_desc *desc;
1211 * We require lowmem mappings for the descriptors because
1212 * otherwise virt_to_phys will give us bogus addresses in the
1215 gfp &= ~__GFP_HIGHMEM;
1217 desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp);
/*
 * Queue one buffer on a packed ring through an indirect table. A table of
 * total_sg entries is allocated and each sg entry is mapped and written into
 * it, with VRING_DESC_F_WRITE set for the in_sgs part. The table itself is
 * then mapped and described by the single ring descriptor at next_avail_idx;
 * that descriptor's flags are written last, after a write barrier, so the
 * device cannot see a half-filled entry. One descriptor is taken from
 * num_free, next_avail_idx advances (toggling the wrap counter and
 * avail/used flag bits on wrap-around), free_head moves on, and
 * desc_state[id] records the token and the table. The tail of the function
 * unmaps the table entries mapped so far when a mapping step fails.
 */
1222 static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
1223 struct scatterlist *sgs[],
1224 unsigned int total_sg,
1225 unsigned int out_sgs,
1226 unsigned int in_sgs,
1230 struct vring_packed_desc *desc;
1231 struct scatterlist *sg;
1232 unsigned int i, n, err_idx;
1236 head = vq->packed.next_avail_idx;
1237 desc = alloc_indirect_packed(total_sg, gfp);
1241 if (unlikely(vq->vq.num_free < 1)) {
1242 pr_debug("Can't add buf len 1 - avail = 0\n");
1250 BUG_ON(id == vq->packed.vring.num);
1252 for (n = 0; n < out_sgs + in_sgs; n++) {
1253 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1254 addr = vring_map_one_sg(vq, sg, n < out_sgs ?
1255 DMA_TO_DEVICE : DMA_FROM_DEVICE);
1256 if (vring_mapping_error(vq, addr))
1259 desc[i].flags = cpu_to_le16(n < out_sgs ?
1260 0 : VRING_DESC_F_WRITE);
1261 desc[i].addr = cpu_to_le64(addr);
1262 desc[i].len = cpu_to_le32(sg->length);
1267 /* Now that the indirect table is filled in, map it. */
1268 addr = vring_map_single(vq, desc,
1269 total_sg * sizeof(struct vring_packed_desc),
1271 if (vring_mapping_error(vq, addr))
1274 vq->packed.vring.desc[head].addr = cpu_to_le64(addr);
1275 vq->packed.vring.desc[head].len = cpu_to_le32(total_sg *
1276 sizeof(struct vring_packed_desc));
1277 vq->packed.vring.desc[head].id = cpu_to_le16(id);
1279 if (vq->use_dma_api) {
1280 vq->packed.desc_extra[id].addr = addr;
1281 vq->packed.desc_extra[id].len = total_sg *
1282 sizeof(struct vring_packed_desc);
1283 vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT |
1284 vq->packed.avail_used_flags;
1288 * A driver MUST NOT make the first descriptor in the list
1289 * available before all subsequent descriptors comprising
1290 * the list are made available.
1292 virtio_wmb(vq->weak_barriers);
1293 vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT |
1294 vq->packed.avail_used_flags);
1296 /* We're using some buffers from the free list. */
1297 vq->vq.num_free -= 1;
1299 /* Update free pointer */
1301 if (n >= vq->packed.vring.num) {
1303 vq->packed.avail_wrap_counter ^= 1;
1304 vq->packed.avail_used_flags ^=
1305 1 << VRING_PACKED_DESC_F_AVAIL |
1306 1 << VRING_PACKED_DESC_F_USED;
1308 vq->packed.next_avail_idx = n;
1309 vq->free_head = vq->packed.desc_extra[id].next;
1311 /* Store token and indirect buffer state. */
1312 vq->packed.desc_state[id].num = 1;
1313 vq->packed.desc_state[id].data = data;
1314 vq->packed.desc_state[id].indir_desc = desc;
1315 vq->packed.desc_state[id].last = id;
1319 pr_debug("Added buffer head %i to %p\n", head, vq);
1327 for (i = 0; i < err_idx; i++)
1328 vring_unmap_desc_packed(vq, &desc[i]);
/*
 * Queue one buffer on a packed ring. When virtqueue_use_indirect() allows,
 * virtqueue_add_indirect_packed() is tried first and any result other than
 * -ENOMEM is final; -ENOMEM falls back to direct descriptors.
 *
 * On the direct path every sg entry is mapped and written into consecutive
 * ring slots starting at next_avail_idx, with VRING_DESC_F_NEXT on all but
 * the last entry and VRING_DESC_F_WRITE on the in_sgs part; the avail/used
 * flag bits are toggled when the slot index wraps. Bookkeeping (num_free,
 * next_avail_idx, free_head, desc_state[id]) is updated, and the head
 * descriptor's flags are stored last, after a write barrier. The tail of the
 * function restores the saved avail_used_flags and unmaps the entries
 * already mapped when a mapping step fails.
 */
1336 static inline int virtqueue_add_packed(struct virtqueue *_vq,
1337 struct scatterlist *sgs[],
1338 unsigned int total_sg,
1339 unsigned int out_sgs,
1340 unsigned int in_sgs,
1345 struct vring_virtqueue *vq = to_vvq(_vq);
1346 struct vring_packed_desc *desc;
1347 struct scatterlist *sg;
1348 unsigned int i, n, c, descs_used, err_idx;
1349 __le16 head_flags, flags;
1350 u16 head, id, prev, curr, avail_used_flags;
1355 BUG_ON(data == NULL);
1356 BUG_ON(ctx && vq->indirect);
1358 if (unlikely(vq->broken)) {
1363 LAST_ADD_TIME_UPDATE(vq);
1365 BUG_ON(total_sg == 0);
1367 if (virtqueue_use_indirect(vq, total_sg)) {
1368 err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
1370 if (err != -ENOMEM) {
1375 /* fall back on direct */
1378 head = vq->packed.next_avail_idx;
1379 avail_used_flags = vq->packed.avail_used_flags;
1381 WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect);
1383 desc = vq->packed.vring.desc;
1385 descs_used = total_sg;
1387 if (unlikely(vq->vq.num_free < descs_used)) {
1388 pr_debug("Can't add buf len %i - avail = %i\n",
1389 descs_used, vq->vq.num_free);
1395 BUG_ON(id == vq->packed.vring.num);
1399 for (n = 0; n < out_sgs + in_sgs; n++) {
1400 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1401 dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ?
1402 DMA_TO_DEVICE : DMA_FROM_DEVICE);
1403 if (vring_mapping_error(vq, addr))
1406 flags = cpu_to_le16(vq->packed.avail_used_flags |
1407 (++c == total_sg ? 0 : VRING_DESC_F_NEXT) |
1408 (n < out_sgs ? 0 : VRING_DESC_F_WRITE));
1412 desc[i].flags = flags;
1414 desc[i].addr = cpu_to_le64(addr);
1415 desc[i].len = cpu_to_le32(sg->length);
1416 desc[i].id = cpu_to_le16(id);
1418 if (unlikely(vq->use_dma_api)) {
1419 vq->packed.desc_extra[curr].addr = addr;
1420 vq->packed.desc_extra[curr].len = sg->length;
1421 vq->packed.desc_extra[curr].flags =
1425 curr = vq->packed.desc_extra[curr].next;
1427 if ((unlikely(++i >= vq->packed.vring.num))) {
1429 vq->packed.avail_used_flags ^=
1430 1 << VRING_PACKED_DESC_F_AVAIL |
1431 1 << VRING_PACKED_DESC_F_USED;
1437 vq->packed.avail_wrap_counter ^= 1;
1439 /* We're using some buffers from the free list. */
1440 vq->vq.num_free -= descs_used;
1442 /* Update free pointer */
1443 vq->packed.next_avail_idx = i;
1444 vq->free_head = curr;
1447 vq->packed.desc_state[id].num = descs_used;
1448 vq->packed.desc_state[id].data = data;
1449 vq->packed.desc_state[id].indir_desc = ctx;
1450 vq->packed.desc_state[id].last = prev;
1453 * A driver MUST NOT make the first descriptor in the list
1454 * available before all subsequent descriptors comprising
1455 * the list are made available.
1457 virtio_wmb(vq->weak_barriers);
1458 vq->packed.vring.desc[head].flags = head_flags;
1459 vq->num_added += descs_used;
1461 pr_debug("Added buffer head %i to %p\n", head, vq);
1469 curr = vq->free_head;
1471 vq->packed.avail_used_flags = avail_used_flags;
1473 for (n = 0; n < total_sg; n++) {
1476 vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]);
1477 curr = vq->packed.desc_extra[curr].next;
1479 if (i >= vq->packed.vring.num)
/*
 * Decide whether the device has to be notified about descriptors that were
 * added to a packed ring.
 *
 * The device event suppression area is read with one 32-bit load and decoded
 * into flags and off_wrap.  Unless flags equals VRING_PACKED_EVENT_FLAG_DESC,
 * a kick is needed for every flags value other than
 * VRING_PACKED_EVENT_FLAG_DISABLE.  In DESC mode the decision is delegated to
 * vring_need_event() on the event index and the old/new avail indices.
 */
1487 static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
1489 struct vring_virtqueue *vq = to_vvq(_vq);
1490 u16 new, old, off_wrap, flags, wrap_counter, event_idx;
1503 * We need to expose the new flags value before checking notification
1506 virtio_mb(vq->weak_barriers);
/* old is next_avail_idx minus the vq->num_added descriptors; new is next_avail_idx. */
1508 old = vq->packed.next_avail_idx - vq->num_added;
1509 new = vq->packed.next_avail_idx;
/* One 32-bit read covers both the flags and the off_wrap field. */
1512 snapshot.u32 = *(u32 *)vq->packed.vring.device;
1513 flags = le16_to_cpu(snapshot.flags);
1515 LAST_ADD_TIME_CHECK(vq);
1516 LAST_ADD_TIME_INVALID(vq);
1518 if (flags != VRING_PACKED_EVENT_FLAG_DESC) {
1519 needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE);
1523 off_wrap = le16_to_cpu(snapshot.off_wrap);
/* Bits from VRING_PACKED_EVENT_F_WRAP_CTR upwards hold the wrap counter, the rest the index. */
1525 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1526 event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
/* A wrap counter different from ours shifts the event index down by one ring size. */
1527 if (wrap_counter != vq->packed.avail_wrap_counter)
1528 event_idx -= vq->packed.vring.num;
1530 needs_kick = vring_need_event(event_idx, new, old);
/*
 * Give the descriptors of buffer @id back to the free list.
 *
 * The last desc_extra entry of the buffer (state->last) is linked to the
 * current free head and state->num is added back to num_free.  When the DMA
 * API is in use, each desc_extra entry of the chain is unmapped with
 * vring_unmap_extra_packed().
 *
 * state->indir_desc is then either treated as an indirect descriptor table,
 * whose entries are unmapped with vring_unmap_desc_packed() when the DMA API
 * is in use, or it is copied out through @ctx.
 */
1536 static void detach_buf_packed(struct vring_virtqueue *vq,
1537 unsigned int id, void **ctx)
1539 struct vring_desc_state_packed *state = NULL;
1540 struct vring_packed_desc *desc;
1541 unsigned int i, curr;
1543 state = &vq->packed.desc_state[id];
1545 /* Clear data ptr. */
/* Splice the chain of this buffer in front of the existing free list. */
1548 vq->packed.desc_extra[state->last].next = vq->free_head;
1550 vq->vq.num_free += state->num;
1552 if (unlikely(vq->use_dma_api)) {
1554 for (i = 0; i < state->num; i++) {
1555 vring_unmap_extra_packed(vq,
1556 &vq->packed.desc_extra[curr]);
1557 curr = vq->packed.desc_extra[curr].next;
1564 /* Free the indirect table, if any, now that it's unmapped. */
1565 desc = state->indir_desc;
1569 if (vq->use_dma_api) {
/* The byte length stored in desc_extra[id] gives the number of table entries. */
1570 len = vq->packed.desc_extra[id].len;
1571 for (i = 0; i < len / sizeof(struct vring_packed_desc);
1573 vring_unmap_desc_packed(vq, &desc[i]);
1576 state->indir_desc = NULL;
1578 *ctx = state->indir_desc;
/*
 * Report whether the descriptor at ring index @idx is marked as used.
 *
 * The AVAIL and USED bits of the descriptor flags must be equal to each
 * other and equal to @used_wrap_counter.
 */
1582 static inline bool is_used_desc_packed(const struct vring_virtqueue *vq,
1583 u16 idx, bool used_wrap_counter)
1588 flags = le16_to_cpu(vq->packed.vring.desc[idx].flags);
1589 avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
1590 used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
1592 return avail == used && used == used_wrap_counter;
/*
 * Report whether the descriptor at the current last-used position has been
 * marked used.  vq->last_used_idx is read once and split into a ring index
 * and a wrap counter by packed_last_used() and packed_used_wrap_counter().
 */
1595 static inline bool more_used_packed(const struct vring_virtqueue *vq)
1599 bool used_wrap_counter;
1601 last_used_idx = READ_ONCE(vq->last_used_idx);
1602 last_used = packed_last_used(last_used_idx);
1603 used_wrap_counter = packed_used_wrap_counter(last_used_idx);
1604 return is_used_desc_packed(vq, last_used, used_wrap_counter);
/*
 * Fetch the next used buffer from a packed ring.
 *
 * The queue is checked for the broken state and for pending used entries
 * (more_used_packed()) first.  The buffer id and the written length are then
 * read from the descriptor at the last-used position; *len receives the
 * length.  An id that is out of range, or that has no data token recorded in
 * desc_state, is reported through BAD_RING().
 *
 * For a valid id the data token is saved, the buffer is detached with
 * detach_buf_packed(), and the last-used position is advanced by the number
 * of descriptors of the buffer, flipping the wrap counter when the end of the
 * ring is passed.  The new index and wrap counter are stored together in
 * vq->last_used_idx.
 */
1607 static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
1611 struct vring_virtqueue *vq = to_vvq(_vq);
1612 u16 last_used, id, last_used_idx;
1613 bool used_wrap_counter;
1618 if (unlikely(vq->broken)) {
1623 if (!more_used_packed(vq)) {
1624 pr_debug("No more buffers in queue\n");
1629 /* Only get used elements after they have been exposed by host. */
1630 virtio_rmb(vq->weak_barriers);
1632 last_used_idx = READ_ONCE(vq->last_used_idx);
1633 used_wrap_counter = packed_used_wrap_counter(last_used_idx);
1634 last_used = packed_last_used(last_used_idx);
1635 id = le16_to_cpu(vq->packed.vring.desc[last_used].id);
1636 *len = le32_to_cpu(vq->packed.vring.desc[last_used].len);
/* The id comes from the ring, so it is validated before it indexes desc_state. */
1638 if (unlikely(id >= vq->packed.vring.num)) {
1639 BAD_RING(vq, "id %u out of range\n", id);
1642 if (unlikely(!vq->packed.desc_state[id].data)) {
1643 BAD_RING(vq, "id %u is not a head!\n", id);
1647 /* detach_buf_packed clears data, so grab it now. */
1648 ret = vq->packed.desc_state[id].data;
1649 detach_buf_packed(vq, id, ctx);
/* Step over all descriptors of this buffer, wrapping at the end of the ring. */
1651 last_used += vq->packed.desc_state[id].num;
1652 if (unlikely(last_used >= vq->packed.vring.num)) {
1653 last_used -= vq->packed.vring.num;
1654 used_wrap_counter ^= 1;
/* Index and wrap counter share one u16, the counter sitting at VRING_PACKED_EVENT_F_WRAP_CTR. */
1657 last_used = (last_used | (used_wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
1658 WRITE_ONCE(vq->last_used_idx, last_used);
1661 * If we expect an interrupt for the next entry, tell host
1662 * by writing event index and flush out the write before
1663 * the read in the next get_buf call.
1665 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC)
1666 virtio_store_mb(vq->weak_barriers,
1667 &vq->packed.vring.driver->off_wrap,
1668 cpu_to_le16(vq->last_used_idx));
1670 LAST_ADD_TIME_INVALID(vq);
/*
 * Disable callbacks for a packed ring.  The write to the driver event flags
 * is skipped when the shadow copy already says
 * VRING_PACKED_EVENT_FLAG_DISABLE.
 */
1676 static void virtqueue_disable_cb_packed(struct virtqueue *_vq)
1678 struct vring_virtqueue *vq = to_vvq(_vq);
1680 if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) {
1681 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1682 vq->packed.vring.driver->flags =
1683 cpu_to_le16(vq->packed.event_flags_shadow);
/*
 * Re-enable callbacks for a packed ring and return vq->last_used_idx, which
 * virtqueue_enable_cb() later hands to virtqueue_poll().
 *
 * vq->last_used_idx is written to the driver off_wrap field, followed by a
 * write barrier.  If the shadow flags say DISABLE they are switched to
 * VRING_PACKED_EVENT_FLAG_DESC when vq->event is set, or to
 * VRING_PACKED_EVENT_FLAG_ENABLE otherwise, and written to the driver flags.
 */
1687 static unsigned int virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
1689 struct vring_virtqueue *vq = to_vvq(_vq);
1694 * We optimistically turn back on interrupts, then check if there was
1699 vq->packed.vring.driver->off_wrap =
1700 cpu_to_le16(vq->last_used_idx);
1702 * We need to update event offset and event wrap
1703 * counter first before updating event flags.
1705 virtio_wmb(vq->weak_barriers);
1708 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
1709 vq->packed.event_flags_shadow = vq->event ?
1710 VRING_PACKED_EVENT_FLAG_DESC :
1711 VRING_PACKED_EVENT_FLAG_ENABLE;
1712 vq->packed.vring.driver->flags =
1713 cpu_to_le16(vq->packed.event_flags_shadow);
1717 return vq->last_used_idx;
/*
 * Report whether the descriptor named by @off_wrap has been used.
 * @off_wrap packs a ring index with a wrap counter located at bit
 * VRING_PACKED_EVENT_F_WRAP_CTR; both parts are extracted and passed to
 * is_used_desc_packed().
 */
1720 static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap)
1722 struct vring_virtqueue *vq = to_vvq(_vq);
1726 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1727 used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1729 return is_used_desc_packed(vq, used_idx, wrap_counter);
/*
 * Re-enable callbacks for a packed ring with an event index placed ahead of
 * the current last-used position.
 *
 * The distance is three quarters of the descriptors that are currently not
 * free.  The resulting index is wrapped at the ring size and written,
 * together with a wrap counter, to the driver off_wrap field.  After a write
 * barrier the driver flags are switched away from DISABLE in the same way as
 * in virtqueue_enable_cb_prepare_packed().  A full barrier then precedes a
 * re-check of the descriptor at the current last-used position.
 */
1732 static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
1734 struct vring_virtqueue *vq = to_vvq(_vq);
1735 u16 used_idx, wrap_counter, last_used_idx;
1741 * We optimistically turn back on interrupts, then check if there was
1746 /* TODO: tune this threshold */
1747 bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4;
1748 last_used_idx = READ_ONCE(vq->last_used_idx);
1749 wrap_counter = packed_used_wrap_counter(last_used_idx);
1751 used_idx = packed_last_used(last_used_idx) + bufs;
1752 if (used_idx >= vq->packed.vring.num) {
1753 used_idx -= vq->packed.vring.num;
1757 vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx |
1758 (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
1761 * We need to update event offset and event wrap
1762 * counter first before updating event flags.
1764 virtio_wmb(vq->weak_barriers);
1767 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
1768 vq->packed.event_flags_shadow = vq->event ?
1769 VRING_PACKED_EVENT_FLAG_DESC :
1770 VRING_PACKED_EVENT_FLAG_ENABLE;
1771 vq->packed.vring.driver->flags =
1772 cpu_to_le16(vq->packed.event_flags_shadow);
1776 * We need to update event suppression structure first
1777 * before re-checking for more used buffers.
1779 virtio_mb(vq->weak_barriers);
/* Re-read the position: this check looks at the current entry, not at the delayed index. */
1781 last_used_idx = READ_ONCE(vq->last_used_idx);
1782 wrap_counter = packed_used_wrap_counter(last_used_idx);
1783 used_idx = packed_last_used(last_used_idx);
1784 if (is_used_desc_packed(vq, used_idx, wrap_counter)) {
/*
 * Scan desc_state for an entry that still holds a data token and detach it
 * with detach_buf_packed(), passing a NULL ctx.  The token is read before the
 * detach call.  Once the scan finds nothing more, the ring is expected to be
 * completely free; BUG_ON() fires if num_free differs from the ring size.
 */
1793 static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
1795 struct vring_virtqueue *vq = to_vvq(_vq);
1801 for (i = 0; i < vq->packed.vring.num; i++) {
1802 if (!vq->packed.desc_state[i].data)
1804 /* detach_buf clears data, so grab it now. */
1805 buf = vq->packed.desc_state[i].data;
1806 detach_buf_packed(vq, i, NULL);
1810 /* That should have freed everything. */
1811 BUG_ON(vq->vq.num_free != vq->packed.vring.num);
/*
 * Allocate an array of @num vring_desc_extra entries with kmalloc_array(),
 * zero it, and chain the entries so that the next field of entry i holds
 * i + 1 for every i below num - 1.
 *
 * NOTE(review): num is unsigned, so num - 1 wraps around when num is 0;
 * confirm that callers never pass 0.
 */
1817 static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num)
1819 struct vring_desc_extra *desc_extra;
1822 desc_extra = kmalloc_array(num, sizeof(struct vring_desc_extra),
1827 memset(desc_extra, 0, num * sizeof(struct vring_desc_extra));
1829 for (i = 0; i < num - 1; i++)
1830 desc_extra[i].next = i + 1;
/*
 * Release everything referenced by @vring_packed.
 *
 * The descriptor ring and the driver and device event areas are each handed
 * to vring_free_queue() only when their pointer is set, so a partially
 * filled structure can be passed in.  The desc_state and desc_extra arrays
 * are released with kfree().
 */
1835 static void vring_free_packed(struct vring_virtqueue_packed *vring_packed,
1836 struct virtio_device *vdev)
1838 if (vring_packed->vring.desc)
1839 vring_free_queue(vdev, vring_packed->ring_size_in_bytes,
1840 vring_packed->vring.desc,
1841 vring_packed->ring_dma_addr);
1843 if (vring_packed->vring.driver)
1844 vring_free_queue(vdev, vring_packed->event_size_in_bytes,
1845 vring_packed->vring.driver,
1846 vring_packed->driver_event_dma_addr);
1848 if (vring_packed->vring.device)
1849 vring_free_queue(vdev, vring_packed->event_size_in_bytes,
1850 vring_packed->vring.device,
1851 vring_packed->device_event_dma_addr);
1853 kfree(vring_packed->desc_state);
1854 kfree(vring_packed->desc_extra);
/*
 * Allocate the memory of a packed ring with vring_alloc_queue().
 *
 * Three zeroed (__GFP_ZERO) regions are requested: the descriptor ring of
 * num * sizeof(struct vring_packed_desc) bytes, the driver event structure
 * and the device event structure.  Each pointer, DMA address and size is
 * stored in @vring_packed right after its allocation, and vring.num is set
 * last.  The cleanup call visible at the end is vring_free_packed(), which
 * only frees the regions whose pointers are set.
 */
1857 static int vring_alloc_queue_packed(struct vring_virtqueue_packed *vring_packed,
1858 struct virtio_device *vdev,
1861 struct vring_packed_desc *ring;
1862 struct vring_packed_desc_event *driver, *device;
1863 dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
1864 size_t ring_size_in_bytes, event_size_in_bytes;
1866 ring_size_in_bytes = num * sizeof(struct vring_packed_desc);
1868 ring = vring_alloc_queue(vdev, ring_size_in_bytes,
1870 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1874 vring_packed->vring.desc = ring;
1875 vring_packed->ring_dma_addr = ring_dma_addr;
1876 vring_packed->ring_size_in_bytes = ring_size_in_bytes;
/* Both event areas have the same size. */
1878 event_size_in_bytes = sizeof(struct vring_packed_desc_event);
1880 driver = vring_alloc_queue(vdev, event_size_in_bytes,
1881 &driver_event_dma_addr,
1882 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1886 vring_packed->vring.driver = driver;
1887 vring_packed->event_size_in_bytes = event_size_in_bytes;
1888 vring_packed->driver_event_dma_addr = driver_event_dma_addr;
1890 device = vring_alloc_queue(vdev, event_size_in_bytes,
1891 &device_event_dma_addr,
1892 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1896 vring_packed->vring.device = device;
1897 vring_packed->device_event_dma_addr = device_event_dma_addr;
1899 vring_packed->vring.num = num;
1904 vring_free_packed(vring_packed, vdev);
/*
 * Allocate the per-descriptor bookkeeping of a packed ring: a zeroed
 * desc_state array of vring.num entries and a desc_extra array from
 * vring_alloc_desc_extra().  Both are stored in @vring_packed only after
 * both allocations have been made; failures branch to the err_desc_state
 * and err_desc_extra labels.
 */
1908 static int vring_alloc_state_extra_packed(struct vring_virtqueue_packed *vring_packed)
1910 struct vring_desc_state_packed *state;
1911 struct vring_desc_extra *extra;
1912 u32 num = vring_packed->vring.num;
1914 state = kmalloc_array(num, sizeof(struct vring_desc_state_packed), GFP_KERNEL);
1916 goto err_desc_state;
1918 memset(state, 0, num * sizeof(struct vring_desc_state_packed));
1920 extra = vring_alloc_desc_extra(num);
1922 goto err_desc_extra;
1924 vring_packed->desc_state = state;
1925 vring_packed->desc_extra = extra;
/*
 * Put the driver-side state of a packed ring into its initial condition:
 * next avail index 0, avail wrap counter 1, cleared event flags shadow and
 * avail_used_flags with only the AVAIL bit set.  For a queue without a
 * callback, the shadow and the driver event flags are set to
 * VRING_PACKED_EVENT_FLAG_DISABLE.
 */
1935 static void virtqueue_vring_init_packed(struct vring_virtqueue_packed *vring_packed,
1938 vring_packed->next_avail_idx = 0;
1939 vring_packed->avail_wrap_counter = 1;
1940 vring_packed->event_flags_shadow = 0;
1941 vring_packed->avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL;
1943 /* No callback? Tell other side not to bother us. */
1945 vring_packed->event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1946 vring_packed->vring.driver->flags =
1947 cpu_to_le16(vring_packed->event_flags_shadow);
/*
 * Install @vring_packed as the packed ring state of @vq.  The structure is
 * copied by value, so @vq takes over the pointers it contains.
 */
1951 static void virtqueue_vring_attach_packed(struct vring_virtqueue *vq,
1952 struct vring_virtqueue_packed *vring_packed)
1954 vq->packed = *vring_packed;
1956 /* Put everything in free lists. */
/*
 * Reset an existing packed ring in place without reallocating it: the device
 * and driver event areas and the whole descriptor ring are zeroed, then the
 * generic and packed-specific state is initialized again for the same ring
 * size and callback setting.
 */
1960 static void virtqueue_reinit_packed(struct vring_virtqueue *vq)
1962 memset(vq->packed.vring.device, 0, vq->packed.event_size_in_bytes);
1963 memset(vq->packed.vring.driver, 0, vq->packed.event_size_in_bytes);
1965 /* we need to reset the desc.flags. For more, see is_used_desc_packed() */
1966 memset(vq->packed.vring.desc, 0, vq->packed.ring_size_in_bytes);
1968 virtqueue_init(vq, vq->packed.vring.num);
1969 virtqueue_vring_init_packed(&vq->packed, !!vq->vq.callback);
/*
 * Create a packed virtqueue.
 *
 * Order of work: allocate ring memory (vring_alloc_queue_packed()), allocate
 * the vring_virtqueue, fill in its fields from the arguments and from the
 * negotiated features, allocate the per-descriptor state, initialize and
 * attach the packed ring state, and finally add the queue to vdev->vqs under
 * vqs_list_lock.  The cleanup call visible at the end releases the ring
 * memory with vring_free_packed().
 */
1972 static struct virtqueue *vring_create_virtqueue_packed(
1975 unsigned int vring_align,
1976 struct virtio_device *vdev,
1978 bool may_reduce_num,
1980 bool (*notify)(struct virtqueue *),
1981 void (*callback)(struct virtqueue *),
1984 struct vring_virtqueue_packed vring_packed = {};
1985 struct vring_virtqueue *vq;
1988 if (vring_alloc_queue_packed(&vring_packed, vdev, num))
1991 vq = kmalloc(sizeof(*vq), GFP_KERNEL);
1995 vq->vq.callback = callback;
1998 vq->vq.index = index;
1999 vq->vq.reset = false;
/* The ring memory was allocated above, so vring_free() may release it later. */
2000 vq->we_own_ring = true;
2001 vq->notify = notify;
2002 vq->weak_barriers = weak_barriers;
2003 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
2008 vq->packed_ring = true;
2009 vq->use_dma_api = vring_use_dma_api(vdev);
2011 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
2013 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
/* VIRTIO_F_ORDER_PLATFORM overrides the weak_barriers argument. */
2015 if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
2016 vq->weak_barriers = false;
2018 err = vring_alloc_state_extra_packed(&vring_packed);
2020 goto err_state_extra;
2022 virtqueue_vring_init_packed(&vring_packed, !!callback);
2024 virtqueue_init(vq, num);
2025 virtqueue_vring_attach_packed(vq, &vring_packed);
2027 spin_lock(&vdev->vqs_list_lock);
2028 list_add_tail(&vq->vq.list, &vdev->vqs);
2029 spin_unlock(&vdev->vqs_list_lock);
2035 vring_free_packed(&vring_packed, vdev);
/*
 * Replace the packed ring of @_vq with a newly allocated ring of @num
 * entries.
 *
 * The new ring memory and per-descriptor state are allocated before the old
 * ring is released with vring_free(); the new ring is then initialized and
 * attached.  The lines at the end free the new allocation with
 * vring_free_packed() and reinitialize the existing ring with
 * virtqueue_reinit_packed().
 */
2040 static int virtqueue_resize_packed(struct virtqueue *_vq, u32 num)
2042 struct vring_virtqueue_packed vring_packed = {};
2043 struct vring_virtqueue *vq = to_vvq(_vq);
2044 struct virtio_device *vdev = _vq->vdev;
2047 if (vring_alloc_queue_packed(&vring_packed, vdev, num))
2050 err = vring_alloc_state_extra_packed(&vring_packed);
2052 goto err_state_extra;
/* Both allocations are done; the old ring can go now. */
2054 vring_free(&vq->vq);
2056 virtqueue_vring_init_packed(&vring_packed, !!vq->vq.callback);
2058 virtqueue_init(vq, vring_packed.vring.num);
2059 virtqueue_vring_attach_packed(vq, &vring_packed);
2064 vring_free_packed(&vring_packed, vdev);
2066 virtqueue_reinit_packed(vq);
2072 * Generic functions and exported symbols.
/*
 * Common back end of the exported virtqueue_add_*() functions.  All
 * arguments are forwarded unchanged to virtqueue_add_packed() or
 * virtqueue_add_split(), selected by vq->packed_ring.
 */
2075 static inline int virtqueue_add(struct virtqueue *_vq,
2076 struct scatterlist *sgs[],
2077 unsigned int total_sg,
2078 unsigned int out_sgs,
2079 unsigned int in_sgs,
2084 struct vring_virtqueue *vq = to_vvq(_vq);
2086 return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg,
2087 out_sgs, in_sgs, data, ctx, gfp) :
2088 virtqueue_add_split(_vq, sgs, total_sg,
2089 out_sgs, in_sgs, data, ctx, gfp);
2093 * virtqueue_add_sgs - expose buffers to other end
2094 * @_vq: the struct virtqueue we're talking about.
2095 * @sgs: array of terminated scatterlists.
2096 * @out_sgs: the number of scatterlists readable by other side
2097 * @in_sgs: the number of scatterlists which are writable (after readable ones)
2098 * @data: the token identifying the buffer.
2099 * @gfp: how to do memory allocations (if necessary).
2101 * Caller must ensure we don't call this with other virtqueue operations
2102 * at the same time (except where noted).
2104 * Returns zero or a negative error (e.g. -ENOSPC, -ENOMEM, -EIO).
2106 int virtqueue_add_sgs(struct virtqueue *_vq,
2107 struct scatterlist *sgs[],
2108 unsigned int out_sgs,
2109 unsigned int in_sgs,
2113 unsigned int i, total_sg = 0;
2115 /* Count them first. */
/*
 * Every one of the out_sgs + in_sgs lists is walked to its end with
 * sg_next(); total_sg is what virtqueue_add() receives as the entry count.
 */
2116 for (i = 0; i < out_sgs + in_sgs; i++) {
2117 struct scatterlist *sg;
2119 for (sg = sgs[i]; sg; sg = sg_next(sg))
2122 return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs,
2125 EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
2128 * virtqueue_add_outbuf - expose output buffers to other end
2129 * @vq: the struct virtqueue we're talking about.
2130 * @sg: scatterlist (must be well-formed and terminated!)
2131 * @num: the number of entries in @sg readable by other side
2132 * @data: the token identifying the buffer.
2133 * @gfp: how to do memory allocations (if necessary).
2135 * Caller must ensure we don't call this with other virtqueue operations
2136 * at the same time (except where noted).
2138 * Returns zero or a negative error (e.g. -ENOSPC, -ENOMEM, -EIO).
2140 int virtqueue_add_outbuf(struct virtqueue *vq,
2141 struct scatterlist *sg, unsigned int num,
/* A single list of @num entries: one readable list, no writable list, no ctx. */
2145 return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp);
2147 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
2150 * virtqueue_add_inbuf - expose input buffers to other end
2151 * @vq: the struct virtqueue we're talking about.
2152 * @sg: scatterlist (must be well-formed and terminated!)
2153 * @num: the number of entries in @sg writable by other side
2154 * @data: the token identifying the buffer.
2155 * @gfp: how to do memory allocations (if necessary).
2157 * Caller must ensure we don't call this with other virtqueue operations
2158 * at the same time (except where noted).
2160 * Returns zero or a negative error (e.g. -ENOSPC, -ENOMEM, -EIO).
2162 int virtqueue_add_inbuf(struct virtqueue *vq,
2163 struct scatterlist *sg, unsigned int num,
/* A single list of @num entries: no readable list, one writable list, no ctx. */
2167 return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp);
2169 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
2172 * virtqueue_add_inbuf_ctx - expose input buffers to other end
2173 * @vq: the struct virtqueue we're talking about.
2174 * @sg: scatterlist (must be well-formed and terminated!)
2175 * @num: the number of entries in @sg writable by other side
2176 * @data: the token identifying the buffer.
2177 * @ctx: extra context for the token
2178 * @gfp: how to do memory allocations (if necessary).
2180 * Caller must ensure we don't call this with other virtqueue operations
2181 * at the same time (except where noted).
2183 * Returns zero or a negative error (e.g. -ENOSPC, -ENOMEM, -EIO).
2185 int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
2186 struct scatterlist *sg, unsigned int num,
/* Same as virtqueue_add_inbuf(), except that @ctx is passed through instead of NULL. */
2191 return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp);
2193 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
2196 * virtqueue_kick_prepare - first half of split virtqueue_kick call.
2197 * @_vq: the struct virtqueue
2199 * Instead of virtqueue_kick(), you can do:
2200 * if (virtqueue_kick_prepare(vq))
2201 * virtqueue_notify(vq);
2203 * This is sometimes useful because the virtqueue_kick_prepare() needs
2204 * to be serialized, but the actual virtqueue_notify() call does not.
2206 bool virtqueue_kick_prepare(struct virtqueue *_vq)
2208 struct vring_virtqueue *vq = to_vvq(_vq);
/* The ring layout selects the implementation; the result is passed through. */
2210 return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) :
2211 virtqueue_kick_prepare_split(_vq);
2213 EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
2216 * virtqueue_notify - second half of split virtqueue_kick call.
2217 * @_vq: the struct virtqueue
2219 * This does not need to be serialized.
2221 * Returns false if host notify failed or queue is broken, otherwise true.
2223 bool virtqueue_notify(struct virtqueue *_vq)
2225 struct vring_virtqueue *vq = to_vvq(_vq);
/* The broken flag is checked before the transport is touched. */
2227 if (unlikely(vq->broken))
2230 /* Prod other side to tell it about changes. */
/* vq->notify is the callback that was stored when the queue was created. */
2231 if (!vq->notify(_vq)) {
2237 EXPORT_SYMBOL_GPL(virtqueue_notify);
2240 * virtqueue_kick - update after add_buf
2241 * @vq: the struct virtqueue
2243 * After one or more virtqueue_add_* calls, invoke this to kick
2246 * Caller must ensure we don't call this with other virtqueue
2247 * operations at the same time (except where noted).
2249 * Returns false if kick failed, otherwise true.
2251 bool virtqueue_kick(struct virtqueue *vq)
/* The notification is sent only when virtqueue_kick_prepare() asks for it. */
2253 if (virtqueue_kick_prepare(vq))
2254 return virtqueue_notify(vq);
2257 EXPORT_SYMBOL_GPL(virtqueue_kick);
2260 * virtqueue_get_buf_ctx - get the next used buffer
2261 * @_vq: the struct virtqueue we're talking about.
2262 * @len: the length written into the buffer
2263 * @ctx: extra context for the token
2265 * If the device wrote data into the buffer, @len will be set to the
2266 * amount written. This means you don't need to clear the buffer
2267 * beforehand to ensure there's no data leakage in the case of short
2270 * Caller must ensure we don't call this with other virtqueue
2271 * operations at the same time (except where noted).
2273 * Returns NULL if there are no used buffers, or the "data" token
2274 * handed to virtqueue_add_*().
2276 void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
2279 struct vring_virtqueue *vq = to_vvq(_vq);
/* The ring layout selects the implementation; @len and @ctx are passed through. */
2281 return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) :
2282 virtqueue_get_buf_ctx_split(_vq, len, ctx);
2284 EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);
/*
 * Get the next used buffer without retrieving a context pointer: calls
 * virtqueue_get_buf_ctx() with a NULL ctx and returns its result.
 */
2286 void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
2288 return virtqueue_get_buf_ctx(_vq, len, NULL);
2290 EXPORT_SYMBOL_GPL(virtqueue_get_buf);
2292 * virtqueue_disable_cb - disable callbacks
2293 * @_vq: the struct virtqueue we're talking about.
2295 * Note that this is not necessarily synchronous, hence unreliable and only
2296 * useful as an optimization.
2298 * Unlike other operations, this need not be serialized.
2300 void virtqueue_disable_cb(struct virtqueue *_vq)
2302 struct vring_virtqueue *vq = to_vvq(_vq);
/*
 * event_triggered is set in vring_interrupt() and cleared again by
 * virtqueue_enable_cb_prepare() and virtqueue_enable_cb_delayed().
 */
2304 /* If device triggered an event already it won't trigger one again:
2305 * no need to disable.
2307 if (vq->event_triggered)
2310 if (vq->packed_ring)
2311 virtqueue_disable_cb_packed(_vq);
2313 virtqueue_disable_cb_split(_vq);
2315 EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
2318 * virtqueue_enable_cb_prepare - restart callbacks after disable_cb
2319 * @_vq: the struct virtqueue we're talking about.
2321 * This re-enables callbacks; it returns current queue state
2322 * in an opaque unsigned value. This value should be later tested by
2323 * virtqueue_poll, to detect a possible race between the driver checking for
2324 * more work, and enabling callbacks.
2326 * Caller must ensure we don't call this with other virtqueue
2327 * operations at the same time (except where noted).
2329 unsigned int virtqueue_enable_cb_prepare(struct virtqueue *_vq)
2331 struct vring_virtqueue *vq = to_vvq(_vq);
/* Drop the hint from vring_interrupt() so that virtqueue_disable_cb() acts again. */
2333 if (vq->event_triggered)
2334 vq->event_triggered = false;
2336 return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) :
2337 virtqueue_enable_cb_prepare_split(_vq);
2339 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);
2342 * virtqueue_poll - query pending used buffers
2343 * @_vq: the struct virtqueue we're talking about.
2344 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
2346 * Returns "true" if there are pending used buffers in the queue.
2348 * This does not need to be serialized.
2350 bool virtqueue_poll(struct virtqueue *_vq, unsigned int last_used_idx)
2352 struct vring_virtqueue *vq = to_vvq(_vq);
2354 if (unlikely(vq->broken))
/* Full barrier before the ring is inspected. */
2357 virtio_mb(vq->weak_barriers);
/*
 * NOTE(review): virtqueue_poll_packed() takes a u16, so last_used_idx is
 * narrowed to 16 bits on the packed path; confirm the opaque value always
 * fits.
 */
2358 return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) :
2359 virtqueue_poll_split(_vq, last_used_idx);
2361 EXPORT_SYMBOL_GPL(virtqueue_poll);
2364 * virtqueue_enable_cb - restart callbacks after disable_cb.
2365 * @_vq: the struct virtqueue we're talking about.
2367 * This re-enables callbacks; it returns "false" if there are pending
2368 * buffers in the queue, to detect a possible race between the driver
2369 * checking for more work, and enabling callbacks.
2371 * Caller must ensure we don't call this with other virtqueue
2372 * operations at the same time (except where noted).
2374 bool virtqueue_enable_cb(struct virtqueue *_vq)
/* Enable first, then poll with the returned state to catch buffers used in between. */
2376 unsigned int last_used_idx = virtqueue_enable_cb_prepare(_vq);
2378 return !virtqueue_poll(_vq, last_used_idx);
2380 EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
2383 * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
2384 * @_vq: the struct virtqueue we're talking about.
2386 * This re-enables callbacks but hints to the other side to delay
2387 * interrupts until most of the available buffers have been processed;
2388 * it returns "false" if there are many pending buffers in the queue,
2389 * to detect a possible race between the driver checking for more work,
2390 * and enabling callbacks.
2392 * Caller must ensure we don't call this with other virtqueue
2393 * operations at the same time (except where noted).
2395 bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
2397 struct vring_virtqueue *vq = to_vvq(_vq);
/* Drop the hint from vring_interrupt() so that virtqueue_disable_cb() acts again. */
2399 if (vq->event_triggered)
2400 vq->event_triggered = false;
2402 return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) :
2403 virtqueue_enable_cb_delayed_split(_vq);
2405 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
2408 * virtqueue_detach_unused_buf - detach first unused buffer
2409 * @_vq: the struct virtqueue we're talking about.
2411 * Returns NULL or the "data" token handed to virtqueue_add_*().
2412 * This is not valid on an active queue; it is useful for device
2413 * shutdown or when resetting a queue.
2415 void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
2417 struct vring_virtqueue *vq = to_vvq(_vq);
/* The ring layout selects the implementation; the result is passed through. */
2419 return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) :
2420 virtqueue_detach_unused_buf_split(_vq);
2422 EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
/*
 * Layout-independent check for pending used buffers: forwards to
 * more_used_packed() or more_used_split() depending on vq->packed_ring.
 */
2424 static inline bool more_used(const struct vring_virtqueue *vq)
2426 return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
2430 * vring_interrupt - notify a virtqueue on an interrupt
2431 * @irq: the IRQ number (ignored)
2432 * @_vq: the struct virtqueue to notify
2434 * Calls the callback function of @_vq to process the virtqueue
2437 irqreturn_t vring_interrupt(int irq, void *_vq)
2439 struct vring_virtqueue *vq = to_vvq(_vq);
/* An interrupt without used buffers is only logged at debug level. */
2441 if (!more_used(vq)) {
2442 pr_debug("virtqueue interrupt with no work for %p\n", vq);
/* With CONFIG_VIRTIO_HARDEN_NOTIFICATION a broken queue gets a one-time warning. */
2446 if (unlikely(vq->broken)) {
2447 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
2448 dev_warn_once(&vq->vq.vdev->dev,
2449 "virtio vring IRQ raised before DRIVER_OK");
2456 /* Just a hint for performance: so it's ok that this can be racy! */
2458 vq->event_triggered = true;
2460 pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
/* The callback is optional; queues created without one are skipped here. */
2461 if (vq->vq.callback)
2462 vq->vq.callback(&vq->vq);
2466 EXPORT_SYMBOL_GPL(vring_interrupt);
2468 /* Only available for split ring */
/*
 * Build a split virtqueue around the ring described by @vring_split.
 *
 * VIRTIO_F_RING_PACKED is tested before anything is allocated.  The
 * vring_virtqueue is then allocated and filled in from the arguments and the
 * negotiated features, with we_own_ring set to false.  Per-descriptor state
 * is allocated with vring_alloc_state_extra_split(), the split ring state is
 * initialized and attached, and the queue is added to vdev->vqs under
 * vqs_list_lock.
 */
2469 static struct virtqueue *__vring_new_virtqueue(unsigned int index,
2470 struct vring_virtqueue_split *vring_split,
2471 struct virtio_device *vdev,
2474 bool (*notify)(struct virtqueue *),
2475 void (*callback)(struct virtqueue *),
2478 struct vring_virtqueue *vq;
2481 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2484 vq = kmalloc(sizeof(*vq), GFP_KERNEL);
2488 vq->packed_ring = false;
2489 vq->vq.callback = callback;
2492 vq->vq.index = index;
2493 vq->vq.reset = false;
/* The ring memory is not owned by the queue on this path. */
2494 vq->we_own_ring = false;
2495 vq->notify = notify;
2496 vq->weak_barriers = weak_barriers;
2497 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
2502 vq->use_dma_api = vring_use_dma_api(vdev);
2504 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
2506 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
/* VIRTIO_F_ORDER_PLATFORM overrides the weak_barriers argument. */
2508 if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
2509 vq->weak_barriers = false;
2511 err = vring_alloc_state_extra_split(vring_split);
2517 virtqueue_vring_init_split(vring_split, vq);
2519 virtqueue_init(vq, vring_split->vring.num);
2520 virtqueue_vring_attach_split(vq, vring_split);
2522 spin_lock(&vdev->vqs_list_lock);
2523 list_add_tail(&vq->vq.list, &vdev->vqs);
2524 spin_unlock(&vdev->vqs_list_lock);
/*
 * Create a virtqueue whose ring memory is allocated by this file.  The
 * packed implementation is used when VIRTIO_F_RING_PACKED has been
 * negotiated, the split implementation otherwise; both receive the same
 * arguments.
 */
2528 struct virtqueue *vring_create_virtqueue(
2531 unsigned int vring_align,
2532 struct virtio_device *vdev,
2534 bool may_reduce_num,
2536 bool (*notify)(struct virtqueue *),
2537 void (*callback)(struct virtqueue *),
2541 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2542 return vring_create_virtqueue_packed(index, num, vring_align,
2543 vdev, weak_barriers, may_reduce_num,
2544 context, notify, callback, name);
2546 return vring_create_virtqueue_split(index, num, vring_align,
2547 vdev, weak_barriers, may_reduce_num,
2548 context, notify, callback, name);
2550 EXPORT_SYMBOL_GPL(vring_create_virtqueue);
2553 * virtqueue_resize - resize the vring of vq
2554 * @_vq: the struct virtqueue we're talking about.
2555 * @num: new ring num
2556 * @recycle: callback used to recycle buffers that are no longer used
2558 * When it is really necessary to create a new vring, the current vq is first
2559 * put into the reset state, and the passed callback is then called to recycle
2560 * the buffers that are no longer used. The old vring is released only after
2561 * the new vring has been successfully created.
2563 * Caller must ensure we don't call this with other virtqueue operations
2564 * at the same time (except where noted).
2566 * Returns zero or a negative error.
2568 * -ENOMEM: Failed to allocate a new ring, fall back to the original ring size.
2569 * vq can still work normally
2570 * -EBUSY: Failed to sync with device, vq may not work properly
2571 * -ENOENT: Transport or device not supported
2572 * -E2BIG/-EINVAL: @num is invalid
2573 * -EPERM: Operation not permitted
2576 int virtqueue_resize(struct virtqueue *_vq, u32 num,
2577 void (*recycle)(struct virtqueue *vq, void *buf))
2579 struct vring_virtqueue *vq = to_vvq(_vq);
2580 struct virtio_device *vdev = vq->vq.vdev;
/* Precondition checks: ring ownership, upper bound and an unchanged size. */
2584 if (!vq->we_own_ring)
2587 if (num > vq->vq.num_max)
2593 if ((vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num) == num)
/* Both transport hooks must exist before the queue is touched. */
2596 if (!vdev->config->disable_vq_and_reset)
2599 if (!vdev->config->enable_vq_after_reset)
2602 err = vdev->config->disable_vq_and_reset(_vq);
/* Drain every buffer that is still attached to the queue. */
2606 while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL)
2609 if (vq->packed_ring)
2610 err = virtqueue_resize_packed(_vq, num);
2612 err = virtqueue_resize_split(_vq, num);
2614 if (vdev->config->enable_vq_after_reset(_vq))
2619 EXPORT_SYMBOL_GPL(virtqueue_resize);
2621 /* Only available for split ring */
/*
 * Create a split virtqueue on ring memory supplied by the caller.  The
 * memory at @pages is laid out with vring_init() for @num entries and
 * @vring_align, and the remaining work is done by __vring_new_virtqueue().
 * VIRTIO_F_RING_PACKED is tested first.
 */
2622 struct virtqueue *vring_new_virtqueue(unsigned int index,
2624 unsigned int vring_align,
2625 struct virtio_device *vdev,
2629 bool (*notify)(struct virtqueue *vq),
2630 void (*callback)(struct virtqueue *vq),
2633 struct vring_virtqueue_split vring_split = {};
2635 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2638 vring_init(&vring_split.vring, num, pages, vring_align);
2639 return __vring_new_virtqueue(index, &vring_split, vdev, weak_barriers,
2640 context, notify, callback, name);
2642 EXPORT_SYMBOL_GPL(vring_new_virtqueue);
/*
 * Release the memory behind a virtqueue.
 *
 * Ring memory is returned with vring_free_queue() only when we_own_ring is
 * set.  A packed ring gives back the descriptor ring and both event areas
 * and frees its desc_state and desc_extra arrays; a split ring gives back
 * its single ring region.  The split desc_state and desc_extra arrays are
 * freed under a separate !packed_ring check.
 */
2644 static void vring_free(struct virtqueue *_vq)
2646 struct vring_virtqueue *vq = to_vvq(_vq);
2648 if (vq->we_own_ring) {
2649 if (vq->packed_ring) {
2650 vring_free_queue(vq->vq.vdev,
2651 vq->packed.ring_size_in_bytes,
2652 vq->packed.vring.desc,
2653 vq->packed.ring_dma_addr);
2655 vring_free_queue(vq->vq.vdev,
2656 vq->packed.event_size_in_bytes,
2657 vq->packed.vring.driver,
2658 vq->packed.driver_event_dma_addr);
2660 vring_free_queue(vq->vq.vdev,
2661 vq->packed.event_size_in_bytes,
2662 vq->packed.vring.device,
2663 vq->packed.device_event_dma_addr);
2665 kfree(vq->packed.desc_state);
2666 kfree(vq->packed.desc_extra);
2668 vring_free_queue(vq->vq.vdev,
2669 vq->split.queue_size_in_bytes,
2670 vq->split.vring.desc,
2671 vq->split.queue_dma_addr);
2674 if (!vq->packed_ring) {
2675 kfree(vq->split.desc_state);
2676 kfree(vq->split.desc_extra);
2680 void vring_del_virtqueue(struct virtqueue *_vq)
2682 struct vring_virtqueue *vq = to_vvq(_vq);
/* Unlink the queue from the device list under the same lock that guards insertion. */
2684 spin_lock(&vq->vq.vdev->vqs_list_lock);
2685 list_del(&_vq->list);
2686 spin_unlock(&vq->vq.vdev->vqs_list_lock);
2692 EXPORT_SYMBOL_GPL(vring_del_virtqueue);
2694 /* Manipulates transport-specific feature bits. */
2695 void vring_transport_features(struct virtio_device *vdev)
/*
 * Walk every transport feature bit.  The bits named in the case labels are
 * the ones this file knows about; any other bit is cleared on the device.
 */
2699 for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
2701 case VIRTIO_RING_F_INDIRECT_DESC:
2703 case VIRTIO_RING_F_EVENT_IDX:
2705 case VIRTIO_F_VERSION_1:
2707 case VIRTIO_F_ACCESS_PLATFORM:
2709 case VIRTIO_F_RING_PACKED:
2711 case VIRTIO_F_ORDER_PLATFORM:
2714 /* We don't understand this bit. */
2715 __virtio_clear_bit(vdev, i);
2719 EXPORT_SYMBOL_GPL(vring_transport_features);
2722 * virtqueue_get_vring_size - return the size of the virtqueue's vring
2723 * @_vq: the struct virtqueue containing the vring of interest.
2725 * Returns the size of the vring. This is mainly used for boasting to
2726 * userspace. Unlike other operations, this need not be serialized.
2728 unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
2731 struct vring_virtqueue *vq = to_vvq(_vq);
2733 return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
2735 EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
2738 * This function should only be called by the core, not directly by the driver.
2740 void __virtqueue_break(struct virtqueue *_vq)
2742 struct vring_virtqueue *vq = to_vvq(_vq);
2744 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2745 WRITE_ONCE(vq->broken, true);
2747 EXPORT_SYMBOL_GPL(__virtqueue_break);
2750 * This function should only be called by the core, not directly by the driver.
2752 void __virtqueue_unbreak(struct virtqueue *_vq)
2754 struct vring_virtqueue *vq = to_vvq(_vq);
2756 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2757 WRITE_ONCE(vq->broken, false);
2759 EXPORT_SYMBOL_GPL(__virtqueue_unbreak);
2761 bool virtqueue_is_broken(struct virtqueue *_vq)
2763 struct vring_virtqueue *vq = to_vvq(_vq);
2765 return READ_ONCE(vq->broken);
2767 EXPORT_SYMBOL_GPL(virtqueue_is_broken);
2770 * This should prevent the device from being used, allowing drivers to
2771 * recover. You may need to grab appropriate locks to flush.
2773 void virtio_break_device(struct virtio_device *dev)
2775 struct virtqueue *_vq;
2777 spin_lock(&dev->vqs_list_lock);
2778 list_for_each_entry(_vq, &dev->vqs, list) {
2779 struct vring_virtqueue *vq = to_vvq(_vq);
2781 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2782 WRITE_ONCE(vq->broken, true);
2784 spin_unlock(&dev->vqs_list_lock);
2786 EXPORT_SYMBOL_GPL(virtio_break_device);
2789 * This should allow the device to be used by the driver. You may
2790 * need to grab appropriate locks to flush the write to
2791 * vq->broken. This should only be used in some specific case e.g
2792 * (probing and restoring). This function should only be called by the
2793 * core, not directly by the driver.
2795 void __virtio_unbreak_device(struct virtio_device *dev)
2797 struct virtqueue *_vq;
2799 spin_lock(&dev->vqs_list_lock);
2800 list_for_each_entry(_vq, &dev->vqs, list) {
2801 struct vring_virtqueue *vq = to_vvq(_vq);
2803 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2804 WRITE_ONCE(vq->broken, false);
2806 spin_unlock(&dev->vqs_list_lock);
2808 EXPORT_SYMBOL_GPL(__virtio_unbreak_device);
2810 dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq)
2812 struct vring_virtqueue *vq = to_vvq(_vq);
2814 BUG_ON(!vq->we_own_ring);
2816 if (vq->packed_ring)
2817 return vq->packed.ring_dma_addr;
2819 return vq->split.queue_dma_addr;
2821 EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);
2823 dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
2825 struct vring_virtqueue *vq = to_vvq(_vq);
2827 BUG_ON(!vq->we_own_ring);
2829 if (vq->packed_ring)
2830 return vq->packed.driver_event_dma_addr;
2832 return vq->split.queue_dma_addr +
2833 ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
2835 EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);
2837 dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq)
2839 struct vring_virtqueue *vq = to_vvq(_vq);
2841 BUG_ON(!vq->we_own_ring);
2843 if (vq->packed_ring)
2844 return vq->packed.device_event_dma_addr;
2846 return vq->split.queue_dma_addr +
2847 ((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
2849 EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
2851 /* Only available for split ring */
2852 const struct vring *virtqueue_get_vring(struct virtqueue *vq)
2854 return &to_vvq(vq)->split.vring;
2856 EXPORT_SYMBOL_GPL(virtqueue_get_vring);
2858 MODULE_LICENSE("GPL");