drivers/virtio/virtio_ring.c (platform/kernel/linux-rpi.git)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* Virtio ring implementation.
3  *
4  *  Copyright 2007 Rusty Russell IBM Corporation
5  */
6 #include <linux/virtio.h>
7 #include <linux/virtio_ring.h>
8 #include <linux/virtio_config.h>
9 #include <linux/device.h>
10 #include <linux/slab.h>
11 #include <linux/module.h>
12 #include <linux/hrtimer.h>
13 #include <linux/dma-mapping.h>
14 #include <linux/kmsan.h>
15 #include <linux/spinlock.h>
16 #include <xen/xen.h>
17
18 #ifdef DEBUG
19 /* For development, we want to crash whenever the ring is screwed. */
20 #define BAD_RING(_vq, fmt, args...)                             \
21         do {                                                    \
22                 dev_err(&(_vq)->vq.vdev->dev,                   \
23                         "%s:"fmt, (_vq)->vq.name, ##args);      \
24                 BUG();                                          \
25         } while (0)
26 /* Caller is supposed to guarantee no reentry. */
27 #define START_USE(_vq)                                          \
28         do {                                                    \
29                 if ((_vq)->in_use)                              \
30                         panic("%s:in_use = %i\n",               \
31                               (_vq)->vq.name, (_vq)->in_use);   \
32                 (_vq)->in_use = __LINE__;                       \
33         } while (0)
34 #define END_USE(_vq) \
35         do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while (0)
36 #define LAST_ADD_TIME_UPDATE(_vq)                               \
37         do {                                                    \
38                 ktime_t now = ktime_get();                      \
39                                                                 \
40                 /* No kick or get, with .1 second between?  Warn. */ \
41                 if ((_vq)->last_add_time_valid)                 \
42                         WARN_ON(ktime_to_ms(ktime_sub(now,      \
43                                 (_vq)->last_add_time)) > 100);  \
44                 (_vq)->last_add_time = now;                     \
45                 (_vq)->last_add_time_valid = true;              \
46         } while (0)
47 #define LAST_ADD_TIME_CHECK(_vq)                                \
48         do {                                                    \
49                 if ((_vq)->last_add_time_valid) {               \
50                         WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \
51                                       (_vq)->last_add_time)) > 100); \
52                 }                                               \
53         } while (0)
54 #define LAST_ADD_TIME_INVALID(_vq)                              \
55         ((_vq)->last_add_time_valid = false)
56 #else
57 #define BAD_RING(_vq, fmt, args...)                             \
58         do {                                                    \
59                 dev_err(&_vq->vq.vdev->dev,                     \
60                         "%s:"fmt, (_vq)->vq.name, ##args);      \
61                 (_vq)->broken = true;                           \
62         } while (0)
63 #define START_USE(vq)
64 #define END_USE(vq)
65 #define LAST_ADD_TIME_UPDATE(vq)
66 #define LAST_ADD_TIME_CHECK(vq)
67 #define LAST_ADD_TIME_INVALID(vq)
68 #endif
69
70 struct vring_desc_state_split {
71         void *data;                     /* Data for callback. */
72         struct vring_desc *indir_desc;  /* Indirect descriptor, if any. */
73 };
74
75 struct vring_desc_state_packed {
76         void *data;                     /* Data for callback. */
77         struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
78         u16 num;                        /* Descriptor list length. */
79         u16 last;                       /* The last desc state in a list. */
80 };
81
82 struct vring_desc_extra {
83         dma_addr_t addr;                /* Descriptor DMA addr. */
84         u32 len;                        /* Descriptor length. */
85         u16 flags;                      /* Descriptor flags. */
86         u16 next;                       /* The next desc state in a list. */
87 };
88
89 struct vring_virtqueue_split {
90         /* Actual memory layout for this queue. */
91         struct vring vring;
92
93         /* Last written value to avail->flags */
94         u16 avail_flags_shadow;
95
96         /*
97          * Last written value to avail->idx in
98          * guest byte order.
99          */
100         u16 avail_idx_shadow;
101
102         /* Per-descriptor state. */
103         struct vring_desc_state_split *desc_state;
104         struct vring_desc_extra *desc_extra;
105
106         /* DMA address and size information */
107         dma_addr_t queue_dma_addr;
108         size_t queue_size_in_bytes;
109
110         /*
111          * The parameters for creating vrings are reserved for creating new
112          * vring.
113          */
114         u32 vring_align;
115         bool may_reduce_num;
116 };
117
118 struct vring_virtqueue_packed {
119         /* Actual memory layout for this queue. */
120         struct {
121                 unsigned int num;
122                 struct vring_packed_desc *desc;
123                 struct vring_packed_desc_event *driver;
124                 struct vring_packed_desc_event *device;
125         } vring;
126
127         /* Driver ring wrap counter. */
128         bool avail_wrap_counter;
129
130         /* Avail used flags. */
131         u16 avail_used_flags;
132
133         /* Index of the next avail descriptor. */
134         u16 next_avail_idx;
135
136         /*
137          * Last written value to driver->flags in
138          * guest byte order.
139          */
140         u16 event_flags_shadow;
141
142         /* Per-descriptor state. */
143         struct vring_desc_state_packed *desc_state;
144         struct vring_desc_extra *desc_extra;
145
146         /* DMA address and size information */
147         dma_addr_t ring_dma_addr;
148         dma_addr_t driver_event_dma_addr;
149         dma_addr_t device_event_dma_addr;
150         size_t ring_size_in_bytes;
151         size_t event_size_in_bytes;
152 };
153
154 struct vring_virtqueue {
155         struct virtqueue vq;
156
157         /* Is this a packed ring? */
158         bool packed_ring;
159
160         /* Is DMA API used? */
161         bool use_dma_api;
162
163         /* Can we use weak barriers? */
164         bool weak_barriers;
165
166         /* Other side has made a mess, don't try any more. */
167         bool broken;
168
169         /* Host supports indirect buffers */
170         bool indirect;
171
172         /* Host publishes avail event idx */
173         bool event;
174
175         /* Head of free buffer list. */
176         unsigned int free_head;
177         /* Number we've added since last sync. */
178         unsigned int num_added;
179
180         /* Last used index we've seen.
181          * For the split ring, it just contains the last used index.
182          * For the packed ring:
183          * bits up to VRING_PACKED_EVENT_F_WRAP_CTR contain the last used index;
184          * bits from VRING_PACKED_EVENT_F_WRAP_CTR contain the used wrap counter.
185          */
186         u16 last_used_idx;
187
188         /* Hint for event idx: already triggered no need to disable. */
189         bool event_triggered;
190
191         union {
192                 /* Available for split ring */
193                 struct vring_virtqueue_split split;
194
195                 /* Available for packed ring */
196                 struct vring_virtqueue_packed packed;
197         };
198
199         /* How to notify other side. FIXME: commonalize hcalls! */
200         bool (*notify)(struct virtqueue *vq);
201
202         /* DMA, allocation, and size information */
203         bool we_own_ring;
204
205         /* Device used for doing DMA */
206         struct device *dma_dev;
207
208 #ifdef DEBUG
209         /* They're supposed to lock for us. */
210         unsigned int in_use;
211
212         /* Figure out if their kicks are too delayed. */
213         bool last_add_time_valid;
214         ktime_t last_add_time;
215 #endif
216 };
217
218 static struct virtqueue *__vring_new_virtqueue(unsigned int index,
219                                                struct vring_virtqueue_split *vring_split,
220                                                struct virtio_device *vdev,
221                                                bool weak_barriers,
222                                                bool context,
223                                                bool (*notify)(struct virtqueue *),
224                                                void (*callback)(struct virtqueue *),
225                                                const char *name,
226                                                struct device *dma_dev);
227 static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num);
228 static void vring_free(struct virtqueue *_vq);
229
230 /*
231  * Helpers.
232  */
233
234 #define to_vvq(_vq) container_of_const(_vq, struct vring_virtqueue, vq)
235
236 static bool virtqueue_use_indirect(const struct vring_virtqueue *vq,
237                                    unsigned int total_sg)
238 {
239         /*
240          * If the host supports indirect descriptor tables, and we have multiple
241          * buffers, then go indirect. FIXME: tune this threshold
242          */
243         return (vq->indirect && total_sg > 1 && vq->vq.num_free);
244 }
245
246 /*
247  * Modern virtio devices have feature bits to specify whether they need a
248  * quirk and bypass the IOMMU. If not there, just use the DMA API.
249  *
250  * If there, the interaction between virtio and DMA API is messy.
251  *
252  * On most systems with virtio, physical addresses match bus addresses,
253  * and it doesn't particularly matter whether we use the DMA API.
254  *
255  * On some systems, including Xen and any system with a physical device
256  * that speaks virtio behind a physical IOMMU, we must use the DMA API
257  * for virtio DMA to work at all.
258  *
259  * On other systems, including SPARC and PPC64, virtio-pci devices are
260  * enumerated as though they are behind an IOMMU, but the virtio host
261  * ignores the IOMMU, so we must either pretend that the IOMMU isn't
262  * there or somehow map everything as the identity.
263  *
264  * For the time being, we preserve historic behavior and bypass the DMA
265  * API.
266  *
267  * TODO: install a per-device DMA ops structure that does the right thing
268  * taking into account all the above quirks, and use the DMA API
269  * unconditionally on data path.
270  */
271
272 static bool vring_use_dma_api(const struct virtio_device *vdev)
273 {
274         if (!virtio_has_dma_quirk(vdev))
275                 return true;
276
277         /* Otherwise, we are left to guess. */
278         /*
279          * In theory, it's possible to have a buggy QEMU-supplied
280          * emulated Q35 IOMMU and Xen enabled at the same time.  On
281          * such a configuration, virtio has never worked and will
282          * not work without an even larger kludge.  Instead, enable
283          * the DMA API if we're a Xen guest, which at least allows
284          * all of the sensible Xen configurations to work correctly.
285          */
286         if (xen_domain())
287                 return true;
288
289         return false;
290 }
291
292 size_t virtio_max_dma_size(const struct virtio_device *vdev)
293 {
294         size_t max_segment_size = SIZE_MAX;
295
296         if (vring_use_dma_api(vdev))
297                 max_segment_size = dma_max_mapping_size(vdev->dev.parent);
298
299         return max_segment_size;
300 }
301 EXPORT_SYMBOL_GPL(virtio_max_dma_size);
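/*
 * Illustrative use of the export above (a sketch, not part of this file):
 * a driver that hands large buffers to a virtqueue can cap its segment
 * size with the value returned here, e.g. for a block device queue @q:
 *
 *	size_t max_seg = virtio_max_dma_size(vdev);
 *
 *	blk_queue_max_segment_size(q, min_t(size_t, max_seg, U32_MAX));
 *
 * virtio_blk does roughly this, so that no scatterlist element exceeds
 * what the DMA layer (e.g. swiotlb) can map in one go.
 */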
302
303 static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
304                                dma_addr_t *dma_handle, gfp_t flag,
305                                struct device *dma_dev)
306 {
307         if (vring_use_dma_api(vdev)) {
308                 return dma_alloc_coherent(dma_dev, size,
309                                           dma_handle, flag);
310         } else {
311                 void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);
312
313                 if (queue) {
314                         phys_addr_t phys_addr = virt_to_phys(queue);
315                         *dma_handle = (dma_addr_t)phys_addr;
316
317                         /*
318                          * Sanity check: make sure we didn't truncate
319                          * the address.  The only arches I can find that
320                          * have 64-bit phys_addr_t but 32-bit dma_addr_t
321                          * are certain non-highmem MIPS and x86
322                          * configurations, but these configurations
323                          * should never allocate physical pages above 32
324                          * bits, so this is fine.  Just in case, throw a
325                          * warning and abort if we end up with an
326                          * unrepresentable address.
327                          */
328                         if (WARN_ON_ONCE(*dma_handle != phys_addr)) {
329                                 free_pages_exact(queue, PAGE_ALIGN(size));
330                                 return NULL;
331                         }
332                 }
333                 return queue;
334         }
335 }
336
337 static void vring_free_queue(struct virtio_device *vdev, size_t size,
338                              void *queue, dma_addr_t dma_handle,
339                              struct device *dma_dev)
340 {
341         if (vring_use_dma_api(vdev))
342                 dma_free_coherent(dma_dev, size, queue, dma_handle);
343         else
344                 free_pages_exact(queue, PAGE_ALIGN(size));
345 }
346
347 /*
348  * The DMA ops on various arches are rather gnarly right now, and
349  * making all of the arch DMA ops work on the vring device itself
350  * is a mess.
351  */
352 static struct device *vring_dma_dev(const struct vring_virtqueue *vq)
353 {
354         return vq->dma_dev;
355 }
356
357 /* Map one sg entry. */
358 static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq,
359                                    struct scatterlist *sg,
360                                    enum dma_data_direction direction)
361 {
362         if (!vq->use_dma_api) {
363                 /*
364                  * If DMA is not used, KMSAN doesn't know that the scatterlist
365                  * is initialized by the hardware. Explicitly check/unpoison it
366                  * depending on the direction.
367                  */
368                 kmsan_handle_dma(sg_page(sg), sg->offset, sg->length, direction);
369                 return (dma_addr_t)sg_phys(sg);
370         }
371
372         /*
373          * We can't use dma_map_sg, because we don't use scatterlists in
374          * the way it expects (we don't guarantee that the scatterlist
375          * will exist for the lifetime of the mapping).
376          */
377         return dma_map_page(vring_dma_dev(vq),
378                             sg_page(sg), sg->offset, sg->length,
379                             direction);
380 }
381
382 static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
383                                    void *cpu_addr, size_t size,
384                                    enum dma_data_direction direction)
385 {
386         if (!vq->use_dma_api)
387                 return (dma_addr_t)virt_to_phys(cpu_addr);
388
389         return dma_map_single(vring_dma_dev(vq),
390                               cpu_addr, size, direction);
391 }
392
393 static int vring_mapping_error(const struct vring_virtqueue *vq,
394                                dma_addr_t addr)
395 {
396         if (!vq->use_dma_api)
397                 return 0;
398
399         return dma_mapping_error(vring_dma_dev(vq), addr);
400 }
401
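/*
 * Reset the bookkeeping shared by both ring layouts: the free-entry
 * count, the last seen used index (with the used wrap counter set for
 * packed rings) and, under DEBUG, the reentrancy/timing state.
 */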
402 static void virtqueue_init(struct vring_virtqueue *vq, u32 num)
403 {
404         vq->vq.num_free = num;
405
406         if (vq->packed_ring)
407                 vq->last_used_idx = 0 | (1 << VRING_PACKED_EVENT_F_WRAP_CTR);
408         else
409                 vq->last_used_idx = 0;
410
411         vq->event_triggered = false;
412         vq->num_added = 0;
413
414 #ifdef DEBUG
415         vq->in_use = false;
416         vq->last_add_time_valid = false;
417 #endif
418 }
419
420
421 /*
422  * Split ring specific functions - *_split().
423  */
424
425 static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
426                                            const struct vring_desc *desc)
427 {
428         u16 flags;
429
430         if (!vq->use_dma_api)
431                 return;
432
433         flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
434
435         dma_unmap_page(vring_dma_dev(vq),
436                        virtio64_to_cpu(vq->vq.vdev, desc->addr),
437                        virtio32_to_cpu(vq->vq.vdev, desc->len),
438                        (flags & VRING_DESC_F_WRITE) ?
439                        DMA_FROM_DEVICE : DMA_TO_DEVICE);
440 }
441
442 static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
443                                           unsigned int i)
444 {
445         struct vring_desc_extra *extra = vq->split.desc_extra;
446         u16 flags;
447
448         if (!vq->use_dma_api)
449                 goto out;
450
451         flags = extra[i].flags;
452
453         if (flags & VRING_DESC_F_INDIRECT) {
454                 dma_unmap_single(vring_dma_dev(vq),
455                                  extra[i].addr,
456                                  extra[i].len,
457                                  (flags & VRING_DESC_F_WRITE) ?
458                                  DMA_FROM_DEVICE : DMA_TO_DEVICE);
459         } else {
460                 dma_unmap_page(vring_dma_dev(vq),
461                                extra[i].addr,
462                                extra[i].len,
463                                (flags & VRING_DESC_F_WRITE) ?
464                                DMA_FROM_DEVICE : DMA_TO_DEVICE);
465         }
466
467 out:
468         return extra[i].next;
469 }
470
471 static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
472                                                unsigned int total_sg,
473                                                gfp_t gfp)
474 {
475         struct vring_desc *desc;
476         unsigned int i;
477
478         /*
479          * We require lowmem mappings for the descriptors because
480          * otherwise virt_to_phys will give us bogus addresses in the
481          * virtqueue.
482          */
483         gfp &= ~__GFP_HIGHMEM;
484
485         desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp);
486         if (!desc)
487                 return NULL;
488
489         for (i = 0; i < total_sg; i++)
490                 desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1);
491         return desc;
492 }
493
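/*
 * Write one split-ring descriptor.  For main-ring descriptors the
 * address, length and flags are also mirrored into desc_extra, so that
 * unmapping does not depend on re-reading the ring contents.  Returns
 * the index of the descriptor that follows @i in the chain
 * (extra[i].next for the main ring, desc[i].next for an indirect table).
 */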
494 static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
495                                                     struct vring_desc *desc,
496                                                     unsigned int i,
497                                                     dma_addr_t addr,
498                                                     unsigned int len,
499                                                     u16 flags,
500                                                     bool indirect)
501 {
502         struct vring_virtqueue *vring = to_vvq(vq);
503         struct vring_desc_extra *extra = vring->split.desc_extra;
504         u16 next;
505
506         desc[i].flags = cpu_to_virtio16(vq->vdev, flags);
507         desc[i].addr = cpu_to_virtio64(vq->vdev, addr);
508         desc[i].len = cpu_to_virtio32(vq->vdev, len);
509
510         if (!indirect) {
511                 next = extra[i].next;
512                 desc[i].next = cpu_to_virtio16(vq->vdev, next);
513
514                 extra[i].addr = addr;
515                 extra[i].len = len;
516                 extra[i].flags = flags;
517         } else
518                 next = virtio16_to_cpu(vq->vdev, desc[i].next);
519
520         return next;
521 }
522
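/*
 * Core of adding a buffer to a split ring.  This is the split-ring
 * backend behind the public virtqueue_add_sgs()/virtqueue_add_outbuf()/
 * virtqueue_add_inbuf() helpers further down in this file.  The out_sgs
 * scatterlists are mapped DMA_TO_DEVICE and the in_sgs ones
 * DMA_FROM_DEVICE; when virtqueue_use_indirect() says so, all of them go
 * into a freshly allocated indirect table so the transfer consumes only
 * one ring slot.  Returns 0 on success, -EIO if the ring is broken,
 * -ENOSPC if there is no room and -ENOMEM if a DMA mapping fails.
 *
 * Illustrative driver-side sketch (buf/len are placeholders) using the
 * public wrappers that land here for split rings:
 *
 *	struct scatterlist sg;
 *
 *	sg_init_one(&sg, buf, len);
 *	if (!virtqueue_add_outbuf(vq, &sg, 1, buf, GFP_ATOMIC))
 *		virtqueue_kick(vq);
 */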
523 static inline int virtqueue_add_split(struct virtqueue *_vq,
524                                       struct scatterlist *sgs[],
525                                       unsigned int total_sg,
526                                       unsigned int out_sgs,
527                                       unsigned int in_sgs,
528                                       void *data,
529                                       void *ctx,
530                                       gfp_t gfp)
531 {
532         struct vring_virtqueue *vq = to_vvq(_vq);
533         struct scatterlist *sg;
534         struct vring_desc *desc;
535         unsigned int i, n, avail, descs_used, prev, err_idx;
536         int head;
537         bool indirect;
538
539         START_USE(vq);
540
541         BUG_ON(data == NULL);
542         BUG_ON(ctx && vq->indirect);
543
544         if (unlikely(vq->broken)) {
545                 END_USE(vq);
546                 return -EIO;
547         }
548
549         LAST_ADD_TIME_UPDATE(vq);
550
551         BUG_ON(total_sg == 0);
552
553         head = vq->free_head;
554
555         if (virtqueue_use_indirect(vq, total_sg))
556                 desc = alloc_indirect_split(_vq, total_sg, gfp);
557         else {
558                 desc = NULL;
559                 WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
560         }
561
562         if (desc) {
563                 /* Use a single buffer which doesn't continue */
564                 indirect = true;
565                 /* Set up rest to use this indirect table. */
566                 i = 0;
567                 descs_used = 1;
568         } else {
569                 indirect = false;
570                 desc = vq->split.vring.desc;
571                 i = head;
572                 descs_used = total_sg;
573         }
574
575         if (unlikely(vq->vq.num_free < descs_used)) {
576                 pr_debug("Can't add buf len %i - avail = %i\n",
577                          descs_used, vq->vq.num_free);
578                 /* FIXME: for historical reasons, we force a notify here if
579                  * there are outgoing parts to the buffer.  Presumably the
580                  * host should service the ring ASAP. */
581                 if (out_sgs)
582                         vq->notify(&vq->vq);
583                 if (indirect)
584                         kfree(desc);
585                 END_USE(vq);
586                 return -ENOSPC;
587         }
588
589         for (n = 0; n < out_sgs; n++) {
590                 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
591                         dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
592                         if (vring_mapping_error(vq, addr))
593                                 goto unmap_release;
594
595                         prev = i;
596                         /* Note that we trust indirect descriptor
597                          * table since it uses streaming DMA mappings.
598                          */
599                         i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length,
600                                                      VRING_DESC_F_NEXT,
601                                                      indirect);
602                 }
603         }
604         for (; n < (out_sgs + in_sgs); n++) {
605                 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
606                         dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
607                         if (vring_mapping_error(vq, addr))
608                                 goto unmap_release;
609
610                         prev = i;
611                         /* Note that we trust indirect descriptor
612                          * table since it uses streaming DMA mappings.
613                          */
614                         i = virtqueue_add_desc_split(_vq, desc, i, addr,
615                                                      sg->length,
616                                                      VRING_DESC_F_NEXT |
617                                                      VRING_DESC_F_WRITE,
618                                                      indirect);
619                 }
620         }
621         /* Last one doesn't continue. */
622         desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
623         if (!indirect && vq->use_dma_api)
624                 vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &=
625                         ~VRING_DESC_F_NEXT;
626
627         if (indirect) {
628                 /* Now that the indirect table is filled in, map it. */
629                 dma_addr_t addr = vring_map_single(
630                         vq, desc, total_sg * sizeof(struct vring_desc),
631                         DMA_TO_DEVICE);
632                 if (vring_mapping_error(vq, addr))
633                         goto unmap_release;
634
635                 virtqueue_add_desc_split(_vq, vq->split.vring.desc,
636                                          head, addr,
637                                          total_sg * sizeof(struct vring_desc),
638                                          VRING_DESC_F_INDIRECT,
639                                          false);
640         }
641
642         /* We're using some buffers from the free list. */
643         vq->vq.num_free -= descs_used;
644
645         /* Update free pointer */
646         if (indirect)
647                 vq->free_head = vq->split.desc_extra[head].next;
648         else
649                 vq->free_head = i;
650
651         /* Store token and indirect buffer state. */
652         vq->split.desc_state[head].data = data;
653         if (indirect)
654                 vq->split.desc_state[head].indir_desc = desc;
655         else
656                 vq->split.desc_state[head].indir_desc = ctx;
657
658         /* Put entry in available array (but don't update avail->idx until they
659          * do sync). */
660         avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
661         vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);
662
663         /* Descriptors and available array need to be set before we expose the
664          * new available array entries. */
665         virtio_wmb(vq->weak_barriers);
666         vq->split.avail_idx_shadow++;
667         vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
668                                                 vq->split.avail_idx_shadow);
669         vq->num_added++;
670
671         pr_debug("Added buffer head %i to %p\n", head, vq);
672         END_USE(vq);
673
674         /* This is very unlikely, but theoretically possible.  Kick
675          * just in case. */
676         if (unlikely(vq->num_added == (1 << 16) - 1))
677                 virtqueue_kick(_vq);
678
679         return 0;
680
681 unmap_release:
682         err_idx = i;
683
684         if (indirect)
685                 i = 0;
686         else
687                 i = head;
688
689         for (n = 0; n < total_sg; n++) {
690                 if (i == err_idx)
691                         break;
692                 if (indirect) {
693                         vring_unmap_one_split_indirect(vq, &desc[i]);
694                         i = virtio16_to_cpu(_vq->vdev, desc[i].next);
695                 } else
696                         i = vring_unmap_one_split(vq, i);
697         }
698
699         if (indirect)
700                 kfree(desc);
701
702         END_USE(vq);
703         return -ENOMEM;
704 }
705
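/*
 * Work out whether the device must be notified about the buffers added
 * since the last kick.  With VIRTIO_RING_F_EVENT_IDX the avail event
 * index published by the device is compared against the old and new
 * avail indices via vring_need_event(); otherwise the decision is simply
 * whether the device has set VRING_USED_F_NO_NOTIFY.
 */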
706 static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
707 {
708         struct vring_virtqueue *vq = to_vvq(_vq);
709         u16 new, old;
710         bool needs_kick;
711
712         START_USE(vq);
713         /* We need to expose available array entries before checking avail
714          * event. */
715         virtio_mb(vq->weak_barriers);
716
717         old = vq->split.avail_idx_shadow - vq->num_added;
718         new = vq->split.avail_idx_shadow;
719         vq->num_added = 0;
720
721         LAST_ADD_TIME_CHECK(vq);
722         LAST_ADD_TIME_INVALID(vq);
723
724         if (vq->event) {
725                 needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev,
726                                         vring_avail_event(&vq->split.vring)),
727                                               new, old);
728         } else {
729                 needs_kick = !(vq->split.vring.used->flags &
730                                         cpu_to_virtio16(_vq->vdev,
731                                                 VRING_USED_F_NO_NOTIFY));
732         }
733         END_USE(vq);
734         return needs_kick;
735 }
736
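/*
 * Give a completed chain back to the free list: walk the first-level
 * descriptors unmapping each one, splice the chain onto free_head, and
 * free the indirect table (or hand back the caller's ctx pointer) that
 * was stashed in desc_state when the buffer was added.
 */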
737 static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
738                              void **ctx)
739 {
740         unsigned int i, j;
741         __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
742
743         /* Clear data ptr. */
744         vq->split.desc_state[head].data = NULL;
745
746         /* Put back on free list: unmap first-level descriptors and find end */
747         i = head;
748
749         while (vq->split.vring.desc[i].flags & nextflag) {
750                 vring_unmap_one_split(vq, i);
751                 i = vq->split.desc_extra[i].next;
752                 vq->vq.num_free++;
753         }
754
755         vring_unmap_one_split(vq, i);
756         vq->split.desc_extra[i].next = vq->free_head;
757         vq->free_head = head;
758
759         /* Plus final descriptor */
760         vq->vq.num_free++;
761
762         if (vq->indirect) {
763                 struct vring_desc *indir_desc =
764                                 vq->split.desc_state[head].indir_desc;
765                 u32 len;
766
767                 /* Free the indirect table, if any, now that it's unmapped. */
768                 if (!indir_desc)
769                         return;
770
771                 len = vq->split.desc_extra[head].len;
772
773                 BUG_ON(!(vq->split.desc_extra[head].flags &
774                                 VRING_DESC_F_INDIRECT));
775                 BUG_ON(len == 0 || len % sizeof(struct vring_desc));
776
777                 for (j = 0; j < len / sizeof(struct vring_desc); j++)
778                         vring_unmap_one_split_indirect(vq, &indir_desc[j]);
779
780                 kfree(indir_desc);
781                 vq->split.desc_state[head].indir_desc = NULL;
782         } else if (ctx) {
783                 *ctx = vq->split.desc_state[head].indir_desc;
784         }
785 }
786
787 static bool more_used_split(const struct vring_virtqueue *vq)
788 {
789         return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev,
790                         vq->split.vring.used->idx);
791 }
792
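/*
 * Return the next used buffer, or NULL if the ring is broken or nothing
 * new has been used.  *len is set to the number of bytes the device
 * wrote into the buffer, and the token passed at add time is returned
 * after the descriptors have been detached and recycled.
 */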
793 static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
794                                          unsigned int *len,
795                                          void **ctx)
796 {
797         struct vring_virtqueue *vq = to_vvq(_vq);
798         void *ret;
799         unsigned int i;
800         u16 last_used;
801
802         START_USE(vq);
803
804         if (unlikely(vq->broken)) {
805                 END_USE(vq);
806                 return NULL;
807         }
808
809         if (!more_used_split(vq)) {
810                 pr_debug("No more buffers in queue\n");
811                 END_USE(vq);
812                 return NULL;
813         }
814
815         /* Only get used array entries after they have been exposed by host. */
816         virtio_rmb(vq->weak_barriers);
817
818         last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
819         i = virtio32_to_cpu(_vq->vdev,
820                         vq->split.vring.used->ring[last_used].id);
821         *len = virtio32_to_cpu(_vq->vdev,
822                         vq->split.vring.used->ring[last_used].len);
823
824         if (unlikely(i >= vq->split.vring.num)) {
825                 BAD_RING(vq, "id %u out of range\n", i);
826                 return NULL;
827         }
828         if (unlikely(!vq->split.desc_state[i].data)) {
829                 BAD_RING(vq, "id %u is not a head!\n", i);
830                 return NULL;
831         }
832
833         /* detach_buf_split clears data, so grab it now. */
834         ret = vq->split.desc_state[i].data;
835         detach_buf_split(vq, i, ctx);
836         vq->last_used_idx++;
837         /* If we expect an interrupt for the next entry, tell host
838          * by writing event index and flush out the write before
839          * the read in the next get_buf call. */
840         if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
841                 virtio_store_mb(vq->weak_barriers,
842                                 &vring_used_event(&vq->split.vring),
843                                 cpu_to_virtio16(_vq->vdev, vq->last_used_idx));
844
845         LAST_ADD_TIME_INVALID(vq);
846
847         END_USE(vq);
848         return ret;
849 }
850
851 static void virtqueue_disable_cb_split(struct virtqueue *_vq)
852 {
853         struct vring_virtqueue *vq = to_vvq(_vq);
854
855         if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
856                 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
857
858                 /*
859                  * If device triggered an event already it won't trigger one again:
860                  * no need to disable.
861                  */
862                 if (vq->event_triggered)
863                         return;
864
865                 if (vq->event)
866                         /* TODO: this is a hack. Figure out a cleaner value to write. */
867                         vring_used_event(&vq->split.vring) = 0x0;
868                 else
869                         vq->split.vring.avail->flags =
870                                 cpu_to_virtio16(_vq->vdev,
871                                                 vq->split.avail_flags_shadow);
872         }
873 }
874
875 static unsigned int virtqueue_enable_cb_prepare_split(struct virtqueue *_vq)
876 {
877         struct vring_virtqueue *vq = to_vvq(_vq);
878         u16 last_used_idx;
879
880         START_USE(vq);
881
882         /* We optimistically turn back on interrupts, then check if there was
883          * more to do. */
884         /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
885          * either clear the flags bit or point the event index at the next
886          * entry. Always do both to keep code simple. */
887         if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
888                 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
889                 if (!vq->event)
890                         vq->split.vring.avail->flags =
891                                 cpu_to_virtio16(_vq->vdev,
892                                                 vq->split.avail_flags_shadow);
893         }
894         vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev,
895                         last_used_idx = vq->last_used_idx);
896         END_USE(vq);
897         return last_used_idx;
898 }
899
900 static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned int last_used_idx)
901 {
902         struct vring_virtqueue *vq = to_vvq(_vq);
903
904         return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev,
905                         vq->split.vring.used->idx);
906 }
907
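/*
 * Re-enable callbacks, but when event indices are in use ask the device
 * to hold off the interrupt until about 3/4 of the buffers currently in
 * flight have been used.  Returns false if that many used entries are
 * already pending, in which case the caller should just poll.
 */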
908 static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
909 {
910         struct vring_virtqueue *vq = to_vvq(_vq);
911         u16 bufs;
912
913         START_USE(vq);
914
915         /* We optimistically turn back on interrupts, then check if there was
916          * more to do. */
917         /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
918          * either clear the flags bit or point the event index at the next
919          * entry. Always update the event index to keep code simple. */
920         if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
921                 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
922                 if (!vq->event)
923                         vq->split.vring.avail->flags =
924                                 cpu_to_virtio16(_vq->vdev,
925                                                 vq->split.avail_flags_shadow);
926         }
927         /* TODO: tune this threshold */
928         bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4;
929
930         virtio_store_mb(vq->weak_barriers,
931                         &vring_used_event(&vq->split.vring),
932                         cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs));
933
934         if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx)
935                                         - vq->last_used_idx) > bufs)) {
936                 END_USE(vq);
937                 return false;
938         }
939
940         END_USE(vq);
941         return true;
942 }
943
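/*
 * Pull back a buffer that was made available but never used by the
 * device, rewinding the avail index for it.  Called repeatedly during
 * device teardown; returns NULL once every buffer has been reclaimed.
 */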
944 static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
945 {
946         struct vring_virtqueue *vq = to_vvq(_vq);
947         unsigned int i;
948         void *buf;
949
950         START_USE(vq);
951
952         for (i = 0; i < vq->split.vring.num; i++) {
953                 if (!vq->split.desc_state[i].data)
954                         continue;
955                 /* detach_buf_split clears data, so grab it now. */
956                 buf = vq->split.desc_state[i].data;
957                 detach_buf_split(vq, i, NULL);
958                 vq->split.avail_idx_shadow--;
959                 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
960                                 vq->split.avail_idx_shadow);
961                 END_USE(vq);
962                 return buf;
963         }
964         /* That should have freed everything. */
965         BUG_ON(vq->vq.num_free != vq->split.vring.num);
966
967         END_USE(vq);
968         return NULL;
969 }
970
971 static void virtqueue_vring_init_split(struct vring_virtqueue_split *vring_split,
972                                        struct vring_virtqueue *vq)
973 {
974         struct virtio_device *vdev;
975
976         vdev = vq->vq.vdev;
977
978         vring_split->avail_flags_shadow = 0;
979         vring_split->avail_idx_shadow = 0;
980
981         /* No callback?  Tell other side not to bother us. */
982         if (!vq->vq.callback) {
983                 vring_split->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
984                 if (!vq->event)
985                         vring_split->vring.avail->flags = cpu_to_virtio16(vdev,
986                                         vring_split->avail_flags_shadow);
987         }
988 }
989
990 static void virtqueue_reinit_split(struct vring_virtqueue *vq)
991 {
992         int num;
993
994         num = vq->split.vring.num;
995
996         vq->split.vring.avail->flags = 0;
997         vq->split.vring.avail->idx = 0;
998
999         /* reset avail event */
1000         vq->split.vring.avail->ring[num] = 0;
1001
1002         vq->split.vring.used->flags = 0;
1003         vq->split.vring.used->idx = 0;
1004
1005         /* reset used event */
1006         *(__virtio16 *)&(vq->split.vring.used->ring[num]) = 0;
1007
1008         virtqueue_init(vq, num);
1009
1010         virtqueue_vring_init_split(&vq->split, vq);
1011 }
1012
1013 static void virtqueue_vring_attach_split(struct vring_virtqueue *vq,
1014                                          struct vring_virtqueue_split *vring_split)
1015 {
1016         vq->split = *vring_split;
1017
1018         /* Put everything in free lists. */
1019         vq->free_head = 0;
1020 }
1021
1022 static int vring_alloc_state_extra_split(struct vring_virtqueue_split *vring_split)
1023 {
1024         struct vring_desc_state_split *state;
1025         struct vring_desc_extra *extra;
1026         u32 num = vring_split->vring.num;
1027
1028         state = kmalloc_array(num, sizeof(struct vring_desc_state_split), GFP_KERNEL);
1029         if (!state)
1030                 goto err_state;
1031
1032         extra = vring_alloc_desc_extra(num);
1033         if (!extra)
1034                 goto err_extra;
1035
1036         memset(state, 0, num * sizeof(struct vring_desc_state_split));
1037
1038         vring_split->desc_state = state;
1039         vring_split->desc_extra = extra;
1040         return 0;
1041
1042 err_extra:
1043         kfree(state);
1044 err_state:
1045         return -ENOMEM;
1046 }
1047
1048 static void vring_free_split(struct vring_virtqueue_split *vring_split,
1049                              struct virtio_device *vdev, struct device *dma_dev)
1050 {
1051         vring_free_queue(vdev, vring_split->queue_size_in_bytes,
1052                          vring_split->vring.desc,
1053                          vring_split->queue_dma_addr,
1054                          dma_dev);
1055
1056         kfree(vring_split->desc_state);
1057         kfree(vring_split->desc_extra);
1058 }
1059
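/*
 * Allocate the contiguous memory for a split ring.  @num must be a
 * power of two.  While the ring would not fit in a page and the
 * allocation fails, @may_reduce_num allows halving @num; a final
 * attempt without __GFP_NOWARN is made once the ring fits in one page.
 */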
1060 static int vring_alloc_queue_split(struct vring_virtqueue_split *vring_split,
1061                                    struct virtio_device *vdev,
1062                                    u32 num,
1063                                    unsigned int vring_align,
1064                                    bool may_reduce_num,
1065                                    struct device *dma_dev)
1066 {
1067         void *queue = NULL;
1068         dma_addr_t dma_addr;
1069
1070         /* We assume num is a power of 2. */
1071         if (!is_power_of_2(num)) {
1072                 dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
1073                 return -EINVAL;
1074         }
1075
1076         /* TODO: allocate each queue chunk individually */
1077         for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
1078                 queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
1079                                           &dma_addr,
1080                                           GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
1081                                           dma_dev);
1082                 if (queue)
1083                         break;
1084                 if (!may_reduce_num)
1085                         return -ENOMEM;
1086         }
1087
1088         if (!num)
1089                 return -ENOMEM;
1090
1091         if (!queue) {
1092                 /* Try to get a single page. You are my only hope! */
1093                 queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
1094                                           &dma_addr, GFP_KERNEL | __GFP_ZERO,
1095                                           dma_dev);
1096         }
1097         if (!queue)
1098                 return -ENOMEM;
1099
1100         vring_init(&vring_split->vring, num, queue, vring_align);
1101
1102         vring_split->queue_dma_addr = dma_addr;
1103         vring_split->queue_size_in_bytes = vring_size(num, vring_align);
1104
1105         vring_split->vring_align = vring_align;
1106         vring_split->may_reduce_num = may_reduce_num;
1107
1108         return 0;
1109 }
1110
1111 static struct virtqueue *vring_create_virtqueue_split(
1112         unsigned int index,
1113         unsigned int num,
1114         unsigned int vring_align,
1115         struct virtio_device *vdev,
1116         bool weak_barriers,
1117         bool may_reduce_num,
1118         bool context,
1119         bool (*notify)(struct virtqueue *),
1120         void (*callback)(struct virtqueue *),
1121         const char *name,
1122         struct device *dma_dev)
1123 {
1124         struct vring_virtqueue_split vring_split = {};
1125         struct virtqueue *vq;
1126         int err;
1127
1128         err = vring_alloc_queue_split(&vring_split, vdev, num, vring_align,
1129                                       may_reduce_num, dma_dev);
1130         if (err)
1131                 return NULL;
1132
1133         vq = __vring_new_virtqueue(index, &vring_split, vdev, weak_barriers,
1134                                    context, notify, callback, name, dma_dev);
1135         if (!vq) {
1136                 vring_free_split(&vring_split, vdev, dma_dev);
1137                 return NULL;
1138         }
1139
1140         to_vvq(vq)->we_own_ring = true;
1141
1142         return vq;
1143 }
1144
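/*
 * Resize an existing split virtqueue to @num entries: allocate the new
 * ring and per-descriptor state first, then free the old ring and attach
 * the new one.  If any allocation fails, the old ring is re-initialised
 * and kept, and -ENOMEM is returned.
 */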
1145 static int virtqueue_resize_split(struct virtqueue *_vq, u32 num)
1146 {
1147         struct vring_virtqueue_split vring_split = {};
1148         struct vring_virtqueue *vq = to_vvq(_vq);
1149         struct virtio_device *vdev = _vq->vdev;
1150         int err;
1151
1152         err = vring_alloc_queue_split(&vring_split, vdev, num,
1153                                       vq->split.vring_align,
1154                                       vq->split.may_reduce_num,
1155                                       vring_dma_dev(vq));
1156         if (err)
1157                 goto err;
1158
1159         err = vring_alloc_state_extra_split(&vring_split);
1160         if (err)
1161                 goto err_state_extra;
1162
1163         vring_free(&vq->vq);
1164
1165         virtqueue_vring_init_split(&vring_split, vq);
1166
1167         virtqueue_init(vq, vring_split.vring.num);
1168         virtqueue_vring_attach_split(vq, &vring_split);
1169
1170         return 0;
1171
1172 err_state_extra:
1173         vring_free_split(&vring_split, vdev, vring_dma_dev(vq));
1174 err:
1175         virtqueue_reinit_split(vq);
1176         return -ENOMEM;
1177 }
1178
1179
1180 /*
1181  * Packed ring specific functions - *_packed().
1182  */
1183 static bool packed_used_wrap_counter(u16 last_used_idx)
1184 {
1185         return !!(last_used_idx & (1 << VRING_PACKED_EVENT_F_WRAP_CTR));
1186 }
1187
1188 static u16 packed_last_used(u16 last_used_idx)
1189 {
1190         return last_used_idx & ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR));
1191 }
1192
1193 static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
1194                                      const struct vring_desc_extra *extra)
1195 {
1196         u16 flags;
1197
1198         if (!vq->use_dma_api)
1199                 return;
1200
1201         flags = extra->flags;
1202
1203         if (flags & VRING_DESC_F_INDIRECT) {
1204                 dma_unmap_single(vring_dma_dev(vq),
1205                                  extra->addr, extra->len,
1206                                  (flags & VRING_DESC_F_WRITE) ?
1207                                  DMA_FROM_DEVICE : DMA_TO_DEVICE);
1208         } else {
1209                 dma_unmap_page(vring_dma_dev(vq),
1210                                extra->addr, extra->len,
1211                                (flags & VRING_DESC_F_WRITE) ?
1212                                DMA_FROM_DEVICE : DMA_TO_DEVICE);
1213         }
1214 }
1215
1216 static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
1217                                     const struct vring_packed_desc *desc)
1218 {
1219         u16 flags;
1220
1221         if (!vq->use_dma_api)
1222                 return;
1223
1224         flags = le16_to_cpu(desc->flags);
1225
1226         dma_unmap_page(vring_dma_dev(vq),
1227                        le64_to_cpu(desc->addr),
1228                        le32_to_cpu(desc->len),
1229                        (flags & VRING_DESC_F_WRITE) ?
1230                        DMA_FROM_DEVICE : DMA_TO_DEVICE);
1231 }
1232
1233 static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg,
1234                                                        gfp_t gfp)
1235 {
1236         struct vring_packed_desc *desc;
1237
1238         /*
1239          * We require lowmem mappings for the descriptors because
1240          * otherwise virt_to_phys will give us bogus addresses in the
1241          * virtqueue.
1242          */
1243         gfp &= ~__GFP_HIGHMEM;
1244
1245         desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp);
1246
1247         return desc;
1248 }
1249
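/*
 * Add a buffer to the packed ring through one indirect descriptor: the
 * whole scatterlist is written into a separately allocated table, the
 * table itself is mapped, and a single main-ring descriptor pointing at
 * it is made available (its flags written last, after a write barrier).
 */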
1250 static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
1251                                          struct scatterlist *sgs[],
1252                                          unsigned int total_sg,
1253                                          unsigned int out_sgs,
1254                                          unsigned int in_sgs,
1255                                          void *data,
1256                                          gfp_t gfp)
1257 {
1258         struct vring_packed_desc *desc;
1259         struct scatterlist *sg;
1260         unsigned int i, n, err_idx;
1261         u16 head, id;
1262         dma_addr_t addr;
1263
1264         head = vq->packed.next_avail_idx;
1265         desc = alloc_indirect_packed(total_sg, gfp);
1266         if (!desc)
1267                 return -ENOMEM;
1268
1269         if (unlikely(vq->vq.num_free < 1)) {
1270                 pr_debug("Can't add buf len 1 - avail = 0\n");
1271                 kfree(desc);
1272                 END_USE(vq);
1273                 return -ENOSPC;
1274         }
1275
1276         i = 0;
1277         id = vq->free_head;
1278         BUG_ON(id == vq->packed.vring.num);
1279
1280         for (n = 0; n < out_sgs + in_sgs; n++) {
1281                 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1282                         addr = vring_map_one_sg(vq, sg, n < out_sgs ?
1283                                         DMA_TO_DEVICE : DMA_FROM_DEVICE);
1284                         if (vring_mapping_error(vq, addr))
1285                                 goto unmap_release;
1286
1287                         desc[i].flags = cpu_to_le16(n < out_sgs ?
1288                                                 0 : VRING_DESC_F_WRITE);
1289                         desc[i].addr = cpu_to_le64(addr);
1290                         desc[i].len = cpu_to_le32(sg->length);
1291                         i++;
1292                 }
1293         }
1294
1295         /* Now that the indirect table is filled in, map it. */
1296         addr = vring_map_single(vq, desc,
1297                         total_sg * sizeof(struct vring_packed_desc),
1298                         DMA_TO_DEVICE);
1299         if (vring_mapping_error(vq, addr))
1300                 goto unmap_release;
1301
1302         vq->packed.vring.desc[head].addr = cpu_to_le64(addr);
1303         vq->packed.vring.desc[head].len = cpu_to_le32(total_sg *
1304                                 sizeof(struct vring_packed_desc));
1305         vq->packed.vring.desc[head].id = cpu_to_le16(id);
1306
1307         if (vq->use_dma_api) {
1308                 vq->packed.desc_extra[id].addr = addr;
1309                 vq->packed.desc_extra[id].len = total_sg *
1310                                 sizeof(struct vring_packed_desc);
1311                 vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT |
1312                                                   vq->packed.avail_used_flags;
1313         }
1314
1315         /*
1316          * A driver MUST NOT make the first descriptor in the list
1317          * available before all subsequent descriptors comprising
1318          * the list are made available.
1319          */
1320         virtio_wmb(vq->weak_barriers);
1321         vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT |
1322                                                 vq->packed.avail_used_flags);
1323
1324         /* We're using some buffers from the free list. */
1325         vq->vq.num_free -= 1;
1326
1327         /* Update free pointer */
1328         n = head + 1;
1329         if (n >= vq->packed.vring.num) {
1330                 n = 0;
1331                 vq->packed.avail_wrap_counter ^= 1;
1332                 vq->packed.avail_used_flags ^=
1333                                 1 << VRING_PACKED_DESC_F_AVAIL |
1334                                 1 << VRING_PACKED_DESC_F_USED;
1335         }
1336         vq->packed.next_avail_idx = n;
1337         vq->free_head = vq->packed.desc_extra[id].next;
1338
1339         /* Store token and indirect buffer state. */
1340         vq->packed.desc_state[id].num = 1;
1341         vq->packed.desc_state[id].data = data;
1342         vq->packed.desc_state[id].indir_desc = desc;
1343         vq->packed.desc_state[id].last = id;
1344
1345         vq->num_added += 1;
1346
1347         pr_debug("Added buffer head %i to %p\n", head, vq);
1348         END_USE(vq);
1349
1350         return 0;
1351
1352 unmap_release:
1353         err_idx = i;
1354
1355         for (i = 0; i < err_idx; i++)
1356                 vring_unmap_desc_packed(vq, &desc[i]);
1357
1358         kfree(desc);
1359
1360         END_USE(vq);
1361         return -ENOMEM;
1362 }
1363
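/*
 * Packed-ring counterpart of virtqueue_add_split().  An indirect
 * descriptor is tried first when virtqueue_use_indirect() allows it;
 * otherwise total_sg descriptors are written in place and the head
 * descriptor's avail/used flags are published last, after a write
 * barrier, so the device never sees a partially written chain.
 */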
1364 static inline int virtqueue_add_packed(struct virtqueue *_vq,
1365                                        struct scatterlist *sgs[],
1366                                        unsigned int total_sg,
1367                                        unsigned int out_sgs,
1368                                        unsigned int in_sgs,
1369                                        void *data,
1370                                        void *ctx,
1371                                        gfp_t gfp)
1372 {
1373         struct vring_virtqueue *vq = to_vvq(_vq);
1374         struct vring_packed_desc *desc;
1375         struct scatterlist *sg;
1376         unsigned int i, n, c, descs_used, err_idx;
1377         __le16 head_flags, flags;
1378         u16 head, id, prev, curr, avail_used_flags;
1379         int err;
1380
1381         START_USE(vq);
1382
1383         BUG_ON(data == NULL);
1384         BUG_ON(ctx && vq->indirect);
1385
1386         if (unlikely(vq->broken)) {
1387                 END_USE(vq);
1388                 return -EIO;
1389         }
1390
1391         LAST_ADD_TIME_UPDATE(vq);
1392
1393         BUG_ON(total_sg == 0);
1394
1395         if (virtqueue_use_indirect(vq, total_sg)) {
1396                 err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
1397                                                     in_sgs, data, gfp);
1398                 if (err != -ENOMEM) {
1399                         END_USE(vq);
1400                         return err;
1401                 }
1402
1403                 /* fall back on direct */
1404         }
1405
1406         head = vq->packed.next_avail_idx;
1407         avail_used_flags = vq->packed.avail_used_flags;
1408
1409         WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect);
1410
1411         desc = vq->packed.vring.desc;
1412         i = head;
1413         descs_used = total_sg;
1414
1415         if (unlikely(vq->vq.num_free < descs_used)) {
1416                 pr_debug("Can't add buf len %i - avail = %i\n",
1417                          descs_used, vq->vq.num_free);
1418                 END_USE(vq);
1419                 return -ENOSPC;
1420         }
1421
1422         id = vq->free_head;
1423         BUG_ON(id == vq->packed.vring.num);
1424
1425         curr = id;
1426         c = 0;
1427         for (n = 0; n < out_sgs + in_sgs; n++) {
1428                 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1429                         dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ?
1430                                         DMA_TO_DEVICE : DMA_FROM_DEVICE);
1431                         if (vring_mapping_error(vq, addr))
1432                                 goto unmap_release;
1433
1434                         flags = cpu_to_le16(vq->packed.avail_used_flags |
1435                                     (++c == total_sg ? 0 : VRING_DESC_F_NEXT) |
1436                                     (n < out_sgs ? 0 : VRING_DESC_F_WRITE));
1437                         if (i == head)
1438                                 head_flags = flags;
1439                         else
1440                                 desc[i].flags = flags;
1441
1442                         desc[i].addr = cpu_to_le64(addr);
1443                         desc[i].len = cpu_to_le32(sg->length);
1444                         desc[i].id = cpu_to_le16(id);
1445
1446                         if (unlikely(vq->use_dma_api)) {
1447                                 vq->packed.desc_extra[curr].addr = addr;
1448                                 vq->packed.desc_extra[curr].len = sg->length;
1449                                 vq->packed.desc_extra[curr].flags =
1450                                         le16_to_cpu(flags);
1451                         }
1452                         prev = curr;
1453                         curr = vq->packed.desc_extra[curr].next;
1454
1455                         if (unlikely(++i >= vq->packed.vring.num)) {
1456                                 i = 0;
1457                                 vq->packed.avail_used_flags ^=
1458                                         1 << VRING_PACKED_DESC_F_AVAIL |
1459                                         1 << VRING_PACKED_DESC_F_USED;
1460                         }
1461                 }
1462         }
1463
1464         if (i < head)
1465                 vq->packed.avail_wrap_counter ^= 1;
1466
1467         /* We're using some buffers from the free list. */
1468         vq->vq.num_free -= descs_used;
1469
1470         /* Update free pointer */
1471         vq->packed.next_avail_idx = i;
1472         vq->free_head = curr;
1473
1474         /* Store token. */
1475         vq->packed.desc_state[id].num = descs_used;
1476         vq->packed.desc_state[id].data = data;
1477         vq->packed.desc_state[id].indir_desc = ctx;
1478         vq->packed.desc_state[id].last = prev;
1479
1480         /*
1481          * A driver MUST NOT make the first descriptor in the list
1482          * available before all subsequent descriptors comprising
1483          * the list are made available.
1484          */
1485         virtio_wmb(vq->weak_barriers);
1486         vq->packed.vring.desc[head].flags = head_flags;
1487         vq->num_added += descs_used;
1488
1489         pr_debug("Added buffer head %i to %p\n", head, vq);
1490         END_USE(vq);
1491
1492         return 0;
1493
1494 unmap_release:
1495         err_idx = i;
1496         i = head;
1497         curr = vq->free_head;
1498
1499         vq->packed.avail_used_flags = avail_used_flags;
1500
1501         for (n = 0; n < total_sg; n++) {
1502                 if (i == err_idx)
1503                         break;
1504                 vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]);
1505                 curr = vq->packed.desc_extra[curr].next;
1506                 i++;
1507                 if (i >= vq->packed.vring.num)
1508                         i = 0;
1509         }
1510
1511         END_USE(vq);
1512         return -EIO;
1513 }
1514
1515 static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
1516 {
1517         struct vring_virtqueue *vq = to_vvq(_vq);
1518         u16 new, old, off_wrap, flags, wrap_counter, event_idx;
1519         bool needs_kick;
1520         union {
1521                 struct {
1522                         __le16 off_wrap;
1523                         __le16 flags;
1524                 };
1525                 u32 u32;
1526         } snapshot;
1527
1528         START_USE(vq);
1529
1530         /*
1531          * We need to expose the new flags value before checking notification
1532          * suppressions.
1533          */
1534         virtio_mb(vq->weak_barriers);
1535
1536         old = vq->packed.next_avail_idx - vq->num_added;
1537         new = vq->packed.next_avail_idx;
1538         vq->num_added = 0;
1539
1540         snapshot.u32 = *(u32 *)vq->packed.vring.device;
1541         flags = le16_to_cpu(snapshot.flags);
1542
1543         LAST_ADD_TIME_CHECK(vq);
1544         LAST_ADD_TIME_INVALID(vq);
1545
1546         if (flags != VRING_PACKED_EVENT_FLAG_DESC) {
1547                 needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE);
1548                 goto out;
1549         }
1550
1551         off_wrap = le16_to_cpu(snapshot.off_wrap);
1552
1553         wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1554         event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1555         if (wrap_counter != vq->packed.avail_wrap_counter)
1556                 event_idx -= vq->packed.vring.num;
1557
1558         needs_kick = vring_need_event(event_idx, new, old);
1559 out:
1560         END_USE(vq);
1561         return needs_kick;
1562 }
1563
1564 static void detach_buf_packed(struct vring_virtqueue *vq,
1565                               unsigned int id, void **ctx)
1566 {
1567         struct vring_desc_state_packed *state = NULL;
1568         struct vring_packed_desc *desc;
1569         unsigned int i, curr;
1570
1571         state = &vq->packed.desc_state[id];
1572
1573         /* Clear data ptr. */
1574         state->data = NULL;
1575
1576         vq->packed.desc_extra[state->last].next = vq->free_head;
1577         vq->free_head = id;
1578         vq->vq.num_free += state->num;
1579
1580         if (unlikely(vq->use_dma_api)) {
1581                 curr = id;
1582                 for (i = 0; i < state->num; i++) {
1583                         vring_unmap_extra_packed(vq,
1584                                                  &vq->packed.desc_extra[curr]);
1585                         curr = vq->packed.desc_extra[curr].next;
1586                 }
1587         }
1588
1589         if (vq->indirect) {
1590                 u32 len;
1591
1592                 /* Free the indirect table, if any, now that it's unmapped. */
1593                 desc = state->indir_desc;
1594                 if (!desc)
1595                         return;
1596
1597                 if (vq->use_dma_api) {
1598                         len = vq->packed.desc_extra[id].len;
1599                         for (i = 0; i < len / sizeof(struct vring_packed_desc);
1600                                         i++)
1601                                 vring_unmap_desc_packed(vq, &desc[i]);
1602                 }
1603                 kfree(desc);
1604                 state->indir_desc = NULL;
1605         } else if (ctx) {
1606                 *ctx = state->indir_desc;
1607         }
1608 }
1609
1610 static inline bool is_used_desc_packed(const struct vring_virtqueue *vq,
1611                                        u16 idx, bool used_wrap_counter)
1612 {
1613         bool avail, used;
1614         u16 flags;
1615
1616         flags = le16_to_cpu(vq->packed.vring.desc[idx].flags);
1617         avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
1618         used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
1619
1620         return avail == used && used == used_wrap_counter;
1621 }
1622
1623 static bool more_used_packed(const struct vring_virtqueue *vq)
1624 {
1625         u16 last_used;
1626         u16 last_used_idx;
1627         bool used_wrap_counter;
1628
1629         last_used_idx = READ_ONCE(vq->last_used_idx);
1630         last_used = packed_last_used(last_used_idx);
1631         used_wrap_counter = packed_used_wrap_counter(last_used_idx);
1632         return is_used_desc_packed(vq, last_used, used_wrap_counter);
1633 }
1634
1635 static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
1636                                           unsigned int *len,
1637                                           void **ctx)
1638 {
1639         struct vring_virtqueue *vq = to_vvq(_vq);
1640         u16 last_used, id, last_used_idx;
1641         bool used_wrap_counter;
1642         void *ret;
1643
1644         START_USE(vq);
1645
1646         if (unlikely(vq->broken)) {
1647                 END_USE(vq);
1648                 return NULL;
1649         }
1650
1651         if (!more_used_packed(vq)) {
1652                 pr_debug("No more buffers in queue\n");
1653                 END_USE(vq);
1654                 return NULL;
1655         }
1656
1657         /* Only get used elements after they have been exposed by host. */
1658         virtio_rmb(vq->weak_barriers);
1659
1660         last_used_idx = READ_ONCE(vq->last_used_idx);
1661         used_wrap_counter = packed_used_wrap_counter(last_used_idx);
1662         last_used = packed_last_used(last_used_idx);
1663         id = le16_to_cpu(vq->packed.vring.desc[last_used].id);
1664         *len = le32_to_cpu(vq->packed.vring.desc[last_used].len);
1665
1666         if (unlikely(id >= vq->packed.vring.num)) {
1667                 BAD_RING(vq, "id %u out of range\n", id);
1668                 return NULL;
1669         }
1670         if (unlikely(!vq->packed.desc_state[id].data)) {
1671                 BAD_RING(vq, "id %u is not a head!\n", id);
1672                 return NULL;
1673         }
1674
1675         /* detach_buf_packed clears data, so grab it now. */
1676         ret = vq->packed.desc_state[id].data;
1677         detach_buf_packed(vq, id, ctx);
1678
1679         last_used += vq->packed.desc_state[id].num;
1680         if (unlikely(last_used >= vq->packed.vring.num)) {
1681                 last_used -= vq->packed.vring.num;
1682                 used_wrap_counter ^= 1;
1683         }
1684
1685         last_used = (last_used | (used_wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
1686         WRITE_ONCE(vq->last_used_idx, last_used);
1687
1688         /*
1689          * If we expect an interrupt for the next entry, tell host
1690          * by writing event index and flush out the write before
1691          * the read in the next get_buf call.
1692          */
1693         if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC)
1694                 virtio_store_mb(vq->weak_barriers,
1695                                 &vq->packed.vring.driver->off_wrap,
1696                                 cpu_to_le16(vq->last_used_idx));
1697
1698         LAST_ADD_TIME_INVALID(vq);
1699
1700         END_USE(vq);
1701         return ret;
1702 }
1703
1704 static void virtqueue_disable_cb_packed(struct virtqueue *_vq)
1705 {
1706         struct vring_virtqueue *vq = to_vvq(_vq);
1707
1708         if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) {
1709                 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1710
1711                 /*
1712                  * If the device has already triggered an event, it won't trigger one again:
1713                  * no need to disable.
1714                  */
1715                 if (vq->event_triggered)
1716                         return;
1717
1718                 vq->packed.vring.driver->flags =
1719                         cpu_to_le16(vq->packed.event_flags_shadow);
1720         }
1721 }
1722
1723 static unsigned int virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
1724 {
1725         struct vring_virtqueue *vq = to_vvq(_vq);
1726
1727         START_USE(vq);
1728
1729         /*
1730          * We optimistically turn back on interrupts, then check if there was
1731          * more to do.
1732          */
1733
1734         if (vq->event) {
1735                 vq->packed.vring.driver->off_wrap =
1736                         cpu_to_le16(vq->last_used_idx);
1737                 /*
1738                  * We need to update event offset and event wrap
1739                  * counter first before updating event flags.
1740                  */
1741                 virtio_wmb(vq->weak_barriers);
1742         }
1743
1744         if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
1745                 vq->packed.event_flags_shadow = vq->event ?
1746                                 VRING_PACKED_EVENT_FLAG_DESC :
1747                                 VRING_PACKED_EVENT_FLAG_ENABLE;
1748                 vq->packed.vring.driver->flags =
1749                                 cpu_to_le16(vq->packed.event_flags_shadow);
1750         }
1751
1752         END_USE(vq);
1753         return vq->last_used_idx;
1754 }
1755
1756 static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap)
1757 {
1758         struct vring_virtqueue *vq = to_vvq(_vq);
1759         bool wrap_counter;
1760         u16 used_idx;
1761
1762         wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1763         used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1764
1765         return is_used_desc_packed(vq, used_idx, wrap_counter);
1766 }
1767
1768 static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
1769 {
1770         struct vring_virtqueue *vq = to_vvq(_vq);
1771         u16 used_idx, wrap_counter, last_used_idx;
1772         u16 bufs;
1773
1774         START_USE(vq);
1775
1776         /*
1777          * We optimistically turn back on interrupts, then check if there was
1778          * more to do.
1779          */
1780
1781         if (vq->event) {
1782                 /* TODO: tune this threshold */
1783                 bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4;
1784                 last_used_idx = READ_ONCE(vq->last_used_idx);
1785                 wrap_counter = packed_used_wrap_counter(last_used_idx);
1786
1787                 used_idx = packed_last_used(last_used_idx) + bufs;
1788                 if (used_idx >= vq->packed.vring.num) {
1789                         used_idx -= vq->packed.vring.num;
1790                         wrap_counter ^= 1;
1791                 }
1792
1793                 vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx |
1794                         (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
1795
1796                 /*
1797                  * We need to update event offset and event wrap
1798                  * counter first before updating event flags.
1799                  */
1800                 virtio_wmb(vq->weak_barriers);
1801         }
1802
1803         if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
1804                 vq->packed.event_flags_shadow = vq->event ?
1805                                 VRING_PACKED_EVENT_FLAG_DESC :
1806                                 VRING_PACKED_EVENT_FLAG_ENABLE;
1807                 vq->packed.vring.driver->flags =
1808                                 cpu_to_le16(vq->packed.event_flags_shadow);
1809         }
1810
1811         /*
1812          * We need to update event suppression structure first
1813          * before re-checking for more used buffers.
1814          */
1815         virtio_mb(vq->weak_barriers);
1816
1817         last_used_idx = READ_ONCE(vq->last_used_idx);
1818         wrap_counter = packed_used_wrap_counter(last_used_idx);
1819         used_idx = packed_last_used(last_used_idx);
1820         if (is_used_desc_packed(vq, used_idx, wrap_counter)) {
1821                 END_USE(vq);
1822                 return false;
1823         }
1824
1825         END_USE(vq);
1826         return true;
1827 }
1828
1829 static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
1830 {
1831         struct vring_virtqueue *vq = to_vvq(_vq);
1832         unsigned int i;
1833         void *buf;
1834
1835         START_USE(vq);
1836
1837         for (i = 0; i < vq->packed.vring.num; i++) {
1838                 if (!vq->packed.desc_state[i].data)
1839                         continue;
1840                 /* detach_buf clears data, so grab it now. */
1841                 buf = vq->packed.desc_state[i].data;
1842                 detach_buf_packed(vq, i, NULL);
1843                 END_USE(vq);
1844                 return buf;
1845         }
1846         /* That should have freed everything. */
1847         BUG_ON(vq->vq.num_free != vq->packed.vring.num);
1848
1849         END_USE(vq);
1850         return NULL;
1851 }
1852
1853 static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num)
1854 {
1855         struct vring_desc_extra *desc_extra;
1856         unsigned int i;
1857
1858         desc_extra = kmalloc_array(num, sizeof(struct vring_desc_extra),
1859                                    GFP_KERNEL);
1860         if (!desc_extra)
1861                 return NULL;
1862
1863         memset(desc_extra, 0, num * sizeof(struct vring_desc_extra));
1864
1865         for (i = 0; i < num - 1; i++)
1866                 desc_extra[i].next = i + 1;
1867
1868         return desc_extra;
1869 }
1870
1871 static void vring_free_packed(struct vring_virtqueue_packed *vring_packed,
1872                               struct virtio_device *vdev,
1873                               struct device *dma_dev)
1874 {
1875         if (vring_packed->vring.desc)
1876                 vring_free_queue(vdev, vring_packed->ring_size_in_bytes,
1877                                  vring_packed->vring.desc,
1878                                  vring_packed->ring_dma_addr,
1879                                  dma_dev);
1880
1881         if (vring_packed->vring.driver)
1882                 vring_free_queue(vdev, vring_packed->event_size_in_bytes,
1883                                  vring_packed->vring.driver,
1884                                  vring_packed->driver_event_dma_addr,
1885                                  dma_dev);
1886
1887         if (vring_packed->vring.device)
1888                 vring_free_queue(vdev, vring_packed->event_size_in_bytes,
1889                                  vring_packed->vring.device,
1890                                  vring_packed->device_event_dma_addr,
1891                                  dma_dev);
1892
1893         kfree(vring_packed->desc_state);
1894         kfree(vring_packed->desc_extra);
1895 }
1896
1897 static int vring_alloc_queue_packed(struct vring_virtqueue_packed *vring_packed,
1898                                     struct virtio_device *vdev,
1899                                     u32 num, struct device *dma_dev)
1900 {
1901         struct vring_packed_desc *ring;
1902         struct vring_packed_desc_event *driver, *device;
1903         dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
1904         size_t ring_size_in_bytes, event_size_in_bytes;
1905
1906         ring_size_in_bytes = num * sizeof(struct vring_packed_desc);
1907
1908         ring = vring_alloc_queue(vdev, ring_size_in_bytes,
1909                                  &ring_dma_addr,
1910                                  GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
1911                                  dma_dev);
1912         if (!ring)
1913                 goto err;
1914
1915         vring_packed->vring.desc         = ring;
1916         vring_packed->ring_dma_addr      = ring_dma_addr;
1917         vring_packed->ring_size_in_bytes = ring_size_in_bytes;
1918
1919         event_size_in_bytes = sizeof(struct vring_packed_desc_event);
1920
1921         driver = vring_alloc_queue(vdev, event_size_in_bytes,
1922                                    &driver_event_dma_addr,
1923                                    GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
1924                                    dma_dev);
1925         if (!driver)
1926                 goto err;
1927
1928         vring_packed->vring.driver          = driver;
1929         vring_packed->event_size_in_bytes   = event_size_in_bytes;
1930         vring_packed->driver_event_dma_addr = driver_event_dma_addr;
1931
1932         device = vring_alloc_queue(vdev, event_size_in_bytes,
1933                                    &device_event_dma_addr,
1934                                    GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
1935                                    dma_dev);
1936         if (!device)
1937                 goto err;
1938
1939         vring_packed->vring.device          = device;
1940         vring_packed->device_event_dma_addr = device_event_dma_addr;
1941
1942         vring_packed->vring.num = num;
1943
1944         return 0;
1945
1946 err:
1947         vring_free_packed(vring_packed, vdev, dma_dev);
1948         return -ENOMEM;
1949 }
1950
1951 static int vring_alloc_state_extra_packed(struct vring_virtqueue_packed *vring_packed)
1952 {
1953         struct vring_desc_state_packed *state;
1954         struct vring_desc_extra *extra;
1955         u32 num = vring_packed->vring.num;
1956
1957         state = kmalloc_array(num, sizeof(struct vring_desc_state_packed), GFP_KERNEL);
1958         if (!state)
1959                 goto err_desc_state;
1960
1961         memset(state, 0, num * sizeof(struct vring_desc_state_packed));
1962
1963         extra = vring_alloc_desc_extra(num);
1964         if (!extra)
1965                 goto err_desc_extra;
1966
1967         vring_packed->desc_state = state;
1968         vring_packed->desc_extra = extra;
1969
1970         return 0;
1971
1972 err_desc_extra:
1973         kfree(state);
1974 err_desc_state:
1975         return -ENOMEM;
1976 }
1977
1978 static void virtqueue_vring_init_packed(struct vring_virtqueue_packed *vring_packed,
1979                                         bool callback)
1980 {
1981         vring_packed->next_avail_idx = 0;
1982         vring_packed->avail_wrap_counter = 1;
1983         vring_packed->event_flags_shadow = 0;
1984         vring_packed->avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL;
1985
1986         /* No callback?  Tell other side not to bother us. */
1987         if (!callback) {
1988                 vring_packed->event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1989                 vring_packed->vring.driver->flags =
1990                         cpu_to_le16(vring_packed->event_flags_shadow);
1991         }
1992 }
1993
1994 static void virtqueue_vring_attach_packed(struct vring_virtqueue *vq,
1995                                           struct vring_virtqueue_packed *vring_packed)
1996 {
1997         vq->packed = *vring_packed;
1998
1999         /* Put everything in free lists. */
2000         vq->free_head = 0;
2001 }
2002
2003 static void virtqueue_reinit_packed(struct vring_virtqueue *vq)
2004 {
2005         memset(vq->packed.vring.device, 0, vq->packed.event_size_in_bytes);
2006         memset(vq->packed.vring.driver, 0, vq->packed.event_size_in_bytes);
2007
2008         /* We need to reset the desc.flags. For more, see is_used_desc_packed(). */
2009         memset(vq->packed.vring.desc, 0, vq->packed.ring_size_in_bytes);
2010
2011         virtqueue_init(vq, vq->packed.vring.num);
2012         virtqueue_vring_init_packed(&vq->packed, !!vq->vq.callback);
2013 }
2014
2015 static struct virtqueue *vring_create_virtqueue_packed(
2016         unsigned int index,
2017         unsigned int num,
2018         unsigned int vring_align,
2019         struct virtio_device *vdev,
2020         bool weak_barriers,
2021         bool may_reduce_num,
2022         bool context,
2023         bool (*notify)(struct virtqueue *),
2024         void (*callback)(struct virtqueue *),
2025         const char *name,
2026         struct device *dma_dev)
2027 {
2028         struct vring_virtqueue_packed vring_packed = {};
2029         struct vring_virtqueue *vq;
2030         int err;
2031
2032         if (vring_alloc_queue_packed(&vring_packed, vdev, num, dma_dev))
2033                 goto err_ring;
2034
2035         vq = kmalloc(sizeof(*vq), GFP_KERNEL);
2036         if (!vq)
2037                 goto err_vq;
2038
2039         vq->vq.callback = callback;
2040         vq->vq.vdev = vdev;
2041         vq->vq.name = name;
2042         vq->vq.index = index;
2043         vq->vq.reset = false;
2044         vq->we_own_ring = true;
2045         vq->notify = notify;
2046         vq->weak_barriers = weak_barriers;
2047 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
2048         vq->broken = true;
2049 #else
2050         vq->broken = false;
2051 #endif
2052         vq->packed_ring = true;
2053         vq->dma_dev = dma_dev;
2054         vq->use_dma_api = vring_use_dma_api(vdev);
2055
2056         vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
2057                 !context;
2058         vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
2059
2060         if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
2061                 vq->weak_barriers = false;
2062
2063         err = vring_alloc_state_extra_packed(&vring_packed);
2064         if (err)
2065                 goto err_state_extra;
2066
2067         virtqueue_vring_init_packed(&vring_packed, !!callback);
2068
2069         virtqueue_init(vq, num);
2070         virtqueue_vring_attach_packed(vq, &vring_packed);
2071
2072         spin_lock(&vdev->vqs_list_lock);
2073         list_add_tail(&vq->vq.list, &vdev->vqs);
2074         spin_unlock(&vdev->vqs_list_lock);
2075         return &vq->vq;
2076
2077 err_state_extra:
2078         kfree(vq);
2079 err_vq:
2080         vring_free_packed(&vring_packed, vdev, dma_dev);
2081 err_ring:
2082         return NULL;
2083 }
2084
2085 static int virtqueue_resize_packed(struct virtqueue *_vq, u32 num)
2086 {
2087         struct vring_virtqueue_packed vring_packed = {};
2088         struct vring_virtqueue *vq = to_vvq(_vq);
2089         struct virtio_device *vdev = _vq->vdev;
2090         int err;
2091
2092         if (vring_alloc_queue_packed(&vring_packed, vdev, num, vring_dma_dev(vq)))
2093                 goto err_ring;
2094
2095         err = vring_alloc_state_extra_packed(&vring_packed);
2096         if (err)
2097                 goto err_state_extra;
2098
2099         vring_free(&vq->vq);
2100
2101         virtqueue_vring_init_packed(&vring_packed, !!vq->vq.callback);
2102
2103         virtqueue_init(vq, vring_packed.vring.num);
2104         virtqueue_vring_attach_packed(vq, &vring_packed);
2105
2106         return 0;
2107
2108 err_state_extra:
2109         vring_free_packed(&vring_packed, vdev, vring_dma_dev(vq));
2110 err_ring:
2111         virtqueue_reinit_packed(vq);
2112         return -ENOMEM;
2113 }
2114
2115
2116 /*
2117  * Generic functions and exported symbols.
2118  */
2119
2120 static inline int virtqueue_add(struct virtqueue *_vq,
2121                                 struct scatterlist *sgs[],
2122                                 unsigned int total_sg,
2123                                 unsigned int out_sgs,
2124                                 unsigned int in_sgs,
2125                                 void *data,
2126                                 void *ctx,
2127                                 gfp_t gfp)
2128 {
2129         struct vring_virtqueue *vq = to_vvq(_vq);
2130
2131         return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg,
2132                                         out_sgs, in_sgs, data, ctx, gfp) :
2133                                  virtqueue_add_split(_vq, sgs, total_sg,
2134                                         out_sgs, in_sgs, data, ctx, gfp);
2135 }
2136
2137 /**
2138  * virtqueue_add_sgs - expose buffers to other end
2139  * @_vq: the struct virtqueue we're talking about.
2140  * @sgs: array of terminated scatterlists.
2141  * @out_sgs: the number of scatterlists readable by other side
2142  * @in_sgs: the number of scatterlists which are writable (after readable ones)
2143  * @data: the token identifying the buffer.
2144  * @gfp: how to do memory allocations (if necessary).
2145  *
2146  * Caller must ensure we don't call this with other virtqueue operations
2147  * at the same time (except where noted).
2148  *
2149  * Returns zero or a negative error (i.e. ENOSPC, ENOMEM, EIO).
2150  */
2151 int virtqueue_add_sgs(struct virtqueue *_vq,
2152                       struct scatterlist *sgs[],
2153                       unsigned int out_sgs,
2154                       unsigned int in_sgs,
2155                       void *data,
2156                       gfp_t gfp)
2157 {
2158         unsigned int i, total_sg = 0;
2159
2160         /* Count them first. */
2161         for (i = 0; i < out_sgs + in_sgs; i++) {
2162                 struct scatterlist *sg;
2163
2164                 for (sg = sgs[i]; sg; sg = sg_next(sg))
2165                         total_sg++;
2166         }
2167         return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs,
2168                              data, NULL, gfp);
2169 }
2170 EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
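
/*
 * Illustrative usage sketch (hypothetical driver code, not part of this API):
 * queue a request with one driver-readable header and one device-writable
 * status byte. The request layout, vq and vq_lock are hypothetical; error
 * handling is trimmed to the essentials.
 *
 *      struct scatterlist hdr, status;
 *      struct scatterlist *sgs[] = { &hdr, &status };
 *      unsigned long flags;
 *      int err;
 *
 *      sg_init_one(&hdr, &req->hdr, sizeof(req->hdr));
 *      sg_init_one(&status, &req->status, sizeof(req->status));
 *
 *      spin_lock_irqsave(&vq_lock, flags);
 *      err = virtqueue_add_sgs(vq, sgs, 1, 1, req, GFP_ATOMIC);
 *      if (!err)
 *              virtqueue_kick(vq);
 *      spin_unlock_irqrestore(&vq_lock, flags);
 *
 * The first @out_sgs entries of @sgs must be the readable scatterlists,
 * followed by the @in_sgs writable ones, as in the order above.
 */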
2171
2172 /**
2173  * virtqueue_add_outbuf - expose output buffers to other end
2174  * @vq: the struct virtqueue we're talking about.
2175  * @sg: scatterlist (must be well-formed and terminated!)
2176  * @num: the number of entries in @sg readable by other side
2177  * @data: the token identifying the buffer.
2178  * @gfp: how to do memory allocations (if necessary).
2179  *
2180  * Caller must ensure we don't call this with other virtqueue operations
2181  * at the same time (except where noted).
2182  *
2183  * Returns zero or a negative error (i.e. ENOSPC, ENOMEM, EIO).
2184  */
2185 int virtqueue_add_outbuf(struct virtqueue *vq,
2186                          struct scatterlist *sg, unsigned int num,
2187                          void *data,
2188                          gfp_t gfp)
2189 {
2190         return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp);
2191 }
2192 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
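
/*
 * Illustrative usage sketch (hypothetical driver code): post a single
 * driver-readable buffer, e.g. for transmit; vq, buf and len are hypothetical.
 *
 *      struct scatterlist sg;
 *      int err;
 *
 *      sg_init_one(&sg, buf, len);
 *      err = virtqueue_add_outbuf(vq, &sg, 1, buf, GFP_KERNEL);
 *      if (err)
 *              return err;
 *      virtqueue_kick(vq);
 *
 * A failure here is typically -ENOSPC when the ring is full.
 */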
2193
2194 /**
2195  * virtqueue_add_inbuf - expose input buffers to other end
2196  * @vq: the struct virtqueue we're talking about.
2197  * @sg: scatterlist (must be well-formed and terminated!)
2198  * @num: the number of entries in @sg writable by other side
2199  * @data: the token identifying the buffer.
2200  * @gfp: how to do memory allocations (if necessary).
2201  *
2202  * Caller must ensure we don't call this with other virtqueue operations
2203  * at the same time (except where noted).
2204  *
2205  * Returns zero or a negative error (i.e. ENOSPC, ENOMEM, EIO).
2206  */
2207 int virtqueue_add_inbuf(struct virtqueue *vq,
2208                         struct scatterlist *sg, unsigned int num,
2209                         void *data,
2210                         gfp_t gfp)
2211 {
2212         return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp);
2213 }
2214 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
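
/*
 * Illustrative usage sketch (hypothetical driver code): refill a receive
 * queue with device-writable buffers until the ring is full; alloc_rx_buf(),
 * free_rx_buf() and RX_BUF_LEN are hypothetical.
 *
 *      struct scatterlist sg;
 *      void *buf;
 *
 *      while ((buf = alloc_rx_buf()) != NULL) {
 *              sg_init_one(&sg, buf, RX_BUF_LEN);
 *              if (virtqueue_add_inbuf(vq, &sg, 1, buf, GFP_KERNEL) < 0) {
 *                      free_rx_buf(buf);
 *                      break;
 *              }
 *      }
 *      virtqueue_kick(vq);
 *
 * The loop stops once the ring is full (virtqueue_add_inbuf() fails).
 */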
2215
2216 /**
2217  * virtqueue_add_inbuf_ctx - expose input buffers to other end
2218  * @vq: the struct virtqueue we're talking about.
2219  * @sg: scatterlist (must be well-formed and terminated!)
2220  * @num: the number of entries in @sg writable by other side
2221  * @data: the token identifying the buffer.
2222  * @ctx: extra context for the token
2223  * @gfp: how to do memory allocations (if necessary).
2224  *
2225  * Caller must ensure we don't call this with other virtqueue operations
2226  * at the same time (except where noted).
2227  *
2228  * Returns zero or a negative error (i.e. ENOSPC, ENOMEM, EIO).
2229  */
2230 int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
2231                         struct scatterlist *sg, unsigned int num,
2232                         void *data,
2233                         void *ctx,
2234                         gfp_t gfp)
2235 {
2236         return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp);
2237 }
2238 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
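
/*
 * Illustrative usage sketch (hypothetical driver code): @ctx carries
 * per-buffer bookkeeping that virtqueue_get_buf_ctx() hands back together
 * with the "data" token. Here a page offset is stashed alongside the page
 * used as the token; page, offset and RX_BUF_LEN are hypothetical.
 *
 *      struct scatterlist sg;
 *      int err;
 *
 *      sg_init_one(&sg, page_address(page) + offset, RX_BUF_LEN);
 *      err = virtqueue_add_inbuf_ctx(vq, &sg, 1, page,
 *                                    (void *)(unsigned long)offset, GFP_ATOMIC);
 */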
2239
2240 /**
2241  * virtqueue_kick_prepare - first half of split virtqueue_kick call.
2242  * @_vq: the struct virtqueue
2243  *
2244  * Instead of virtqueue_kick(), you can do:
2245  *      if (virtqueue_kick_prepare(vq))
2246  *              virtqueue_notify(vq);
2247  *
2248  * This is sometimes useful because virtqueue_kick_prepare() needs
2249  * to be serialized, but the actual virtqueue_notify() call does not.
2250  */
2251 bool virtqueue_kick_prepare(struct virtqueue *_vq)
2252 {
2253         struct vring_virtqueue *vq = to_vvq(_vq);
2254
2255         return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) :
2256                                  virtqueue_kick_prepare_split(_vq);
2257 }
2258 EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
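
/*
 * Illustrative usage sketch (hypothetical driver code): the split kick lets a
 * driver keep the serialized half under its own lock while issuing the
 * (possibly slow) notification outside of it; vq_lock is hypothetical.
 *
 *      bool kick;
 *
 *      spin_lock(&vq_lock);
 *      ... add buffers with virtqueue_add_sgs() ...
 *      kick = virtqueue_kick_prepare(vq);
 *      spin_unlock(&vq_lock);
 *
 *      if (kick)
 *              virtqueue_notify(vq);
 */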
2259
2260 /**
2261  * virtqueue_notify - second half of split virtqueue_kick call.
2262  * @_vq: the struct virtqueue
2263  *
2264  * This does not need to be serialized.
2265  *
2266  * Returns false if host notify failed or queue is broken, otherwise true.
2267  */
2268 bool virtqueue_notify(struct virtqueue *_vq)
2269 {
2270         struct vring_virtqueue *vq = to_vvq(_vq);
2271
2272         if (unlikely(vq->broken))
2273                 return false;
2274
2275         /* Prod other side to tell it about changes. */
2276         if (!vq->notify(_vq)) {
2277                 vq->broken = true;
2278                 return false;
2279         }
2280         return true;
2281 }
2282 EXPORT_SYMBOL_GPL(virtqueue_notify);
2283
2284 /**
2285  * virtqueue_kick - update after add_buf
2286  * @vq: the struct virtqueue
2287  *
2288  * After one or more virtqueue_add_* calls, invoke this to kick
2289  * the other side.
2290  *
2291  * Caller must ensure we don't call this with other virtqueue
2292  * operations at the same time (except where noted).
2293  *
2294  * Returns false if kick failed, otherwise true.
2295  */
2296 bool virtqueue_kick(struct virtqueue *vq)
2297 {
2298         if (virtqueue_kick_prepare(vq))
2299                 return virtqueue_notify(vq);
2300         return true;
2301 }
2302 EXPORT_SYMBOL_GPL(virtqueue_kick);
2303
2304 /**
2305  * virtqueue_get_buf_ctx - get the next used buffer
2306  * @_vq: the struct virtqueue we're talking about.
2307  * @len: the length written into the buffer
2308  * @ctx: extra context for the token
2309  *
2310  * If the device wrote data into the buffer, @len will be set to the
2311  * amount written.  This means you don't need to clear the buffer
2312  * beforehand to ensure there's no data leakage in the case of short
2313  * writes.
2314  *
2315  * Caller must ensure we don't call this with other virtqueue
2316  * operations at the same time (except where noted).
2317  *
2318  * Returns NULL if there are no used buffers, or the "data" token
2319  * handed to virtqueue_add_*().
2320  */
2321 void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
2322                             void **ctx)
2323 {
2324         struct vring_virtqueue *vq = to_vvq(_vq);
2325
2326         return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) :
2327                                  virtqueue_get_buf_ctx_split(_vq, len, ctx);
2328 }
2329 EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);
2330
2331 void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
2332 {
2333         return virtqueue_get_buf_ctx(_vq, len, NULL);
2334 }
2335 EXPORT_SYMBOL_GPL(virtqueue_get_buf);
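
/*
 * Illustrative usage sketch (hypothetical driver code): drain completions,
 * typically from the virtqueue callback; complete_req() is hypothetical.
 * @len reports how many bytes the device wrote into the buffer identified by
 * the returned token.
 *
 *      unsigned int len;
 *      void *req;
 *
 *      while ((req = virtqueue_get_buf(vq, &len)) != NULL)
 *              complete_req(req, len);
 */
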
2336 /**
2337  * virtqueue_disable_cb - disable callbacks
2338  * @_vq: the struct virtqueue we're talking about.
2339  *
2340  * Note that this is not necessarily synchronous, hence unreliable and only
2341  * useful as an optimization.
2342  *
2343  * Unlike other operations, this need not be serialized.
2344  */
2345 void virtqueue_disable_cb(struct virtqueue *_vq)
2346 {
2347         struct vring_virtqueue *vq = to_vvq(_vq);
2348
2349         if (vq->packed_ring)
2350                 virtqueue_disable_cb_packed(_vq);
2351         else
2352                 virtqueue_disable_cb_split(_vq);
2353 }
2354 EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
2355
2356 /**
2357  * virtqueue_enable_cb_prepare - restart callbacks after disable_cb
2358  * @_vq: the struct virtqueue we're talking about.
2359  *
2360  * This re-enables callbacks; it returns the current queue state
2361  * in an opaque unsigned value. This value should later be passed to
2362  * virtqueue_poll() to detect a possible race between the driver checking for
2363  * more work and enabling callbacks.
2364  *
2365  * Caller must ensure we don't call this with other virtqueue
2366  * operations at the same time (except where noted).
2367  */
2368 unsigned int virtqueue_enable_cb_prepare(struct virtqueue *_vq)
2369 {
2370         struct vring_virtqueue *vq = to_vvq(_vq);
2371
2372         if (vq->event_triggered)
2373                 vq->event_triggered = false;
2374
2375         return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) :
2376                                  virtqueue_enable_cb_prepare_split(_vq);
2377 }
2378 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);
2379
2380 /**
2381  * virtqueue_poll - query pending used buffers
2382  * @_vq: the struct virtqueue we're talking about.
2383  * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
2384  *
2385  * Returns "true" if there are pending used buffers in the queue.
2386  *
2387  * This does not need to be serialized.
2388  */
2389 bool virtqueue_poll(struct virtqueue *_vq, unsigned int last_used_idx)
2390 {
2391         struct vring_virtqueue *vq = to_vvq(_vq);
2392
2393         if (unlikely(vq->broken))
2394                 return false;
2395
2396         virtio_mb(vq->weak_barriers);
2397         return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) :
2398                                  virtqueue_poll_split(_vq, last_used_idx);
2399 }
2400 EXPORT_SYMBOL_GPL(virtqueue_poll);
2401
2402 /**
2403  * virtqueue_enable_cb - restart callbacks after disable_cb.
2404  * @_vq: the struct virtqueue we're talking about.
2405  *
2406  * This re-enables callbacks; it returns "false" if there are pending
2407  * buffers in the queue, to detect a possible race between the driver
2408  * checking for more work, and enabling callbacks.
2409  *
2410  * Caller must ensure we don't call this with other virtqueue
2411  * operations at the same time (except where noted).
2412  */
2413 bool virtqueue_enable_cb(struct virtqueue *_vq)
2414 {
2415         unsigned int last_used_idx = virtqueue_enable_cb_prepare(_vq);
2416
2417         return !virtqueue_poll(_vq, last_used_idx);
2418 }
2419 EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
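
/*
 * Illustrative usage sketch (hypothetical driver code): the usual race-free
 * processing loop built on virtqueue_disable_cb() and virtqueue_enable_cb();
 * process_buf() is hypothetical.
 *
 *      void *buf;
 *      unsigned int len;
 *
 *      for (;;) {
 *              virtqueue_disable_cb(vq);
 *              while ((buf = virtqueue_get_buf(vq, &len)) != NULL)
 *                      process_buf(buf, len);
 *              if (virtqueue_enable_cb(vq))
 *                      break;
 *      }
 *
 * When virtqueue_enable_cb() returns false, a buffer became used while
 * callbacks were being re-enabled, so the loop picks it up instead of
 * relying on another interrupt.
 */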
2420
2421 /**
2422  * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
2423  * @_vq: the struct virtqueue we're talking about.
2424  *
2425  * This re-enables callbacks but hints to the other side to delay
2426  * interrupts until most of the available buffers have been processed;
2427  * it returns "false" if there are many pending buffers in the queue,
2428  * to detect a possible race between the driver checking for more work,
2429  * and enabling callbacks.
2430  *
2431  * Caller must ensure we don't call this with other virtqueue
2432  * operations at the same time (except where noted).
2433  */
2434 bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
2435 {
2436         struct vring_virtqueue *vq = to_vvq(_vq);
2437
2438         if (vq->event_triggered)
2439                 vq->event_triggered = false;
2440
2441         return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) :
2442                                  virtqueue_enable_cb_delayed_split(_vq);
2443 }
2444 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
2445
2446 /**
2447  * virtqueue_detach_unused_buf - detach first unused buffer
2448  * @_vq: the struct virtqueue we're talking about.
2449  *
2450  * Returns NULL or the "data" token handed to virtqueue_add_*().
2451  * This is not valid on an active queue; it is useful for device
2452  * shutdown or when resetting the queue.
2453  */
2454 void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
2455 {
2456         struct vring_virtqueue *vq = to_vvq(_vq);
2457
2458         return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) :
2459                                  virtqueue_detach_unused_buf_split(_vq);
2460 }
2461 EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
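
/*
 * Illustrative usage sketch (hypothetical driver code): reclaim buffers
 * during device removal, after the device has been reset so the queue is no
 * longer active; free_buf() is hypothetical.
 *
 *      void *buf;
 *
 *      while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
 *              free_buf(buf);
 */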
2462
2463 static inline bool more_used(const struct vring_virtqueue *vq)
2464 {
2465         return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
2466 }
2467
2468 /**
2469  * vring_interrupt - notify a virtqueue on an interrupt
2470  * @irq: the IRQ number (ignored)
2471  * @_vq: the struct virtqueue to notify
2472  *
2473  * Calls the callback function of @_vq to process the virtqueue
2474  * notification.
2475  */
2476 irqreturn_t vring_interrupt(int irq, void *_vq)
2477 {
2478         struct vring_virtqueue *vq = to_vvq(_vq);
2479
2480         if (!more_used(vq)) {
2481                 pr_debug("virtqueue interrupt with no work for %p\n", vq);
2482                 return IRQ_NONE;
2483         }
2484
2485         if (unlikely(vq->broken)) {
2486 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
2487                 dev_warn_once(&vq->vq.vdev->dev,
2488                               "virtio vring IRQ raised before DRIVER_OK");
2489                 return IRQ_NONE;
2490 #else
2491                 return IRQ_HANDLED;
2492 #endif
2493         }
2494
2495         /* Just a hint for performance: so it's ok that this can be racy! */
2496         if (vq->event)
2497                 vq->event_triggered = true;
2498
2499         pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
2500         if (vq->vq.callback)
2501                 vq->vq.callback(&vq->vq);
2502
2503         return IRQ_HANDLED;
2504 }
2505 EXPORT_SYMBOL_GPL(vring_interrupt);
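
/*
 * Illustrative usage sketch (hypothetical transport code): a transport with a
 * dedicated interrupt per virtqueue can wire that interrupt straight to
 * vring_interrupt(), passing the struct virtqueue as the cookie; irq here is
 * hypothetical.
 *
 *      err = request_irq(irq, vring_interrupt, IRQF_SHARED,
 *                        dev_name(&vdev->dev), vq);
 */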
2506
2507 /* Only available for split ring */
2508 static struct virtqueue *__vring_new_virtqueue(unsigned int index,
2509                                                struct vring_virtqueue_split *vring_split,
2510                                                struct virtio_device *vdev,
2511                                                bool weak_barriers,
2512                                                bool context,
2513                                                bool (*notify)(struct virtqueue *),
2514                                                void (*callback)(struct virtqueue *),
2515                                                const char *name,
2516                                                struct device *dma_dev)
2517 {
2518         struct vring_virtqueue *vq;
2519         int err;
2520
2521         if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2522                 return NULL;
2523
2524         vq = kmalloc(sizeof(*vq), GFP_KERNEL);
2525         if (!vq)
2526                 return NULL;
2527
2528         vq->packed_ring = false;
2529         vq->vq.callback = callback;
2530         vq->vq.vdev = vdev;
2531         vq->vq.name = name;
2532         vq->vq.index = index;
2533         vq->vq.reset = false;
2534         vq->we_own_ring = false;
2535         vq->notify = notify;
2536         vq->weak_barriers = weak_barriers;
2537 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
2538         vq->broken = true;
2539 #else
2540         vq->broken = false;
2541 #endif
2542         vq->dma_dev = dma_dev;
2543         vq->use_dma_api = vring_use_dma_api(vdev);
2544
2545         vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
2546                 !context;
2547         vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
2548
2549         if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
2550                 vq->weak_barriers = false;
2551
2552         err = vring_alloc_state_extra_split(vring_split);
2553         if (err) {
2554                 kfree(vq);
2555                 return NULL;
2556         }
2557
2558         virtqueue_vring_init_split(vring_split, vq);
2559
2560         virtqueue_init(vq, vring_split->vring.num);
2561         virtqueue_vring_attach_split(vq, vring_split);
2562
2563         spin_lock(&vdev->vqs_list_lock);
2564         list_add_tail(&vq->vq.list, &vdev->vqs);
2565         spin_unlock(&vdev->vqs_list_lock);
2566         return &vq->vq;
2567 }
2568
2569 struct virtqueue *vring_create_virtqueue(
2570         unsigned int index,
2571         unsigned int num,
2572         unsigned int vring_align,
2573         struct virtio_device *vdev,
2574         bool weak_barriers,
2575         bool may_reduce_num,
2576         bool context,
2577         bool (*notify)(struct virtqueue *),
2578         void (*callback)(struct virtqueue *),
2579         const char *name)
2580 {
2581
2582         if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2583                 return vring_create_virtqueue_packed(index, num, vring_align,
2584                                 vdev, weak_barriers, may_reduce_num,
2585                                 context, notify, callback, name, vdev->dev.parent);
2586
2587         return vring_create_virtqueue_split(index, num, vring_align,
2588                         vdev, weak_barriers, may_reduce_num,
2589                         context, notify, callback, name, vdev->dev.parent);
2590 }
2591 EXPORT_SYMBOL_GPL(vring_create_virtqueue);
2592
2593 struct virtqueue *vring_create_virtqueue_dma(
2594         unsigned int index,
2595         unsigned int num,
2596         unsigned int vring_align,
2597         struct virtio_device *vdev,
2598         bool weak_barriers,
2599         bool may_reduce_num,
2600         bool context,
2601         bool (*notify)(struct virtqueue *),
2602         void (*callback)(struct virtqueue *),
2603         const char *name,
2604         struct device *dma_dev)
2605 {
2606
2607         if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2608                 return vring_create_virtqueue_packed(index, num, vring_align,
2609                                 vdev, weak_barriers, may_reduce_num,
2610                                 context, notify, callback, name, dma_dev);
2611
2612         return vring_create_virtqueue_split(index, num, vring_align,
2613                         vdev, weak_barriers, may_reduce_num,
2614                         context, notify, callback, name, dma_dev);
2615 }
2616 EXPORT_SYMBOL_GPL(vring_create_virtqueue_dma);
2617
2618 /**
2619  * virtqueue_resize - resize the vring of vq
2620  * @_vq: the struct virtqueue we're talking about.
2621  * @num: new number of ring entries
2622  * @recycle: callback to recycle buffers that are no longer used
2623  *
2624  * When a new vring really needs to be created, this sets the current vq into
2625  * the reset state and then calls the passed callback to recycle every buffer
2626  * that is no longer used. The old vring is released only after the new vring
2627  * has been created successfully.
2628  *
2629  * Caller must ensure we don't call this with other virtqueue operations
2630  * at the same time (except where noted).
2631  *
2632  * Returns zero or a negative error.
2633  * 0: success.
2634  * -ENOMEM: Failed to allocate a new ring; the vq falls back to the original
2635  *  ring size and can still work normally.
2636  * -EBUSY: Failed to re-enable the vq after the reset; the vq may not work properly.
2637  * -ENOENT: Transport or device does not support resize.
2638  * -E2BIG/-EINVAL: @num is out of range (zero or larger than the queue maximum).
2639  * -EPERM: Operation not permitted
2640  *
2641  */
2642 int virtqueue_resize(struct virtqueue *_vq, u32 num,
2643                      void (*recycle)(struct virtqueue *vq, void *buf))
2644 {
2645         struct vring_virtqueue *vq = to_vvq(_vq);
2646         struct virtio_device *vdev = vq->vq.vdev;
2647         void *buf;
2648         int err;
2649
2650         if (!vq->we_own_ring)
2651                 return -EPERM;
2652
2653         if (num > vq->vq.num_max)
2654                 return -E2BIG;
2655
2656         if (!num)
2657                 return -EINVAL;
2658
2659         if ((vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num) == num)
2660                 return 0;
2661
2662         if (!vdev->config->disable_vq_and_reset)
2663                 return -ENOENT;
2664
2665         if (!vdev->config->enable_vq_after_reset)
2666                 return -ENOENT;
2667
2668         err = vdev->config->disable_vq_and_reset(_vq);
2669         if (err)
2670                 return err;
2671
2672         while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL)
2673                 recycle(_vq, buf);
2674
2675         if (vq->packed_ring)
2676                 err = virtqueue_resize_packed(_vq, num);
2677         else
2678                 err = virtqueue_resize_split(_vq, num);
2679
2680         if (vdev->config->enable_vq_after_reset(_vq))
2681                 return -EBUSY;
2682
2683         return err;
2684 }
2685 EXPORT_SYMBOL_GPL(virtqueue_resize);
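
/*
 * Illustrative usage sketch (hypothetical driver code): shrink an RX ring and
 * release the buffers that were still posted to the old ring; the recycle
 * callback receives each detached "data" token. free_rx_buf() and refill_rx()
 * are hypothetical.
 *
 *      static void recycle_rx_buf(struct virtqueue *vq, void *buf)
 *      {
 *              free_rx_buf(buf);
 *      }
 *
 *      static int shrink_rx(struct virtqueue *vq, u32 new_num)
 *      {
 *              int err = virtqueue_resize(vq, new_num, recycle_rx_buf);
 *
 *              if (!err)
 *                      refill_rx(vq);
 *              return err;
 *      }
 *
 * After a successful resize the driver reposts its buffers into the new ring.
 */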
2686
2687 /* Only available for split ring */
2688 struct virtqueue *vring_new_virtqueue(unsigned int index,
2689                                       unsigned int num,
2690                                       unsigned int vring_align,
2691                                       struct virtio_device *vdev,
2692                                       bool weak_barriers,
2693                                       bool context,
2694                                       void *pages,
2695                                       bool (*notify)(struct virtqueue *vq),
2696                                       void (*callback)(struct virtqueue *vq),
2697                                       const char *name)
2698 {
2699         struct vring_virtqueue_split vring_split = {};
2700
2701         if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2702                 return NULL;
2703
2704         vring_init(&vring_split.vring, num, pages, vring_align);
2705         return __vring_new_virtqueue(index, &vring_split, vdev, weak_barriers,
2706                                      context, notify, callback, name,
2707                                      vdev->dev.parent);
2708 }
2709 EXPORT_SYMBOL_GPL(vring_new_virtqueue);
2710
2711 static void vring_free(struct virtqueue *_vq)
2712 {
2713         struct vring_virtqueue *vq = to_vvq(_vq);
2714
2715         if (vq->we_own_ring) {
2716                 if (vq->packed_ring) {
2717                         vring_free_queue(vq->vq.vdev,
2718                                          vq->packed.ring_size_in_bytes,
2719                                          vq->packed.vring.desc,
2720                                          vq->packed.ring_dma_addr,
2721                                          vring_dma_dev(vq));
2722
2723                         vring_free_queue(vq->vq.vdev,
2724                                          vq->packed.event_size_in_bytes,
2725                                          vq->packed.vring.driver,
2726                                          vq->packed.driver_event_dma_addr,
2727                                          vring_dma_dev(vq));
2728
2729                         vring_free_queue(vq->vq.vdev,
2730                                          vq->packed.event_size_in_bytes,
2731                                          vq->packed.vring.device,
2732                                          vq->packed.device_event_dma_addr,
2733                                          vring_dma_dev(vq));
2734
2735                         kfree(vq->packed.desc_state);
2736                         kfree(vq->packed.desc_extra);
2737                 } else {
2738                         vring_free_queue(vq->vq.vdev,
2739                                          vq->split.queue_size_in_bytes,
2740                                          vq->split.vring.desc,
2741                                          vq->split.queue_dma_addr,
2742                                          vring_dma_dev(vq));
2743                 }
2744         }
2745         if (!vq->packed_ring) {
2746                 kfree(vq->split.desc_state);
2747                 kfree(vq->split.desc_extra);
2748         }
2749 }
2750
2751 void vring_del_virtqueue(struct virtqueue *_vq)
2752 {
2753         struct vring_virtqueue *vq = to_vvq(_vq);
2754
2755         spin_lock(&vq->vq.vdev->vqs_list_lock);
2756         list_del(&_vq->list);
2757         spin_unlock(&vq->vq.vdev->vqs_list_lock);
2758
2759         vring_free(_vq);
2760
2761         kfree(vq);
2762 }
2763 EXPORT_SYMBOL_GPL(vring_del_virtqueue);
2764
2765 u32 vring_notification_data(struct virtqueue *_vq)
2766 {
2767         struct vring_virtqueue *vq = to_vvq(_vq);
2768         u16 next;
2769
2770         if (vq->packed_ring)
2771                 next = (vq->packed.next_avail_idx &
2772                                 ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR))) |
2773                         vq->packed.avail_wrap_counter <<
2774                                 VRING_PACKED_EVENT_F_WRAP_CTR;
2775         else
2776                 next = vq->split.avail_idx_shadow;
2777
2778         return next << 16 | _vq->index;
2779 }
2780 EXPORT_SYMBOL_GPL(vring_notification_data);
2781
2782 /* Manipulates transport-specific feature bits. */
2783 void vring_transport_features(struct virtio_device *vdev)
2784 {
2785         unsigned int i;
2786
2787         for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
2788                 switch (i) {
2789                 case VIRTIO_RING_F_INDIRECT_DESC:
2790                         break;
2791                 case VIRTIO_RING_F_EVENT_IDX:
2792                         break;
2793                 case VIRTIO_F_VERSION_1:
2794                         break;
2795                 case VIRTIO_F_ACCESS_PLATFORM:
2796                         break;
2797                 case VIRTIO_F_RING_PACKED:
2798                         break;
2799                 case VIRTIO_F_ORDER_PLATFORM:
2800                         break;
2801                 case VIRTIO_F_NOTIFICATION_DATA:
2802                         break;
2803                 default:
2804                         /* We don't understand this bit. */
2805                         __virtio_clear_bit(vdev, i);
2806                 }
2807         }
2808 }
2809 EXPORT_SYMBOL_GPL(vring_transport_features);
2810
2811 /**
2812  * virtqueue_get_vring_size - return the size of the virtqueue's vring
2813  * @_vq: the struct virtqueue containing the vring of interest.
2814  *
2815  * Returns the size of the vring.  This is mainly used for boasting to
2816  * userspace.  Unlike other operations, this need not be serialized.
2817  */
2818 unsigned int virtqueue_get_vring_size(const struct virtqueue *_vq)
2819 {
2820
2821         const struct vring_virtqueue *vq = to_vvq(_vq);
2822
2823         return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
2824 }
2825 EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
2826
2827 /*
2828  * This function should only be called by the core, not directly by the driver.
2829  */
2830 void __virtqueue_break(struct virtqueue *_vq)
2831 {
2832         struct vring_virtqueue *vq = to_vvq(_vq);
2833
2834         /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2835         WRITE_ONCE(vq->broken, true);
2836 }
2837 EXPORT_SYMBOL_GPL(__virtqueue_break);
2838
2839 /*
2840  * This function should only be called by the core, not directly by the driver.
2841  */
2842 void __virtqueue_unbreak(struct virtqueue *_vq)
2843 {
2844         struct vring_virtqueue *vq = to_vvq(_vq);
2845
2846         /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2847         WRITE_ONCE(vq->broken, false);
2848 }
2849 EXPORT_SYMBOL_GPL(__virtqueue_unbreak);
2850
2851 bool virtqueue_is_broken(const struct virtqueue *_vq)
2852 {
2853         const struct vring_virtqueue *vq = to_vvq(_vq);
2854
2855         return READ_ONCE(vq->broken);
2856 }
2857 EXPORT_SYMBOL_GPL(virtqueue_is_broken);
2858
2859 /*
2860  * This should prevent the device from being used, allowing drivers to
2861  * recover.  You may need to grab appropriate locks to flush.
2862  */
2863 void virtio_break_device(struct virtio_device *dev)
2864 {
2865         struct virtqueue *_vq;
2866
2867         spin_lock(&dev->vqs_list_lock);
2868         list_for_each_entry(_vq, &dev->vqs, list) {
2869                 struct vring_virtqueue *vq = to_vvq(_vq);
2870
2871                 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2872                 WRITE_ONCE(vq->broken, true);
2873         }
2874         spin_unlock(&dev->vqs_list_lock);
2875 }
2876 EXPORT_SYMBOL_GPL(virtio_break_device);
2877
2878 /*
2879  * This should allow the device to be used by the driver. You may
2880  * need to grab appropriate locks to flush the write to
2881  * vq->broken. This should only be used in specific cases, e.g.
2882  * probing and restoring. This function should only be called by the
2883  * core, not directly by the driver.
2884  */
2885 void __virtio_unbreak_device(struct virtio_device *dev)
2886 {
2887         struct virtqueue *_vq;
2888
2889         spin_lock(&dev->vqs_list_lock);
2890         list_for_each_entry(_vq, &dev->vqs, list) {
2891                 struct vring_virtqueue *vq = to_vvq(_vq);
2892
2893                 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2894                 WRITE_ONCE(vq->broken, false);
2895         }
2896         spin_unlock(&dev->vqs_list_lock);
2897 }
2898 EXPORT_SYMBOL_GPL(__virtio_unbreak_device);
2899
2900 dma_addr_t virtqueue_get_desc_addr(const struct virtqueue *_vq)
2901 {
2902         const struct vring_virtqueue *vq = to_vvq(_vq);
2903
2904         BUG_ON(!vq->we_own_ring);
2905
2906         if (vq->packed_ring)
2907                 return vq->packed.ring_dma_addr;
2908
2909         return vq->split.queue_dma_addr;
2910 }
2911 EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);
2912
2913 dma_addr_t virtqueue_get_avail_addr(const struct virtqueue *_vq)
2914 {
2915         const struct vring_virtqueue *vq = to_vvq(_vq);
2916
2917         BUG_ON(!vq->we_own_ring);
2918
2919         if (vq->packed_ring)
2920                 return vq->packed.driver_event_dma_addr;
2921
2922         return vq->split.queue_dma_addr +
2923                 ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
2924 }
2925 EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);
2926
2927 dma_addr_t virtqueue_get_used_addr(const struct virtqueue *_vq)
2928 {
2929         const struct vring_virtqueue *vq = to_vvq(_vq);
2930
2931         BUG_ON(!vq->we_own_ring);
2932
2933         if (vq->packed_ring)
2934                 return vq->packed.device_event_dma_addr;
2935
2936         return vq->split.queue_dma_addr +
2937                 ((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
2938 }
2939 EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
2940
2941 /* Only available for split ring */
2942 const struct vring *virtqueue_get_vring(const struct virtqueue *vq)
2943 {
2944         return &to_vvq(vq)->split.vring;
2945 }
2946 EXPORT_SYMBOL_GPL(virtqueue_get_vring);
2947
2948 MODULE_LICENSE("GPL");