virtio_ring: introduce virtqueue_dma_dev()
drivers/virtio/virtio_ring.c (platform/kernel/linux-rpi.git)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* Virtio ring implementation.
3  *
4  *  Copyright 2007 Rusty Russell IBM Corporation
5  */
6 #include <linux/virtio.h>
7 #include <linux/virtio_ring.h>
8 #include <linux/virtio_config.h>
9 #include <linux/device.h>
10 #include <linux/slab.h>
11 #include <linux/module.h>
12 #include <linux/hrtimer.h>
13 #include <linux/dma-mapping.h>
14 #include <linux/kmsan.h>
15 #include <linux/spinlock.h>
16 #include <xen/xen.h>
17
18 #ifdef DEBUG
19 /* For development, we want to crash whenever the ring is screwed. */
20 #define BAD_RING(_vq, fmt, args...)                             \
21         do {                                                    \
22                 dev_err(&(_vq)->vq.vdev->dev,                   \
23                         "%s:"fmt, (_vq)->vq.name, ##args);      \
24                 BUG();                                          \
25         } while (0)
26 /* Caller is supposed to guarantee no reentry. */
27 #define START_USE(_vq)                                          \
28         do {                                                    \
29                 if ((_vq)->in_use)                              \
30                         panic("%s:in_use = %i\n",               \
31                               (_vq)->vq.name, (_vq)->in_use);   \
32                 (_vq)->in_use = __LINE__;                       \
33         } while (0)
34 #define END_USE(_vq) \
35         do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0)
36 #define LAST_ADD_TIME_UPDATE(_vq)                               \
37         do {                                                    \
38                 ktime_t now = ktime_get();                      \
39                                                                 \
40                 /* No kick or get, with 0.1 second between?  Warn. */ \
41                 if ((_vq)->last_add_time_valid)                 \
42                         WARN_ON(ktime_to_ms(ktime_sub(now,      \
43                                 (_vq)->last_add_time)) > 100);  \
44                 (_vq)->last_add_time = now;                     \
45                 (_vq)->last_add_time_valid = true;              \
46         } while (0)
47 #define LAST_ADD_TIME_CHECK(_vq)                                \
48         do {                                                    \
49                 if ((_vq)->last_add_time_valid) {               \
50                         WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \
51                                       (_vq)->last_add_time)) > 100); \
52                 }                                               \
53         } while (0)
54 #define LAST_ADD_TIME_INVALID(_vq)                              \
55         ((_vq)->last_add_time_valid = false)
56 #else
57 #define BAD_RING(_vq, fmt, args...)                             \
58         do {                                                    \
59                 dev_err(&_vq->vq.vdev->dev,                     \
60                         "%s:"fmt, (_vq)->vq.name, ##args);      \
61                 (_vq)->broken = true;                           \
62         } while (0)
63 #define START_USE(vq)
64 #define END_USE(vq)
65 #define LAST_ADD_TIME_UPDATE(vq)
66 #define LAST_ADD_TIME_CHECK(vq)
67 #define LAST_ADD_TIME_INVALID(vq)
68 #endif
69
70 struct vring_desc_state_split {
71         void *data;                     /* Data for callback. */
72         struct vring_desc *indir_desc;  /* Indirect descriptor, if any. */
73 };
74
75 struct vring_desc_state_packed {
76         void *data;                     /* Data for callback. */
77         struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
78         u16 num;                        /* Descriptor list length. */
79         u16 last;                       /* The last desc state in a list. */
80 };
81
82 struct vring_desc_extra {
83         dma_addr_t addr;                /* Descriptor DMA addr. */
84         u32 len;                        /* Descriptor length. */
85         u16 flags;                      /* Descriptor flags. */
86         u16 next;                       /* The next desc state in a list. */
87 };
88
89 struct vring_virtqueue_split {
90         /* Actual memory layout for this queue. */
91         struct vring vring;
92
93         /* Last written value to avail->flags */
94         u16 avail_flags_shadow;
95
96         /*
97          * Last written value to avail->idx in
98          * guest byte order.
99          */
100         u16 avail_idx_shadow;
101
102         /* Per-descriptor state. */
103         struct vring_desc_state_split *desc_state;
104         struct vring_desc_extra *desc_extra;
105
106         /* DMA address and size information */
107         dma_addr_t queue_dma_addr;
108         size_t queue_size_in_bytes;
109
110         /*
111          * The parameters for creating vrings are reserved for creating new
112          * vring.
113          */
114         u32 vring_align;
115         bool may_reduce_num;
116 };
117
118 struct vring_virtqueue_packed {
119         /* Actual memory layout for this queue. */
120         struct {
121                 unsigned int num;
122                 struct vring_packed_desc *desc;
123                 struct vring_packed_desc_event *driver;
124                 struct vring_packed_desc_event *device;
125         } vring;
126
127         /* Driver ring wrap counter. */
128         bool avail_wrap_counter;
129
130         /* Avail used flags. */
131         u16 avail_used_flags;
132
133         /* Index of the next avail descriptor. */
134         u16 next_avail_idx;
135
136         /*
137          * Last written value to driver->flags in
138          * guest byte order.
139          */
140         u16 event_flags_shadow;
141
142         /* Per-descriptor state. */
143         struct vring_desc_state_packed *desc_state;
144         struct vring_desc_extra *desc_extra;
145
146         /* DMA address and size information */
147         dma_addr_t ring_dma_addr;
148         dma_addr_t driver_event_dma_addr;
149         dma_addr_t device_event_dma_addr;
150         size_t ring_size_in_bytes;
151         size_t event_size_in_bytes;
152 };
153
154 struct vring_virtqueue {
155         struct virtqueue vq;
156
157         /* Is this a packed ring? */
158         bool packed_ring;
159
160         /* Is DMA API used? */
161         bool use_dma_api;
162
163         /* Can we use weak barriers? */
164         bool weak_barriers;
165
166         /* Other side has made a mess, don't try any more. */
167         bool broken;
168
169         /* Host supports indirect buffers */
170         bool indirect;
171
172         /* Host publishes avail event idx */
173         bool event;
174
175         /* Do DMA mapping by driver */
176         bool premapped;
177
178         /* Head of free buffer list. */
179         unsigned int free_head;
180         /* Number we've added since last sync. */
181         unsigned int num_added;
182
183         /* Last used index we've seen.
184          * For the split ring, it just contains the last used index.
185          * For the packed ring:
186          * bits below VRING_PACKED_EVENT_F_WRAP_CTR contain the last used index;
187          * bits from VRING_PACKED_EVENT_F_WRAP_CTR up contain the used wrap counter.
188          */
189         u16 last_used_idx;
190
191         /* Hint for event idx: already triggered no need to disable. */
192         bool event_triggered;
193
194         union {
195                 /* Available for split ring */
196                 struct vring_virtqueue_split split;
197
198                 /* Available for packed ring */
199                 struct vring_virtqueue_packed packed;
200         };
201
202         /* How to notify other side. FIXME: commonalize hcalls! */
203         bool (*notify)(struct virtqueue *vq);
204
205         /* DMA, allocation, and size information */
206         bool we_own_ring;
207
208         /* Device used for doing DMA */
209         struct device *dma_dev;
210
211 #ifdef DEBUG
212         /* They're supposed to lock for us. */
213         unsigned int in_use;
214
215         /* Figure out if their kicks are too delayed. */
216         bool last_add_time_valid;
217         ktime_t last_add_time;
218 #endif
219 };
220
221 static struct virtqueue *__vring_new_virtqueue(unsigned int index,
222                                                struct vring_virtqueue_split *vring_split,
223                                                struct virtio_device *vdev,
224                                                bool weak_barriers,
225                                                bool context,
226                                                bool (*notify)(struct virtqueue *),
227                                                void (*callback)(struct virtqueue *),
228                                                const char *name,
229                                                struct device *dma_dev);
230 static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num);
231 static void vring_free(struct virtqueue *_vq);
232
233 /*
234  * Helpers.
235  */
236
237 #define to_vvq(_vq) container_of_const(_vq, struct vring_virtqueue, vq)
238
239 static bool virtqueue_use_indirect(const struct vring_virtqueue *vq,
240                                    unsigned int total_sg)
241 {
242         /*
243          * If the host supports indirect descriptor tables, and we have multiple
244          * buffers, then go indirect. FIXME: tune this threshold
245          */
246         return (vq->indirect && total_sg > 1 && vq->vq.num_free);
247 }
248
249 /*
250  * Modern virtio devices have feature bits to specify whether they need a
251  * quirk and bypass the IOMMU. If not there, just use the DMA API.
252  *
253  * If there, the interaction between virtio and DMA API is messy.
254  *
255  * On most systems with virtio, physical addresses match bus addresses,
256  * and it doesn't particularly matter whether we use the DMA API.
257  *
258  * On some systems, including Xen and any system with a physical device
259  * that speaks virtio behind a physical IOMMU, we must use the DMA API
260  * for virtio DMA to work at all.
261  *
262  * On other systems, including SPARC and PPC64, virtio-pci devices are
263  * enumerated as though they are behind an IOMMU, but the virtio host
264  * ignores the IOMMU, so we must either pretend that the IOMMU isn't
265  * there or somehow map everything as the identity.
266  *
267  * For the time being, we preserve historic behavior and bypass the DMA
268  * API.
269  *
270  * TODO: install a per-device DMA ops structure that does the right thing
271  * taking into account all the above quirks, and use the DMA API
272  * unconditionally on data path.
273  */
274
275 static bool vring_use_dma_api(const struct virtio_device *vdev)
276 {
277         if (!virtio_has_dma_quirk(vdev))
278                 return true;
279
280         /* Otherwise, we are left to guess. */
281         /*
282          * In theory, it's possible to have a buggy QEMU-supplied
283          * emulated Q35 IOMMU and Xen enabled at the same time.  On
284          * such a configuration, virtio has never worked and will
285          * not work without an even larger kludge.  Instead, enable
286          * the DMA API if we're a Xen guest, which at least allows
287          * all of the sensible Xen configurations to work correctly.
288          */
289         if (xen_domain())
290                 return true;
291
292         return false;
293 }
294
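/*
 * For reference, a minimal sketch of the feature-bit test that
 * virtio_has_dma_quirk() is assumed to boil down to (see
 * include/linux/virtio_config.h); the quirk has reverse polarity
 * compared to most features, for compatibility with legacy devices
 * that bypass the IOMMU:
 *
 *	return !virtio_has_feature(vdev, VIRTIO_F_ACCESS_PLATFORM);
 */
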
295 size_t virtio_max_dma_size(const struct virtio_device *vdev)
296 {
297         size_t max_segment_size = SIZE_MAX;
298
299         if (vring_use_dma_api(vdev))
300                 max_segment_size = dma_max_mapping_size(vdev->dev.parent);
301
302         return max_segment_size;
303 }
304 EXPORT_SYMBOL_GPL(virtio_max_dma_size);
305
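/*
 * A minimal usage sketch for the helper above, assuming a block driver
 * in the style of virtio-blk: the returned value caps the segment size
 * advertised to the block layer so every segment stays mappable by the
 * transport's DMA device (q and vdev are illustrative names):
 *
 *	blk_queue_max_segment_size(q, virtio_max_dma_size(vdev));
 */
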
306 static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
307                                dma_addr_t *dma_handle, gfp_t flag,
308                                struct device *dma_dev)
309 {
310         if (vring_use_dma_api(vdev)) {
311                 return dma_alloc_coherent(dma_dev, size,
312                                           dma_handle, flag);
313         } else {
314                 void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);
315
316                 if (queue) {
317                         phys_addr_t phys_addr = virt_to_phys(queue);
318                         *dma_handle = (dma_addr_t)phys_addr;
319
320                         /*
321                          * Sanity check: make sure we didn't truncate
322                          * the address.  The only arches I can find that
323                          * have 64-bit phys_addr_t but 32-bit dma_addr_t
324                          * are certain non-highmem MIPS and x86
325                          * configurations, but these configurations
326                          * should never allocate physical pages above 32
327                          * bits, so this is fine.  Just in case, throw a
328                          * warning and abort if we end up with an
329                          * unrepresentable address.
330                          */
331                         if (WARN_ON_ONCE(*dma_handle != phys_addr)) {
332                                 free_pages_exact(queue, PAGE_ALIGN(size));
333                                 return NULL;
334                         }
335                 }
336                 return queue;
337         }
338 }
339
340 static void vring_free_queue(struct virtio_device *vdev, size_t size,
341                              void *queue, dma_addr_t dma_handle,
342                              struct device *dma_dev)
343 {
344         if (vring_use_dma_api(vdev))
345                 dma_free_coherent(dma_dev, size, queue, dma_handle);
346         else
347                 free_pages_exact(queue, PAGE_ALIGN(size));
348 }
349
350 /*
351  * The DMA ops on various arches are rather gnarly right now, and
352  * making all of the arch DMA ops work on the vring device itself
353  * is a mess.
354  */
355 static struct device *vring_dma_dev(const struct vring_virtqueue *vq)
356 {
357         return vq->dma_dev;
358 }
359
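/*
 * A minimal sketch of the exported helper named in the commit title,
 * assuming it simply exposes the device above to drivers that want to
 * premap buffers themselves, and hides it when the DMA API is bypassed
 * so callers cannot map against the wrong device:
 *
 *	struct device *virtqueue_dma_dev(struct virtqueue *_vq)
 *	{
 *		struct vring_virtqueue *vq = to_vvq(_vq);
 *
 *		return vq->use_dma_api ? vring_dma_dev(vq) : NULL;
 *	}
 *	EXPORT_SYMBOL_GPL(virtqueue_dma_dev);
 */
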
360 /* Map one sg entry. */
361 static int vring_map_one_sg(const struct vring_virtqueue *vq, struct scatterlist *sg,
362                             enum dma_data_direction direction, dma_addr_t *addr)
363 {
364         if (vq->premapped) {
365                 *addr = sg_dma_address(sg);
366                 return 0;
367         }
368
369         if (!vq->use_dma_api) {
370                 /*
371                  * If DMA is not used, KMSAN doesn't know that the scatterlist
372                  * is initialized by the hardware. Explicitly check/unpoison it
373                  * depending on the direction.
374                  */
375                 kmsan_handle_dma(sg_page(sg), sg->offset, sg->length, direction);
376                 *addr = (dma_addr_t)sg_phys(sg);
377                 return 0;
378         }
379
380         /*
381          * We can't use dma_map_sg, because we don't use scatterlists in
382          * the way it expects (we don't guarantee that the scatterlist
383          * will exist for the lifetime of the mapping).
384          */
385         *addr = dma_map_page(vring_dma_dev(vq),
386                             sg_page(sg), sg->offset, sg->length,
387                             direction);
388
389         if (dma_mapping_error(vring_dma_dev(vq), *addr))
390                 return -ENOMEM;
391
392         return 0;
393 }
394
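/*
 * A sketch of the contract the premapped branch above relies on: the
 * driver maps the buffer itself against the queue's DMA device (for
 * instance the one returned by virtqueue_dma_dev()) and stores the
 * result in the scatterlist before calling virtqueue_add_*().  The
 * names dma_dev, buf, len and sg are illustrative:
 *
 *	dma_addr_t addr = dma_map_single(dma_dev, buf, len, DMA_TO_DEVICE);
 *
 *	if (dma_mapping_error(dma_dev, addr))
 *		return -ENOMEM;
 *
 *	sg_init_one(&sg, buf, len);
 *	sg_dma_address(&sg) = addr;
 *
 * sg->length still describes the buffer length, while sg_dma_address()
 * is what vring_map_one_sg() consumes when vq->premapped is set.
 */
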
395 static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
396                                    void *cpu_addr, size_t size,
397                                    enum dma_data_direction direction)
398 {
399         if (!vq->use_dma_api)
400                 return (dma_addr_t)virt_to_phys(cpu_addr);
401
402         return dma_map_single(vring_dma_dev(vq),
403                               cpu_addr, size, direction);
404 }
405
406 static int vring_mapping_error(const struct vring_virtqueue *vq,
407                                dma_addr_t addr)
408 {
409         if (!vq->use_dma_api)
410                 return 0;
411
412         return dma_mapping_error(vring_dma_dev(vq), addr);
413 }
414
415 static void virtqueue_init(struct vring_virtqueue *vq, u32 num)
416 {
417         vq->vq.num_free = num;
418
419         if (vq->packed_ring)
420                 vq->last_used_idx = 0 | (1 << VRING_PACKED_EVENT_F_WRAP_CTR);
421         else
422                 vq->last_used_idx = 0;
423
424         vq->event_triggered = false;
425         vq->num_added = 0;
426
427 #ifdef DEBUG
428         vq->in_use = false;
429         vq->last_add_time_valid = false;
430 #endif
431 }
432
433
434 /*
435  * Split ring specific functions - *_split().
436  */
437
438 static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
439                                            const struct vring_desc *desc)
440 {
441         u16 flags;
442
443         if (!vq->use_dma_api)
444                 return;
445
446         flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
447
448         dma_unmap_page(vring_dma_dev(vq),
449                        virtio64_to_cpu(vq->vq.vdev, desc->addr),
450                        virtio32_to_cpu(vq->vq.vdev, desc->len),
451                        (flags & VRING_DESC_F_WRITE) ?
452                        DMA_FROM_DEVICE : DMA_TO_DEVICE);
453 }
454
455 static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
456                                           unsigned int i)
457 {
458         struct vring_desc_extra *extra = vq->split.desc_extra;
459         u16 flags;
460
461         if (!vq->use_dma_api)
462                 goto out;
463
464         flags = extra[i].flags;
465
466         if (flags & VRING_DESC_F_INDIRECT) {
467                 dma_unmap_single(vring_dma_dev(vq),
468                                  extra[i].addr,
469                                  extra[i].len,
470                                  (flags & VRING_DESC_F_WRITE) ?
471                                  DMA_FROM_DEVICE : DMA_TO_DEVICE);
472         } else {
473                 dma_unmap_page(vring_dma_dev(vq),
474                                extra[i].addr,
475                                extra[i].len,
476                                (flags & VRING_DESC_F_WRITE) ?
477                                DMA_FROM_DEVICE : DMA_TO_DEVICE);
478         }
479
480 out:
481         return extra[i].next;
482 }
483
484 static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
485                                                unsigned int total_sg,
486                                                gfp_t gfp)
487 {
488         struct vring_desc *desc;
489         unsigned int i;
490
491         /*
492          * We require lowmem mappings for the descriptors because
493          * otherwise virt_to_phys will give us bogus addresses in the
494          * virtqueue.
495          */
496         gfp &= ~__GFP_HIGHMEM;
497
498         desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp);
499         if (!desc)
500                 return NULL;
501
502         for (i = 0; i < total_sg; i++)
503                 desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1);
504         return desc;
505 }
506
507 static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
508                                                     struct vring_desc *desc,
509                                                     unsigned int i,
510                                                     dma_addr_t addr,
511                                                     unsigned int len,
512                                                     u16 flags,
513                                                     bool indirect)
514 {
515         struct vring_virtqueue *vring = to_vvq(vq);
516         struct vring_desc_extra *extra = vring->split.desc_extra;
517         u16 next;
518
519         desc[i].flags = cpu_to_virtio16(vq->vdev, flags);
520         desc[i].addr = cpu_to_virtio64(vq->vdev, addr);
521         desc[i].len = cpu_to_virtio32(vq->vdev, len);
522
523         if (!indirect) {
524                 next = extra[i].next;
525                 desc[i].next = cpu_to_virtio16(vq->vdev, next);
526
527                 extra[i].addr = addr;
528                 extra[i].len = len;
529                 extra[i].flags = flags;
530         } else
531                 next = virtio16_to_cpu(vq->vdev, desc[i].next);
532
533         return next;
534 }
535
536 static inline int virtqueue_add_split(struct virtqueue *_vq,
537                                       struct scatterlist *sgs[],
538                                       unsigned int total_sg,
539                                       unsigned int out_sgs,
540                                       unsigned int in_sgs,
541                                       void *data,
542                                       void *ctx,
543                                       gfp_t gfp)
544 {
545         struct vring_virtqueue *vq = to_vvq(_vq);
546         struct scatterlist *sg;
547         struct vring_desc *desc;
548         unsigned int i, n, avail, descs_used, prev, err_idx;
549         int head;
550         bool indirect;
551
552         START_USE(vq);
553
554         BUG_ON(data == NULL);
555         BUG_ON(ctx && vq->indirect);
556
557         if (unlikely(vq->broken)) {
558                 END_USE(vq);
559                 return -EIO;
560         }
561
562         LAST_ADD_TIME_UPDATE(vq);
563
564         BUG_ON(total_sg == 0);
565
566         head = vq->free_head;
567
568         if (virtqueue_use_indirect(vq, total_sg))
569                 desc = alloc_indirect_split(_vq, total_sg, gfp);
570         else {
571                 desc = NULL;
572                 WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
573         }
574
575         if (desc) {
576                 /* Use a single buffer which doesn't continue */
577                 indirect = true;
578                 /* Set up rest to use this indirect table. */
579                 i = 0;
580                 descs_used = 1;
581         } else {
582                 indirect = false;
583                 desc = vq->split.vring.desc;
584                 i = head;
585                 descs_used = total_sg;
586         }
587
588         if (unlikely(vq->vq.num_free < descs_used)) {
589                 pr_debug("Can't add buf len %i - avail = %i\n",
590                          descs_used, vq->vq.num_free);
591                 /* FIXME: for historical reasons, we force a notify here if
592                  * there are outgoing parts to the buffer.  Presumably the
593                  * host should service the ring ASAP. */
594                 if (out_sgs)
595                         vq->notify(&vq->vq);
596                 if (indirect)
597                         kfree(desc);
598                 END_USE(vq);
599                 return -ENOSPC;
600         }
601
602         for (n = 0; n < out_sgs; n++) {
603                 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
604                         dma_addr_t addr;
605
606                         if (vring_map_one_sg(vq, sg, DMA_TO_DEVICE, &addr))
607                                 goto unmap_release;
608
609                         prev = i;
610                         /* Note that we trust indirect descriptor
611                          * table since it uses stream DMA mapping.
612                          */
613                         i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length,
614                                                      VRING_DESC_F_NEXT,
615                                                      indirect);
616                 }
617         }
618         for (; n < (out_sgs + in_sgs); n++) {
619                 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
620                         dma_addr_t addr;
621
622                         if (vring_map_one_sg(vq, sg, DMA_FROM_DEVICE, &addr))
623                                 goto unmap_release;
624
625                         prev = i;
626                         /* Note that we trust indirect descriptor
627                          * table since it uses stream DMA mapping.
628                          */
629                         i = virtqueue_add_desc_split(_vq, desc, i, addr,
630                                                      sg->length,
631                                                      VRING_DESC_F_NEXT |
632                                                      VRING_DESC_F_WRITE,
633                                                      indirect);
634                 }
635         }
636         /* Last one doesn't continue. */
637         desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
638         if (!indirect && vq->use_dma_api)
639                 vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &=
640                         ~VRING_DESC_F_NEXT;
641
642         if (indirect) {
643                 /* Now that the indirect table is filled in, map it. */
644                 dma_addr_t addr = vring_map_single(
645                         vq, desc, total_sg * sizeof(struct vring_desc),
646                         DMA_TO_DEVICE);
647                 if (vring_mapping_error(vq, addr)) {
648                         if (vq->premapped)
649                                 goto free_indirect;
650
651                         goto unmap_release;
652                 }
653
654                 virtqueue_add_desc_split(_vq, vq->split.vring.desc,
655                                          head, addr,
656                                          total_sg * sizeof(struct vring_desc),
657                                          VRING_DESC_F_INDIRECT,
658                                          false);
659         }
660
661         /* We're using some buffers from the free list. */
662         vq->vq.num_free -= descs_used;
663
664         /* Update free pointer */
665         if (indirect)
666                 vq->free_head = vq->split.desc_extra[head].next;
667         else
668                 vq->free_head = i;
669
670         /* Store token and indirect buffer state. */
671         vq->split.desc_state[head].data = data;
672         if (indirect)
673                 vq->split.desc_state[head].indir_desc = desc;
674         else
675                 vq->split.desc_state[head].indir_desc = ctx;
676
677         /* Put entry in available array (but don't update avail->idx until they
678          * do sync). */
679         avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
680         vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);
681
682         /* Descriptors and available array need to be set before we expose the
683          * new available array entries. */
684         virtio_wmb(vq->weak_barriers);
685         vq->split.avail_idx_shadow++;
686         vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
687                                                 vq->split.avail_idx_shadow);
688         vq->num_added++;
689
690         pr_debug("Added buffer head %i to %p\n", head, vq);
691         END_USE(vq);
692
693         /* This is very unlikely, but theoretically possible.  Kick
694          * just in case. */
695         if (unlikely(vq->num_added == (1 << 16) - 1))
696                 virtqueue_kick(_vq);
697
698         return 0;
699
700 unmap_release:
701         err_idx = i;
702
703         if (indirect)
704                 i = 0;
705         else
706                 i = head;
707
708         for (n = 0; n < total_sg; n++) {
709                 if (i == err_idx)
710                         break;
711                 if (indirect) {
712                         vring_unmap_one_split_indirect(vq, &desc[i]);
713                         i = virtio16_to_cpu(_vq->vdev, desc[i].next);
714                 } else
715                         i = vring_unmap_one_split(vq, i);
716         }
717
718 free_indirect:
719         if (indirect)
720                 kfree(desc);
721
722         END_USE(vq);
723         return -ENOMEM;
724 }
725
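/*
 * For context, a sketch of the usual driver-side cycle that lands in
 * virtqueue_add_split() on split rings, using the public API (tx_vq,
 * sg, buf, len, consume() and the label are illustrative names):
 *
 *	sg_init_one(&sg, buf, len);
 *	if (virtqueue_add_outbuf(tx_vq, &sg, 1, buf, GFP_ATOMIC))
 *		goto requeue_later;
 *	virtqueue_kick(tx_vq);
 *
 *	... later, typically from the virtqueue callback ...
 *
 *	while ((buf = virtqueue_get_buf(tx_vq, &len)))
 *		consume(buf, len);
 */
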
726 static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
727 {
728         struct vring_virtqueue *vq = to_vvq(_vq);
729         u16 new, old;
730         bool needs_kick;
731
732         START_USE(vq);
733         /* We need to expose available array entries before checking avail
734          * event. */
735         virtio_mb(vq->weak_barriers);
736
737         old = vq->split.avail_idx_shadow - vq->num_added;
738         new = vq->split.avail_idx_shadow;
739         vq->num_added = 0;
740
741         LAST_ADD_TIME_CHECK(vq);
742         LAST_ADD_TIME_INVALID(vq);
743
744         if (vq->event) {
745                 needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev,
746                                         vring_avail_event(&vq->split.vring)),
747                                               new, old);
748         } else {
749                 needs_kick = !(vq->split.vring.used->flags &
750                                         cpu_to_virtio16(_vq->vdev,
751                                                 VRING_USED_F_NO_NOTIFY));
752         }
753         END_USE(vq);
754         return needs_kick;
755 }
756
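/*
 * For reference, a sketch of the event-index test used above, assuming
 * the usual vring_need_event() helper from include/uapi/linux/virtio_ring.h:
 * a kick is needed when the device's avail event index falls within the
 * window of entries added since the last kick, computed in free-running
 * 16-bit arithmetic:
 *
 *	static inline int vring_need_event(__u16 event_idx, __u16 new_idx, __u16 old)
 *	{
 *		return (__u16)(new_idx - event_idx - 1) < (__u16)(new_idx - old);
 *	}
 */
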
757 static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
758                              void **ctx)
759 {
760         unsigned int i, j;
761         __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
762
763         /* Clear data ptr. */
764         vq->split.desc_state[head].data = NULL;
765
766         /* Put back on free list: unmap first-level descriptors and find end */
767         i = head;
768
769         while (vq->split.vring.desc[i].flags & nextflag) {
770                 vring_unmap_one_split(vq, i);
771                 i = vq->split.desc_extra[i].next;
772                 vq->vq.num_free++;
773         }
774
775         vring_unmap_one_split(vq, i);
776         vq->split.desc_extra[i].next = vq->free_head;
777         vq->free_head = head;
778
779         /* Plus final descriptor */
780         vq->vq.num_free++;
781
782         if (vq->indirect) {
783                 struct vring_desc *indir_desc =
784                                 vq->split.desc_state[head].indir_desc;
785                 u32 len;
786
787                 /* Free the indirect table, if any, now that it's unmapped. */
788                 if (!indir_desc)
789                         return;
790
791                 len = vq->split.desc_extra[head].len;
792
793                 BUG_ON(!(vq->split.desc_extra[head].flags &
794                                 VRING_DESC_F_INDIRECT));
795                 BUG_ON(len == 0 || len % sizeof(struct vring_desc));
796
797                 if (vq->use_dma_api) {
798                         for (j = 0; j < len / sizeof(struct vring_desc); j++)
799                                 vring_unmap_one_split_indirect(vq, &indir_desc[j]);
800                 }
801
802                 kfree(indir_desc);
803                 vq->split.desc_state[head].indir_desc = NULL;
804         } else if (ctx) {
805                 *ctx = vq->split.desc_state[head].indir_desc;
806         }
807 }
808
809 static bool more_used_split(const struct vring_virtqueue *vq)
810 {
811         return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev,
812                         vq->split.vring.used->idx);
813 }
814
815 static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
816                                          unsigned int *len,
817                                          void **ctx)
818 {
819         struct vring_virtqueue *vq = to_vvq(_vq);
820         void *ret;
821         unsigned int i;
822         u16 last_used;
823
824         START_USE(vq);
825
826         if (unlikely(vq->broken)) {
827                 END_USE(vq);
828                 return NULL;
829         }
830
831         if (!more_used_split(vq)) {
832                 pr_debug("No more buffers in queue\n");
833                 END_USE(vq);
834                 return NULL;
835         }
836
837         /* Only get used array entries after they have been exposed by host. */
838         virtio_rmb(vq->weak_barriers);
839
840         last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
841         i = virtio32_to_cpu(_vq->vdev,
842                         vq->split.vring.used->ring[last_used].id);
843         *len = virtio32_to_cpu(_vq->vdev,
844                         vq->split.vring.used->ring[last_used].len);
845
846         if (unlikely(i >= vq->split.vring.num)) {
847                 BAD_RING(vq, "id %u out of range\n", i);
848                 return NULL;
849         }
850         if (unlikely(!vq->split.desc_state[i].data)) {
851                 BAD_RING(vq, "id %u is not a head!\n", i);
852                 return NULL;
853         }
854
855         /* detach_buf_split clears data, so grab it now. */
856         ret = vq->split.desc_state[i].data;
857         detach_buf_split(vq, i, ctx);
858         vq->last_used_idx++;
859         /* If we expect an interrupt for the next entry, tell host
860          * by writing the event index and flushing out the write before
861          * the read in the next get_buf call. */
862         if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
863                 virtio_store_mb(vq->weak_barriers,
864                                 &vring_used_event(&vq->split.vring),
865                                 cpu_to_virtio16(_vq->vdev, vq->last_used_idx));
866
867         LAST_ADD_TIME_INVALID(vq);
868
869         END_USE(vq);
870         return ret;
871 }
872
873 static void virtqueue_disable_cb_split(struct virtqueue *_vq)
874 {
875         struct vring_virtqueue *vq = to_vvq(_vq);
876
877         if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
878                 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
879
880                 /*
881                  * If device triggered an event already it won't trigger one again:
882                  * no need to disable.
883                  */
884                 if (vq->event_triggered)
885                         return;
886
887                 if (vq->event)
888                         /* TODO: this is a hack. Figure out a cleaner value to write. */
889                         vring_used_event(&vq->split.vring) = 0x0;
890                 else
891                         vq->split.vring.avail->flags =
892                                 cpu_to_virtio16(_vq->vdev,
893                                                 vq->split.avail_flags_shadow);
894         }
895 }
896
897 static unsigned int virtqueue_enable_cb_prepare_split(struct virtqueue *_vq)
898 {
899         struct vring_virtqueue *vq = to_vvq(_vq);
900         u16 last_used_idx;
901
902         START_USE(vq);
903
904         /* We optimistically turn back on interrupts, then check if there was
905          * more to do. */
906         /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
907          * either clear the flags bit or point the event index at the next
908          * entry. Always do both to keep code simple. */
909         if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
910                 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
911                 if (!vq->event)
912                         vq->split.vring.avail->flags =
913                                 cpu_to_virtio16(_vq->vdev,
914                                                 vq->split.avail_flags_shadow);
915         }
916         vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev,
917                         last_used_idx = vq->last_used_idx);
918         END_USE(vq);
919         return last_used_idx;
920 }
921
922 static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned int last_used_idx)
923 {
924         struct vring_virtqueue *vq = to_vvq(_vq);
925
926         return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev,
927                         vq->split.vring.used->idx);
928 }
929
930 static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
931 {
932         struct vring_virtqueue *vq = to_vvq(_vq);
933         u16 bufs;
934
935         START_USE(vq);
936
937         /* We optimistically turn back on interrupts, then check if there was
938          * more to do. */
939         /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
940          * either clear the flags bit or point the event index at the next
941          * entry. Always update the event index to keep code simple. */
942         if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
943                 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
944                 if (!vq->event)
945                         vq->split.vring.avail->flags =
946                                 cpu_to_virtio16(_vq->vdev,
947                                                 vq->split.avail_flags_shadow);
948         }
949         /* TODO: tune this threshold */
950         bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4;
951
952         virtio_store_mb(vq->weak_barriers,
953                         &vring_used_event(&vq->split.vring),
954                         cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs));
955
956         if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx)
957                                         - vq->last_used_idx) > bufs)) {
958                 END_USE(vq);
959                 return false;
960         }
961
962         END_USE(vq);
963         return true;
964 }
965
966 static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
967 {
968         struct vring_virtqueue *vq = to_vvq(_vq);
969         unsigned int i;
970         void *buf;
971
972         START_USE(vq);
973
974         for (i = 0; i < vq->split.vring.num; i++) {
975                 if (!vq->split.desc_state[i].data)
976                         continue;
977                 /* detach_buf_split clears data, so grab it now. */
978                 buf = vq->split.desc_state[i].data;
979                 detach_buf_split(vq, i, NULL);
980                 vq->split.avail_idx_shadow--;
981                 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
982                                 vq->split.avail_idx_shadow);
983                 END_USE(vq);
984                 return buf;
985         }
986         /* That should have freed everything. */
987         BUG_ON(vq->vq.num_free != vq->split.vring.num);
988
989         END_USE(vq);
990         return NULL;
991 }
992
993 static void virtqueue_vring_init_split(struct vring_virtqueue_split *vring_split,
994                                        struct vring_virtqueue *vq)
995 {
996         struct virtio_device *vdev;
997
998         vdev = vq->vq.vdev;
999
1000         vring_split->avail_flags_shadow = 0;
1001         vring_split->avail_idx_shadow = 0;
1002
1003         /* No callback?  Tell other side not to bother us. */
1004         if (!vq->vq.callback) {
1005                 vring_split->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
1006                 if (!vq->event)
1007                         vring_split->vring.avail->flags = cpu_to_virtio16(vdev,
1008                                         vring_split->avail_flags_shadow);
1009         }
1010 }
1011
1012 static void virtqueue_reinit_split(struct vring_virtqueue *vq)
1013 {
1014         int num;
1015
1016         num = vq->split.vring.num;
1017
1018         vq->split.vring.avail->flags = 0;
1019         vq->split.vring.avail->idx = 0;
1020
1021         /* reset avail event */
1022         vq->split.vring.avail->ring[num] = 0;
1023
1024         vq->split.vring.used->flags = 0;
1025         vq->split.vring.used->idx = 0;
1026
1027         /* reset used event */
1028         *(__virtio16 *)&(vq->split.vring.used->ring[num]) = 0;
1029
1030         virtqueue_init(vq, num);
1031
1032         virtqueue_vring_init_split(&vq->split, vq);
1033 }
1034
1035 static void virtqueue_vring_attach_split(struct vring_virtqueue *vq,
1036                                          struct vring_virtqueue_split *vring_split)
1037 {
1038         vq->split = *vring_split;
1039
1040         /* Put everything in free lists. */
1041         vq->free_head = 0;
1042 }
1043
1044 static int vring_alloc_state_extra_split(struct vring_virtqueue_split *vring_split)
1045 {
1046         struct vring_desc_state_split *state;
1047         struct vring_desc_extra *extra;
1048         u32 num = vring_split->vring.num;
1049
1050         state = kmalloc_array(num, sizeof(struct vring_desc_state_split), GFP_KERNEL);
1051         if (!state)
1052                 goto err_state;
1053
1054         extra = vring_alloc_desc_extra(num);
1055         if (!extra)
1056                 goto err_extra;
1057
1058         memset(state, 0, num * sizeof(struct vring_desc_state_split));
1059
1060         vring_split->desc_state = state;
1061         vring_split->desc_extra = extra;
1062         return 0;
1063
1064 err_extra:
1065         kfree(state);
1066 err_state:
1067         return -ENOMEM;
1068 }
1069
1070 static void vring_free_split(struct vring_virtqueue_split *vring_split,
1071                              struct virtio_device *vdev, struct device *dma_dev)
1072 {
1073         vring_free_queue(vdev, vring_split->queue_size_in_bytes,
1074                          vring_split->vring.desc,
1075                          vring_split->queue_dma_addr,
1076                          dma_dev);
1077
1078         kfree(vring_split->desc_state);
1079         kfree(vring_split->desc_extra);
1080 }
1081
1082 static int vring_alloc_queue_split(struct vring_virtqueue_split *vring_split,
1083                                    struct virtio_device *vdev,
1084                                    u32 num,
1085                                    unsigned int vring_align,
1086                                    bool may_reduce_num,
1087                                    struct device *dma_dev)
1088 {
1089         void *queue = NULL;
1090         dma_addr_t dma_addr;
1091
1092         /* We assume num is a power of 2. */
1093         if (!is_power_of_2(num)) {
1094                 dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
1095                 return -EINVAL;
1096         }
1097
1098         /* TODO: allocate each queue chunk individually */
1099         for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
1100                 queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
1101                                           &dma_addr,
1102                                           GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
1103                                           dma_dev);
1104                 if (queue)
1105                         break;
1106                 if (!may_reduce_num)
1107                         return -ENOMEM;
1108         }
1109
1110         if (!num)
1111                 return -ENOMEM;
1112
1113         if (!queue) {
1114                 /* Try to get a single page. You are my only hope! */
1115                 queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
1116                                           &dma_addr, GFP_KERNEL | __GFP_ZERO,
1117                                           dma_dev);
1118         }
1119         if (!queue)
1120                 return -ENOMEM;
1121
1122         vring_init(&vring_split->vring, num, queue, vring_align);
1123
1124         vring_split->queue_dma_addr = dma_addr;
1125         vring_split->queue_size_in_bytes = vring_size(num, vring_align);
1126
1127         vring_split->vring_align = vring_align;
1128         vring_split->may_reduce_num = may_reduce_num;
1129
1130         return 0;
1131 }
1132
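/*
 * A rough sketch of the split-ring layout that vring_init() imposes on
 * the single allocation made above (per the virtio spec; the used ring
 * starts at the next vring_align boundary, which is why vring_size()
 * depends on the alignment):
 *
 *	desc:  struct vring_desc ring[num]              (16 bytes per entry)
 *	avail: flags, idx, ring[num], used_event        (__virtio16 each)
 *	       padding up to the next vring_align boundary
 *	used:  flags, idx, ring[num], avail_event       (ring entries are
 *	                                                 8-byte vring_used_elem)
 */
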
1133 static struct virtqueue *vring_create_virtqueue_split(
1134         unsigned int index,
1135         unsigned int num,
1136         unsigned int vring_align,
1137         struct virtio_device *vdev,
1138         bool weak_barriers,
1139         bool may_reduce_num,
1140         bool context,
1141         bool (*notify)(struct virtqueue *),
1142         void (*callback)(struct virtqueue *),
1143         const char *name,
1144         struct device *dma_dev)
1145 {
1146         struct vring_virtqueue_split vring_split = {};
1147         struct virtqueue *vq;
1148         int err;
1149
1150         err = vring_alloc_queue_split(&vring_split, vdev, num, vring_align,
1151                                       may_reduce_num, dma_dev);
1152         if (err)
1153                 return NULL;
1154
1155         vq = __vring_new_virtqueue(index, &vring_split, vdev, weak_barriers,
1156                                    context, notify, callback, name, dma_dev);
1157         if (!vq) {
1158                 vring_free_split(&vring_split, vdev, dma_dev);
1159                 return NULL;
1160         }
1161
1162         to_vvq(vq)->we_own_ring = true;
1163
1164         return vq;
1165 }
1166
1167 static int virtqueue_resize_split(struct virtqueue *_vq, u32 num)
1168 {
1169         struct vring_virtqueue_split vring_split = {};
1170         struct vring_virtqueue *vq = to_vvq(_vq);
1171         struct virtio_device *vdev = _vq->vdev;
1172         int err;
1173
1174         err = vring_alloc_queue_split(&vring_split, vdev, num,
1175                                       vq->split.vring_align,
1176                                       vq->split.may_reduce_num,
1177                                       vring_dma_dev(vq));
1178         if (err)
1179                 goto err;
1180
1181         err = vring_alloc_state_extra_split(&vring_split);
1182         if (err)
1183                 goto err_state_extra;
1184
1185         vring_free(&vq->vq);
1186
1187         virtqueue_vring_init_split(&vring_split, vq);
1188
1189         virtqueue_init(vq, vring_split.vring.num);
1190         virtqueue_vring_attach_split(vq, &vring_split);
1191
1192         return 0;
1193
1194 err_state_extra:
1195         vring_free_split(&vring_split, vdev, vring_dma_dev(vq));
1196 err:
1197         virtqueue_reinit_split(vq);
1198         return -ENOMEM;
1199 }
1200
1201
1202 /*
1203  * Packed ring specific functions - *_packed().
1204  */
1205 static bool packed_used_wrap_counter(u16 last_used_idx)
1206 {
1207         return !!(last_used_idx & (1 << VRING_PACKED_EVENT_F_WRAP_CTR));
1208 }
1209
1210 static u16 packed_last_used(u16 last_used_idx)
1211 {
1212         return last_used_idx & ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR));
1213 }
1214
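/*
 * A small worked sketch of how last_used_idx packs both values for the
 * packed ring: VRING_PACKED_EVENT_F_WRAP_CTR is bit 15, so the low 15
 * bits hold the used index and the top bit holds the wrap counter.
 *
 *	u16 packed = used_idx | (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR);
 *
 *	packed_last_used(packed)         == used_idx
 *	packed_used_wrap_counter(packed) == wrap_counter
 */
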
1215 static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
1216                                      const struct vring_desc_extra *extra)
1217 {
1218         u16 flags;
1219
1220         if (!vq->use_dma_api)
1221                 return;
1222
1223         flags = extra->flags;
1224
1225         if (flags & VRING_DESC_F_INDIRECT) {
1226                 dma_unmap_single(vring_dma_dev(vq),
1227                                  extra->addr, extra->len,
1228                                  (flags & VRING_DESC_F_WRITE) ?
1229                                  DMA_FROM_DEVICE : DMA_TO_DEVICE);
1230         } else {
1231                 dma_unmap_page(vring_dma_dev(vq),
1232                                extra->addr, extra->len,
1233                                (flags & VRING_DESC_F_WRITE) ?
1234                                DMA_FROM_DEVICE : DMA_TO_DEVICE);
1235         }
1236 }
1237
1238 static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
1239                                     const struct vring_packed_desc *desc)
1240 {
1241         u16 flags;
1242
1243         if (!vq->use_dma_api)
1244                 return;
1245
1246         flags = le16_to_cpu(desc->flags);
1247
1248         dma_unmap_page(vring_dma_dev(vq),
1249                        le64_to_cpu(desc->addr),
1250                        le32_to_cpu(desc->len),
1251                        (flags & VRING_DESC_F_WRITE) ?
1252                        DMA_FROM_DEVICE : DMA_TO_DEVICE);
1253 }
1254
1255 static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg,
1256                                                        gfp_t gfp)
1257 {
1258         struct vring_packed_desc *desc;
1259
1260         /*
1261          * We require lowmem mappings for the descriptors because
1262          * otherwise virt_to_phys will give us bogus addresses in the
1263          * virtqueue.
1264          */
1265         gfp &= ~__GFP_HIGHMEM;
1266
1267         desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp);
1268
1269         return desc;
1270 }
1271
1272 static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
1273                                          struct scatterlist *sgs[],
1274                                          unsigned int total_sg,
1275                                          unsigned int out_sgs,
1276                                          unsigned int in_sgs,
1277                                          void *data,
1278                                          gfp_t gfp)
1279 {
1280         struct vring_packed_desc *desc;
1281         struct scatterlist *sg;
1282         unsigned int i, n, err_idx;
1283         u16 head, id;
1284         dma_addr_t addr;
1285
1286         head = vq->packed.next_avail_idx;
1287         desc = alloc_indirect_packed(total_sg, gfp);
1288         if (!desc)
1289                 return -ENOMEM;
1290
1291         if (unlikely(vq->vq.num_free < 1)) {
1292                 pr_debug("Can't add buf len 1 - avail = 0\n");
1293                 kfree(desc);
1294                 END_USE(vq);
1295                 return -ENOSPC;
1296         }
1297
1298         i = 0;
1299         id = vq->free_head;
1300         BUG_ON(id == vq->packed.vring.num);
1301
1302         for (n = 0; n < out_sgs + in_sgs; n++) {
1303                 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1304                         if (vring_map_one_sg(vq, sg, n < out_sgs ?
1305                                              DMA_TO_DEVICE : DMA_FROM_DEVICE, &addr))
1306                                 goto unmap_release;
1307
1308                         desc[i].flags = cpu_to_le16(n < out_sgs ?
1309                                                 0 : VRING_DESC_F_WRITE);
1310                         desc[i].addr = cpu_to_le64(addr);
1311                         desc[i].len = cpu_to_le32(sg->length);
1312                         i++;
1313                 }
1314         }
1315
1316         /* Now that the indirect table is filled in, map it. */
1317         addr = vring_map_single(vq, desc,
1318                         total_sg * sizeof(struct vring_packed_desc),
1319                         DMA_TO_DEVICE);
1320         if (vring_mapping_error(vq, addr)) {
1321                 if (vq->premapped)
1322                         goto free_desc;
1323
1324                 goto unmap_release;
1325         }
1326
1327         vq->packed.vring.desc[head].addr = cpu_to_le64(addr);
1328         vq->packed.vring.desc[head].len = cpu_to_le32(total_sg *
1329                                 sizeof(struct vring_packed_desc));
1330         vq->packed.vring.desc[head].id = cpu_to_le16(id);
1331
1332         if (vq->use_dma_api) {
1333                 vq->packed.desc_extra[id].addr = addr;
1334                 vq->packed.desc_extra[id].len = total_sg *
1335                                 sizeof(struct vring_packed_desc);
1336                 vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT |
1337                                                   vq->packed.avail_used_flags;
1338         }
1339
1340         /*
1341          * A driver MUST NOT make the first descriptor in the list
1342          * available before all subsequent descriptors comprising
1343          * the list are made available.
1344          */
1345         virtio_wmb(vq->weak_barriers);
1346         vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT |
1347                                                 vq->packed.avail_used_flags);
1348
1349         /* We're using some buffers from the free list. */
1350         vq->vq.num_free -= 1;
1351
1352         /* Update free pointer */
1353         n = head + 1;
1354         if (n >= vq->packed.vring.num) {
1355                 n = 0;
1356                 vq->packed.avail_wrap_counter ^= 1;
1357                 vq->packed.avail_used_flags ^=
1358                                 1 << VRING_PACKED_DESC_F_AVAIL |
1359                                 1 << VRING_PACKED_DESC_F_USED;
1360         }
1361         vq->packed.next_avail_idx = n;
1362         vq->free_head = vq->packed.desc_extra[id].next;
1363
1364         /* Store token and indirect buffer state. */
1365         vq->packed.desc_state[id].num = 1;
1366         vq->packed.desc_state[id].data = data;
1367         vq->packed.desc_state[id].indir_desc = desc;
1368         vq->packed.desc_state[id].last = id;
1369
1370         vq->num_added += 1;
1371
1372         pr_debug("Added buffer head %i to %p\n", head, vq);
1373         END_USE(vq);
1374
1375         return 0;
1376
1377 unmap_release:
1378         err_idx = i;
1379
1380         for (i = 0; i < err_idx; i++)
1381                 vring_unmap_desc_packed(vq, &desc[i]);
1382
1383 free_desc:
1384         kfree(desc);
1385
1386         END_USE(vq);
1387         return -ENOMEM;
1388 }
1389
1390 static inline int virtqueue_add_packed(struct virtqueue *_vq,
1391                                        struct scatterlist *sgs[],
1392                                        unsigned int total_sg,
1393                                        unsigned int out_sgs,
1394                                        unsigned int in_sgs,
1395                                        void *data,
1396                                        void *ctx,
1397                                        gfp_t gfp)
1398 {
1399         struct vring_virtqueue *vq = to_vvq(_vq);
1400         struct vring_packed_desc *desc;
1401         struct scatterlist *sg;
1402         unsigned int i, n, c, descs_used, err_idx;
1403         __le16 head_flags, flags;
1404         u16 head, id, prev, curr, avail_used_flags;
1405         int err;
1406
1407         START_USE(vq);
1408
1409         BUG_ON(data == NULL);
1410         BUG_ON(ctx && vq->indirect);
1411
1412         if (unlikely(vq->broken)) {
1413                 END_USE(vq);
1414                 return -EIO;
1415         }
1416
1417         LAST_ADD_TIME_UPDATE(vq);
1418
1419         BUG_ON(total_sg == 0);
1420
1421         if (virtqueue_use_indirect(vq, total_sg)) {
1422                 err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
1423                                                     in_sgs, data, gfp);
1424                 if (err != -ENOMEM) {
1425                         END_USE(vq);
1426                         return err;
1427                 }
1428
1429                 /* fall back on direct */
1430         }
1431
1432         head = vq->packed.next_avail_idx;
1433         avail_used_flags = vq->packed.avail_used_flags;
1434
1435         WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect);
1436
1437         desc = vq->packed.vring.desc;
1438         i = head;
1439         descs_used = total_sg;
1440
1441         if (unlikely(vq->vq.num_free < descs_used)) {
1442                 pr_debug("Can't add buf len %i - avail = %i\n",
1443                          descs_used, vq->vq.num_free);
1444                 END_USE(vq);
1445                 return -ENOSPC;
1446         }
1447
1448         id = vq->free_head;
1449         BUG_ON(id == vq->packed.vring.num);
1450
1451         curr = id;
1452         c = 0;
1453         for (n = 0; n < out_sgs + in_sgs; n++) {
1454                 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1455                         dma_addr_t addr;
1456
1457                         if (vring_map_one_sg(vq, sg, n < out_sgs ?
1458                                              DMA_TO_DEVICE : DMA_FROM_DEVICE, &addr))
1459                                 goto unmap_release;
1460
1461                         flags = cpu_to_le16(vq->packed.avail_used_flags |
1462                                     (++c == total_sg ? 0 : VRING_DESC_F_NEXT) |
1463                                     (n < out_sgs ? 0 : VRING_DESC_F_WRITE));
1464                         if (i == head)
1465                                 head_flags = flags;
1466                         else
1467                                 desc[i].flags = flags;
1468
1469                         desc[i].addr = cpu_to_le64(addr);
1470                         desc[i].len = cpu_to_le32(sg->length);
1471                         desc[i].id = cpu_to_le16(id);
1472
1473                         if (unlikely(vq->use_dma_api)) {
1474                                 vq->packed.desc_extra[curr].addr = addr;
1475                                 vq->packed.desc_extra[curr].len = sg->length;
1476                                 vq->packed.desc_extra[curr].flags =
1477                                         le16_to_cpu(flags);
1478                         }
1479                         prev = curr;
1480                         curr = vq->packed.desc_extra[curr].next;
1481
1482                         if ((unlikely(++i >= vq->packed.vring.num))) {
1483                                 i = 0;
1484                                 vq->packed.avail_used_flags ^=
1485                                         1 << VRING_PACKED_DESC_F_AVAIL |
1486                                         1 << VRING_PACKED_DESC_F_USED;
1487                         }
1488                 }
1489         }
1490
1491         if (i < head)
1492                 vq->packed.avail_wrap_counter ^= 1;
1493
1494         /* We're using some buffers from the free list. */
1495         vq->vq.num_free -= descs_used;
1496
1497         /* Update free pointer */
1498         vq->packed.next_avail_idx = i;
1499         vq->free_head = curr;
1500
1501         /* Store token. */
1502         vq->packed.desc_state[id].num = descs_used;
1503         vq->packed.desc_state[id].data = data;
1504         vq->packed.desc_state[id].indir_desc = ctx;
1505         vq->packed.desc_state[id].last = prev;
1506
1507         /*
1508          * A driver MUST NOT make the first descriptor in the list
1509          * available before all subsequent descriptors comprising
1510          * the list are made available.
1511          */
1512         virtio_wmb(vq->weak_barriers);
1513         vq->packed.vring.desc[head].flags = head_flags;
1514         vq->num_added += descs_used;
1515
1516         pr_debug("Added buffer head %i to %p\n", head, vq);
1517         END_USE(vq);
1518
1519         return 0;
1520
1521 unmap_release:
1522         err_idx = i;
1523         i = head;
1524         curr = vq->free_head;
1525
1526         vq->packed.avail_used_flags = avail_used_flags;
1527
1528         for (n = 0; n < total_sg; n++) {
1529                 if (i == err_idx)
1530                         break;
1531                 vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]);
1532                 curr = vq->packed.desc_extra[curr].next;
1533                 i++;
1534                 if (i >= vq->packed.vring.num)
1535                         i = 0;
1536         }
1537
1538         END_USE(vq);
1539         return -EIO;
1540 }
1541
1542 static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
1543 {
1544         struct vring_virtqueue *vq = to_vvq(_vq);
1545         u16 new, old, off_wrap, flags, wrap_counter, event_idx;
1546         bool needs_kick;
1547         union {
1548                 struct {
1549                         __le16 off_wrap;
1550                         __le16 flags;
1551                 };
1552                 u32 u32;
1553         } snapshot;
1554
1555         START_USE(vq);
1556
1557         /*
1558          * We need to expose the new flags value before checking notification
1559          * suppressions.
1560          */
1561         virtio_mb(vq->weak_barriers);
1562
1563         old = vq->packed.next_avail_idx - vq->num_added;
1564         new = vq->packed.next_avail_idx;
1565         vq->num_added = 0;
1566
1567         snapshot.u32 = *(u32 *)vq->packed.vring.device;
1568         flags = le16_to_cpu(snapshot.flags);
1569
1570         LAST_ADD_TIME_CHECK(vq);
1571         LAST_ADD_TIME_INVALID(vq);
1572
1573         if (flags != VRING_PACKED_EVENT_FLAG_DESC) {
1574                 needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE);
1575                 goto out;
1576         }
1577
1578         off_wrap = le16_to_cpu(snapshot.off_wrap);
1579
1580         wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1581         event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1582         if (wrap_counter != vq->packed.avail_wrap_counter)
1583                 event_idx -= vq->packed.vring.num;
1584
1585         needs_kick = vring_need_event(event_idx, new, old);
1586 out:
1587         END_USE(vq);
1588         return needs_kick;
1589 }
1590
1591 static void detach_buf_packed(struct vring_virtqueue *vq,
1592                               unsigned int id, void **ctx)
1593 {
1594         struct vring_desc_state_packed *state = NULL;
1595         struct vring_packed_desc *desc;
1596         unsigned int i, curr;
1597
1598         state = &vq->packed.desc_state[id];
1599
1600         /* Clear data ptr. */
1601         state->data = NULL;
1602
1603         vq->packed.desc_extra[state->last].next = vq->free_head;
1604         vq->free_head = id;
1605         vq->vq.num_free += state->num;
1606
1607         if (unlikely(vq->use_dma_api)) {
1608                 curr = id;
1609                 for (i = 0; i < state->num; i++) {
1610                         vring_unmap_extra_packed(vq,
1611                                                  &vq->packed.desc_extra[curr]);
1612                         curr = vq->packed.desc_extra[curr].next;
1613                 }
1614         }
1615
1616         if (vq->indirect) {
1617                 u32 len;
1618
1619                 /* Free the indirect table, if any, now that it's unmapped. */
1620                 desc = state->indir_desc;
1621                 if (!desc)
1622                         return;
1623
1624                 if (vq->use_dma_api) {
1625                         len = vq->packed.desc_extra[id].len;
1626                         for (i = 0; i < len / sizeof(struct vring_packed_desc);
1627                                         i++)
1628                                 vring_unmap_desc_packed(vq, &desc[i]);
1629                 }
1630                 kfree(desc);
1631                 state->indir_desc = NULL;
1632         } else if (ctx) {
1633                 *ctx = state->indir_desc;
1634         }
1635 }
1636
1637 static inline bool is_used_desc_packed(const struct vring_virtqueue *vq,
1638                                        u16 idx, bool used_wrap_counter)
1639 {
1640         bool avail, used;
1641         u16 flags;
1642
1643         flags = le16_to_cpu(vq->packed.vring.desc[idx].flags);
1644         avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
1645         used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
1646
1647         return avail == used && used == used_wrap_counter;
1648 }
1649
1650 static bool more_used_packed(const struct vring_virtqueue *vq)
1651 {
1652         u16 last_used;
1653         u16 last_used_idx;
1654         bool used_wrap_counter;
1655
1656         last_used_idx = READ_ONCE(vq->last_used_idx);
1657         last_used = packed_last_used(last_used_idx);
1658         used_wrap_counter = packed_used_wrap_counter(last_used_idx);
1659         return is_used_desc_packed(vq, last_used, used_wrap_counter);
1660 }
1661
1662 static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
1663                                           unsigned int *len,
1664                                           void **ctx)
1665 {
1666         struct vring_virtqueue *vq = to_vvq(_vq);
1667         u16 last_used, id, last_used_idx;
1668         bool used_wrap_counter;
1669         void *ret;
1670
1671         START_USE(vq);
1672
1673         if (unlikely(vq->broken)) {
1674                 END_USE(vq);
1675                 return NULL;
1676         }
1677
1678         if (!more_used_packed(vq)) {
1679                 pr_debug("No more buffers in queue\n");
1680                 END_USE(vq);
1681                 return NULL;
1682         }
1683
1684         /* Only get used elements after they have been exposed by host. */
1685         virtio_rmb(vq->weak_barriers);
1686
1687         last_used_idx = READ_ONCE(vq->last_used_idx);
1688         used_wrap_counter = packed_used_wrap_counter(last_used_idx);
1689         last_used = packed_last_used(last_used_idx);
1690         id = le16_to_cpu(vq->packed.vring.desc[last_used].id);
1691         *len = le32_to_cpu(vq->packed.vring.desc[last_used].len);
1692
1693         if (unlikely(id >= vq->packed.vring.num)) {
1694                 BAD_RING(vq, "id %u out of range\n", id);
1695                 return NULL;
1696         }
1697         if (unlikely(!vq->packed.desc_state[id].data)) {
1698                 BAD_RING(vq, "id %u is not a head!\n", id);
1699                 return NULL;
1700         }
1701
1702         /* detach_buf_packed clears data, so grab it now. */
1703         ret = vq->packed.desc_state[id].data;
1704         detach_buf_packed(vq, id, ctx);
1705
1706         last_used += vq->packed.desc_state[id].num;
1707         if (unlikely(last_used >= vq->packed.vring.num)) {
1708                 last_used -= vq->packed.vring.num;
1709                 used_wrap_counter ^= 1;
1710         }
1711
1712         last_used = (last_used | (used_wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
1713         WRITE_ONCE(vq->last_used_idx, last_used);
1714
1715         /*
1716          * If we expect an interrupt for the next entry, tell host
1717          * by writing event index and flush out the write before
1718          * the read in the next get_buf call.
1719          */
1720         if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC)
1721                 virtio_store_mb(vq->weak_barriers,
1722                                 &vq->packed.vring.driver->off_wrap,
1723                                 cpu_to_le16(vq->last_used_idx));
1724
1725         LAST_ADD_TIME_INVALID(vq);
1726
1727         END_USE(vq);
1728         return ret;
1729 }
1730
1731 static void virtqueue_disable_cb_packed(struct virtqueue *_vq)
1732 {
1733         struct vring_virtqueue *vq = to_vvq(_vq);
1734
1735         if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) {
1736                 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1737
1738                 /*
1739                  * If device triggered an event already it won't trigger one again:
1740                  * no need to disable.
1741                  */
1742                 if (vq->event_triggered)
1743                         return;
1744
1745                 vq->packed.vring.driver->flags =
1746                         cpu_to_le16(vq->packed.event_flags_shadow);
1747         }
1748 }
1749
1750 static unsigned int virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
1751 {
1752         struct vring_virtqueue *vq = to_vvq(_vq);
1753
1754         START_USE(vq);
1755
1756         /*
1757          * We optimistically turn back on interrupts, then check if there was
1758          * more to do.
1759          */
1760
1761         if (vq->event) {
1762                 vq->packed.vring.driver->off_wrap =
1763                         cpu_to_le16(vq->last_used_idx);
1764                 /*
1765                  * We need to update event offset and event wrap
1766                  * counter first before updating event flags.
1767                  */
1768                 virtio_wmb(vq->weak_barriers);
1769         }
1770
1771         if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
1772                 vq->packed.event_flags_shadow = vq->event ?
1773                                 VRING_PACKED_EVENT_FLAG_DESC :
1774                                 VRING_PACKED_EVENT_FLAG_ENABLE;
1775                 vq->packed.vring.driver->flags =
1776                                 cpu_to_le16(vq->packed.event_flags_shadow);
1777         }
1778
1779         END_USE(vq);
1780         return vq->last_used_idx;
1781 }
1782
1783 static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap)
1784 {
1785         struct vring_virtqueue *vq = to_vvq(_vq);
1786         bool wrap_counter;
1787         u16 used_idx;
1788
1789         wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1790         used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1791
1792         return is_used_desc_packed(vq, used_idx, wrap_counter);
1793 }
1794
1795 static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
1796 {
1797         struct vring_virtqueue *vq = to_vvq(_vq);
1798         u16 used_idx, wrap_counter, last_used_idx;
1799         u16 bufs;
1800
1801         START_USE(vq);
1802
1803         /*
1804          * We optimistically turn back on interrupts, then check if there was
1805          * more to do.
1806          */
1807
1808         if (vq->event) {
1809                 /* TODO: tune this threshold */
1810                 bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4;
1811                 last_used_idx = READ_ONCE(vq->last_used_idx);
1812                 wrap_counter = packed_used_wrap_counter(last_used_idx);
1813
1814                 used_idx = packed_last_used(last_used_idx) + bufs;
1815                 if (used_idx >= vq->packed.vring.num) {
1816                         used_idx -= vq->packed.vring.num;
1817                         wrap_counter ^= 1;
1818                 }
1819
1820                 vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx |
1821                         (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
1822
1823                 /*
1824                  * We need to update event offset and event wrap
1825                  * counter first before updating event flags.
1826                  */
1827                 virtio_wmb(vq->weak_barriers);
1828         }
1829
1830         if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
1831                 vq->packed.event_flags_shadow = vq->event ?
1832                                 VRING_PACKED_EVENT_FLAG_DESC :
1833                                 VRING_PACKED_EVENT_FLAG_ENABLE;
1834                 vq->packed.vring.driver->flags =
1835                                 cpu_to_le16(vq->packed.event_flags_shadow);
1836         }
1837
1838         /*
1839          * We need to update event suppression structure first
1840          * before re-checking for more used buffers.
1841          */
1842         virtio_mb(vq->weak_barriers);
1843
1844         last_used_idx = READ_ONCE(vq->last_used_idx);
1845         wrap_counter = packed_used_wrap_counter(last_used_idx);
1846         used_idx = packed_last_used(last_used_idx);
1847         if (is_used_desc_packed(vq, used_idx, wrap_counter)) {
1848                 END_USE(vq);
1849                 return false;
1850         }
1851
1852         END_USE(vq);
1853         return true;
1854 }
1855
1856 static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
1857 {
1858         struct vring_virtqueue *vq = to_vvq(_vq);
1859         unsigned int i;
1860         void *buf;
1861
1862         START_USE(vq);
1863
1864         for (i = 0; i < vq->packed.vring.num; i++) {
1865                 if (!vq->packed.desc_state[i].data)
1866                         continue;
1867                 /* detach_buf clears data, so grab it now. */
1868                 buf = vq->packed.desc_state[i].data;
1869                 detach_buf_packed(vq, i, NULL);
1870                 END_USE(vq);
1871                 return buf;
1872         }
1873         /* That should have freed everything. */
1874         BUG_ON(vq->vq.num_free != vq->packed.vring.num);
1875
1876         END_USE(vq);
1877         return NULL;
1878 }
1879
1880 static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num)
1881 {
1882         struct vring_desc_extra *desc_extra;
1883         unsigned int i;
1884
1885         desc_extra = kmalloc_array(num, sizeof(struct vring_desc_extra),
1886                                    GFP_KERNEL);
1887         if (!desc_extra)
1888                 return NULL;
1889
1890         memset(desc_extra, 0, num * sizeof(struct vring_desc_extra));
1891
1892         for (i = 0; i < num - 1; i++)
1893                 desc_extra[i].next = i + 1;
1894
1895         return desc_extra;
1896 }
1897
1898 static void vring_free_packed(struct vring_virtqueue_packed *vring_packed,
1899                               struct virtio_device *vdev,
1900                               struct device *dma_dev)
1901 {
1902         if (vring_packed->vring.desc)
1903                 vring_free_queue(vdev, vring_packed->ring_size_in_bytes,
1904                                  vring_packed->vring.desc,
1905                                  vring_packed->ring_dma_addr,
1906                                  dma_dev);
1907
1908         if (vring_packed->vring.driver)
1909                 vring_free_queue(vdev, vring_packed->event_size_in_bytes,
1910                                  vring_packed->vring.driver,
1911                                  vring_packed->driver_event_dma_addr,
1912                                  dma_dev);
1913
1914         if (vring_packed->vring.device)
1915                 vring_free_queue(vdev, vring_packed->event_size_in_bytes,
1916                                  vring_packed->vring.device,
1917                                  vring_packed->device_event_dma_addr,
1918                                  dma_dev);
1919
1920         kfree(vring_packed->desc_state);
1921         kfree(vring_packed->desc_extra);
1922 }
1923
1924 static int vring_alloc_queue_packed(struct vring_virtqueue_packed *vring_packed,
1925                                     struct virtio_device *vdev,
1926                                     u32 num, struct device *dma_dev)
1927 {
1928         struct vring_packed_desc *ring;
1929         struct vring_packed_desc_event *driver, *device;
1930         dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
1931         size_t ring_size_in_bytes, event_size_in_bytes;
1932
1933         ring_size_in_bytes = num * sizeof(struct vring_packed_desc);
1934
1935         ring = vring_alloc_queue(vdev, ring_size_in_bytes,
1936                                  &ring_dma_addr,
1937                                  GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
1938                                  dma_dev);
1939         if (!ring)
1940                 goto err;
1941
1942         vring_packed->vring.desc         = ring;
1943         vring_packed->ring_dma_addr      = ring_dma_addr;
1944         vring_packed->ring_size_in_bytes = ring_size_in_bytes;
1945
1946         event_size_in_bytes = sizeof(struct vring_packed_desc_event);
1947
1948         driver = vring_alloc_queue(vdev, event_size_in_bytes,
1949                                    &driver_event_dma_addr,
1950                                    GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
1951                                    dma_dev);
1952         if (!driver)
1953                 goto err;
1954
1955         vring_packed->vring.driver          = driver;
1956         vring_packed->event_size_in_bytes   = event_size_in_bytes;
1957         vring_packed->driver_event_dma_addr = driver_event_dma_addr;
1958
1959         device = vring_alloc_queue(vdev, event_size_in_bytes,
1960                                    &device_event_dma_addr,
1961                                    GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
1962                                    dma_dev);
1963         if (!device)
1964                 goto err;
1965
1966         vring_packed->vring.device          = device;
1967         vring_packed->device_event_dma_addr = device_event_dma_addr;
1968
1969         vring_packed->vring.num = num;
1970
1971         return 0;
1972
1973 err:
1974         vring_free_packed(vring_packed, vdev, dma_dev);
1975         return -ENOMEM;
1976 }
1977
1978 static int vring_alloc_state_extra_packed(struct vring_virtqueue_packed *vring_packed)
1979 {
1980         struct vring_desc_state_packed *state;
1981         struct vring_desc_extra *extra;
1982         u32 num = vring_packed->vring.num;
1983
1984         state = kmalloc_array(num, sizeof(struct vring_desc_state_packed), GFP_KERNEL);
1985         if (!state)
1986                 goto err_desc_state;
1987
1988         memset(state, 0, num * sizeof(struct vring_desc_state_packed));
1989
1990         extra = vring_alloc_desc_extra(num);
1991         if (!extra)
1992                 goto err_desc_extra;
1993
1994         vring_packed->desc_state = state;
1995         vring_packed->desc_extra = extra;
1996
1997         return 0;
1998
1999 err_desc_extra:
2000         kfree(state);
2001 err_desc_state:
2002         return -ENOMEM;
2003 }
2004
2005 static void virtqueue_vring_init_packed(struct vring_virtqueue_packed *vring_packed,
2006                                         bool callback)
2007 {
2008         vring_packed->next_avail_idx = 0;
2009         vring_packed->avail_wrap_counter = 1;
2010         vring_packed->event_flags_shadow = 0;
2011         vring_packed->avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL;
2012
2013         /* No callback?  Tell other side not to bother us. */
2014         if (!callback) {
2015                 vring_packed->event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
2016                 vring_packed->vring.driver->flags =
2017                         cpu_to_le16(vring_packed->event_flags_shadow);
2018         }
2019 }
2020
2021 static void virtqueue_vring_attach_packed(struct vring_virtqueue *vq,
2022                                           struct vring_virtqueue_packed *vring_packed)
2023 {
2024         vq->packed = *vring_packed;
2025
2026         /* Put everything in free lists. */
2027         vq->free_head = 0;
2028 }
2029
2030 static void virtqueue_reinit_packed(struct vring_virtqueue *vq)
2031 {
2032         memset(vq->packed.vring.device, 0, vq->packed.event_size_in_bytes);
2033         memset(vq->packed.vring.driver, 0, vq->packed.event_size_in_bytes);
2034
2035         /* we need to reset the desc.flags. For more, see is_used_desc_packed() */
2036         memset(vq->packed.vring.desc, 0, vq->packed.ring_size_in_bytes);
2037
2038         virtqueue_init(vq, vq->packed.vring.num);
2039         virtqueue_vring_init_packed(&vq->packed, !!vq->vq.callback);
2040 }
2041
2042 static struct virtqueue *vring_create_virtqueue_packed(
2043         unsigned int index,
2044         unsigned int num,
2045         unsigned int vring_align,
2046         struct virtio_device *vdev,
2047         bool weak_barriers,
2048         bool may_reduce_num,
2049         bool context,
2050         bool (*notify)(struct virtqueue *),
2051         void (*callback)(struct virtqueue *),
2052         const char *name,
2053         struct device *dma_dev)
2054 {
2055         struct vring_virtqueue_packed vring_packed = {};
2056         struct vring_virtqueue *vq;
2057         int err;
2058
2059         if (vring_alloc_queue_packed(&vring_packed, vdev, num, dma_dev))
2060                 goto err_ring;
2061
2062         vq = kmalloc(sizeof(*vq), GFP_KERNEL);
2063         if (!vq)
2064                 goto err_vq;
2065
2066         vq->vq.callback = callback;
2067         vq->vq.vdev = vdev;
2068         vq->vq.name = name;
2069         vq->vq.index = index;
2070         vq->vq.reset = false;
2071         vq->we_own_ring = true;
2072         vq->notify = notify;
2073         vq->weak_barriers = weak_barriers;
2074 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
2075         vq->broken = true;
2076 #else
2077         vq->broken = false;
2078 #endif
2079         vq->packed_ring = true;
2080         vq->dma_dev = dma_dev;
2081         vq->use_dma_api = vring_use_dma_api(vdev);
2082         vq->premapped = false;
2083
2084         vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
2085                 !context;
2086         vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
2087
2088         if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
2089                 vq->weak_barriers = false;
2090
2091         err = vring_alloc_state_extra_packed(&vring_packed);
2092         if (err)
2093                 goto err_state_extra;
2094
2095         virtqueue_vring_init_packed(&vring_packed, !!callback);
2096
2097         virtqueue_init(vq, num);
2098         virtqueue_vring_attach_packed(vq, &vring_packed);
2099
2100         spin_lock(&vdev->vqs_list_lock);
2101         list_add_tail(&vq->vq.list, &vdev->vqs);
2102         spin_unlock(&vdev->vqs_list_lock);
2103         return &vq->vq;
2104
2105 err_state_extra:
2106         kfree(vq);
2107 err_vq:
2108         vring_free_packed(&vring_packed, vdev, dma_dev);
2109 err_ring:
2110         return NULL;
2111 }
2112
2113 static int virtqueue_resize_packed(struct virtqueue *_vq, u32 num)
2114 {
2115         struct vring_virtqueue_packed vring_packed = {};
2116         struct vring_virtqueue *vq = to_vvq(_vq);
2117         struct virtio_device *vdev = _vq->vdev;
2118         int err;
2119
2120         if (vring_alloc_queue_packed(&vring_packed, vdev, num, vring_dma_dev(vq)))
2121                 goto err_ring;
2122
2123         err = vring_alloc_state_extra_packed(&vring_packed);
2124         if (err)
2125                 goto err_state_extra;
2126
2127         vring_free(&vq->vq);
2128
2129         virtqueue_vring_init_packed(&vring_packed, !!vq->vq.callback);
2130
2131         virtqueue_init(vq, vring_packed.vring.num);
2132         virtqueue_vring_attach_packed(vq, &vring_packed);
2133
2134         return 0;
2135
2136 err_state_extra:
2137         vring_free_packed(&vring_packed, vdev, vring_dma_dev(vq));
2138 err_ring:
2139         virtqueue_reinit_packed(vq);
2140         return -ENOMEM;
2141 }
2142
2144 /*
2145  * Generic functions and exported symbols.
2146  */
2147
2148 static inline int virtqueue_add(struct virtqueue *_vq,
2149                                 struct scatterlist *sgs[],
2150                                 unsigned int total_sg,
2151                                 unsigned int out_sgs,
2152                                 unsigned int in_sgs,
2153                                 void *data,
2154                                 void *ctx,
2155                                 gfp_t gfp)
2156 {
2157         struct vring_virtqueue *vq = to_vvq(_vq);
2158
2159         return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg,
2160                                         out_sgs, in_sgs, data, ctx, gfp) :
2161                                  virtqueue_add_split(_vq, sgs, total_sg,
2162                                         out_sgs, in_sgs, data, ctx, gfp);
2163 }
2164
2165 /**
2166  * virtqueue_add_sgs - expose buffers to other end
2167  * @_vq: the struct virtqueue we're talking about.
2168  * @sgs: array of terminated scatterlists.
2169  * @out_sgs: the number of scatterlists readable by other side
2170  * @in_sgs: the number of scatterlists which are writable (after readable ones)
2171  * @data: the token identifying the buffer.
2172  * @gfp: how to do memory allocations (if necessary).
2173  *
2174  * Caller must ensure we don't call this with other virtqueue operations
2175  * at the same time (except where noted).
2176  *
2177  * Returns zero or a negative error (i.e. ENOSPC, ENOMEM, EIO).
2178  */
2179 int virtqueue_add_sgs(struct virtqueue *_vq,
2180                       struct scatterlist *sgs[],
2181                       unsigned int out_sgs,
2182                       unsigned int in_sgs,
2183                       void *data,
2184                       gfp_t gfp)
2185 {
2186         unsigned int i, total_sg = 0;
2187
2188         /* Count them first. */
2189         for (i = 0; i < out_sgs + in_sgs; i++) {
2190                 struct scatterlist *sg;
2191
2192                 for (sg = sgs[i]; sg; sg = sg_next(sg))
2193                         total_sg++;
2194         }
2195         return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs,
2196                              data, NULL, gfp);
2197 }
2198 EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
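
/*
 * A minimal usage sketch, not taken from an in-tree driver: 'vq', 'req' and
 * 'resp' are assumed to be a live virtqueue and two driver-owned buffers.
 * The first scatterlist is device-readable (out), the second device-writable
 * (in), and 'req' doubles as the token later returned by virtqueue_get_buf():
 *
 *      struct scatterlist req_sg, resp_sg, *sgs[2];
 *
 *      sg_init_one(&req_sg, req, sizeof(*req));
 *      sg_init_one(&resp_sg, resp, sizeof(*resp));
 *      sgs[0] = &req_sg;
 *      sgs[1] = &resp_sg;
 *      if (!virtqueue_add_sgs(vq, sgs, 1, 1, req, GFP_ATOMIC))
 *              virtqueue_kick(vq);
 */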
2199
2200 /**
2201  * virtqueue_add_outbuf - expose output buffers to other end
2202  * @vq: the struct virtqueue we're talking about.
2203  * @sg: scatterlist (must be well-formed and terminated!)
2204  * @num: the number of entries in @sg readable by other side
2205  * @data: the token identifying the buffer.
2206  * @gfp: how to do memory allocations (if necessary).
2207  *
2208  * Caller must ensure we don't call this with other virtqueue operations
2209  * at the same time (except where noted).
2210  *
2211  * Returns zero or a negative error (i.e. ENOSPC, ENOMEM, EIO).
2212  */
2213 int virtqueue_add_outbuf(struct virtqueue *vq,
2214                          struct scatterlist *sg, unsigned int num,
2215                          void *data,
2216                          gfp_t gfp)
2217 {
2218         return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp);
2219 }
2220 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
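
/*
 * Minimal sketch of queueing one device-readable buffer (hypothetical driver
 * code; 'vq', 'msg' and 'msg_len' are assumptions, not part of this file):
 *
 *      struct scatterlist sg;
 *
 *      sg_init_one(&sg, msg, msg_len);
 *      if (!virtqueue_add_outbuf(vq, &sg, 1, msg, GFP_KERNEL))
 *              virtqueue_kick(vq);
 */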
2221
2222 /**
2223  * virtqueue_add_inbuf - expose input buffers to other end
2224  * @vq: the struct virtqueue we're talking about.
2225  * @sg: scatterlist (must be well-formed and terminated!)
2226  * @num: the number of entries in @sg writable by other side
2227  * @data: the token identifying the buffer.
2228  * @gfp: how to do memory allocations (if necessary).
2229  *
2230  * Caller must ensure we don't call this with other virtqueue operations
2231  * at the same time (except where noted).
2232  *
2233  * Returns zero or a negative error (i.e. ENOSPC, ENOMEM, EIO).
2234  */
2235 int virtqueue_add_inbuf(struct virtqueue *vq,
2236                         struct scatterlist *sg, unsigned int num,
2237                         void *data,
2238                         gfp_t gfp)
2239 {
2240         return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp);
2241 }
2242 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
2243
2244 /**
2245  * virtqueue_add_inbuf_ctx - expose input buffers to other end
2246  * @vq: the struct virtqueue we're talking about.
2247  * @sg: scatterlist (must be well-formed and terminated!)
2248  * @num: the number of entries in @sg writable by other side
2249  * @data: the token identifying the buffer.
2250  * @ctx: extra context for the token
2251  * @gfp: how to do memory allocations (if necessary).
2252  *
2253  * Caller must ensure we don't call this with other virtqueue operations
2254  * at the same time (except where noted).
2255  *
2256  * Returns zero or a negative error (i.e. ENOSPC, ENOMEM, EIO).
2257  */
2258 int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
2259                         struct scatterlist *sg, unsigned int num,
2260                         void *data,
2261                         void *ctx,
2262                         gfp_t gfp)
2263 {
2264         return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp);
2265 }
2266 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
2267
2268 /**
2269  * virtqueue_dma_dev - get the DMA device
2270  * @_vq: the struct virtqueue we're talking about.
2271  *
2272  * Returns the DMA device; it can be used with the DMA API.
2273  */
2274 struct device *virtqueue_dma_dev(struct virtqueue *_vq)
2275 {
2276         struct vring_virtqueue *vq = to_vvq(_vq);
2277
2278         if (vq->use_dma_api)
2279                 return vring_dma_dev(vq);
2280         else
2281                 return NULL;
2282 }
2283 EXPORT_SYMBOL_GPL(virtqueue_dma_dev);
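
/*
 * Sketch of how a driver that maps its own buffers might use the returned
 * device (hypothetical code; 'vq', 'buf' and 'len' are assumptions). A NULL
 * return means the vring does not use the DMA API, so no mapping is needed:
 *
 *      struct device *dma_dev = virtqueue_dma_dev(vq);
 *      dma_addr_t addr;
 *
 *      if (dma_dev) {
 *              addr = dma_map_single(dma_dev, buf, len, DMA_TO_DEVICE);
 *              if (dma_mapping_error(dma_dev, addr))
 *                      return -ENOMEM;
 *      }
 */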
2284
2285 /**
2286  * virtqueue_kick_prepare - first half of split virtqueue_kick call.
2287  * @_vq: the struct virtqueue
2288  *
2289  * Instead of virtqueue_kick(), you can do:
2290  *      if (virtqueue_kick_prepare(vq))
2291  *              virtqueue_notify(vq);
2292  *
2293  * This is sometimes useful because virtqueue_kick_prepare() needs
2294  * to be serialized, but the actual virtqueue_notify() call does not.
2295  */
2296 bool virtqueue_kick_prepare(struct virtqueue *_vq)
2297 {
2298         struct vring_virtqueue *vq = to_vvq(_vq);
2299
2300         return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) :
2301                                  virtqueue_kick_prepare_split(_vq);
2302 }
2303 EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
2304
2305 /**
2306  * virtqueue_notify - second half of split virtqueue_kick call.
2307  * @_vq: the struct virtqueue
2308  *
2309  * This does not need to be serialized.
2310  *
2311  * Returns false if host notify failed or queue is broken, otherwise true.
2312  */
2313 bool virtqueue_notify(struct virtqueue *_vq)
2314 {
2315         struct vring_virtqueue *vq = to_vvq(_vq);
2316
2317         if (unlikely(vq->broken))
2318                 return false;
2319
2320         /* Prod other side to tell it about changes. */
2321         if (!vq->notify(_vq)) {
2322                 vq->broken = true;
2323                 return false;
2324         }
2325         return true;
2326 }
2327 EXPORT_SYMBOL_GPL(virtqueue_notify);
2328
2329 /**
2330  * virtqueue_kick - update after add_buf
2331  * @vq: the struct virtqueue
2332  *
2333  * After one or more virtqueue_add_* calls, invoke this to kick
2334  * the other side.
2335  *
2336  * Caller must ensure we don't call this with other virtqueue
2337  * operations at the same time (except where noted).
2338  *
2339  * Returns false if kick failed, otherwise true.
2340  */
2341 bool virtqueue_kick(struct virtqueue *vq)
2342 {
2343         if (virtqueue_kick_prepare(vq))
2344                 return virtqueue_notify(vq);
2345         return true;
2346 }
2347 EXPORT_SYMBOL_GPL(virtqueue_kick);
2348
2349 /**
2350  * virtqueue_get_buf_ctx - get the next used buffer
2351  * @_vq: the struct virtqueue we're talking about.
2352  * @len: the length written into the buffer
2353  * @ctx: extra context for the token
2354  *
2355  * If the device wrote data into the buffer, @len will be set to the
2356  * amount written.  This means you don't need to clear the buffer
2357  * beforehand to ensure there's no data leakage in the case of short
2358  * writes.
2359  *
2360  * Caller must ensure we don't call this with other virtqueue
2361  * operations at the same time (except where noted).
2362  *
2363  * Returns NULL if there are no used buffers, or the "data" token
2364  * handed to virtqueue_add_*().
2365  */
2366 void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
2367                             void **ctx)
2368 {
2369         struct vring_virtqueue *vq = to_vvq(_vq);
2370
2371         return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) :
2372                                  virtqueue_get_buf_ctx_split(_vq, len, ctx);
2373 }
2374 EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);
2375
2376 void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
2377 {
2378         return virtqueue_get_buf_ctx(_vq, len, NULL);
2379 }
2380 EXPORT_SYMBOL_GPL(virtqueue_get_buf);
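
/*
 * Typical completion-side sketch (hypothetical callback; 'vq' is assumed and
 * handle_buf() stands in for driver-specific processing of the returned
 * token):
 *
 *      unsigned int len;
 *      void *buf;
 *
 *      while ((buf = virtqueue_get_buf(vq, &len)) != NULL)
 *              handle_buf(buf, len);
 */
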
2381 /**
2382  * virtqueue_disable_cb - disable callbacks
2383  * @_vq: the struct virtqueue we're talking about.
2384  *
2385  * Note that this is not necessarily synchronous, hence unreliable and only
2386  * useful as an optimization.
2387  *
2388  * Unlike other operations, this need not be serialized.
2389  */
2390 void virtqueue_disable_cb(struct virtqueue *_vq)
2391 {
2392         struct vring_virtqueue *vq = to_vvq(_vq);
2393
2394         if (vq->packed_ring)
2395                 virtqueue_disable_cb_packed(_vq);
2396         else
2397                 virtqueue_disable_cb_split(_vq);
2398 }
2399 EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
2400
2401 /**
2402  * virtqueue_enable_cb_prepare - restart callbacks after disable_cb
2403  * @_vq: the struct virtqueue we're talking about.
2404  *
2405  * This re-enables callbacks; it returns the current queue state
2406  * in an opaque unsigned value. This value should be later tested by
2407  * virtqueue_poll, to detect a possible race between the driver checking for
2408  * more work, and enabling callbacks.
2409  *
2410  * Caller must ensure we don't call this with other virtqueue
2411  * operations at the same time (except where noted).
2412  */
2413 unsigned int virtqueue_enable_cb_prepare(struct virtqueue *_vq)
2414 {
2415         struct vring_virtqueue *vq = to_vvq(_vq);
2416
2417         if (vq->event_triggered)
2418                 vq->event_triggered = false;
2419
2420         return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) :
2421                                  virtqueue_enable_cb_prepare_split(_vq);
2422 }
2423 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);
2424
2425 /**
2426  * virtqueue_poll - query pending used buffers
2427  * @_vq: the struct virtqueue we're talking about.
2428  * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
2429  *
2430  * Returns "true" if there are pending used buffers in the queue.
2431  *
2432  * This does not need to be serialized.
2433  */
2434 bool virtqueue_poll(struct virtqueue *_vq, unsigned int last_used_idx)
2435 {
2436         struct vring_virtqueue *vq = to_vvq(_vq);
2437
2438         if (unlikely(vq->broken))
2439                 return false;
2440
2441         virtio_mb(vq->weak_barriers);
2442         return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) :
2443                                  virtqueue_poll_split(_vq, last_used_idx);
2444 }
2445 EXPORT_SYMBOL_GPL(virtqueue_poll);
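
/*
 * Sketch of the prepare/poll pattern described above, as a polling loop
 * might use it (hypothetical code; 'vq' is an assumption). If the poll
 * reports more used buffers, the race was lost and processing continues
 * with callbacks disabled:
 *
 *      unsigned int opaque;
 *
 *      opaque = virtqueue_enable_cb_prepare(vq);
 *      if (virtqueue_poll(vq, opaque))
 *              virtqueue_disable_cb(vq);
 */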
2446
2447 /**
2448  * virtqueue_enable_cb - restart callbacks after disable_cb.
2449  * @_vq: the struct virtqueue we're talking about.
2450  *
2451  * This re-enables callbacks; it returns "false" if there are pending
2452  * buffers in the queue, to detect a possible race between the driver
2453  * checking for more work, and enabling callbacks.
2454  *
2455  * Caller must ensure we don't call this with other virtqueue
2456  * operations at the same time (except where noted).
2457  */
2458 bool virtqueue_enable_cb(struct virtqueue *_vq)
2459 {
2460         unsigned int last_used_idx = virtqueue_enable_cb_prepare(_vq);
2461
2462         return !virtqueue_poll(_vq, last_used_idx);
2463 }
2464 EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
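
/*
 * A common way to drain the queue from its callback while avoiding the race
 * described above (a sketch; 'vq', 'buf', 'len' and process() are
 * assumptions):
 *
 *      do {
 *              virtqueue_disable_cb(vq);
 *              while ((buf = virtqueue_get_buf(vq, &len)) != NULL)
 *                      process(buf, len);
 *      } while (!virtqueue_enable_cb(vq));
 */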
2465
2466 /**
2467  * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
2468  * @_vq: the struct virtqueue we're talking about.
2469  *
2470  * This re-enables callbacks but hints to the other side to delay
2471  * interrupts until most of the available buffers have been processed;
2472  * it returns "false" if there are many pending buffers in the queue,
2473  * to detect a possible race between the driver checking for more work,
2474  * and enabling callbacks.
2475  *
2476  * Caller must ensure we don't call this with other virtqueue
2477  * operations at the same time (except where noted).
2478  */
2479 bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
2480 {
2481         struct vring_virtqueue *vq = to_vvq(_vq);
2482
2483         if (vq->event_triggered)
2484                 vq->event_triggered = false;
2485
2486         return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) :
2487                                  virtqueue_enable_cb_delayed_split(_vq);
2488 }
2489 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
2490
2491 /**
2492  * virtqueue_detach_unused_buf - detach first unused buffer
2493  * @_vq: the struct virtqueue we're talking about.
2494  *
2495  * Returns NULL or the "data" token handed to virtqueue_add_*().
2496  * This is not valid on an active queue; it is useful for device
2497  * shutdown or when resetting the queue.
2498  */
2499 void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
2500 {
2501         struct vring_virtqueue *vq = to_vvq(_vq);
2502
2503         return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) :
2504                                  virtqueue_detach_unused_buf_split(_vq);
2505 }
2506 EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
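
/*
 * Shutdown-time sketch (hypothetical; the device must already be stopped so
 * the queue is no longer active, and free_buf() is driver-specific):
 *
 *      void *buf;
 *
 *      while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
 *              free_buf(buf);
 */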
2507
2508 static inline bool more_used(const struct vring_virtqueue *vq)
2509 {
2510         return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
2511 }
2512
2513 /**
2514  * vring_interrupt - notify a virtqueue on an interrupt
2515  * @irq: the IRQ number (ignored)
2516  * @_vq: the struct virtqueue to notify
2517  *
2518  * Calls the callback function of @_vq to process the virtqueue
2519  * notification.
2520  */
2521 irqreturn_t vring_interrupt(int irq, void *_vq)
2522 {
2523         struct vring_virtqueue *vq = to_vvq(_vq);
2524
2525         if (!more_used(vq)) {
2526                 pr_debug("virtqueue interrupt with no work for %p\n", vq);
2527                 return IRQ_NONE;
2528         }
2529
2530         if (unlikely(vq->broken)) {
2531 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
2532                 dev_warn_once(&vq->vq.vdev->dev,
2533                               "virtio vring IRQ raised before DRIVER_OK");
2534                 return IRQ_NONE;
2535 #else
2536                 return IRQ_HANDLED;
2537 #endif
2538         }
2539
2540         /* Just a hint for performance: so it's ok that this can be racy! */
2541         if (vq->event)
2542                 vq->event_triggered = true;
2543
2544         pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
2545         if (vq->vq.callback)
2546                 vq->vq.callback(&vq->vq);
2547
2548         return IRQ_HANDLED;
2549 }
2550 EXPORT_SYMBOL_GPL(vring_interrupt);
2551
2552 /* Only available for split ring */
2553 static struct virtqueue *__vring_new_virtqueue(unsigned int index,
2554                                                struct vring_virtqueue_split *vring_split,
2555                                                struct virtio_device *vdev,
2556                                                bool weak_barriers,
2557                                                bool context,
2558                                                bool (*notify)(struct virtqueue *),
2559                                                void (*callback)(struct virtqueue *),
2560                                                const char *name,
2561                                                struct device *dma_dev)
2562 {
2563         struct vring_virtqueue *vq;
2564         int err;
2565
2566         if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2567                 return NULL;
2568
2569         vq = kmalloc(sizeof(*vq), GFP_KERNEL);
2570         if (!vq)
2571                 return NULL;
2572
2573         vq->packed_ring = false;
2574         vq->vq.callback = callback;
2575         vq->vq.vdev = vdev;
2576         vq->vq.name = name;
2577         vq->vq.index = index;
2578         vq->vq.reset = false;
2579         vq->we_own_ring = false;
2580         vq->notify = notify;
2581         vq->weak_barriers = weak_barriers;
2582 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
2583         vq->broken = true;
2584 #else
2585         vq->broken = false;
2586 #endif
2587         vq->dma_dev = dma_dev;
2588         vq->use_dma_api = vring_use_dma_api(vdev);
2589         vq->premapped = false;
2590
2591         vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
2592                 !context;
2593         vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
2594
2595         if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
2596                 vq->weak_barriers = false;
2597
2598         err = vring_alloc_state_extra_split(vring_split);
2599         if (err) {
2600                 kfree(vq);
2601                 return NULL;
2602         }
2603
2604         virtqueue_vring_init_split(vring_split, vq);
2605
2606         virtqueue_init(vq, vring_split->vring.num);
2607         virtqueue_vring_attach_split(vq, vring_split);
2608
2609         spin_lock(&vdev->vqs_list_lock);
2610         list_add_tail(&vq->vq.list, &vdev->vqs);
2611         spin_unlock(&vdev->vqs_list_lock);
2612         return &vq->vq;
2613 }
2614
2615 struct virtqueue *vring_create_virtqueue(
2616         unsigned int index,
2617         unsigned int num,
2618         unsigned int vring_align,
2619         struct virtio_device *vdev,
2620         bool weak_barriers,
2621         bool may_reduce_num,
2622         bool context,
2623         bool (*notify)(struct virtqueue *),
2624         void (*callback)(struct virtqueue *),
2625         const char *name)
2626 {
2628         if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2629                 return vring_create_virtqueue_packed(index, num, vring_align,
2630                                 vdev, weak_barriers, may_reduce_num,
2631                                 context, notify, callback, name, vdev->dev.parent);
2632
2633         return vring_create_virtqueue_split(index, num, vring_align,
2634                         vdev, weak_barriers, may_reduce_num,
2635                         context, notify, callback, name, vdev->dev.parent);
2636 }
2637 EXPORT_SYMBOL_GPL(vring_create_virtqueue);
2638
2639 struct virtqueue *vring_create_virtqueue_dma(
2640         unsigned int index,
2641         unsigned int num,
2642         unsigned int vring_align,
2643         struct virtio_device *vdev,
2644         bool weak_barriers,
2645         bool may_reduce_num,
2646         bool context,
2647         bool (*notify)(struct virtqueue *),
2648         void (*callback)(struct virtqueue *),
2649         const char *name,
2650         struct device *dma_dev)
2651 {
2653         if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2654                 return vring_create_virtqueue_packed(index, num, vring_align,
2655                                 vdev, weak_barriers, may_reduce_num,
2656                                 context, notify, callback, name, dma_dev);
2657
2658         return vring_create_virtqueue_split(index, num, vring_align,
2659                         vdev, weak_barriers, may_reduce_num,
2660                         context, notify, callback, name, dma_dev);
2661 }
2662 EXPORT_SYMBOL_GPL(vring_create_virtqueue_dma);
2663
2664 /**
2665  * virtqueue_resize - resize the vring of vq
2666  * @_vq: the struct virtqueue we're talking about.
2667  * @num: new ring num
2668  * @recycle: callback to recycle buffers that are no longer used
2669  *
2670  * When it is really necessary to create a new vring, the current vq is put
2671  * into the reset state, and the passed callback is invoked to recycle every
2672  * buffer that is no longer used. The old vring is released only after the new
2673  * vring has been created successfully.
2674  *
2675  * Caller must ensure we don't call this with other virtqueue operations
2676  * at the same time (except where noted).
2677  *
2678  * Returns zero or a negative error.
2679  * 0: success.
2680  * -ENOMEM: Failed to allocate a new ring; the original ring size is kept and
2681  *  the vq can still work normally.
2682  * -EBUSY: Failed to sync with the device; the vq may not work properly.
2683  * -ENOENT: The transport or device does not support resizing.
2684  * -E2BIG/-EINVAL: num is invalid (larger than num_max, or zero).
2685  * -EPERM: Operation not permitted (the vring is not owned by the ring layer).
2686  *
2687  */
2688 int virtqueue_resize(struct virtqueue *_vq, u32 num,
2689                      void (*recycle)(struct virtqueue *vq, void *buf))
2690 {
2691         struct vring_virtqueue *vq = to_vvq(_vq);
2692         struct virtio_device *vdev = vq->vq.vdev;
2693         void *buf;
2694         int err;
2695
2696         if (!vq->we_own_ring)
2697                 return -EPERM;
2698
2699         if (num > vq->vq.num_max)
2700                 return -E2BIG;
2701
2702         if (!num)
2703                 return -EINVAL;
2704
2705         if ((vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num) == num)
2706                 return 0;
2707
2708         if (!vdev->config->disable_vq_and_reset)
2709                 return -ENOENT;
2710
2711         if (!vdev->config->enable_vq_after_reset)
2712                 return -ENOENT;
2713
2714         err = vdev->config->disable_vq_and_reset(_vq);
2715         if (err)
2716                 return err;
2717
2718         while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL)
2719                 recycle(_vq, buf);
2720
2721         if (vq->packed_ring)
2722                 err = virtqueue_resize_packed(_vq, num);
2723         else
2724                 err = virtqueue_resize_split(_vq, num);
2725
2726         if (vdev->config->enable_vq_after_reset(_vq))
2727                 return -EBUSY;
2728
2729         return err;
2730 }
2731 EXPORT_SYMBOL_GPL(virtqueue_resize);
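
/*
 * Sketch of a caller (hypothetical; 'vq' and 'new_num' are assumptions, and
 * the recycle callback simply frees the driver's buffer tokens):
 *
 *      static void recycle_buf(struct virtqueue *vq, void *buf)
 *      {
 *              kfree(buf);
 *      }
 *
 *      err = virtqueue_resize(vq, new_num, recycle_buf);
 */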
2732
2733 /**
2734  * virtqueue_set_dma_premapped - set the vring premapped mode
2735  * @_vq: the struct virtqueue we're talking about.
2736  *
2737  * Enable the premapped mode of the vq.
2738  *
2739  * A vring in premapped mode does not do DMA mapping internally, so the driver
2740  * must do the DMA mapping in advance and pass the DMA address through the
2741  * dma_address field of the scatterlist. When the driver gets a used buffer
2742  * back from the vring, it has to unmap the DMA address itself.
2743  *
2744  * This function must be called immediately after creating the vq, or after vq
2745  * reset, and before adding any buffers to it.
2746  *
2747  * Caller must ensure we don't call this with other virtqueue operations
2748  * at the same time (except where noted).
2749  *
2750  * Returns zero or a negative error.
2751  * 0: success.
2752  * -EINVAL: vring is not empty or does not use the DMA API; premapped mode can not be enabled.
2753  */
2754 int virtqueue_set_dma_premapped(struct virtqueue *_vq)
2755 {
2756         struct vring_virtqueue *vq = to_vvq(_vq);
2757         u32 num;
2758
2759         START_USE(vq);
2760
2761         num = vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
2762
2763         if (num != vq->vq.num_free) {
2764                 END_USE(vq);
2765                 return -EINVAL;
2766         }
2767
2768         if (!vq->use_dma_api) {
2769                 END_USE(vq);
2770                 return -EINVAL;
2771         }
2772
2773         vq->premapped = true;
2774
2775         END_USE(vq);
2776
2777         return 0;
2778 }
2779 EXPORT_SYMBOL_GPL(virtqueue_set_dma_premapped);
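
/*
 * Sketch of premapped usage (hypothetical driver code; 'vq', 'dma_dev',
 * 'buf' and 'len' are assumptions, with dma_dev obtained from
 * virtqueue_dma_dev()). The driver maps the buffer itself and passes the
 * DMA address via the scatterlist:
 *
 *      struct scatterlist sg;
 *
 *      if (!virtqueue_set_dma_premapped(vq)) {
 *              sg_init_one(&sg, buf, len);
 *              sg.dma_address = dma_map_single(dma_dev, buf, len,
 *                                              DMA_TO_DEVICE);
 *              if (!dma_mapping_error(dma_dev, sg.dma_address))
 *                      virtqueue_add_outbuf(vq, &sg, 1, buf, GFP_KERNEL);
 *      }
 */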
2780
2781 /* Only available for split ring */
2782 struct virtqueue *vring_new_virtqueue(unsigned int index,
2783                                       unsigned int num,
2784                                       unsigned int vring_align,
2785                                       struct virtio_device *vdev,
2786                                       bool weak_barriers,
2787                                       bool context,
2788                                       void *pages,
2789                                       bool (*notify)(struct virtqueue *vq),
2790                                       void (*callback)(struct virtqueue *vq),
2791                                       const char *name)
2792 {
2793         struct vring_virtqueue_split vring_split = {};
2794
2795         if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2796                 return NULL;
2797
2798         vring_init(&vring_split.vring, num, pages, vring_align);
2799         return __vring_new_virtqueue(index, &vring_split, vdev, weak_barriers,
2800                                      context, notify, callback, name,
2801                                      vdev->dev.parent);
2802 }
2803 EXPORT_SYMBOL_GPL(vring_new_virtqueue);
2804
2805 static void vring_free(struct virtqueue *_vq)
2806 {
2807         struct vring_virtqueue *vq = to_vvq(_vq);
2808
2809         if (vq->we_own_ring) {
2810                 if (vq->packed_ring) {
2811                         vring_free_queue(vq->vq.vdev,
2812                                          vq->packed.ring_size_in_bytes,
2813                                          vq->packed.vring.desc,
2814                                          vq->packed.ring_dma_addr,
2815                                          vring_dma_dev(vq));
2816
2817                         vring_free_queue(vq->vq.vdev,
2818                                          vq->packed.event_size_in_bytes,
2819                                          vq->packed.vring.driver,
2820                                          vq->packed.driver_event_dma_addr,
2821                                          vring_dma_dev(vq));
2822
2823                         vring_free_queue(vq->vq.vdev,
2824                                          vq->packed.event_size_in_bytes,
2825                                          vq->packed.vring.device,
2826                                          vq->packed.device_event_dma_addr,
2827                                          vring_dma_dev(vq));
2828
2829                         kfree(vq->packed.desc_state);
2830                         kfree(vq->packed.desc_extra);
2831                 } else {
2832                         vring_free_queue(vq->vq.vdev,
2833                                          vq->split.queue_size_in_bytes,
2834                                          vq->split.vring.desc,
2835                                          vq->split.queue_dma_addr,
2836                                          vring_dma_dev(vq));
2837                 }
2838         }
2839         if (!vq->packed_ring) {
2840                 kfree(vq->split.desc_state);
2841                 kfree(vq->split.desc_extra);
2842         }
2843 }
2844
2845 void vring_del_virtqueue(struct virtqueue *_vq)
2846 {
2847         struct vring_virtqueue *vq = to_vvq(_vq);
2848
2849         spin_lock(&vq->vq.vdev->vqs_list_lock);
2850         list_del(&_vq->list);
2851         spin_unlock(&vq->vq.vdev->vqs_list_lock);
2852
2853         vring_free(_vq);
2854
2855         kfree(vq);
2856 }
2857 EXPORT_SYMBOL_GPL(vring_del_virtqueue);
2858
2859 u32 vring_notification_data(struct virtqueue *_vq)
2860 {
2861         struct vring_virtqueue *vq = to_vvq(_vq);
2862         u16 next;
2863
2864         if (vq->packed_ring)
2865                 next = (vq->packed.next_avail_idx &
2866                                 ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR))) |
2867                         vq->packed.avail_wrap_counter <<
2868                                 VRING_PACKED_EVENT_F_WRAP_CTR;
2869         else
2870                 next = vq->split.avail_idx_shadow;
2871
2872         return next << 16 | _vq->index;
2873 }
2874 EXPORT_SYMBOL_GPL(vring_notification_data);
2875
2876 /* Manipulates transport-specific feature bits. */
2877 void vring_transport_features(struct virtio_device *vdev)
2878 {
2879         unsigned int i;
2880
2881         for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
2882                 switch (i) {
2883                 case VIRTIO_RING_F_INDIRECT_DESC:
2884                         break;
2885                 case VIRTIO_RING_F_EVENT_IDX:
2886                         break;
2887                 case VIRTIO_F_VERSION_1:
2888                         break;
2889                 case VIRTIO_F_ACCESS_PLATFORM:
2890                         break;
2891                 case VIRTIO_F_RING_PACKED:
2892                         break;
2893                 case VIRTIO_F_ORDER_PLATFORM:
2894                         break;
2895                 case VIRTIO_F_NOTIFICATION_DATA:
2896                         break;
2897                 default:
2898                         /* We don't understand this bit. */
2899                         __virtio_clear_bit(vdev, i);
2900                 }
2901         }
2902 }
2903 EXPORT_SYMBOL_GPL(vring_transport_features);
2904
2905 /**
2906  * virtqueue_get_vring_size - return the size of the virtqueue's vring
2907  * @_vq: the struct virtqueue containing the vring of interest.
2908  *
2909  * Returns the size of the vring.  This is mainly used for boasting to
2910  * userspace.  Unlike other operations, this need not be serialized.
2911  */
2912 unsigned int virtqueue_get_vring_size(const struct virtqueue *_vq)
2913 {
2915         const struct vring_virtqueue *vq = to_vvq(_vq);
2916
2917         return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
2918 }
2919 EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
2920
2921 /*
2922  * This function should only be called by the core, not directly by the driver.
2923  */
2924 void __virtqueue_break(struct virtqueue *_vq)
2925 {
2926         struct vring_virtqueue *vq = to_vvq(_vq);
2927
2928         /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2929         WRITE_ONCE(vq->broken, true);
2930 }
2931 EXPORT_SYMBOL_GPL(__virtqueue_break);
2932
2933 /*
2934  * This function should only be called by the core, not directly by the driver.
2935  */
2936 void __virtqueue_unbreak(struct virtqueue *_vq)
2937 {
2938         struct vring_virtqueue *vq = to_vvq(_vq);
2939
2940         /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2941         WRITE_ONCE(vq->broken, false);
2942 }
2943 EXPORT_SYMBOL_GPL(__virtqueue_unbreak);
2944
2945 bool virtqueue_is_broken(const struct virtqueue *_vq)
2946 {
2947         const struct vring_virtqueue *vq = to_vvq(_vq);
2948
2949         return READ_ONCE(vq->broken);
2950 }
2951 EXPORT_SYMBOL_GPL(virtqueue_is_broken);
2952
2953 /*
2954  * This should prevent the device from being used, allowing drivers to
2955  * recover.  You may need to grab appropriate locks to flush.
2956  */
2957 void virtio_break_device(struct virtio_device *dev)
2958 {
2959         struct virtqueue *_vq;
2960
2961         spin_lock(&dev->vqs_list_lock);
2962         list_for_each_entry(_vq, &dev->vqs, list) {
2963                 struct vring_virtqueue *vq = to_vvq(_vq);
2964
2965                 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2966                 WRITE_ONCE(vq->broken, true);
2967         }
2968         spin_unlock(&dev->vqs_list_lock);
2969 }
2970 EXPORT_SYMBOL_GPL(virtio_break_device);
2971
2972 /*
2973  * This should allow the device to be used by the driver. You may
2974  * need to grab appropriate locks to flush the write to
2975  * vq->broken. This should only be used in some specific case e.g
2976  * (probing and restoring). This function should only be called by the
2977  * core, not directly by the driver.
2978  */
2979 void __virtio_unbreak_device(struct virtio_device *dev)
2980 {
2981         struct virtqueue *_vq;
2982
2983         spin_lock(&dev->vqs_list_lock);
2984         list_for_each_entry(_vq, &dev->vqs, list) {
2985                 struct vring_virtqueue *vq = to_vvq(_vq);
2986
2987                 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2988                 WRITE_ONCE(vq->broken, false);
2989         }
2990         spin_unlock(&dev->vqs_list_lock);
2991 }
2992 EXPORT_SYMBOL_GPL(__virtio_unbreak_device);
2993
2994 dma_addr_t virtqueue_get_desc_addr(const struct virtqueue *_vq)
2995 {
2996         const struct vring_virtqueue *vq = to_vvq(_vq);
2997
2998         BUG_ON(!vq->we_own_ring);
2999
3000         if (vq->packed_ring)
3001                 return vq->packed.ring_dma_addr;
3002
3003         return vq->split.queue_dma_addr;
3004 }
3005 EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);
3006
3007 dma_addr_t virtqueue_get_avail_addr(const struct virtqueue *_vq)
3008 {
3009         const struct vring_virtqueue *vq = to_vvq(_vq);
3010
3011         BUG_ON(!vq->we_own_ring);
3012
3013         if (vq->packed_ring)
3014                 return vq->packed.driver_event_dma_addr;
3015
3016         return vq->split.queue_dma_addr +
3017                 ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
3018 }
3019 EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);
3020
3021 dma_addr_t virtqueue_get_used_addr(const struct virtqueue *_vq)
3022 {
3023         const struct vring_virtqueue *vq = to_vvq(_vq);
3024
3025         BUG_ON(!vq->we_own_ring);
3026
3027         if (vq->packed_ring)
3028                 return vq->packed.device_event_dma_addr;
3029
3030         return vq->split.queue_dma_addr +
3031                 ((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
3032 }
3033 EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
3034
3035 /* Only available for split ring */
3036 const struct vring *virtqueue_get_vring(const struct virtqueue *vq)
3037 {
3038         return &to_vvq(vq)->split.vring;
3039 }
3040 EXPORT_SYMBOL_GPL(virtqueue_get_vring);
3041
3042 MODULE_LICENSE("GPL");