virtio_ring: fix the wording of the virtqueue_resize() description
drivers/virtio/virtio_ring.c (platform/kernel/linux-rpi.git)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* Virtio ring implementation.
3  *
4  *  Copyright 2007 Rusty Russell IBM Corporation
5  */
6 #include <linux/virtio.h>
7 #include <linux/virtio_ring.h>
8 #include <linux/virtio_config.h>
9 #include <linux/device.h>
10 #include <linux/slab.h>
11 #include <linux/module.h>
12 #include <linux/hrtimer.h>
13 #include <linux/dma-mapping.h>
14 #include <linux/kmsan.h>
15 #include <linux/spinlock.h>
16 #include <xen/xen.h>
17
18 #ifdef DEBUG
19 /* For development, we want to crash whenever the ring is screwed. */
20 #define BAD_RING(_vq, fmt, args...)                             \
21         do {                                                    \
22                 dev_err(&(_vq)->vq.vdev->dev,                   \
23                         "%s:"fmt, (_vq)->vq.name, ##args);      \
24                 BUG();                                          \
25         } while (0)
26 /* Caller is supposed to guarantee no reentry. */
27 #define START_USE(_vq)                                          \
28         do {                                                    \
29                 if ((_vq)->in_use)                              \
30                         panic("%s:in_use = %i\n",               \
31                               (_vq)->vq.name, (_vq)->in_use);   \
32                 (_vq)->in_use = __LINE__;                       \
33         } while (0)
34 #define END_USE(_vq) \
35         do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0)
36 #define LAST_ADD_TIME_UPDATE(_vq)                               \
37         do {                                                    \
38                 ktime_t now = ktime_get();                      \
39                                                                 \
40                 /* No kick or get, with .1 second between?  Warn. */ \
41                 if ((_vq)->last_add_time_valid)                 \
42                         WARN_ON(ktime_to_ms(ktime_sub(now,      \
43                                 (_vq)->last_add_time)) > 100);  \
44                 (_vq)->last_add_time = now;                     \
45                 (_vq)->last_add_time_valid = true;              \
46         } while (0)
47 #define LAST_ADD_TIME_CHECK(_vq)                                \
48         do {                                                    \
49                 if ((_vq)->last_add_time_valid) {               \
50                         WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \
51                                       (_vq)->last_add_time)) > 100); \
52                 }                                               \
53         } while (0)
54 #define LAST_ADD_TIME_INVALID(_vq)                              \
55         ((_vq)->last_add_time_valid = false)
56 #else
57 #define BAD_RING(_vq, fmt, args...)                             \
58         do {                                                    \
59                 dev_err(&_vq->vq.vdev->dev,                     \
60                         "%s:"fmt, (_vq)->vq.name, ##args);      \
61                 (_vq)->broken = true;                           \
62         } while (0)
63 #define START_USE(vq)
64 #define END_USE(vq)
65 #define LAST_ADD_TIME_UPDATE(vq)
66 #define LAST_ADD_TIME_CHECK(vq)
67 #define LAST_ADD_TIME_INVALID(vq)
68 #endif
69
70 struct vring_desc_state_split {
71         void *data;                     /* Data for callback. */
72         struct vring_desc *indir_desc;  /* Indirect descriptor, if any. */
73 };
74
75 struct vring_desc_state_packed {
76         void *data;                     /* Data for callback. */
77         struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
78         u16 num;                        /* Descriptor list length. */
79         u16 last;                       /* The last desc state in a list. */
80 };
81
82 struct vring_desc_extra {
83         dma_addr_t addr;                /* Descriptor DMA addr. */
84         u32 len;                        /* Descriptor length. */
85         u16 flags;                      /* Descriptor flags. */
86         u16 next;                       /* The next desc state in a list. */
87 };
88
89 struct vring_virtqueue_split {
90         /* Actual memory layout for this queue. */
91         struct vring vring;
92
93         /* Last written value to avail->flags */
94         u16 avail_flags_shadow;
95
96         /*
97          * Last written value to avail->idx in
98          * guest byte order.
99          */
100         u16 avail_idx_shadow;
101
102         /* Per-descriptor state. */
103         struct vring_desc_state_split *desc_state;
104         struct vring_desc_extra *desc_extra;
105
106         /* DMA address and size information */
107         dma_addr_t queue_dma_addr;
108         size_t queue_size_in_bytes;
109
110         /*
111          * The original creation parameters are kept here so that a new
112          * vring can be created later (e.g. when the queue is resized).
113          */
114         u32 vring_align;
115         bool may_reduce_num;
116 };
117
118 struct vring_virtqueue_packed {
119         /* Actual memory layout for this queue. */
120         struct {
121                 unsigned int num;
122                 struct vring_packed_desc *desc;
123                 struct vring_packed_desc_event *driver;
124                 struct vring_packed_desc_event *device;
125         } vring;
126
127         /* Driver ring wrap counter. */
128         bool avail_wrap_counter;
129
130         /* Avail used flags. */
131         u16 avail_used_flags;
132
133         /* Index of the next avail descriptor. */
134         u16 next_avail_idx;
135
136         /*
137          * Last written value to driver->flags in
138          * guest byte order.
139          */
140         u16 event_flags_shadow;
141
142         /* Per-descriptor state. */
143         struct vring_desc_state_packed *desc_state;
144         struct vring_desc_extra *desc_extra;
145
146         /* DMA address and size information */
147         dma_addr_t ring_dma_addr;
148         dma_addr_t driver_event_dma_addr;
149         dma_addr_t device_event_dma_addr;
150         size_t ring_size_in_bytes;
151         size_t event_size_in_bytes;
152 };
153
154 struct vring_virtqueue {
155         struct virtqueue vq;
156
157         /* Is this a packed ring? */
158         bool packed_ring;
159
160         /* Is DMA API used? */
161         bool use_dma_api;
162
163         /* Can we use weak barriers? */
164         bool weak_barriers;
165
166         /* Other side has made a mess, don't try any more. */
167         bool broken;
168
169         /* Host supports indirect buffers */
170         bool indirect;
171
172         /* Host publishes avail event idx */
173         bool event;
174
175         /* Do DMA mapping by driver */
176         bool premapped;
177
178         /* Whether to unmap descriptors: true only when premapped is
179          * false and use_dma_api is true.
180          */
181         bool do_unmap;
182
183         /* Head of free buffer list. */
184         unsigned int free_head;
185         /* Number we've added since last sync. */
186         unsigned int num_added;
187
188         /* Last used index we've seen.
189          * For the split ring, it simply holds the last used index.
190          * For the packed ring:
191          * bits below VRING_PACKED_EVENT_F_WRAP_CTR hold the last used index;
192          * bits from VRING_PACKED_EVENT_F_WRAP_CTR up hold the used wrap counter.
193          */
194         u16 last_used_idx;
195
196         /* Hint for event idx: already triggered no need to disable. */
197         bool event_triggered;
198
199         union {
200                 /* Available for split ring */
201                 struct vring_virtqueue_split split;
202
203                 /* Available for packed ring */
204                 struct vring_virtqueue_packed packed;
205         };
206
207         /* How to notify other side. FIXME: commonalize hcalls! */
208         bool (*notify)(struct virtqueue *vq);
209
210         /* DMA, allocation, and size information */
211         bool we_own_ring;
212
213         /* Device used for doing DMA */
214         struct device *dma_dev;
215
216 #ifdef DEBUG
217         /* They're supposed to lock for us. */
218         unsigned int in_use;
219
220         /* Figure out if their kicks are too delayed. */
221         bool last_add_time_valid;
222         ktime_t last_add_time;
223 #endif
224 };
225
226 static struct virtqueue *__vring_new_virtqueue(unsigned int index,
227                                                struct vring_virtqueue_split *vring_split,
228                                                struct virtio_device *vdev,
229                                                bool weak_barriers,
230                                                bool context,
231                                                bool (*notify)(struct virtqueue *),
232                                                void (*callback)(struct virtqueue *),
233                                                const char *name,
234                                                struct device *dma_dev);
235 static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num);
236 static void vring_free(struct virtqueue *_vq);
237
238 /*
239  * Helpers.
240  */
241
242 #define to_vvq(_vq) container_of_const(_vq, struct vring_virtqueue, vq)
243
244 static bool virtqueue_use_indirect(const struct vring_virtqueue *vq,
245                                    unsigned int total_sg)
246 {
247         /*
248          * If the host supports indirect descriptor tables, and we have multiple
249          * buffers, then go indirect. FIXME: tune this threshold
250          */
251         return (vq->indirect && total_sg > 1 && vq->vq.num_free);
252 }
253
254 /*
255  * Modern virtio devices have feature bits to specify whether they need a
256  * quirk and bypass the IOMMU. If not there, just use the DMA API.
257  *
258  * If there, the interaction between virtio and DMA API is messy.
259  *
260  * On most systems with virtio, physical addresses match bus addresses,
261  * and it doesn't particularly matter whether we use the DMA API.
262  *
263  * On some systems, including Xen and any system with a physical device
264  * that speaks virtio behind a physical IOMMU, we must use the DMA API
265  * for virtio DMA to work at all.
266  *
267  * On other systems, including SPARC and PPC64, virtio-pci devices are
268  * enumerated as though they are behind an IOMMU, but the virtio host
269  * ignores the IOMMU, so we must either pretend that the IOMMU isn't
270  * there or somehow map everything as the identity.
271  *
272  * For the time being, we preserve historic behavior and bypass the DMA
273  * API.
274  *
275  * TODO: install a per-device DMA ops structure that does the right thing
276  * taking into account all the above quirks, and use the DMA API
277  * unconditionally on data path.
278  */
279
280 static bool vring_use_dma_api(const struct virtio_device *vdev)
281 {
282         if (!virtio_has_dma_quirk(vdev))
283                 return true;
284
285         /* Otherwise, we are left to guess. */
286         /*
287          * In theory, it's possible to have a buggy QEMU-supplied
288          * emulated Q35 IOMMU and Xen enabled at the same time.  On
289          * such a configuration, virtio has never worked and will
290          * not work without an even larger kludge.  Instead, enable
291          * the DMA API if we're a Xen guest, which at least allows
292          * all of the sensible Xen configurations to work correctly.
293          */
294         if (xen_domain())
295                 return true;
296
297         return false;
298 }
299
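/*
 * Editor's note: illustrative sketch, not part of this file.
 * virtio_has_dma_quirk() lives in include/linux/virtio_config.h; the helper
 * below restates its intent from memory (treat it as an assumption): a
 * device that does NOT offer VIRTIO_F_ACCESS_PLATFORM has the legacy quirk
 * and bypasses the IOMMU, which is what vring_use_dma_api() keys off.
 */
static bool example_has_dma_quirk(const struct virtio_device *vdev)
{
        /* Note the reverse polarity: offering the feature means "no quirk". */
        return !virtio_has_feature(vdev, VIRTIO_F_ACCESS_PLATFORM);
}
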
300 size_t virtio_max_dma_size(const struct virtio_device *vdev)
301 {
302         size_t max_segment_size = SIZE_MAX;
303
304         if (vring_use_dma_api(vdev))
305                 max_segment_size = dma_max_mapping_size(vdev->dev.parent);
306
307         return max_segment_size;
308 }
309 EXPORT_SYMBOL_GPL(virtio_max_dma_size);
310
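/*
 * Editor's note: illustrative sketch, not part of this file. A driver that
 * builds large buffers can use virtio_max_dma_size() to keep each segment
 * within what the DMA layer can map; the helper name and the U32_MAX clamp
 * are assumptions made for the example.
 */
static u32 example_max_segment(struct virtio_device *vdev)
{
        size_t max = virtio_max_dma_size(vdev);

        /* Clamp to what a 32-bit descriptor length field can describe. */
        return min_t(size_t, max, U32_MAX);
}
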
311 static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
312                                dma_addr_t *dma_handle, gfp_t flag,
313                                struct device *dma_dev)
314 {
315         if (vring_use_dma_api(vdev)) {
316                 return dma_alloc_coherent(dma_dev, size,
317                                           dma_handle, flag);
318         } else {
319                 void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);
320
321                 if (queue) {
322                         phys_addr_t phys_addr = virt_to_phys(queue);
323                         *dma_handle = (dma_addr_t)phys_addr;
324
325                         /*
326                          * Sanity check: make sure we didn't truncate
327                          * the address.  The only arches I can find that
328                          * have 64-bit phys_addr_t but 32-bit dma_addr_t
329                          * are certain non-highmem MIPS and x86
330                          * configurations, but these configurations
331                          * should never allocate physical pages above 32
332                          * bits, so this is fine.  Just in case, throw a
333                          * warning and abort if we end up with an
334                          * unrepresentable address.
335                          */
336                         if (WARN_ON_ONCE(*dma_handle != phys_addr)) {
337                                 free_pages_exact(queue, PAGE_ALIGN(size));
338                                 return NULL;
339                         }
340                 }
341                 return queue;
342         }
343 }
344
345 static void vring_free_queue(struct virtio_device *vdev, size_t size,
346                              void *queue, dma_addr_t dma_handle,
347                              struct device *dma_dev)
348 {
349         if (vring_use_dma_api(vdev))
350                 dma_free_coherent(dma_dev, size, queue, dma_handle);
351         else
352                 free_pages_exact(queue, PAGE_ALIGN(size));
353 }
354
355 /*
356  * The DMA ops on various arches are rather gnarly right now, and
357  * making all of the arch DMA ops work on the vring device itself
358  * is a mess.
359  */
360 static struct device *vring_dma_dev(const struct vring_virtqueue *vq)
361 {
362         return vq->dma_dev;
363 }
364
365 /* Map one sg entry. */
366 static int vring_map_one_sg(const struct vring_virtqueue *vq, struct scatterlist *sg,
367                             enum dma_data_direction direction, dma_addr_t *addr)
368 {
369         if (vq->premapped) {
370                 *addr = sg_dma_address(sg);
371                 return 0;
372         }
373
374         if (!vq->use_dma_api) {
375                 /*
376                  * If DMA is not used, KMSAN doesn't know that the scatterlist
377                  * is initialized by the hardware. Explicitly check/unpoison it
378                  * depending on the direction.
379                  */
380                 kmsan_handle_dma(sg_page(sg), sg->offset, sg->length, direction);
381                 *addr = (dma_addr_t)sg_phys(sg);
382                 return 0;
383         }
384
385         /*
386          * We can't use dma_map_sg, because we don't use scatterlists in
387          * the way it expects (we don't guarantee that the scatterlist
388          * will exist for the lifetime of the mapping).
389          */
390         *addr = dma_map_page(vring_dma_dev(vq),
391                             sg_page(sg), sg->offset, sg->length,
392                             direction);
393
394         if (dma_mapping_error(vring_dma_dev(vq), *addr))
395                 return -ENOMEM;
396
397         return 0;
398 }
399
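/*
 * Editor's note: illustrative sketch, not part of this file. In premapped
 * mode the driver owns the DMA mapping and the ring only consumes
 * sg_dma_address() (see the first branch above). The flow below assumes the
 * premapped helpers exported further down in this file
 * (virtqueue_dma_map_single_attrs(), virtqueue_dma_mapping_error()) and a
 * queue that was already switched over with virtqueue_set_dma_premapped().
 */
static int example_add_premapped(struct virtqueue *vq, void *buf, size_t len)
{
        struct scatterlist sg;
        dma_addr_t addr;

        addr = virtqueue_dma_map_single_attrs(vq, buf, len, DMA_TO_DEVICE, 0);
        if (virtqueue_dma_mapping_error(vq, addr))
                return -ENOMEM;

        sg_init_one(&sg, buf, len);
        sg_dma_address(&sg) = addr;     /* the ring reads only this address */
        sg_dma_len(&sg) = len;

        return virtqueue_add_outbuf(vq, &sg, 1, buf, GFP_ATOMIC);
}
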
400 static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
401                                    void *cpu_addr, size_t size,
402                                    enum dma_data_direction direction)
403 {
404         if (!vq->use_dma_api)
405                 return (dma_addr_t)virt_to_phys(cpu_addr);
406
407         return dma_map_single(vring_dma_dev(vq),
408                               cpu_addr, size, direction);
409 }
410
411 static int vring_mapping_error(const struct vring_virtqueue *vq,
412                                dma_addr_t addr)
413 {
414         if (!vq->use_dma_api)
415                 return 0;
416
417         return dma_mapping_error(vring_dma_dev(vq), addr);
418 }
419
420 static void virtqueue_init(struct vring_virtqueue *vq, u32 num)
421 {
422         vq->vq.num_free = num;
423
424         if (vq->packed_ring)
425                 vq->last_used_idx = 0 | (1 << VRING_PACKED_EVENT_F_WRAP_CTR);
426         else
427                 vq->last_used_idx = 0;
428
429         vq->event_triggered = false;
430         vq->num_added = 0;
431
432 #ifdef DEBUG
433         vq->in_use = false;
434         vq->last_add_time_valid = false;
435 #endif
436 }
437
438
439 /*
440  * Split ring specific functions - *_split().
441  */
442
443 static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
444                                            const struct vring_desc *desc)
445 {
446         u16 flags;
447
448         if (!vq->do_unmap)
449                 return;
450
451         flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
452
453         dma_unmap_page(vring_dma_dev(vq),
454                        virtio64_to_cpu(vq->vq.vdev, desc->addr),
455                        virtio32_to_cpu(vq->vq.vdev, desc->len),
456                        (flags & VRING_DESC_F_WRITE) ?
457                        DMA_FROM_DEVICE : DMA_TO_DEVICE);
458 }
459
460 static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
461                                           unsigned int i)
462 {
463         struct vring_desc_extra *extra = vq->split.desc_extra;
464         u16 flags;
465
466         flags = extra[i].flags;
467
468         if (flags & VRING_DESC_F_INDIRECT) {
469                 if (!vq->use_dma_api)
470                         goto out;
471
472                 dma_unmap_single(vring_dma_dev(vq),
473                                  extra[i].addr,
474                                  extra[i].len,
475                                  (flags & VRING_DESC_F_WRITE) ?
476                                  DMA_FROM_DEVICE : DMA_TO_DEVICE);
477         } else {
478                 if (!vq->do_unmap)
479                         goto out;
480
481                 dma_unmap_page(vring_dma_dev(vq),
482                                extra[i].addr,
483                                extra[i].len,
484                                (flags & VRING_DESC_F_WRITE) ?
485                                DMA_FROM_DEVICE : DMA_TO_DEVICE);
486         }
487
488 out:
489         return extra[i].next;
490 }
491
492 static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
493                                                unsigned int total_sg,
494                                                gfp_t gfp)
495 {
496         struct vring_desc *desc;
497         unsigned int i;
498
499         /*
500          * We require lowmem mappings for the descriptors because
501          * otherwise virt_to_phys will give us bogus addresses in the
502          * virtqueue.
503          */
504         gfp &= ~__GFP_HIGHMEM;
505
506         desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp);
507         if (!desc)
508                 return NULL;
509
510         for (i = 0; i < total_sg; i++)
511                 desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1);
512         return desc;
513 }
514
515 static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
516                                                     struct vring_desc *desc,
517                                                     unsigned int i,
518                                                     dma_addr_t addr,
519                                                     unsigned int len,
520                                                     u16 flags,
521                                                     bool indirect)
522 {
523         struct vring_virtqueue *vring = to_vvq(vq);
524         struct vring_desc_extra *extra = vring->split.desc_extra;
525         u16 next;
526
527         desc[i].flags = cpu_to_virtio16(vq->vdev, flags);
528         desc[i].addr = cpu_to_virtio64(vq->vdev, addr);
529         desc[i].len = cpu_to_virtio32(vq->vdev, len);
530
531         if (!indirect) {
532                 next = extra[i].next;
533                 desc[i].next = cpu_to_virtio16(vq->vdev, next);
534
535                 extra[i].addr = addr;
536                 extra[i].len = len;
537                 extra[i].flags = flags;
538         } else
539                 next = virtio16_to_cpu(vq->vdev, desc[i].next);
540
541         return next;
542 }
543
544 static inline int virtqueue_add_split(struct virtqueue *_vq,
545                                       struct scatterlist *sgs[],
546                                       unsigned int total_sg,
547                                       unsigned int out_sgs,
548                                       unsigned int in_sgs,
549                                       void *data,
550                                       void *ctx,
551                                       gfp_t gfp)
552 {
553         struct vring_virtqueue *vq = to_vvq(_vq);
554         struct scatterlist *sg;
555         struct vring_desc *desc;
556         unsigned int i, n, avail, descs_used, prev, err_idx;
557         int head;
558         bool indirect;
559
560         START_USE(vq);
561
562         BUG_ON(data == NULL);
563         BUG_ON(ctx && vq->indirect);
564
565         if (unlikely(vq->broken)) {
566                 END_USE(vq);
567                 return -EIO;
568         }
569
570         LAST_ADD_TIME_UPDATE(vq);
571
572         BUG_ON(total_sg == 0);
573
574         head = vq->free_head;
575
576         if (virtqueue_use_indirect(vq, total_sg))
577                 desc = alloc_indirect_split(_vq, total_sg, gfp);
578         else {
579                 desc = NULL;
580                 WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
581         }
582
583         if (desc) {
584                 /* Use a single buffer which doesn't continue */
585                 indirect = true;
586                 /* Set up rest to use this indirect table. */
587                 i = 0;
588                 descs_used = 1;
589         } else {
590                 indirect = false;
591                 desc = vq->split.vring.desc;
592                 i = head;
593                 descs_used = total_sg;
594         }
595
596         if (unlikely(vq->vq.num_free < descs_used)) {
597                 pr_debug("Can't add buf len %i - avail = %i\n",
598                          descs_used, vq->vq.num_free);
599                 /* FIXME: for historical reasons, we force a notify here if
600                  * there are outgoing parts to the buffer.  Presumably the
601                  * host should service the ring ASAP. */
602                 if (out_sgs)
603                         vq->notify(&vq->vq);
604                 if (indirect)
605                         kfree(desc);
606                 END_USE(vq);
607                 return -ENOSPC;
608         }
609
610         for (n = 0; n < out_sgs; n++) {
611                 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
612                         dma_addr_t addr;
613
614                         if (vring_map_one_sg(vq, sg, DMA_TO_DEVICE, &addr))
615                                 goto unmap_release;
616
617                         prev = i;
618                         /* Note that we trust the indirect descriptor
619                          * table since it uses streaming DMA mapping.
620                          */
621                         i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length,
622                                                      VRING_DESC_F_NEXT,
623                                                      indirect);
624                 }
625         }
626         for (; n < (out_sgs + in_sgs); n++) {
627                 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
628                         dma_addr_t addr;
629
630                         if (vring_map_one_sg(vq, sg, DMA_FROM_DEVICE, &addr))
631                                 goto unmap_release;
632
633                         prev = i;
634                         /* Note that we trust the indirect descriptor
635                          * table since it uses streaming DMA mapping.
636                          */
637                         i = virtqueue_add_desc_split(_vq, desc, i, addr,
638                                                      sg->length,
639                                                      VRING_DESC_F_NEXT |
640                                                      VRING_DESC_F_WRITE,
641                                                      indirect);
642                 }
643         }
644         /* Last one doesn't continue. */
645         desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
646         if (!indirect && vq->do_unmap)
647                 vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &=
648                         ~VRING_DESC_F_NEXT;
649
650         if (indirect) {
651                 /* Now that the indirect table is filled in, map it. */
652                 dma_addr_t addr = vring_map_single(
653                         vq, desc, total_sg * sizeof(struct vring_desc),
654                         DMA_TO_DEVICE);
655                 if (vring_mapping_error(vq, addr)) {
656                         if (vq->premapped)
657                                 goto free_indirect;
658
659                         goto unmap_release;
660                 }
661
662                 virtqueue_add_desc_split(_vq, vq->split.vring.desc,
663                                          head, addr,
664                                          total_sg * sizeof(struct vring_desc),
665                                          VRING_DESC_F_INDIRECT,
666                                          false);
667         }
668
669         /* We're using some buffers from the free list. */
670         vq->vq.num_free -= descs_used;
671
672         /* Update free pointer */
673         if (indirect)
674                 vq->free_head = vq->split.desc_extra[head].next;
675         else
676                 vq->free_head = i;
677
678         /* Store token and indirect buffer state. */
679         vq->split.desc_state[head].data = data;
680         if (indirect)
681                 vq->split.desc_state[head].indir_desc = desc;
682         else
683                 vq->split.desc_state[head].indir_desc = ctx;
684
685         /* Put entry in available array (but don't update avail->idx until they
686          * do sync). */
687         avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
688         vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);
689
690         /* Descriptors and available array need to be set before we expose the
691          * new available array entries. */
692         virtio_wmb(vq->weak_barriers);
693         vq->split.avail_idx_shadow++;
694         vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
695                                                 vq->split.avail_idx_shadow);
696         vq->num_added++;
697
698         pr_debug("Added buffer head %i to %p\n", head, vq);
699         END_USE(vq);
700
701         /* This is very unlikely, but theoretically possible.  Kick
702          * just in case. */
703         if (unlikely(vq->num_added == (1 << 16) - 1))
704                 virtqueue_kick(_vq);
705
706         return 0;
707
708 unmap_release:
709         err_idx = i;
710
711         if (indirect)
712                 i = 0;
713         else
714                 i = head;
715
716         for (n = 0; n < total_sg; n++) {
717                 if (i == err_idx)
718                         break;
719                 if (indirect) {
720                         vring_unmap_one_split_indirect(vq, &desc[i]);
721                         i = virtio16_to_cpu(_vq->vdev, desc[i].next);
722                 } else
723                         i = vring_unmap_one_split(vq, i);
724         }
725
726 free_indirect:
727         if (indirect)
728                 kfree(desc);
729
730         END_USE(vq);
731         return -ENOMEM;
732 }
733
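/*
 * Editor's note: illustrative sketch, not part of this file. This is the
 * driver-facing pattern that ends up in virtqueue_add_split() on a split
 * ring: a device-readable (out) sg list, then a device-writable (in) sg
 * list, one virtqueue_add_sgs() call, and a kick. The request layout and
 * names are made up for the example.
 */
static int example_submit(struct virtqueue *vq, void *hdr, size_t hdr_len,
                          void *resp, size_t resp_len, void *token)
{
        struct scatterlist hdr_sg, resp_sg, *sgs[2];
        int err;

        sg_init_one(&hdr_sg, hdr, hdr_len);     /* device reads this */
        sg_init_one(&resp_sg, resp, resp_len);  /* device writes this */
        sgs[0] = &hdr_sg;
        sgs[1] = &resp_sg;

        err = virtqueue_add_sgs(vq, sgs, 1, 1, token, GFP_ATOMIC);
        if (err)
                return err;     /* e.g. -ENOSPC when the ring is full */

        if (virtqueue_kick_prepare(vq))
                virtqueue_notify(vq);
        return 0;
}
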
734 static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
735 {
736         struct vring_virtqueue *vq = to_vvq(_vq);
737         u16 new, old;
738         bool needs_kick;
739
740         START_USE(vq);
741         /* We need to expose available array entries before checking avail
742          * event. */
743         virtio_mb(vq->weak_barriers);
744
745         old = vq->split.avail_idx_shadow - vq->num_added;
746         new = vq->split.avail_idx_shadow;
747         vq->num_added = 0;
748
749         LAST_ADD_TIME_CHECK(vq);
750         LAST_ADD_TIME_INVALID(vq);
751
752         if (vq->event) {
753                 needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev,
754                                         vring_avail_event(&vq->split.vring)),
755                                               new, old);
756         } else {
757                 needs_kick = !(vq->split.vring.used->flags &
758                                         cpu_to_virtio16(_vq->vdev,
759                                                 VRING_USED_F_NO_NOTIFY));
760         }
761         END_USE(vq);
762         return needs_kick;
763 }
764
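/*
 * Editor's note: worked example, not part of this file. vring_need_event()
 * comes from include/uapi/linux/virtio_ring.h; the version below is a
 * from-memory restatement with a worked case, so treat the exact form as a
 * paraphrase. The unsigned wrap-around is what keeps the comparison correct
 * across index overflow.
 */
static inline int example_need_event(u16 event_idx, u16 new_idx, u16 old)
{
        /*
         * True when the interval (old, new_idx] crosses the index the device
         * asked to be kicked at. E.g. old = 10, new = 13, event_idx = 11:
         * (13 - 11 - 1) = 1 is less than (13 - 10) = 3, so a kick is needed;
         * with event_idx = 14 the left side wraps to 65534 and no kick is
         * sent.
         */
        return (u16)(new_idx - event_idx - 1) < (u16)(new_idx - old);
}
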
765 static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
766                              void **ctx)
767 {
768         unsigned int i, j;
769         __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
770
771         /* Clear data ptr. */
772         vq->split.desc_state[head].data = NULL;
773
774         /* Put back on free list: unmap first-level descriptors and find end */
775         i = head;
776
777         while (vq->split.vring.desc[i].flags & nextflag) {
778                 vring_unmap_one_split(vq, i);
779                 i = vq->split.desc_extra[i].next;
780                 vq->vq.num_free++;
781         }
782
783         vring_unmap_one_split(vq, i);
784         vq->split.desc_extra[i].next = vq->free_head;
785         vq->free_head = head;
786
787         /* Plus final descriptor */
788         vq->vq.num_free++;
789
790         if (vq->indirect) {
791                 struct vring_desc *indir_desc =
792                                 vq->split.desc_state[head].indir_desc;
793                 u32 len;
794
795                 /* Free the indirect table, if any, now that it's unmapped. */
796                 if (!indir_desc)
797                         return;
798
799                 len = vq->split.desc_extra[head].len;
800
801                 BUG_ON(!(vq->split.desc_extra[head].flags &
802                                 VRING_DESC_F_INDIRECT));
803                 BUG_ON(len == 0 || len % sizeof(struct vring_desc));
804
805                 if (vq->do_unmap) {
806                         for (j = 0; j < len / sizeof(struct vring_desc); j++)
807                                 vring_unmap_one_split_indirect(vq, &indir_desc[j]);
808                 }
809
810                 kfree(indir_desc);
811                 vq->split.desc_state[head].indir_desc = NULL;
812         } else if (ctx) {
813                 *ctx = vq->split.desc_state[head].indir_desc;
814         }
815 }
816
817 static bool more_used_split(const struct vring_virtqueue *vq)
818 {
819         return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev,
820                         vq->split.vring.used->idx);
821 }
822
823 static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
824                                          unsigned int *len,
825                                          void **ctx)
826 {
827         struct vring_virtqueue *vq = to_vvq(_vq);
828         void *ret;
829         unsigned int i;
830         u16 last_used;
831
832         START_USE(vq);
833
834         if (unlikely(vq->broken)) {
835                 END_USE(vq);
836                 return NULL;
837         }
838
839         if (!more_used_split(vq)) {
840                 pr_debug("No more buffers in queue\n");
841                 END_USE(vq);
842                 return NULL;
843         }
844
845         /* Only get used array entries after they have been exposed by host. */
846         virtio_rmb(vq->weak_barriers);
847
848         last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
849         i = virtio32_to_cpu(_vq->vdev,
850                         vq->split.vring.used->ring[last_used].id);
851         *len = virtio32_to_cpu(_vq->vdev,
852                         vq->split.vring.used->ring[last_used].len);
853
854         if (unlikely(i >= vq->split.vring.num)) {
855                 BAD_RING(vq, "id %u out of range\n", i);
856                 return NULL;
857         }
858         if (unlikely(!vq->split.desc_state[i].data)) {
859                 BAD_RING(vq, "id %u is not a head!\n", i);
860                 return NULL;
861         }
862
863         /* detach_buf_split clears data, so grab it now. */
864         ret = vq->split.desc_state[i].data;
865         detach_buf_split(vq, i, ctx);
866         vq->last_used_idx++;
867         /* If we expect an interrupt for the next entry, tell host
868          * by writing event index and flush out the write before
869          * the read in the next get_buf call. */
870         if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
871                 virtio_store_mb(vq->weak_barriers,
872                                 &vring_used_event(&vq->split.vring),
873                                 cpu_to_virtio16(_vq->vdev, vq->last_used_idx));
874
875         LAST_ADD_TIME_INVALID(vq);
876
877         END_USE(vq);
878         return ret;
879 }
880
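/*
 * Editor's note: illustrative sketch, not part of this file. A typical
 * completion handler drains the used ring through virtqueue_get_buf(),
 * which lands in virtqueue_get_buf_ctx_split() above for split rings. The
 * kfree() stands in for driver-specific completion handling.
 */
static void example_drain_used(struct virtqueue *vq)
{
        unsigned int len;
        void *token;

        /* Each call returns a token passed to virtqueue_add_*() earlier. */
        while ((token = virtqueue_get_buf(vq, &len)) != NULL)
                kfree(token);
}
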
881 static void virtqueue_disable_cb_split(struct virtqueue *_vq)
882 {
883         struct vring_virtqueue *vq = to_vvq(_vq);
884
885         if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
886                 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
887
888                 /*
889                  * If device triggered an event already it won't trigger one again:
890                  * no need to disable.
891                  */
892                 if (vq->event_triggered)
893                         return;
894
895                 if (vq->event)
896                         /* TODO: this is a hack. Figure out a cleaner value to write. */
897                         vring_used_event(&vq->split.vring) = 0x0;
898                 else
899                         vq->split.vring.avail->flags =
900                                 cpu_to_virtio16(_vq->vdev,
901                                                 vq->split.avail_flags_shadow);
902         }
903 }
904
905 static unsigned int virtqueue_enable_cb_prepare_split(struct virtqueue *_vq)
906 {
907         struct vring_virtqueue *vq = to_vvq(_vq);
908         u16 last_used_idx;
909
910         START_USE(vq);
911
912         /* We optimistically turn back on interrupts, then check if there was
913          * more to do. */
914         /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
915          * either clear the flags bit or point the event index at the next
916          * entry. Always do both to keep code simple. */
917         if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
918                 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
919                 if (!vq->event)
920                         vq->split.vring.avail->flags =
921                                 cpu_to_virtio16(_vq->vdev,
922                                                 vq->split.avail_flags_shadow);
923         }
924         vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev,
925                         last_used_idx = vq->last_used_idx);
926         END_USE(vq);
927         return last_used_idx;
928 }
929
930 static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned int last_used_idx)
931 {
932         struct vring_virtqueue *vq = to_vvq(_vq);
933
934         return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev,
935                         vq->split.vring.used->idx);
936 }
937
938 static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
939 {
940         struct vring_virtqueue *vq = to_vvq(_vq);
941         u16 bufs;
942
943         START_USE(vq);
944
945         /* We optimistically turn back on interrupts, then check if there was
946          * more to do. */
947         /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
948          * either clear the flags bit or point the event index at the next
949          * entry. Always update the event index to keep code simple. */
950         if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
951                 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
952                 if (!vq->event)
953                         vq->split.vring.avail->flags =
954                                 cpu_to_virtio16(_vq->vdev,
955                                                 vq->split.avail_flags_shadow);
956         }
957         /* TODO: tune this threshold */
958         bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4;
959
960         virtio_store_mb(vq->weak_barriers,
961                         &vring_used_event(&vq->split.vring),
962                         cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs));
963
964         if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx)
965                                         - vq->last_used_idx) > bufs)) {
966                 END_USE(vq);
967                 return false;
968         }
969
970         END_USE(vq);
971         return true;
972 }
973
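/*
 * Editor's note: illustrative sketch, not part of this file. This is the
 * usual way the delayed-enable primitive is consumed: drain, then try to
 * re-enable callbacks; a false return means buffers arrived in the
 * meantime, so keep polling. example_drain_used() is the sketch above and
 * is not part of this file either.
 */
static void example_poll_until_idle(struct virtqueue *vq)
{
        do {
                example_drain_used(vq);
        } while (!virtqueue_enable_cb_delayed(vq));
}
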
974 static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
975 {
976         struct vring_virtqueue *vq = to_vvq(_vq);
977         unsigned int i;
978         void *buf;
979
980         START_USE(vq);
981
982         for (i = 0; i < vq->split.vring.num; i++) {
983                 if (!vq->split.desc_state[i].data)
984                         continue;
985                 /* detach_buf_split clears data, so grab it now. */
986                 buf = vq->split.desc_state[i].data;
987                 detach_buf_split(vq, i, NULL);
988                 vq->split.avail_idx_shadow--;
989                 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
990                                 vq->split.avail_idx_shadow);
991                 END_USE(vq);
992                 return buf;
993         }
994         /* That should have freed everything. */
995         BUG_ON(vq->vq.num_free != vq->split.vring.num);
996
997         END_USE(vq);
998         return NULL;
999 }
1000
1001 static void virtqueue_vring_init_split(struct vring_virtqueue_split *vring_split,
1002                                        struct vring_virtqueue *vq)
1003 {
1004         struct virtio_device *vdev;
1005
1006         vdev = vq->vq.vdev;
1007
1008         vring_split->avail_flags_shadow = 0;
1009         vring_split->avail_idx_shadow = 0;
1010
1011         /* No callback?  Tell other side not to bother us. */
1012         if (!vq->vq.callback) {
1013                 vring_split->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
1014                 if (!vq->event)
1015                         vring_split->vring.avail->flags = cpu_to_virtio16(vdev,
1016                                         vring_split->avail_flags_shadow);
1017         }
1018 }
1019
1020 static void virtqueue_reinit_split(struct vring_virtqueue *vq)
1021 {
1022         int num;
1023
1024         num = vq->split.vring.num;
1025
1026         vq->split.vring.avail->flags = 0;
1027         vq->split.vring.avail->idx = 0;
1028
1029         /* reset avail event */
1030         vq->split.vring.avail->ring[num] = 0;
1031
1032         vq->split.vring.used->flags = 0;
1033         vq->split.vring.used->idx = 0;
1034
1035         /* reset used event */
1036         *(__virtio16 *)&(vq->split.vring.used->ring[num]) = 0;
1037
1038         virtqueue_init(vq, num);
1039
1040         virtqueue_vring_init_split(&vq->split, vq);
1041 }
1042
1043 static void virtqueue_vring_attach_split(struct vring_virtqueue *vq,
1044                                          struct vring_virtqueue_split *vring_split)
1045 {
1046         vq->split = *vring_split;
1047
1048         /* Put everything in free lists. */
1049         vq->free_head = 0;
1050 }
1051
1052 static int vring_alloc_state_extra_split(struct vring_virtqueue_split *vring_split)
1053 {
1054         struct vring_desc_state_split *state;
1055         struct vring_desc_extra *extra;
1056         u32 num = vring_split->vring.num;
1057
1058         state = kmalloc_array(num, sizeof(struct vring_desc_state_split), GFP_KERNEL);
1059         if (!state)
1060                 goto err_state;
1061
1062         extra = vring_alloc_desc_extra(num);
1063         if (!extra)
1064                 goto err_extra;
1065
1066         memset(state, 0, num * sizeof(struct vring_desc_state_split));
1067
1068         vring_split->desc_state = state;
1069         vring_split->desc_extra = extra;
1070         return 0;
1071
1072 err_extra:
1073         kfree(state);
1074 err_state:
1075         return -ENOMEM;
1076 }
1077
1078 static void vring_free_split(struct vring_virtqueue_split *vring_split,
1079                              struct virtio_device *vdev, struct device *dma_dev)
1080 {
1081         vring_free_queue(vdev, vring_split->queue_size_in_bytes,
1082                          vring_split->vring.desc,
1083                          vring_split->queue_dma_addr,
1084                          dma_dev);
1085
1086         kfree(vring_split->desc_state);
1087         kfree(vring_split->desc_extra);
1088 }
1089
1090 static int vring_alloc_queue_split(struct vring_virtqueue_split *vring_split,
1091                                    struct virtio_device *vdev,
1092                                    u32 num,
1093                                    unsigned int vring_align,
1094                                    bool may_reduce_num,
1095                                    struct device *dma_dev)
1096 {
1097         void *queue = NULL;
1098         dma_addr_t dma_addr;
1099
1100         /* We assume num is a power of 2. */
1101         if (!is_power_of_2(num)) {
1102                 dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
1103                 return -EINVAL;
1104         }
1105
1106         /* TODO: allocate each queue chunk individually */
1107         for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
1108                 queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
1109                                           &dma_addr,
1110                                           GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
1111                                           dma_dev);
1112                 if (queue)
1113                         break;
1114                 if (!may_reduce_num)
1115                         return -ENOMEM;
1116         }
1117
1118         if (!num)
1119                 return -ENOMEM;
1120
1121         if (!queue) {
1122                 /* Try to get a single page. You are my only hope! */
1123                 queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
1124                                           &dma_addr, GFP_KERNEL | __GFP_ZERO,
1125                                           dma_dev);
1126         }
1127         if (!queue)
1128                 return -ENOMEM;
1129
1130         vring_init(&vring_split->vring, num, queue, vring_align);
1131
1132         vring_split->queue_dma_addr = dma_addr;
1133         vring_split->queue_size_in_bytes = vring_size(num, vring_align);
1134
1135         vring_split->vring_align = vring_align;
1136         vring_split->may_reduce_num = may_reduce_num;
1137
1138         return 0;
1139 }
1140
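/*
 * Editor's note: worked example, not part of this file. With the legacy
 * 4096-byte alignment, vring_size() from include/uapi/linux/virtio_ring.h
 * comes out as follows for num = 256 (16-byte descriptors, 2-byte avail
 * entries, 8-byte used entries):
 *
 *   descriptors            256 * 16          = 4096
 *   avail ring             2 * (3 + 256)     =  518
 *   align(4096 + 518, 4096)                  = 8192
 *   used ring              2 * 3 + 8 * 256   = 2054
 *   total                  8192 + 2054       = 10246 bytes
 *
 * The loop above tries to allocate that full size first; only if the
 * allocation fails and may_reduce_num is set does it halve num and retry.
 */
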
1141 static struct virtqueue *vring_create_virtqueue_split(
1142         unsigned int index,
1143         unsigned int num,
1144         unsigned int vring_align,
1145         struct virtio_device *vdev,
1146         bool weak_barriers,
1147         bool may_reduce_num,
1148         bool context,
1149         bool (*notify)(struct virtqueue *),
1150         void (*callback)(struct virtqueue *),
1151         const char *name,
1152         struct device *dma_dev)
1153 {
1154         struct vring_virtqueue_split vring_split = {};
1155         struct virtqueue *vq;
1156         int err;
1157
1158         err = vring_alloc_queue_split(&vring_split, vdev, num, vring_align,
1159                                       may_reduce_num, dma_dev);
1160         if (err)
1161                 return NULL;
1162
1163         vq = __vring_new_virtqueue(index, &vring_split, vdev, weak_barriers,
1164                                    context, notify, callback, name, dma_dev);
1165         if (!vq) {
1166                 vring_free_split(&vring_split, vdev, dma_dev);
1167                 return NULL;
1168         }
1169
1170         to_vvq(vq)->we_own_ring = true;
1171
1172         return vq;
1173 }
1174
1175 static int virtqueue_resize_split(struct virtqueue *_vq, u32 num)
1176 {
1177         struct vring_virtqueue_split vring_split = {};
1178         struct vring_virtqueue *vq = to_vvq(_vq);
1179         struct virtio_device *vdev = _vq->vdev;
1180         int err;
1181
1182         err = vring_alloc_queue_split(&vring_split, vdev, num,
1183                                       vq->split.vring_align,
1184                                       vq->split.may_reduce_num,
1185                                       vring_dma_dev(vq));
1186         if (err)
1187                 goto err;
1188
1189         err = vring_alloc_state_extra_split(&vring_split);
1190         if (err)
1191                 goto err_state_extra;
1192
1193         vring_free(&vq->vq);
1194
1195         virtqueue_vring_init_split(&vring_split, vq);
1196
1197         virtqueue_init(vq, vring_split.vring.num);
1198         virtqueue_vring_attach_split(vq, &vring_split);
1199
1200         return 0;
1201
1202 err_state_extra:
1203         vring_free_split(&vring_split, vdev, vring_dma_dev(vq));
1204 err:
1205         virtqueue_reinit_split(vq);
1206         return -ENOMEM;
1207 }
1208
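/*
 * Editor's note: illustrative sketch, not part of this file. The split-ring
 * resize above is reached through virtqueue_resize() (defined later in this
 * file), which hands any still-queued buffers back to the driver through a
 * recycle callback before reallocating the ring; the recycle-callback form
 * of the API is assumed here, and the callback body and halving policy are
 * placeholders.
 */
static void example_recycle(struct virtqueue *vq, void *buf)
{
        kfree(buf);     /* placeholder: driver-specific cleanup */
}

static int example_shrink_ring(struct virtqueue *vq)
{
        u32 num = virtqueue_get_vring_size(vq) / 2;

        return virtqueue_resize(vq, num, example_recycle);
}
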
1209
1210 /*
1211  * Packed ring specific functions - *_packed().
1212  */
1213 static bool packed_used_wrap_counter(u16 last_used_idx)
1214 {
1215         return !!(last_used_idx & (1 << VRING_PACKED_EVENT_F_WRAP_CTR));
1216 }
1217
1218 static u16 packed_last_used(u16 last_used_idx)
1219 {
1220         return last_used_idx & ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR));
1221 }
1222
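/*
 * Editor's note: illustrative sketch, not part of this file. For packed
 * rings, last_used_idx packs the used index and the used wrap counter into
 * a single u16: bit VRING_PACKED_EVENT_F_WRAP_CTR holds the wrap counter
 * and the bits below it hold the index. The two helpers above take the
 * value apart; the composition below is simply the inverse, written out as
 * an assumed example.
 */
static u16 example_pack_last_used(u16 used_idx, bool used_wrap_counter)
{
        return used_idx | ((u16)used_wrap_counter <<
                           VRING_PACKED_EVENT_F_WRAP_CTR);
}
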
1223 static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
1224                                      const struct vring_desc_extra *extra)
1225 {
1226         u16 flags;
1227
1228         flags = extra->flags;
1229
1230         if (flags & VRING_DESC_F_INDIRECT) {
1231                 if (!vq->use_dma_api)
1232                         return;
1233
1234                 dma_unmap_single(vring_dma_dev(vq),
1235                                  extra->addr, extra->len,
1236                                  (flags & VRING_DESC_F_WRITE) ?
1237                                  DMA_FROM_DEVICE : DMA_TO_DEVICE);
1238         } else {
1239                 if (!vq->do_unmap)
1240                         return;
1241
1242                 dma_unmap_page(vring_dma_dev(vq),
1243                                extra->addr, extra->len,
1244                                (flags & VRING_DESC_F_WRITE) ?
1245                                DMA_FROM_DEVICE : DMA_TO_DEVICE);
1246         }
1247 }
1248
1249 static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
1250                                     const struct vring_packed_desc *desc)
1251 {
1252         u16 flags;
1253
1254         if (!vq->do_unmap)
1255                 return;
1256
1257         flags = le16_to_cpu(desc->flags);
1258
1259         dma_unmap_page(vring_dma_dev(vq),
1260                        le64_to_cpu(desc->addr),
1261                        le32_to_cpu(desc->len),
1262                        (flags & VRING_DESC_F_WRITE) ?
1263                        DMA_FROM_DEVICE : DMA_TO_DEVICE);
1264 }
1265
1266 static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg,
1267                                                        gfp_t gfp)
1268 {
1269         struct vring_packed_desc *desc;
1270
1271         /*
1272          * We require lowmem mappings for the descriptors because
1273          * otherwise virt_to_phys will give us bogus addresses in the
1274          * virtqueue.
1275          */
1276         gfp &= ~__GFP_HIGHMEM;
1277
1278         desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp);
1279
1280         return desc;
1281 }
1282
1283 static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
1284                                          struct scatterlist *sgs[],
1285                                          unsigned int total_sg,
1286                                          unsigned int out_sgs,
1287                                          unsigned int in_sgs,
1288                                          void *data,
1289                                          gfp_t gfp)
1290 {
1291         struct vring_packed_desc *desc;
1292         struct scatterlist *sg;
1293         unsigned int i, n, err_idx;
1294         u16 head, id;
1295         dma_addr_t addr;
1296
1297         head = vq->packed.next_avail_idx;
1298         desc = alloc_indirect_packed(total_sg, gfp);
1299         if (!desc)
1300                 return -ENOMEM;
1301
1302         if (unlikely(vq->vq.num_free < 1)) {
1303                 pr_debug("Can't add buf len 1 - avail = 0\n");
1304                 kfree(desc);
1305                 END_USE(vq);
1306                 return -ENOSPC;
1307         }
1308
1309         i = 0;
1310         id = vq->free_head;
1311         BUG_ON(id == vq->packed.vring.num);
1312
1313         for (n = 0; n < out_sgs + in_sgs; n++) {
1314                 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1315                         if (vring_map_one_sg(vq, sg, n < out_sgs ?
1316                                              DMA_TO_DEVICE : DMA_FROM_DEVICE, &addr))
1317                                 goto unmap_release;
1318
1319                         desc[i].flags = cpu_to_le16(n < out_sgs ?
1320                                                 0 : VRING_DESC_F_WRITE);
1321                         desc[i].addr = cpu_to_le64(addr);
1322                         desc[i].len = cpu_to_le32(sg->length);
1323                         i++;
1324                 }
1325         }
1326
1327         /* Now that the indirect table is filled in, map it. */
1328         addr = vring_map_single(vq, desc,
1329                         total_sg * sizeof(struct vring_packed_desc),
1330                         DMA_TO_DEVICE);
1331         if (vring_mapping_error(vq, addr)) {
1332                 if (vq->premapped)
1333                         goto free_desc;
1334
1335                 goto unmap_release;
1336         }
1337
1338         vq->packed.vring.desc[head].addr = cpu_to_le64(addr);
1339         vq->packed.vring.desc[head].len = cpu_to_le32(total_sg *
1340                                 sizeof(struct vring_packed_desc));
1341         vq->packed.vring.desc[head].id = cpu_to_le16(id);
1342
1343         if (vq->do_unmap) {
1344                 vq->packed.desc_extra[id].addr = addr;
1345                 vq->packed.desc_extra[id].len = total_sg *
1346                                 sizeof(struct vring_packed_desc);
1347                 vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT |
1348                                                   vq->packed.avail_used_flags;
1349         }
1350
1351         /*
1352          * A driver MUST NOT make the first descriptor in the list
1353          * available before all subsequent descriptors comprising
1354          * the list are made available.
1355          */
1356         virtio_wmb(vq->weak_barriers);
1357         vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT |
1358                                                 vq->packed.avail_used_flags);
1359
1360         /* We're using some buffers from the free list. */
1361         vq->vq.num_free -= 1;
1362
1363         /* Update free pointer */
1364         n = head + 1;
1365         if (n >= vq->packed.vring.num) {
1366                 n = 0;
1367                 vq->packed.avail_wrap_counter ^= 1;
1368                 vq->packed.avail_used_flags ^=
1369                                 1 << VRING_PACKED_DESC_F_AVAIL |
1370                                 1 << VRING_PACKED_DESC_F_USED;
1371         }
1372         vq->packed.next_avail_idx = n;
1373         vq->free_head = vq->packed.desc_extra[id].next;
1374
1375         /* Store token and indirect buffer state. */
1376         vq->packed.desc_state[id].num = 1;
1377         vq->packed.desc_state[id].data = data;
1378         vq->packed.desc_state[id].indir_desc = desc;
1379         vq->packed.desc_state[id].last = id;
1380
1381         vq->num_added += 1;
1382
1383         pr_debug("Added buffer head %i to %p\n", head, vq);
1384         END_USE(vq);
1385
1386         return 0;
1387
1388 unmap_release:
1389         err_idx = i;
1390
1391         for (i = 0; i < err_idx; i++)
1392                 vring_unmap_desc_packed(vq, &desc[i]);
1393
1394 free_desc:
1395         kfree(desc);
1396
1397         END_USE(vq);
1398         return -ENOMEM;
1399 }
1400
1401 static inline int virtqueue_add_packed(struct virtqueue *_vq,
1402                                        struct scatterlist *sgs[],
1403                                        unsigned int total_sg,
1404                                        unsigned int out_sgs,
1405                                        unsigned int in_sgs,
1406                                        void *data,
1407                                        void *ctx,
1408                                        gfp_t gfp)
1409 {
1410         struct vring_virtqueue *vq = to_vvq(_vq);
1411         struct vring_packed_desc *desc;
1412         struct scatterlist *sg;
1413         unsigned int i, n, c, descs_used, err_idx;
1414         __le16 head_flags, flags;
1415         u16 head, id, prev, curr, avail_used_flags;
1416         int err;
1417
1418         START_USE(vq);
1419
1420         BUG_ON(data == NULL);
1421         BUG_ON(ctx && vq->indirect);
1422
1423         if (unlikely(vq->broken)) {
1424                 END_USE(vq);
1425                 return -EIO;
1426         }
1427
1428         LAST_ADD_TIME_UPDATE(vq);
1429
1430         BUG_ON(total_sg == 0);
1431
1432         if (virtqueue_use_indirect(vq, total_sg)) {
1433                 err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
1434                                                     in_sgs, data, gfp);
1435                 if (err != -ENOMEM) {
1436                         END_USE(vq);
1437                         return err;
1438                 }
1439
1440                 /* fall back on direct */
1441         }
1442
1443         head = vq->packed.next_avail_idx;
1444         avail_used_flags = vq->packed.avail_used_flags;
1445
1446         WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect);
1447
1448         desc = vq->packed.vring.desc;
1449         i = head;
1450         descs_used = total_sg;
1451
1452         if (unlikely(vq->vq.num_free < descs_used)) {
1453                 pr_debug("Can't add buf len %i - avail = %i\n",
1454                          descs_used, vq->vq.num_free);
1455                 END_USE(vq);
1456                 return -ENOSPC;
1457         }
1458
1459         id = vq->free_head;
1460         BUG_ON(id == vq->packed.vring.num);
1461
1462         curr = id;
1463         c = 0;
1464         for (n = 0; n < out_sgs + in_sgs; n++) {
1465                 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1466                         dma_addr_t addr;
1467
1468                         if (vring_map_one_sg(vq, sg, n < out_sgs ?
1469                                              DMA_TO_DEVICE : DMA_FROM_DEVICE, &addr))
1470                                 goto unmap_release;
1471
1472                         flags = cpu_to_le16(vq->packed.avail_used_flags |
1473                                     (++c == total_sg ? 0 : VRING_DESC_F_NEXT) |
1474                                     (n < out_sgs ? 0 : VRING_DESC_F_WRITE));
1475                         if (i == head)
1476                                 head_flags = flags;
1477                         else
1478                                 desc[i].flags = flags;
1479
1480                         desc[i].addr = cpu_to_le64(addr);
1481                         desc[i].len = cpu_to_le32(sg->length);
1482                         desc[i].id = cpu_to_le16(id);
1483
1484                         if (unlikely(vq->do_unmap)) {
1485                                 vq->packed.desc_extra[curr].addr = addr;
1486                                 vq->packed.desc_extra[curr].len = sg->length;
1487                                 vq->packed.desc_extra[curr].flags =
1488                                         le16_to_cpu(flags);
1489                         }
1490                         prev = curr;
1491                         curr = vq->packed.desc_extra[curr].next;
1492
1493                         if ((unlikely(++i >= vq->packed.vring.num))) {
1494                                 i = 0;
1495                                 vq->packed.avail_used_flags ^=
1496                                         1 << VRING_PACKED_DESC_F_AVAIL |
1497                                         1 << VRING_PACKED_DESC_F_USED;
1498                         }
1499                 }
1500         }
1501
1502         if (i < head)
1503                 vq->packed.avail_wrap_counter ^= 1;
1504
1505         /* We're using some buffers from the free list. */
1506         vq->vq.num_free -= descs_used;
1507
1508         /* Update free pointer */
1509         vq->packed.next_avail_idx = i;
1510         vq->free_head = curr;
1511
1512         /* Store token. */
1513         vq->packed.desc_state[id].num = descs_used;
1514         vq->packed.desc_state[id].data = data;
1515         vq->packed.desc_state[id].indir_desc = ctx;
1516         vq->packed.desc_state[id].last = prev;
1517
1518         /*
1519          * A driver MUST NOT make the first descriptor in the list
1520          * available before all subsequent descriptors comprising
1521          * the list are made available.
1522          */
1523         virtio_wmb(vq->weak_barriers);
1524         vq->packed.vring.desc[head].flags = head_flags;
1525         vq->num_added += descs_used;
1526
1527         pr_debug("Added buffer head %i to %p\n", head, vq);
1528         END_USE(vq);
1529
1530         return 0;
1531
1532 unmap_release:
1533         err_idx = i;
1534         i = head;
1535         curr = vq->free_head;
1536
1537         vq->packed.avail_used_flags = avail_used_flags;
1538
1539         for (n = 0; n < total_sg; n++) {
1540                 if (i == err_idx)
1541                         break;
1542                 vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]);
1543                 curr = vq->packed.desc_extra[curr].next;
1544                 i++;
1545                 if (i >= vq->packed.vring.num)
1546                         i = 0;
1547         }
1548
1549         END_USE(vq);
1550         return -EIO;
1551 }
1552
1553 static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
1554 {
1555         struct vring_virtqueue *vq = to_vvq(_vq);
1556         u16 new, old, off_wrap, flags, wrap_counter, event_idx;
1557         bool needs_kick;
1558         union {
1559                 struct {
1560                         __le16 off_wrap;
1561                         __le16 flags;
1562                 };
1563                 u32 u32;
1564         } snapshot;
1565
1566         START_USE(vq);
1567
1568         /*
1569          * We need to expose the new flags value before checking notification
1570          * suppressions.
1571          */
1572         virtio_mb(vq->weak_barriers);
1573
1574         old = vq->packed.next_avail_idx - vq->num_added;
1575         new = vq->packed.next_avail_idx;
1576         vq->num_added = 0;
1577
1578         snapshot.u32 = *(u32 *)vq->packed.vring.device;
1579         flags = le16_to_cpu(snapshot.flags);
1580
1581         LAST_ADD_TIME_CHECK(vq);
1582         LAST_ADD_TIME_INVALID(vq);
1583
1584         if (flags != VRING_PACKED_EVENT_FLAG_DESC) {
1585                 needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE);
1586                 goto out;
1587         }
1588
1589         off_wrap = le16_to_cpu(snapshot.off_wrap);
1590
1591         wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1592         event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1593         if (wrap_counter != vq->packed.avail_wrap_counter)
1594                 event_idx -= vq->packed.vring.num;
1595
1596         needs_kick = vring_need_event(event_idx, new, old);
1597 out:
1598         END_USE(vq);
1599         return needs_kick;
1600 }
1601
1602 static void detach_buf_packed(struct vring_virtqueue *vq,
1603                               unsigned int id, void **ctx)
1604 {
1605         struct vring_desc_state_packed *state = NULL;
1606         struct vring_packed_desc *desc;
1607         unsigned int i, curr;
1608
1609         state = &vq->packed.desc_state[id];
1610
1611         /* Clear data ptr. */
1612         state->data = NULL;
1613
1614         vq->packed.desc_extra[state->last].next = vq->free_head;
1615         vq->free_head = id;
1616         vq->vq.num_free += state->num;
1617
1618         if (unlikely(vq->do_unmap)) {
1619                 curr = id;
1620                 for (i = 0; i < state->num; i++) {
1621                         vring_unmap_extra_packed(vq,
1622                                                  &vq->packed.desc_extra[curr]);
1623                         curr = vq->packed.desc_extra[curr].next;
1624                 }
1625         }
1626
1627         if (vq->indirect) {
1628                 u32 len;
1629
1630                 /* Free the indirect table, if any, now that it's unmapped. */
1631                 desc = state->indir_desc;
1632                 if (!desc)
1633                         return;
1634
1635                 if (vq->do_unmap) {
1636                         len = vq->packed.desc_extra[id].len;
1637                         for (i = 0; i < len / sizeof(struct vring_packed_desc);
1638                                         i++)
1639                                 vring_unmap_desc_packed(vq, &desc[i]);
1640                 }
1641                 kfree(desc);
1642                 state->indir_desc = NULL;
1643         } else if (ctx) {
1644                 *ctx = state->indir_desc;
1645         }
1646 }
1647
1648 static inline bool is_used_desc_packed(const struct vring_virtqueue *vq,
1649                                        u16 idx, bool used_wrap_counter)
1650 {
1651         bool avail, used;
1652         u16 flags;
1653
1654         flags = le16_to_cpu(vq->packed.vring.desc[idx].flags);
1655         avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
1656         used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
1657
1658         return avail == used && used == used_wrap_counter;
1659 }
1660
1661 static bool more_used_packed(const struct vring_virtqueue *vq)
1662 {
1663         u16 last_used;
1664         u16 last_used_idx;
1665         bool used_wrap_counter;
1666
1667         last_used_idx = READ_ONCE(vq->last_used_idx);
1668         last_used = packed_last_used(last_used_idx);
1669         used_wrap_counter = packed_used_wrap_counter(last_used_idx);
1670         return is_used_desc_packed(vq, last_used, used_wrap_counter);
1671 }
1672
1673 static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
1674                                           unsigned int *len,
1675                                           void **ctx)
1676 {
1677         struct vring_virtqueue *vq = to_vvq(_vq);
1678         u16 last_used, id, last_used_idx;
1679         bool used_wrap_counter;
1680         void *ret;
1681
1682         START_USE(vq);
1683
1684         if (unlikely(vq->broken)) {
1685                 END_USE(vq);
1686                 return NULL;
1687         }
1688
1689         if (!more_used_packed(vq)) {
1690                 pr_debug("No more buffers in queue\n");
1691                 END_USE(vq);
1692                 return NULL;
1693         }
1694
1695         /* Only get used elements after they have been exposed by host. */
1696         virtio_rmb(vq->weak_barriers);
1697
1698         last_used_idx = READ_ONCE(vq->last_used_idx);
1699         used_wrap_counter = packed_used_wrap_counter(last_used_idx);
1700         last_used = packed_last_used(last_used_idx);
1701         id = le16_to_cpu(vq->packed.vring.desc[last_used].id);
1702         *len = le32_to_cpu(vq->packed.vring.desc[last_used].len);
1703
1704         if (unlikely(id >= vq->packed.vring.num)) {
1705                 BAD_RING(vq, "id %u out of range\n", id);
1706                 return NULL;
1707         }
1708         if (unlikely(!vq->packed.desc_state[id].data)) {
1709                 BAD_RING(vq, "id %u is not a head!\n", id);
1710                 return NULL;
1711         }
1712
1713         /* detach_buf_packed clears data, so grab it now. */
1714         ret = vq->packed.desc_state[id].data;
1715         detach_buf_packed(vq, id, ctx);
1716
1717         last_used += vq->packed.desc_state[id].num;
1718         if (unlikely(last_used >= vq->packed.vring.num)) {
1719                 last_used -= vq->packed.vring.num;
1720                 used_wrap_counter ^= 1;
1721         }
1722
1723         last_used = (last_used | (used_wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
1724         WRITE_ONCE(vq->last_used_idx, last_used);
1725
1726         /*
1727          * If we expect an interrupt for the next entry, tell host
1728          * by writing event index and flush out the write before
1729          * the read in the next get_buf call.
1730          */
1731         if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC)
1732                 virtio_store_mb(vq->weak_barriers,
1733                                 &vq->packed.vring.driver->off_wrap,
1734                                 cpu_to_le16(vq->last_used_idx));
1735
1736         LAST_ADD_TIME_INVALID(vq);
1737
1738         END_USE(vq);
1739         return ret;
1740 }
1741
1742 static void virtqueue_disable_cb_packed(struct virtqueue *_vq)
1743 {
1744         struct vring_virtqueue *vq = to_vvq(_vq);
1745
1746         if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) {
1747                 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1748
1749                 /*
1750                  * If device triggered an event already it won't trigger one again:
1751                  * no need to disable.
1752                  */
1753                 if (vq->event_triggered)
1754                         return;
1755
1756                 vq->packed.vring.driver->flags =
1757                         cpu_to_le16(vq->packed.event_flags_shadow);
1758         }
1759 }
1760
1761 static unsigned int virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
1762 {
1763         struct vring_virtqueue *vq = to_vvq(_vq);
1764
1765         START_USE(vq);
1766
1767         /*
1768          * We optimistically turn back on interrupts, then check if there was
1769          * more to do.
1770          */
1771
1772         if (vq->event) {
1773                 vq->packed.vring.driver->off_wrap =
1774                         cpu_to_le16(vq->last_used_idx);
1775                 /*
1776                  * We need to update event offset and event wrap
1777                  * counter first before updating event flags.
1778                  */
1779                 virtio_wmb(vq->weak_barriers);
1780         }
1781
1782         if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
1783                 vq->packed.event_flags_shadow = vq->event ?
1784                                 VRING_PACKED_EVENT_FLAG_DESC :
1785                                 VRING_PACKED_EVENT_FLAG_ENABLE;
1786                 vq->packed.vring.driver->flags =
1787                                 cpu_to_le16(vq->packed.event_flags_shadow);
1788         }
1789
1790         END_USE(vq);
1791         return vq->last_used_idx;
1792 }
1793
1794 static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap)
1795 {
1796         struct vring_virtqueue *vq = to_vvq(_vq);
1797         bool wrap_counter;
1798         u16 used_idx;
1799
1800         wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1801         used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1802
1803         return is_used_desc_packed(vq, used_idx, wrap_counter);
1804 }
1805
1806 static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
1807 {
1808         struct vring_virtqueue *vq = to_vvq(_vq);
1809         u16 used_idx, wrap_counter, last_used_idx;
1810         u16 bufs;
1811
1812         START_USE(vq);
1813
1814         /*
1815          * We optimistically turn back on interrupts, then check if there was
1816          * more to do.
1817          */
1818
1819         if (vq->event) {
1820                 /* TODO: tune this threshold */
1821                 bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4;
1822                 last_used_idx = READ_ONCE(vq->last_used_idx);
1823                 wrap_counter = packed_used_wrap_counter(last_used_idx);
1824
1825                 used_idx = packed_last_used(last_used_idx) + bufs;
1826                 if (used_idx >= vq->packed.vring.num) {
1827                         used_idx -= vq->packed.vring.num;
1828                         wrap_counter ^= 1;
1829                 }
1830
1831                 vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx |
1832                         (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
1833
1834                 /*
1835                  * We need to update event offset and event wrap
1836                  * counter first before updating event flags.
1837                  */
1838                 virtio_wmb(vq->weak_barriers);
1839         }
1840
1841         if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
1842                 vq->packed.event_flags_shadow = vq->event ?
1843                                 VRING_PACKED_EVENT_FLAG_DESC :
1844                                 VRING_PACKED_EVENT_FLAG_ENABLE;
1845                 vq->packed.vring.driver->flags =
1846                                 cpu_to_le16(vq->packed.event_flags_shadow);
1847         }
1848
1849         /*
1850          * We need to update event suppression structure first
1851          * before re-checking for more used buffers.
1852          */
1853         virtio_mb(vq->weak_barriers);
1854
1855         last_used_idx = READ_ONCE(vq->last_used_idx);
1856         wrap_counter = packed_used_wrap_counter(last_used_idx);
1857         used_idx = packed_last_used(last_used_idx);
1858         if (is_used_desc_packed(vq, used_idx, wrap_counter)) {
1859                 END_USE(vq);
1860                 return false;
1861         }
1862
1863         END_USE(vq);
1864         return true;
1865 }
1866
1867 static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
1868 {
1869         struct vring_virtqueue *vq = to_vvq(_vq);
1870         unsigned int i;
1871         void *buf;
1872
1873         START_USE(vq);
1874
1875         for (i = 0; i < vq->packed.vring.num; i++) {
1876                 if (!vq->packed.desc_state[i].data)
1877                         continue;
1878                 /* detach_buf clears data, so grab it now. */
1879                 buf = vq->packed.desc_state[i].data;
1880                 detach_buf_packed(vq, i, NULL);
1881                 END_USE(vq);
1882                 return buf;
1883         }
1884         /* That should have freed everything. */
1885         BUG_ON(vq->vq.num_free != vq->packed.vring.num);
1886
1887         END_USE(vq);
1888         return NULL;
1889 }
1890
1891 static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num)
1892 {
1893         struct vring_desc_extra *desc_extra;
1894         unsigned int i;
1895
1896         desc_extra = kmalloc_array(num, sizeof(struct vring_desc_extra),
1897                                    GFP_KERNEL);
1898         if (!desc_extra)
1899                 return NULL;
1900
1901         memset(desc_extra, 0, num * sizeof(struct vring_desc_extra));
1902
1903         for (i = 0; i < num - 1; i++)
1904                 desc_extra[i].next = i + 1;
1905
1906         return desc_extra;
1907 }
1908
1909 static void vring_free_packed(struct vring_virtqueue_packed *vring_packed,
1910                               struct virtio_device *vdev,
1911                               struct device *dma_dev)
1912 {
1913         if (vring_packed->vring.desc)
1914                 vring_free_queue(vdev, vring_packed->ring_size_in_bytes,
1915                                  vring_packed->vring.desc,
1916                                  vring_packed->ring_dma_addr,
1917                                  dma_dev);
1918
1919         if (vring_packed->vring.driver)
1920                 vring_free_queue(vdev, vring_packed->event_size_in_bytes,
1921                                  vring_packed->vring.driver,
1922                                  vring_packed->driver_event_dma_addr,
1923                                  dma_dev);
1924
1925         if (vring_packed->vring.device)
1926                 vring_free_queue(vdev, vring_packed->event_size_in_bytes,
1927                                  vring_packed->vring.device,
1928                                  vring_packed->device_event_dma_addr,
1929                                  dma_dev);
1930
1931         kfree(vring_packed->desc_state);
1932         kfree(vring_packed->desc_extra);
1933 }
1934
1935 static int vring_alloc_queue_packed(struct vring_virtqueue_packed *vring_packed,
1936                                     struct virtio_device *vdev,
1937                                     u32 num, struct device *dma_dev)
1938 {
1939         struct vring_packed_desc *ring;
1940         struct vring_packed_desc_event *driver, *device;
1941         dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
1942         size_t ring_size_in_bytes, event_size_in_bytes;
1943
1944         ring_size_in_bytes = num * sizeof(struct vring_packed_desc);
1945
1946         ring = vring_alloc_queue(vdev, ring_size_in_bytes,
1947                                  &ring_dma_addr,
1948                                  GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
1949                                  dma_dev);
1950         if (!ring)
1951                 goto err;
1952
1953         vring_packed->vring.desc         = ring;
1954         vring_packed->ring_dma_addr      = ring_dma_addr;
1955         vring_packed->ring_size_in_bytes = ring_size_in_bytes;
1956
1957         event_size_in_bytes = sizeof(struct vring_packed_desc_event);
1958
1959         driver = vring_alloc_queue(vdev, event_size_in_bytes,
1960                                    &driver_event_dma_addr,
1961                                    GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
1962                                    dma_dev);
1963         if (!driver)
1964                 goto err;
1965
1966         vring_packed->vring.driver          = driver;
1967         vring_packed->event_size_in_bytes   = event_size_in_bytes;
1968         vring_packed->driver_event_dma_addr = driver_event_dma_addr;
1969
1970         device = vring_alloc_queue(vdev, event_size_in_bytes,
1971                                    &device_event_dma_addr,
1972                                    GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
1973                                    dma_dev);
1974         if (!device)
1975                 goto err;
1976
1977         vring_packed->vring.device          = device;
1978         vring_packed->device_event_dma_addr = device_event_dma_addr;
1979
1980         vring_packed->vring.num = num;
1981
1982         return 0;
1983
1984 err:
1985         vring_free_packed(vring_packed, vdev, dma_dev);
1986         return -ENOMEM;
1987 }
1988
1989 static int vring_alloc_state_extra_packed(struct vring_virtqueue_packed *vring_packed)
1990 {
1991         struct vring_desc_state_packed *state;
1992         struct vring_desc_extra *extra;
1993         u32 num = vring_packed->vring.num;
1994
1995         state = kmalloc_array(num, sizeof(struct vring_desc_state_packed), GFP_KERNEL);
1996         if (!state)
1997                 goto err_desc_state;
1998
1999         memset(state, 0, num * sizeof(struct vring_desc_state_packed));
2000
2001         extra = vring_alloc_desc_extra(num);
2002         if (!extra)
2003                 goto err_desc_extra;
2004
2005         vring_packed->desc_state = state;
2006         vring_packed->desc_extra = extra;
2007
2008         return 0;
2009
2010 err_desc_extra:
2011         kfree(state);
2012 err_desc_state:
2013         return -ENOMEM;
2014 }
2015
2016 static void virtqueue_vring_init_packed(struct vring_virtqueue_packed *vring_packed,
2017                                         bool callback)
2018 {
2019         vring_packed->next_avail_idx = 0;
2020         vring_packed->avail_wrap_counter = 1;
2021         vring_packed->event_flags_shadow = 0;
2022         vring_packed->avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL;
2023
2024         /* No callback?  Tell other side not to bother us. */
2025         if (!callback) {
2026                 vring_packed->event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
2027                 vring_packed->vring.driver->flags =
2028                         cpu_to_le16(vring_packed->event_flags_shadow);
2029         }
2030 }
2031
2032 static void virtqueue_vring_attach_packed(struct vring_virtqueue *vq,
2033                                           struct vring_virtqueue_packed *vring_packed)
2034 {
2035         vq->packed = *vring_packed;
2036
2037         /* Put everything in free lists. */
2038         vq->free_head = 0;
2039 }
2040
2041 static void virtqueue_reinit_packed(struct vring_virtqueue *vq)
2042 {
2043         memset(vq->packed.vring.device, 0, vq->packed.event_size_in_bytes);
2044         memset(vq->packed.vring.driver, 0, vq->packed.event_size_in_bytes);
2045
2046         /* we need to reset the desc.flags. For more, see is_used_desc_packed() */
2047         memset(vq->packed.vring.desc, 0, vq->packed.ring_size_in_bytes);
2048
2049         virtqueue_init(vq, vq->packed.vring.num);
2050         virtqueue_vring_init_packed(&vq->packed, !!vq->vq.callback);
2051 }
2052
2053 static struct virtqueue *vring_create_virtqueue_packed(
2054         unsigned int index,
2055         unsigned int num,
2056         unsigned int vring_align,
2057         struct virtio_device *vdev,
2058         bool weak_barriers,
2059         bool may_reduce_num,
2060         bool context,
2061         bool (*notify)(struct virtqueue *),
2062         void (*callback)(struct virtqueue *),
2063         const char *name,
2064         struct device *dma_dev)
2065 {
2066         struct vring_virtqueue_packed vring_packed = {};
2067         struct vring_virtqueue *vq;
2068         int err;
2069
2070         if (vring_alloc_queue_packed(&vring_packed, vdev, num, dma_dev))
2071                 goto err_ring;
2072
2073         vq = kmalloc(sizeof(*vq), GFP_KERNEL);
2074         if (!vq)
2075                 goto err_vq;
2076
2077         vq->vq.callback = callback;
2078         vq->vq.vdev = vdev;
2079         vq->vq.name = name;
2080         vq->vq.index = index;
2081         vq->vq.reset = false;
2082         vq->we_own_ring = true;
2083         vq->notify = notify;
2084         vq->weak_barriers = weak_barriers;
2085 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
2086         vq->broken = true;
2087 #else
2088         vq->broken = false;
2089 #endif
2090         vq->packed_ring = true;
2091         vq->dma_dev = dma_dev;
2092         vq->use_dma_api = vring_use_dma_api(vdev);
2093         vq->premapped = false;
2094         vq->do_unmap = vq->use_dma_api;
2095
2096         vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
2097                 !context;
2098         vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
2099
2100         if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
2101                 vq->weak_barriers = false;
2102
2103         err = vring_alloc_state_extra_packed(&vring_packed);
2104         if (err)
2105                 goto err_state_extra;
2106
2107         virtqueue_vring_init_packed(&vring_packed, !!callback);
2108
2109         virtqueue_init(vq, num);
2110         virtqueue_vring_attach_packed(vq, &vring_packed);
2111
2112         spin_lock(&vdev->vqs_list_lock);
2113         list_add_tail(&vq->vq.list, &vdev->vqs);
2114         spin_unlock(&vdev->vqs_list_lock);
2115         return &vq->vq;
2116
2117 err_state_extra:
2118         kfree(vq);
2119 err_vq:
2120         vring_free_packed(&vring_packed, vdev, dma_dev);
2121 err_ring:
2122         return NULL;
2123 }
2124
2125 static int virtqueue_resize_packed(struct virtqueue *_vq, u32 num)
2126 {
2127         struct vring_virtqueue_packed vring_packed = {};
2128         struct vring_virtqueue *vq = to_vvq(_vq);
2129         struct virtio_device *vdev = _vq->vdev;
2130         int err;
2131
2132         if (vring_alloc_queue_packed(&vring_packed, vdev, num, vring_dma_dev(vq)))
2133                 goto err_ring;
2134
2135         err = vring_alloc_state_extra_packed(&vring_packed);
2136         if (err)
2137                 goto err_state_extra;
2138
2139         vring_free(&vq->vq);
2140
2141         virtqueue_vring_init_packed(&vring_packed, !!vq->vq.callback);
2142
2143         virtqueue_init(vq, vring_packed.vring.num);
2144         virtqueue_vring_attach_packed(vq, &vring_packed);
2145
2146         return 0;
2147
2148 err_state_extra:
2149         vring_free_packed(&vring_packed, vdev, vring_dma_dev(vq));
2150 err_ring:
2151         virtqueue_reinit_packed(vq);
2152         return -ENOMEM;
2153 }
2154
2155
2156 /*
2157  * Generic functions and exported symbols.
2158  */
2159
2160 static inline int virtqueue_add(struct virtqueue *_vq,
2161                                 struct scatterlist *sgs[],
2162                                 unsigned int total_sg,
2163                                 unsigned int out_sgs,
2164                                 unsigned int in_sgs,
2165                                 void *data,
2166                                 void *ctx,
2167                                 gfp_t gfp)
2168 {
2169         struct vring_virtqueue *vq = to_vvq(_vq);
2170
2171         return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg,
2172                                         out_sgs, in_sgs, data, ctx, gfp) :
2173                                  virtqueue_add_split(_vq, sgs, total_sg,
2174                                         out_sgs, in_sgs, data, ctx, gfp);
2175 }
2176
2177 /**
2178  * virtqueue_add_sgs - expose buffers to other end
2179  * @_vq: the struct virtqueue we're talking about.
2180  * @sgs: array of terminated scatterlists.
2181  * @out_sgs: the number of scatterlists readable by other side
2182  * @in_sgs: the number of scatterlists which are writable (after readable ones)
2183  * @data: the token identifying the buffer.
2184  * @gfp: how to do memory allocations (if necessary).
2185  *
2186  * Caller must ensure we don't call this with other virtqueue operations
2187  * at the same time (except where noted).
2188  *
2189  * Returns zero or a negative error (e.g. -ENOSPC, -ENOMEM, -EIO).
2190  */
2191 int virtqueue_add_sgs(struct virtqueue *_vq,
2192                       struct scatterlist *sgs[],
2193                       unsigned int out_sgs,
2194                       unsigned int in_sgs,
2195                       void *data,
2196                       gfp_t gfp)
2197 {
2198         unsigned int i, total_sg = 0;
2199
2200         /* Count them first. */
2201         for (i = 0; i < out_sgs + in_sgs; i++) {
2202                 struct scatterlist *sg;
2203
2204                 for (sg = sgs[i]; sg; sg = sg_next(sg))
2205                         total_sg++;
2206         }
2207         return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs,
2208                              data, NULL, gfp);
2209 }
2210 EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
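
/*
 * Illustrative example (not taken from an in-tree driver): a caller
 * typically builds one scatterlist per buffer and groups the readable
 * ones before the writable ones.  "struct my_req", "req" and "vq" below
 * are hypothetical:
 *
 *      struct scatterlist hdr, resp, *sgs[2];
 *      int err;
 *
 *      sg_init_one(&hdr, &req->hdr, sizeof(req->hdr));
 *      sgs[0] = &hdr;          // readable by the device
 *      sg_init_one(&resp, &req->resp, sizeof(req->resp));
 *      sgs[1] = &resp;         // writable by the device
 *
 *      err = virtqueue_add_sgs(vq, sgs, 1, 1, req, GFP_ATOMIC);
 *      if (!err)
 *              virtqueue_kick(vq);
 */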
2211
2212 /**
2213  * virtqueue_add_outbuf - expose output buffers to other end
2214  * @vq: the struct virtqueue we're talking about.
2215  * @sg: scatterlist (must be well-formed and terminated!)
2216  * @num: the number of entries in @sg readable by other side
2217  * @data: the token identifying the buffer.
2218  * @gfp: how to do memory allocations (if necessary).
2219  *
2220  * Caller must ensure we don't call this with other virtqueue operations
2221  * at the same time (except where noted).
2222  *
2223  * Returns zero or a negative error (e.g. -ENOSPC, -ENOMEM, -EIO).
2224  */
2225 int virtqueue_add_outbuf(struct virtqueue *vq,
2226                          struct scatterlist *sg, unsigned int num,
2227                          void *data,
2228                          gfp_t gfp)
2229 {
2230         return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp);
2231 }
2232 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
2233
2234 /**
2235  * virtqueue_add_inbuf - expose input buffers to other end
2236  * @vq: the struct virtqueue we're talking about.
2237  * @sg: scatterlist (must be well-formed and terminated!)
2238  * @num: the number of entries in @sg writable by other side
2239  * @data: the token identifying the buffer.
2240  * @gfp: how to do memory allocations (if necessary).
2241  *
2242  * Caller must ensure we don't call this with other virtqueue operations
2243  * at the same time (except where noted).
2244  *
2245  * Returns zero or a negative error (e.g. -ENOSPC, -ENOMEM, -EIO).
2246  */
2247 int virtqueue_add_inbuf(struct virtqueue *vq,
2248                         struct scatterlist *sg, unsigned int num,
2249                         void *data,
2250                         gfp_t gfp)
2251 {
2252         return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp);
2253 }
2254 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
2255
2256 /**
2257  * virtqueue_add_inbuf_ctx - expose input buffers to other end
2258  * @vq: the struct virtqueue we're talking about.
2259  * @sg: scatterlist (must be well-formed and terminated!)
2260  * @num: the number of entries in @sg writable by other side
2261  * @data: the token identifying the buffer.
2262  * @ctx: extra context for the token
2263  * @gfp: how to do memory allocations (if necessary).
2264  *
2265  * Caller must ensure we don't call this with other virtqueue operations
2266  * at the same time (except where noted).
2267  *
2268  * Returns zero or a negative error (e.g. -ENOSPC, -ENOMEM, -EIO).
2269  */
2270 int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
2271                         struct scatterlist *sg, unsigned int num,
2272                         void *data,
2273                         void *ctx,
2274                         gfp_t gfp)
2275 {
2276         return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp);
2277 }
2278 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
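
/*
 * Illustrative example (not taken from an in-tree driver): @ctx is handed
 * back by virtqueue_get_buf_ctx() together with the data token, so a
 * driver can attach per-buffer metadata without touching the buffer
 * itself.  "page" and "truesize" below are hypothetical:
 *
 *      struct scatterlist sg;
 *
 *      sg_init_one(&sg, page_address(page), PAGE_SIZE);
 *      err = virtqueue_add_inbuf_ctx(vq, &sg, 1, page,
 *                                    (void *)(unsigned long)truesize,
 *                                    GFP_ATOMIC);
 */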
2279
2280 /**
2281  * virtqueue_dma_dev - get the dma dev
2282  * @_vq: the struct virtqueue we're talking about.
2283  *
2284  * Returns the dma dev, which can be used with the DMA API.
2285  */
2286 struct device *virtqueue_dma_dev(struct virtqueue *_vq)
2287 {
2288         struct vring_virtqueue *vq = to_vvq(_vq);
2289
2290         if (vq->use_dma_api)
2291                 return vring_dma_dev(vq);
2292         else
2293                 return NULL;
2294 }
2295 EXPORT_SYMBOL_GPL(virtqueue_dma_dev);
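
/*
 * Illustrative example (not taken from an in-tree driver): a driver that
 * maps its own buffers (see virtqueue_set_dma_premapped() below) maps
 * them against the device returned here; "buf" and "len" are
 * hypothetical:
 *
 *      struct device *dma_dev = virtqueue_dma_dev(vq);
 *
 *      if (dma_dev)
 *              addr = dma_map_single(dma_dev, buf, len, DMA_FROM_DEVICE);
 */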
2296
2297 /**
2298  * virtqueue_kick_prepare - first half of split virtqueue_kick call.
2299  * @_vq: the struct virtqueue
2300  *
2301  * Instead of virtqueue_kick(), you can do:
2302  *      if (virtqueue_kick_prepare(vq))
2303  *              virtqueue_notify(vq);
2304  *
2305  * This is sometimes useful because virtqueue_kick_prepare() needs
2306  * to be serialized, but the actual virtqueue_notify() call does not.
2307  */
2308 bool virtqueue_kick_prepare(struct virtqueue *_vq)
2309 {
2310         struct vring_virtqueue *vq = to_vvq(_vq);
2311
2312         return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) :
2313                                  virtqueue_kick_prepare_split(_vq);
2314 }
2315 EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
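
/*
 * Illustrative example (not taken from an in-tree driver): the usual
 * reason for splitting the kick is keeping the possibly slow notification
 * outside the lock that serializes the virtqueue.  "my_lock", "req" and
 * "sg" are hypothetical:
 *
 *      bool kick;
 *
 *      spin_lock_irqsave(&my_lock, flags);
 *      err = virtqueue_add_outbuf(vq, &sg, 1, req, GFP_ATOMIC);
 *      kick = virtqueue_kick_prepare(vq);
 *      spin_unlock_irqrestore(&my_lock, flags);
 *
 *      if (kick)
 *              virtqueue_notify(vq);
 */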
2316
2317 /**
2318  * virtqueue_notify - second half of split virtqueue_kick call.
2319  * @_vq: the struct virtqueue
2320  *
2321  * This does not need to be serialized.
2322  *
2323  * Returns false if host notify failed or queue is broken, otherwise true.
2324  */
2325 bool virtqueue_notify(struct virtqueue *_vq)
2326 {
2327         struct vring_virtqueue *vq = to_vvq(_vq);
2328
2329         if (unlikely(vq->broken))
2330                 return false;
2331
2332         /* Prod other side to tell it about changes. */
2333         if (!vq->notify(_vq)) {
2334                 vq->broken = true;
2335                 return false;
2336         }
2337         return true;
2338 }
2339 EXPORT_SYMBOL_GPL(virtqueue_notify);
2340
2341 /**
2342  * virtqueue_kick - update after add_buf
2343  * @vq: the struct virtqueue
2344  *
2345  * After one or more virtqueue_add_* calls, invoke this to kick
2346  * the other side.
2347  *
2348  * Caller must ensure we don't call this with other virtqueue
2349  * operations at the same time (except where noted).
2350  *
2351  * Returns false if kick failed, otherwise true.
2352  */
2353 bool virtqueue_kick(struct virtqueue *vq)
2354 {
2355         if (virtqueue_kick_prepare(vq))
2356                 return virtqueue_notify(vq);
2357         return true;
2358 }
2359 EXPORT_SYMBOL_GPL(virtqueue_kick);
2360
2361 /**
2362  * virtqueue_get_buf_ctx - get the next used buffer
2363  * @_vq: the struct virtqueue we're talking about.
2364  * @len: the length written into the buffer
2365  * @ctx: extra context for the token
2366  *
2367  * If the device wrote data into the buffer, @len will be set to the
2368  * amount written.  This means you don't need to clear the buffer
2369  * beforehand to ensure there's no data leakage in the case of short
2370  * writes.
2371  *
2372  * Caller must ensure we don't call this with other virtqueue
2373  * operations at the same time (except where noted).
2374  *
2375  * Returns NULL if there are no used buffers, or the "data" token
2376  * handed to virtqueue_add_*().
2377  */
2378 void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
2379                             void **ctx)
2380 {
2381         struct vring_virtqueue *vq = to_vvq(_vq);
2382
2383         return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) :
2384                                  virtqueue_get_buf_ctx_split(_vq, len, ctx);
2385 }
2386 EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);
2387
2388 void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
2389 {
2390         return virtqueue_get_buf_ctx(_vq, len, NULL);
2391 }
2392 EXPORT_SYMBOL_GPL(virtqueue_get_buf);
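
/*
 * Illustrative example (not taken from an in-tree driver): used buffers
 * are normally drained from the virtqueue callback or a polling loop;
 * "my_complete_request" is a hypothetical helper:
 *
 *      unsigned int len;
 *      void *token;
 *
 *      while ((token = virtqueue_get_buf(vq, &len)) != NULL)
 *              my_complete_request(token, len);
 */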
2393 /**
2394  * virtqueue_disable_cb - disable callbacks
2395  * @_vq: the struct virtqueue we're talking about.
2396  *
2397  * Note that this is not necessarily synchronous, hence unreliable and only
2398  * useful as an optimization.
2399  *
2400  * Unlike other operations, this need not be serialized.
2401  */
2402 void virtqueue_disable_cb(struct virtqueue *_vq)
2403 {
2404         struct vring_virtqueue *vq = to_vvq(_vq);
2405
2406         if (vq->packed_ring)
2407                 virtqueue_disable_cb_packed(_vq);
2408         else
2409                 virtqueue_disable_cb_split(_vq);
2410 }
2411 EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
2412
2413 /**
2414  * virtqueue_enable_cb_prepare - restart callbacks after disable_cb
2415  * @_vq: the struct virtqueue we're talking about.
2416  *
2417  * This re-enables callbacks; it returns the current queue state
2418  * in an opaque unsigned value. This value should later be tested by
2419  * virtqueue_poll(), to detect a possible race between the driver checking
2420  * for more work and enabling callbacks.
2421  *
2422  * Caller must ensure we don't call this with other virtqueue
2423  * operations at the same time (except where noted).
2424  */
2425 unsigned int virtqueue_enable_cb_prepare(struct virtqueue *_vq)
2426 {
2427         struct vring_virtqueue *vq = to_vvq(_vq);
2428
2429         if (vq->event_triggered)
2430                 vq->event_triggered = false;
2431
2432         return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) :
2433                                  virtqueue_enable_cb_prepare_split(_vq);
2434 }
2435 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);
2436
2437 /**
2438  * virtqueue_poll - query pending used buffers
2439  * @_vq: the struct virtqueue we're talking about.
2440  * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
2441  *
2442  * Returns "true" if there are pending used buffers in the queue.
2443  *
2444  * This does not need to be serialized.
2445  */
2446 bool virtqueue_poll(struct virtqueue *_vq, unsigned int last_used_idx)
2447 {
2448         struct vring_virtqueue *vq = to_vvq(_vq);
2449
2450         if (unlikely(vq->broken))
2451                 return false;
2452
2453         virtio_mb(vq->weak_barriers);
2454         return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) :
2455                                  virtqueue_poll_split(_vq, last_used_idx);
2456 }
2457 EXPORT_SYMBOL_GPL(virtqueue_poll);
2458
2459 /**
2460  * virtqueue_enable_cb - restart callbacks after disable_cb.
2461  * @_vq: the struct virtqueue we're talking about.
2462  *
2463  * This re-enables callbacks; it returns "false" if there are pending
2464  * buffers in the queue, to detect a possible race between the driver
2465  * checking for more work, and enabling callbacks.
2466  *
2467  * Caller must ensure we don't call this with other virtqueue
2468  * operations at the same time (except where noted).
2469  */
2470 bool virtqueue_enable_cb(struct virtqueue *_vq)
2471 {
2472         unsigned int last_used_idx = virtqueue_enable_cb_prepare(_vq);
2473
2474         return !virtqueue_poll(_vq, last_used_idx);
2475 }
2476 EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
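
/*
 * Illustrative example (not taken from an in-tree driver): a race-free
 * way to stop polling is to re-enable callbacks and then re-check with
 * virtqueue_poll(), going back to polling if more buffers arrived in the
 * meantime.  "my_complete_request" is a hypothetical helper:
 *
 *      for (;;) {
 *              while ((token = virtqueue_get_buf(vq, &len)) != NULL)
 *                      my_complete_request(token, len);
 *
 *              opaque = virtqueue_enable_cb_prepare(vq);
 *              if (!virtqueue_poll(vq, opaque))
 *                      break;                  // callbacks stay enabled
 *              virtqueue_disable_cb(vq);       // raced, keep polling
 *      }
 */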
2477
2478 /**
2479  * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
2480  * @_vq: the struct virtqueue we're talking about.
2481  *
2482  * This re-enables callbacks but hints to the other side to delay
2483  * interrupts until most of the available buffers have been processed;
2484  * it returns "false" if there are many pending buffers in the queue,
2485  * to detect a possible race between the driver checking for more work,
2486  * and enabling callbacks.
2487  *
2488  * Caller must ensure we don't call this with other virtqueue
2489  * operations at the same time (except where noted).
2490  */
2491 bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
2492 {
2493         struct vring_virtqueue *vq = to_vvq(_vq);
2494
2495         if (vq->event_triggered)
2496                 vq->event_triggered = false;
2497
2498         return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) :
2499                                  virtqueue_enable_cb_delayed_split(_vq);
2500 }
2501 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
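
/*
 * Illustrative example (not taken from an in-tree driver): on a transmit
 * queue a driver may prefer delayed callbacks so the device only
 * interrupts after a batch of buffers has been used;
 * "free_old_tx_buffers" is a hypothetical helper:
 *
 *      free_old_tx_buffers(vq);
 *      if (!virtqueue_enable_cb_delayed(vq))
 *              free_old_tx_buffers(vq);        // more completed meanwhile
 */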
2502
2503 /**
2504  * virtqueue_detach_unused_buf - detach first unused buffer
2505  * @_vq: the struct virtqueue we're talking about.
2506  *
2507  * Returns NULL or the "data" token handed to virtqueue_add_*().
2508  * This is not valid on an active queue; it is useful for device
2509  * shutdown or when resetting the queue.
2510  */
2511 void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
2512 {
2513         struct vring_virtqueue *vq = to_vvq(_vq);
2514
2515         return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) :
2516                                  virtqueue_detach_unused_buf_split(_vq);
2517 }
2518 EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
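
/*
 * Illustrative example (not taken from an in-tree driver): on device
 * removal, once the device has been reset and the queue is quiescent,
 * a driver reclaims the buffers it had queued; "my_free_buffer" is a
 * hypothetical helper:
 *
 *      void *buf;
 *
 *      while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
 *              my_free_buffer(buf);
 */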
2519
2520 static inline bool more_used(const struct vring_virtqueue *vq)
2521 {
2522         return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
2523 }
2524
2525 /**
2526  * vring_interrupt - notify a virtqueue on an interrupt
2527  * @irq: the IRQ number (ignored)
2528  * @_vq: the struct virtqueue to notify
2529  *
2530  * Calls the callback function of @_vq to process the virtqueue
2531  * notification.
2532  */
2533 irqreturn_t vring_interrupt(int irq, void *_vq)
2534 {
2535         struct vring_virtqueue *vq = to_vvq(_vq);
2536
2537         if (!more_used(vq)) {
2538                 pr_debug("virtqueue interrupt with no work for %p\n", vq);
2539                 return IRQ_NONE;
2540         }
2541
2542         if (unlikely(vq->broken)) {
2543 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
2544                 dev_warn_once(&vq->vq.vdev->dev,
2545                               "virtio vring IRQ raised before DRIVER_OK");
2546                 return IRQ_NONE;
2547 #else
2548                 return IRQ_HANDLED;
2549 #endif
2550         }
2551
2552         /* Just a hint for performance: so it's ok that this can be racy! */
2553         if (vq->event)
2554                 vq->event_triggered = true;
2555
2556         pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
2557         if (vq->vq.callback)
2558                 vq->vq.callback(&vq->vq);
2559
2560         return IRQ_HANDLED;
2561 }
2562 EXPORT_SYMBOL_GPL(vring_interrupt);
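
/*
 * Illustrative example (not taken from an in-tree driver): a transport
 * with a dedicated interrupt per virtqueue can register this handler
 * directly, roughly:
 *
 *      err = request_irq(irq, vring_interrupt, 0,
 *                        dev_name(&vdev->dev), vq);
 */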
2563
2564 /* Only available for split ring */
2565 static struct virtqueue *__vring_new_virtqueue(unsigned int index,
2566                                                struct vring_virtqueue_split *vring_split,
2567                                                struct virtio_device *vdev,
2568                                                bool weak_barriers,
2569                                                bool context,
2570                                                bool (*notify)(struct virtqueue *),
2571                                                void (*callback)(struct virtqueue *),
2572                                                const char *name,
2573                                                struct device *dma_dev)
2574 {
2575         struct vring_virtqueue *vq;
2576         int err;
2577
2578         if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2579                 return NULL;
2580
2581         vq = kmalloc(sizeof(*vq), GFP_KERNEL);
2582         if (!vq)
2583                 return NULL;
2584
2585         vq->packed_ring = false;
2586         vq->vq.callback = callback;
2587         vq->vq.vdev = vdev;
2588         vq->vq.name = name;
2589         vq->vq.index = index;
2590         vq->vq.reset = false;
2591         vq->we_own_ring = false;
2592         vq->notify = notify;
2593         vq->weak_barriers = weak_barriers;
2594 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
2595         vq->broken = true;
2596 #else
2597         vq->broken = false;
2598 #endif
2599         vq->dma_dev = dma_dev;
2600         vq->use_dma_api = vring_use_dma_api(vdev);
2601         vq->premapped = false;
2602         vq->do_unmap = vq->use_dma_api;
2603
2604         vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
2605                 !context;
2606         vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
2607
2608         if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
2609                 vq->weak_barriers = false;
2610
2611         err = vring_alloc_state_extra_split(vring_split);
2612         if (err) {
2613                 kfree(vq);
2614                 return NULL;
2615         }
2616
2617         virtqueue_vring_init_split(vring_split, vq);
2618
2619         virtqueue_init(vq, vring_split->vring.num);
2620         virtqueue_vring_attach_split(vq, vring_split);
2621
2622         spin_lock(&vdev->vqs_list_lock);
2623         list_add_tail(&vq->vq.list, &vdev->vqs);
2624         spin_unlock(&vdev->vqs_list_lock);
2625         return &vq->vq;
2626 }
2627
2628 struct virtqueue *vring_create_virtqueue(
2629         unsigned int index,
2630         unsigned int num,
2631         unsigned int vring_align,
2632         struct virtio_device *vdev,
2633         bool weak_barriers,
2634         bool may_reduce_num,
2635         bool context,
2636         bool (*notify)(struct virtqueue *),
2637         void (*callback)(struct virtqueue *),
2638         const char *name)
2639 {
2640
2641         if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2642                 return vring_create_virtqueue_packed(index, num, vring_align,
2643                                 vdev, weak_barriers, may_reduce_num,
2644                                 context, notify, callback, name, vdev->dev.parent);
2645
2646         return vring_create_virtqueue_split(index, num, vring_align,
2647                         vdev, weak_barriers, may_reduce_num,
2648                         context, notify, callback, name, vdev->dev.parent);
2649 }
2650 EXPORT_SYMBOL_GPL(vring_create_virtqueue);
2651
2652 struct virtqueue *vring_create_virtqueue_dma(
2653         unsigned int index,
2654         unsigned int num,
2655         unsigned int vring_align,
2656         struct virtio_device *vdev,
2657         bool weak_barriers,
2658         bool may_reduce_num,
2659         bool context,
2660         bool (*notify)(struct virtqueue *),
2661         void (*callback)(struct virtqueue *),
2662         const char *name,
2663         struct device *dma_dev)
2664 {
2665
2666         if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2667                 return vring_create_virtqueue_packed(index, num, vring_align,
2668                                 vdev, weak_barriers, may_reduce_num,
2669                                 context, notify, callback, name, dma_dev);
2670
2671         return vring_create_virtqueue_split(index, num, vring_align,
2672                         vdev, weak_barriers, may_reduce_num,
2673                         context, notify, callback, name, dma_dev);
2674 }
2675 EXPORT_SYMBOL_GPL(vring_create_virtqueue_dma);
2676
2677 /**
2678  * virtqueue_resize - resize the vring of vq
2679  * @_vq: the struct virtqueue we're talking about.
2680  * @num: new ring num
2681  * @recycle: callback to recycle unused buffers
2682  *
2683  * When it is really necessary to create a new vring, this function will set
2684  * the current vq into the reset state, then call the passed callback to
2685  * recycle the buffers that are no longer used. Only after the new vring is
2686  * successfully created will the old vring be released.
2687  *
2688  * Caller must ensure we don't call this with other virtqueue operations
2689  * at the same time (except where noted).
2690  *
2691  * Returns zero or a negative error.
2692  * 0: success.
2693  * -ENOMEM: Failed to allocate a new ring; falls back to the original ring
2694  *  size, and the vq can still work normally.
2695  * -EBUSY: Failed to sync with the device; the vq may not work properly.
2696  * -ENOENT: Resize is not supported by the transport or the device.
2697  * -E2BIG/-EINVAL: num is invalid.
2698  * -EPERM: Operation not permitted
2699  *
2700  */
2701 int virtqueue_resize(struct virtqueue *_vq, u32 num,
2702                      void (*recycle)(struct virtqueue *vq, void *buf))
2703 {
2704         struct vring_virtqueue *vq = to_vvq(_vq);
2705         struct virtio_device *vdev = vq->vq.vdev;
2706         void *buf;
2707         int err;
2708
2709         if (!vq->we_own_ring)
2710                 return -EPERM;
2711
2712         if (num > vq->vq.num_max)
2713                 return -E2BIG;
2714
2715         if (!num)
2716                 return -EINVAL;
2717
2718         if ((vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num) == num)
2719                 return 0;
2720
2721         if (!vdev->config->disable_vq_and_reset)
2722                 return -ENOENT;
2723
2724         if (!vdev->config->enable_vq_after_reset)
2725                 return -ENOENT;
2726
2727         err = vdev->config->disable_vq_and_reset(_vq);
2728         if (err)
2729                 return err;
2730
2731         while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL)
2732                 recycle(_vq, buf);
2733
2734         if (vq->packed_ring)
2735                 err = virtqueue_resize_packed(_vq, num);
2736         else
2737                 err = virtqueue_resize_split(_vq, num);
2738
2739         if (vdev->config->enable_vq_after_reset(_vq))
2740                 return -EBUSY;
2741
2742         return err;
2743 }
2744 EXPORT_SYMBOL_GPL(virtqueue_resize);
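
/*
 * Illustrative example (not taken from an in-tree driver): the recycle
 * callback disposes of every buffer still queued on the old ring, e.g.
 * for a page-backed receive queue.  "my_recycle" and "refill_rx" are
 * hypothetical:
 *
 *      static void my_recycle(struct virtqueue *vq, void *buf)
 *      {
 *              put_page(virt_to_head_page(buf));
 *      }
 *
 *      ...
 *      err = virtqueue_resize(rx_vq, new_num, my_recycle);
 *      if (!err)
 *              refill_rx(rx_vq);
 */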
2745
2746 /**
2747  * virtqueue_set_dma_premapped - set the vring premapped mode
2748  * @_vq: the struct virtqueue we're talking about.
2749  *
2750  * Enable the premapped mode of the vq.
2751  *
2752  * In premapped mode the vring does not do any DMA mapping internally, so the
2753  * driver must do the DMA mapping in advance and pass the DMA address through
2754  * the dma_address field of the scatterlist. When the driver gets a used buffer
2755  * back from the vring, it has to unmap that DMA address itself.
2756  *
2757  * This function must be called immediately after creating the vq, or after vq
2758  * reset, and before adding any buffers to it.
2759  *
2760  * Caller must ensure we don't call this with other virtqueue operations
2761  * at the same time (except where noted).
2762  *
2763  * Returns zero or a negative error.
2764  * 0: success.
2765  * -EINVAL: the vring does not use the DMA API, so premapped mode can not be enabled.
2766  */
2767 int virtqueue_set_dma_premapped(struct virtqueue *_vq)
2768 {
2769         struct vring_virtqueue *vq = to_vvq(_vq);
2770         u32 num;
2771
2772         START_USE(vq);
2773
2774         num = vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
2775
2776         if (num != vq->vq.num_free) {
2777                 END_USE(vq);
2778                 return -EINVAL;
2779         }
2780
2781         if (!vq->use_dma_api) {
2782                 END_USE(vq);
2783                 return -EINVAL;
2784         }
2785
2786         vq->premapped = true;
2787         vq->do_unmap = false;
2788
2789         END_USE(vq);
2790
2791         return 0;
2792 }
2793 EXPORT_SYMBOL_GPL(virtqueue_set_dma_premapped);
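
/*
 * Illustrative example (not taken from an in-tree driver): once premapped
 * mode is enabled the driver owns the mapping, so a submit path could
 * look roughly like this (error handling omitted; "buf" and "len" are
 * hypothetical):
 *
 *      struct scatterlist sg;
 *      dma_addr_t addr;
 *
 *      addr = dma_map_single(virtqueue_dma_dev(vq), buf, len,
 *                            DMA_TO_DEVICE);
 *      sg_init_one(&sg, buf, len);
 *      sg.dma_address = addr;          // consumed instead of mapping internally
 *      err = virtqueue_add_outbuf(vq, &sg, 1, buf, GFP_ATOMIC);
 */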
2794
2795 /* Only available for split ring */
2796 struct virtqueue *vring_new_virtqueue(unsigned int index,
2797                                       unsigned int num,
2798                                       unsigned int vring_align,
2799                                       struct virtio_device *vdev,
2800                                       bool weak_barriers,
2801                                       bool context,
2802                                       void *pages,
2803                                       bool (*notify)(struct virtqueue *vq),
2804                                       void (*callback)(struct virtqueue *vq),
2805                                       const char *name)
2806 {
2807         struct vring_virtqueue_split vring_split = {};
2808
2809         if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2810                 return NULL;
2811
2812         vring_init(&vring_split.vring, num, pages, vring_align);
2813         return __vring_new_virtqueue(index, &vring_split, vdev, weak_barriers,
2814                                      context, notify, callback, name,
2815                                      vdev->dev.parent);
2816 }
2817 EXPORT_SYMBOL_GPL(vring_new_virtqueue);
2818
2819 static void vring_free(struct virtqueue *_vq)
2820 {
2821         struct vring_virtqueue *vq = to_vvq(_vq);
2822
2823         if (vq->we_own_ring) {
2824                 if (vq->packed_ring) {
2825                         vring_free_queue(vq->vq.vdev,
2826                                          vq->packed.ring_size_in_bytes,
2827                                          vq->packed.vring.desc,
2828                                          vq->packed.ring_dma_addr,
2829                                          vring_dma_dev(vq));
2830
2831                         vring_free_queue(vq->vq.vdev,
2832                                          vq->packed.event_size_in_bytes,
2833                                          vq->packed.vring.driver,
2834                                          vq->packed.driver_event_dma_addr,
2835                                          vring_dma_dev(vq));
2836
2837                         vring_free_queue(vq->vq.vdev,
2838                                          vq->packed.event_size_in_bytes,
2839                                          vq->packed.vring.device,
2840                                          vq->packed.device_event_dma_addr,
2841                                          vring_dma_dev(vq));
2842
2843                         kfree(vq->packed.desc_state);
2844                         kfree(vq->packed.desc_extra);
2845                 } else {
2846                         vring_free_queue(vq->vq.vdev,
2847                                          vq->split.queue_size_in_bytes,
2848                                          vq->split.vring.desc,
2849                                          vq->split.queue_dma_addr,
2850                                          vring_dma_dev(vq));
2851                 }
2852         }
2853         if (!vq->packed_ring) {
2854                 kfree(vq->split.desc_state);
2855                 kfree(vq->split.desc_extra);
2856         }
2857 }
2858
2859 void vring_del_virtqueue(struct virtqueue *_vq)
2860 {
2861         struct vring_virtqueue *vq = to_vvq(_vq);
2862
2863         spin_lock(&vq->vq.vdev->vqs_list_lock);
2864         list_del(&_vq->list);
2865         spin_unlock(&vq->vq.vdev->vqs_list_lock);
2866
2867         vring_free(_vq);
2868
2869         kfree(vq);
2870 }
2871 EXPORT_SYMBOL_GPL(vring_del_virtqueue);
2872
2873 u32 vring_notification_data(struct virtqueue *_vq)
2874 {
2875         struct vring_virtqueue *vq = to_vvq(_vq);
2876         u16 next;
2877
2878         if (vq->packed_ring)
2879                 next = (vq->packed.next_avail_idx &
2880                                 ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR))) |
2881                         vq->packed.avail_wrap_counter <<
2882                                 VRING_PACKED_EVENT_F_WRAP_CTR;
2883         else
2884                 next = vq->split.avail_idx_shadow;
2885
2886         return next << 16 | _vq->index;
2887 }
2888 EXPORT_SYMBOL_GPL(vring_notification_data);
2889
2890 /* Manipulates transport-specific feature bits. */
2891 void vring_transport_features(struct virtio_device *vdev)
2892 {
2893         unsigned int i;
2894
2895         for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
2896                 switch (i) {
2897                 case VIRTIO_RING_F_INDIRECT_DESC:
2898                         break;
2899                 case VIRTIO_RING_F_EVENT_IDX:
2900                         break;
2901                 case VIRTIO_F_VERSION_1:
2902                         break;
2903                 case VIRTIO_F_ACCESS_PLATFORM:
2904                         break;
2905                 case VIRTIO_F_RING_PACKED:
2906                         break;
2907                 case VIRTIO_F_ORDER_PLATFORM:
2908                         break;
2909                 case VIRTIO_F_NOTIFICATION_DATA:
2910                         break;
2911                 default:
2912                         /* We don't understand this bit. */
2913                         __virtio_clear_bit(vdev, i);
2914                 }
2915         }
2916 }
2917 EXPORT_SYMBOL_GPL(vring_transport_features);
2918
2919 /**
2920  * virtqueue_get_vring_size - return the size of the virtqueue's vring
2921  * @_vq: the struct virtqueue containing the vring of interest.
2922  *
2923  * Returns the size of the vring.  This is mainly used for boasting to
2924  * userspace.  Unlike other operations, this need not be serialized.
2925  */
2926 unsigned int virtqueue_get_vring_size(const struct virtqueue *_vq)
2927 {
2928
2929         const struct vring_virtqueue *vq = to_vvq(_vq);
2930
2931         return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
2932 }
2933 EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
2934
2935 /*
2936  * This function should only be called by the core, not directly by the driver.
2937  */
2938 void __virtqueue_break(struct virtqueue *_vq)
2939 {
2940         struct vring_virtqueue *vq = to_vvq(_vq);
2941
2942         /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2943         WRITE_ONCE(vq->broken, true);
2944 }
2945 EXPORT_SYMBOL_GPL(__virtqueue_break);
2946
2947 /*
2948  * This function should only be called by the core, not directly by the driver.
2949  */
2950 void __virtqueue_unbreak(struct virtqueue *_vq)
2951 {
2952         struct vring_virtqueue *vq = to_vvq(_vq);
2953
2954         /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2955         WRITE_ONCE(vq->broken, false);
2956 }
2957 EXPORT_SYMBOL_GPL(__virtqueue_unbreak);
2958
2959 bool virtqueue_is_broken(const struct virtqueue *_vq)
2960 {
2961         const struct vring_virtqueue *vq = to_vvq(_vq);
2962
2963         return READ_ONCE(vq->broken);
2964 }
2965 EXPORT_SYMBOL_GPL(virtqueue_is_broken);
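
/*
 * Illustrative sketch (not part of this file): a polling caller can bail out
 * once the ring has been marked broken instead of spinning forever.  The
 * busy-wait loop below is a hypothetical consumer.
 *
 *	while (!virtqueue_get_buf(vq, &len)) {
 *		if (virtqueue_is_broken(vq))
 *			return -EIO;
 *		cpu_relax();
 *	}
 */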
2966
2967 /*
2968  * This should prevent the device from being used, allowing drivers to
2969  * recover.  You may need to grab appropriate locks to flush the write to vq->broken.
2970  */
2971 void virtio_break_device(struct virtio_device *dev)
2972 {
2973         struct virtqueue *_vq;
2974
2975         spin_lock(&dev->vqs_list_lock);
2976         list_for_each_entry(_vq, &dev->vqs, list) {
2977                 struct vring_virtqueue *vq = to_vvq(_vq);
2978
2979                 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2980                 WRITE_ONCE(vq->broken, true);
2981         }
2982         spin_unlock(&dev->vqs_list_lock);
2983 }
2984 EXPORT_SYMBOL_GPL(virtio_break_device);
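
/*
 * Illustrative sketch (not part of this file): a transport can mark every
 * virtqueue broken on surprise removal, so in-flight drivers stop touching
 * the device before its queues are torn down.  example_surprise_remove() is
 * a hypothetical removal path.
 *
 *	static void example_surprise_remove(struct virtio_device *vdev)
 *	{
 *		virtio_break_device(vdev);
 *		unregister_virtio_device(vdev);
 *	}
 */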
2985
2986 /*
2987  * This should allow the device to be used by the driver. You may
2988  * need to grab appropriate locks to flush the write to
2989  * vq->broken. This should only be used in specific cases, e.g.
2990  * probing or restoring the device. This function should only be called by the
2991  * core, not directly by the driver.
2992  */
2993 void __virtio_unbreak_device(struct virtio_device *dev)
2994 {
2995         struct virtqueue *_vq;
2996
2997         spin_lock(&dev->vqs_list_lock);
2998         list_for_each_entry(_vq, &dev->vqs, list) {
2999                 struct vring_virtqueue *vq = to_vvq(_vq);
3000
3001                 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
3002                 WRITE_ONCE(vq->broken, false);
3003         }
3004         spin_unlock(&dev->vqs_list_lock);
3005 }
3006 EXPORT_SYMBOL_GPL(__virtio_unbreak_device);
3007
3008 dma_addr_t virtqueue_get_desc_addr(const struct virtqueue *_vq)
3009 {
3010         const struct vring_virtqueue *vq = to_vvq(_vq);
3011
3012         BUG_ON(!vq->we_own_ring);
3013
3014         if (vq->packed_ring)
3015                 return vq->packed.ring_dma_addr;
3016
3017         return vq->split.queue_dma_addr;
3018 }
3019 EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);
3020
3021 dma_addr_t virtqueue_get_avail_addr(const struct virtqueue *_vq)
3022 {
3023         const struct vring_virtqueue *vq = to_vvq(_vq);
3024
3025         BUG_ON(!vq->we_own_ring);
3026
3027         if (vq->packed_ring)
3028                 return vq->packed.driver_event_dma_addr;
3029
3030         return vq->split.queue_dma_addr +
3031                 ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
3032 }
3033 EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);
3034
3035 dma_addr_t virtqueue_get_used_addr(const struct virtqueue *_vq)
3036 {
3037         const struct vring_virtqueue *vq = to_vvq(_vq);
3038
3039         BUG_ON(!vq->we_own_ring);
3040
3041         if (vq->packed_ring)
3042                 return vq->packed.device_event_dma_addr;
3043
3044         return vq->split.queue_dma_addr +
3045                 ((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
3046 }
3047 EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
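
/*
 * Illustrative sketch (not part of this file): transports use the three
 * accessors above to program the ring addresses into the device, typically
 * split into 32-bit halves.  The EXAMPLE_* register offsets and the base
 * pointer below are hypothetical.
 *
 *	u64 addr = virtqueue_get_desc_addr(vq);
 *
 *	writel(lower_32_bits(addr), base + EXAMPLE_QUEUE_DESC_LOW);
 *	writel(upper_32_bits(addr), base + EXAMPLE_QUEUE_DESC_HIGH);
 *
 * and likewise for virtqueue_get_avail_addr() and virtqueue_get_used_addr().
 */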
3048
3049 /* Only available for split ring */
3050 const struct vring *virtqueue_get_vring(const struct virtqueue *vq)
3051 {
3052         return &to_vvq(vq)->split.vring;
3053 }
3054 EXPORT_SYMBOL_GPL(virtqueue_get_vring);
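
/*
 * Illustrative sketch (not part of this file): a debug path that only deals
 * with split rings can inspect the layout returned above; the pr_debug() line
 * is a hypothetical consumer.
 *
 *	const struct vring *vr = virtqueue_get_vring(vq);
 *
 *	pr_debug("%s: split ring with %u descriptors at %p\n",
 *		 vq->name, vr->num, vr->desc);
 */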
3055
3056 MODULE_LICENSE("GPL");