Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
[platform/kernel/linux-rpi.git] / drivers / virtio / virtio_ring.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* Virtio ring implementation.
3  *
4  *  Copyright 2007 Rusty Russell IBM Corporation
5  */
6 #include <linux/virtio.h>
7 #include <linux/virtio_ring.h>
8 #include <linux/virtio_config.h>
9 #include <linux/device.h>
10 #include <linux/slab.h>
11 #include <linux/module.h>
12 #include <linux/hrtimer.h>
13 #include <linux/dma-mapping.h>
14 #include <linux/kmsan.h>
15 #include <linux/spinlock.h>
16 #include <xen/xen.h>
17
#ifdef DEBUG
/*
 * Debug build: ring corruption and misuse of the queue are treated as fatal
 * so that they are caught during development.
 */

/* Log a ring inconsistency against the owning device, then BUG(). */
#define BAD_RING(_vq, fmt, args...) \
        do { \
                dev_err(&(_vq)->vq.vdev->dev, \
                        "%s:"fmt, (_vq)->vq.name, ##args); \
                BUG(); \
        } while (0)

/*
 * Mark the queue as in use; the caller is supposed to guarantee no reentry.
 * in_use records the __LINE__ of the current user and panics if the queue is
 * already marked.
 */
#define START_USE(_vq) \
        do { \
                if ((_vq)->in_use) \
                        panic("%s:in_use = %i\n", \
                              (_vq)->vq.name, (_vq)->in_use); \
                (_vq)->in_use = __LINE__; \
        } while (0)

/* Clear the in-use marker; BUG if START_USE() was not called first. */
#define END_USE(_vq) \
        do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while (0)

/* Record the time of an add and warn if the previous add is still pending. */
#define LAST_ADD_TIME_UPDATE(_vq) \
        do { \
                ktime_t now = ktime_get(); \
 \
                /* No kick or get, with .1 second between?  Warn. */ \
                if ((_vq)->last_add_time_valid) \
                        WARN_ON(ktime_to_ms(ktime_sub(now, \
                                (_vq)->last_add_time)) > 100); \
                (_vq)->last_add_time = now; \
                (_vq)->last_add_time_valid = true; \
        } while (0)

/* Warn if more than 100 ms have passed since the last recorded add. */
#define LAST_ADD_TIME_CHECK(_vq) \
        do { \
                if ((_vq)->last_add_time_valid) { \
                        WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \
                                      (_vq)->last_add_time)) > 100); \
                } \
        } while (0)

/* Forget the recorded add time. */
#define LAST_ADD_TIME_INVALID(_vq) \
        ((_vq)->last_add_time_valid = false)
#else
/*
 * Production build: log the inconsistency and mark the queue broken instead
 * of crashing.  Every use of the argument is parenthesized so that any
 * pointer expression (e.g. &obj or ptr + n) expands correctly.
 */
#define BAD_RING(_vq, fmt, args...) \
        do { \
                dev_err(&(_vq)->vq.vdev->dev, \
                        "%s:"fmt, (_vq)->vq.name, ##args); \
                (_vq)->broken = true; \
        } while (0)

/* The usage and timing checks compile away outside DEBUG builds. */
#define START_USE(vq)
#define END_USE(vq)
#define LAST_ADD_TIME_UPDATE(vq)
#define LAST_ADD_TIME_CHECK(vq)
#define LAST_ADD_TIME_INVALID(vq)
#endif
69
/*
 * Per-descriptor bookkeeping for the split ring.  virtqueue_add_split()
 * fills in the entry indexed by the head of a chain and detach_buf_split()
 * clears it again.
 */
70 struct vring_desc_state_split {
71         void *data;                     /* Caller's token; NULL while the slot is not an in-flight head. */
72         struct vring_desc *indir_desc;  /* Indirect table, or the caller's ctx when no table is used. */
73 };
74
/* Per-descriptor bookkeeping for the packed ring. */
75 struct vring_desc_state_packed {
76         void *data;                     /* Data for callback. */
77         struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
78         u16 num;                        /* Descriptor list length. */
79         u16 last;                       /* The last desc state in a list. */
80 };
81
/*
 * Driver-private copy of a descriptor's fields.  The split-ring unmap path
 * reads addr/len/flags from here rather than from the ring, and 'next' also
 * links the free descriptor list.
 */
82 struct vring_desc_extra {
83         dma_addr_t addr;                /* Descriptor DMA addr. */
84         u32 len;                        /* Descriptor length. */
85         u16 flags;                      /* Descriptor flags. */
86         u16 next;                       /* The next desc state in a list. */
87 };
88
/* Split-ring specific state; embedded in struct vring_virtqueue. */
89 struct vring_virtqueue_split {
90         /* Actual memory layout for this queue. */
91         struct vring vring;
92
93         /* Last written value to avail->flags */
94         u16 avail_flags_shadow;
95
96         /*
97          * Last written value to avail->idx in
98          * guest byte order.
99          */
100         u16 avail_idx_shadow;
101
102         /* Per-descriptor state, indexed by descriptor number. */
103         struct vring_desc_state_split *desc_state;
104         struct vring_desc_extra *desc_extra;
105
106         /* DMA address and size information */
107         dma_addr_t queue_dma_addr;
108         size_t queue_size_in_bytes;
109
110         /*
111          * The parameters used to create this vring, kept so that a new
112          * vring can be created with the same settings.
113          */
114         u32 vring_align;
115         bool may_reduce_num;
116 };
117
/* Packed-ring specific state; embedded in struct vring_virtqueue. */
118 struct vring_virtqueue_packed {
119         /* Actual memory layout for this queue. */
120         struct {
121                 unsigned int num;
122                 struct vring_packed_desc *desc;
123                 struct vring_packed_desc_event *driver;
124                 struct vring_packed_desc_event *device;
125         } vring;
126
127         /* Driver ring wrap counter. */
128         bool avail_wrap_counter;
129
130         /* Avail used flags. */
131         u16 avail_used_flags;
132
133         /* Index of the next avail descriptor. */
134         u16 next_avail_idx;
135
136         /*
137          * Last written value to driver->flags in
138          * guest byte order.
139          */
140         u16 event_flags_shadow;
141
142         /* Per-descriptor state. */
143         struct vring_desc_state_packed *desc_state;
144         struct vring_desc_extra *desc_extra;
145
146         /* DMA address and size information */
147         dma_addr_t ring_dma_addr;
148         dma_addr_t driver_event_dma_addr;
149         dma_addr_t device_event_dma_addr;
150         size_t ring_size_in_bytes;
151         size_t event_size_in_bytes;
152 };
153
/*
 * Ring-level state behind a struct virtqueue.  The generic 'vq' is embedded
 * so that to_vvq() can recover this structure from a struct virtqueue pointer.
 */
154 struct vring_virtqueue {
155         struct virtqueue vq;
156
157         /* Is this a packed ring? */
158         bool packed_ring;
159
160         /* Is DMA API used? */
161         bool use_dma_api;
162
163         /* Can we use weak barriers? */
164         bool weak_barriers;
165
166         /* Other side has made a mess, don't try any more. */
167         bool broken;
168
169         /* Host supports indirect buffers */
170         bool indirect;
171
172         /* Host publishes avail event idx */
173         bool event;
174
175         /* Do DMA mapping by driver */
176         bool premapped;
177
178         /* Whether descriptors must be unmapped here. True only when
179          * premapped is false and use_dma_api is true.
180          */
181         bool do_unmap;
182
183         /* Head of free buffer list. */
184         unsigned int free_head;
185         /* Number we've added since last sync. */
186         unsigned int num_added;
187
188         /* Last used index we've seen.
189          * for split ring, it just contains last used index
190          * for packed ring:
191          * bits up to VRING_PACKED_EVENT_F_WRAP_CTR include the last used index.
192          * bits from VRING_PACKED_EVENT_F_WRAP_CTR include the used wrap counter.
193          */
194         u16 last_used_idx;
195
196         /* Hint for event idx: already triggered no need to disable. */
197         bool event_triggered;
198
199         union {
200                 /* Available for split ring */
201                 struct vring_virtqueue_split split;
202
203                 /* Available for packed ring */
204                 struct vring_virtqueue_packed packed;
205         };
206
207         /* How to notify other side. FIXME: commonalize hcalls! */
208         bool (*notify)(struct virtqueue *vq);
209
210         /* DMA, allocation, and size information */
211         bool we_own_ring;
212
213         /* Device used for doing DMA */
214         struct device *dma_dev;
215
216 #ifdef DEBUG
217         /* They're supposed to lock for us. START_USE() stores __LINE__ here; 0 means idle. */
218         unsigned int in_use;
219
220         /* Figure out if their kicks are too delayed. */
221         bool last_add_time_valid;
222         ktime_t last_add_time;
223 #endif
224 };
225
/*
 * Forward declarations of file-local helpers; their definitions are not in
 * this part of the file.
 */
226 static struct virtqueue *__vring_new_virtqueue(unsigned int index,
227                                                struct vring_virtqueue_split *vring_split,
228                                                struct virtio_device *vdev,
229                                                bool weak_barriers,
230                                                bool context,
231                                                bool (*notify)(struct virtqueue *),
232                                                void (*callback)(struct virtqueue *),
233                                                const char *name,
234                                                struct device *dma_dev);
235 static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num);
236 static void vring_free(struct virtqueue *_vq);
237
238 /*
239  * Helpers.
240  */
241
/* Map a struct virtqueue pointer to the struct vring_virtqueue that embeds it as 'vq'. */
242 #define to_vvq(_vq) container_of_const(_vq, struct vring_virtqueue, vq)
243
244 static bool virtqueue_use_indirect(const struct vring_virtqueue *vq,
245                                    unsigned int total_sg)
246 {
247         /*
248          * If the host supports indirect descriptor tables, and we have multiple
249          * buffers, then go indirect. FIXME: tune this threshold
250          */
251         return (vq->indirect && total_sg > 1 && vq->vq.num_free);
252 }
253
254 /*
255  * Modern virtio devices have feature bits to specify whether they need a
256  * quirk and bypass the IOMMU. If not there, just use the DMA API.
257  *
258  * If there, the interaction between virtio and DMA API is messy.
259  *
260  * On most systems with virtio, physical addresses match bus addresses,
261  * and it doesn't particularly matter whether we use the DMA API.
262  *
263  * On some systems, including Xen and any system with a physical device
264  * that speaks virtio behind a physical IOMMU, we must use the DMA API
265  * for virtio DMA to work at all.
266  *
267  * On other systems, including SPARC and PPC64, virtio-pci devices are
268  * enumerated as though they are behind an IOMMU, but the virtio host
269  * ignores the IOMMU, so we must either pretend that the IOMMU isn't
270  * there or somehow map everything as the identity.
271  *
272  * For the time being, we preserve historic behavior and bypass the DMA
273  * API.
274  *
275  * TODO: install a per-device DMA ops structure that does the right thing
276  * taking into account all the above quirks, and use the DMA API
277  * unconditionally on data path.
278  */
279
280 static bool vring_use_dma_api(const struct virtio_device *vdev)
281 {
282         if (!virtio_has_dma_quirk(vdev))
283                 return true;
284
285         /* Otherwise, we are left to guess. */
286         /*
287          * In theory, it's possible to have a buggy QEMU-supposed
288          * emulated Q35 IOMMU and Xen enabled at the same time.  On
289          * such a configuration, virtio has never worked and will
290          * not work without an even larger kludge.  Instead, enable
291          * the DMA API if we're a Xen guest, which at least allows
292          * all of the sensible Xen configurations to work correctly.
293          */
294         if (xen_domain())
295                 return true;
296
297         return false;
298 }
299
300 size_t virtio_max_dma_size(const struct virtio_device *vdev)
301 {
302         size_t max_segment_size = SIZE_MAX;
303
304         if (vring_use_dma_api(vdev))
305                 max_segment_size = dma_max_mapping_size(vdev->dev.parent);
306
307         return max_segment_size;
308 }
309 EXPORT_SYMBOL_GPL(virtio_max_dma_size);
310
311 static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
312                                dma_addr_t *dma_handle, gfp_t flag,
313                                struct device *dma_dev)
314 {
315         if (vring_use_dma_api(vdev)) {
316                 return dma_alloc_coherent(dma_dev, size,
317                                           dma_handle, flag);
318         } else {
319                 void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);
320
321                 if (queue) {
322                         phys_addr_t phys_addr = virt_to_phys(queue);
323                         *dma_handle = (dma_addr_t)phys_addr;
324
325                         /*
326                          * Sanity check: make sure we dind't truncate
327                          * the address.  The only arches I can find that
328                          * have 64-bit phys_addr_t but 32-bit dma_addr_t
329                          * are certain non-highmem MIPS and x86
330                          * configurations, but these configurations
331                          * should never allocate physical pages above 32
332                          * bits, so this is fine.  Just in case, throw a
333                          * warning and abort if we end up with an
334                          * unrepresentable address.
335                          */
336                         if (WARN_ON_ONCE(*dma_handle != phys_addr)) {
337                                 free_pages_exact(queue, PAGE_ALIGN(size));
338                                 return NULL;
339                         }
340                 }
341                 return queue;
342         }
343 }
344
345 static void vring_free_queue(struct virtio_device *vdev, size_t size,
346                              void *queue, dma_addr_t dma_handle,
347                              struct device *dma_dev)
348 {
349         if (vring_use_dma_api(vdev))
350                 dma_free_coherent(dma_dev, size, queue, dma_handle);
351         else
352                 free_pages_exact(queue, PAGE_ALIGN(size));
353 }
354
355 /*
356  * The DMA ops on various arches are rather gnarly right now, and
357  * making all of the arch DMA ops work on the vring device itself
358  * is a mess.
359  */
360 static struct device *vring_dma_dev(const struct vring_virtqueue *vq)
361 {
362         return vq->dma_dev;
363 }
364
365 /* Map one sg entry. */
366 static int vring_map_one_sg(const struct vring_virtqueue *vq, struct scatterlist *sg,
367                             enum dma_data_direction direction, dma_addr_t *addr)
368 {
369         if (vq->premapped) {
370                 *addr = sg_dma_address(sg);
371                 return 0;
372         }
373
374         if (!vq->use_dma_api) {
375                 /*
376                  * If DMA is not used, KMSAN doesn't know that the scatterlist
377                  * is initialized by the hardware. Explicitly check/unpoison it
378                  * depending on the direction.
379                  */
380                 kmsan_handle_dma(sg_page(sg), sg->offset, sg->length, direction);
381                 *addr = (dma_addr_t)sg_phys(sg);
382                 return 0;
383         }
384
385         /*
386          * We can't use dma_map_sg, because we don't use scatterlists in
387          * the way it expects (we don't guarantee that the scatterlist
388          * will exist for the lifetime of the mapping).
389          */
390         *addr = dma_map_page(vring_dma_dev(vq),
391                             sg_page(sg), sg->offset, sg->length,
392                             direction);
393
394         if (dma_mapping_error(vring_dma_dev(vq), *addr))
395                 return -ENOMEM;
396
397         return 0;
398 }
399
400 static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
401                                    void *cpu_addr, size_t size,
402                                    enum dma_data_direction direction)
403 {
404         if (!vq->use_dma_api)
405                 return (dma_addr_t)virt_to_phys(cpu_addr);
406
407         return dma_map_single(vring_dma_dev(vq),
408                               cpu_addr, size, direction);
409 }
410
411 static int vring_mapping_error(const struct vring_virtqueue *vq,
412                                dma_addr_t addr)
413 {
414         if (!vq->use_dma_api)
415                 return 0;
416
417         return dma_mapping_error(vring_dma_dev(vq), addr);
418 }
419
420 static void virtqueue_init(struct vring_virtqueue *vq, u32 num)
421 {
422         vq->vq.num_free = num;
423
424         if (vq->packed_ring)
425                 vq->last_used_idx = 0 | (1 << VRING_PACKED_EVENT_F_WRAP_CTR);
426         else
427                 vq->last_used_idx = 0;
428
429         vq->event_triggered = false;
430         vq->num_added = 0;
431
432 #ifdef DEBUG
433         vq->in_use = false;
434         vq->last_add_time_valid = false;
435 #endif
436 }
437
438
439 /*
440  * Split ring specific functions - *_split().
441  */
442
443 static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
444                                            const struct vring_desc *desc)
445 {
446         u16 flags;
447
448         if (!vq->do_unmap)
449                 return;
450
451         flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
452
453         dma_unmap_page(vring_dma_dev(vq),
454                        virtio64_to_cpu(vq->vq.vdev, desc->addr),
455                        virtio32_to_cpu(vq->vq.vdev, desc->len),
456                        (flags & VRING_DESC_F_WRITE) ?
457                        DMA_FROM_DEVICE : DMA_TO_DEVICE);
458 }
459
460 static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
461                                           unsigned int i)
462 {
463         struct vring_desc_extra *extra = vq->split.desc_extra;
464         u16 flags;
465
466         flags = extra[i].flags;
467
468         if (flags & VRING_DESC_F_INDIRECT) {
469                 if (!vq->use_dma_api)
470                         goto out;
471
472                 dma_unmap_single(vring_dma_dev(vq),
473                                  extra[i].addr,
474                                  extra[i].len,
475                                  (flags & VRING_DESC_F_WRITE) ?
476                                  DMA_FROM_DEVICE : DMA_TO_DEVICE);
477         } else {
478                 if (!vq->do_unmap)
479                         goto out;
480
481                 dma_unmap_page(vring_dma_dev(vq),
482                                extra[i].addr,
483                                extra[i].len,
484                                (flags & VRING_DESC_F_WRITE) ?
485                                DMA_FROM_DEVICE : DMA_TO_DEVICE);
486         }
487
488 out:
489         return extra[i].next;
490 }
491
492 static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
493                                                unsigned int total_sg,
494                                                gfp_t gfp)
495 {
496         struct vring_desc *desc;
497         unsigned int i;
498
499         /*
500          * We require lowmem mappings for the descriptors because
501          * otherwise virt_to_phys will give us bogus addresses in the
502          * virtqueue.
503          */
504         gfp &= ~__GFP_HIGHMEM;
505
506         desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp);
507         if (!desc)
508                 return NULL;
509
510         for (i = 0; i < total_sg; i++)
511                 desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1);
512         return desc;
513 }
514
515 static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
516                                                     struct vring_desc *desc,
517                                                     unsigned int i,
518                                                     dma_addr_t addr,
519                                                     unsigned int len,
520                                                     u16 flags,
521                                                     bool indirect)
522 {
523         struct vring_virtqueue *vring = to_vvq(vq);
524         struct vring_desc_extra *extra = vring->split.desc_extra;
525         u16 next;
526
527         desc[i].flags = cpu_to_virtio16(vq->vdev, flags);
528         desc[i].addr = cpu_to_virtio64(vq->vdev, addr);
529         desc[i].len = cpu_to_virtio32(vq->vdev, len);
530
531         if (!indirect) {
532                 next = extra[i].next;
533                 desc[i].next = cpu_to_virtio16(vq->vdev, next);
534
535                 extra[i].addr = addr;
536                 extra[i].len = len;
537                 extra[i].flags = flags;
538         } else
539                 next = virtio16_to_cpu(vq->vdev, desc[i].next);
540
541         return next;
542 }
543
/*
 * Add one buffer, described by the scatterlists in sgs[], to a split ring.
 *
 * _vq:      queue to add to
 * sgs:      array of scatterlists; the first out_sgs lists are mapped
 *           DMA_TO_DEVICE, the following in_sgs lists DMA_FROM_DEVICE
 * total_sg: number of descriptors the buffer needs (must be non-zero);
 *           presumably the total number of sg entries in sgs[] - the
 *           caller is trusted on this
 * data:     token stored for the head descriptor (must be non-NULL)
 * ctx:      stored in place of the indirect table pointer when no table is
 *           used; must be NULL when the queue supports indirect descriptors
 * gfp:      allocation flags for the indirect table
 *
 * If an indirect table is wanted but cannot be allocated, the buffer is
 * added with ring descriptors instead.
 *
 * Returns 0 on success, -EIO if the queue is marked broken, -ENOSPC if not
 * enough descriptors are free, -ENOMEM if a DMA mapping fails.
 */
544 static inline int virtqueue_add_split(struct virtqueue *_vq,
545                                       struct scatterlist *sgs[],
546                                       unsigned int total_sg,
547                                       unsigned int out_sgs,
548                                       unsigned int in_sgs,
549                                       void *data,
550                                       void *ctx,
551                                       gfp_t gfp)
552 {
553         struct vring_virtqueue *vq = to_vvq(_vq);
554         struct scatterlist *sg;
555         struct vring_desc *desc;
556         unsigned int i, n, avail, descs_used, prev, err_idx;
557         int head;
558         bool indirect;
559
560         START_USE(vq);
561
562         BUG_ON(data == NULL);
563         BUG_ON(ctx && vq->indirect);
564
565         if (unlikely(vq->broken)) {
566                 END_USE(vq);
567                 return -EIO;
568         }
569
570         LAST_ADD_TIME_UPDATE(vq);
571
572         BUG_ON(total_sg == 0);
573
574         head = vq->free_head;
575
576         if (virtqueue_use_indirect(vq, total_sg))
577                 desc = alloc_indirect_split(_vq, total_sg, gfp);
578         else {
579                 desc = NULL;
580                 WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
581         }
582
            /* desc is NULL both when indirect was not chosen and when its allocation failed. */
583         if (desc) {
584                 /* Use a single buffer which doesn't continue */
585                 indirect = true;
586                 /* Set up rest to use this indirect table. */
587                 i = 0;
588                 descs_used = 1;
589         } else {
590                 indirect = false;
591                 desc = vq->split.vring.desc;
592                 i = head;
593                 descs_used = total_sg;
594         }
595
596         if (unlikely(vq->vq.num_free < descs_used)) {
597                 pr_debug("Can't add buf len %i - avail = %i\n",
598                          descs_used, vq->vq.num_free);
599                 /* FIXME: for historical reasons, we force a notify here if
600                  * there are outgoing parts to the buffer.  Presumably the
601                  * host should service the ring ASAP. */
602                 if (out_sgs)
603                         vq->notify(&vq->vq);
604                 if (indirect)
605                         kfree(desc);
606                 END_USE(vq);
607                 return -ENOSPC;
608         }
609
            /* Device-readable buffers first ... */
610         for (n = 0; n < out_sgs; n++) {
611                 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
612                         dma_addr_t addr;
613
614                         if (vring_map_one_sg(vq, sg, DMA_TO_DEVICE, &addr))
615                                 goto unmap_release;
616
617                         prev = i;
618                         /* Note that we trust indirect descriptor
619                          * table since it use stream DMA mapping.
620                          */
621                         i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length,
622                                                      VRING_DESC_F_NEXT,
623                                                      indirect);
624                 }
625         }
            /* ... then device-writable buffers, flagged VRING_DESC_F_WRITE. */
626         for (; n < (out_sgs + in_sgs); n++) {
627                 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
628                         dma_addr_t addr;
629
630                         if (vring_map_one_sg(vq, sg, DMA_FROM_DEVICE, &addr))
631                                 goto unmap_release;
632
633                         prev = i;
634                         /* Note that we trust indirect descriptor
635                          * table since it use stream DMA mapping.
636                          */
637                         i = virtqueue_add_desc_split(_vq, desc, i, addr,
638                                                      sg->length,
639                                                      VRING_DESC_F_NEXT |
640                                                      VRING_DESC_F_WRITE,
641                                                      indirect);
642                 }
643         }
644         /* Last one doesn't continue. */
645         desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
646         if (!indirect && vq->do_unmap)
647                 vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &=
648                         ~VRING_DESC_F_NEXT;
649
650         if (indirect) {
651                 /* Now that the indirect table is filled in, map it. */
652                 dma_addr_t addr = vring_map_single(
653                         vq, desc, total_sg * sizeof(struct vring_desc),
654                         DMA_TO_DEVICE);
655                 if (vring_mapping_error(vq, addr)) {
                            /* Premapped buffers were not mapped by vring_map_one_sg(), so only the table is freed. */
656                         if (vq->premapped)
657                                 goto free_indirect;
658
659                         goto unmap_release;
660                 }
661
                    /* A single ring descriptor at head points at the whole table. */
662                 virtqueue_add_desc_split(_vq, vq->split.vring.desc,
663                                          head, addr,
664                                          total_sg * sizeof(struct vring_desc),
665                                          VRING_DESC_F_INDIRECT,
666                                          false);
667         }
668
669         /* We're using some buffers from the free list. */
670         vq->vq.num_free -= descs_used;
671
672         /* Update free pointer */
673         if (indirect)
674                 vq->free_head = vq->split.desc_extra[head].next;
675         else
676                 vq->free_head = i;
677
678         /* Store token and indirect buffer state. */
679         vq->split.desc_state[head].data = data;
680         if (indirect)
681                 vq->split.desc_state[head].indir_desc = desc;
682         else
683                 vq->split.desc_state[head].indir_desc = ctx;
684
685         /* Put entry in available array (but don't update avail->idx until they
686          * do sync). */
687         avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
688         vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);
689
690         /* Descriptors and available array need to be set before we expose the
691          * new available array entries. */
692         virtio_wmb(vq->weak_barriers);
693         vq->split.avail_idx_shadow++;
694         vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
695                                                 vq->split.avail_idx_shadow);
696         vq->num_added++;
697
698         pr_debug("Added buffer head %i to %p\n", head, vq);
699         END_USE(vq);
700
701         /* This is very unlikely, but theoretically possible.  Kick
702          * just in case. */
703         if (unlikely(vq->num_added == (1 << 16) - 1))
704                 virtqueue_kick(_vq);
705
706         return 0;
707
708 unmap_release:
            /*
             * i is the index at which the failure happened; walk the chain
             * from its start and unmap every descriptor filled in before it.
             */
709         err_idx = i;
710
711         if (indirect)
712                 i = 0;
713         else
714                 i = head;
715
716         for (n = 0; n < total_sg; n++) {
717                 if (i == err_idx)
718                         break;
719                 if (indirect) {
720                         vring_unmap_one_split_indirect(vq, &desc[i]);
721                         i = virtio16_to_cpu(_vq->vdev, desc[i].next);
722                 } else
723                         i = vring_unmap_one_split(vq, i);
724         }
725
726 free_indirect:
727         if (indirect)
728                 kfree(desc);
729
730         END_USE(vq);
731         return -ENOMEM;
732 }
733
734 static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
735 {
736         struct vring_virtqueue *vq = to_vvq(_vq);
737         u16 new, old;
738         bool needs_kick;
739
740         START_USE(vq);
741         /* We need to expose available array entries before checking avail
742          * event. */
743         virtio_mb(vq->weak_barriers);
744
745         old = vq->split.avail_idx_shadow - vq->num_added;
746         new = vq->split.avail_idx_shadow;
747         vq->num_added = 0;
748
749         LAST_ADD_TIME_CHECK(vq);
750         LAST_ADD_TIME_INVALID(vq);
751
752         if (vq->event) {
753                 needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev,
754                                         vring_avail_event(&vq->split.vring)),
755                                               new, old);
756         } else {
757                 needs_kick = !(vq->split.vring.used->flags &
758                                         cpu_to_virtio16(_vq->vdev,
759                                                 VRING_USED_F_NO_NOTIFY));
760         }
761         END_USE(vq);
762         return needs_kick;
763 }
764
765 static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
766                              void **ctx)
767 {
768         unsigned int i, j;
769         __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
770
771         /* Clear data ptr. */
772         vq->split.desc_state[head].data = NULL;
773
774         /* Put back on free list: unmap first-level descriptors and find end */
775         i = head;
776
777         while (vq->split.vring.desc[i].flags & nextflag) {
778                 vring_unmap_one_split(vq, i);
779                 i = vq->split.desc_extra[i].next;
780                 vq->vq.num_free++;
781         }
782
783         vring_unmap_one_split(vq, i);
784         vq->split.desc_extra[i].next = vq->free_head;
785         vq->free_head = head;
786
787         /* Plus final descriptor */
788         vq->vq.num_free++;
789
790         if (vq->indirect) {
791                 struct vring_desc *indir_desc =
792                                 vq->split.desc_state[head].indir_desc;
793                 u32 len;
794
795                 /* Free the indirect table, if any, now that it's unmapped. */
796                 if (!indir_desc)
797                         return;
798
799                 len = vq->split.desc_extra[head].len;
800
801                 BUG_ON(!(vq->split.desc_extra[head].flags &
802                                 VRING_DESC_F_INDIRECT));
803                 BUG_ON(len == 0 || len % sizeof(struct vring_desc));
804
805                 if (vq->do_unmap) {
806                         for (j = 0; j < len / sizeof(struct vring_desc); j++)
807                                 vring_unmap_one_split_indirect(vq, &indir_desc[j]);
808                 }
809
810                 kfree(indir_desc);
811                 vq->split.desc_state[head].indir_desc = NULL;
812         } else if (ctx) {
813                 *ctx = vq->split.desc_state[head].indir_desc;
814         }
815 }
816
817 static bool more_used_split(const struct vring_virtqueue *vq)
818 {
819         return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev,
820                         vq->split.vring.used->idx);
821 }
822
823 static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
824                                          unsigned int *len,
825                                          void **ctx)
826 {
827         struct vring_virtqueue *vq = to_vvq(_vq);
828         void *ret;
829         unsigned int i;
830         u16 last_used;
831
832         START_USE(vq);
833
834         if (unlikely(vq->broken)) {
835                 END_USE(vq);
836                 return NULL;
837         }
838
839         if (!more_used_split(vq)) {
840                 pr_debug("No more buffers in queue\n");
841                 END_USE(vq);
842                 return NULL;
843         }
844
845         /* Only get used array entries after they have been exposed by host. */
846         virtio_rmb(vq->weak_barriers);
847
848         last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
849         i = virtio32_to_cpu(_vq->vdev,
850                         vq->split.vring.used->ring[last_used].id);
851         *len = virtio32_to_cpu(_vq->vdev,
852                         vq->split.vring.used->ring[last_used].len);
853
854         if (unlikely(i >= vq->split.vring.num)) {
855                 BAD_RING(vq, "id %u out of range\n", i);
856                 return NULL;
857         }
858         if (unlikely(!vq->split.desc_state[i].data)) {
859                 BAD_RING(vq, "id %u is not a head!\n", i);
860                 return NULL;
861         }
862
863         /* detach_buf_split clears data, so grab it now. */
864         ret = vq->split.desc_state[i].data;
865         detach_buf_split(vq, i, ctx);
866         vq->last_used_idx++;
867         /* If we expect an interrupt for the next entry, tell host
868          * by writing event index and flush out the write before
869          * the read in the next get_buf call. */
870         if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
871                 virtio_store_mb(vq->weak_barriers,
872                                 &vring_used_event(&vq->split.vring),
873                                 cpu_to_virtio16(_vq->vdev, vq->last_used_idx));
874
875         LAST_ADD_TIME_INVALID(vq);
876
877         END_USE(vq);
878         return ret;
879 }
880
881 static void virtqueue_disable_cb_split(struct virtqueue *_vq)
882 {
883         struct vring_virtqueue *vq = to_vvq(_vq);
884
885         if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
886                 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
887
888                 /*
889                  * If device triggered an event already it won't trigger one again:
890                  * no need to disable.
891                  */
892                 if (vq->event_triggered)
893                         return;
894
895                 if (vq->event)
896                         /* TODO: this is a hack. Figure out a cleaner value to write. */
897                         vring_used_event(&vq->split.vring) = 0x0;
898                 else
899                         vq->split.vring.avail->flags =
900                                 cpu_to_virtio16(_vq->vdev,
901                                                 vq->split.avail_flags_shadow);
902         }
903 }
904
905 static unsigned int virtqueue_enable_cb_prepare_split(struct virtqueue *_vq)
906 {
907         struct vring_virtqueue *vq = to_vvq(_vq);
908         u16 last_used_idx;
909
910         START_USE(vq);
911
912         /* We optimistically turn back on interrupts, then check if there was
913          * more to do. */
914         /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
915          * either clear the flags bit or point the event index at the next
916          * entry. Always do both to keep code simple. */
917         if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
918                 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
919                 if (!vq->event)
920                         vq->split.vring.avail->flags =
921                                 cpu_to_virtio16(_vq->vdev,
922                                                 vq->split.avail_flags_shadow);
923         }
924         vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev,
925                         last_used_idx = vq->last_used_idx);
926         END_USE(vq);
927         return last_used_idx;
928 }
929
930 static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned int last_used_idx)
931 {
932         struct vring_virtqueue *vq = to_vvq(_vq);
933
934         return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev,
935                         vq->split.vring.used->idx);
936 }
937
938 static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
939 {
940         struct vring_virtqueue *vq = to_vvq(_vq);
941         u16 bufs;
942
943         START_USE(vq);
944
945         /* We optimistically turn back on interrupts, then check if there was
946          * more to do. */
947         /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
948          * either clear the flags bit or point the event index at the next
949          * entry. Always update the event index to keep code simple. */
950         if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
951                 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
952                 if (!vq->event)
953                         vq->split.vring.avail->flags =
954                                 cpu_to_virtio16(_vq->vdev,
955                                                 vq->split.avail_flags_shadow);
956         }
957         /* TODO: tune this threshold */
958         bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4;
959
960         virtio_store_mb(vq->weak_barriers,
961                         &vring_used_event(&vq->split.vring),
962                         cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs));
963
964         if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx)
965                                         - vq->last_used_idx) > bufs)) {
966                 END_USE(vq);
967                 return false;
968         }
969
970         END_USE(vq);
971         return true;
972 }
973
974 static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
975 {
976         struct vring_virtqueue *vq = to_vvq(_vq);
977         unsigned int i;
978         void *buf;
979
980         START_USE(vq);
981
982         for (i = 0; i < vq->split.vring.num; i++) {
983                 if (!vq->split.desc_state[i].data)
984                         continue;
985                 /* detach_buf_split clears data, so grab it now. */
986                 buf = vq->split.desc_state[i].data;
987                 detach_buf_split(vq, i, NULL);
988                 vq->split.avail_idx_shadow--;
989                 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
990                                 vq->split.avail_idx_shadow);
991                 END_USE(vq);
992                 return buf;
993         }
994         /* That should have freed everything. */
995         BUG_ON(vq->vq.num_free != vq->split.vring.num);
996
997         END_USE(vq);
998         return NULL;
999 }
1000
1001 static void virtqueue_vring_init_split(struct vring_virtqueue_split *vring_split,
1002                                        struct vring_virtqueue *vq)
1003 {
1004         struct virtio_device *vdev;
1005
1006         vdev = vq->vq.vdev;
1007
1008         vring_split->avail_flags_shadow = 0;
1009         vring_split->avail_idx_shadow = 0;
1010
1011         /* No callback?  Tell other side not to bother us. */
1012         if (!vq->vq.callback) {
1013                 vring_split->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
1014                 if (!vq->event)
1015                         vring_split->vring.avail->flags = cpu_to_virtio16(vdev,
1016                                         vring_split->avail_flags_shadow);
1017         }
1018 }
1019
1020 static void virtqueue_reinit_split(struct vring_virtqueue *vq)
1021 {
1022         int num;
1023
1024         num = vq->split.vring.num;
1025
1026         vq->split.vring.avail->flags = 0;
1027         vq->split.vring.avail->idx = 0;
1028
1029         /* reset avail event */
1030         vq->split.vring.avail->ring[num] = 0;
1031
1032         vq->split.vring.used->flags = 0;
1033         vq->split.vring.used->idx = 0;
1034
1035         /* reset used event */
1036         *(__virtio16 *)&(vq->split.vring.used->ring[num]) = 0;
1037
1038         virtqueue_init(vq, num);
1039
1040         virtqueue_vring_init_split(&vq->split, vq);
1041 }
1042
1043 static void virtqueue_vring_attach_split(struct vring_virtqueue *vq,
1044                                          struct vring_virtqueue_split *vring_split)
1045 {
1046         vq->split = *vring_split;
1047
1048         /* Put everything in free lists. */
1049         vq->free_head = 0;
1050 }
1051
1052 static int vring_alloc_state_extra_split(struct vring_virtqueue_split *vring_split)
1053 {
1054         struct vring_desc_state_split *state;
1055         struct vring_desc_extra *extra;
1056         u32 num = vring_split->vring.num;
1057
1058         state = kmalloc_array(num, sizeof(struct vring_desc_state_split), GFP_KERNEL);
1059         if (!state)
1060                 goto err_state;
1061
1062         extra = vring_alloc_desc_extra(num);
1063         if (!extra)
1064                 goto err_extra;
1065
1066         memset(state, 0, num * sizeof(struct vring_desc_state_split));
1067
1068         vring_split->desc_state = state;
1069         vring_split->desc_extra = extra;
1070         return 0;
1071
1072 err_extra:
1073         kfree(state);
1074 err_state:
1075         return -ENOMEM;
1076 }
1077
1078 static void vring_free_split(struct vring_virtqueue_split *vring_split,
1079                              struct virtio_device *vdev, struct device *dma_dev)
1080 {
1081         vring_free_queue(vdev, vring_split->queue_size_in_bytes,
1082                          vring_split->vring.desc,
1083                          vring_split->queue_dma_addr,
1084                          dma_dev);
1085
1086         kfree(vring_split->desc_state);
1087         kfree(vring_split->desc_extra);
1088 }
1089
1090 static int vring_alloc_queue_split(struct vring_virtqueue_split *vring_split,
1091                                    struct virtio_device *vdev,
1092                                    u32 num,
1093                                    unsigned int vring_align,
1094                                    bool may_reduce_num,
1095                                    struct device *dma_dev)
1096 {
1097         void *queue = NULL;
1098         dma_addr_t dma_addr;
1099
1100         /* We assume num is a power of 2. */
1101         if (!is_power_of_2(num)) {
1102                 dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
1103                 return -EINVAL;
1104         }
1105
1106         /* TODO: allocate each queue chunk individually */
1107         for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
1108                 queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
1109                                           &dma_addr,
1110                                           GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
1111                                           dma_dev);
1112                 if (queue)
1113                         break;
1114                 if (!may_reduce_num)
1115                         return -ENOMEM;
1116         }
1117
1118         if (!num)
1119                 return -ENOMEM;
1120
1121         if (!queue) {
1122                 /* Try to get a single page. You are my only hope! */
1123                 queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
1124                                           &dma_addr, GFP_KERNEL | __GFP_ZERO,
1125                                           dma_dev);
1126         }
1127         if (!queue)
1128                 return -ENOMEM;
1129
1130         vring_init(&vring_split->vring, num, queue, vring_align);
1131
1132         vring_split->queue_dma_addr = dma_addr;
1133         vring_split->queue_size_in_bytes = vring_size(num, vring_align);
1134
1135         vring_split->vring_align = vring_align;
1136         vring_split->may_reduce_num = may_reduce_num;
1137
1138         return 0;
1139 }
1140
1141 static struct virtqueue *vring_create_virtqueue_split(
1142         unsigned int index,
1143         unsigned int num,
1144         unsigned int vring_align,
1145         struct virtio_device *vdev,
1146         bool weak_barriers,
1147         bool may_reduce_num,
1148         bool context,
1149         bool (*notify)(struct virtqueue *),
1150         void (*callback)(struct virtqueue *),
1151         const char *name,
1152         struct device *dma_dev)
1153 {
1154         struct vring_virtqueue_split vring_split = {};
1155         struct virtqueue *vq;
1156         int err;
1157
1158         err = vring_alloc_queue_split(&vring_split, vdev, num, vring_align,
1159                                       may_reduce_num, dma_dev);
1160         if (err)
1161                 return NULL;
1162
1163         vq = __vring_new_virtqueue(index, &vring_split, vdev, weak_barriers,
1164                                    context, notify, callback, name, dma_dev);
1165         if (!vq) {
1166                 vring_free_split(&vring_split, vdev, dma_dev);
1167                 return NULL;
1168         }
1169
1170         to_vvq(vq)->we_own_ring = true;
1171
1172         return vq;
1173 }
1174
1175 static int virtqueue_resize_split(struct virtqueue *_vq, u32 num)
1176 {
1177         struct vring_virtqueue_split vring_split = {};
1178         struct vring_virtqueue *vq = to_vvq(_vq);
1179         struct virtio_device *vdev = _vq->vdev;
1180         int err;
1181
1182         err = vring_alloc_queue_split(&vring_split, vdev, num,
1183                                       vq->split.vring_align,
1184                                       vq->split.may_reduce_num,
1185                                       vring_dma_dev(vq));
1186         if (err)
1187                 goto err;
1188
1189         err = vring_alloc_state_extra_split(&vring_split);
1190         if (err)
1191                 goto err_state_extra;
1192
1193         vring_free(&vq->vq);
1194
1195         virtqueue_vring_init_split(&vring_split, vq);
1196
1197         virtqueue_init(vq, vring_split.vring.num);
1198         virtqueue_vring_attach_split(vq, &vring_split);
1199
1200         return 0;
1201
1202 err_state_extra:
1203         vring_free_split(&vring_split, vdev, vring_dma_dev(vq));
1204 err:
1205         virtqueue_reinit_split(vq);
1206         return -ENOMEM;
1207 }
1208
1209
1210 /*
1211  * Packed ring specific functions - *_packed().
1212  */
1213 static bool packed_used_wrap_counter(u16 last_used_idx)
1214 {
1215         return !!(last_used_idx & (1 << VRING_PACKED_EVENT_F_WRAP_CTR));
1216 }
1217
1218 static u16 packed_last_used(u16 last_used_idx)
1219 {
1220         return last_used_idx & ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR));
1221 }
1222
1223 static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
1224                                      const struct vring_desc_extra *extra)
1225 {
1226         u16 flags;
1227
1228         flags = extra->flags;
1229
1230         if (flags & VRING_DESC_F_INDIRECT) {
1231                 if (!vq->use_dma_api)
1232                         return;
1233
1234                 dma_unmap_single(vring_dma_dev(vq),
1235                                  extra->addr, extra->len,
1236                                  (flags & VRING_DESC_F_WRITE) ?
1237                                  DMA_FROM_DEVICE : DMA_TO_DEVICE);
1238         } else {
1239                 if (!vq->do_unmap)
1240                         return;
1241
1242                 dma_unmap_page(vring_dma_dev(vq),
1243                                extra->addr, extra->len,
1244                                (flags & VRING_DESC_F_WRITE) ?
1245                                DMA_FROM_DEVICE : DMA_TO_DEVICE);
1246         }
1247 }
1248
1249 static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
1250                                     const struct vring_packed_desc *desc)
1251 {
1252         u16 flags;
1253
1254         if (!vq->do_unmap)
1255                 return;
1256
1257         flags = le16_to_cpu(desc->flags);
1258
1259         dma_unmap_page(vring_dma_dev(vq),
1260                        le64_to_cpu(desc->addr),
1261                        le32_to_cpu(desc->len),
1262                        (flags & VRING_DESC_F_WRITE) ?
1263                        DMA_FROM_DEVICE : DMA_TO_DEVICE);
1264 }
1265
1266 static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg,
1267                                                        gfp_t gfp)
1268 {
1269         struct vring_packed_desc *desc;
1270
1271         /*
1272          * We require lowmem mappings for the descriptors because
1273          * otherwise virt_to_phys will give us bogus addresses in the
1274          * virtqueue.
1275          */
1276         gfp &= ~__GFP_HIGHMEM;
1277
1278         desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp);
1279
1280         return desc;
1281 }
1282
1283 static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
1284                                          struct scatterlist *sgs[],
1285                                          unsigned int total_sg,
1286                                          unsigned int out_sgs,
1287                                          unsigned int in_sgs,
1288                                          void *data,
1289                                          gfp_t gfp)
1290 {
1291         struct vring_packed_desc *desc;
1292         struct scatterlist *sg;
1293         unsigned int i, n, err_idx;
1294         u16 head, id;
1295         dma_addr_t addr;
1296
1297         head = vq->packed.next_avail_idx;
1298         desc = alloc_indirect_packed(total_sg, gfp);
1299         if (!desc)
1300                 return -ENOMEM;
1301
1302         if (unlikely(vq->vq.num_free < 1)) {
1303                 pr_debug("Can't add buf len 1 - avail = 0\n");
1304                 kfree(desc);
1305                 END_USE(vq);
1306                 return -ENOSPC;
1307         }
1308
1309         i = 0;
1310         id = vq->free_head;
1311         BUG_ON(id == vq->packed.vring.num);
1312
1313         for (n = 0; n < out_sgs + in_sgs; n++) {
1314                 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1315                         if (vring_map_one_sg(vq, sg, n < out_sgs ?
1316                                              DMA_TO_DEVICE : DMA_FROM_DEVICE, &addr))
1317                                 goto unmap_release;
1318
1319                         desc[i].flags = cpu_to_le16(n < out_sgs ?
1320                                                 0 : VRING_DESC_F_WRITE);
1321                         desc[i].addr = cpu_to_le64(addr);
1322                         desc[i].len = cpu_to_le32(sg->length);
1323                         i++;
1324                 }
1325         }
1326
1327         /* Now that the indirect table is filled in, map it. */
1328         addr = vring_map_single(vq, desc,
1329                         total_sg * sizeof(struct vring_packed_desc),
1330                         DMA_TO_DEVICE);
1331         if (vring_mapping_error(vq, addr)) {
1332                 if (vq->premapped)
1333                         goto free_desc;
1334
1335                 goto unmap_release;
1336         }
1337
1338         vq->packed.vring.desc[head].addr = cpu_to_le64(addr);
1339         vq->packed.vring.desc[head].len = cpu_to_le32(total_sg *
1340                                 sizeof(struct vring_packed_desc));
1341         vq->packed.vring.desc[head].id = cpu_to_le16(id);
1342
1343         if (vq->do_unmap) {
1344                 vq->packed.desc_extra[id].addr = addr;
1345                 vq->packed.desc_extra[id].len = total_sg *
1346                                 sizeof(struct vring_packed_desc);
1347                 vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT |
1348                                                   vq->packed.avail_used_flags;
1349         }
1350
1351         /*
1352          * A driver MUST NOT make the first descriptor in the list
1353          * available before all subsequent descriptors comprising
1354          * the list are made available.
1355          */
1356         virtio_wmb(vq->weak_barriers);
1357         vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT |
1358                                                 vq->packed.avail_used_flags);
1359
1360         /* We're using some buffers from the free list. */
1361         vq->vq.num_free -= 1;
1362
1363         /* Update free pointer */
1364         n = head + 1;
1365         if (n >= vq->packed.vring.num) {
1366                 n = 0;
1367                 vq->packed.avail_wrap_counter ^= 1;
1368                 vq->packed.avail_used_flags ^=
1369                                 1 << VRING_PACKED_DESC_F_AVAIL |
1370                                 1 << VRING_PACKED_DESC_F_USED;
1371         }
1372         vq->packed.next_avail_idx = n;
1373         vq->free_head = vq->packed.desc_extra[id].next;
1374
1375         /* Store token and indirect buffer state. */
1376         vq->packed.desc_state[id].num = 1;
1377         vq->packed.desc_state[id].data = data;
1378         vq->packed.desc_state[id].indir_desc = desc;
1379         vq->packed.desc_state[id].last = id;
1380
1381         vq->num_added += 1;
1382
1383         pr_debug("Added buffer head %i to %p\n", head, vq);
1384         END_USE(vq);
1385
1386         return 0;
1387
1388 unmap_release:
1389         err_idx = i;
1390
1391         for (i = 0; i < err_idx; i++)
1392                 vring_unmap_desc_packed(vq, &desc[i]);
1393
1394 free_desc:
1395         kfree(desc);
1396
1397         END_USE(vq);
1398         return -ENOMEM;
1399 }
1400
1401 static inline int virtqueue_add_packed(struct virtqueue *_vq,
1402                                        struct scatterlist *sgs[],
1403                                        unsigned int total_sg,
1404                                        unsigned int out_sgs,
1405                                        unsigned int in_sgs,
1406                                        void *data,
1407                                        void *ctx,
1408                                        gfp_t gfp)
1409 {
1410         struct vring_virtqueue *vq = to_vvq(_vq);
1411         struct vring_packed_desc *desc;
1412         struct scatterlist *sg;
1413         unsigned int i, n, c, descs_used, err_idx;
1414         __le16 head_flags, flags;
1415         u16 head, id, prev, curr, avail_used_flags;
1416         int err;
1417
1418         START_USE(vq);
1419
1420         BUG_ON(data == NULL);
1421         BUG_ON(ctx && vq->indirect);
1422
1423         if (unlikely(vq->broken)) {
1424                 END_USE(vq);
1425                 return -EIO;
1426         }
1427
1428         LAST_ADD_TIME_UPDATE(vq);
1429
1430         BUG_ON(total_sg == 0);
1431
1432         if (virtqueue_use_indirect(vq, total_sg)) {
1433                 err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
1434                                                     in_sgs, data, gfp);
1435                 if (err != -ENOMEM) {
1436                         END_USE(vq);
1437                         return err;
1438                 }
1439
1440                 /* fall back on direct */
1441         }
1442
1443         head = vq->packed.next_avail_idx;
1444         avail_used_flags = vq->packed.avail_used_flags;
1445
1446         WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect);
1447
1448         desc = vq->packed.vring.desc;
1449         i = head;
1450         descs_used = total_sg;
1451
1452         if (unlikely(vq->vq.num_free < descs_used)) {
1453                 pr_debug("Can't add buf len %i - avail = %i\n",
1454                          descs_used, vq->vq.num_free);
1455                 END_USE(vq);
1456                 return -ENOSPC;
1457         }
1458
1459         id = vq->free_head;
1460         BUG_ON(id == vq->packed.vring.num);
1461
1462         curr = id;
1463         c = 0;
1464         for (n = 0; n < out_sgs + in_sgs; n++) {
1465                 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1466                         dma_addr_t addr;
1467
1468                         if (vring_map_one_sg(vq, sg, n < out_sgs ?
1469                                              DMA_TO_DEVICE : DMA_FROM_DEVICE, &addr))
1470                                 goto unmap_release;
1471
1472                         flags = cpu_to_le16(vq->packed.avail_used_flags |
1473                                     (++c == total_sg ? 0 : VRING_DESC_F_NEXT) |
1474                                     (n < out_sgs ? 0 : VRING_DESC_F_WRITE));
1475                         if (i == head)
1476                                 head_flags = flags;
1477                         else
1478                                 desc[i].flags = flags;
1479
1480                         desc[i].addr = cpu_to_le64(addr);
1481                         desc[i].len = cpu_to_le32(sg->length);
1482                         desc[i].id = cpu_to_le16(id);
1483
1484                         if (unlikely(vq->do_unmap)) {
1485                                 vq->packed.desc_extra[curr].addr = addr;
1486                                 vq->packed.desc_extra[curr].len = sg->length;
1487                                 vq->packed.desc_extra[curr].flags =
1488                                         le16_to_cpu(flags);
1489                         }
1490                         prev = curr;
1491                         curr = vq->packed.desc_extra[curr].next;
1492
1493                         if ((unlikely(++i >= vq->packed.vring.num))) {
1494                                 i = 0;
1495                                 vq->packed.avail_used_flags ^=
1496                                         1 << VRING_PACKED_DESC_F_AVAIL |
1497                                         1 << VRING_PACKED_DESC_F_USED;
1498                         }
1499                 }
1500         }
1501
1502         if (i <= head)
1503                 vq->packed.avail_wrap_counter ^= 1;
1504
1505         /* We're using some buffers from the free list. */
1506         vq->vq.num_free -= descs_used;
1507
1508         /* Update free pointer */
1509         vq->packed.next_avail_idx = i;
1510         vq->free_head = curr;
1511
1512         /* Store token. */
1513         vq->packed.desc_state[id].num = descs_used;
1514         vq->packed.desc_state[id].data = data;
1515         vq->packed.desc_state[id].indir_desc = ctx;
1516         vq->packed.desc_state[id].last = prev;
1517
1518         /*
1519          * A driver MUST NOT make the first descriptor in the list
1520          * available before all subsequent descriptors comprising
1521          * the list are made available.
1522          */
1523         virtio_wmb(vq->weak_barriers);
1524         vq->packed.vring.desc[head].flags = head_flags;
1525         vq->num_added += descs_used;
1526
1527         pr_debug("Added buffer head %i to %p\n", head, vq);
1528         END_USE(vq);
1529
1530         return 0;
1531
1532 unmap_release:
1533         err_idx = i;
1534         i = head;
1535         curr = vq->free_head;
1536
1537         vq->packed.avail_used_flags = avail_used_flags;
1538
1539         for (n = 0; n < total_sg; n++) {
1540                 if (i == err_idx)
1541                         break;
1542                 vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]);
1543                 curr = vq->packed.desc_extra[curr].next;
1544                 i++;
1545                 if (i >= vq->packed.vring.num)
1546                         i = 0;
1547         }
1548
1549         END_USE(vq);
1550         return -EIO;
1551 }
1552
1553 static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
1554 {
1555         struct vring_virtqueue *vq = to_vvq(_vq);
1556         u16 new, old, off_wrap, flags, wrap_counter, event_idx;
1557         bool needs_kick;
1558         union {
1559                 struct {
1560                         __le16 off_wrap;
1561                         __le16 flags;
1562                 };
1563                 u32 u32;
1564         } snapshot;
1565
1566         START_USE(vq);
1567
1568         /*
1569          * We need to expose the new flags value before checking notification
1570          * suppressions.
1571          */
1572         virtio_mb(vq->weak_barriers);
1573
1574         old = vq->packed.next_avail_idx - vq->num_added;
1575         new = vq->packed.next_avail_idx;
1576         vq->num_added = 0;
1577
1578         snapshot.u32 = *(u32 *)vq->packed.vring.device;
1579         flags = le16_to_cpu(snapshot.flags);
1580
1581         LAST_ADD_TIME_CHECK(vq);
1582         LAST_ADD_TIME_INVALID(vq);
1583
1584         if (flags != VRING_PACKED_EVENT_FLAG_DESC) {
1585                 needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE);
1586                 goto out;
1587         }
1588
1589         off_wrap = le16_to_cpu(snapshot.off_wrap);
1590
1591         wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1592         event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1593         if (wrap_counter != vq->packed.avail_wrap_counter)
1594                 event_idx -= vq->packed.vring.num;
1595
1596         needs_kick = vring_need_event(event_idx, new, old);
1597 out:
1598         END_USE(vq);
1599         return needs_kick;
1600 }
1601
1602 static void detach_buf_packed(struct vring_virtqueue *vq,
1603                               unsigned int id, void **ctx)
1604 {
1605         struct vring_desc_state_packed *state = NULL;
1606         struct vring_packed_desc *desc;
1607         unsigned int i, curr;
1608
1609         state = &vq->packed.desc_state[id];
1610
1611         /* Clear data ptr. */
1612         state->data = NULL;
1613
1614         vq->packed.desc_extra[state->last].next = vq->free_head;
1615         vq->free_head = id;
1616         vq->vq.num_free += state->num;
1617
1618         if (unlikely(vq->do_unmap)) {
1619                 curr = id;
1620                 for (i = 0; i < state->num; i++) {
1621                         vring_unmap_extra_packed(vq,
1622                                                  &vq->packed.desc_extra[curr]);
1623                         curr = vq->packed.desc_extra[curr].next;
1624                 }
1625         }
1626
1627         if (vq->indirect) {
1628                 u32 len;
1629
1630                 /* Free the indirect table, if any, now that it's unmapped. */
1631                 desc = state->indir_desc;
1632                 if (!desc)
1633                         return;
1634
1635                 if (vq->do_unmap) {
1636                         len = vq->packed.desc_extra[id].len;
1637                         for (i = 0; i < len / sizeof(struct vring_packed_desc);
1638                                         i++)
1639                                 vring_unmap_desc_packed(vq, &desc[i]);
1640                 }
1641                 kfree(desc);
1642                 state->indir_desc = NULL;
1643         } else if (ctx) {
1644                 *ctx = state->indir_desc;
1645         }
1646 }
1647
1648 static inline bool is_used_desc_packed(const struct vring_virtqueue *vq,
1649                                        u16 idx, bool used_wrap_counter)
1650 {
1651         bool avail, used;
1652         u16 flags;
1653
1654         flags = le16_to_cpu(vq->packed.vring.desc[idx].flags);
1655         avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
1656         used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
1657
1658         return avail == used && used == used_wrap_counter;
1659 }
1660
1661 static bool more_used_packed(const struct vring_virtqueue *vq)
1662 {
1663         u16 last_used;
1664         u16 last_used_idx;
1665         bool used_wrap_counter;
1666
1667         last_used_idx = READ_ONCE(vq->last_used_idx);
1668         last_used = packed_last_used(last_used_idx);
1669         used_wrap_counter = packed_used_wrap_counter(last_used_idx);
1670         return is_used_desc_packed(vq, last_used, used_wrap_counter);
1671 }
1672
1673 static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
1674                                           unsigned int *len,
1675                                           void **ctx)
1676 {
1677         struct vring_virtqueue *vq = to_vvq(_vq);
1678         u16 last_used, id, last_used_idx;
1679         bool used_wrap_counter;
1680         void *ret;
1681
1682         START_USE(vq);
1683
1684         if (unlikely(vq->broken)) {
1685                 END_USE(vq);
1686                 return NULL;
1687         }
1688
1689         if (!more_used_packed(vq)) {
1690                 pr_debug("No more buffers in queue\n");
1691                 END_USE(vq);
1692                 return NULL;
1693         }
1694
1695         /* Only get used elements after they have been exposed by host. */
1696         virtio_rmb(vq->weak_barriers);
1697
1698         last_used_idx = READ_ONCE(vq->last_used_idx);
1699         used_wrap_counter = packed_used_wrap_counter(last_used_idx);
1700         last_used = packed_last_used(last_used_idx);
1701         id = le16_to_cpu(vq->packed.vring.desc[last_used].id);
1702         *len = le32_to_cpu(vq->packed.vring.desc[last_used].len);
1703
1704         if (unlikely(id >= vq->packed.vring.num)) {
1705                 BAD_RING(vq, "id %u out of range\n", id);
1706                 return NULL;
1707         }
1708         if (unlikely(!vq->packed.desc_state[id].data)) {
1709                 BAD_RING(vq, "id %u is not a head!\n", id);
1710                 return NULL;
1711         }
1712
1713         /* detach_buf_packed clears data, so grab it now. */
1714         ret = vq->packed.desc_state[id].data;
1715         detach_buf_packed(vq, id, ctx);
1716
1717         last_used += vq->packed.desc_state[id].num;
1718         if (unlikely(last_used >= vq->packed.vring.num)) {
1719                 last_used -= vq->packed.vring.num;
1720                 used_wrap_counter ^= 1;
1721         }
1722
1723         last_used = (last_used | (used_wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
1724         WRITE_ONCE(vq->last_used_idx, last_used);
1725
1726         /*
1727          * If we expect an interrupt for the next entry, tell host
1728          * by writing event index and flush out the write before
1729          * the read in the next get_buf call.
1730          */
1731         if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC)
1732                 virtio_store_mb(vq->weak_barriers,
1733                                 &vq->packed.vring.driver->off_wrap,
1734                                 cpu_to_le16(vq->last_used_idx));
1735
1736         LAST_ADD_TIME_INVALID(vq);
1737
1738         END_USE(vq);
1739         return ret;
1740 }
1741
1742 static void virtqueue_disable_cb_packed(struct virtqueue *_vq)
1743 {
1744         struct vring_virtqueue *vq = to_vvq(_vq);
1745
1746         if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) {
1747                 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1748
1749                 /*
1750                  * If device triggered an event already it won't trigger one again:
1751                  * no need to disable.
1752                  */
1753                 if (vq->event_triggered)
1754                         return;
1755
1756                 vq->packed.vring.driver->flags =
1757                         cpu_to_le16(vq->packed.event_flags_shadow);
1758         }
1759 }
1760
1761 static unsigned int virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
1762 {
1763         struct vring_virtqueue *vq = to_vvq(_vq);
1764
1765         START_USE(vq);
1766
1767         /*
1768          * We optimistically turn back on interrupts, then check if there was
1769          * more to do.
1770          */
1771
1772         if (vq->event) {
1773                 vq->packed.vring.driver->off_wrap =
1774                         cpu_to_le16(vq->last_used_idx);
1775                 /*
1776                  * We need to update event offset and event wrap
1777                  * counter first before updating event flags.
1778                  */
1779                 virtio_wmb(vq->weak_barriers);
1780         }
1781
1782         if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
1783                 vq->packed.event_flags_shadow = vq->event ?
1784                                 VRING_PACKED_EVENT_FLAG_DESC :
1785                                 VRING_PACKED_EVENT_FLAG_ENABLE;
1786                 vq->packed.vring.driver->flags =
1787                                 cpu_to_le16(vq->packed.event_flags_shadow);
1788         }
1789
1790         END_USE(vq);
1791         return vq->last_used_idx;
1792 }
1793
1794 static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap)
1795 {
1796         struct vring_virtqueue *vq = to_vvq(_vq);
1797         bool wrap_counter;
1798         u16 used_idx;
1799
1800         wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1801         used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1802
1803         return is_used_desc_packed(vq, used_idx, wrap_counter);
1804 }
1805
1806 static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
1807 {
1808         struct vring_virtqueue *vq = to_vvq(_vq);
1809         u16 used_idx, wrap_counter, last_used_idx;
1810         u16 bufs;
1811
1812         START_USE(vq);
1813
1814         /*
1815          * We optimistically turn back on interrupts, then check if there was
1816          * more to do.
1817          */
1818
1819         if (vq->event) {
1820                 /* TODO: tune this threshold */
1821                 bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4;
1822                 last_used_idx = READ_ONCE(vq->last_used_idx);
1823                 wrap_counter = packed_used_wrap_counter(last_used_idx);
1824
1825                 used_idx = packed_last_used(last_used_idx) + bufs;
1826                 if (used_idx >= vq->packed.vring.num) {
1827                         used_idx -= vq->packed.vring.num;
1828                         wrap_counter ^= 1;
1829                 }
1830
1831                 vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx |
1832                         (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
1833
1834                 /*
1835                  * We need to update event offset and event wrap
1836                  * counter first before updating event flags.
1837                  */
1838                 virtio_wmb(vq->weak_barriers);
1839         }
1840
1841         if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
1842                 vq->packed.event_flags_shadow = vq->event ?
1843                                 VRING_PACKED_EVENT_FLAG_DESC :
1844                                 VRING_PACKED_EVENT_FLAG_ENABLE;
1845                 vq->packed.vring.driver->flags =
1846                                 cpu_to_le16(vq->packed.event_flags_shadow);
1847         }
1848
1849         /*
1850          * We need to update event suppression structure first
1851          * before re-checking for more used buffers.
1852          */
1853         virtio_mb(vq->weak_barriers);
1854
1855         last_used_idx = READ_ONCE(vq->last_used_idx);
1856         wrap_counter = packed_used_wrap_counter(last_used_idx);
1857         used_idx = packed_last_used(last_used_idx);
1858         if (is_used_desc_packed(vq, used_idx, wrap_counter)) {
1859                 END_USE(vq);
1860                 return false;
1861         }
1862
1863         END_USE(vq);
1864         return true;
1865 }
1866
1867 static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
1868 {
1869         struct vring_virtqueue *vq = to_vvq(_vq);
1870         unsigned int i;
1871         void *buf;
1872
1873         START_USE(vq);
1874
1875         for (i = 0; i < vq->packed.vring.num; i++) {
1876                 if (!vq->packed.desc_state[i].data)
1877                         continue;
1878                 /* detach_buf clears data, so grab it now. */
1879                 buf = vq->packed.desc_state[i].data;
1880                 detach_buf_packed(vq, i, NULL);
1881                 END_USE(vq);
1882                 return buf;
1883         }
1884         /* That should have freed everything. */
1885         BUG_ON(vq->vq.num_free != vq->packed.vring.num);
1886
1887         END_USE(vq);
1888         return NULL;
1889 }
1890
1891 static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num)
1892 {
1893         struct vring_desc_extra *desc_extra;
1894         unsigned int i;
1895
1896         desc_extra = kmalloc_array(num, sizeof(struct vring_desc_extra),
1897                                    GFP_KERNEL);
1898         if (!desc_extra)
1899                 return NULL;
1900
1901         memset(desc_extra, 0, num * sizeof(struct vring_desc_extra));
1902
1903         for (i = 0; i < num - 1; i++)
1904                 desc_extra[i].next = i + 1;
1905
1906         return desc_extra;
1907 }
1908
1909 static void vring_free_packed(struct vring_virtqueue_packed *vring_packed,
1910                               struct virtio_device *vdev,
1911                               struct device *dma_dev)
1912 {
1913         if (vring_packed->vring.desc)
1914                 vring_free_queue(vdev, vring_packed->ring_size_in_bytes,
1915                                  vring_packed->vring.desc,
1916                                  vring_packed->ring_dma_addr,
1917                                  dma_dev);
1918
1919         if (vring_packed->vring.driver)
1920                 vring_free_queue(vdev, vring_packed->event_size_in_bytes,
1921                                  vring_packed->vring.driver,
1922                                  vring_packed->driver_event_dma_addr,
1923                                  dma_dev);
1924
1925         if (vring_packed->vring.device)
1926                 vring_free_queue(vdev, vring_packed->event_size_in_bytes,
1927                                  vring_packed->vring.device,
1928                                  vring_packed->device_event_dma_addr,
1929                                  dma_dev);
1930
1931         kfree(vring_packed->desc_state);
1932         kfree(vring_packed->desc_extra);
1933 }
1934
1935 static int vring_alloc_queue_packed(struct vring_virtqueue_packed *vring_packed,
1936                                     struct virtio_device *vdev,
1937                                     u32 num, struct device *dma_dev)
1938 {
1939         struct vring_packed_desc *ring;
1940         struct vring_packed_desc_event *driver, *device;
1941         dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
1942         size_t ring_size_in_bytes, event_size_in_bytes;
1943
1944         ring_size_in_bytes = num * sizeof(struct vring_packed_desc);
1945
1946         ring = vring_alloc_queue(vdev, ring_size_in_bytes,
1947                                  &ring_dma_addr,
1948                                  GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
1949                                  dma_dev);
1950         if (!ring)
1951                 goto err;
1952
1953         vring_packed->vring.desc         = ring;
1954         vring_packed->ring_dma_addr      = ring_dma_addr;
1955         vring_packed->ring_size_in_bytes = ring_size_in_bytes;
1956
1957         event_size_in_bytes = sizeof(struct vring_packed_desc_event);
1958
1959         driver = vring_alloc_queue(vdev, event_size_in_bytes,
1960                                    &driver_event_dma_addr,
1961                                    GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
1962                                    dma_dev);
1963         if (!driver)
1964                 goto err;
1965
1966         vring_packed->vring.driver          = driver;
1967         vring_packed->event_size_in_bytes   = event_size_in_bytes;
1968         vring_packed->driver_event_dma_addr = driver_event_dma_addr;
1969
1970         device = vring_alloc_queue(vdev, event_size_in_bytes,
1971                                    &device_event_dma_addr,
1972                                    GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
1973                                    dma_dev);
1974         if (!device)
1975                 goto err;
1976
1977         vring_packed->vring.device          = device;
1978         vring_packed->device_event_dma_addr = device_event_dma_addr;
1979
1980         vring_packed->vring.num = num;
1981
1982         return 0;
1983
1984 err:
1985         vring_free_packed(vring_packed, vdev, dma_dev);
1986         return -ENOMEM;
1987 }
1988
1989 static int vring_alloc_state_extra_packed(struct vring_virtqueue_packed *vring_packed)
1990 {
1991         struct vring_desc_state_packed *state;
1992         struct vring_desc_extra *extra;
1993         u32 num = vring_packed->vring.num;
1994
1995         state = kmalloc_array(num, sizeof(struct vring_desc_state_packed), GFP_KERNEL);
1996         if (!state)
1997                 goto err_desc_state;
1998
1999         memset(state, 0, num * sizeof(struct vring_desc_state_packed));
2000
2001         extra = vring_alloc_desc_extra(num);
2002         if (!extra)
2003                 goto err_desc_extra;
2004
2005         vring_packed->desc_state = state;
2006         vring_packed->desc_extra = extra;
2007
2008         return 0;
2009
2010 err_desc_extra:
2011         kfree(state);
2012 err_desc_state:
2013         return -ENOMEM;
2014 }
2015
2016 static void virtqueue_vring_init_packed(struct vring_virtqueue_packed *vring_packed,
2017                                         bool callback)
2018 {
2019         vring_packed->next_avail_idx = 0;
2020         vring_packed->avail_wrap_counter = 1;
2021         vring_packed->event_flags_shadow = 0;
2022         vring_packed->avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL;
2023
2024         /* No callback?  Tell other side not to bother us. */
2025         if (!callback) {
2026                 vring_packed->event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
2027                 vring_packed->vring.driver->flags =
2028                         cpu_to_le16(vring_packed->event_flags_shadow);
2029         }
2030 }
2031
2032 static void virtqueue_vring_attach_packed(struct vring_virtqueue *vq,
2033                                           struct vring_virtqueue_packed *vring_packed)
2034 {
2035         vq->packed = *vring_packed;
2036
2037         /* Put everything in free lists. */
2038         vq->free_head = 0;
2039 }
2040
2041 static void virtqueue_reinit_packed(struct vring_virtqueue *vq)
2042 {
2043         memset(vq->packed.vring.device, 0, vq->packed.event_size_in_bytes);
2044         memset(vq->packed.vring.driver, 0, vq->packed.event_size_in_bytes);
2045
2046         /* we need to reset the desc.flags. For more, see is_used_desc_packed() */
2047         memset(vq->packed.vring.desc, 0, vq->packed.ring_size_in_bytes);
2048
2049         virtqueue_init(vq, vq->packed.vring.num);
2050         virtqueue_vring_init_packed(&vq->packed, !!vq->vq.callback);
2051 }
2052
2053 static struct virtqueue *vring_create_virtqueue_packed(
2054         unsigned int index,
2055         unsigned int num,
2056         unsigned int vring_align,
2057         struct virtio_device *vdev,
2058         bool weak_barriers,
2059         bool may_reduce_num,
2060         bool context,
2061         bool (*notify)(struct virtqueue *),
2062         void (*callback)(struct virtqueue *),
2063         const char *name,
2064         struct device *dma_dev)
2065 {
2066         struct vring_virtqueue_packed vring_packed = {};
2067         struct vring_virtqueue *vq;
2068         int err;
2069
2070         if (vring_alloc_queue_packed(&vring_packed, vdev, num, dma_dev))
2071                 goto err_ring;
2072
2073         vq = kmalloc(sizeof(*vq), GFP_KERNEL);
2074         if (!vq)
2075                 goto err_vq;
2076
2077         vq->vq.callback = callback;
2078         vq->vq.vdev = vdev;
2079         vq->vq.name = name;
2080         vq->vq.index = index;
2081         vq->vq.reset = false;
2082         vq->we_own_ring = true;
2083         vq->notify = notify;
2084         vq->weak_barriers = weak_barriers;
2085 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
2086         vq->broken = true;
2087 #else
2088         vq->broken = false;
2089 #endif
2090         vq->packed_ring = true;
2091         vq->dma_dev = dma_dev;
2092         vq->use_dma_api = vring_use_dma_api(vdev);
2093         vq->premapped = false;
2094         vq->do_unmap = vq->use_dma_api;
2095
2096         vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
2097                 !context;
2098         vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
2099
2100         if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
2101                 vq->weak_barriers = false;
2102
2103         err = vring_alloc_state_extra_packed(&vring_packed);
2104         if (err)
2105                 goto err_state_extra;
2106
2107         virtqueue_vring_init_packed(&vring_packed, !!callback);
2108
2109         virtqueue_init(vq, num);
2110         virtqueue_vring_attach_packed(vq, &vring_packed);
2111
2112         spin_lock(&vdev->vqs_list_lock);
2113         list_add_tail(&vq->vq.list, &vdev->vqs);
2114         spin_unlock(&vdev->vqs_list_lock);
2115         return &vq->vq;
2116
2117 err_state_extra:
2118         kfree(vq);
2119 err_vq:
2120         vring_free_packed(&vring_packed, vdev, dma_dev);
2121 err_ring:
2122         return NULL;
2123 }
2124
2125 static int virtqueue_resize_packed(struct virtqueue *_vq, u32 num)
2126 {
2127         struct vring_virtqueue_packed vring_packed = {};
2128         struct vring_virtqueue *vq = to_vvq(_vq);
2129         struct virtio_device *vdev = _vq->vdev;
2130         int err;
2131
2132         if (vring_alloc_queue_packed(&vring_packed, vdev, num, vring_dma_dev(vq)))
2133                 goto err_ring;
2134
2135         err = vring_alloc_state_extra_packed(&vring_packed);
2136         if (err)
2137                 goto err_state_extra;
2138
2139         vring_free(&vq->vq);
2140
2141         virtqueue_vring_init_packed(&vring_packed, !!vq->vq.callback);
2142
2143         virtqueue_init(vq, vring_packed.vring.num);
2144         virtqueue_vring_attach_packed(vq, &vring_packed);
2145
2146         return 0;
2147
2148 err_state_extra:
2149         vring_free_packed(&vring_packed, vdev, vring_dma_dev(vq));
2150 err_ring:
2151         virtqueue_reinit_packed(vq);
2152         return -ENOMEM;
2153 }
2154
2155 static int virtqueue_disable_and_recycle(struct virtqueue *_vq,
2156                                          void (*recycle)(struct virtqueue *vq, void *buf))
2157 {
2158         struct vring_virtqueue *vq = to_vvq(_vq);
2159         struct virtio_device *vdev = vq->vq.vdev;
2160         void *buf;
2161         int err;
2162
2163         if (!vq->we_own_ring)
2164                 return -EPERM;
2165
2166         if (!vdev->config->disable_vq_and_reset)
2167                 return -ENOENT;
2168
2169         if (!vdev->config->enable_vq_after_reset)
2170                 return -ENOENT;
2171
2172         err = vdev->config->disable_vq_and_reset(_vq);
2173         if (err)
2174                 return err;
2175
2176         while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL)
2177                 recycle(_vq, buf);
2178
2179         return 0;
2180 }
2181
2182 static int virtqueue_enable_after_reset(struct virtqueue *_vq)
2183 {
2184         struct vring_virtqueue *vq = to_vvq(_vq);
2185         struct virtio_device *vdev = vq->vq.vdev;
2186
2187         if (vdev->config->enable_vq_after_reset(_vq))
2188                 return -EBUSY;
2189
2190         return 0;
2191 }
2192
2193 /*
2194  * Generic functions and exported symbols.
2195  */
2196
2197 static inline int virtqueue_add(struct virtqueue *_vq,
2198                                 struct scatterlist *sgs[],
2199                                 unsigned int total_sg,
2200                                 unsigned int out_sgs,
2201                                 unsigned int in_sgs,
2202                                 void *data,
2203                                 void *ctx,
2204                                 gfp_t gfp)
2205 {
2206         struct vring_virtqueue *vq = to_vvq(_vq);
2207
2208         return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg,
2209                                         out_sgs, in_sgs, data, ctx, gfp) :
2210                                  virtqueue_add_split(_vq, sgs, total_sg,
2211                                         out_sgs, in_sgs, data, ctx, gfp);
2212 }
2213
2214 /**
2215  * virtqueue_add_sgs - expose buffers to other end
2216  * @_vq: the struct virtqueue we're talking about.
2217  * @sgs: array of terminated scatterlists.
2218  * @out_sgs: the number of scatterlists readable by other side
2219  * @in_sgs: the number of scatterlists which are writable (after readable ones)
2220  * @data: the token identifying the buffer.
2221  * @gfp: how to do memory allocations (if necessary).
2222  *
2223  * Caller must ensure we don't call this with other virtqueue operations
2224  * at the same time (except where noted).
2225  *
2226  * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
2227  */
2228 int virtqueue_add_sgs(struct virtqueue *_vq,
2229                       struct scatterlist *sgs[],
2230                       unsigned int out_sgs,
2231                       unsigned int in_sgs,
2232                       void *data,
2233                       gfp_t gfp)
2234 {
2235         unsigned int i, total_sg = 0;
2236
2237         /* Count them first. */
2238         for (i = 0; i < out_sgs + in_sgs; i++) {
2239                 struct scatterlist *sg;
2240
2241                 for (sg = sgs[i]; sg; sg = sg_next(sg))
2242                         total_sg++;
2243         }
2244         return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs,
2245                              data, NULL, gfp);
2246 }
2247 EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
2248
2249 /**
2250  * virtqueue_add_outbuf - expose output buffers to other end
2251  * @vq: the struct virtqueue we're talking about.
2252  * @sg: scatterlist (must be well-formed and terminated!)
2253  * @num: the number of entries in @sg readable by other side
2254  * @data: the token identifying the buffer.
2255  * @gfp: how to do memory allocations (if necessary).
2256  *
2257  * Caller must ensure we don't call this with other virtqueue operations
2258  * at the same time (except where noted).
2259  *
2260  * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
2261  */
2262 int virtqueue_add_outbuf(struct virtqueue *vq,
2263                          struct scatterlist *sg, unsigned int num,
2264                          void *data,
2265                          gfp_t gfp)
2266 {
2267         return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp);
2268 }
2269 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
2270
2271 /**
2272  * virtqueue_add_inbuf - expose input buffers to other end
2273  * @vq: the struct virtqueue we're talking about.
2274  * @sg: scatterlist (must be well-formed and terminated!)
2275  * @num: the number of entries in @sg writable by other side
2276  * @data: the token identifying the buffer.
2277  * @gfp: how to do memory allocations (if necessary).
2278  *
2279  * Caller must ensure we don't call this with other virtqueue operations
2280  * at the same time (except where noted).
2281  *
2282  * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
2283  */
2284 int virtqueue_add_inbuf(struct virtqueue *vq,
2285                         struct scatterlist *sg, unsigned int num,
2286                         void *data,
2287                         gfp_t gfp)
2288 {
2289         return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp);
2290 }
2291 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
2292
2293 /**
2294  * virtqueue_add_inbuf_ctx - expose input buffers to other end
2295  * @vq: the struct virtqueue we're talking about.
2296  * @sg: scatterlist (must be well-formed and terminated!)
2297  * @num: the number of entries in @sg writable by other side
2298  * @data: the token identifying the buffer.
2299  * @ctx: extra context for the token
2300  * @gfp: how to do memory allocations (if necessary).
2301  *
2302  * Caller must ensure we don't call this with other virtqueue operations
2303  * at the same time (except where noted).
2304  *
2305  * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
2306  */
2307 int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
2308                         struct scatterlist *sg, unsigned int num,
2309                         void *data,
2310                         void *ctx,
2311                         gfp_t gfp)
2312 {
2313         return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp);
2314 }
2315 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
2316
2317 /**
2318  * virtqueue_dma_dev - get the dma dev
2319  * @_vq: the struct virtqueue we're talking about.
2320  *
2321  * Returns the dma dev. That can been used for dma api.
2322  */
2323 struct device *virtqueue_dma_dev(struct virtqueue *_vq)
2324 {
2325         struct vring_virtqueue *vq = to_vvq(_vq);
2326
2327         if (vq->use_dma_api)
2328                 return vring_dma_dev(vq);
2329         else
2330                 return NULL;
2331 }
2332 EXPORT_SYMBOL_GPL(virtqueue_dma_dev);
2333
2334 /**
2335  * virtqueue_kick_prepare - first half of split virtqueue_kick call.
2336  * @_vq: the struct virtqueue
2337  *
2338  * Instead of virtqueue_kick(), you can do:
2339  *      if (virtqueue_kick_prepare(vq))
2340  *              virtqueue_notify(vq);
2341  *
2342  * This is sometimes useful because the virtqueue_kick_prepare() needs
2343  * to be serialized, but the actual virtqueue_notify() call does not.
2344  */
2345 bool virtqueue_kick_prepare(struct virtqueue *_vq)
2346 {
2347         struct vring_virtqueue *vq = to_vvq(_vq);
2348
2349         return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) :
2350                                  virtqueue_kick_prepare_split(_vq);
2351 }
2352 EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
2353
2354 /**
2355  * virtqueue_notify - second half of split virtqueue_kick call.
2356  * @_vq: the struct virtqueue
2357  *
2358  * This does not need to be serialized.
2359  *
2360  * Returns false if host notify failed or queue is broken, otherwise true.
2361  */
2362 bool virtqueue_notify(struct virtqueue *_vq)
2363 {
2364         struct vring_virtqueue *vq = to_vvq(_vq);
2365
2366         if (unlikely(vq->broken))
2367                 return false;
2368
2369         /* Prod other side to tell it about changes. */
2370         if (!vq->notify(_vq)) {
2371                 vq->broken = true;
2372                 return false;
2373         }
2374         return true;
2375 }
2376 EXPORT_SYMBOL_GPL(virtqueue_notify);
2377
2378 /**
2379  * virtqueue_kick - update after add_buf
2380  * @vq: the struct virtqueue
2381  *
2382  * After one or more virtqueue_add_* calls, invoke this to kick
2383  * the other side.
2384  *
2385  * Caller must ensure we don't call this with other virtqueue
2386  * operations at the same time (except where noted).
2387  *
2388  * Returns false if kick failed, otherwise true.
2389  */
2390 bool virtqueue_kick(struct virtqueue *vq)
2391 {
2392         if (virtqueue_kick_prepare(vq))
2393                 return virtqueue_notify(vq);
2394         return true;
2395 }
2396 EXPORT_SYMBOL_GPL(virtqueue_kick);
2397
2398 /**
2399  * virtqueue_get_buf_ctx - get the next used buffer
2400  * @_vq: the struct virtqueue we're talking about.
2401  * @len: the length written into the buffer
2402  * @ctx: extra context for the token
2403  *
2404  * If the device wrote data into the buffer, @len will be set to the
2405  * amount written.  This means you don't need to clear the buffer
2406  * beforehand to ensure there's no data leakage in the case of short
2407  * writes.
2408  *
2409  * Caller must ensure we don't call this with other virtqueue
2410  * operations at the same time (except where noted).
2411  *
2412  * Returns NULL if there are no used buffers, or the "data" token
2413  * handed to virtqueue_add_*().
2414  */
2415 void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
2416                             void **ctx)
2417 {
2418         struct vring_virtqueue *vq = to_vvq(_vq);
2419
2420         return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) :
2421                                  virtqueue_get_buf_ctx_split(_vq, len, ctx);
2422 }
2423 EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);
2424
2425 void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
2426 {
2427         return virtqueue_get_buf_ctx(_vq, len, NULL);
2428 }
2429 EXPORT_SYMBOL_GPL(virtqueue_get_buf);
2430 /**
2431  * virtqueue_disable_cb - disable callbacks
2432  * @_vq: the struct virtqueue we're talking about.
2433  *
2434  * Note that this is not necessarily synchronous, hence unreliable and only
2435  * useful as an optimization.
2436  *
2437  * Unlike other operations, this need not be serialized.
2438  */
2439 void virtqueue_disable_cb(struct virtqueue *_vq)
2440 {
2441         struct vring_virtqueue *vq = to_vvq(_vq);
2442
2443         if (vq->packed_ring)
2444                 virtqueue_disable_cb_packed(_vq);
2445         else
2446                 virtqueue_disable_cb_split(_vq);
2447 }
2448 EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
2449
2450 /**
2451  * virtqueue_enable_cb_prepare - restart callbacks after disable_cb
2452  * @_vq: the struct virtqueue we're talking about.
2453  *
2454  * This re-enables callbacks; it returns current queue state
2455  * in an opaque unsigned value. This value should be later tested by
2456  * virtqueue_poll, to detect a possible race between the driver checking for
2457  * more work, and enabling callbacks.
2458  *
2459  * Caller must ensure we don't call this with other virtqueue
2460  * operations at the same time (except where noted).
2461  */
2462 unsigned int virtqueue_enable_cb_prepare(struct virtqueue *_vq)
2463 {
2464         struct vring_virtqueue *vq = to_vvq(_vq);
2465
2466         if (vq->event_triggered)
2467                 vq->event_triggered = false;
2468
2469         return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) :
2470                                  virtqueue_enable_cb_prepare_split(_vq);
2471 }
2472 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);
2473
2474 /**
2475  * virtqueue_poll - query pending used buffers
2476  * @_vq: the struct virtqueue we're talking about.
2477  * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
2478  *
2479  * Returns "true" if there are pending used buffers in the queue.
2480  *
2481  * This does not need to be serialized.
2482  */
2483 bool virtqueue_poll(struct virtqueue *_vq, unsigned int last_used_idx)
2484 {
2485         struct vring_virtqueue *vq = to_vvq(_vq);
2486
2487         if (unlikely(vq->broken))
2488                 return false;
2489
2490         virtio_mb(vq->weak_barriers);
2491         return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) :
2492                                  virtqueue_poll_split(_vq, last_used_idx);
2493 }
2494 EXPORT_SYMBOL_GPL(virtqueue_poll);
2495
2496 /**
2497  * virtqueue_enable_cb - restart callbacks after disable_cb.
2498  * @_vq: the struct virtqueue we're talking about.
2499  *
2500  * This re-enables callbacks; it returns "false" if there are pending
2501  * buffers in the queue, to detect a possible race between the driver
2502  * checking for more work, and enabling callbacks.
2503  *
2504  * Caller must ensure we don't call this with other virtqueue
2505  * operations at the same time (except where noted).
2506  */
2507 bool virtqueue_enable_cb(struct virtqueue *_vq)
2508 {
2509         unsigned int last_used_idx = virtqueue_enable_cb_prepare(_vq);
2510
2511         return !virtqueue_poll(_vq, last_used_idx);
2512 }
2513 EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
2514
2515 /**
2516  * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
2517  * @_vq: the struct virtqueue we're talking about.
2518  *
2519  * This re-enables callbacks but hints to the other side to delay
2520  * interrupts until most of the available buffers have been processed;
2521  * it returns "false" if there are many pending buffers in the queue,
2522  * to detect a possible race between the driver checking for more work,
2523  * and enabling callbacks.
2524  *
2525  * Caller must ensure we don't call this with other virtqueue
2526  * operations at the same time (except where noted).
2527  */
2528 bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
2529 {
2530         struct vring_virtqueue *vq = to_vvq(_vq);
2531
2532         if (vq->event_triggered)
2533                 vq->event_triggered = false;
2534
2535         return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) :
2536                                  virtqueue_enable_cb_delayed_split(_vq);
2537 }
2538 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
2539
2540 /**
2541  * virtqueue_detach_unused_buf - detach first unused buffer
2542  * @_vq: the struct virtqueue we're talking about.
2543  *
2544  * Returns NULL or the "data" token handed to virtqueue_add_*().
2545  * This is not valid on an active queue; it is useful for device
2546  * shutdown or the reset queue.
2547  */
2548 void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
2549 {
2550         struct vring_virtqueue *vq = to_vvq(_vq);
2551
2552         return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) :
2553                                  virtqueue_detach_unused_buf_split(_vq);
2554 }
2555 EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
2556
2557 static inline bool more_used(const struct vring_virtqueue *vq)
2558 {
2559         return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
2560 }
2561
2562 /**
2563  * vring_interrupt - notify a virtqueue on an interrupt
2564  * @irq: the IRQ number (ignored)
2565  * @_vq: the struct virtqueue to notify
2566  *
2567  * Calls the callback function of @_vq to process the virtqueue
2568  * notification.
2569  */
2570 irqreturn_t vring_interrupt(int irq, void *_vq)
2571 {
2572         struct vring_virtqueue *vq = to_vvq(_vq);
2573
2574         if (!more_used(vq)) {
2575                 pr_debug("virtqueue interrupt with no work for %p\n", vq);
2576                 return IRQ_NONE;
2577         }
2578
2579         if (unlikely(vq->broken)) {
2580 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
2581                 dev_warn_once(&vq->vq.vdev->dev,
2582                               "virtio vring IRQ raised before DRIVER_OK");
2583                 return IRQ_NONE;
2584 #else
2585                 return IRQ_HANDLED;
2586 #endif
2587         }
2588
2589         /* Just a hint for performance: so it's ok that this can be racy! */
2590         if (vq->event)
2591                 vq->event_triggered = true;
2592
2593         pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
2594         if (vq->vq.callback)
2595                 vq->vq.callback(&vq->vq);
2596
2597         return IRQ_HANDLED;
2598 }
2599 EXPORT_SYMBOL_GPL(vring_interrupt);
2600
2601 /* Only available for split ring */
2602 static struct virtqueue *__vring_new_virtqueue(unsigned int index,
2603                                                struct vring_virtqueue_split *vring_split,
2604                                                struct virtio_device *vdev,
2605                                                bool weak_barriers,
2606                                                bool context,
2607                                                bool (*notify)(struct virtqueue *),
2608                                                void (*callback)(struct virtqueue *),
2609                                                const char *name,
2610                                                struct device *dma_dev)
2611 {
2612         struct vring_virtqueue *vq;
2613         int err;
2614
2615         if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2616                 return NULL;
2617
2618         vq = kmalloc(sizeof(*vq), GFP_KERNEL);
2619         if (!vq)
2620                 return NULL;
2621
2622         vq->packed_ring = false;
2623         vq->vq.callback = callback;
2624         vq->vq.vdev = vdev;
2625         vq->vq.name = name;
2626         vq->vq.index = index;
2627         vq->vq.reset = false;
2628         vq->we_own_ring = false;
2629         vq->notify = notify;
2630         vq->weak_barriers = weak_barriers;
2631 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
2632         vq->broken = true;
2633 #else
2634         vq->broken = false;
2635 #endif
2636         vq->dma_dev = dma_dev;
2637         vq->use_dma_api = vring_use_dma_api(vdev);
2638         vq->premapped = false;
2639         vq->do_unmap = vq->use_dma_api;
2640
2641         vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
2642                 !context;
2643         vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
2644
2645         if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
2646                 vq->weak_barriers = false;
2647
2648         err = vring_alloc_state_extra_split(vring_split);
2649         if (err) {
2650                 kfree(vq);
2651                 return NULL;
2652         }
2653
2654         virtqueue_vring_init_split(vring_split, vq);
2655
2656         virtqueue_init(vq, vring_split->vring.num);
2657         virtqueue_vring_attach_split(vq, vring_split);
2658
2659         spin_lock(&vdev->vqs_list_lock);
2660         list_add_tail(&vq->vq.list, &vdev->vqs);
2661         spin_unlock(&vdev->vqs_list_lock);
2662         return &vq->vq;
2663 }
2664
2665 struct virtqueue *vring_create_virtqueue(
2666         unsigned int index,
2667         unsigned int num,
2668         unsigned int vring_align,
2669         struct virtio_device *vdev,
2670         bool weak_barriers,
2671         bool may_reduce_num,
2672         bool context,
2673         bool (*notify)(struct virtqueue *),
2674         void (*callback)(struct virtqueue *),
2675         const char *name)
2676 {
2677
2678         if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2679                 return vring_create_virtqueue_packed(index, num, vring_align,
2680                                 vdev, weak_barriers, may_reduce_num,
2681                                 context, notify, callback, name, vdev->dev.parent);
2682
2683         return vring_create_virtqueue_split(index, num, vring_align,
2684                         vdev, weak_barriers, may_reduce_num,
2685                         context, notify, callback, name, vdev->dev.parent);
2686 }
2687 EXPORT_SYMBOL_GPL(vring_create_virtqueue);
2688
2689 struct virtqueue *vring_create_virtqueue_dma(
2690         unsigned int index,
2691         unsigned int num,
2692         unsigned int vring_align,
2693         struct virtio_device *vdev,
2694         bool weak_barriers,
2695         bool may_reduce_num,
2696         bool context,
2697         bool (*notify)(struct virtqueue *),
2698         void (*callback)(struct virtqueue *),
2699         const char *name,
2700         struct device *dma_dev)
2701 {
2702
2703         if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2704                 return vring_create_virtqueue_packed(index, num, vring_align,
2705                                 vdev, weak_barriers, may_reduce_num,
2706                                 context, notify, callback, name, dma_dev);
2707
2708         return vring_create_virtqueue_split(index, num, vring_align,
2709                         vdev, weak_barriers, may_reduce_num,
2710                         context, notify, callback, name, dma_dev);
2711 }
2712 EXPORT_SYMBOL_GPL(vring_create_virtqueue_dma);
2713
2714 /**
2715  * virtqueue_resize - resize the vring of vq
2716  * @_vq: the struct virtqueue we're talking about.
2717  * @num: new ring num
2718  * @recycle: callback to recycle unused buffers
2719  *
2720  * When it is really necessary to create a new vring, it will set the current vq
2721  * into the reset state. Then call the passed callback to recycle the buffer
2722  * that is no longer used. Only after the new vring is successfully created, the
2723  * old vring will be released.
2724  *
2725  * Caller must ensure we don't call this with other virtqueue operations
2726  * at the same time (except where noted).
2727  *
2728  * Returns zero or a negative error.
2729  * 0: success.
2730  * -ENOMEM: Failed to allocate a new ring, fall back to the original ring size.
2731  *  vq can still work normally
2732  * -EBUSY: Failed to sync with device, vq may not work properly
2733  * -ENOENT: Transport or device not supported
2734  * -E2BIG/-EINVAL: num error
2735  * -EPERM: Operation not permitted
2736  *
2737  */
2738 int virtqueue_resize(struct virtqueue *_vq, u32 num,
2739                      void (*recycle)(struct virtqueue *vq, void *buf))
2740 {
2741         struct vring_virtqueue *vq = to_vvq(_vq);
2742         int err;
2743
2744         if (num > vq->vq.num_max)
2745                 return -E2BIG;
2746
2747         if (!num)
2748                 return -EINVAL;
2749
2750         if ((vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num) == num)
2751                 return 0;
2752
2753         err = virtqueue_disable_and_recycle(_vq, recycle);
2754         if (err)
2755                 return err;
2756
2757         if (vq->packed_ring)
2758                 err = virtqueue_resize_packed(_vq, num);
2759         else
2760                 err = virtqueue_resize_split(_vq, num);
2761
2762         return virtqueue_enable_after_reset(_vq);
2763 }
2764 EXPORT_SYMBOL_GPL(virtqueue_resize);
2765
2766 /**
2767  * virtqueue_set_dma_premapped - set the vring premapped mode
2768  * @_vq: the struct virtqueue we're talking about.
2769  *
2770  * Enable the premapped mode of the vq.
2771  *
2772  * The vring in premapped mode does not do dma internally, so the driver must
2773  * do dma mapping in advance. The driver must pass the dma_address through
2774  * dma_address of scatterlist. When the driver got a used buffer from
2775  * the vring, it has to unmap the dma address.
2776  *
2777  * This function must be called immediately after creating the vq, or after vq
2778  * reset, and before adding any buffers to it.
2779  *
2780  * Caller must ensure we don't call this with other virtqueue operations
2781  * at the same time (except where noted).
2782  *
2783  * Returns zero or a negative error.
2784  * 0: success.
2785  * -EINVAL: vring does not use the dma api, so we can not enable premapped mode.
2786  */
2787 int virtqueue_set_dma_premapped(struct virtqueue *_vq)
2788 {
2789         struct vring_virtqueue *vq = to_vvq(_vq);
2790         u32 num;
2791
2792         START_USE(vq);
2793
2794         num = vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
2795
2796         if (num != vq->vq.num_free) {
2797                 END_USE(vq);
2798                 return -EINVAL;
2799         }
2800
2801         if (!vq->use_dma_api) {
2802                 END_USE(vq);
2803                 return -EINVAL;
2804         }
2805
2806         vq->premapped = true;
2807         vq->do_unmap = false;
2808
2809         END_USE(vq);
2810
2811         return 0;
2812 }
2813 EXPORT_SYMBOL_GPL(virtqueue_set_dma_premapped);
2814
2815 /**
2816  * virtqueue_reset - detach and recycle all unused buffers
2817  * @_vq: the struct virtqueue we're talking about.
2818  * @recycle: callback to recycle unused buffers
2819  *
2820  * Caller must ensure we don't call this with other virtqueue operations
2821  * at the same time (except where noted).
2822  *
2823  * Returns zero or a negative error.
2824  * 0: success.
2825  * -EBUSY: Failed to sync with device, vq may not work properly
2826  * -ENOENT: Transport or device not supported
2827  * -EPERM: Operation not permitted
2828  */
2829 int virtqueue_reset(struct virtqueue *_vq,
2830                     void (*recycle)(struct virtqueue *vq, void *buf))
2831 {
2832         struct vring_virtqueue *vq = to_vvq(_vq);
2833         int err;
2834
2835         err = virtqueue_disable_and_recycle(_vq, recycle);
2836         if (err)
2837                 return err;
2838
2839         if (vq->packed_ring)
2840                 virtqueue_reinit_packed(vq);
2841         else
2842                 virtqueue_reinit_split(vq);
2843
2844         return virtqueue_enable_after_reset(_vq);
2845 }
2846 EXPORT_SYMBOL_GPL(virtqueue_reset);
2847
2848 /* Only available for split ring */
2849 struct virtqueue *vring_new_virtqueue(unsigned int index,
2850                                       unsigned int num,
2851                                       unsigned int vring_align,
2852                                       struct virtio_device *vdev,
2853                                       bool weak_barriers,
2854                                       bool context,
2855                                       void *pages,
2856                                       bool (*notify)(struct virtqueue *vq),
2857                                       void (*callback)(struct virtqueue *vq),
2858                                       const char *name)
2859 {
2860         struct vring_virtqueue_split vring_split = {};
2861
2862         if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2863                 return NULL;
2864
2865         vring_init(&vring_split.vring, num, pages, vring_align);
2866         return __vring_new_virtqueue(index, &vring_split, vdev, weak_barriers,
2867                                      context, notify, callback, name,
2868                                      vdev->dev.parent);
2869 }
2870 EXPORT_SYMBOL_GPL(vring_new_virtqueue);
2871
2872 static void vring_free(struct virtqueue *_vq)
2873 {
2874         struct vring_virtqueue *vq = to_vvq(_vq);
2875
2876         if (vq->we_own_ring) {
2877                 if (vq->packed_ring) {
2878                         vring_free_queue(vq->vq.vdev,
2879                                          vq->packed.ring_size_in_bytes,
2880                                          vq->packed.vring.desc,
2881                                          vq->packed.ring_dma_addr,
2882                                          vring_dma_dev(vq));
2883
2884                         vring_free_queue(vq->vq.vdev,
2885                                          vq->packed.event_size_in_bytes,
2886                                          vq->packed.vring.driver,
2887                                          vq->packed.driver_event_dma_addr,
2888                                          vring_dma_dev(vq));
2889
2890                         vring_free_queue(vq->vq.vdev,
2891                                          vq->packed.event_size_in_bytes,
2892                                          vq->packed.vring.device,
2893                                          vq->packed.device_event_dma_addr,
2894                                          vring_dma_dev(vq));
2895
2896                         kfree(vq->packed.desc_state);
2897                         kfree(vq->packed.desc_extra);
2898                 } else {
2899                         vring_free_queue(vq->vq.vdev,
2900                                          vq->split.queue_size_in_bytes,
2901                                          vq->split.vring.desc,
2902                                          vq->split.queue_dma_addr,
2903                                          vring_dma_dev(vq));
2904                 }
2905         }
2906         if (!vq->packed_ring) {
2907                 kfree(vq->split.desc_state);
2908                 kfree(vq->split.desc_extra);
2909         }
2910 }
2911
2912 void vring_del_virtqueue(struct virtqueue *_vq)
2913 {
2914         struct vring_virtqueue *vq = to_vvq(_vq);
2915
2916         spin_lock(&vq->vq.vdev->vqs_list_lock);
2917         list_del(&_vq->list);
2918         spin_unlock(&vq->vq.vdev->vqs_list_lock);
2919
2920         vring_free(_vq);
2921
2922         kfree(vq);
2923 }
2924 EXPORT_SYMBOL_GPL(vring_del_virtqueue);
2925
2926 u32 vring_notification_data(struct virtqueue *_vq)
2927 {
2928         struct vring_virtqueue *vq = to_vvq(_vq);
2929         u16 next;
2930
2931         if (vq->packed_ring)
2932                 next = (vq->packed.next_avail_idx &
2933                                 ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR))) |
2934                         vq->packed.avail_wrap_counter <<
2935                                 VRING_PACKED_EVENT_F_WRAP_CTR;
2936         else
2937                 next = vq->split.avail_idx_shadow;
2938
2939         return next << 16 | _vq->index;
2940 }
2941 EXPORT_SYMBOL_GPL(vring_notification_data);
2942
2943 /* Manipulates transport-specific feature bits. */
2944 void vring_transport_features(struct virtio_device *vdev)
2945 {
2946         unsigned int i;
2947
2948         for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
2949                 switch (i) {
2950                 case VIRTIO_RING_F_INDIRECT_DESC:
2951                         break;
2952                 case VIRTIO_RING_F_EVENT_IDX:
2953                         break;
2954                 case VIRTIO_F_VERSION_1:
2955                         break;
2956                 case VIRTIO_F_ACCESS_PLATFORM:
2957                         break;
2958                 case VIRTIO_F_RING_PACKED:
2959                         break;
2960                 case VIRTIO_F_ORDER_PLATFORM:
2961                         break;
2962                 case VIRTIO_F_NOTIFICATION_DATA:
2963                         break;
2964                 default:
2965                         /* We don't understand this bit. */
2966                         __virtio_clear_bit(vdev, i);
2967                 }
2968         }
2969 }
2970 EXPORT_SYMBOL_GPL(vring_transport_features);
2971
2972 /**
2973  * virtqueue_get_vring_size - return the size of the virtqueue's vring
2974  * @_vq: the struct virtqueue containing the vring of interest.
2975  *
2976  * Returns the size of the vring.  This is mainly used for boasting to
2977  * userspace.  Unlike other operations, this need not be serialized.
2978  */
2979 unsigned int virtqueue_get_vring_size(const struct virtqueue *_vq)
2980 {
2981
2982         const struct vring_virtqueue *vq = to_vvq(_vq);
2983
2984         return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
2985 }
2986 EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
2987
2988 /*
2989  * This function should only be called by the core, not directly by the driver.
2990  */
2991 void __virtqueue_break(struct virtqueue *_vq)
2992 {
2993         struct vring_virtqueue *vq = to_vvq(_vq);
2994
2995         /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2996         WRITE_ONCE(vq->broken, true);
2997 }
2998 EXPORT_SYMBOL_GPL(__virtqueue_break);
2999
3000 /*
3001  * This function should only be called by the core, not directly by the driver.
3002  */
3003 void __virtqueue_unbreak(struct virtqueue *_vq)
3004 {
3005         struct vring_virtqueue *vq = to_vvq(_vq);
3006
3007         /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
3008         WRITE_ONCE(vq->broken, false);
3009 }
3010 EXPORT_SYMBOL_GPL(__virtqueue_unbreak);
3011
3012 bool virtqueue_is_broken(const struct virtqueue *_vq)
3013 {
3014         const struct vring_virtqueue *vq = to_vvq(_vq);
3015
3016         return READ_ONCE(vq->broken);
3017 }
3018 EXPORT_SYMBOL_GPL(virtqueue_is_broken);
3019
3020 /*
3021  * This should prevent the device from being used, allowing drivers to
3022  * recover.  You may need to grab appropriate locks to flush.
3023  */
3024 void virtio_break_device(struct virtio_device *dev)
3025 {
3026         struct virtqueue *_vq;
3027
3028         spin_lock(&dev->vqs_list_lock);
3029         list_for_each_entry(_vq, &dev->vqs, list) {
3030                 struct vring_virtqueue *vq = to_vvq(_vq);
3031
3032                 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
3033                 WRITE_ONCE(vq->broken, true);
3034         }
3035         spin_unlock(&dev->vqs_list_lock);
3036 }
3037 EXPORT_SYMBOL_GPL(virtio_break_device);
3038
3039 /*
3040  * This should allow the device to be used by the driver. You may
3041  * need to grab appropriate locks to flush the write to
3042  * vq->broken. This should only be used in some specific case e.g
3043  * (probing and restoring). This function should only be called by the
3044  * core, not directly by the driver.
3045  */
3046 void __virtio_unbreak_device(struct virtio_device *dev)
3047 {
3048         struct virtqueue *_vq;
3049
3050         spin_lock(&dev->vqs_list_lock);
3051         list_for_each_entry(_vq, &dev->vqs, list) {
3052                 struct vring_virtqueue *vq = to_vvq(_vq);
3053
3054                 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
3055                 WRITE_ONCE(vq->broken, false);
3056         }
3057         spin_unlock(&dev->vqs_list_lock);
3058 }
3059 EXPORT_SYMBOL_GPL(__virtio_unbreak_device);
3060
3061 dma_addr_t virtqueue_get_desc_addr(const struct virtqueue *_vq)
3062 {
3063         const struct vring_virtqueue *vq = to_vvq(_vq);
3064
3065         BUG_ON(!vq->we_own_ring);
3066
3067         if (vq->packed_ring)
3068                 return vq->packed.ring_dma_addr;
3069
3070         return vq->split.queue_dma_addr;
3071 }
3072 EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);
3073
3074 dma_addr_t virtqueue_get_avail_addr(const struct virtqueue *_vq)
3075 {
3076         const struct vring_virtqueue *vq = to_vvq(_vq);
3077
3078         BUG_ON(!vq->we_own_ring);
3079
3080         if (vq->packed_ring)
3081                 return vq->packed.driver_event_dma_addr;
3082
3083         return vq->split.queue_dma_addr +
3084                 ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
3085 }
3086 EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);
3087
3088 dma_addr_t virtqueue_get_used_addr(const struct virtqueue *_vq)
3089 {
3090         const struct vring_virtqueue *vq = to_vvq(_vq);
3091
3092         BUG_ON(!vq->we_own_ring);
3093
3094         if (vq->packed_ring)
3095                 return vq->packed.device_event_dma_addr;
3096
3097         return vq->split.queue_dma_addr +
3098                 ((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
3099 }
3100 EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
3101
3102 /* Only available for split ring */
3103 const struct vring *virtqueue_get_vring(const struct virtqueue *vq)
3104 {
3105         return &to_vvq(vq)->split.vring;
3106 }
3107 EXPORT_SYMBOL_GPL(virtqueue_get_vring);
3108
3109 /**
3110  * virtqueue_dma_map_single_attrs - map DMA for _vq
3111  * @_vq: the struct virtqueue we're talking about.
3112  * @ptr: the pointer of the buffer to do dma
3113  * @size: the size of the buffer to do dma
3114  * @dir: DMA direction
3115  * @attrs: DMA Attrs
3116  *
3117  * The caller calls this to do dma mapping in advance. The DMA address can be
3118  * passed to this _vq when it is in pre-mapped mode.
3119  *
3120  * return DMA address. Caller should check that by virtqueue_dma_mapping_error().
3121  */
3122 dma_addr_t virtqueue_dma_map_single_attrs(struct virtqueue *_vq, void *ptr,
3123                                           size_t size,
3124                                           enum dma_data_direction dir,
3125                                           unsigned long attrs)
3126 {
3127         struct vring_virtqueue *vq = to_vvq(_vq);
3128
3129         if (!vq->use_dma_api)
3130                 return (dma_addr_t)virt_to_phys(ptr);
3131
3132         return dma_map_single_attrs(vring_dma_dev(vq), ptr, size, dir, attrs);
3133 }
3134 EXPORT_SYMBOL_GPL(virtqueue_dma_map_single_attrs);
3135
3136 /**
3137  * virtqueue_dma_unmap_single_attrs - unmap DMA for _vq
3138  * @_vq: the struct virtqueue we're talking about.
3139  * @addr: the dma address to unmap
3140  * @size: the size of the buffer
3141  * @dir: DMA direction
3142  * @attrs: DMA Attrs
3143  *
3144  * Unmap the address that is mapped by the virtqueue_dma_map_* APIs.
3145  *
3146  */
3147 void virtqueue_dma_unmap_single_attrs(struct virtqueue *_vq, dma_addr_t addr,
3148                                       size_t size, enum dma_data_direction dir,
3149                                       unsigned long attrs)
3150 {
3151         struct vring_virtqueue *vq = to_vvq(_vq);
3152
3153         if (!vq->use_dma_api)
3154                 return;
3155
3156         dma_unmap_single_attrs(vring_dma_dev(vq), addr, size, dir, attrs);
3157 }
3158 EXPORT_SYMBOL_GPL(virtqueue_dma_unmap_single_attrs);
3159
3160 /**
3161  * virtqueue_dma_mapping_error - check dma address
3162  * @_vq: the struct virtqueue we're talking about.
3163  * @addr: DMA address
3164  *
3165  * Returns 0 means dma valid. Other means invalid dma address.
3166  */
3167 int virtqueue_dma_mapping_error(struct virtqueue *_vq, dma_addr_t addr)
3168 {
3169         struct vring_virtqueue *vq = to_vvq(_vq);
3170
3171         if (!vq->use_dma_api)
3172                 return 0;
3173
3174         return dma_mapping_error(vring_dma_dev(vq), addr);
3175 }
3176 EXPORT_SYMBOL_GPL(virtqueue_dma_mapping_error);
3177
3178 /**
3179  * virtqueue_dma_need_sync - check a dma address needs sync
3180  * @_vq: the struct virtqueue we're talking about.
3181  * @addr: DMA address
3182  *
3183  * Check if the dma address mapped by the virtqueue_dma_map_* APIs needs to be
3184  * synchronized
3185  *
3186  * return bool
3187  */
3188 bool virtqueue_dma_need_sync(struct virtqueue *_vq, dma_addr_t addr)
3189 {
3190         struct vring_virtqueue *vq = to_vvq(_vq);
3191
3192         if (!vq->use_dma_api)
3193                 return false;
3194
3195         return dma_need_sync(vring_dma_dev(vq), addr);
3196 }
3197 EXPORT_SYMBOL_GPL(virtqueue_dma_need_sync);
3198
3199 /**
3200  * virtqueue_dma_sync_single_range_for_cpu - dma sync for cpu
3201  * @_vq: the struct virtqueue we're talking about.
3202  * @addr: DMA address
3203  * @offset: DMA address offset
3204  * @size: buf size for sync
3205  * @dir: DMA direction
3206  *
3207  * Before calling this function, use virtqueue_dma_need_sync() to confirm that
3208  * the DMA address really needs to be synchronized
3209  *
3210  */
3211 void virtqueue_dma_sync_single_range_for_cpu(struct virtqueue *_vq,
3212                                              dma_addr_t addr,
3213                                              unsigned long offset, size_t size,
3214                                              enum dma_data_direction dir)
3215 {
3216         struct vring_virtqueue *vq = to_vvq(_vq);
3217         struct device *dev = vring_dma_dev(vq);
3218
3219         if (!vq->use_dma_api)
3220                 return;
3221
3222         dma_sync_single_range_for_cpu(dev, addr, offset, size,
3223                                       DMA_BIDIRECTIONAL);
3224 }
3225 EXPORT_SYMBOL_GPL(virtqueue_dma_sync_single_range_for_cpu);
3226
3227 /**
3228  * virtqueue_dma_sync_single_range_for_device - dma sync for device
3229  * @_vq: the struct virtqueue we're talking about.
3230  * @addr: DMA address
3231  * @offset: DMA address offset
3232  * @size: buf size for sync
3233  * @dir: DMA direction
3234  *
3235  * Before calling this function, use virtqueue_dma_need_sync() to confirm that
3236  * the DMA address really needs to be synchronized
3237  */
3238 void virtqueue_dma_sync_single_range_for_device(struct virtqueue *_vq,
3239                                                 dma_addr_t addr,
3240                                                 unsigned long offset, size_t size,
3241                                                 enum dma_data_direction dir)
3242 {
3243         struct vring_virtqueue *vq = to_vvq(_vq);
3244         struct device *dev = vring_dma_dev(vq);
3245
3246         if (!vq->use_dma_api)
3247                 return;
3248
3249         dma_sync_single_range_for_device(dev, addr, offset, size,
3250                                          DMA_BIDIRECTIONAL);
3251 }
3252 EXPORT_SYMBOL_GPL(virtqueue_dma_sync_single_range_for_device);
3253
3254 MODULE_LICENSE("GPL");