virtio_ring: put mapping error check in vring_map_one_sg
[platform/kernel/linux-rpi.git] drivers/virtio/virtio_ring.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* Virtio ring implementation.
3  *
4  *  Copyright 2007 Rusty Russell IBM Corporation
5  */
6 #include <linux/virtio.h>
7 #include <linux/virtio_ring.h>
8 #include <linux/virtio_config.h>
9 #include <linux/device.h>
10 #include <linux/slab.h>
11 #include <linux/module.h>
12 #include <linux/hrtimer.h>
13 #include <linux/dma-mapping.h>
14 #include <linux/kmsan.h>
15 #include <linux/spinlock.h>
16 #include <xen/xen.h>
17
18 #ifdef DEBUG
19 /* For development, we want to crash whenever the ring is screwed. */
20 #define BAD_RING(_vq, fmt, args...)                             \
21         do {                                                    \
22                 dev_err(&(_vq)->vq.vdev->dev,                   \
23                         "%s:"fmt, (_vq)->vq.name, ##args);      \
24                 BUG();                                          \
25         } while (0)
26 /* Caller is supposed to guarantee no reentry. */
27 #define START_USE(_vq)                                          \
28         do {                                                    \
29                 if ((_vq)->in_use)                              \
30                         panic("%s:in_use = %i\n",               \
31                               (_vq)->vq.name, (_vq)->in_use);   \
32                 (_vq)->in_use = __LINE__;                       \
33         } while (0)
34 #define END_USE(_vq) \
35         do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0)
36 #define LAST_ADD_TIME_UPDATE(_vq)                               \
37         do {                                                    \
38                 ktime_t now = ktime_get();                      \
39                                                                 \
40                 /* No kick or get, with .1 second between?  Warn. */ \
41                 if ((_vq)->last_add_time_valid)                 \
42                         WARN_ON(ktime_to_ms(ktime_sub(now,      \
43                                 (_vq)->last_add_time)) > 100);  \
44                 (_vq)->last_add_time = now;                     \
45                 (_vq)->last_add_time_valid = true;              \
46         } while (0)
47 #define LAST_ADD_TIME_CHECK(_vq)                                \
48         do {                                                    \
49                 if ((_vq)->last_add_time_valid) {               \
50                         WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \
51                                       (_vq)->last_add_time)) > 100); \
52                 }                                               \
53         } while (0)
54 #define LAST_ADD_TIME_INVALID(_vq)                              \
55         ((_vq)->last_add_time_valid = false)
56 #else
57 #define BAD_RING(_vq, fmt, args...)                             \
58         do {                                                    \
59                 dev_err(&_vq->vq.vdev->dev,                     \
60                         "%s:"fmt, (_vq)->vq.name, ##args);      \
61                 (_vq)->broken = true;                           \
62         } while (0)
63 #define START_USE(vq)
64 #define END_USE(vq)
65 #define LAST_ADD_TIME_UPDATE(vq)
66 #define LAST_ADD_TIME_CHECK(vq)
67 #define LAST_ADD_TIME_INVALID(vq)
68 #endif
69
70 struct vring_desc_state_split {
71         void *data;                     /* Data for callback. */
72         struct vring_desc *indir_desc;  /* Indirect descriptor, if any. */
73 };
74
75 struct vring_desc_state_packed {
76         void *data;                     /* Data for callback. */
77         struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
78         u16 num;                        /* Descriptor list length. */
79         u16 last;                       /* The last desc state in a list. */
80 };
81
82 struct vring_desc_extra {
83         dma_addr_t addr;                /* Descriptor DMA addr. */
84         u32 len;                        /* Descriptor length. */
85         u16 flags;                      /* Descriptor flags. */
86         u16 next;                       /* The next desc state in a list. */
87 };
88
89 struct vring_virtqueue_split {
90         /* Actual memory layout for this queue. */
91         struct vring vring;
92
93         /* Last written value to avail->flags */
94         u16 avail_flags_shadow;
95
96         /*
97          * Last written value to avail->idx in
98          * guest byte order.
99          */
100         u16 avail_idx_shadow;
101
102         /* Per-descriptor state. */
103         struct vring_desc_state_split *desc_state;
104         struct vring_desc_extra *desc_extra;
105
106         /* DMA address and size information */
107         dma_addr_t queue_dma_addr;
108         size_t queue_size_in_bytes;
109
110         /*
111          * The parameters for creating vrings are reserved for creating new
112          * vring.
113          */
114         u32 vring_align;
115         bool may_reduce_num;
116 };
117
118 struct vring_virtqueue_packed {
119         /* Actual memory layout for this queue. */
120         struct {
121                 unsigned int num;
122                 struct vring_packed_desc *desc;
123                 struct vring_packed_desc_event *driver;
124                 struct vring_packed_desc_event *device;
125         } vring;
126
127         /* Driver ring wrap counter. */
128         bool avail_wrap_counter;
129
130         /* Avail used flags. */
131         u16 avail_used_flags;
132
133         /* Index of the next avail descriptor. */
134         u16 next_avail_idx;
135
136         /*
137          * Last written value to driver->flags in
138          * guest byte order.
139          */
140         u16 event_flags_shadow;
141
142         /* Per-descriptor state. */
143         struct vring_desc_state_packed *desc_state;
144         struct vring_desc_extra *desc_extra;
145
146         /* DMA address and size information */
147         dma_addr_t ring_dma_addr;
148         dma_addr_t driver_event_dma_addr;
149         dma_addr_t device_event_dma_addr;
150         size_t ring_size_in_bytes;
151         size_t event_size_in_bytes;
152 };
153
154 struct vring_virtqueue {
155         struct virtqueue vq;
156
157         /* Is this a packed ring? */
158         bool packed_ring;
159
160         /* Is DMA API used? */
161         bool use_dma_api;
162
163         /* Can we use weak barriers? */
164         bool weak_barriers;
165
166         /* Other side has made a mess, don't try any more. */
167         bool broken;
168
169         /* Host supports indirect buffers */
170         bool indirect;
171
172         /* Host publishes avail event idx */
173         bool event;
174
175         /* Head of free buffer list. */
176         unsigned int free_head;
177         /* Number we've added since last sync. */
178         unsigned int num_added;
179
180         /* Last used index we've seen.
181          * For the split ring, it just contains the last used index.
182          * For the packed ring:
183          * bits below VRING_PACKED_EVENT_F_WRAP_CTR contain the last used index;
184          * bits from VRING_PACKED_EVENT_F_WRAP_CTR contain the used wrap counter.
185          */
186         u16 last_used_idx;
187
188         /* Hint for event idx: already triggered no need to disable. */
189         bool event_triggered;
190
191         union {
192                 /* Available for split ring */
193                 struct vring_virtqueue_split split;
194
195                 /* Available for packed ring */
196                 struct vring_virtqueue_packed packed;
197         };
198
199         /* How to notify other side. FIXME: commonalize hcalls! */
200         bool (*notify)(struct virtqueue *vq);
201
202         /* DMA, allocation, and size information */
203         bool we_own_ring;
204
205         /* Device used for doing DMA */
206         struct device *dma_dev;
207
208 #ifdef DEBUG
209         /* They're supposed to lock for us. */
210         unsigned int in_use;
211
212         /* Figure out if their kicks are too delayed. */
213         bool last_add_time_valid;
214         ktime_t last_add_time;
215 #endif
216 };
217
218 static struct virtqueue *__vring_new_virtqueue(unsigned int index,
219                                                struct vring_virtqueue_split *vring_split,
220                                                struct virtio_device *vdev,
221                                                bool weak_barriers,
222                                                bool context,
223                                                bool (*notify)(struct virtqueue *),
224                                                void (*callback)(struct virtqueue *),
225                                                const char *name,
226                                                struct device *dma_dev);
227 static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num);
228 static void vring_free(struct virtqueue *_vq);
229
230 /*
231  * Helpers.
232  */
233
234 #define to_vvq(_vq) container_of_const(_vq, struct vring_virtqueue, vq)
235
236 static bool virtqueue_use_indirect(const struct vring_virtqueue *vq,
237                                    unsigned int total_sg)
238 {
239         /*
240          * If the host supports indirect descriptor tables, and we have multiple
241          * buffers, then go indirect. FIXME: tune this threshold
242          */
243         return (vq->indirect && total_sg > 1 && vq->vq.num_free);
244 }
245
246 /*
247  * Modern virtio devices have feature bits to specify whether they need a
248  * quirk to bypass the IOMMU. If the quirk is absent, just use the DMA API.
249  *
250  * If there, the interaction between virtio and DMA API is messy.
251  *
252  * On most systems with virtio, physical addresses match bus addresses,
253  * and it doesn't particularly matter whether we use the DMA API.
254  *
255  * On some systems, including Xen and any system with a physical device
256  * that speaks virtio behind a physical IOMMU, we must use the DMA API
257  * for virtio DMA to work at all.
258  *
259  * On other systems, including SPARC and PPC64, virtio-pci devices are
260  * enumerated as though they are behind an IOMMU, but the virtio host
261  * ignores the IOMMU, so we must either pretend that the IOMMU isn't
262  * there or somehow map everything as the identity.
263  *
264  * For the time being, we preserve historic behavior and bypass the DMA
265  * API.
266  *
267  * TODO: install a per-device DMA ops structure that does the right thing
268  * taking into account all the above quirks, and use the DMA API
269  * unconditionally on data path.
270  */
271
272 static bool vring_use_dma_api(const struct virtio_device *vdev)
273 {
274         if (!virtio_has_dma_quirk(vdev))
275                 return true;
276
277         /* Otherwise, we are left to guess. */
278         /*
279          * In theory, it's possible to have a buggy QEMU-supplied
280          * emulated Q35 IOMMU and Xen enabled at the same time.  On
281          * such a configuration, virtio has never worked and will
282          * not work without an even larger kludge.  Instead, enable
283          * the DMA API if we're a Xen guest, which at least allows
284          * all of the sensible Xen configurations to work correctly.
285          */
286         if (xen_domain())
287                 return true;
288
289         return false;
290 }
291
292 size_t virtio_max_dma_size(const struct virtio_device *vdev)
293 {
294         size_t max_segment_size = SIZE_MAX;
295
296         if (vring_use_dma_api(vdev))
297                 max_segment_size = dma_max_mapping_size(vdev->dev.parent);
298
299         return max_segment_size;
300 }
301 EXPORT_SYMBOL_GPL(virtio_max_dma_size);
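
/*
 * Illustrative use of virtio_max_dma_size() from a driver probe path (a
 * rough sketch, not taken from this file; virtio-blk does something along
 * these lines to cap its segment size):
 *
 *	size_t max_size = virtio_max_dma_size(vdev);
 *
 *	blk_queue_max_segment_size(q, min_t(size_t, max_size, U32_MAX));
 */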
302
303 static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
304                                dma_addr_t *dma_handle, gfp_t flag,
305                                struct device *dma_dev)
306 {
307         if (vring_use_dma_api(vdev)) {
308                 return dma_alloc_coherent(dma_dev, size,
309                                           dma_handle, flag);
310         } else {
311                 void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);
312
313                 if (queue) {
314                         phys_addr_t phys_addr = virt_to_phys(queue);
315                         *dma_handle = (dma_addr_t)phys_addr;
316
317                         /*
318                          * Sanity check: make sure we didn't truncate
319                          * the address.  The only arches I can find that
320                          * have 64-bit phys_addr_t but 32-bit dma_addr_t
321                          * are certain non-highmem MIPS and x86
322                          * configurations, but these configurations
323                          * should never allocate physical pages above 32
324                          * bits, so this is fine.  Just in case, throw a
325                          * warning and abort if we end up with an
326                          * unrepresentable address.
327                          */
328                         if (WARN_ON_ONCE(*dma_handle != phys_addr)) {
329                                 free_pages_exact(queue, PAGE_ALIGN(size));
330                                 return NULL;
331                         }
332                 }
333                 return queue;
334         }
335 }
336
337 static void vring_free_queue(struct virtio_device *vdev, size_t size,
338                              void *queue, dma_addr_t dma_handle,
339                              struct device *dma_dev)
340 {
341         if (vring_use_dma_api(vdev))
342                 dma_free_coherent(dma_dev, size, queue, dma_handle);
343         else
344                 free_pages_exact(queue, PAGE_ALIGN(size));
345 }
346
347 /*
348  * The DMA ops on various arches are rather gnarly right now, and
349  * making all of the arch DMA ops work on the vring device itself
350  * is a mess.
351  */
352 static struct device *vring_dma_dev(const struct vring_virtqueue *vq)
353 {
354         return vq->dma_dev;
355 }
356
357 /* Map one sg entry. */
358 static int vring_map_one_sg(const struct vring_virtqueue *vq, struct scatterlist *sg,
359                             enum dma_data_direction direction, dma_addr_t *addr)
360 {
361         if (!vq->use_dma_api) {
362                 /*
363                  * If DMA is not used, KMSAN doesn't know that the scatterlist
364                  * is initialized by the hardware. Explicitly check/unpoison it
365                  * depending on the direction.
366                  */
367                 kmsan_handle_dma(sg_page(sg), sg->offset, sg->length, direction);
368                 *addr = (dma_addr_t)sg_phys(sg);
369                 return 0;
370         }
371
372         /*
373          * We can't use dma_map_sg, because we don't use scatterlists in
374          * the way it expects (we don't guarantee that the scatterlist
375          * will exist for the lifetime of the mapping).
376          */
377         *addr = dma_map_page(vring_dma_dev(vq),
378                             sg_page(sg), sg->offset, sg->length,
379                             direction);
380
381         if (dma_mapping_error(vring_dma_dev(vq), *addr))
382                 return -ENOMEM;
383
384         return 0;
385 }
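
/*
 * vring_map_one_sg() checks for DMA mapping errors internally and reports
 * failure through its int return value, so a call site only needs to test
 * the result; the pattern used by virtqueue_add_split() and
 * virtqueue_add_packed() below is roughly:
 *
 *	dma_addr_t addr;
 *
 *	if (vring_map_one_sg(vq, sg, DMA_TO_DEVICE, &addr))
 *		goto unmap_release;
 */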
386
387 static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
388                                    void *cpu_addr, size_t size,
389                                    enum dma_data_direction direction)
390 {
391         if (!vq->use_dma_api)
392                 return (dma_addr_t)virt_to_phys(cpu_addr);
393
394         return dma_map_single(vring_dma_dev(vq),
395                               cpu_addr, size, direction);
396 }
397
398 static int vring_mapping_error(const struct vring_virtqueue *vq,
399                                dma_addr_t addr)
400 {
401         if (!vq->use_dma_api)
402                 return 0;
403
404         return dma_mapping_error(vring_dma_dev(vq), addr);
405 }
406
407 static void virtqueue_init(struct vring_virtqueue *vq, u32 num)
408 {
409         vq->vq.num_free = num;
410
411         if (vq->packed_ring)
412                 vq->last_used_idx = 0 | (1 << VRING_PACKED_EVENT_F_WRAP_CTR);
413         else
414                 vq->last_used_idx = 0;
415
416         vq->event_triggered = false;
417         vq->num_added = 0;
418
419 #ifdef DEBUG
420         vq->in_use = false;
421         vq->last_add_time_valid = false;
422 #endif
423 }
424
425
426 /*
427  * Split ring specific functions - *_split().
428  */
429
430 static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
431                                            const struct vring_desc *desc)
432 {
433         u16 flags;
434
435         if (!vq->use_dma_api)
436                 return;
437
438         flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
439
440         dma_unmap_page(vring_dma_dev(vq),
441                        virtio64_to_cpu(vq->vq.vdev, desc->addr),
442                        virtio32_to_cpu(vq->vq.vdev, desc->len),
443                        (flags & VRING_DESC_F_WRITE) ?
444                        DMA_FROM_DEVICE : DMA_TO_DEVICE);
445 }
446
447 static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
448                                           unsigned int i)
449 {
450         struct vring_desc_extra *extra = vq->split.desc_extra;
451         u16 flags;
452
453         if (!vq->use_dma_api)
454                 goto out;
455
456         flags = extra[i].flags;
457
458         if (flags & VRING_DESC_F_INDIRECT) {
459                 dma_unmap_single(vring_dma_dev(vq),
460                                  extra[i].addr,
461                                  extra[i].len,
462                                  (flags & VRING_DESC_F_WRITE) ?
463                                  DMA_FROM_DEVICE : DMA_TO_DEVICE);
464         } else {
465                 dma_unmap_page(vring_dma_dev(vq),
466                                extra[i].addr,
467                                extra[i].len,
468                                (flags & VRING_DESC_F_WRITE) ?
469                                DMA_FROM_DEVICE : DMA_TO_DEVICE);
470         }
471
472 out:
473         return extra[i].next;
474 }
475
476 static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
477                                                unsigned int total_sg,
478                                                gfp_t gfp)
479 {
480         struct vring_desc *desc;
481         unsigned int i;
482
483         /*
484          * We require lowmem mappings for the descriptors because
485          * otherwise virt_to_phys will give us bogus addresses in the
486          * virtqueue.
487          */
488         gfp &= ~__GFP_HIGHMEM;
489
490         desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp);
491         if (!desc)
492                 return NULL;
493
494         for (i = 0; i < total_sg; i++)
495                 desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1);
496         return desc;
497 }
498
499 static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
500                                                     struct vring_desc *desc,
501                                                     unsigned int i,
502                                                     dma_addr_t addr,
503                                                     unsigned int len,
504                                                     u16 flags,
505                                                     bool indirect)
506 {
507         struct vring_virtqueue *vring = to_vvq(vq);
508         struct vring_desc_extra *extra = vring->split.desc_extra;
509         u16 next;
510
511         desc[i].flags = cpu_to_virtio16(vq->vdev, flags);
512         desc[i].addr = cpu_to_virtio64(vq->vdev, addr);
513         desc[i].len = cpu_to_virtio32(vq->vdev, len);
514
515         if (!indirect) {
516                 next = extra[i].next;
517                 desc[i].next = cpu_to_virtio16(vq->vdev, next);
518
519                 extra[i].addr = addr;
520                 extra[i].len = len;
521                 extra[i].flags = flags;
522         } else
523                 next = virtio16_to_cpu(vq->vdev, desc[i].next);
524
525         return next;
526 }
527
528 static inline int virtqueue_add_split(struct virtqueue *_vq,
529                                       struct scatterlist *sgs[],
530                                       unsigned int total_sg,
531                                       unsigned int out_sgs,
532                                       unsigned int in_sgs,
533                                       void *data,
534                                       void *ctx,
535                                       gfp_t gfp)
536 {
537         struct vring_virtqueue *vq = to_vvq(_vq);
538         struct scatterlist *sg;
539         struct vring_desc *desc;
540         unsigned int i, n, avail, descs_used, prev, err_idx;
541         int head;
542         bool indirect;
543
544         START_USE(vq);
545
546         BUG_ON(data == NULL);
547         BUG_ON(ctx && vq->indirect);
548
549         if (unlikely(vq->broken)) {
550                 END_USE(vq);
551                 return -EIO;
552         }
553
554         LAST_ADD_TIME_UPDATE(vq);
555
556         BUG_ON(total_sg == 0);
557
558         head = vq->free_head;
559
560         if (virtqueue_use_indirect(vq, total_sg))
561                 desc = alloc_indirect_split(_vq, total_sg, gfp);
562         else {
563                 desc = NULL;
564                 WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
565         }
566
567         if (desc) {
568                 /* Use a single buffer which doesn't continue */
569                 indirect = true;
570                 /* Set up rest to use this indirect table. */
571                 i = 0;
572                 descs_used = 1;
573         } else {
574                 indirect = false;
575                 desc = vq->split.vring.desc;
576                 i = head;
577                 descs_used = total_sg;
578         }
579
580         if (unlikely(vq->vq.num_free < descs_used)) {
581                 pr_debug("Can't add buf len %i - avail = %i\n",
582                          descs_used, vq->vq.num_free);
583                 /* FIXME: for historical reasons, we force a notify here if
584                  * there are outgoing parts to the buffer.  Presumably the
585                  * host should service the ring ASAP. */
586                 if (out_sgs)
587                         vq->notify(&vq->vq);
588                 if (indirect)
589                         kfree(desc);
590                 END_USE(vq);
591                 return -ENOSPC;
592         }
593
594         for (n = 0; n < out_sgs; n++) {
595                 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
596                         dma_addr_t addr;
597
598                         if (vring_map_one_sg(vq, sg, DMA_TO_DEVICE, &addr))
599                                 goto unmap_release;
600
601                         prev = i;
602                          /* Note that we trust the indirect descriptor
603                          * table since it uses streaming DMA mappings.
604                          */
605                         i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length,
606                                                      VRING_DESC_F_NEXT,
607                                                      indirect);
608                 }
609         }
610         for (; n < (out_sgs + in_sgs); n++) {
611                 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
612                         dma_addr_t addr;
613
614                         if (vring_map_one_sg(vq, sg, DMA_FROM_DEVICE, &addr))
615                                 goto unmap_release;
616
617                         prev = i;
618                          /* Note that we trust the indirect descriptor
619                          * table since it uses streaming DMA mappings.
620                          */
621                         i = virtqueue_add_desc_split(_vq, desc, i, addr,
622                                                      sg->length,
623                                                      VRING_DESC_F_NEXT |
624                                                      VRING_DESC_F_WRITE,
625                                                      indirect);
626                 }
627         }
628         /* Last one doesn't continue. */
629         desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
630         if (!indirect && vq->use_dma_api)
631                 vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &=
632                         ~VRING_DESC_F_NEXT;
633
634         if (indirect) {
635                 /* Now that the indirect table is filled in, map it. */
636                 dma_addr_t addr = vring_map_single(
637                         vq, desc, total_sg * sizeof(struct vring_desc),
638                         DMA_TO_DEVICE);
639                 if (vring_mapping_error(vq, addr))
640                         goto unmap_release;
641
642                 virtqueue_add_desc_split(_vq, vq->split.vring.desc,
643                                          head, addr,
644                                          total_sg * sizeof(struct vring_desc),
645                                          VRING_DESC_F_INDIRECT,
646                                          false);
647         }
648
649         /* We're using some buffers from the free list. */
650         vq->vq.num_free -= descs_used;
651
652         /* Update free pointer */
653         if (indirect)
654                 vq->free_head = vq->split.desc_extra[head].next;
655         else
656                 vq->free_head = i;
657
658         /* Store token and indirect buffer state. */
659         vq->split.desc_state[head].data = data;
660         if (indirect)
661                 vq->split.desc_state[head].indir_desc = desc;
662         else
663                 vq->split.desc_state[head].indir_desc = ctx;
664
665         /* Put entry in available array (but don't update avail->idx until they
666          * do sync). */
667         avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
668         vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);
669
670         /* Descriptors and available array need to be set before we expose the
671          * new available array entries. */
672         virtio_wmb(vq->weak_barriers);
673         vq->split.avail_idx_shadow++;
674         vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
675                                                 vq->split.avail_idx_shadow);
676         vq->num_added++;
677
678         pr_debug("Added buffer head %i to %p\n", head, vq);
679         END_USE(vq);
680
681         /* This is very unlikely, but theoretically possible.  Kick
682          * just in case. */
683         if (unlikely(vq->num_added == (1 << 16) - 1))
684                 virtqueue_kick(_vq);
685
686         return 0;
687
688 unmap_release:
689         err_idx = i;
690
691         if (indirect)
692                 i = 0;
693         else
694                 i = head;
695
696         for (n = 0; n < total_sg; n++) {
697                 if (i == err_idx)
698                         break;
699                 if (indirect) {
700                         vring_unmap_one_split_indirect(vq, &desc[i]);
701                         i = virtio16_to_cpu(_vq->vdev, desc[i].next);
702                 } else
703                         i = vring_unmap_one_split(vq, i);
704         }
705
706         if (indirect)
707                 kfree(desc);
708
709         END_USE(vq);
710         return -ENOMEM;
711 }
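
/*
 * Drivers never call virtqueue_add_split() directly; they go through the
 * public virtqueue_add_*() wrappers. A minimal, hypothetical caller (the
 * names 'req', 'buf' and 'len' are made up for illustration) might look
 * like:
 *
 *	struct scatterlist hdr, payload;
 *	struct scatterlist *sgs[] = { &hdr, &payload };
 *
 *	sg_init_one(&hdr, &req->hdr, sizeof(req->hdr));
 *	sg_init_one(&payload, buf, len);
 *	if (!virtqueue_add_sgs(vq, sgs, 1, 1, req, GFP_ATOMIC) &&
 *	    virtqueue_kick_prepare(vq))
 *		virtqueue_notify(vq);
 */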
712
713 static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
714 {
715         struct vring_virtqueue *vq = to_vvq(_vq);
716         u16 new, old;
717         bool needs_kick;
718
719         START_USE(vq);
720         /* We need to expose available array entries before checking avail
721          * event. */
722         virtio_mb(vq->weak_barriers);
723
724         old = vq->split.avail_idx_shadow - vq->num_added;
725         new = vq->split.avail_idx_shadow;
726         vq->num_added = 0;
727
728         LAST_ADD_TIME_CHECK(vq);
729         LAST_ADD_TIME_INVALID(vq);
730
731         if (vq->event) {
732                 needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev,
733                                         vring_avail_event(&vq->split.vring)),
734                                               new, old);
735         } else {
736                 needs_kick = !(vq->split.vring.used->flags &
737                                         cpu_to_virtio16(_vq->vdev,
738                                                 VRING_USED_F_NO_NOTIFY));
739         }
740         END_USE(vq);
741         return needs_kick;
742 }
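
/*
 * Worked example of the event-index branch above, assuming the standard
 * vring_need_event() helper, i.e. (u16)(new - event_idx - 1) < (u16)(new - old):
 *
 *	old = 5, new = 8, avail event = 6:  (8 - 6 - 1) = 1 < (8 - 5) = 3 -> kick
 *	old = 5, new = 8, avail event = 9:  (u16)(8 - 9 - 1) wraps to 0xfffe -> no kick
 *
 * i.e. the device is only notified if it asked for an event somewhere in the
 * window of entries added since the last kick.
 */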
743
744 static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
745                              void **ctx)
746 {
747         unsigned int i, j;
748         __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
749
750         /* Clear data ptr. */
751         vq->split.desc_state[head].data = NULL;
752
753         /* Put back on free list: unmap first-level descriptors and find end */
754         i = head;
755
756         while (vq->split.vring.desc[i].flags & nextflag) {
757                 vring_unmap_one_split(vq, i);
758                 i = vq->split.desc_extra[i].next;
759                 vq->vq.num_free++;
760         }
761
762         vring_unmap_one_split(vq, i);
763         vq->split.desc_extra[i].next = vq->free_head;
764         vq->free_head = head;
765
766         /* Plus final descriptor */
767         vq->vq.num_free++;
768
769         if (vq->indirect) {
770                 struct vring_desc *indir_desc =
771                                 vq->split.desc_state[head].indir_desc;
772                 u32 len;
773
774                 /* Free the indirect table, if any, now that it's unmapped. */
775                 if (!indir_desc)
776                         return;
777
778                 len = vq->split.desc_extra[head].len;
779
780                 BUG_ON(!(vq->split.desc_extra[head].flags &
781                                 VRING_DESC_F_INDIRECT));
782                 BUG_ON(len == 0 || len % sizeof(struct vring_desc));
783
784                 if (vq->use_dma_api) {
785                         for (j = 0; j < len / sizeof(struct vring_desc); j++)
786                                 vring_unmap_one_split_indirect(vq, &indir_desc[j]);
787                 }
788
789                 kfree(indir_desc);
790                 vq->split.desc_state[head].indir_desc = NULL;
791         } else if (ctx) {
792                 *ctx = vq->split.desc_state[head].indir_desc;
793         }
794 }
795
796 static bool more_used_split(const struct vring_virtqueue *vq)
797 {
798         return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev,
799                         vq->split.vring.used->idx);
800 }
801
802 static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
803                                          unsigned int *len,
804                                          void **ctx)
805 {
806         struct vring_virtqueue *vq = to_vvq(_vq);
807         void *ret;
808         unsigned int i;
809         u16 last_used;
810
811         START_USE(vq);
812
813         if (unlikely(vq->broken)) {
814                 END_USE(vq);
815                 return NULL;
816         }
817
818         if (!more_used_split(vq)) {
819                 pr_debug("No more buffers in queue\n");
820                 END_USE(vq);
821                 return NULL;
822         }
823
824         /* Only get used array entries after they have been exposed by host. */
825         virtio_rmb(vq->weak_barriers);
826
827         last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
828         i = virtio32_to_cpu(_vq->vdev,
829                         vq->split.vring.used->ring[last_used].id);
830         *len = virtio32_to_cpu(_vq->vdev,
831                         vq->split.vring.used->ring[last_used].len);
832
833         if (unlikely(i >= vq->split.vring.num)) {
834                 BAD_RING(vq, "id %u out of range\n", i);
835                 return NULL;
836         }
837         if (unlikely(!vq->split.desc_state[i].data)) {
838                 BAD_RING(vq, "id %u is not a head!\n", i);
839                 return NULL;
840         }
841
842         /* detach_buf_split clears data, so grab it now. */
843         ret = vq->split.desc_state[i].data;
844         detach_buf_split(vq, i, ctx);
845         vq->last_used_idx++;
846         /* If we expect an interrupt for the next entry, tell host
847          * by writing event index and flush out the write before
848          * the read in the next get_buf call. */
849         if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
850                 virtio_store_mb(vq->weak_barriers,
851                                 &vring_used_event(&vq->split.vring),
852                                 cpu_to_virtio16(_vq->vdev, vq->last_used_idx));
853
854         LAST_ADD_TIME_INVALID(vq);
855
856         END_USE(vq);
857         return ret;
858 }
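
/*
 * Driver-side consumption usually happens from the virtqueue callback via
 * the public virtqueue_get_buf() wrapper; a minimal sketch (the completion
 * handler is hypothetical):
 *
 *	unsigned int len;
 *	void *req;
 *
 *	while ((req = virtqueue_get_buf(vq, &len)) != NULL)
 *		handle_completion(req, len);
 */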
859
860 static void virtqueue_disable_cb_split(struct virtqueue *_vq)
861 {
862         struct vring_virtqueue *vq = to_vvq(_vq);
863
864         if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
865                 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
866
867                 /*
868                  * If device triggered an event already it won't trigger one again:
869                  * no need to disable.
870                  */
871                 if (vq->event_triggered)
872                         return;
873
874                 if (vq->event)
875                         /* TODO: this is a hack. Figure out a cleaner value to write. */
876                         vring_used_event(&vq->split.vring) = 0x0;
877                 else
878                         vq->split.vring.avail->flags =
879                                 cpu_to_virtio16(_vq->vdev,
880                                                 vq->split.avail_flags_shadow);
881         }
882 }
883
884 static unsigned int virtqueue_enable_cb_prepare_split(struct virtqueue *_vq)
885 {
886         struct vring_virtqueue *vq = to_vvq(_vq);
887         u16 last_used_idx;
888
889         START_USE(vq);
890
891         /* We optimistically turn back on interrupts, then check if there was
892          * more to do. */
893         /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
894          * either clear the flags bit or point the event index at the next
895          * entry. Always do both to keep code simple. */
896         if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
897                 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
898                 if (!vq->event)
899                         vq->split.vring.avail->flags =
900                                 cpu_to_virtio16(_vq->vdev,
901                                                 vq->split.avail_flags_shadow);
902         }
903         vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev,
904                         last_used_idx = vq->last_used_idx);
905         END_USE(vq);
906         return last_used_idx;
907 }
908
909 static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned int last_used_idx)
910 {
911         struct vring_virtqueue *vq = to_vvq(_vq);
912
913         return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev,
914                         vq->split.vring.used->idx);
915 }
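
/*
 * The prepare/poll pair above backs the public virtqueue_enable_cb_prepare()
 * and virtqueue_poll() helpers, which are meant to be used together, roughly
 * in this NAPI-style pattern:
 *
 *	opaque = virtqueue_enable_cb_prepare(vq);
 *	if (unlikely(virtqueue_poll(vq, opaque))) {
 *		virtqueue_disable_cb(vq);
 *		... more buffers arrived meanwhile, keep processing ...
 *	}
 */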
916
917 static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
918 {
919         struct vring_virtqueue *vq = to_vvq(_vq);
920         u16 bufs;
921
922         START_USE(vq);
923
924         /* We optimistically turn back on interrupts, then check if there was
925          * more to do. */
926         /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
927          * either clear the flags bit or point the event index at the next
928          * entry. Always update the event index to keep code simple. */
929         if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
930                 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
931                 if (!vq->event)
932                         vq->split.vring.avail->flags =
933                                 cpu_to_virtio16(_vq->vdev,
934                                                 vq->split.avail_flags_shadow);
935         }
936         /* TODO: tune this threshold */
937         bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4;
938
939         virtio_store_mb(vq->weak_barriers,
940                         &vring_used_event(&vq->split.vring),
941                         cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs));
942
943         if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx)
944                                         - vq->last_used_idx) > bufs)) {
945                 END_USE(vq);
946                 return false;
947         }
948
949         END_USE(vq);
950         return true;
951 }
952
953 static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
954 {
955         struct vring_virtqueue *vq = to_vvq(_vq);
956         unsigned int i;
957         void *buf;
958
959         START_USE(vq);
960
961         for (i = 0; i < vq->split.vring.num; i++) {
962                 if (!vq->split.desc_state[i].data)
963                         continue;
964                 /* detach_buf_split clears data, so grab it now. */
965                 buf = vq->split.desc_state[i].data;
966                 detach_buf_split(vq, i, NULL);
967                 vq->split.avail_idx_shadow--;
968                 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
969                                 vq->split.avail_idx_shadow);
970                 END_USE(vq);
971                 return buf;
972         }
973         /* That should have freed everything. */
974         BUG_ON(vq->vq.num_free != vq->split.vring.num);
975
976         END_USE(vq);
977         return NULL;
978 }
979
980 static void virtqueue_vring_init_split(struct vring_virtqueue_split *vring_split,
981                                        struct vring_virtqueue *vq)
982 {
983         struct virtio_device *vdev;
984
985         vdev = vq->vq.vdev;
986
987         vring_split->avail_flags_shadow = 0;
988         vring_split->avail_idx_shadow = 0;
989
990         /* No callback?  Tell other side not to bother us. */
991         if (!vq->vq.callback) {
992                 vring_split->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
993                 if (!vq->event)
994                         vring_split->vring.avail->flags = cpu_to_virtio16(vdev,
995                                         vring_split->avail_flags_shadow);
996         }
997 }
998
999 static void virtqueue_reinit_split(struct vring_virtqueue *vq)
1000 {
1001         int num;
1002
1003         num = vq->split.vring.num;
1004
1005         vq->split.vring.avail->flags = 0;
1006         vq->split.vring.avail->idx = 0;
1007
1008         /* reset avail event */
1009         vq->split.vring.avail->ring[num] = 0;
1010
1011         vq->split.vring.used->flags = 0;
1012         vq->split.vring.used->idx = 0;
1013
1014         /* reset used event */
1015         *(__virtio16 *)&(vq->split.vring.used->ring[num]) = 0;
1016
1017         virtqueue_init(vq, num);
1018
1019         virtqueue_vring_init_split(&vq->split, vq);
1020 }
1021
1022 static void virtqueue_vring_attach_split(struct vring_virtqueue *vq,
1023                                          struct vring_virtqueue_split *vring_split)
1024 {
1025         vq->split = *vring_split;
1026
1027         /* Put everything in free lists. */
1028         vq->free_head = 0;
1029 }
1030
1031 static int vring_alloc_state_extra_split(struct vring_virtqueue_split *vring_split)
1032 {
1033         struct vring_desc_state_split *state;
1034         struct vring_desc_extra *extra;
1035         u32 num = vring_split->vring.num;
1036
1037         state = kmalloc_array(num, sizeof(struct vring_desc_state_split), GFP_KERNEL);
1038         if (!state)
1039                 goto err_state;
1040
1041         extra = vring_alloc_desc_extra(num);
1042         if (!extra)
1043                 goto err_extra;
1044
1045         memset(state, 0, num * sizeof(struct vring_desc_state_split));
1046
1047         vring_split->desc_state = state;
1048         vring_split->desc_extra = extra;
1049         return 0;
1050
1051 err_extra:
1052         kfree(state);
1053 err_state:
1054         return -ENOMEM;
1055 }
1056
1057 static void vring_free_split(struct vring_virtqueue_split *vring_split,
1058                              struct virtio_device *vdev, struct device *dma_dev)
1059 {
1060         vring_free_queue(vdev, vring_split->queue_size_in_bytes,
1061                          vring_split->vring.desc,
1062                          vring_split->queue_dma_addr,
1063                          dma_dev);
1064
1065         kfree(vring_split->desc_state);
1066         kfree(vring_split->desc_extra);
1067 }
1068
1069 static int vring_alloc_queue_split(struct vring_virtqueue_split *vring_split,
1070                                    struct virtio_device *vdev,
1071                                    u32 num,
1072                                    unsigned int vring_align,
1073                                    bool may_reduce_num,
1074                                    struct device *dma_dev)
1075 {
1076         void *queue = NULL;
1077         dma_addr_t dma_addr;
1078
1079         /* We assume num is a power of 2. */
1080         if (!is_power_of_2(num)) {
1081                 dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
1082                 return -EINVAL;
1083         }
1084
1085         /* TODO: allocate each queue chunk individually */
1086         for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
1087                 queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
1088                                           &dma_addr,
1089                                           GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
1090                                           dma_dev);
1091                 if (queue)
1092                         break;
1093                 if (!may_reduce_num)
1094                         return -ENOMEM;
1095         }
1096
1097         if (!num)
1098                 return -ENOMEM;
1099
1100         if (!queue) {
1101                 /* Try to get a single page. You are my only hope! */
1102                 queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
1103                                           &dma_addr, GFP_KERNEL | __GFP_ZERO,
1104                                           dma_dev);
1105         }
1106         if (!queue)
1107                 return -ENOMEM;
1108
1109         vring_init(&vring_split->vring, num, queue, vring_align);
1110
1111         vring_split->queue_dma_addr = dma_addr;
1112         vring_split->queue_size_in_bytes = vring_size(num, vring_align);
1113
1114         vring_split->vring_align = vring_align;
1115         vring_split->may_reduce_num = may_reduce_num;
1116
1117         return 0;
1118 }
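
/*
 * For a rough feel of the sizes involved (assuming the standard split-ring
 * layout from include/uapi/linux/virtio_ring.h and 4K pages):
 * vring_size(256, PAGE_SIZE) is 16*256 bytes of descriptors plus a 518-byte
 * available ring, rounded up to 8192, plus a 2054-byte used ring, i.e.
 * 10246 bytes, so the loop above ends up allocating about three pages for a
 * 256-entry queue.
 */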
1119
1120 static struct virtqueue *vring_create_virtqueue_split(
1121         unsigned int index,
1122         unsigned int num,
1123         unsigned int vring_align,
1124         struct virtio_device *vdev,
1125         bool weak_barriers,
1126         bool may_reduce_num,
1127         bool context,
1128         bool (*notify)(struct virtqueue *),
1129         void (*callback)(struct virtqueue *),
1130         const char *name,
1131         struct device *dma_dev)
1132 {
1133         struct vring_virtqueue_split vring_split = {};
1134         struct virtqueue *vq;
1135         int err;
1136
1137         err = vring_alloc_queue_split(&vring_split, vdev, num, vring_align,
1138                                       may_reduce_num, dma_dev);
1139         if (err)
1140                 return NULL;
1141
1142         vq = __vring_new_virtqueue(index, &vring_split, vdev, weak_barriers,
1143                                    context, notify, callback, name, dma_dev);
1144         if (!vq) {
1145                 vring_free_split(&vring_split, vdev, dma_dev);
1146                 return NULL;
1147         }
1148
1149         to_vvq(vq)->we_own_ring = true;
1150
1151         return vq;
1152 }
1153
1154 static int virtqueue_resize_split(struct virtqueue *_vq, u32 num)
1155 {
1156         struct vring_virtqueue_split vring_split = {};
1157         struct vring_virtqueue *vq = to_vvq(_vq);
1158         struct virtio_device *vdev = _vq->vdev;
1159         int err;
1160
1161         err = vring_alloc_queue_split(&vring_split, vdev, num,
1162                                       vq->split.vring_align,
1163                                       vq->split.may_reduce_num,
1164                                       vring_dma_dev(vq));
1165         if (err)
1166                 goto err;
1167
1168         err = vring_alloc_state_extra_split(&vring_split);
1169         if (err)
1170                 goto err_state_extra;
1171
1172         vring_free(&vq->vq);
1173
1174         virtqueue_vring_init_split(&vring_split, vq);
1175
1176         virtqueue_init(vq, vring_split.vring.num);
1177         virtqueue_vring_attach_split(vq, &vring_split);
1178
1179         return 0;
1180
1181 err_state_extra:
1182         vring_free_split(&vring_split, vdev, vring_dma_dev(vq));
1183 err:
1184         virtqueue_reinit_split(vq);
1185         return -ENOMEM;
1186 }
1187
1188
1189 /*
1190  * Packed ring specific functions - *_packed().
1191  */
1192 static bool packed_used_wrap_counter(u16 last_used_idx)
1193 {
1194         return !!(last_used_idx & (1 << VRING_PACKED_EVENT_F_WRAP_CTR));
1195 }
1196
1197 static u16 packed_last_used(u16 last_used_idx)
1198 {
1199         return last_used_idx & ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR));
1200 }
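
/*
 * These helpers just split the packed last_used_idx encoding described in
 * struct vring_virtqueue. With VRING_PACKED_EVENT_F_WRAP_CTR == 15 (as in
 * include/uapi/linux/virtio_ring.h), a value of 0x8003 decodes to a used
 * wrap counter of 1 and a last used index of 3.
 */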
1201
1202 static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
1203                                      const struct vring_desc_extra *extra)
1204 {
1205         u16 flags;
1206
1207         if (!vq->use_dma_api)
1208                 return;
1209
1210         flags = extra->flags;
1211
1212         if (flags & VRING_DESC_F_INDIRECT) {
1213                 dma_unmap_single(vring_dma_dev(vq),
1214                                  extra->addr, extra->len,
1215                                  (flags & VRING_DESC_F_WRITE) ?
1216                                  DMA_FROM_DEVICE : DMA_TO_DEVICE);
1217         } else {
1218                 dma_unmap_page(vring_dma_dev(vq),
1219                                extra->addr, extra->len,
1220                                (flags & VRING_DESC_F_WRITE) ?
1221                                DMA_FROM_DEVICE : DMA_TO_DEVICE);
1222         }
1223 }
1224
1225 static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
1226                                     const struct vring_packed_desc *desc)
1227 {
1228         u16 flags;
1229
1230         if (!vq->use_dma_api)
1231                 return;
1232
1233         flags = le16_to_cpu(desc->flags);
1234
1235         dma_unmap_page(vring_dma_dev(vq),
1236                        le64_to_cpu(desc->addr),
1237                        le32_to_cpu(desc->len),
1238                        (flags & VRING_DESC_F_WRITE) ?
1239                        DMA_FROM_DEVICE : DMA_TO_DEVICE);
1240 }
1241
1242 static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg,
1243                                                        gfp_t gfp)
1244 {
1245         struct vring_packed_desc *desc;
1246
1247         /*
1248          * We require lowmem mappings for the descriptors because
1249          * otherwise virt_to_phys will give us bogus addresses in the
1250          * virtqueue.
1251          */
1252         gfp &= ~__GFP_HIGHMEM;
1253
1254         desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp);
1255
1256         return desc;
1257 }
1258
1259 static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
1260                                          struct scatterlist *sgs[],
1261                                          unsigned int total_sg,
1262                                          unsigned int out_sgs,
1263                                          unsigned int in_sgs,
1264                                          void *data,
1265                                          gfp_t gfp)
1266 {
1267         struct vring_packed_desc *desc;
1268         struct scatterlist *sg;
1269         unsigned int i, n, err_idx;
1270         u16 head, id;
1271         dma_addr_t addr;
1272
1273         head = vq->packed.next_avail_idx;
1274         desc = alloc_indirect_packed(total_sg, gfp);
1275         if (!desc)
1276                 return -ENOMEM;
1277
1278         if (unlikely(vq->vq.num_free < 1)) {
1279                 pr_debug("Can't add buf len 1 - avail = 0\n");
1280                 kfree(desc);
1281                 END_USE(vq);
1282                 return -ENOSPC;
1283         }
1284
1285         i = 0;
1286         id = vq->free_head;
1287         BUG_ON(id == vq->packed.vring.num);
1288
1289         for (n = 0; n < out_sgs + in_sgs; n++) {
1290                 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1291                         if (vring_map_one_sg(vq, sg, n < out_sgs ?
1292                                              DMA_TO_DEVICE : DMA_FROM_DEVICE, &addr))
1293                                 goto unmap_release;
1294
1295                         desc[i].flags = cpu_to_le16(n < out_sgs ?
1296                                                 0 : VRING_DESC_F_WRITE);
1297                         desc[i].addr = cpu_to_le64(addr);
1298                         desc[i].len = cpu_to_le32(sg->length);
1299                         i++;
1300                 }
1301         }
1302
1303         /* Now that the indirect table is filled in, map it. */
1304         addr = vring_map_single(vq, desc,
1305                         total_sg * sizeof(struct vring_packed_desc),
1306                         DMA_TO_DEVICE);
1307         if (vring_mapping_error(vq, addr))
1308                 goto unmap_release;
1309
1310         vq->packed.vring.desc[head].addr = cpu_to_le64(addr);
1311         vq->packed.vring.desc[head].len = cpu_to_le32(total_sg *
1312                                 sizeof(struct vring_packed_desc));
1313         vq->packed.vring.desc[head].id = cpu_to_le16(id);
1314
1315         if (vq->use_dma_api) {
1316                 vq->packed.desc_extra[id].addr = addr;
1317                 vq->packed.desc_extra[id].len = total_sg *
1318                                 sizeof(struct vring_packed_desc);
1319                 vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT |
1320                                                   vq->packed.avail_used_flags;
1321         }
1322
1323         /*
1324          * A driver MUST NOT make the first descriptor in the list
1325          * available before all subsequent descriptors comprising
1326          * the list are made available.
1327          */
1328         virtio_wmb(vq->weak_barriers);
1329         vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT |
1330                                                 vq->packed.avail_used_flags);
1331
1332         /* We're using some buffers from the free list. */
1333         vq->vq.num_free -= 1;
1334
1335         /* Update free pointer */
1336         n = head + 1;
1337         if (n >= vq->packed.vring.num) {
1338                 n = 0;
1339                 vq->packed.avail_wrap_counter ^= 1;
1340                 vq->packed.avail_used_flags ^=
1341                                 1 << VRING_PACKED_DESC_F_AVAIL |
1342                                 1 << VRING_PACKED_DESC_F_USED;
1343         }
1344         vq->packed.next_avail_idx = n;
1345         vq->free_head = vq->packed.desc_extra[id].next;
1346
1347         /* Store token and indirect buffer state. */
1348         vq->packed.desc_state[id].num = 1;
1349         vq->packed.desc_state[id].data = data;
1350         vq->packed.desc_state[id].indir_desc = desc;
1351         vq->packed.desc_state[id].last = id;
1352
1353         vq->num_added += 1;
1354
1355         pr_debug("Added buffer head %i to %p\n", head, vq);
1356         END_USE(vq);
1357
1358         return 0;
1359
1360 unmap_release:
1361         err_idx = i;
1362
1363         for (i = 0; i < err_idx; i++)
1364                 vring_unmap_desc_packed(vq, &desc[i]);
1365
1366         kfree(desc);
1367
1368         END_USE(vq);
1369         return -ENOMEM;
1370 }
1371
1372 static inline int virtqueue_add_packed(struct virtqueue *_vq,
1373                                        struct scatterlist *sgs[],
1374                                        unsigned int total_sg,
1375                                        unsigned int out_sgs,
1376                                        unsigned int in_sgs,
1377                                        void *data,
1378                                        void *ctx,
1379                                        gfp_t gfp)
1380 {
1381         struct vring_virtqueue *vq = to_vvq(_vq);
1382         struct vring_packed_desc *desc;
1383         struct scatterlist *sg;
1384         unsigned int i, n, c, descs_used, err_idx;
1385         __le16 head_flags, flags;
1386         u16 head, id, prev, curr, avail_used_flags;
1387         int err;
1388
1389         START_USE(vq);
1390
1391         BUG_ON(data == NULL);
1392         BUG_ON(ctx && vq->indirect);
1393
1394         if (unlikely(vq->broken)) {
1395                 END_USE(vq);
1396                 return -EIO;
1397         }
1398
1399         LAST_ADD_TIME_UPDATE(vq);
1400
1401         BUG_ON(total_sg == 0);
1402
1403         if (virtqueue_use_indirect(vq, total_sg)) {
1404                 err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
1405                                                     in_sgs, data, gfp);
1406                 if (err != -ENOMEM) {
1407                         END_USE(vq);
1408                         return err;
1409                 }
1410
1411                 /* fall back on direct */
1412         }
1413
1414         head = vq->packed.next_avail_idx;
1415         avail_used_flags = vq->packed.avail_used_flags;
1416
1417         WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect);
1418
1419         desc = vq->packed.vring.desc;
1420         i = head;
1421         descs_used = total_sg;
1422
1423         if (unlikely(vq->vq.num_free < descs_used)) {
1424                 pr_debug("Can't add buf len %i - avail = %i\n",
1425                          descs_used, vq->vq.num_free);
1426                 END_USE(vq);
1427                 return -ENOSPC;
1428         }
1429
1430         id = vq->free_head;
1431         BUG_ON(id == vq->packed.vring.num);
1432
1433         curr = id;
1434         c = 0;
1435         for (n = 0; n < out_sgs + in_sgs; n++) {
1436                 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1437                         dma_addr_t addr;
1438
1439                         if (vring_map_one_sg(vq, sg, n < out_sgs ?
1440                                              DMA_TO_DEVICE : DMA_FROM_DEVICE, &addr))
1441                                 goto unmap_release;
1442
1443                         flags = cpu_to_le16(vq->packed.avail_used_flags |
1444                                     (++c == total_sg ? 0 : VRING_DESC_F_NEXT) |
1445                                     (n < out_sgs ? 0 : VRING_DESC_F_WRITE));
1446                         if (i == head)
1447                                 head_flags = flags;
1448                         else
1449                                 desc[i].flags = flags;
1450
1451                         desc[i].addr = cpu_to_le64(addr);
1452                         desc[i].len = cpu_to_le32(sg->length);
1453                         desc[i].id = cpu_to_le16(id);
1454
1455                         if (unlikely(vq->use_dma_api)) {
1456                                 vq->packed.desc_extra[curr].addr = addr;
1457                                 vq->packed.desc_extra[curr].len = sg->length;
1458                                 vq->packed.desc_extra[curr].flags =
1459                                         le16_to_cpu(flags);
1460                         }
1461                         prev = curr;
1462                         curr = vq->packed.desc_extra[curr].next;
1463
1464                         if (unlikely(++i >= vq->packed.vring.num)) {
1465                                 i = 0;
1466                                 vq->packed.avail_used_flags ^=
1467                                         1 << VRING_PACKED_DESC_F_AVAIL |
1468                                         1 << VRING_PACKED_DESC_F_USED;
1469                         }
1470                 }
1471         }
1472
1473         if (i < head)
1474                 vq->packed.avail_wrap_counter ^= 1;
1475
1476         /* We're using some buffers from the free list. */
1477         vq->vq.num_free -= descs_used;
1478
1479         /* Update free pointer */
1480         vq->packed.next_avail_idx = i;
1481         vq->free_head = curr;
1482
1483         /* Store token. */
1484         vq->packed.desc_state[id].num = descs_used;
1485         vq->packed.desc_state[id].data = data;
1486         vq->packed.desc_state[id].indir_desc = ctx;
1487         vq->packed.desc_state[id].last = prev;
1488
1489         /*
1490          * A driver MUST NOT make the first descriptor in the list
1491          * available before all subsequent descriptors comprising
1492          * the list are made available.
1493          */
1494         virtio_wmb(vq->weak_barriers);
1495         vq->packed.vring.desc[head].flags = head_flags;
1496         vq->num_added += descs_used;
1497
1498         pr_debug("Added buffer head %i to %p\n", head, vq);
1499         END_USE(vq);
1500
1501         return 0;
1502
1503 unmap_release:
1504         err_idx = i;
1505         i = head;
1506         curr = vq->free_head;
1507
1508         vq->packed.avail_used_flags = avail_used_flags;
1509
1510         for (n = 0; n < total_sg; n++) {
1511                 if (i == err_idx)
1512                         break;
1513                 vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]);
1514                 curr = vq->packed.desc_extra[curr].next;
1515                 i++;
1516                 if (i >= vq->packed.vring.num)
1517                         i = 0;
1518         }
1519
1520         END_USE(vq);
1521         return -EIO;
1522 }
1523
1524 static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
1525 {
1526         struct vring_virtqueue *vq = to_vvq(_vq);
1527         u16 new, old, off_wrap, flags, wrap_counter, event_idx;
1528         bool needs_kick;
1529         union {
1530                 struct {
1531                         __le16 off_wrap;
1532                         __le16 flags;
1533                 };
1534                 u32 u32;
1535         } snapshot;
1536
1537         START_USE(vq);
1538
1539         /*
1540          * We need to expose the new flags value before checking notification
1541          * suppressions.
1542          */
1543         virtio_mb(vq->weak_barriers);
1544
1545         old = vq->packed.next_avail_idx - vq->num_added;
1546         new = vq->packed.next_avail_idx;
1547         vq->num_added = 0;
1548
1549         snapshot.u32 = *(u32 *)vq->packed.vring.device;
1550         flags = le16_to_cpu(snapshot.flags);
1551
1552         LAST_ADD_TIME_CHECK(vq);
1553         LAST_ADD_TIME_INVALID(vq);
1554
1555         if (flags != VRING_PACKED_EVENT_FLAG_DESC) {
1556                 needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE);
1557                 goto out;
1558         }
1559
1560         off_wrap = le16_to_cpu(snapshot.off_wrap);
1561
1562         wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1563         event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1564         if (wrap_counter != vq->packed.avail_wrap_counter)
1565                 event_idx -= vq->packed.vring.num;
1566
1567         needs_kick = vring_need_event(event_idx, new, old);
1568 out:
1569         END_USE(vq);
1570         return needs_kick;
1571 }
1572
1573 static void detach_buf_packed(struct vring_virtqueue *vq,
1574                               unsigned int id, void **ctx)
1575 {
1576         struct vring_desc_state_packed *state = NULL;
1577         struct vring_packed_desc *desc;
1578         unsigned int i, curr;
1579
1580         state = &vq->packed.desc_state[id];
1581
1582         /* Clear data ptr. */
1583         state->data = NULL;
1584
1585         vq->packed.desc_extra[state->last].next = vq->free_head;
1586         vq->free_head = id;
1587         vq->vq.num_free += state->num;
1588
1589         if (unlikely(vq->use_dma_api)) {
1590                 curr = id;
1591                 for (i = 0; i < state->num; i++) {
1592                         vring_unmap_extra_packed(vq,
1593                                                  &vq->packed.desc_extra[curr]);
1594                         curr = vq->packed.desc_extra[curr].next;
1595                 }
1596         }
1597
1598         if (vq->indirect) {
1599                 u32 len;
1600
1601                 /* Free the indirect table, if any, now that it's unmapped. */
1602                 desc = state->indir_desc;
1603                 if (!desc)
1604                         return;
1605
1606                 if (vq->use_dma_api) {
1607                         len = vq->packed.desc_extra[id].len;
1608                         for (i = 0; i < len / sizeof(struct vring_packed_desc);
1609                                         i++)
1610                                 vring_unmap_desc_packed(vq, &desc[i]);
1611                 }
1612                 kfree(desc);
1613                 state->indir_desc = NULL;
1614         } else if (ctx) {
1615                 *ctx = state->indir_desc;
1616         }
1617 }
1618
1619 static inline bool is_used_desc_packed(const struct vring_virtqueue *vq,
1620                                        u16 idx, bool used_wrap_counter)
1621 {
1622         bool avail, used;
1623         u16 flags;
1624
1625         flags = le16_to_cpu(vq->packed.vring.desc[idx].flags);
1626         avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
1627         used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
1628
1629         return avail == used && used == used_wrap_counter;
1630 }
1631
1632 static bool more_used_packed(const struct vring_virtqueue *vq)
1633 {
1634         u16 last_used;
1635         u16 last_used_idx;
1636         bool used_wrap_counter;
1637
1638         last_used_idx = READ_ONCE(vq->last_used_idx);
1639         last_used = packed_last_used(last_used_idx);
1640         used_wrap_counter = packed_used_wrap_counter(last_used_idx);
1641         return is_used_desc_packed(vq, last_used, used_wrap_counter);
1642 }
1643
1644 static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
1645                                           unsigned int *len,
1646                                           void **ctx)
1647 {
1648         struct vring_virtqueue *vq = to_vvq(_vq);
1649         u16 last_used, id, last_used_idx;
1650         bool used_wrap_counter;
1651         void *ret;
1652
1653         START_USE(vq);
1654
1655         if (unlikely(vq->broken)) {
1656                 END_USE(vq);
1657                 return NULL;
1658         }
1659
1660         if (!more_used_packed(vq)) {
1661                 pr_debug("No more buffers in queue\n");
1662                 END_USE(vq);
1663                 return NULL;
1664         }
1665
1666         /* Only get used elements after they have been exposed by host. */
1667         virtio_rmb(vq->weak_barriers);
1668
1669         last_used_idx = READ_ONCE(vq->last_used_idx);
1670         used_wrap_counter = packed_used_wrap_counter(last_used_idx);
1671         last_used = packed_last_used(last_used_idx);
1672         id = le16_to_cpu(vq->packed.vring.desc[last_used].id);
1673         *len = le32_to_cpu(vq->packed.vring.desc[last_used].len);
1674
1675         if (unlikely(id >= vq->packed.vring.num)) {
1676                 BAD_RING(vq, "id %u out of range\n", id);
1677                 return NULL;
1678         }
1679         if (unlikely(!vq->packed.desc_state[id].data)) {
1680                 BAD_RING(vq, "id %u is not a head!\n", id);
1681                 return NULL;
1682         }
1683
1684         /* detach_buf_packed clears data, so grab it now. */
1685         ret = vq->packed.desc_state[id].data;
1686         detach_buf_packed(vq, id, ctx);
1687
1688         last_used += vq->packed.desc_state[id].num;
1689         if (unlikely(last_used >= vq->packed.vring.num)) {
1690                 last_used -= vq->packed.vring.num;
1691                 used_wrap_counter ^= 1;
1692         }
1693
1694         last_used = (last_used | (used_wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
1695         WRITE_ONCE(vq->last_used_idx, last_used);
1696
1697         /*
1698          * If we expect an interrupt for the next entry, tell host
1699          * by writing event index and flush out the write before
1700          * the read in the next get_buf call.
1701          */
1702         if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC)
1703                 virtio_store_mb(vq->weak_barriers,
1704                                 &vq->packed.vring.driver->off_wrap,
1705                                 cpu_to_le16(vq->last_used_idx));
1706
1707         LAST_ADD_TIME_INVALID(vq);
1708
1709         END_USE(vq);
1710         return ret;
1711 }
1712
1713 static void virtqueue_disable_cb_packed(struct virtqueue *_vq)
1714 {
1715         struct vring_virtqueue *vq = to_vvq(_vq);
1716
1717         if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) {
1718                 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1719
1720                 /*
1721                  * If device triggered an event already it won't trigger one again:
1722                  * no need to disable.
1723                  */
1724                 if (vq->event_triggered)
1725                         return;
1726
1727                 vq->packed.vring.driver->flags =
1728                         cpu_to_le16(vq->packed.event_flags_shadow);
1729         }
1730 }
1731
1732 static unsigned int virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
1733 {
1734         struct vring_virtqueue *vq = to_vvq(_vq);
1735
1736         START_USE(vq);
1737
1738         /*
1739          * We optimistically turn back on interrupts, then check if there was
1740          * more to do.
1741          */
1742
1743         if (vq->event) {
1744                 vq->packed.vring.driver->off_wrap =
1745                         cpu_to_le16(vq->last_used_idx);
1746                 /*
1747                  * We need to update event offset and event wrap
1748                  * counter first before updating event flags.
1749                  */
1750                 virtio_wmb(vq->weak_barriers);
1751         }
1752
1753         if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
1754                 vq->packed.event_flags_shadow = vq->event ?
1755                                 VRING_PACKED_EVENT_FLAG_DESC :
1756                                 VRING_PACKED_EVENT_FLAG_ENABLE;
1757                 vq->packed.vring.driver->flags =
1758                                 cpu_to_le16(vq->packed.event_flags_shadow);
1759         }
1760
1761         END_USE(vq);
1762         return vq->last_used_idx;
1763 }
1764
1765 static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap)
1766 {
1767         struct vring_virtqueue *vq = to_vvq(_vq);
1768         bool wrap_counter;
1769         u16 used_idx;
1770
1771         wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1772         used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1773
1774         return is_used_desc_packed(vq, used_idx, wrap_counter);
1775 }
1776
1777 static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
1778 {
1779         struct vring_virtqueue *vq = to_vvq(_vq);
1780         u16 used_idx, wrap_counter, last_used_idx;
1781         u16 bufs;
1782
1783         START_USE(vq);
1784
1785         /*
1786          * We optimistically turn back on interrupts, then check if there was
1787          * more to do.
1788          */
1789
1790         if (vq->event) {
1791                 /* TODO: tune this threshold */
1792                 bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4;
1793                 last_used_idx = READ_ONCE(vq->last_used_idx);
1794                 wrap_counter = packed_used_wrap_counter(last_used_idx);
1795
1796                 used_idx = packed_last_used(last_used_idx) + bufs;
1797                 if (used_idx >= vq->packed.vring.num) {
1798                         used_idx -= vq->packed.vring.num;
1799                         wrap_counter ^= 1;
1800                 }
1801
1802                 vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx |
1803                         (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
1804
1805                 /*
1806                  * We need to update event offset and event wrap
1807                  * counter first before updating event flags.
1808                  */
1809                 virtio_wmb(vq->weak_barriers);
1810         }
1811
1812         if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
1813                 vq->packed.event_flags_shadow = vq->event ?
1814                                 VRING_PACKED_EVENT_FLAG_DESC :
1815                                 VRING_PACKED_EVENT_FLAG_ENABLE;
1816                 vq->packed.vring.driver->flags =
1817                                 cpu_to_le16(vq->packed.event_flags_shadow);
1818         }
1819
1820         /*
1821          * We need to update event suppression structure first
1822          * before re-checking for more used buffers.
1823          */
1824         virtio_mb(vq->weak_barriers);
1825
1826         last_used_idx = READ_ONCE(vq->last_used_idx);
1827         wrap_counter = packed_used_wrap_counter(last_used_idx);
1828         used_idx = packed_last_used(last_used_idx);
1829         if (is_used_desc_packed(vq, used_idx, wrap_counter)) {
1830                 END_USE(vq);
1831                 return false;
1832         }
1833
1834         END_USE(vq);
1835         return true;
1836 }
1837
1838 static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
1839 {
1840         struct vring_virtqueue *vq = to_vvq(_vq);
1841         unsigned int i;
1842         void *buf;
1843
1844         START_USE(vq);
1845
1846         for (i = 0; i < vq->packed.vring.num; i++) {
1847                 if (!vq->packed.desc_state[i].data)
1848                         continue;
1849                 /* detach_buf clears data, so grab it now. */
1850                 buf = vq->packed.desc_state[i].data;
1851                 detach_buf_packed(vq, i, NULL);
1852                 END_USE(vq);
1853                 return buf;
1854         }
1855         /* That should have freed everything. */
1856         BUG_ON(vq->vq.num_free != vq->packed.vring.num);
1857
1858         END_USE(vq);
1859         return NULL;
1860 }
1861
1862 static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num)
1863 {
1864         struct vring_desc_extra *desc_extra;
1865         unsigned int i;
1866
1867         desc_extra = kmalloc_array(num, sizeof(struct vring_desc_extra),
1868                                    GFP_KERNEL);
1869         if (!desc_extra)
1870                 return NULL;
1871
1872         memset(desc_extra, 0, num * sizeof(struct vring_desc_extra));
1873
1874         for (i = 0; i < num - 1; i++)
1875                 desc_extra[i].next = i + 1;
1876
1877         return desc_extra;
1878 }
1879
1880 static void vring_free_packed(struct vring_virtqueue_packed *vring_packed,
1881                               struct virtio_device *vdev,
1882                               struct device *dma_dev)
1883 {
1884         if (vring_packed->vring.desc)
1885                 vring_free_queue(vdev, vring_packed->ring_size_in_bytes,
1886                                  vring_packed->vring.desc,
1887                                  vring_packed->ring_dma_addr,
1888                                  dma_dev);
1889
1890         if (vring_packed->vring.driver)
1891                 vring_free_queue(vdev, vring_packed->event_size_in_bytes,
1892                                  vring_packed->vring.driver,
1893                                  vring_packed->driver_event_dma_addr,
1894                                  dma_dev);
1895
1896         if (vring_packed->vring.device)
1897                 vring_free_queue(vdev, vring_packed->event_size_in_bytes,
1898                                  vring_packed->vring.device,
1899                                  vring_packed->device_event_dma_addr,
1900                                  dma_dev);
1901
1902         kfree(vring_packed->desc_state);
1903         kfree(vring_packed->desc_extra);
1904 }
1905
1906 static int vring_alloc_queue_packed(struct vring_virtqueue_packed *vring_packed,
1907                                     struct virtio_device *vdev,
1908                                     u32 num, struct device *dma_dev)
1909 {
1910         struct vring_packed_desc *ring;
1911         struct vring_packed_desc_event *driver, *device;
1912         dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
1913         size_t ring_size_in_bytes, event_size_in_bytes;
1914
1915         ring_size_in_bytes = num * sizeof(struct vring_packed_desc);
1916
1917         ring = vring_alloc_queue(vdev, ring_size_in_bytes,
1918                                  &ring_dma_addr,
1919                                  GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
1920                                  dma_dev);
1921         if (!ring)
1922                 goto err;
1923
1924         vring_packed->vring.desc         = ring;
1925         vring_packed->ring_dma_addr      = ring_dma_addr;
1926         vring_packed->ring_size_in_bytes = ring_size_in_bytes;
1927
1928         event_size_in_bytes = sizeof(struct vring_packed_desc_event);
1929
1930         driver = vring_alloc_queue(vdev, event_size_in_bytes,
1931                                    &driver_event_dma_addr,
1932                                    GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
1933                                    dma_dev);
1934         if (!driver)
1935                 goto err;
1936
1937         vring_packed->vring.driver          = driver;
1938         vring_packed->event_size_in_bytes   = event_size_in_bytes;
1939         vring_packed->driver_event_dma_addr = driver_event_dma_addr;
1940
1941         device = vring_alloc_queue(vdev, event_size_in_bytes,
1942                                    &device_event_dma_addr,
1943                                    GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
1944                                    dma_dev);
1945         if (!device)
1946                 goto err;
1947
1948         vring_packed->vring.device          = device;
1949         vring_packed->device_event_dma_addr = device_event_dma_addr;
1950
1951         vring_packed->vring.num = num;
1952
1953         return 0;
1954
1955 err:
1956         vring_free_packed(vring_packed, vdev, dma_dev);
1957         return -ENOMEM;
1958 }
1959
1960 static int vring_alloc_state_extra_packed(struct vring_virtqueue_packed *vring_packed)
1961 {
1962         struct vring_desc_state_packed *state;
1963         struct vring_desc_extra *extra;
1964         u32 num = vring_packed->vring.num;
1965
1966         state = kmalloc_array(num, sizeof(struct vring_desc_state_packed), GFP_KERNEL);
1967         if (!state)
1968                 goto err_desc_state;
1969
1970         memset(state, 0, num * sizeof(struct vring_desc_state_packed));
1971
1972         extra = vring_alloc_desc_extra(num);
1973         if (!extra)
1974                 goto err_desc_extra;
1975
1976         vring_packed->desc_state = state;
1977         vring_packed->desc_extra = extra;
1978
1979         return 0;
1980
1981 err_desc_extra:
1982         kfree(state);
1983 err_desc_state:
1984         return -ENOMEM;
1985 }
1986
1987 static void virtqueue_vring_init_packed(struct vring_virtqueue_packed *vring_packed,
1988                                         bool callback)
1989 {
1990         vring_packed->next_avail_idx = 0;
1991         vring_packed->avail_wrap_counter = 1;
1992         vring_packed->event_flags_shadow = 0;
1993         vring_packed->avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL;
1994
1995         /* No callback?  Tell other side not to bother us. */
1996         if (!callback) {
1997                 vring_packed->event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1998                 vring_packed->vring.driver->flags =
1999                         cpu_to_le16(vring_packed->event_flags_shadow);
2000         }
2001 }
2002
2003 static void virtqueue_vring_attach_packed(struct vring_virtqueue *vq,
2004                                           struct vring_virtqueue_packed *vring_packed)
2005 {
2006         vq->packed = *vring_packed;
2007
2008         /* Put everything in free lists. */
2009         vq->free_head = 0;
2010 }
2011
2012 static void virtqueue_reinit_packed(struct vring_virtqueue *vq)
2013 {
2014         memset(vq->packed.vring.device, 0, vq->packed.event_size_in_bytes);
2015         memset(vq->packed.vring.driver, 0, vq->packed.event_size_in_bytes);
2016
2017         /* We need to reset desc.flags; for details, see is_used_desc_packed(). */
2018         memset(vq->packed.vring.desc, 0, vq->packed.ring_size_in_bytes);
2019
2020         virtqueue_init(vq, vq->packed.vring.num);
2021         virtqueue_vring_init_packed(&vq->packed, !!vq->vq.callback);
2022 }
2023
2024 static struct virtqueue *vring_create_virtqueue_packed(
2025         unsigned int index,
2026         unsigned int num,
2027         unsigned int vring_align,
2028         struct virtio_device *vdev,
2029         bool weak_barriers,
2030         bool may_reduce_num,
2031         bool context,
2032         bool (*notify)(struct virtqueue *),
2033         void (*callback)(struct virtqueue *),
2034         const char *name,
2035         struct device *dma_dev)
2036 {
2037         struct vring_virtqueue_packed vring_packed = {};
2038         struct vring_virtqueue *vq;
2039         int err;
2040
2041         if (vring_alloc_queue_packed(&vring_packed, vdev, num, dma_dev))
2042                 goto err_ring;
2043
2044         vq = kmalloc(sizeof(*vq), GFP_KERNEL);
2045         if (!vq)
2046                 goto err_vq;
2047
2048         vq->vq.callback = callback;
2049         vq->vq.vdev = vdev;
2050         vq->vq.name = name;
2051         vq->vq.index = index;
2052         vq->vq.reset = false;
2053         vq->we_own_ring = true;
2054         vq->notify = notify;
2055         vq->weak_barriers = weak_barriers;
2056 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
2057         vq->broken = true;
2058 #else
2059         vq->broken = false;
2060 #endif
2061         vq->packed_ring = true;
2062         vq->dma_dev = dma_dev;
2063         vq->use_dma_api = vring_use_dma_api(vdev);
2064
2065         vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
2066                 !context;
2067         vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
2068
2069         if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
2070                 vq->weak_barriers = false;
2071
2072         err = vring_alloc_state_extra_packed(&vring_packed);
2073         if (err)
2074                 goto err_state_extra;
2075
2076         virtqueue_vring_init_packed(&vring_packed, !!callback);
2077
2078         virtqueue_init(vq, num);
2079         virtqueue_vring_attach_packed(vq, &vring_packed);
2080
2081         spin_lock(&vdev->vqs_list_lock);
2082         list_add_tail(&vq->vq.list, &vdev->vqs);
2083         spin_unlock(&vdev->vqs_list_lock);
2084         return &vq->vq;
2085
2086 err_state_extra:
2087         kfree(vq);
2088 err_vq:
2089         vring_free_packed(&vring_packed, vdev, dma_dev);
2090 err_ring:
2091         return NULL;
2092 }
2093
2094 static int virtqueue_resize_packed(struct virtqueue *_vq, u32 num)
2095 {
2096         struct vring_virtqueue_packed vring_packed = {};
2097         struct vring_virtqueue *vq = to_vvq(_vq);
2098         struct virtio_device *vdev = _vq->vdev;
2099         int err;
2100
2101         if (vring_alloc_queue_packed(&vring_packed, vdev, num, vring_dma_dev(vq)))
2102                 goto err_ring;
2103
2104         err = vring_alloc_state_extra_packed(&vring_packed);
2105         if (err)
2106                 goto err_state_extra;
2107
2108         vring_free(&vq->vq);
2109
2110         virtqueue_vring_init_packed(&vring_packed, !!vq->vq.callback);
2111
2112         virtqueue_init(vq, vring_packed.vring.num);
2113         virtqueue_vring_attach_packed(vq, &vring_packed);
2114
2115         return 0;
2116
2117 err_state_extra:
2118         vring_free_packed(&vring_packed, vdev, vring_dma_dev(vq));
2119 err_ring:
2120         virtqueue_reinit_packed(vq);
2121         return -ENOMEM;
2122 }
2123
2124
2125 /*
2126  * Generic functions and exported symbols.
2127  */
2128
2129 static inline int virtqueue_add(struct virtqueue *_vq,
2130                                 struct scatterlist *sgs[],
2131                                 unsigned int total_sg,
2132                                 unsigned int out_sgs,
2133                                 unsigned int in_sgs,
2134                                 void *data,
2135                                 void *ctx,
2136                                 gfp_t gfp)
2137 {
2138         struct vring_virtqueue *vq = to_vvq(_vq);
2139
2140         return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg,
2141                                         out_sgs, in_sgs, data, ctx, gfp) :
2142                                  virtqueue_add_split(_vq, sgs, total_sg,
2143                                         out_sgs, in_sgs, data, ctx, gfp);
2144 }
2145
2146 /**
2147  * virtqueue_add_sgs - expose buffers to other end
2148  * @_vq: the struct virtqueue we're talking about.
2149  * @sgs: array of terminated scatterlists.
2150  * @out_sgs: the number of scatterlists readable by other side
2151  * @in_sgs: the number of scatterlists which are writable (after readable ones)
2152  * @data: the token identifying the buffer.
2153  * @gfp: how to do memory allocations (if necessary).
2154  *
2155  * Caller must ensure we don't call this with other virtqueue operations
2156  * at the same time (except where noted).
2157  *
2158  * Returns zero or a negative error (e.g. -ENOSPC, -ENOMEM, -EIO).
2159  */
2160 int virtqueue_add_sgs(struct virtqueue *_vq,
2161                       struct scatterlist *sgs[],
2162                       unsigned int out_sgs,
2163                       unsigned int in_sgs,
2164                       void *data,
2165                       gfp_t gfp)
2166 {
2167         unsigned int i, total_sg = 0;
2168
2169         /* Count them first. */
2170         for (i = 0; i < out_sgs + in_sgs; i++) {
2171                 struct scatterlist *sg;
2172
2173                 for (sg = sgs[i]; sg; sg = sg_next(sg))
2174                         total_sg++;
2175         }
2176         return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs,
2177                              data, NULL, gfp);
2178 }
2179 EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
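
/*
 * Editorial sketch, not part of the original file: a hypothetical driver
 * helper showing the usual virtqueue_add_sgs() calling convention with one
 * driver-readable and one device-writable scatterlist. The request layout
 * (struct vring_example_req) and the helper name are assumptions made purely
 * for illustration.
 */
struct vring_example_req {
        u8 out_hdr[16];         /* filled by the driver, read by the device */
        u8 in_status;           /* written back by the device */
};

static int __maybe_unused vring_example_add_req(struct virtqueue *vq,
                                                struct vring_example_req *req)
{
        struct scatterlist out_sg, in_sg;
        struct scatterlist *sgs[] = { &out_sg, &in_sg };

        sg_init_one(&out_sg, req->out_hdr, sizeof(req->out_hdr));
        sg_init_one(&in_sg, &req->in_status, sizeof(req->in_status));

        /* @req doubles as the token later returned by virtqueue_get_buf(). */
        return virtqueue_add_sgs(vq, sgs, 1, 1, req, GFP_ATOMIC);
}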
2180
2181 /**
2182  * virtqueue_add_outbuf - expose output buffers to other end
2183  * @vq: the struct virtqueue we're talking about.
2184  * @sg: scatterlist (must be well-formed and terminated!)
2185  * @num: the number of entries in @sg readable by other side
2186  * @data: the token identifying the buffer.
2187  * @gfp: how to do memory allocations (if necessary).
2188  *
2189  * Caller must ensure we don't call this with other virtqueue operations
2190  * at the same time (except where noted).
2191  *
2192  * Returns zero or a negative error (e.g. -ENOSPC, -ENOMEM, -EIO).
2193  */
2194 int virtqueue_add_outbuf(struct virtqueue *vq,
2195                          struct scatterlist *sg, unsigned int num,
2196                          void *data,
2197                          gfp_t gfp)
2198 {
2199         return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp);
2200 }
2201 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
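
/*
 * Editorial sketch, not part of the original file: a hypothetical example of
 * virtqueue_add_outbuf() with a two-entry scatterlist followed by a kick.
 * The header/payload buffers and the helper name are illustrative only.
 */
static int __maybe_unused vring_example_send(struct virtqueue *vq,
                                             void *hdr, unsigned int hdr_len,
                                             void *payload, unsigned int len)
{
        struct scatterlist sg[2];
        int err;

        sg_init_table(sg, 2);
        sg_set_buf(&sg[0], hdr, hdr_len);
        sg_set_buf(&sg[1], payload, len);

        /* Both entries are device-readable; @hdr is used as the token. */
        err = virtqueue_add_outbuf(vq, sg, 2, hdr, GFP_ATOMIC);
        if (err)
                return err;

        virtqueue_kick(vq);
        return 0;
}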
2202
2203 /**
2204  * virtqueue_add_inbuf - expose input buffers to other end
2205  * @vq: the struct virtqueue we're talking about.
2206  * @sg: scatterlist (must be well-formed and terminated!)
2207  * @num: the number of entries in @sg writable by other side
2208  * @data: the token identifying the buffer.
2209  * @gfp: how to do memory allocations (if necessary).
2210  *
2211  * Caller must ensure we don't call this with other virtqueue operations
2212  * at the same time (except where noted).
2213  *
2214  * Returns zero or a negative error (e.g. -ENOSPC, -ENOMEM, -EIO).
2215  */
2216 int virtqueue_add_inbuf(struct virtqueue *vq,
2217                         struct scatterlist *sg, unsigned int num,
2218                         void *data,
2219                         gfp_t gfp)
2220 {
2221         return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp);
2222 }
2223 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
2224
2225 /**
2226  * virtqueue_add_inbuf_ctx - expose input buffers to other end
2227  * @vq: the struct virtqueue we're talking about.
2228  * @sg: scatterlist (must be well-formed and terminated!)
2229  * @num: the number of entries in @sg writable by other side
2230  * @data: the token identifying the buffer.
2231  * @ctx: extra context for the token
2232  * @gfp: how to do memory allocations (if necessary).
2233  *
2234  * Caller must ensure we don't call this with other virtqueue operations
2235  * at the same time (except where noted).
2236  *
2237  * Returns zero or a negative error (e.g. -ENOSPC, -ENOMEM, -EIO).
2238  */
2239 int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
2240                         struct scatterlist *sg, unsigned int num,
2241                         void *data,
2242                         void *ctx,
2243                         gfp_t gfp)
2244 {
2245         return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp);
2246 }
2247 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
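
/*
 * Editorial sketch, not part of the original file: a hypothetical use of
 * virtqueue_add_inbuf_ctx(), stashing per-buffer metadata in @ctx so it can
 * be retrieved later through virtqueue_get_buf_ctx(). The metadata structure
 * and names are assumptions for illustration.
 */
struct vring_example_meta {
        unsigned int id;
};

static int __maybe_unused vring_example_post_rx(struct virtqueue *vq,
                                                void *buf, unsigned int len,
                                                struct vring_example_meta *meta)
{
        struct scatterlist sg;

        sg_init_one(&sg, buf, len);

        /* @buf is the token; @meta comes back via **ctx of get_buf_ctx(). */
        return virtqueue_add_inbuf_ctx(vq, &sg, 1, buf, meta, GFP_ATOMIC);
}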
2248
2249 /**
2250  * virtqueue_kick_prepare - first half of split virtqueue_kick call.
2251  * @_vq: the struct virtqueue
2252  *
2253  * Instead of virtqueue_kick(), you can do:
2254  *      if (virtqueue_kick_prepare(vq))
2255  *              virtqueue_notify(vq);
2256  *
2257  * This is sometimes useful because virtqueue_kick_prepare() needs
2258  * to be serialized, but the actual virtqueue_notify() call does not.
2259  */
2260 bool virtqueue_kick_prepare(struct virtqueue *_vq)
2261 {
2262         struct vring_virtqueue *vq = to_vvq(_vq);
2263
2264         return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) :
2265                                  virtqueue_kick_prepare_split(_vq);
2266 }
2267 EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
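
/*
 * Editorial sketch, not part of the original file: the split-kick pattern the
 * comment above describes, doing the serialized prepare step under a
 * hypothetical driver lock and the (unserialized, possibly slow) notify
 * outside of it.
 */
static void __maybe_unused vring_example_kick_unlocked(struct virtqueue *vq,
                                                       spinlock_t *lock)
{
        bool notify;

        spin_lock(lock);
        notify = virtqueue_kick_prepare(vq);
        spin_unlock(lock);

        if (notify)
                virtqueue_notify(vq);
}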
2268
2269 /**
2270  * virtqueue_notify - second half of split virtqueue_kick call.
2271  * @_vq: the struct virtqueue
2272  *
2273  * This does not need to be serialized.
2274  *
2275  * Returns false if host notify failed or queue is broken, otherwise true.
2276  */
2277 bool virtqueue_notify(struct virtqueue *_vq)
2278 {
2279         struct vring_virtqueue *vq = to_vvq(_vq);
2280
2281         if (unlikely(vq->broken))
2282                 return false;
2283
2284         /* Prod other side to tell it about changes. */
2285         if (!vq->notify(_vq)) {
2286                 vq->broken = true;
2287                 return false;
2288         }
2289         return true;
2290 }
2291 EXPORT_SYMBOL_GPL(virtqueue_notify);
2292
2293 /**
2294  * virtqueue_kick - update after add_buf
2295  * @vq: the struct virtqueue
2296  *
2297  * After one or more virtqueue_add_* calls, invoke this to kick
2298  * the other side.
2299  *
2300  * Caller must ensure we don't call this with other virtqueue
2301  * operations at the same time (except where noted).
2302  *
2303  * Returns false if kick failed, otherwise true.
2304  */
2305 bool virtqueue_kick(struct virtqueue *vq)
2306 {
2307         if (virtqueue_kick_prepare(vq))
2308                 return virtqueue_notify(vq);
2309         return true;
2310 }
2311 EXPORT_SYMBOL_GPL(virtqueue_kick);
2312
2313 /**
2314  * virtqueue_get_buf_ctx - get the next used buffer
2315  * @_vq: the struct virtqueue we're talking about.
2316  * @len: the length written into the buffer
2317  * @ctx: extra context for the token
2318  *
2319  * If the device wrote data into the buffer, @len will be set to the
2320  * amount written.  This means you don't need to clear the buffer
2321  * beforehand to ensure there's no data leakage in the case of short
2322  * writes.
2323  *
2324  * Caller must ensure we don't call this with other virtqueue
2325  * operations at the same time (except where noted).
2326  *
2327  * Returns NULL if there are no used buffers, or the "data" token
2328  * handed to virtqueue_add_*().
2329  */
2330 void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
2331                             void **ctx)
2332 {
2333         struct vring_virtqueue *vq = to_vvq(_vq);
2334
2335         return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) :
2336                                  virtqueue_get_buf_ctx_split(_vq, len, ctx);
2337 }
2338 EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);
2339
2340 void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
2341 {
2342         return virtqueue_get_buf_ctx(_vq, len, NULL);
2343 }
2344 EXPORT_SYMBOL_GPL(virtqueue_get_buf);
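
/*
 * Editorial sketch, not part of the original file: a hypothetical completion
 * loop draining used buffers with virtqueue_get_buf(). The token is assumed
 * to be the buffer itself (as in the earlier examples) and is simply freed;
 * a real driver would hand it back to its own completion logic.
 */
static void __maybe_unused vring_example_reap(struct virtqueue *vq)
{
        unsigned int len;
        void *buf;

        while ((buf = virtqueue_get_buf(vq, &len)) != NULL) {
                /* @len is how much the device actually wrote. */
                kfree(buf);
        }
}
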
2345 /**
2346  * virtqueue_disable_cb - disable callbacks
2347  * @_vq: the struct virtqueue we're talking about.
2348  *
2349  * Note that this is not necessarily synchronous, hence unreliable and only
2350  * useful as an optimization.
2351  *
2352  * Unlike other operations, this need not be serialized.
2353  */
2354 void virtqueue_disable_cb(struct virtqueue *_vq)
2355 {
2356         struct vring_virtqueue *vq = to_vvq(_vq);
2357
2358         if (vq->packed_ring)
2359                 virtqueue_disable_cb_packed(_vq);
2360         else
2361                 virtqueue_disable_cb_split(_vq);
2362 }
2363 EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
2364
2365 /**
2366  * virtqueue_enable_cb_prepare - restart callbacks after disable_cb
2367  * @_vq: the struct virtqueue we're talking about.
2368  *
2369  * This re-enables callbacks; it returns current queue state
2370  * in an opaque unsigned value. This value should be later tested by
2371  * virtqueue_poll, to detect a possible race between the driver checking for
2372  * more work, and enabling callbacks.
2373  *
2374  * Caller must ensure we don't call this with other virtqueue
2375  * operations at the same time (except where noted).
2376  */
2377 unsigned int virtqueue_enable_cb_prepare(struct virtqueue *_vq)
2378 {
2379         struct vring_virtqueue *vq = to_vvq(_vq);
2380
2381         if (vq->event_triggered)
2382                 vq->event_triggered = false;
2383
2384         return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) :
2385                                  virtqueue_enable_cb_prepare_split(_vq);
2386 }
2387 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);
2388
2389 /**
2390  * virtqueue_poll - query pending used buffers
2391  * @_vq: the struct virtqueue we're talking about.
2392  * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
2393  *
2394  * Returns "true" if there are pending used buffers in the queue.
2395  *
2396  * This does not need to be serialized.
2397  */
2398 bool virtqueue_poll(struct virtqueue *_vq, unsigned int last_used_idx)
2399 {
2400         struct vring_virtqueue *vq = to_vvq(_vq);
2401
2402         if (unlikely(vq->broken))
2403                 return false;
2404
2405         virtio_mb(vq->weak_barriers);
2406         return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) :
2407                                  virtqueue_poll_split(_vq, last_used_idx);
2408 }
2409 EXPORT_SYMBOL_GPL(virtqueue_poll);
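
/*
 * Editorial sketch, not part of the original file: the race-free re-enable
 * pattern built from virtqueue_enable_cb_prepare() and virtqueue_poll(), as
 * used by NAPI-style drivers. It returns true if callbacks were left enabled
 * with no work pending; the helper name is an assumption for illustration.
 */
static bool __maybe_unused vring_example_try_reenable(struct virtqueue *vq)
{
        unsigned int opaque;

        opaque = virtqueue_enable_cb_prepare(vq);
        if (virtqueue_poll(vq, opaque)) {
                /* New buffers arrived in the window; keep processing. */
                virtqueue_disable_cb(vq);
                return false;
        }
        return true;
}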
2410
2411 /**
2412  * virtqueue_enable_cb - restart callbacks after disable_cb.
2413  * @_vq: the struct virtqueue we're talking about.
2414  *
2415  * This re-enables callbacks; it returns "false" if there are pending
2416  * buffers in the queue, to detect a possible race between the driver
2417  * checking for more work, and enabling callbacks.
2418  *
2419  * Caller must ensure we don't call this with other virtqueue
2420  * operations at the same time (except where noted).
2421  */
2422 bool virtqueue_enable_cb(struct virtqueue *_vq)
2423 {
2424         unsigned int last_used_idx = virtqueue_enable_cb_prepare(_vq);
2425
2426         return !virtqueue_poll(_vq, last_used_idx);
2427 }
2428 EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
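
/*
 * Editorial sketch, not part of the original file: the classic consume loop a
 * hypothetical driver callback might run, disabling callbacks while there is
 * work and using virtqueue_enable_cb() to close the race before going idle.
 */
static void __maybe_unused vring_example_vq_callback(struct virtqueue *vq)
{
        unsigned int len;
        void *buf;

        do {
                virtqueue_disable_cb(vq);
                while ((buf = virtqueue_get_buf(vq, &len)) != NULL)
                        kfree(buf);     /* assumption: token == buffer */
        } while (!virtqueue_enable_cb(vq));
}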
2429
2430 /**
2431  * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
2432  * @_vq: the struct virtqueue we're talking about.
2433  *
2434  * This re-enables callbacks but hints to the other side to delay
2435  * interrupts until most of the available buffers have been processed;
2436  * it returns "false" if there are many pending buffers in the queue,
2437  * to detect a possible race between the driver checking for more work,
2438  * and enabling callbacks.
2439  *
2440  * Caller must ensure we don't call this with other virtqueue
2441  * operations at the same time (except where noted).
2442  */
2443 bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
2444 {
2445         struct vring_virtqueue *vq = to_vvq(_vq);
2446
2447         if (vq->event_triggered)
2448                 vq->event_triggered = false;
2449
2450         return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) :
2451                                  virtqueue_enable_cb_delayed_split(_vq);
2452 }
2453 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
2454
2455 /**
2456  * virtqueue_detach_unused_buf - detach first unused buffer
2457  * @_vq: the struct virtqueue we're talking about.
2458  *
2459  * Returns NULL or the "data" token handed to virtqueue_add_*().
2460  * This is not valid on an active queue; it is useful during device
2461  * shutdown or when the queue is being reset.
2462  */
2463 void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
2464 {
2465         struct vring_virtqueue *vq = to_vvq(_vq);
2466
2467         return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) :
2468                                  virtqueue_detach_unused_buf_split(_vq);
2469 }
2470 EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
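
/*
 * Editorial sketch, not part of the original file: a hypothetical teardown
 * helper freeing every buffer still posted to a quiesced queue, as typically
 * done after the device has been reset and before deleting the virtqueue.
 */
static void __maybe_unused vring_example_drain(struct virtqueue *vq)
{
        void *buf;

        while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
                kfree(buf);     /* assumption: token == buffer */
}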
2471
2472 static inline bool more_used(const struct vring_virtqueue *vq)
2473 {
2474         return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
2475 }
2476
2477 /**
2478  * vring_interrupt - notify a virtqueue on an interrupt
2479  * @irq: the IRQ number (ignored)
2480  * @_vq: the struct virtqueue to notify
2481  *
2482  * Calls the callback function of @_vq to process the virtqueue
2483  * notification.
2484  */
2485 irqreturn_t vring_interrupt(int irq, void *_vq)
2486 {
2487         struct vring_virtqueue *vq = to_vvq(_vq);
2488
2489         if (!more_used(vq)) {
2490                 pr_debug("virtqueue interrupt with no work for %p\n", vq);
2491                 return IRQ_NONE;
2492         }
2493
2494         if (unlikely(vq->broken)) {
2495 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
2496                 dev_warn_once(&vq->vq.vdev->dev,
2497                               "virtio vring IRQ raised before DRIVER_OK");
2498                 return IRQ_NONE;
2499 #else
2500                 return IRQ_HANDLED;
2501 #endif
2502         }
2503
2504         /* Just a hint for performance: so it's ok that this can be racy! */
2505         if (vq->event)
2506                 vq->event_triggered = true;
2507
2508         pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
2509         if (vq->vq.callback)
2510                 vq->vq.callback(&vq->vq);
2511
2512         return IRQ_HANDLED;
2513 }
2514 EXPORT_SYMBOL_GPL(vring_interrupt);
2515
2516 /* Only available for split ring */
2517 static struct virtqueue *__vring_new_virtqueue(unsigned int index,
2518                                                struct vring_virtqueue_split *vring_split,
2519                                                struct virtio_device *vdev,
2520                                                bool weak_barriers,
2521                                                bool context,
2522                                                bool (*notify)(struct virtqueue *),
2523                                                void (*callback)(struct virtqueue *),
2524                                                const char *name,
2525                                                struct device *dma_dev)
2526 {
2527         struct vring_virtqueue *vq;
2528         int err;
2529
2530         if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2531                 return NULL;
2532
2533         vq = kmalloc(sizeof(*vq), GFP_KERNEL);
2534         if (!vq)
2535                 return NULL;
2536
2537         vq->packed_ring = false;
2538         vq->vq.callback = callback;
2539         vq->vq.vdev = vdev;
2540         vq->vq.name = name;
2541         vq->vq.index = index;
2542         vq->vq.reset = false;
2543         vq->we_own_ring = false;
2544         vq->notify = notify;
2545         vq->weak_barriers = weak_barriers;
2546 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
2547         vq->broken = true;
2548 #else
2549         vq->broken = false;
2550 #endif
2551         vq->dma_dev = dma_dev;
2552         vq->use_dma_api = vring_use_dma_api(vdev);
2553
2554         vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
2555                 !context;
2556         vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
2557
2558         if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
2559                 vq->weak_barriers = false;
2560
2561         err = vring_alloc_state_extra_split(vring_split);
2562         if (err) {
2563                 kfree(vq);
2564                 return NULL;
2565         }
2566
2567         virtqueue_vring_init_split(vring_split, vq);
2568
2569         virtqueue_init(vq, vring_split->vring.num);
2570         virtqueue_vring_attach_split(vq, vring_split);
2571
2572         spin_lock(&vdev->vqs_list_lock);
2573         list_add_tail(&vq->vq.list, &vdev->vqs);
2574         spin_unlock(&vdev->vqs_list_lock);
2575         return &vq->vq;
2576 }
2577
2578 struct virtqueue *vring_create_virtqueue(
2579         unsigned int index,
2580         unsigned int num,
2581         unsigned int vring_align,
2582         struct virtio_device *vdev,
2583         bool weak_barriers,
2584         bool may_reduce_num,
2585         bool context,
2586         bool (*notify)(struct virtqueue *),
2587         void (*callback)(struct virtqueue *),
2588         const char *name)
2589 {
2590
2591         if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2592                 return vring_create_virtqueue_packed(index, num, vring_align,
2593                                 vdev, weak_barriers, may_reduce_num,
2594                                 context, notify, callback, name, vdev->dev.parent);
2595
2596         return vring_create_virtqueue_split(index, num, vring_align,
2597                         vdev, weak_barriers, may_reduce_num,
2598                         context, notify, callback, name, vdev->dev.parent);
2599 }
2600 EXPORT_SYMBOL_GPL(vring_create_virtqueue);
2601
2602 struct virtqueue *vring_create_virtqueue_dma(
2603         unsigned int index,
2604         unsigned int num,
2605         unsigned int vring_align,
2606         struct virtio_device *vdev,
2607         bool weak_barriers,
2608         bool may_reduce_num,
2609         bool context,
2610         bool (*notify)(struct virtqueue *),
2611         void (*callback)(struct virtqueue *),
2612         const char *name,
2613         struct device *dma_dev)
2614 {
2615
2616         if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2617                 return vring_create_virtqueue_packed(index, num, vring_align,
2618                                 vdev, weak_barriers, may_reduce_num,
2619                                 context, notify, callback, name, dma_dev);
2620
2621         return vring_create_virtqueue_split(index, num, vring_align,
2622                         vdev, weak_barriers, may_reduce_num,
2623                         context, notify, callback, name, dma_dev);
2624 }
2625 EXPORT_SYMBOL_GPL(vring_create_virtqueue_dma);
2626
2627 /**
2628  * virtqueue_resize - resize the vring of vq
2629  * @_vq: the struct virtqueue we're talking about.
2630  * @num: new ring num
2631  * @recycle: callback to recycle buffers that are no longer used
2632  *
2633  * When a new vring really has to be created, this function first puts the
2634  * current vq into the reset state, then calls the passed callback to recycle
2635  * every buffer that is no longer used. The old vring is released only after
2636  * the new vring has been created successfully.
2637  *
2638  * Caller must ensure we don't call this with other virtqueue operations
2639  * at the same time (except where noted).
2640  *
2641  * Returns zero or a negative error.
2642  * 0: success.
2643  * -ENOMEM: Failed to allocate a new ring; the original ring size is kept
2644  *  and the vq can still work normally.
2645  * -EBUSY: Failed to re-enable the queue after the reset; the vq may not work properly.
2646  * -ENOENT: The transport or device does not support queue reset/resize.
2647  * -E2BIG/-EINVAL: @num is larger than the queue maximum, or zero.
2648  * -EPERM: The vring memory is not owned by the virtqueue core, so it cannot be resized.
2649  *
2650  */
2651 int virtqueue_resize(struct virtqueue *_vq, u32 num,
2652                      void (*recycle)(struct virtqueue *vq, void *buf))
2653 {
2654         struct vring_virtqueue *vq = to_vvq(_vq);
2655         struct virtio_device *vdev = vq->vq.vdev;
2656         void *buf;
2657         int err;
2658
2659         if (!vq->we_own_ring)
2660                 return -EPERM;
2661
2662         if (num > vq->vq.num_max)
2663                 return -E2BIG;
2664
2665         if (!num)
2666                 return -EINVAL;
2667
2668         if ((vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num) == num)
2669                 return 0;
2670
2671         if (!vdev->config->disable_vq_and_reset)
2672                 return -ENOENT;
2673
2674         if (!vdev->config->enable_vq_after_reset)
2675                 return -ENOENT;
2676
2677         err = vdev->config->disable_vq_and_reset(_vq);
2678         if (err)
2679                 return err;
2680
2681         while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL)
2682                 recycle(_vq, buf);
2683
2684         if (vq->packed_ring)
2685                 err = virtqueue_resize_packed(_vq, num);
2686         else
2687                 err = virtqueue_resize_split(_vq, num);
2688
2689         if (vdev->config->enable_vq_after_reset(_vq))
2690                 return -EBUSY;
2691
2692         return err;
2693 }
2694 EXPORT_SYMBOL_GPL(virtqueue_resize);
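
/*
 * Editorial sketch, not part of the original file: a hypothetical resize call
 * with a trivial recycle callback. Real drivers usually requeue or account
 * for the recycled buffers instead of just freeing them.
 */
static void __maybe_unused vring_example_recycle(struct virtqueue *vq, void *buf)
{
        kfree(buf);     /* assumption: token == buffer */
}

static int __maybe_unused vring_example_shrink(struct virtqueue *vq, u32 num)
{
        return virtqueue_resize(vq, num, vring_example_recycle);
}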
2695
2696 /* Only available for split ring */
2697 struct virtqueue *vring_new_virtqueue(unsigned int index,
2698                                       unsigned int num,
2699                                       unsigned int vring_align,
2700                                       struct virtio_device *vdev,
2701                                       bool weak_barriers,
2702                                       bool context,
2703                                       void *pages,
2704                                       bool (*notify)(struct virtqueue *vq),
2705                                       void (*callback)(struct virtqueue *vq),
2706                                       const char *name)
2707 {
2708         struct vring_virtqueue_split vring_split = {};
2709
2710         if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2711                 return NULL;
2712
2713         vring_init(&vring_split.vring, num, pages, vring_align);
2714         return __vring_new_virtqueue(index, &vring_split, vdev, weak_barriers,
2715                                      context, notify, callback, name,
2716                                      vdev->dev.parent);
2717 }
2718 EXPORT_SYMBOL_GPL(vring_new_virtqueue);
2719
2720 static void vring_free(struct virtqueue *_vq)
2721 {
2722         struct vring_virtqueue *vq = to_vvq(_vq);
2723
2724         if (vq->we_own_ring) {
2725                 if (vq->packed_ring) {
2726                         vring_free_queue(vq->vq.vdev,
2727                                          vq->packed.ring_size_in_bytes,
2728                                          vq->packed.vring.desc,
2729                                          vq->packed.ring_dma_addr,
2730                                          vring_dma_dev(vq));
2731
2732                         vring_free_queue(vq->vq.vdev,
2733                                          vq->packed.event_size_in_bytes,
2734                                          vq->packed.vring.driver,
2735                                          vq->packed.driver_event_dma_addr,
2736                                          vring_dma_dev(vq));
2737
2738                         vring_free_queue(vq->vq.vdev,
2739                                          vq->packed.event_size_in_bytes,
2740                                          vq->packed.vring.device,
2741                                          vq->packed.device_event_dma_addr,
2742                                          vring_dma_dev(vq));
2743
2744                         kfree(vq->packed.desc_state);
2745                         kfree(vq->packed.desc_extra);
2746                 } else {
2747                         vring_free_queue(vq->vq.vdev,
2748                                          vq->split.queue_size_in_bytes,
2749                                          vq->split.vring.desc,
2750                                          vq->split.queue_dma_addr,
2751                                          vring_dma_dev(vq));
2752                 }
2753         }
2754         if (!vq->packed_ring) {
2755                 kfree(vq->split.desc_state);
2756                 kfree(vq->split.desc_extra);
2757         }
2758 }
2759
2760 void vring_del_virtqueue(struct virtqueue *_vq)
2761 {
2762         struct vring_virtqueue *vq = to_vvq(_vq);
2763
2764         spin_lock(&vq->vq.vdev->vqs_list_lock);
2765         list_del(&_vq->list);
2766         spin_unlock(&vq->vq.vdev->vqs_list_lock);
2767
2768         vring_free(_vq);
2769
2770         kfree(vq);
2771 }
2772 EXPORT_SYMBOL_GPL(vring_del_virtqueue);
2773
2774 u32 vring_notification_data(struct virtqueue *_vq)
2775 {
2776         struct vring_virtqueue *vq = to_vvq(_vq);
2777         u16 next;
2778
2779         if (vq->packed_ring)
2780                 next = (vq->packed.next_avail_idx &
2781                                 ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR))) |
2782                         vq->packed.avail_wrap_counter <<
2783                                 VRING_PACKED_EVENT_F_WRAP_CTR;
2784         else
2785                 next = vq->split.avail_idx_shadow;
2786
2787         return next << 16 | _vq->index;
2788 }
2789 EXPORT_SYMBOL_GPL(vring_notification_data);
2790
2791 /* Manipulates transport-specific feature bits. */
2792 void vring_transport_features(struct virtio_device *vdev)
2793 {
2794         unsigned int i;
2795
2796         for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
2797                 switch (i) {
2798                 case VIRTIO_RING_F_INDIRECT_DESC:
2799                         break;
2800                 case VIRTIO_RING_F_EVENT_IDX:
2801                         break;
2802                 case VIRTIO_F_VERSION_1:
2803                         break;
2804                 case VIRTIO_F_ACCESS_PLATFORM:
2805                         break;
2806                 case VIRTIO_F_RING_PACKED:
2807                         break;
2808                 case VIRTIO_F_ORDER_PLATFORM:
2809                         break;
2810                 case VIRTIO_F_NOTIFICATION_DATA:
2811                         break;
2812                 default:
2813                         /* We don't understand this bit. */
2814                         __virtio_clear_bit(vdev, i);
2815                 }
2816         }
2817 }
2818 EXPORT_SYMBOL_GPL(vring_transport_features);
2819
2820 /**
2821  * virtqueue_get_vring_size - return the size of the virtqueue's vring
2822  * @_vq: the struct virtqueue containing the vring of interest.
2823  *
2824  * Returns the size of the vring.  This is mainly used for boasting to
2825  * userspace.  Unlike other operations, this need not be serialized.
2826  */
2827 unsigned int virtqueue_get_vring_size(const struct virtqueue *_vq)
2828 {
2829
2830         const struct vring_virtqueue *vq = to_vvq(_vq);
2831
2832         return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
2833 }
2834 EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
2835
2836 /*
2837  * This function should only be called by the core, not directly by the driver.
2838  */
2839 void __virtqueue_break(struct virtqueue *_vq)
2840 {
2841         struct vring_virtqueue *vq = to_vvq(_vq);
2842
2843         /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2844         WRITE_ONCE(vq->broken, true);
2845 }
2846 EXPORT_SYMBOL_GPL(__virtqueue_break);
2847
2848 /*
2849  * This function should only be called by the core, not directly by the driver.
2850  */
2851 void __virtqueue_unbreak(struct virtqueue *_vq)
2852 {
2853         struct vring_virtqueue *vq = to_vvq(_vq);
2854
2855         /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2856         WRITE_ONCE(vq->broken, false);
2857 }
2858 EXPORT_SYMBOL_GPL(__virtqueue_unbreak);
2859
2860 bool virtqueue_is_broken(const struct virtqueue *_vq)
2861 {
2862         const struct vring_virtqueue *vq = to_vvq(_vq);
2863
2864         return READ_ONCE(vq->broken);
2865 }
2866 EXPORT_SYMBOL_GPL(virtqueue_is_broken);
2867
2868 /*
2869  * This should prevent the device from being used, allowing drivers to
2870  * recover.  You may need to grab appropriate locks to flush.
2871  */
2872 void virtio_break_device(struct virtio_device *dev)
2873 {
2874         struct virtqueue *_vq;
2875
2876         spin_lock(&dev->vqs_list_lock);
2877         list_for_each_entry(_vq, &dev->vqs, list) {
2878                 struct vring_virtqueue *vq = to_vvq(_vq);
2879
2880                 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2881                 WRITE_ONCE(vq->broken, true);
2882         }
2883         spin_unlock(&dev->vqs_list_lock);
2884 }
2885 EXPORT_SYMBOL_GPL(virtio_break_device);
2886
2887 /*
2888  * This should allow the device to be used by the driver. You may
2889  * need to grab appropriate locks to flush the write to
2890  * vq->broken. This should only be used in specific cases, e.g.
2891  * probing and restoring. This function should only be called by the
2892  * core, not directly by the driver.
2893  */
2894 void __virtio_unbreak_device(struct virtio_device *dev)
2895 {
2896         struct virtqueue *_vq;
2897
2898         spin_lock(&dev->vqs_list_lock);
2899         list_for_each_entry(_vq, &dev->vqs, list) {
2900                 struct vring_virtqueue *vq = to_vvq(_vq);
2901
2902                 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2903                 WRITE_ONCE(vq->broken, false);
2904         }
2905         spin_unlock(&dev->vqs_list_lock);
2906 }
2907 EXPORT_SYMBOL_GPL(__virtio_unbreak_device);
2908
2909 dma_addr_t virtqueue_get_desc_addr(const struct virtqueue *_vq)
2910 {
2911         const struct vring_virtqueue *vq = to_vvq(_vq);
2912
2913         BUG_ON(!vq->we_own_ring);
2914
2915         if (vq->packed_ring)
2916                 return vq->packed.ring_dma_addr;
2917
2918         return vq->split.queue_dma_addr;
2919 }
2920 EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);
2921
2922 dma_addr_t virtqueue_get_avail_addr(const struct virtqueue *_vq)
2923 {
2924         const struct vring_virtqueue *vq = to_vvq(_vq);
2925
2926         BUG_ON(!vq->we_own_ring);
2927
2928         if (vq->packed_ring)
2929                 return vq->packed.driver_event_dma_addr;
2930
2931         return vq->split.queue_dma_addr +
2932                 ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
2933 }
2934 EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);
2935
2936 dma_addr_t virtqueue_get_used_addr(const struct virtqueue *_vq)
2937 {
2938         const struct vring_virtqueue *vq = to_vvq(_vq);
2939
2940         BUG_ON(!vq->we_own_ring);
2941
2942         if (vq->packed_ring)
2943                 return vq->packed.device_event_dma_addr;
2944
2945         return vq->split.queue_dma_addr +
2946                 ((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
2947 }
2948 EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
2949
2950 /* Only available for split ring */
2951 const struct vring *virtqueue_get_vring(const struct virtqueue *vq)
2952 {
2953         return &to_vvq(vq)->split.vring;
2954 }
2955 EXPORT_SYMBOL_GPL(virtqueue_get_vring);
2956
2957 MODULE_LICENSE("GPL");