// SPDX-License-Identifier: GPL-2.0
/*
 * channel program interfaces
 *
 * Copyright IBM Corp. 2017
 *
 * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
 *            Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
 */

#include <linux/ratelimit.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/iommu.h>
#include <linux/vfio.h>
#include <asm/idals.h>

#include "vfio_ccw_cp.h"
#include "vfio_ccw_private.h"

struct page_array {
	/* Array that stores the guest iovas of the pages that need pinning. */
	dma_addr_t		*pa_iova;
	/* Array that receives the pinned pages. */
	struct page		**pa_page;
	/* Number of pages pinned from @pa_iova. */
	int			pa_nr;
};

struct ccwchain {
	struct list_head	next;
	struct ccw1		*ch_ccw;
	/* Guest physical address of the current chain. */
	u64			ch_iova;
	/* Count of the valid ccws in chain. */
	int			ch_len;
	/* Pinned PAGEs for the original data. */
	struct page_array	*ch_pa;
};

/*
 * page_array_alloc() - alloc memory for page array
 * @pa: page_array on which to perform the operation
 * @iova: target guest physical address
 * @len: number of bytes that should be pinned from @iova
 *
 * Attempt to allocate memory for page array.
 *
 * Usage of page_array:
 * We expect (pa_nr == 0) and (pa_iova == NULL), any field in
 * this structure will be filled in by this function.
 *
 * Returns:
 *         0 if page array is allocated
 *   -EINVAL if pa->pa_nr is not initially zero, or pa->pa_iova is not NULL
 *   -ENOMEM if alloc failed
 */
static int page_array_alloc(struct page_array *pa, u64 iova, unsigned int len)
{
	int i;

	if (pa->pa_nr || pa->pa_iova)
		return -EINVAL;

	pa->pa_nr = ((iova & ~PAGE_MASK) + len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
	if (!pa->pa_nr)
		return -EINVAL;

	pa->pa_iova = kcalloc(pa->pa_nr,
			      sizeof(*pa->pa_iova) + sizeof(*pa->pa_page),
			      GFP_KERNEL);
	if (unlikely(!pa->pa_iova)) {
		pa->pa_nr = 0;
		return -ENOMEM;
	}
	pa->pa_page = (struct page **)&pa->pa_iova[pa->pa_nr];

	pa->pa_iova[0] = iova;
	pa->pa_page[0] = NULL;
	for (i = 1; i < pa->pa_nr; i++) {
		pa->pa_iova[i] = pa->pa_iova[i - 1] + PAGE_SIZE;
		pa->pa_page[i] = NULL;
	}

	return 0;
}
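
/*
 * Illustrative example (not from the original source): pinning
 * len = 0x1800 bytes at iova = 0x10800 yields
 * pa_nr = ((0x800 + 0x1800) + 0xfff) >> 12 = 2, covering the two
 * guest pages at 0x10000 and 0x11000.
 */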

/*
 * page_array_unpin() - Unpin user pages in memory
 * @pa: page_array on which to perform the operation
 * @vdev: the vfio device to perform the operation
 * @pa_nr: number of user pages to unpin
 *
 * Only unpin if any pages were pinned to begin with, i.e. pa_nr > 0,
 * otherwise only clear pa->pa_nr.
 */
static void page_array_unpin(struct page_array *pa,
			     struct vfio_device *vdev, int pa_nr)
{
	int unpinned = 0, npage = 1;

	while (unpinned < pa_nr) {
		dma_addr_t *first = &pa->pa_iova[unpinned];
		dma_addr_t *last = &first[npage];

		if (unpinned + npage < pa_nr &&
		    *first + npage * PAGE_SIZE == *last) {
			npage++;
			continue;
		}

		vfio_unpin_pages(vdev, *first, npage);
		unpinned += npage;
		npage = 1;
	}

	pa->pa_nr = 0;
}

/*
 * page_array_pin() - Pin user pages in memory
 * @pa: page_array on which to perform the operation
 * @vdev: the vfio device to perform pin operations
 *
 * Returns number of pages pinned upon success.
 * If the pin request partially succeeds, or fails completely,
 * all pages are left unpinned and a negative error value is returned.
 */
static int page_array_pin(struct page_array *pa, struct vfio_device *vdev)
{
	int pinned = 0, npage = 1;
	int ret = 0;

	while (pinned < pa->pa_nr) {
		dma_addr_t *first = &pa->pa_iova[pinned];
		dma_addr_t *last = &first[npage];

		if (pinned + npage < pa->pa_nr &&
		    *first + npage * PAGE_SIZE == *last) {
			npage++;
			continue;
		}

		ret = vfio_pin_pages(vdev, *first, npage,
				     IOMMU_READ | IOMMU_WRITE,
				     &pa->pa_page[pinned]);
		if (ret < 0) {
			goto err_out;
		} else if (ret > 0 && ret != npage) {
			pinned += ret;
			ret = -EINVAL;
			goto err_out;
		}
		pinned += npage;
		npage = 1;
	}

	return ret;

err_out:
	page_array_unpin(pa, vdev, pinned);
	return ret;
}
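
/*
 * Example of the coalescing above (illustrative): for pa_iova entries
 * { 0x1000, 0x2000, 0x3000, 0x8000 }, the first three are contiguous
 * in iova space, so vfio_pin_pages() is called twice: once with
 * npage = 3 for 0x1000, and once with npage = 1 for 0x8000.
 */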

/* Unpin the pages before releasing the memory. */
static void page_array_unpin_free(struct page_array *pa, struct vfio_device *vdev)
{
	page_array_unpin(pa, vdev, pa->pa_nr);
	kfree(pa->pa_iova);
}

static bool page_array_iova_pinned(struct page_array *pa, u64 iova, u64 length)
{
	u64 iova_pfn_start = iova >> PAGE_SHIFT;
	u64 iova_pfn_end = (iova + length - 1) >> PAGE_SHIFT;
	u64 pfn;
	int i;

	for (i = 0; i < pa->pa_nr; i++) {
		pfn = pa->pa_iova[i] >> PAGE_SHIFT;
		if (pfn >= iova_pfn_start && pfn <= iova_pfn_end)
			return true;
	}

	return false;
}

/* Create the list of IDAL words for a page_array. */
static inline void page_array_idal_create_words(struct page_array *pa,
						unsigned long *idaws)
{
	int i;

	/*
	 * IDAL words (except the first one) rely on the memory being 4K
	 * aligned. If a user virtual address is 4K aligned, then its
	 * corresponding kernel physical address will also be 4K aligned. Thus
	 * there will be no problem here to simply use the phys to create an
	 * IDAW.
	 */

	for (i = 0; i < pa->pa_nr; i++)
		idaws[i] = page_to_phys(pa->pa_page[i]);

	/* Adjust the first IDAW, since it may not start on a page boundary */
	idaws[0] += pa->pa_iova[0] & (PAGE_SIZE - 1);
}
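
/*
 * Illustrative example (hypothetical addresses): for a 2-page transfer
 * starting at guest iova 0x10800 whose pages were pinned at host
 * physical 0x52000 and 0x9d000, the resulting IDAL is
 * { 0x52800, 0x9d000 } -- only the first IDAW carries the sub-page
 * offset.
 */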

static void convert_ccw0_to_ccw1(struct ccw1 *source, unsigned long len)
{
	struct ccw0 ccw0;
	struct ccw1 *pccw1 = source;
	int i;

	for (i = 0; i < len; i++) {
		ccw0 = *(struct ccw0 *)pccw1;
		if ((pccw1->cmd_code & 0x0f) == CCW_CMD_TIC) {
			pccw1->cmd_code = CCW_CMD_TIC;
			pccw1->flags = 0;
			pccw1->count = 0;
		} else {
			pccw1->cmd_code = ccw0.cmd_code;
			pccw1->flags = ccw0.flags;
			pccw1->count = ccw0.count;
		}
		pccw1->cda = ccw0.cda;
		pccw1++;
	}
}
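
/*
 * Layout reminder (per the Principles of Operation): a format-0 CCW
 * packs the command code and a 24-bit data address into its first word,
 * with flags and count in the second; a format-1 CCW keeps command
 * code, flags and count in the first word and a 31-bit data address in
 * the second. Both are 8 bytes, so the conversion above can be done in
 * place, one CCW at a time.
 */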

/*
 * Within the domain (@vdev), copy @n bytes from a guest physical
 * address (@iova) to a host physical address (@to).
 *
 * Returns the number of bytes that could not be copied (0 on success),
 * or a negative error value from the alloc/pin steps.
 */
static long copy_from_iova(struct vfio_device *vdev, void *to, u64 iova,
			   unsigned long n)
{
	struct page_array pa = {0};
	int i, ret;
	unsigned long l, m;

	ret = page_array_alloc(&pa, iova, n);
	if (ret < 0)
		return ret;

	ret = page_array_pin(&pa, vdev);
	if (ret < 0) {
		page_array_unpin_free(&pa, vdev);
		return ret;
	}

	l = n;
	for (i = 0; i < pa.pa_nr; i++) {
		void *from = kmap_local_page(pa.pa_page[i]);

		m = PAGE_SIZE;
		if (i == 0) {
			from += iova & (PAGE_SIZE - 1);
			m -= iova & (PAGE_SIZE - 1);
		}

		m = min(l, m);
		memcpy(to + (n - l), from, m);
		kunmap_local(from);

		l -= m;
		if (l == 0)
			break;
	}

	page_array_unpin_free(&pa, vdev);

	return l;
}
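
/*
 * Illustrative usage (mirrors the call in ccwchain_handle_ccw() below):
 *
 *	ret = copy_from_iova(vdev, cp->guest_cp, cda,
 *			     CCWCHAIN_LEN_MAX * sizeof(struct ccw1));
 *
 * A zero return means everything was copied; a positive return is the
 * count of trailing bytes that could not be copied.
 */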

/*
 * Helpers to operate ccwchain.
 */
#define ccw_is_read(_ccw) (((_ccw)->cmd_code & 0x03) == 0x02)
#define ccw_is_read_backward(_ccw) (((_ccw)->cmd_code & 0x0F) == 0x0C)
#define ccw_is_sense(_ccw) (((_ccw)->cmd_code & 0x0F) == CCW_CMD_BASIC_SENSE)

#define ccw_is_noop(_ccw) ((_ccw)->cmd_code == CCW_CMD_NOOP)

#define ccw_is_tic(_ccw) ((_ccw)->cmd_code == CCW_CMD_TIC)

#define ccw_is_idal(_ccw) ((_ccw)->flags & CCW_FLAG_IDA)
#define ccw_is_skip(_ccw) ((_ccw)->flags & CCW_FLAG_SKIP)

#define ccw_is_chain(_ccw) ((_ccw)->flags & (CCW_FLAG_CC | CCW_FLAG_DC))
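
/*
 * The bit tests above follow the S/390 CCW command-code conventions:
 * the low-order bits encode the command class (e.g. 'mmmmmm10' is a
 * read, 'mmmm1100' read backward, 'mmmm0100' a sense), so masking with
 * 0x03 or 0x0F classifies a command regardless of its modifier bits.
 */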

/*
 * ccw_does_data_transfer()
 *
 * Determine whether a CCW will move any data, such that the guest pages
 * would need to be pinned before performing the I/O.
 *
 * Returns 1 if yes, 0 if no.
 */
static inline int ccw_does_data_transfer(struct ccw1 *ccw)
{
	/* If the count field is zero, then no data will be transferred */
	if (ccw->count == 0)
		return 0;

	/* If the command is a NOP, then no data will be transferred */
	if (ccw_is_noop(ccw))
		return 0;

	/* If the skip flag is off, then data will be transferred */
	if (!ccw_is_skip(ccw))
		return 1;

	/*
	 * If the skip flag is on, it is only meaningful if the command
	 * code is a read, read backward, sense, or sense ID. In those
	 * cases, no data will be transferred.
	 */
	if (ccw_is_read(ccw) || ccw_is_read_backward(ccw))
		return 0;

	if (ccw_is_sense(ccw))
		return 0;

	/* The skip flag is on, but it is ignored for this command code. */
	return 1;
}

/*
 * is_cpa_within_range()
 *
 * @cpa: channel program address being questioned
 * @head: address of the beginning of a CCW chain
 * @len: number of CCWs within the chain
 *
 * Determine whether the address of a CCW (whether a new chain,
 * or the target of a TIC) falls within a range (including the end points).
 *
 * Returns 1 if yes, 0 if no.
 */
static inline int is_cpa_within_range(u32 cpa, u32 head, int len)
{
	u32 tail = head + (len - 1) * sizeof(struct ccw1);

	return (head <= cpa && cpa <= tail);
}
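
/*
 * Illustrative example: with head = 0x4000 and len = 4, tail = 0x4018,
 * so cpa = 0x4010 (the third CCW) is within range, while 0x4020 is not.
 */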

static inline int is_tic_within_range(struct ccw1 *ccw, u32 head, int len)
{
	if (!ccw_is_tic(ccw))
		return 0;

	return is_cpa_within_range(ccw->cda, head, len);
}

static struct ccwchain *ccwchain_alloc(struct channel_program *cp, int len)
{
	struct ccwchain *chain;
	void *data;
	size_t size;

	/* Make ccw address aligned to 8. */
	size = ((sizeof(*chain) + 7L) & -8L) +
		sizeof(*chain->ch_ccw) * len +
		sizeof(*chain->ch_pa) * len;
	chain = kzalloc(size, GFP_DMA | GFP_KERNEL);
	if (!chain)
		return NULL;

	data = (u8 *)chain + ((sizeof(*chain) + 7L) & -8L);
	chain->ch_ccw = (struct ccw1 *)data;

	data = (u8 *)(chain->ch_ccw) + sizeof(*chain->ch_ccw) * len;
	chain->ch_pa = (struct page_array *)data;

	chain->ch_len = len;

	list_add_tail(&chain->next, &cp->ccwchain_list);

	return chain;
}
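
/*
 * Resulting layout of the single allocation above (sketch):
 *
 *	+--------------------------+ <- chain (8-byte aligned)
 *	| struct ccwchain          |
 *	+--------------------------+ <- chain->ch_ccw
 *	| len * struct ccw1        |
 *	+--------------------------+ <- chain->ch_pa
 *	| len * struct page_array  |
 *	+--------------------------+
 */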

static void ccwchain_free(struct ccwchain *chain)
{
	list_del(&chain->next);
	kfree(chain);
}

/* Free resource for a ccw that allocated memory for its cda. */
static void ccwchain_cda_free(struct ccwchain *chain, int idx)
{
	struct ccw1 *ccw = chain->ch_ccw + idx;

	if (ccw_is_tic(ccw))
		return;

	kfree((void *)(u64)ccw->cda);
}

/**
 * ccwchain_calc_length - calculate the length of the ccw chain.
 * @iova: guest physical address of the target ccw chain
 * @cp: channel_program on which to perform the operation
 *
 * This is the chain length not considering any TICs.
 * You need to do a new round for each TIC target.
 *
 * The program is also validated for absence of not yet supported
 * indirect data addressing scenarios.
 *
 * Returns: the length of the ccw chain or -errno.
 */
static int ccwchain_calc_length(u64 iova, struct channel_program *cp)
{
	struct ccw1 *ccw = cp->guest_cp;
	int cnt = 0;

	do {
		cnt++;

		/*
		 * As we don't want to fail direct addressing even if the
		 * orb specified one of the unsupported formats, we defer
		 * checking for IDAWs in unsupported formats to here.
		 */
		if ((!cp->orb.cmd.c64 || cp->orb.cmd.i2k) && ccw_is_idal(ccw))
			return -EOPNOTSUPP;

		/*
		 * We want to keep counting if the current CCW has the
		 * command-chaining flag enabled, or if it is a TIC CCW
		 * that loops back into the current chain. The latter
		 * is used for device orientation, where the CCW PRIOR to
		 * the TIC can either jump to the TIC or a CCW immediately
		 * after the TIC, depending on the results of its operation.
		 */
		if (!ccw_is_chain(ccw) && !is_tic_within_range(ccw, iova, cnt))
			break;

		ccw++;
	} while (cnt < CCWCHAIN_LEN_MAX + 1);

	if (cnt == CCWCHAIN_LEN_MAX + 1)
		cnt = -EINVAL;

	return cnt;
}

static int tic_target_chain_exists(struct ccw1 *tic, struct channel_program *cp)
{
	struct ccwchain *chain;
	u32 ccw_head;

	list_for_each_entry(chain, &cp->ccwchain_list, next) {
		ccw_head = chain->ch_iova;
		if (is_cpa_within_range(tic->cda, ccw_head, chain->ch_len))
			return 1;
	}

	return 0;
}

static int ccwchain_loop_tic(struct ccwchain *chain,
			     struct channel_program *cp);

static int ccwchain_handle_ccw(u32 cda, struct channel_program *cp)
{
	struct vfio_device *vdev =
		&container_of(cp, struct vfio_ccw_private, cp)->vdev;
	struct ccwchain *chain;
	int len, ret;

	/* Copy 2K (the most we support today) of possible CCWs */
	len = copy_from_iova(vdev, cp->guest_cp, cda,
			     CCWCHAIN_LEN_MAX * sizeof(struct ccw1));
	if (len)
		return len;

	/* Convert any Format-0 CCWs to Format-1 */
	if (!cp->orb.cmd.fmt)
		convert_ccw0_to_ccw1(cp->guest_cp, CCWCHAIN_LEN_MAX);

	/* Count the CCWs in the current chain */
	len = ccwchain_calc_length(cda, cp);
	if (len < 0)
		return len;

	/* Need to allocate a new chain for this one. */
	chain = ccwchain_alloc(cp, len);
	if (!chain)
		return -ENOMEM;
	chain->ch_iova = cda;

	/* Copy the actual CCWs into the new chain */
	memcpy(chain->ch_ccw, cp->guest_cp, len * sizeof(struct ccw1));

	/* Loop for tics on this new chain. */
	ret = ccwchain_loop_tic(chain, cp);

	if (ret)
		ccwchain_free(chain);

	return ret;
}
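
/*
 * Note: ccwchain_handle_ccw() and ccwchain_loop_tic() are mutually
 * recursive -- each TIC that leaves the chains gathered so far spawns
 * another ccwchain_handle_ccw() pass. The recursion terminates because
 * tic_target_chain_exists() skips TICs whose targets were already
 * copied, so each guest chain is fetched at most once.
 */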

/* Loop for TICs. */
static int ccwchain_loop_tic(struct ccwchain *chain, struct channel_program *cp)
{
	struct ccw1 *tic;
	int i, ret;

	for (i = 0; i < chain->ch_len; i++) {
		tic = chain->ch_ccw + i;

		if (!ccw_is_tic(tic))
			continue;

		/* May transfer to an existing chain. */
		if (tic_target_chain_exists(tic, cp))
			continue;

		/* Build a ccwchain for the next segment */
		ret = ccwchain_handle_ccw(tic->cda, cp);
		if (ret)
			return ret;
	}

	return 0;
}

static int ccwchain_fetch_tic(struct ccwchain *chain,
			      int idx,
			      struct channel_program *cp)
{
	struct ccw1 *ccw = chain->ch_ccw + idx;
	struct ccwchain *iter;
	u32 ccw_head;

	list_for_each_entry(iter, &cp->ccwchain_list, next) {
		ccw_head = iter->ch_iova;
		if (is_cpa_within_range(ccw->cda, ccw_head, iter->ch_len)) {
			ccw->cda = (__u32) (addr_t) (((char *)iter->ch_ccw) +
						     (ccw->cda - ccw_head));
			return 0;
		}
	}

	return -EFAULT;
}
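
/*
 * Illustrative example: if a guest TIC targets guest address 0x4010
 * inside a chain whose ch_iova is 0x4000 and whose host copy lives at
 * iter->ch_ccw, the cda is rewritten to iter->ch_ccw + 0x10 so the
 * channel follows the host-side copy of the chain.
 */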

static int ccwchain_fetch_direct(struct ccwchain *chain,
				 int idx,
				 struct channel_program *cp)
{
	struct vfio_device *vdev =
		&container_of(cp, struct vfio_ccw_private, cp)->vdev;
	struct ccw1 *ccw;
	struct page_array *pa;
	u64 iova;
	unsigned long *idaws;
	int ret;
	int bytes = 1;
	int idaw_nr, idal_len;
	int i;

	ccw = chain->ch_ccw + idx;

	if (ccw->count)
		bytes = ccw->count;

	/* Calculate size of IDAL */
	if (ccw_is_idal(ccw)) {
		/* Read first IDAW to see if it's 4K-aligned or not. */
		/* All subsequent IDAWs will be 4K-aligned. */
		ret = copy_from_iova(vdev, &iova, ccw->cda, sizeof(iova));
		if (ret)
			return ret;
	} else {
		iova = ccw->cda;
	}
	idaw_nr = idal_nr_words((void *)iova, bytes);
	idal_len = idaw_nr * sizeof(*idaws);

	/* Allocate an IDAL from host storage */
	idaws = kcalloc(idaw_nr, sizeof(*idaws), GFP_DMA | GFP_KERNEL);
	if (!idaws) {
		ret = -ENOMEM;
		goto out_init;
	}

	/*
	 * Allocate an array of pages to pin/translate.
	 * The number of pages is actually the count of the idaws
	 * required for the data transfer, since we only support
	 * 4K IDAWs today.
	 */
	pa = chain->ch_pa + idx;
	ret = page_array_alloc(pa, iova, bytes);
	if (ret < 0)
		goto out_free_idaws;

	if (ccw_is_idal(ccw)) {
		/* Copy guest IDAL into host IDAL */
		ret = copy_from_iova(vdev, idaws, ccw->cda, idal_len);
		if (ret)
			goto out_unpin;

		/*
		 * Copy guest IDAWs into page_array, in case the memory they
		 * occupy is not contiguous.
		 */
		for (i = 0; i < idaw_nr; i++)
			pa->pa_iova[i] = idaws[i];
	} else {
		/*
		 * No action is required here; the iova addresses in page_array
		 * were initialized sequentially in page_array_alloc() beginning
		 * with the contents of ccw->cda.
		 */
	}

	if (ccw_does_data_transfer(ccw)) {
		ret = page_array_pin(pa, vdev);
		if (ret < 0)
			goto out_unpin;
	} else {
		pa->pa_nr = 0;
	}

	ccw->cda = (__u32) virt_to_phys(idaws);
	ccw->flags |= CCW_FLAG_IDA;

	/* Populate the IDAL with pinned/translated addresses from page */
	page_array_idal_create_words(pa, idaws);

	return 0;

out_unpin:
	page_array_unpin_free(pa, vdev);
out_free_idaws:
	kfree(idaws);
out_init:
	ccw->cda = 0;
	return ret;
}

/*
 * Fetch one ccw.
 * To reduce memory copy, we'll pin the cda page in memory,
 * and to get rid of the cda 2G limitation of ccw1, we'll translate
 * direct ccws to idal ccws.
 */
static int ccwchain_fetch_one(struct ccwchain *chain,
			      int idx,
			      struct channel_program *cp)
{
	struct ccw1 *ccw = chain->ch_ccw + idx;

	if (ccw_is_tic(ccw))
		return ccwchain_fetch_tic(chain, idx, cp);

	return ccwchain_fetch_direct(chain, idx, cp);
}

/**
 * cp_init() - allocate ccwchains for a channel program.
 * @cp: channel_program on which to perform the operation
 * @orb: control block for the channel program from the guest
 *
 * This creates one or more ccwchain(s), and copies the raw data of
 * the target channel program from @orb->cmd.iova to the new ccwchain(s).
 *
 * Limitations:
 * 1. Supports idal(c64) ccw chaining.
 * 2. Supports 4k idaw.
 *
 * Returns:
 *   %0 on success and a negative error value on failure.
 */
int cp_init(struct channel_program *cp, union orb *orb)
{
	struct vfio_device *vdev =
		&container_of(cp, struct vfio_ccw_private, cp)->vdev;
	/* custom ratelimit used to avoid flood during guest IPL */
	static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 1);
	int ret;

	/* this is an error in the caller */
	if (cp->initialized)
		return -EBUSY;

	/*
	 * We only support prefetching the channel program. We assume all channel
	 * programs executed by supported guests likewise support prefetching.
	 * Executing a channel program that does not specify prefetching will
	 * typically not cause an error, but a warning is issued to help identify
	 * the problem if something does break.
	 */
	if (!orb->cmd.pfch && __ratelimit(&ratelimit_state))
		dev_warn(vdev->dev,
			 "Prefetching channel program even though prefetch not specified in ORB");

	INIT_LIST_HEAD(&cp->ccwchain_list);
	memcpy(&cp->orb, orb, sizeof(*orb));

	/* Build a ccwchain for the first CCW segment */
	ret = ccwchain_handle_ccw(orb->cmd.cpa, cp);

	if (!ret) {
		cp->initialized = true;

		/* It is safe to force: if it was not set but idals used
		 * ccwchain_calc_length would have returned an error.
		 */
		cp->orb.cmd.c64 = 1;
	}

	return ret;
}
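
/*
 * Typical lifecycle of the cp_* interfaces, loosely modeled on the
 * caller in vfio_ccw_fsm.c (illustrative sketch; error handling and
 * locking omitted, irb/schid are assumed from the caller's context):
 *
 *	ret = cp_init(cp, orb);		// copy and parse the guest program
 *	if (!ret)
 *		ret = cp_prefetch(cp);	// pin pages, translate CCWs
 *	if (!ret)
 *		ret = ssch(schid, cp_get_orb(cp, intparm, lpm));
 *	...
 *	cp_update_scsw(cp, &irb->scsw);	// on interrupt: host cpa -> guest cpa
 *	cp_free(cp);			// unpin pages, free chains
 */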

/**
 * cp_free() - free resources for channel program.
 * @cp: channel_program on which to perform the operation
 *
 * This unpins the memory pages and frees the memory space occupied by
 * @cp, which must have been returned by a previous call to cp_init().
 * Otherwise, undefined behavior occurs.
 */
void cp_free(struct channel_program *cp)
{
	struct vfio_device *vdev =
		&container_of(cp, struct vfio_ccw_private, cp)->vdev;
	struct ccwchain *chain, *temp;
	int i;

	if (!cp->initialized)
		return;

	cp->initialized = false;
	list_for_each_entry_safe(chain, temp, &cp->ccwchain_list, next) {
		for (i = 0; i < chain->ch_len; i++) {
			page_array_unpin_free(chain->ch_pa + i, vdev);
			ccwchain_cda_free(chain, i);
		}
		ccwchain_free(chain);
	}
}

/**
 * cp_prefetch() - translate a guest physical address channel program to
 *                 a real-device runnable channel program.
 * @cp: channel_program on which to perform the operation
 *
 * This function translates the guest-physical-address channel program
 * and stores the result to ccwchain list. @cp must have been
 * initialized by a previous call with cp_init(). Otherwise, undefined
 * behavior occurs.
 * For each chain composing the channel program:
 * - On entry ch_len holds the count of CCWs to be translated.
 * - On exit ch_len is adjusted to the count of successfully translated CCWs.
 * This allows cp_free to find in ch_len the count of CCWs to free in a chain.
 *
 * The S/390 CCW Translation APIs (prefixed by 'cp_') are introduced
 * as helpers to do ccw chain translation inside the kernel. Basically
 * they accept a channel program issued by a virtual machine, and
 * translate the channel program to a real-device runnable channel
 * program.
 *
 * These APIs will copy the ccws into kernel-space buffers, and update
 * the guest physical addresses with their corresponding host physical
 * addresses. Then channel I/O device drivers could issue the
 * translated channel program to real devices to perform an I/O
 * operation.
 *
 * These interfaces are designed to support translation only for
 * channel programs, which are generated and formatted by a
 * guest. Thus this will make it possible for things like VFIO to
 * leverage the interfaces to passthrough a channel I/O mediated
 * device in QEMU.
 *
 * We support direct ccw chaining by translating them to idal ccws.
 *
 * Returns:
 *   %0 on success and a negative error value on failure.
 */
int cp_prefetch(struct channel_program *cp)
{
	struct ccwchain *chain;
	int len, idx, ret;

	/* this is an error in the caller */
	if (!cp->initialized)
		return -EINVAL;

	list_for_each_entry(chain, &cp->ccwchain_list, next) {
		len = chain->ch_len;
		for (idx = 0; idx < len; idx++) {
			ret = ccwchain_fetch_one(chain, idx, cp);
			if (ret)
				goto out_err;
		}
	}

	return 0;
out_err:
	/* Only cleanup the chain elements that were actually translated. */
	chain->ch_len = idx;
	list_for_each_entry_continue(chain, &cp->ccwchain_list, next) {
		chain->ch_len = 0;
	}
	return ret;
}

/**
 * cp_get_orb() - get the orb of the channel program
 * @cp: channel_program on which to perform the operation
 * @intparm: new intparm for the returned orb
 * @lpm: candidate value of the logical-path mask for the returned orb
 *
 * This function returns the address of the updated orb of the channel
 * program. Channel I/O device drivers could use this orb to issue a
 * device-specific I/O request.
 *
 * Returns:
 *   a pointer to the updated orb on success, or NULL if @cp was never
 *   initialized.
 */
union orb *cp_get_orb(struct channel_program *cp, u32 intparm, u8 lpm)
{
	union orb *orb;
	struct ccwchain *chain;
	struct ccw1 *cpa;

	/* this is an error in the caller */
	if (!cp->initialized)
		return NULL;

	orb = &cp->orb;

	orb->cmd.intparm = intparm;
	orb->cmd.fmt = 1;
	orb->cmd.key = PAGE_DEFAULT_KEY >> 4;

	if (orb->cmd.lpm == 0)
		orb->cmd.lpm = lpm;

	chain = list_first_entry(&cp->ccwchain_list, struct ccwchain, next);
	cpa = chain->ch_ccw;
	orb->cmd.cpa = (__u32) __pa(cpa);

	return orb;
}

/**
 * cp_update_scsw() - update scsw for a channel program.
 * @cp: channel_program on which to perform the operation
 * @scsw: I/O results of the channel program and also the target to be
 *        updated
 *
 * @scsw contains the I/O results of the channel program pointed
 * to by @cp. However what @scsw->cpa stores is a host physical
 * address, which is meaningless for the guest, which is waiting for
 * the I/O results.
 *
 * This function updates @scsw->cpa to its corresponding guest physical
 * address.
 */
void cp_update_scsw(struct channel_program *cp, union scsw *scsw)
{
	struct ccwchain *chain;
	u32 cpa = scsw->cmd.cpa;
	u32 ccw_head;

	if (!cp->initialized)
		return;

	/*
	 * For now, only update the cmd.cpa part. We may need to deal with
	 * other portions of the schib as well, even if we don't return them
	 * in the ioctl directly. Path status changes etc.
	 */
	list_for_each_entry(chain, &cp->ccwchain_list, next) {
		ccw_head = (u32)(u64)chain->ch_ccw;
		/*
		 * On successful execution, cpa points just beyond the end
		 * of the chain.
		 */
		if (is_cpa_within_range(cpa, ccw_head, chain->ch_len + 1)) {
			/*
			 * (cpa - ccw_head) is the offset value of the host
			 * physical ccw to its chain head.
			 * Adding this value to the guest physical ccw chain
			 * head gets us the guest cpa.
			 */
			cpa = chain->ch_iova + (cpa - ccw_head);
			break;
		}
	}

	scsw->cmd.cpa = cpa;
}

/**
 * cp_iova_pinned() - check if an iova is pinned for a ccw chain.
 * @cp: channel_program on which to perform the operation
 * @iova: the iova to check
 * @length: the length to check from @iova
 *
 * If the @iova is currently pinned for the ccw chain, return true;
 * else return false.
 */
bool cp_iova_pinned(struct channel_program *cp, u64 iova, u64 length)
{
	struct ccwchain *chain;
	int i;

	if (!cp->initialized)
		return false;

	list_for_each_entry(chain, &cp->ccwchain_list, next) {
		for (i = 0; i < chain->ch_len; i++)
			if (page_array_iova_pinned(chain->ch_pa + i, iova, length))
				return true;
	}

	return false;
}