KVM: s390: Extend MEM_OP ioctl by storage key checked cmpxchg
author Janis Schoetterl-Glausch <scgl@linux.ibm.com>
Mon, 6 Feb 2023 16:46:00 +0000 (17:46 +0100)
committer Janosch Frank <frankja@linux.ibm.com>
Tue, 7 Feb 2023 17:06:00 +0000 (18:06 +0100)
User space can use the MEM_OP ioctl to make storage key checked reads
and writes to the guest. However, it has no way of performing atomic,
key checked accesses to the guest.
Extend the MEM_OP ioctl in order to allow for this, by adding a cmpxchg
op. For now, support this op for absolute accesses only.

This op can be used, for example, to set the device-state-change
indicator and the adapter-local-summary indicator atomically.

Signed-off-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
Link: https://lore.kernel.org/r/20230206164602.138068-13-scgl@linux.ibm.com
Message-Id: <20230206164602.138068-13-scgl@linux.ibm.com>
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
arch/s390/kvm/gaccess.c
arch/s390/kvm/gaccess.h
arch/s390/kvm/kvm-s390.c
include/uapi/linux/kvm.h

index 0243b6e..3eb85f2 100644 (file)
@@ -1162,6 +1162,120 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
 }
 
 /**
+ * cmpxchg_guest_abs_with_key() - Perform cmpxchg on guest absolute address.
+ * @kvm: Virtual machine instance.
+ * @gpa: Absolute guest address of the location to be changed.
+ * @len: Operand length of the cmpxchg, required: 1 <= len <= 16. Providing a
+ *       non power of two will result in failure.
+ * @old_addr: Pointer to old value. If the location at @gpa contains this value,
+ *            the exchange will succeed. After calling cmpxchg_guest_abs_with_key()
+ *            *@old_addr contains the value at @gpa before the attempt to
+ *            exchange the value.
+ * @new: The value to place at @gpa.
+ * @access_key: The access key to use for the guest access.
+ * @success: output value indicating if an exchange occurred. Always written;
+ *           false on every error return.
+ *
+ * Atomically exchange the value at @gpa by @new, if it contains *@old_addr.
+ * Honors storage keys.
+ *
+ * Return: * 0: no error; whether the exchange took place is reported
+ *              via *@success
+ *         * >0: a program interruption code indicating the reason cmpxchg could
+ *               not be attempted
+ *         * -EINVAL: address misaligned or len not power of two
+ *         * -EAGAIN: transient failure (len 1 or 2)
+ *         * -EOPNOTSUPP: read-only memslot (should never occur)
+ */
+int cmpxchg_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, int len,
+                              __uint128_t *old_addr, __uint128_t new,
+                              u8 access_key, bool *success)
+{
+       gfn_t gfn = gpa_to_gfn(gpa);
+       struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
+       bool writable;
+       hva_t hva;
+       int ret;
+
+       /* Report "no exchange" on every early error return below. */
+       *success = false;
+
+       if (!IS_ALIGNED(gpa, len))
+               return -EINVAL;
+
+       hva = gfn_to_hva_memslot_prot(slot, gfn, &writable);
+       if (kvm_is_error_hva(hva))
+               return PGM_ADDRESSING;
+       /*
+        * Check if it's a read-only memslot, even though that cannot occur
+        * since those are unsupported.
+        * Don't try to actually handle that case.
+        */
+       if (!writable)
+               return -EOPNOTSUPP;
+
+       hva += offset_in_page(gpa);
+       /*
+        * The cmpxchg_user_key macro depends on the type of "old", so we need
+        * a case for each valid length and get some code duplication as long
+        * as we don't introduce a new macro.
+        */
+       switch (len) {
+       case 1: {
+               u8 old;
+
+               ret = cmpxchg_user_key((u8 __user *)hva, &old, *old_addr, new, access_key);
+               *success = !ret && old == *old_addr;
+               *old_addr = old;
+               break;
+       }
+       case 2: {
+               u16 old;
+
+               ret = cmpxchg_user_key((u16 __user *)hva, &old, *old_addr, new, access_key);
+               *success = !ret && old == *old_addr;
+               *old_addr = old;
+               break;
+       }
+       case 4: {
+               u32 old;
+
+               ret = cmpxchg_user_key((u32 __user *)hva, &old, *old_addr, new, access_key);
+               *success = !ret && old == *old_addr;
+               *old_addr = old;
+               break;
+       }
+       case 8: {
+               u64 old;
+
+               ret = cmpxchg_user_key((u64 __user *)hva, &old, *old_addr, new, access_key);
+               *success = !ret && old == *old_addr;
+               *old_addr = old;
+               break;
+       }
+       case 16: {
+               __uint128_t old;
+
+               ret = cmpxchg_user_key((__uint128_t __user *)hva, &old, *old_addr, new, access_key);
+               *success = !ret && old == *old_addr;
+               *old_addr = old;
+               break;
+       }
+       default:
+               return -EINVAL;
+       }
+       if (*success)
+               mark_page_dirty_in_slot(kvm, slot, gfn);
+       /*
+        * Assume that the fault is caused by protection, either key protection
+        * or user page write protection.
+        */
+       if (ret == -EFAULT)
+               ret = PGM_PROTECTION;
+       return ret;
+}
+
+/**
  * guest_translate_address_with_key - translate guest logical into guest absolute address
  * @vcpu: virtual cpu
  * @gva: Guest virtual address
index 9408d6c..b320d12 100644 (file)
@@ -206,6 +206,11 @@ int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
 int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
                       void *data, unsigned long len, enum gacc_mode mode);
 
+/* Storage key checked cmpxchg on a guest absolute address. */
+int cmpxchg_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, int len,
+                              __uint128_t *old_addr, __uint128_t new,
+                              u8 access_key, bool *success);
+
 /**
  * write_guest_with_key - copy data from kernel space to guest space
  * @vcpu: virtual cpu
index 17368d1..8dfda72 100644 (file)
@@ -584,7 +584,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_S390_VCPU_RESETS:
        case KVM_CAP_SET_GUEST_DEBUG:
        case KVM_CAP_S390_DIAG318:
-       case KVM_CAP_S390_MEM_OP_EXTENSION:
                r = 1;
                break;
        case KVM_CAP_SET_GUEST_DEBUG2:
@@ -598,6 +597,15 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_S390_MEM_OP:
                r = MEM_OP_MAX_SIZE;
                break;
+       case KVM_CAP_S390_MEM_OP_EXTENSION:
+               /*
+                * Flag bits indicating which extensions are supported.
+                * If r > 0, the base extension must also be supported/indicated,
+                * in order to maintain backwards compatibility.
+                */
+               r = KVM_S390_MEMOP_EXTENSION_CAP_BASE |
+                   KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG;
+               break;
        case KVM_CAP_NR_VCPUS:
        case KVM_CAP_MAX_VCPUS:
        case KVM_CAP_MAX_VCPU_ID:
@@ -2832,6 +2840,65 @@ out_unlock:
        return r;
 }
 
+/*
+ * Handle KVM_S390_MEMOP_ABSOLUTE_CMPXCHG: cmpxchg of mop->size bytes at guest
+ * absolute address mop->gaddr, using mop->key as access key.
+ *
+ * The new value is read from mop->buf and the expected old value from
+ * mop->old_addr. After the cmpxchg call, unless the exchange took place, the
+ * old value buffer is copied back to mop->old_addr.
+ *
+ * Return: the result of cmpxchg_guest_abs_with_key(), PGM_ADDRESSING if
+ * mop->gaddr is not a valid guest address, the error reported by
+ * mem_op_validate_common(), -EINVAL if mop->size exceeds 16 bytes, or
+ * -EFAULT if a user space buffer could not be accessed.
+ */
+static int kvm_s390_vm_mem_op_cmpxchg(struct kvm *kvm, struct kvm_s390_mem_op *mop)
+{
+       void __user *uaddr = (void __user *)mop->buf;
+       void __user *old_addr = (void __user *)mop->old_addr;
+       union {
+               __uint128_t quad;
+               char raw[sizeof(__uint128_t)];
+       } old = { .quad = 0 }, new = { .quad = 0 };
+       /* Operands occupy the trailing mop->size bytes of the quad. */
+       unsigned int off_in_quad = sizeof(new) - mop->size;
+       int r, srcu_idx;
+       /* Read after the cmpxchg call, so never leave it indeterminate. */
+       bool success = false;
+
+       r = mem_op_validate_common(mop, KVM_S390_MEMOP_F_SKEY_PROTECTION);
+       if (r)
+               return r;
+       /*
+        * This validates off_in_quad. Checking that size is a power
+        * of two is not necessary, as cmpxchg_guest_abs_with_key
+        * takes care of that.
+        */
+       if (mop->size > sizeof(new))
+               return -EINVAL;
+       if (copy_from_user(&new.raw[off_in_quad], uaddr, mop->size))
+               return -EFAULT;
+       if (copy_from_user(&old.raw[off_in_quad], old_addr, mop->size))
+               return -EFAULT;
+
+       srcu_idx = srcu_read_lock(&kvm->srcu);
+
+       if (kvm_is_error_gpa(kvm, mop->gaddr)) {
+               r = PGM_ADDRESSING;
+               goto out_unlock;
+       }
+
+       r = cmpxchg_guest_abs_with_key(kvm, mop->gaddr, mop->size, &old.quad,
+                                      new.quad, mop->key, &success);
+       if (!success && copy_to_user(old_addr, &old.raw[off_in_quad], mop->size))
+               r = -EFAULT;
+
+out_unlock:
+       srcu_read_unlock(&kvm->srcu, srcu_idx);
+       return r;
+}
+
 static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
 {
        /*
@@ -2850,6 +2917,8 @@ static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
        case KVM_S390_MEMOP_ABSOLUTE_READ:
        case KVM_S390_MEMOP_ABSOLUTE_WRITE:
                return kvm_s390_vm_mem_op_abs(kvm, mop);
+       case KVM_S390_MEMOP_ABSOLUTE_CMPXCHG:
+               return kvm_s390_vm_mem_op_cmpxchg(kvm, mop);
        default:
                return -EINVAL;
        }
index 55155e2..d2f3046 100644 (file)
@@ -583,6 +583,8 @@ struct kvm_s390_mem_op {
                struct {
                        __u8 ar;        /* the access register number */
                        __u8 key;       /* access key, ignored if flag unset */
+                       __u8 pad1[6];   /* ignored */
+                       __u64 old_addr; /* ignored unless op is ABSOLUTE_CMPXCHG */
                };
                __u32 sida_offset; /* offset into the sida */
                __u8 reserved[32]; /* ignored */
@@ -595,11 +597,17 @@ struct kvm_s390_mem_op {
 #define KVM_S390_MEMOP_SIDA_WRITE      3
 #define KVM_S390_MEMOP_ABSOLUTE_READ   4
 #define KVM_S390_MEMOP_ABSOLUTE_WRITE  5
+#define KVM_S390_MEMOP_ABSOLUTE_CMPXCHG        6
+
 /* flags for kvm_s390_mem_op->flags */
 #define KVM_S390_MEMOP_F_CHECK_ONLY            (1ULL << 0)
 #define KVM_S390_MEMOP_F_INJECT_EXCEPTION      (1ULL << 1)
 #define KVM_S390_MEMOP_F_SKEY_PROTECTION       (1ULL << 2)
 
+/* flag bits returned for KVM_CAP_S390_MEM_OP_EXTENSION, one per extension */
+#define KVM_S390_MEMOP_EXTENSION_CAP_BASE      (1 << 0)
+#define KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG   (1 << 1)
+
 /* for KVM_INTERRUPT */
 struct kvm_interrupt {
        /* in */