KVM: s390: pv: don't allow userspace to set the clock under PV
arch/s390/kvm/kvm-s390.c (platform/kernel/linux-rpi.git)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2020
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
11  *               Jason J. Herne <jjherne@us.ibm.com>
12  */
13
14 #define KMSG_COMPONENT "kvm-s390"
15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
16
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34 #include <linux/pgtable.h>
35
36 #include <asm/asm-offsets.h>
37 #include <asm/lowcore.h>
38 #include <asm/stp.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include <asm/ap.h>
47 #include <asm/uv.h>
48 #include <asm/fpu/api.h>
49 #include "kvm-s390.h"
50 #include "gaccess.h"
51
52 #define CREATE_TRACE_POINTS
53 #include "trace.h"
54 #include "trace-s390.h"
55
56 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
57 #define LOCAL_IRQS 32
58 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
59                            (KVM_MAX_VCPUS + LOCAL_IRQS))
60
61 const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
62         KVM_GENERIC_VM_STATS(),
63         STATS_DESC_COUNTER(VM, inject_io),
64         STATS_DESC_COUNTER(VM, inject_float_mchk),
65         STATS_DESC_COUNTER(VM, inject_pfault_done),
66         STATS_DESC_COUNTER(VM, inject_service_signal),
67         STATS_DESC_COUNTER(VM, inject_virtio)
68 };
69
70 const struct kvm_stats_header kvm_vm_stats_header = {
71         .name_size = KVM_STATS_NAME_SIZE,
72         .num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
73         .id_offset = sizeof(struct kvm_stats_header),
74         .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
75         .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
76                        sizeof(kvm_vm_stats_desc),
77 };
78
79 const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
80         KVM_GENERIC_VCPU_STATS(),
81         STATS_DESC_COUNTER(VCPU, exit_userspace),
82         STATS_DESC_COUNTER(VCPU, exit_null),
83         STATS_DESC_COUNTER(VCPU, exit_external_request),
84         STATS_DESC_COUNTER(VCPU, exit_io_request),
85         STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
86         STATS_DESC_COUNTER(VCPU, exit_stop_request),
87         STATS_DESC_COUNTER(VCPU, exit_validity),
88         STATS_DESC_COUNTER(VCPU, exit_instruction),
89         STATS_DESC_COUNTER(VCPU, exit_pei),
90         STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
91         STATS_DESC_COUNTER(VCPU, instruction_lctl),
92         STATS_DESC_COUNTER(VCPU, instruction_lctlg),
93         STATS_DESC_COUNTER(VCPU, instruction_stctl),
94         STATS_DESC_COUNTER(VCPU, instruction_stctg),
95         STATS_DESC_COUNTER(VCPU, exit_program_interruption),
96         STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
97         STATS_DESC_COUNTER(VCPU, exit_operation_exception),
98         STATS_DESC_COUNTER(VCPU, deliver_ckc),
99         STATS_DESC_COUNTER(VCPU, deliver_cputm),
100         STATS_DESC_COUNTER(VCPU, deliver_external_call),
101         STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
102         STATS_DESC_COUNTER(VCPU, deliver_service_signal),
103         STATS_DESC_COUNTER(VCPU, deliver_virtio),
104         STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
105         STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
106         STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
107         STATS_DESC_COUNTER(VCPU, deliver_program),
108         STATS_DESC_COUNTER(VCPU, deliver_io),
109         STATS_DESC_COUNTER(VCPU, deliver_machine_check),
110         STATS_DESC_COUNTER(VCPU, exit_wait_state),
111         STATS_DESC_COUNTER(VCPU, inject_ckc),
112         STATS_DESC_COUNTER(VCPU, inject_cputm),
113         STATS_DESC_COUNTER(VCPU, inject_external_call),
114         STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
115         STATS_DESC_COUNTER(VCPU, inject_mchk),
116         STATS_DESC_COUNTER(VCPU, inject_pfault_init),
117         STATS_DESC_COUNTER(VCPU, inject_program),
118         STATS_DESC_COUNTER(VCPU, inject_restart),
119         STATS_DESC_COUNTER(VCPU, inject_set_prefix),
120         STATS_DESC_COUNTER(VCPU, inject_stop_signal),
121         STATS_DESC_COUNTER(VCPU, instruction_epsw),
122         STATS_DESC_COUNTER(VCPU, instruction_gs),
123         STATS_DESC_COUNTER(VCPU, instruction_io_other),
124         STATS_DESC_COUNTER(VCPU, instruction_lpsw),
125         STATS_DESC_COUNTER(VCPU, instruction_lpswe),
126         STATS_DESC_COUNTER(VCPU, instruction_pfmf),
127         STATS_DESC_COUNTER(VCPU, instruction_ptff),
128         STATS_DESC_COUNTER(VCPU, instruction_sck),
129         STATS_DESC_COUNTER(VCPU, instruction_sckpf),
130         STATS_DESC_COUNTER(VCPU, instruction_stidp),
131         STATS_DESC_COUNTER(VCPU, instruction_spx),
132         STATS_DESC_COUNTER(VCPU, instruction_stpx),
133         STATS_DESC_COUNTER(VCPU, instruction_stap),
134         STATS_DESC_COUNTER(VCPU, instruction_iske),
135         STATS_DESC_COUNTER(VCPU, instruction_ri),
136         STATS_DESC_COUNTER(VCPU, instruction_rrbe),
137         STATS_DESC_COUNTER(VCPU, instruction_sske),
138         STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
139         STATS_DESC_COUNTER(VCPU, instruction_stsi),
140         STATS_DESC_COUNTER(VCPU, instruction_stfl),
141         STATS_DESC_COUNTER(VCPU, instruction_tb),
142         STATS_DESC_COUNTER(VCPU, instruction_tpi),
143         STATS_DESC_COUNTER(VCPU, instruction_tprot),
144         STATS_DESC_COUNTER(VCPU, instruction_tsch),
145         STATS_DESC_COUNTER(VCPU, instruction_sie),
146         STATS_DESC_COUNTER(VCPU, instruction_essa),
147         STATS_DESC_COUNTER(VCPU, instruction_sthyi),
148         STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
149         STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
150         STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
151         STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
152         STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
153         STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
154         STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
155         STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
156         STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
157         STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
158         STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
159         STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
160         STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
161         STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
162         STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
163         STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
164         STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
165         STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
166         STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
167         STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
168         STATS_DESC_COUNTER(VCPU, diag_9c_forward),
169         STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
170         STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
171         STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
172         STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
173         STATS_DESC_COUNTER(VCPU, pfault_sync)
174 };
175
176 const struct kvm_stats_header kvm_vcpu_stats_header = {
177         .name_size = KVM_STATS_NAME_SIZE,
178         .num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
179         .id_offset = sizeof(struct kvm_stats_header),
180         .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
181         .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
182                        sizeof(kvm_vcpu_stats_desc),
183 };
184
185 /* allow nested virtualization in KVM (if enabled by user space) */
186 static int nested;
187 module_param(nested, int, S_IRUGO);
188 MODULE_PARM_DESC(nested, "Nested virtualization support");
189
190 /* allow 1m huge page guest backing, if !nested */
191 static int hpage;
192 module_param(hpage, int, 0444);
193 MODULE_PARM_DESC(hpage, "1m huge page backing support");
194
195 /* maximum percentage of steal time for polling.  >100 is treated like 100 */
196 static u8 halt_poll_max_steal = 10;
197 module_param(halt_poll_max_steal, byte, 0644);
198 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
199
200 /* if set to true, the GISA will be initialized and used if available */
201 static bool use_gisa  = true;
202 module_param(use_gisa, bool, 0644);
203 MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
204
205 /* maximum diag9c forwarding per second */
206 unsigned int diag9c_forwarding_hz;
207 module_param(diag9c_forwarding_hz, uint, 0644);
208 MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
209
210 /*
211  * For now we handle at most 16 double words as this is what the s390 base
212  * kernel handles and stores in the prefix page. If we ever need to go beyond
213  * this, this requires changes to the code, but the external uapi can stay.
214  */
215 #define SIZE_INTERNAL 16
216
217 /*
218  * Base feature mask that defines default mask for facilities. Consists of the
219  * defines in FACILITIES_KVM and the non-hypervisor managed bits.
220  */
221 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
222 /*
223  * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
224  * and defines the facilities that can be enabled via a cpu model.
225  */
226 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
227
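/*
 * Number of 64-bit facility words that KVM handles; build-time checked
 * against the architected mask/list sizes below.
 */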
228 static unsigned long kvm_s390_fac_size(void)
229 {
230         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
231         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
232         BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
233                 sizeof(stfle_fac_list));
234
235         return SIZE_INTERNAL;
236 }
237
238 /* available cpu features supported by kvm */
239 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
240 /* available subfunctions indicated via query / "test bit" */
241 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
242
243 static struct gmap_notifier gmap_notifier;
244 static struct gmap_notifier vsie_gmap_notifier;
245 debug_info_t *kvm_s390_dbf;
246 debug_info_t *kvm_s390_dbf_uv;
247
248 /* Section: not file related */
249 int kvm_arch_hardware_enable(void)
250 {
251         /* every s390 is virtualization enabled ;-) */
252         return 0;
253 }
254
255 int kvm_arch_check_processor_compat(void *opaque)
256 {
257         return 0;
258 }
259
260 /* forward declarations */
261 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
262                               unsigned long end);
263 static int sca_switch_to_extended(struct kvm *kvm);
264
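/*
 * Adjust the epoch (and the epoch index, if the multiple-epoch facility
 * is active for this SIE block) after the host TOD clock was stepped by
 * @delta.
 */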
265 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
266 {
267         u8 delta_idx = 0;
268
269         /*
270          * The TOD jumps by delta, we have to compensate this by adding
271          * -delta to the epoch.
272          */
273         delta = -delta;
274
275         /* sign-extension - we're adding to signed values below */
276         if ((s64)delta < 0)
277                 delta_idx = -1;
278
279         scb->epoch += delta;
280         if (scb->ecd & ECD_MEF) {
281                 scb->epdx += delta_idx;
282                 if (scb->epoch < delta)
283                         scb->epdx += 1;
284         }
285 }
286
287 /*
288  * This callback is executed during stop_machine(). All CPUs are therefore
289  * temporarily stopped. In order not to change guest behavior, we have to
290  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
291  * so a CPU won't be stopped while calculating with the epoch.
292  */
293 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
294                           void *v)
295 {
296         struct kvm *kvm;
297         struct kvm_vcpu *vcpu;
298         int i;
299         unsigned long long *delta = v;
300
301         list_for_each_entry(kvm, &vm_list, vm_list) {
302                 kvm_for_each_vcpu(i, vcpu, kvm) {
303                         kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
304                         if (i == 0) {
305                                 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
306                                 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
307                         }
308                         if (vcpu->arch.cputm_enabled)
309                                 vcpu->arch.cputm_start += *delta;
310                         if (vcpu->arch.vsie_block)
311                                 kvm_clock_sync_scb(vcpu->arch.vsie_block,
312                                                    *delta);
313                 }
314         }
315         return NOTIFY_OK;
316 }
317
318 static struct notifier_block kvm_clock_notifier = {
319         .notifier_call = kvm_clock_sync,
320 };
321
322 int kvm_arch_hardware_setup(void *opaque)
323 {
324         gmap_notifier.notifier_call = kvm_gmap_notifier;
325         gmap_register_pte_notifier(&gmap_notifier);
326         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
327         gmap_register_pte_notifier(&vsie_gmap_notifier);
328         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
329                                        &kvm_clock_notifier);
330         return 0;
331 }
332
333 void kvm_arch_hardware_unsetup(void)
334 {
335         gmap_unregister_pte_notifier(&gmap_notifier);
336         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
337         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
338                                          &kvm_clock_notifier);
339 }
340
341 static void allow_cpu_feat(unsigned long nr)
342 {
343         set_bit_inv(nr, kvm_s390_available_cpu_feat);
344 }
345
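/*
 * Check whether the PERFORM LOCKED OPERATION function @nr is available.
 * Bit 0x100 in the function code selects the "test bit" form, which only
 * reports availability via the condition code.
 */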
346 static inline int plo_test_bit(unsigned char nr)
347 {
348         unsigned long function = (unsigned long)nr | 0x100;
349         int cc;
350
351         asm volatile(
352                 "       lgr     0,%[function]\n"
353                 /* Parameter registers are ignored for "test bit" */
354                 "       plo     0,0,0,0(0)\n"
355                 "       ipm     %0\n"
356                 "       srl     %0,28\n"
357                 : "=d" (cc)
358                 : [function] "d" (function)
359                 : "cc", "0");
360         return cc == 0;
361 }
362
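/*
 * Execute the query function (GR0 = 0) of the instruction given by
 * @opcode and store the result in the buffer addressed by @query (GR1).
 */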
363 static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
364 {
365         asm volatile(
366                 "       lghi    0,0\n"
367                 "       lgr     1,%[query]\n"
368                 /* Parameter registers are ignored */
369                 "       .insn   rrf,%[opc] << 16,2,4,6,0\n"
370                 :
371                 : [query] "d" ((unsigned long)query), [opc] "i" (opcode)
372                 : "cc", "memory", "0", "1");
373 }
374
375 #define INSN_SORTL 0xb938
376 #define INSN_DFLTCC 0xb939
377
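/*
 * Probe which CPU subfunctions and SIE features the host provides, so
 * that they can later be offered to guests via the cpu model interface.
 */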
378 static void kvm_s390_cpu_feat_init(void)
379 {
380         int i;
381
382         for (i = 0; i < 256; ++i) {
383                 if (plo_test_bit(i))
384                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
385         }
386
387         if (test_facility(28)) /* TOD-clock steering */
388                 ptff(kvm_s390_available_subfunc.ptff,
389                      sizeof(kvm_s390_available_subfunc.ptff),
390                      PTFF_QAF);
391
392         if (test_facility(17)) { /* MSA */
393                 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
394                               kvm_s390_available_subfunc.kmac);
395                 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
396                               kvm_s390_available_subfunc.kmc);
397                 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
398                               kvm_s390_available_subfunc.km);
399                 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
400                               kvm_s390_available_subfunc.kimd);
401                 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
402                               kvm_s390_available_subfunc.klmd);
403         }
404         if (test_facility(76)) /* MSA3 */
405                 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
406                               kvm_s390_available_subfunc.pckmo);
407         if (test_facility(77)) { /* MSA4 */
408                 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
409                               kvm_s390_available_subfunc.kmctr);
410                 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
411                               kvm_s390_available_subfunc.kmf);
412                 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
413                               kvm_s390_available_subfunc.kmo);
414                 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
415                               kvm_s390_available_subfunc.pcc);
416         }
417         if (test_facility(57)) /* MSA5 */
418                 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
419                               kvm_s390_available_subfunc.ppno);
420
421         if (test_facility(146)) /* MSA8 */
422                 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
423                               kvm_s390_available_subfunc.kma);
424
425         if (test_facility(155)) /* MSA9 */
426                 __cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
427                               kvm_s390_available_subfunc.kdsa);
428
429         if (test_facility(150)) /* SORTL */
430                 __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
431
432         if (test_facility(151)) /* DFLTCC */
433                 __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
434
435         if (MACHINE_HAS_ESOP)
436                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
437         /*
438          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
439          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
440          */
441         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
442             !test_facility(3) || !nested)
443                 return;
444         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
445         if (sclp.has_64bscao)
446                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
447         if (sclp.has_siif)
448                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
449         if (sclp.has_gpere)
450                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
451         if (sclp.has_gsls)
452                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
453         if (sclp.has_ib)
454                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
455         if (sclp.has_cei)
456                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
457         if (sclp.has_ibs)
458                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
459         if (sclp.has_kss)
460                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
461         /*
462          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
463          * all skey handling functions read/set the skey from the PGSTE
464          * instead of the real storage key.
465          *
466          * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
467          * pages be detected as preserved although they are resident.
468          *
469          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
470          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
471          *
472          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
473          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
474          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
475          *
476          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
477          * cannot easily shadow the SCA because of the ipte lock.
478          */
479 }
480
481 int kvm_arch_init(void *opaque)
482 {
483         int rc = -ENOMEM;
484
485         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
486         if (!kvm_s390_dbf)
487                 return -ENOMEM;
488
489         kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
490         if (!kvm_s390_dbf_uv)
491                 goto out;
492
493         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
494             debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
495                 goto out;
496
497         kvm_s390_cpu_feat_init();
498
499         /* Register floating interrupt controller interface. */
500         rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
501         if (rc) {
502                 pr_err("A FLIC registration call failed with rc=%d\n", rc);
503                 goto out;
504         }
505
506         rc = kvm_s390_gib_init(GAL_ISC);
507         if (rc)
508                 goto out;
509
510         return 0;
511
512 out:
513         kvm_arch_exit();
514         return rc;
515 }
516
517 void kvm_arch_exit(void)
518 {
519         kvm_s390_gib_destroy();
520         debug_unregister(kvm_s390_dbf);
521         debug_unregister(kvm_s390_dbf_uv);
522 }
523
524 /* Section: device related */
525 long kvm_arch_dev_ioctl(struct file *filp,
526                         unsigned int ioctl, unsigned long arg)
527 {
528         if (ioctl == KVM_S390_ENABLE_SIE)
529                 return s390_enable_sie();
530         return -EINVAL;
531 }
532
533 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
534 {
535         int r;
536
537         switch (ext) {
538         case KVM_CAP_S390_PSW:
539         case KVM_CAP_S390_GMAP:
540         case KVM_CAP_SYNC_MMU:
541 #ifdef CONFIG_KVM_S390_UCONTROL
542         case KVM_CAP_S390_UCONTROL:
543 #endif
544         case KVM_CAP_ASYNC_PF:
545         case KVM_CAP_SYNC_REGS:
546         case KVM_CAP_ONE_REG:
547         case KVM_CAP_ENABLE_CAP:
548         case KVM_CAP_S390_CSS_SUPPORT:
549         case KVM_CAP_IOEVENTFD:
550         case KVM_CAP_DEVICE_CTRL:
551         case KVM_CAP_S390_IRQCHIP:
552         case KVM_CAP_VM_ATTRIBUTES:
553         case KVM_CAP_MP_STATE:
554         case KVM_CAP_IMMEDIATE_EXIT:
555         case KVM_CAP_S390_INJECT_IRQ:
556         case KVM_CAP_S390_USER_SIGP:
557         case KVM_CAP_S390_USER_STSI:
558         case KVM_CAP_S390_SKEYS:
559         case KVM_CAP_S390_IRQ_STATE:
560         case KVM_CAP_S390_USER_INSTR0:
561         case KVM_CAP_S390_CMMA_MIGRATION:
562         case KVM_CAP_S390_AIS:
563         case KVM_CAP_S390_AIS_MIGRATION:
564         case KVM_CAP_S390_VCPU_RESETS:
565         case KVM_CAP_SET_GUEST_DEBUG:
566         case KVM_CAP_S390_DIAG318:
567                 r = 1;
568                 break;
569         case KVM_CAP_SET_GUEST_DEBUG2:
570                 r = KVM_GUESTDBG_VALID_MASK;
571                 break;
572         case KVM_CAP_S390_HPAGE_1M:
573                 r = 0;
574                 if (hpage && !kvm_is_ucontrol(kvm))
575                         r = 1;
576                 break;
577         case KVM_CAP_S390_MEM_OP:
578                 r = MEM_OP_MAX_SIZE;
579                 break;
580         case KVM_CAP_NR_VCPUS:
581         case KVM_CAP_MAX_VCPUS:
582         case KVM_CAP_MAX_VCPU_ID:
583                 r = KVM_S390_BSCA_CPU_SLOTS;
584                 if (!kvm_s390_use_sca_entries())
585                         r = KVM_MAX_VCPUS;
586                 else if (sclp.has_esca && sclp.has_64bscao)
587                         r = KVM_S390_ESCA_CPU_SLOTS;
588                 break;
589         case KVM_CAP_S390_COW:
590                 r = MACHINE_HAS_ESOP;
591                 break;
592         case KVM_CAP_S390_VECTOR_REGISTERS:
593                 r = MACHINE_HAS_VX;
594                 break;
595         case KVM_CAP_S390_RI:
596                 r = test_facility(64);
597                 break;
598         case KVM_CAP_S390_GS:
599                 r = test_facility(133);
600                 break;
601         case KVM_CAP_S390_BPB:
602                 r = test_facility(82);
603                 break;
604         case KVM_CAP_S390_PROTECTED:
605                 r = is_prot_virt_host();
606                 break;
607         default:
608                 r = 0;
609         }
610         return r;
611 }
612
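/*
 * Transfer the dirty bits tracked in the gmap to KVM's dirty bitmap,
 * one guest segment (_PAGE_ENTRIES pages) at a time.
 */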
613 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
614 {
615         int i;
616         gfn_t cur_gfn, last_gfn;
617         unsigned long gaddr, vmaddr;
618         struct gmap *gmap = kvm->arch.gmap;
619         DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
620
621         /* Loop over all guest segments */
622         cur_gfn = memslot->base_gfn;
623         last_gfn = memslot->base_gfn + memslot->npages;
624         for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
625                 gaddr = gfn_to_gpa(cur_gfn);
626                 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
627                 if (kvm_is_error_hva(vmaddr))
628                         continue;
629
630                 bitmap_zero(bitmap, _PAGE_ENTRIES);
631                 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
632                 for (i = 0; i < _PAGE_ENTRIES; i++) {
633                         if (test_bit(i, bitmap))
634                                 mark_page_dirty(kvm, cur_gfn + i);
635                 }
636
637                 if (fatal_signal_pending(current))
638                         return;
639                 cond_resched();
640         }
641 }
642
643 /* Section: vm related */
644 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
645
646 /*
647  * Get (and clear) the dirty memory log for a memory slot.
648  */
649 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
650                                struct kvm_dirty_log *log)
651 {
652         int r;
653         unsigned long n;
654         struct kvm_memory_slot *memslot;
655         int is_dirty;
656
657         if (kvm_is_ucontrol(kvm))
658                 return -EINVAL;
659
660         mutex_lock(&kvm->slots_lock);
661
662         r = -EINVAL;
663         if (log->slot >= KVM_USER_MEM_SLOTS)
664                 goto out;
665
666         r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
667         if (r)
668                 goto out;
669
670         /* Clear the dirty log */
671         if (is_dirty) {
672                 n = kvm_dirty_bitmap_bytes(memslot);
673                 memset(memslot->dirty_bitmap, 0, n);
674         }
675         r = 0;
676 out:
677         mutex_unlock(&kvm->slots_lock);
678         return r;
679 }
680
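/* Request operation-exception interception on every vcpu of this VM. */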
681 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
682 {
683         unsigned int i;
684         struct kvm_vcpu *vcpu;
685
686         kvm_for_each_vcpu(i, vcpu, kvm) {
687                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
688         }
689 }
690
691 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
692 {
693         int r;
694
695         if (cap->flags)
696                 return -EINVAL;
697
698         switch (cap->cap) {
699         case KVM_CAP_S390_IRQCHIP:
700                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
701                 kvm->arch.use_irqchip = 1;
702                 r = 0;
703                 break;
704         case KVM_CAP_S390_USER_SIGP:
705                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
706                 kvm->arch.user_sigp = 1;
707                 r = 0;
708                 break;
709         case KVM_CAP_S390_VECTOR_REGISTERS:
710                 mutex_lock(&kvm->lock);
711                 if (kvm->created_vcpus) {
712                         r = -EBUSY;
713                 } else if (MACHINE_HAS_VX) {
714                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
715                         set_kvm_facility(kvm->arch.model.fac_list, 129);
716                         if (test_facility(134)) {
717                                 set_kvm_facility(kvm->arch.model.fac_mask, 134);
718                                 set_kvm_facility(kvm->arch.model.fac_list, 134);
719                         }
720                         if (test_facility(135)) {
721                                 set_kvm_facility(kvm->arch.model.fac_mask, 135);
722                                 set_kvm_facility(kvm->arch.model.fac_list, 135);
723                         }
724                         if (test_facility(148)) {
725                                 set_kvm_facility(kvm->arch.model.fac_mask, 148);
726                                 set_kvm_facility(kvm->arch.model.fac_list, 148);
727                         }
728                         if (test_facility(152)) {
729                                 set_kvm_facility(kvm->arch.model.fac_mask, 152);
730                                 set_kvm_facility(kvm->arch.model.fac_list, 152);
731                         }
732                         if (test_facility(192)) {
733                                 set_kvm_facility(kvm->arch.model.fac_mask, 192);
734                                 set_kvm_facility(kvm->arch.model.fac_list, 192);
735                         }
736                         r = 0;
737                 } else
738                         r = -EINVAL;
739                 mutex_unlock(&kvm->lock);
740                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
741                          r ? "(not available)" : "(success)");
742                 break;
743         case KVM_CAP_S390_RI:
744                 r = -EINVAL;
745                 mutex_lock(&kvm->lock);
746                 if (kvm->created_vcpus) {
747                         r = -EBUSY;
748                 } else if (test_facility(64)) {
749                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
750                         set_kvm_facility(kvm->arch.model.fac_list, 64);
751                         r = 0;
752                 }
753                 mutex_unlock(&kvm->lock);
754                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
755                          r ? "(not available)" : "(success)");
756                 break;
757         case KVM_CAP_S390_AIS:
758                 mutex_lock(&kvm->lock);
759                 if (kvm->created_vcpus) {
760                         r = -EBUSY;
761                 } else {
762                         set_kvm_facility(kvm->arch.model.fac_mask, 72);
763                         set_kvm_facility(kvm->arch.model.fac_list, 72);
764                         r = 0;
765                 }
766                 mutex_unlock(&kvm->lock);
767                 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
768                          r ? "(not available)" : "(success)");
769                 break;
770         case KVM_CAP_S390_GS:
771                 r = -EINVAL;
772                 mutex_lock(&kvm->lock);
773                 if (kvm->created_vcpus) {
774                         r = -EBUSY;
775                 } else if (test_facility(133)) {
776                         set_kvm_facility(kvm->arch.model.fac_mask, 133);
777                         set_kvm_facility(kvm->arch.model.fac_list, 133);
778                         r = 0;
779                 }
780                 mutex_unlock(&kvm->lock);
781                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
782                          r ? "(not available)" : "(success)");
783                 break;
784         case KVM_CAP_S390_HPAGE_1M:
785                 mutex_lock(&kvm->lock);
786                 if (kvm->created_vcpus)
787                         r = -EBUSY;
788                 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
789                         r = -EINVAL;
790                 else {
791                         r = 0;
792                         mmap_write_lock(kvm->mm);
793                         kvm->mm->context.allow_gmap_hpage_1m = 1;
794                         mmap_write_unlock(kvm->mm);
795                         /*
796                          * We might have to create fake 4k page
797                          * tables. To prevent the hardware from working on
798                          * stale PGSTEs, we emulate these instructions.
799                          */
800                         kvm->arch.use_skf = 0;
801                         kvm->arch.use_pfmfi = 0;
802                 }
803                 mutex_unlock(&kvm->lock);
804                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
805                          r ? "(not available)" : "(success)");
806                 break;
807         case KVM_CAP_S390_USER_STSI:
808                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
809                 kvm->arch.user_stsi = 1;
810                 r = 0;
811                 break;
812         case KVM_CAP_S390_USER_INSTR0:
813                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
814                 kvm->arch.user_instr0 = 1;
815                 icpt_operexc_on_all_vcpus(kvm);
816                 r = 0;
817                 break;
818         default:
819                 r = -EINVAL;
820                 break;
821         }
822         return r;
823 }
824
825 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
826 {
827         int ret;
828
829         switch (attr->attr) {
830         case KVM_S390_VM_MEM_LIMIT_SIZE:
831                 ret = 0;
832                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
833                          kvm->arch.mem_limit);
834                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
835                         ret = -EFAULT;
836                 break;
837         default:
838                 ret = -ENXIO;
839                 break;
840         }
841         return ret;
842 }
843
844 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
845 {
846         int ret;
847         unsigned int idx;
848         switch (attr->attr) {
849         case KVM_S390_VM_MEM_ENABLE_CMMA:
850                 ret = -ENXIO;
851                 if (!sclp.has_cmma)
852                         break;
853
854                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
855                 mutex_lock(&kvm->lock);
856                 if (kvm->created_vcpus)
857                         ret = -EBUSY;
858                 else if (kvm->mm->context.allow_gmap_hpage_1m)
859                         ret = -EINVAL;
860                 else {
861                         kvm->arch.use_cmma = 1;
862                         /* Not compatible with cmma. */
863                         kvm->arch.use_pfmfi = 0;
864                         ret = 0;
865                 }
866                 mutex_unlock(&kvm->lock);
867                 break;
868         case KVM_S390_VM_MEM_CLR_CMMA:
869                 ret = -ENXIO;
870                 if (!sclp.has_cmma)
871                         break;
872                 ret = -EINVAL;
873                 if (!kvm->arch.use_cmma)
874                         break;
875
876                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
877                 mutex_lock(&kvm->lock);
878                 idx = srcu_read_lock(&kvm->srcu);
879                 s390_reset_cmma(kvm->arch.gmap->mm);
880                 srcu_read_unlock(&kvm->srcu, idx);
881                 mutex_unlock(&kvm->lock);
882                 ret = 0;
883                 break;
884         case KVM_S390_VM_MEM_LIMIT_SIZE: {
885                 unsigned long new_limit;
886
887                 if (kvm_is_ucontrol(kvm))
888                         return -EINVAL;
889
890                 if (get_user(new_limit, (u64 __user *)attr->addr))
891                         return -EFAULT;
892
893                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
894                     new_limit > kvm->arch.mem_limit)
895                         return -E2BIG;
896
897                 if (!new_limit)
898                         return -EINVAL;
899
900                 /* gmap_create takes last usable address */
901                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
902                         new_limit -= 1;
903
904                 ret = -EBUSY;
905                 mutex_lock(&kvm->lock);
906                 if (!kvm->created_vcpus) {
907                         /* gmap_create will round the limit up */
908                         struct gmap *new = gmap_create(current->mm, new_limit);
909
910                         if (!new) {
911                                 ret = -ENOMEM;
912                         } else {
913                                 gmap_remove(kvm->arch.gmap);
914                                 new->private = kvm;
915                                 kvm->arch.gmap = new;
916                                 ret = 0;
917                         }
918                 }
919                 mutex_unlock(&kvm->lock);
920                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
921                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
922                          (void *) kvm->arch.gmap->asce);
923                 break;
924         }
925         default:
926                 ret = -ENXIO;
927                 break;
928         }
929         return ret;
930 }
931
932 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
933
934 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
935 {
936         struct kvm_vcpu *vcpu;
937         int i;
938
939         kvm_s390_vcpu_block_all(kvm);
940
941         kvm_for_each_vcpu(i, vcpu, kvm) {
942                 kvm_s390_vcpu_crypto_setup(vcpu);
943                 /* recreate the shadow crycb by leaving the VSIE handler */
944                 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
945         }
946
947         kvm_s390_vcpu_unblock_all(kvm);
948 }
949
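/*
 * Handle the KVM_S390_VM_CRYPTO_* attributes: generate or clear the AES/DEA
 * wrapping key masks, toggle AP instruction interpretation (APIE), and then
 * reset the crypto setup of all vcpus.
 */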
950 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
951 {
952         mutex_lock(&kvm->lock);
953         switch (attr->attr) {
954         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
955                 if (!test_kvm_facility(kvm, 76)) {
956                         mutex_unlock(&kvm->lock);
957                         return -EINVAL;
958                 }
959                 get_random_bytes(
960                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
961                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
962                 kvm->arch.crypto.aes_kw = 1;
963                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
964                 break;
965         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
966                 if (!test_kvm_facility(kvm, 76)) {
967                         mutex_unlock(&kvm->lock);
968                         return -EINVAL;
969                 }
970                 get_random_bytes(
971                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
972                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
973                 kvm->arch.crypto.dea_kw = 1;
974                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
975                 break;
976         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
977                 if (!test_kvm_facility(kvm, 76)) {
978                         mutex_unlock(&kvm->lock);
979                         return -EINVAL;
980                 }
981                 kvm->arch.crypto.aes_kw = 0;
982                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
983                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
984                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
985                 break;
986         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
987                 if (!test_kvm_facility(kvm, 76)) {
988                         mutex_unlock(&kvm->lock);
989                         return -EINVAL;
990                 }
991                 kvm->arch.crypto.dea_kw = 0;
992                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
993                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
994                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
995                 break;
996         case KVM_S390_VM_CRYPTO_ENABLE_APIE:
997                 if (!ap_instructions_available()) {
998                         mutex_unlock(&kvm->lock);
999                         return -EOPNOTSUPP;
1000                 }
1001                 kvm->arch.crypto.apie = 1;
1002                 break;
1003         case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1004                 if (!ap_instructions_available()) {
1005                         mutex_unlock(&kvm->lock);
1006                         return -EOPNOTSUPP;
1007                 }
1008                 kvm->arch.crypto.apie = 0;
1009                 break;
1010         default:
1011                 mutex_unlock(&kvm->lock);
1012                 return -ENXIO;
1013         }
1014
1015         kvm_s390_vcpu_crypto_reset_all(kvm);
1016         mutex_unlock(&kvm->lock);
1017         return 0;
1018 }
1019
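/* Post the given synchronous request to every vcpu of the VM. */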
1020 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
1021 {
1022         int cx;
1023         struct kvm_vcpu *vcpu;
1024
1025         kvm_for_each_vcpu(cx, vcpu, kvm)
1026                 kvm_s390_sync_request(req, vcpu);
1027 }
1028
1029 /*
1030  * Must be called with kvm->srcu held to avoid races on memslots, and with
1031  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1032  */
1033 static int kvm_s390_vm_start_migration(struct kvm *kvm)
1034 {
1035         struct kvm_memory_slot *ms;
1036         struct kvm_memslots *slots;
1037         unsigned long ram_pages = 0;
1038         int slotnr;
1039
1040         /* migration mode already enabled */
1041         if (kvm->arch.migration_mode)
1042                 return 0;
1043         slots = kvm_memslots(kvm);
1044         if (!slots || !slots->used_slots)
1045                 return -EINVAL;
1046
1047         if (!kvm->arch.use_cmma) {
1048                 kvm->arch.migration_mode = 1;
1049                 return 0;
1050         }
1051         /* mark all the pages in active slots as dirty */
1052         for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1053                 ms = slots->memslots + slotnr;
1054                 if (!ms->dirty_bitmap)
1055                         return -EINVAL;
1056                 /*
1057                  * The second half of the bitmap is only used on x86,
1058                  * and would be wasted otherwise, so we put it to good
1059                  * use here to keep track of the state of the storage
1060                  * attributes.
1061                  */
1062                 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1063                 ram_pages += ms->npages;
1064         }
1065         atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1066         kvm->arch.migration_mode = 1;
1067         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1068         return 0;
1069 }
1070
1071 /*
1072  * Must be called with kvm->slots_lock to avoid races with ourselves and
1073  * kvm_s390_vm_start_migration.
1074  */
1075 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1076 {
1077         /* migration mode already disabled */
1078         if (!kvm->arch.migration_mode)
1079                 return 0;
1080         kvm->arch.migration_mode = 0;
1081         if (kvm->arch.use_cmma)
1082                 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1083         return 0;
1084 }
1085
1086 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1087                                      struct kvm_device_attr *attr)
1088 {
1089         int res = -ENXIO;
1090
1091         mutex_lock(&kvm->slots_lock);
1092         switch (attr->attr) {
1093         case KVM_S390_VM_MIGRATION_START:
1094                 res = kvm_s390_vm_start_migration(kvm);
1095                 break;
1096         case KVM_S390_VM_MIGRATION_STOP:
1097                 res = kvm_s390_vm_stop_migration(kvm);
1098                 break;
1099         default:
1100                 break;
1101         }
1102         mutex_unlock(&kvm->slots_lock);
1103
1104         return res;
1105 }
1106
1107 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1108                                      struct kvm_device_attr *attr)
1109 {
1110         u64 mig = kvm->arch.migration_mode;
1111
1112         if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1113                 return -ENXIO;
1114
1115         if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1116                 return -EFAULT;
1117         return 0;
1118 }
1119
1120 static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);
1121
1122 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1123 {
1124         struct kvm_s390_vm_tod_clock gtod;
1125
1126         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1127                 return -EFAULT;
1128
1129         if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1130                 return -EINVAL;
1131         __kvm_s390_set_tod_clock(kvm, &gtod);
1132
1133         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1134                 gtod.epoch_idx, gtod.tod);
1135
1136         return 0;
1137 }
1138
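/*
 * The high (epoch index) part of the TOD cannot be changed through this
 * attribute; only the value 0 is accepted.
 */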
1139 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1140 {
1141         u8 gtod_high;
1142
1143         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1144                                            sizeof(gtod_high)))
1145                 return -EFAULT;
1146
1147         if (gtod_high != 0)
1148                 return -EINVAL;
1149         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1150
1151         return 0;
1152 }
1153
1154 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1155 {
1156         struct kvm_s390_vm_tod_clock gtod = { 0 };
1157
1158         if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1159                            sizeof(gtod.tod)))
1160                 return -EFAULT;
1161
1162         __kvm_s390_set_tod_clock(kvm, &gtod);
1163         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1164         return 0;
1165 }
1166
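/*
 * Set the guest TOD via the KVM_S390_VM_TOD_* attributes. This is rejected
 * for protected guests, whose clock is managed by the ultravisor.
 */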
1167 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1168 {
1169         int ret;
1170
1171         if (attr->flags)
1172                 return -EINVAL;
1173
1174         mutex_lock(&kvm->lock);
1175         /*
1176          * For protected guests, the TOD is managed by the ultravisor, so trying
1177          * to change it will never bring the expected results.
1178          */
1179         if (kvm_s390_pv_is_protected(kvm)) {
1180                 ret = -EOPNOTSUPP;
1181                 goto out_unlock;
1182         }
1183
1184         switch (attr->attr) {
1185         case KVM_S390_VM_TOD_EXT:
1186                 ret = kvm_s390_set_tod_ext(kvm, attr);
1187                 break;
1188         case KVM_S390_VM_TOD_HIGH:
1189                 ret = kvm_s390_set_tod_high(kvm, attr);
1190                 break;
1191         case KVM_S390_VM_TOD_LOW:
1192                 ret = kvm_s390_set_tod_low(kvm, attr);
1193                 break;
1194         default:
1195                 ret = -ENXIO;
1196                 break;
1197         }
1198
1199 out_unlock:
1200         mutex_unlock(&kvm->lock);
1201         return ret;
1202 }
1203
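/*
 * Read the current guest TOD: host TOD plus the VM's epoch, carrying into
 * the epoch index when facility 139 (multiple epochs) is available.
 */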
1204 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1205                                    struct kvm_s390_vm_tod_clock *gtod)
1206 {
1207         union tod_clock clk;
1208
1209         preempt_disable();
1210
1211         store_tod_clock_ext(&clk);
1212
1213         gtod->tod = clk.tod + kvm->arch.epoch;
1214         gtod->epoch_idx = 0;
1215         if (test_kvm_facility(kvm, 139)) {
1216                 gtod->epoch_idx = clk.ei + kvm->arch.epdx;
1217                 if (gtod->tod < clk.tod)
1218                         gtod->epoch_idx += 1;
1219         }
1220
1221         preempt_enable();
1222 }
1223
1224 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1225 {
1226         struct kvm_s390_vm_tod_clock gtod;
1227
1228         memset(&gtod, 0, sizeof(gtod));
1229         kvm_s390_get_tod_clock(kvm, &gtod);
1230         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1231                 return -EFAULT;
1232
1233         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1234                 gtod.epoch_idx, gtod.tod);
1235         return 0;
1236 }
1237
1238 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1239 {
1240         u8 gtod_high = 0;
1241
1242         if (copy_to_user((void __user *)attr->addr, &gtod_high,
1243                                          sizeof(gtod_high)))
1244                 return -EFAULT;
1245         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1246
1247         return 0;
1248 }
1249
1250 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1251 {
1252         u64 gtod;
1253
1254         gtod = kvm_s390_get_tod_clock_fast(kvm);
1255         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1256                 return -EFAULT;
1257         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1258
1259         return 0;
1260 }
1261
1262 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1263 {
1264         int ret;
1265
1266         if (attr->flags)
1267                 return -EINVAL;
1268
1269         switch (attr->attr) {
1270         case KVM_S390_VM_TOD_EXT:
1271                 ret = kvm_s390_get_tod_ext(kvm, attr);
1272                 break;
1273         case KVM_S390_VM_TOD_HIGH:
1274                 ret = kvm_s390_get_tod_high(kvm, attr);
1275                 break;
1276         case KVM_S390_VM_TOD_LOW:
1277                 ret = kvm_s390_get_tod_low(kvm, attr);
1278                 break;
1279         default:
1280                 ret = -ENXIO;
1281                 break;
1282         }
1283         return ret;
1284 }
1285
1286 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1287 {
1288         struct kvm_s390_vm_cpu_processor *proc;
1289         u16 lowest_ibc, unblocked_ibc;
1290         int ret = 0;
1291
1292         mutex_lock(&kvm->lock);
1293         if (kvm->created_vcpus) {
1294                 ret = -EBUSY;
1295                 goto out;
1296         }
1297         proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1298         if (!proc) {
1299                 ret = -ENOMEM;
1300                 goto out;
1301         }
1302         if (!copy_from_user(proc, (void __user *)attr->addr,
1303                             sizeof(*proc))) {
1304                 kvm->arch.model.cpuid = proc->cpuid;
1305                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1306                 unblocked_ibc = sclp.ibc & 0xfff;
1307                 if (lowest_ibc && proc->ibc) {
1308                         if (proc->ibc > unblocked_ibc)
1309                                 kvm->arch.model.ibc = unblocked_ibc;
1310                         else if (proc->ibc < lowest_ibc)
1311                                 kvm->arch.model.ibc = lowest_ibc;
1312                         else
1313                                 kvm->arch.model.ibc = proc->ibc;
1314                 }
1315                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1316                        S390_ARCH_FAC_LIST_SIZE_BYTE);
1317                 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1318                          kvm->arch.model.ibc,
1319                          kvm->arch.model.cpuid);
1320                 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1321                          kvm->arch.model.fac_list[0],
1322                          kvm->arch.model.fac_list[1],
1323                          kvm->arch.model.fac_list[2]);
1324         } else
1325                 ret = -EFAULT;
1326         kfree(proc);
1327 out:
1328         mutex_unlock(&kvm->lock);
1329         return ret;
1330 }
1331
1332 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1333                                        struct kvm_device_attr *attr)
1334 {
1335         struct kvm_s390_vm_cpu_feat data;
1336
1337         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1338                 return -EFAULT;
1339         if (!bitmap_subset((unsigned long *) data.feat,
1340                            kvm_s390_available_cpu_feat,
1341                            KVM_S390_VM_CPU_FEAT_NR_BITS))
1342                 return -EINVAL;
1343
1344         mutex_lock(&kvm->lock);
1345         if (kvm->created_vcpus) {
1346                 mutex_unlock(&kvm->lock);
1347                 return -EBUSY;
1348         }
1349         bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1350                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1351         mutex_unlock(&kvm->lock);
1352         VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1353                          data.feat[0],
1354                          data.feat[1],
1355                          data.feat[2]);
1356         return 0;
1357 }
1358
1359 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1360                                           struct kvm_device_attr *attr)
1361 {
1362         mutex_lock(&kvm->lock);
1363         if (kvm->created_vcpus) {
1364                 mutex_unlock(&kvm->lock);
1365                 return -EBUSY;
1366         }
1367
1368         if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1369                            sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1370                 mutex_unlock(&kvm->lock);
1371                 return -EFAULT;
1372         }
1373         mutex_unlock(&kvm->lock);
1374
1375         VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1376                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1377                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1378                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1379                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1380         VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1381                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1382                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1383         VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1384                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1385                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1386         VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1387                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1388                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1389         VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1390                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1391                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1392         VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1393                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1394                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1395         VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1396                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1397                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1398         VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1399                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1400                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1401         VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1402                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1403                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1404         VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1405                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1406                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1407         VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1408                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1409                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1410         VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1411                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1412                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1413         VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1414                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1415                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1416         VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1417                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1418                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1419         VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1420                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1421                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1422         VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1423                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1424                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1425                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1426                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1427         VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1428                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1429                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1430                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1431                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1432
1433         return 0;
1434 }
1435
1436 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1437 {
1438         int ret = -ENXIO;
1439
1440         switch (attr->attr) {
1441         case KVM_S390_VM_CPU_PROCESSOR:
1442                 ret = kvm_s390_set_processor(kvm, attr);
1443                 break;
1444         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1445                 ret = kvm_s390_set_processor_feat(kvm, attr);
1446                 break;
1447         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1448                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1449                 break;
1450         }
1451         return ret;
1452 }
1453
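/*
 * Return the current guest CPU model (cpuid, ibc and facility list) to
 * userspace.
 */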
1454 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1455 {
1456         struct kvm_s390_vm_cpu_processor *proc;
1457         int ret = 0;
1458
1459         proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1460         if (!proc) {
1461                 ret = -ENOMEM;
1462                 goto out;
1463         }
1464         proc->cpuid = kvm->arch.model.cpuid;
1465         proc->ibc = kvm->arch.model.ibc;
1466         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1467                S390_ARCH_FAC_LIST_SIZE_BYTE);
1468         VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1469                  kvm->arch.model.ibc,
1470                  kvm->arch.model.cpuid);
1471         VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1472                  kvm->arch.model.fac_list[0],
1473                  kvm->arch.model.fac_list[1],
1474                  kvm->arch.model.fac_list[2]);
1475         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1476                 ret = -EFAULT;
1477         kfree(proc);
1478 out:
1479         return ret;
1480 }
1481
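/*
 * Return the host CPU model to userspace: the real cpuid and ibc together
 * with the facility mask and the full host facility list (STFLE).
 */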
1482 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1483 {
1484         struct kvm_s390_vm_cpu_machine *mach;
1485         int ret = 0;
1486
1487         mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
1488         if (!mach) {
1489                 ret = -ENOMEM;
1490                 goto out;
1491         }
1492         get_cpu_id((struct cpuid *) &mach->cpuid);
1493         mach->ibc = sclp.ibc;
1494         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1495                S390_ARCH_FAC_LIST_SIZE_BYTE);
1496         memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
1497                sizeof(stfle_fac_list));
1498         VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1499                  kvm->arch.model.ibc,
1500                  kvm->arch.model.cpuid);
1501         VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1502                  mach->fac_mask[0],
1503                  mach->fac_mask[1],
1504                  mach->fac_mask[2]);
1505         VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1506                  mach->fac_list[0],
1507                  mach->fac_list[1],
1508                  mach->fac_list[2]);
1509         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1510                 ret = -EFAULT;
1511         kfree(mach);
1512 out:
1513         return ret;
1514 }
1515
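/* Return the CPU features currently configured for the guest. */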
1516 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1517                                        struct kvm_device_attr *attr)
1518 {
1519         struct kvm_s390_vm_cpu_feat data;
1520
1521         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1522                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1523         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1524                 return -EFAULT;
1525         VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1526                          data.feat[0],
1527                          data.feat[1],
1528                          data.feat[2]);
1529         return 0;
1530 }
1531
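/* Return the CPU features that KVM can offer to guests on this host. */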
1532 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1533                                      struct kvm_device_attr *attr)
1534 {
1535         struct kvm_s390_vm_cpu_feat data;
1536
1537         bitmap_copy((unsigned long *) data.feat,
1538                     kvm_s390_available_cpu_feat,
1539                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1540         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1541                 return -EFAULT;
1542         VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1543                          data.feat[0],
1544                          data.feat[1],
1545                          data.feat[2]);
1546         return 0;
1547 }
1548
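/* Return the subfunction query results currently configured for the guest. */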
1549 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1550                                           struct kvm_device_attr *attr)
1551 {
1552         if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1553             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1554                 return -EFAULT;
1555
1556         VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1557                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1558                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1559                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1560                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1561         VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1562                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1563                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1564         VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1565                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1566                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1567         VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1568                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1569                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1570         VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1571                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1572                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1573         VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1574                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1575                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1576         VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1577                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1578                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1579         VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1580                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1581                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1582         VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1583                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1584                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1585         VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1586                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1587                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1588         VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1589                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1590                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1591         VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1592                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1593                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1594         VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1595                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1596                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1597         VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1598                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1599                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1600         VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1601                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1602                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1603         VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1604                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1605                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1606                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1607                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1608         VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1609                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1610                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1611                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1612                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1613
1614         return 0;
1615 }
1616
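/* Return the subfunction query results available on the host. */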
1617 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1618                                         struct kvm_device_attr *attr)
1619 {
1620         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1621             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1622                 return -EFAULT;
1623
1624         VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1625                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1626                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1627                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1628                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1629         VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1630                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1631                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1632         VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1633                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1634                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1635         VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1636                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1637                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1638         VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1639                  ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1640                  ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1641         VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1642                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1643                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1644         VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1645                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1646                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1647         VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1648                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1649                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1650         VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1651                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1652                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1653         VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1654                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1655                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1656         VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1657                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1658                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1659         VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1660                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1661                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1662         VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1663                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1664                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1665         VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1666                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1667                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1668         VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1669                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1670                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1671         VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1672                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1673                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1674                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1675                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1676         VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1677                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1678                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1679                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1680                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1681
1682         return 0;
1683 }
1684
1685 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1686 {
1687         int ret = -ENXIO;
1688
1689         switch (attr->attr) {
1690         case KVM_S390_VM_CPU_PROCESSOR:
1691                 ret = kvm_s390_get_processor(kvm, attr);
1692                 break;
1693         case KVM_S390_VM_CPU_MACHINE:
1694                 ret = kvm_s390_get_machine(kvm, attr);
1695                 break;
1696         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1697                 ret = kvm_s390_get_processor_feat(kvm, attr);
1698                 break;
1699         case KVM_S390_VM_CPU_MACHINE_FEAT:
1700                 ret = kvm_s390_get_machine_feat(kvm, attr);
1701                 break;
1702         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1703                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1704                 break;
1705         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1706                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1707                 break;
1708         }
1709         return ret;
1710 }
1711
1712 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1713 {
1714         int ret;
1715
1716         switch (attr->group) {
1717         case KVM_S390_VM_MEM_CTRL:
1718                 ret = kvm_s390_set_mem_control(kvm, attr);
1719                 break;
1720         case KVM_S390_VM_TOD:
1721                 ret = kvm_s390_set_tod(kvm, attr);
1722                 break;
1723         case KVM_S390_VM_CPU_MODEL:
1724                 ret = kvm_s390_set_cpu_model(kvm, attr);
1725                 break;
1726         case KVM_S390_VM_CRYPTO:
1727                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1728                 break;
1729         case KVM_S390_VM_MIGRATION:
1730                 ret = kvm_s390_vm_set_migration(kvm, attr);
1731                 break;
1732         default:
1733                 ret = -ENXIO;
1734                 break;
1735         }
1736
1737         return ret;
1738 }
1739
1740 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1741 {
1742         int ret;
1743
1744         switch (attr->group) {
1745         case KVM_S390_VM_MEM_CTRL:
1746                 ret = kvm_s390_get_mem_control(kvm, attr);
1747                 break;
1748         case KVM_S390_VM_TOD:
1749                 ret = kvm_s390_get_tod(kvm, attr);
1750                 break;
1751         case KVM_S390_VM_CPU_MODEL:
1752                 ret = kvm_s390_get_cpu_model(kvm, attr);
1753                 break;
1754         case KVM_S390_VM_MIGRATION:
1755                 ret = kvm_s390_vm_get_migration(kvm, attr);
1756                 break;
1757         default:
1758                 ret = -ENXIO;
1759                 break;
1760         }
1761
1762         return ret;
1763 }
1764
1765 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1766 {
1767         int ret;
1768
1769         switch (attr->group) {
1770         case KVM_S390_VM_MEM_CTRL:
1771                 switch (attr->attr) {
1772                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1773                 case KVM_S390_VM_MEM_CLR_CMMA:
1774                         ret = sclp.has_cmma ? 0 : -ENXIO;
1775                         break;
1776                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1777                         ret = 0;
1778                         break;
1779                 default:
1780                         ret = -ENXIO;
1781                         break;
1782                 }
1783                 break;
1784         case KVM_S390_VM_TOD:
1785                 switch (attr->attr) {
1786                 case KVM_S390_VM_TOD_LOW:
1787                 case KVM_S390_VM_TOD_HIGH:
1788                         ret = 0;
1789                         break;
1790                 default:
1791                         ret = -ENXIO;
1792                         break;
1793                 }
1794                 break;
1795         case KVM_S390_VM_CPU_MODEL:
1796                 switch (attr->attr) {
1797                 case KVM_S390_VM_CPU_PROCESSOR:
1798                 case KVM_S390_VM_CPU_MACHINE:
1799                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1800                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1801                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1802                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1803                         ret = 0;
1804                         break;
1805                 default:
1806                         ret = -ENXIO;
1807                         break;
1808                 }
1809                 break;
1810         case KVM_S390_VM_CRYPTO:
1811                 switch (attr->attr) {
1812                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1813                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1814                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1815                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1816                         ret = 0;
1817                         break;
1818                 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1819                 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1820                         ret = ap_instructions_available() ? 0 : -ENXIO;
1821                         break;
1822                 default:
1823                         ret = -ENXIO;
1824                         break;
1825                 }
1826                 break;
1827         case KVM_S390_VM_MIGRATION:
1828                 ret = 0;
1829                 break;
1830         default:
1831                 ret = -ENXIO;
1832                 break;
1833         }
1834
1835         return ret;
1836 }
1837
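/*
 * Read the storage keys of a range of guest pages and copy them to the
 * userspace buffer described by @args.
 */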
1838 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1839 {
1840         uint8_t *keys;
1841         uint64_t hva;
1842         int srcu_idx, i, r = 0;
1843
1844         if (args->flags != 0)
1845                 return -EINVAL;
1846
1847         /* Is this guest using storage keys? */
1848         if (!mm_uses_skeys(current->mm))
1849                 return KVM_S390_GET_SKEYS_NONE;
1850
1851         /* Enforce sane limit on memory allocation */
1852         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1853                 return -EINVAL;
1854
1855         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1856         if (!keys)
1857                 return -ENOMEM;
1858
1859         mmap_read_lock(current->mm);
1860         srcu_idx = srcu_read_lock(&kvm->srcu);
1861         for (i = 0; i < args->count; i++) {
1862                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1863                 if (kvm_is_error_hva(hva)) {
1864                         r = -EFAULT;
1865                         break;
1866                 }
1867
1868                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1869                 if (r)
1870                         break;
1871         }
1872         srcu_read_unlock(&kvm->srcu, srcu_idx);
1873         mmap_read_unlock(current->mm);
1874
1875         if (!r) {
1876                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1877                                  sizeof(uint8_t) * args->count);
1878                 if (r)
1879                         r = -EFAULT;
1880         }
1881
1882         kvfree(keys);
1883         return r;
1884 }
1885
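/*
 * Enable storage key handling for the guest and set the storage keys of a
 * range of guest pages from the userspace buffer described by @args.
 */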
1886 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1887 {
1888         uint8_t *keys;
1889         uint64_t hva;
1890         int srcu_idx, i, r = 0;
1891         bool unlocked;
1892
1893         if (args->flags != 0)
1894                 return -EINVAL;
1895
1896         /* Enforce sane limit on memory allocation */
1897         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1898                 return -EINVAL;
1899
1900         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1901         if (!keys)
1902                 return -ENOMEM;
1903
1904         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1905                            sizeof(uint8_t) * args->count);
1906         if (r) {
1907                 r = -EFAULT;
1908                 goto out;
1909         }
1910
1911         /* Enable storage key handling for the guest */
1912         r = s390_enable_skey();
1913         if (r)
1914                 goto out;
1915
1916         i = 0;
1917         mmap_read_lock(current->mm);
1918         srcu_idx = srcu_read_lock(&kvm->srcu);
1919         while (i < args->count) {
1920                 unlocked = false;
1921                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1922                 if (kvm_is_error_hva(hva)) {
1923                         r = -EFAULT;
1924                         break;
1925                 }
1926
1927                 /* Lowest order bit is reserved */
1928                 if (keys[i] & 0x01) {
1929                         r = -EINVAL;
1930                         break;
1931                 }
1932
1933                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1934                 if (r) {
1935                         r = fixup_user_fault(current->mm, hva,
1936                                              FAULT_FLAG_WRITE, &unlocked);
1937                         if (r)
1938                                 break;
1939                 }
1940                 if (!r)
1941                         i++;
1942         }
1943         srcu_read_unlock(&kvm->srcu, srcu_idx);
1944         mmap_read_unlock(current->mm);
1945 out:
1946         kvfree(keys);
1947         return r;
1948 }
1949
1950 /*
1951  * Base address and length must be sent at the start of each block, therefore
1952  * it's cheaper to send some clean data, as long as it's less than the size of
1953  * two longs.
1954  */
1955 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1956 /* use the same upper limit as for storage keys, for consistency */
1957 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1958
1959 /*
1960  * Similar to gfn_to_memslot, but also returns a memslot index when the
1961  * address falls in a hole. In that case, the index of one of the memslots
1962  * bordering the hole is returned.
1963  */
1964 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1965 {
1966         int start = 0, end = slots->used_slots;
1967         int slot = atomic_read(&slots->last_used_slot);
1968         struct kvm_memory_slot *memslots = slots->memslots;
1969
1970         if (gfn >= memslots[slot].base_gfn &&
1971             gfn < memslots[slot].base_gfn + memslots[slot].npages)
1972                 return slot;
1973
1974         while (start < end) {
1975                 slot = start + (end - start) / 2;
1976
1977                 if (gfn >= memslots[slot].base_gfn)
1978                         end = slot;
1979                 else
1980                         start = slot + 1;
1981         }
1982
1983         if (start >= slots->used_slots)
1984                 return slots->used_slots - 1;
1985
1986         if (gfn >= memslots[start].base_gfn &&
1987             gfn < memslots[start].base_gfn + memslots[start].npages) {
1988                 atomic_set(&slots->last_used_slot, start);
1989         }
1990
1991         return start;
1992 }
1993
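/*
 * Peek at the CMMA page states (PGSTE bits) of @bufsize consecutive pages
 * starting at args->start_gfn, without consulting or touching the CMMA
 * dirty bitmap.
 */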
1994 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1995                               u8 *res, unsigned long bufsize)
1996 {
1997         unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1998
1999         args->count = 0;
2000         while (args->count < bufsize) {
2001                 hva = gfn_to_hva(kvm, cur_gfn);
2002                 /*
2003                  * We return an error if the first value was invalid, but we
2004                  * return successfully if at least one value was copied.
2005                  */
2006                 if (kvm_is_error_hva(hva))
2007                         return args->count ? 0 : -EFAULT;
2008                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2009                         pgstev = 0;
2010                 res[args->count++] = (pgstev >> 24) & 0x43;
2011                 cur_gfn++;
2012         }
2013
2014         return 0;
2015 }
2016
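/*
 * Search for the next page with a set bit in the CMMA dirty bitmap, starting
 * at @cur_gfn, and return its guest frame number.
 */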
2017 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
2018                                               unsigned long cur_gfn)
2019 {
2020         int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
2021         struct kvm_memory_slot *ms = slots->memslots + slotidx;
2022         unsigned long ofs = cur_gfn - ms->base_gfn;
2023
2024         if (ms->base_gfn + ms->npages <= cur_gfn) {
2025                 slotidx--;
2026                 /* If we are above the highest slot, wrap around */
2027                 if (slotidx < 0)
2028                         slotidx = slots->used_slots - 1;
2029
2030                 ms = slots->memslots + slotidx;
2031                 ofs = 0;
2032         }
2033         ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
2034         while ((slotidx > 0) && (ofs >= ms->npages)) {
2035                 slotidx--;
2036                 ms = slots->memslots + slotidx;
2037                 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
2038         }
2039         return ms->base_gfn + ofs;
2040 }
2041
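/*
 * Harvest the CMMA page states of dirty pages, starting at the first dirty
 * page at or after args->start_gfn, clearing the corresponding bits in the
 * CMMA dirty bitmap as the values are read.
 */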
2042 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2043                              u8 *res, unsigned long bufsize)
2044 {
2045         unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2046         struct kvm_memslots *slots = kvm_memslots(kvm);
2047         struct kvm_memory_slot *ms;
2048
2049         if (unlikely(!slots->used_slots))
2050                 return 0;
2051
2052         cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2053         ms = gfn_to_memslot(kvm, cur_gfn);
2054         args->count = 0;
2055         args->start_gfn = cur_gfn;
2056         if (!ms)
2057                 return 0;
2058         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
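        /*
         * memslots are sorted by descending base_gfn, so slot 0 ends at the
         * highest guest frame number
         */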
2059         mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2060
2061         while (args->count < bufsize) {
2062                 hva = gfn_to_hva(kvm, cur_gfn);
2063                 if (kvm_is_error_hva(hva))
2064                         return 0;
2065                 /* Decrement only if we actually flipped the bit to 0 */
2066                 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2067                         atomic64_dec(&kvm->arch.cmma_dirty_pages);
2068                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2069                         pgstev = 0;
2070                 /* Save the value */
2071                 res[args->count++] = (pgstev >> 24) & 0x43;
2072                 /* If the next bit is too far away, stop. */
2073                 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2074                         return 0;
2075                 /* If we reached the previous "next", find the next one */
2076                 if (cur_gfn == next_gfn)
2077                         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2078                 /* Reached the end of memory or of the buffer, stop */
2079                 if ((next_gfn >= mem_end) ||
2080                     (next_gfn - args->start_gfn >= bufsize))
2081                         return 0;
2082                 cur_gfn++;
2083                 /* Reached the end of the current memslot, take the next one. */
2084                 if (cur_gfn - ms->base_gfn >= ms->npages) {
2085                         ms = gfn_to_memslot(kvm, cur_gfn);
2086                         if (!ms)
2087                                 return 0;
2088                 }
2089         }
2090         return 0;
2091 }
2092
2093 /*
2094  * This function searches for the next page with dirty CMMA attributes, and
2095  * saves the attributes in the buffer up to either the end of the buffer or
2096  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2097  * no trailing clean bytes are saved.
2098  * If no dirty bits were found, or if CMMA was not enabled or used, the
2099  * output buffer will indicate a length of 0.
2100  */
2101 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2102                                   struct kvm_s390_cmma_log *args)
2103 {
2104         unsigned long bufsize;
2105         int srcu_idx, peek, ret;
2106         u8 *values;
2107
2108         if (!kvm->arch.use_cmma)
2109                 return -ENXIO;
2110         /* Invalid/unsupported flags were specified */
2111         if (args->flags & ~KVM_S390_CMMA_PEEK)
2112                 return -EINVAL;
2113         /* Migration mode query, and we are not doing a migration */
2114         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2115         if (!peek && !kvm->arch.migration_mode)
2116                 return -EINVAL;
2117         /* CMMA is disabled or was not used, or the buffer has length zero */
2118         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2119         if (!bufsize || !kvm->mm->context.uses_cmm) {
2120                 memset(args, 0, sizeof(*args));
2121                 return 0;
2122         }
2123         /* We are not peeking, and there are no dirty pages */
2124         if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2125                 memset(args, 0, sizeof(*args));
2126                 return 0;
2127         }
2128
2129         values = vmalloc(bufsize);
2130         if (!values)
2131                 return -ENOMEM;
2132
2133         mmap_read_lock(kvm->mm);
2134         srcu_idx = srcu_read_lock(&kvm->srcu);
2135         if (peek)
2136                 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2137         else
2138                 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2139         srcu_read_unlock(&kvm->srcu, srcu_idx);
2140         mmap_read_unlock(kvm->mm);
2141
2142         if (kvm->arch.migration_mode)
2143                 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2144         else
2145                 args->remaining = 0;
2146
2147         if (copy_to_user((void __user *)args->values, values, args->count))
2148                 ret = -EFAULT;
2149
2150         vfree(values);
2151         return ret;
2152 }
2153
2154 /*
2155  * This function sets the CMMA attributes for the given pages. If the input
2156  * buffer has zero length, no action is taken, otherwise the attributes are
2157  * set and the mm->context.uses_cmm flag is set.
2158  */
2159 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2160                                   const struct kvm_s390_cmma_log *args)
2161 {
2162         unsigned long hva, mask, pgstev, i;
2163         uint8_t *bits;
2164         int srcu_idx, r = 0;
2165
2166         mask = args->mask;
2167
2168         if (!kvm->arch.use_cmma)
2169                 return -ENXIO;
2170         /* invalid/unsupported flags */
2171         if (args->flags != 0)
2172                 return -EINVAL;
2173         /* Enforce sane limit on memory allocation */
2174         if (args->count > KVM_S390_CMMA_SIZE_MAX)
2175                 return -EINVAL;
2176         /* Nothing to do */
2177         if (args->count == 0)
2178                 return 0;
2179
2180         bits = vmalloc(array_size(sizeof(*bits), args->count));
2181         if (!bits)
2182                 return -ENOMEM;
2183
2184         r = copy_from_user(bits, (void __user *)args->values, args->count);
2185         if (r) {
2186                 r = -EFAULT;
2187                 goto out;
2188         }
2189
2190         mmap_read_lock(kvm->mm);
2191         srcu_idx = srcu_read_lock(&kvm->srcu);
2192         for (i = 0; i < args->count; i++) {
2193                 hva = gfn_to_hva(kvm, args->start_gfn + i);
2194                 if (kvm_is_error_hva(hva)) {
2195                         r = -EFAULT;
2196                         break;
2197                 }
2198
2199                 pgstev = bits[i];
2200                 pgstev = pgstev << 24;
2201                 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2202                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2203         }
2204         srcu_read_unlock(&kvm->srcu, srcu_idx);
2205         mmap_read_unlock(kvm->mm);
2206
2207         if (!kvm->mm->context.uses_cmm) {
2208                 mmap_write_lock(kvm->mm);
2209                 kvm->mm->context.uses_cmm = 1;
2210                 mmap_write_unlock(kvm->mm);
2211         }
2212 out:
2213         vfree(bits);
2214         return r;
2215 }
2216
2217 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2218 {
2219         struct kvm_vcpu *vcpu;
2220         u16 rc, rrc;
2221         int ret = 0;
2222         int i;
2223
2224         /*
2225          * We ignore failures and try to destroy as many CPUs as possible.
2226          * At the same time we must not free the assigned resources when
2227          * this fails, as the ultravisor still has access to that memory.
2228          * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2229          * behind.
2230          * We want to return the first failure rc and rrc, though.
2231          */
2232         kvm_for_each_vcpu(i, vcpu, kvm) {
2233                 mutex_lock(&vcpu->mutex);
2234                 if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2235                         *rcp = rc;
2236                         *rrcp = rrc;
2237                         ret = -EIO;
2238                 }
2239                 mutex_unlock(&vcpu->mutex);
2240         }
2241         return ret;
2242 }
2243
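/*
 * Convert all vCPUs of the VM into protected (secure) execution mode. If the
 * conversion fails for any vCPU, destroy the protected CPUs created so far.
 */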
2244 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2245 {
2246         int i, r = 0;
2247         u16 dummy;
2248
2249         struct kvm_vcpu *vcpu;
2250
2251         kvm_for_each_vcpu(i, vcpu, kvm) {
2252                 mutex_lock(&vcpu->mutex);
2253                 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2254                 mutex_unlock(&vcpu->mutex);
2255                 if (r)
2256                         break;
2257         }
2258         if (r)
2259                 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2260         return r;
2261 }
2262
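/*
 * Handle the KVM_S390_PV_COMMAND sub-commands that drive the protected
 * virtualization life cycle of a VM: enabling and disabling protected mode,
 * setting the security parameters, unpacking the image, verification,
 * prepare reset and unsharing of all pages.
 */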
2263 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2264 {
2265         int r = 0;
2266         u16 dummy;
2267         void __user *argp = (void __user *)cmd->data;
2268
2269         switch (cmd->cmd) {
2270         case KVM_PV_ENABLE: {
2271                 r = -EINVAL;
2272                 if (kvm_s390_pv_is_protected(kvm))
2273                         break;
2274
2275                 /*
2276                  *  FMT 4 SIE needs esca. As we never switch back to bsca from
2277                  *  esca, we need no cleanup in the error cases below
2278                  */
2279                 r = sca_switch_to_extended(kvm);
2280                 if (r)
2281                         break;
2282
2283                 mmap_write_lock(current->mm);
2284                 r = gmap_mark_unmergeable();
2285                 mmap_write_unlock(current->mm);
2286                 if (r)
2287                         break;
2288
2289                 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2290                 if (r)
2291                         break;
2292
2293                 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2294                 if (r)
2295                         kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2296
2297                 /* we need to block service interrupts from now on */
2298                 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2299                 break;
2300         }
2301         case KVM_PV_DISABLE: {
2302                 r = -EINVAL;
2303                 if (!kvm_s390_pv_is_protected(kvm))
2304                         break;
2305
2306                 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2307                 /*
2308                  * If a CPU could not be destroyed, destroying the VM will also
2309                  * fail. There is no point in trying; instead return the rc and
2310                  * rrc from the first CPU that failed to be destroyed.
2311                  */
2312                 if (r)
2313                         break;
2314                 r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2315
2316                 /* no need to block service interrupts any more */
2317                 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2318                 break;
2319         }
2320         case KVM_PV_SET_SEC_PARMS: {
2321                 struct kvm_s390_pv_sec_parm parms = {};
2322                 void *hdr;
2323
2324                 r = -EINVAL;
2325                 if (!kvm_s390_pv_is_protected(kvm))
2326                         break;
2327
2328                 r = -EFAULT;
2329                 if (copy_from_user(&parms, argp, sizeof(parms)))
2330                         break;
2331
2332                 /* Currently restricted to 8KB, i.e. two pages */
2333                 r = -EINVAL;
2334                 if (parms.length > PAGE_SIZE * 2)
2335                         break;
2336
2337                 r = -ENOMEM;
2338                 hdr = vmalloc(parms.length);
2339                 if (!hdr)
2340                         break;
2341
2342                 r = -EFAULT;
2343                 if (!copy_from_user(hdr, (void __user *)parms.origin,
2344                                     parms.length))
2345                         r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2346                                                       &cmd->rc, &cmd->rrc);
2347
2348                 vfree(hdr);
2349                 break;
2350         }
2351         case KVM_PV_UNPACK: {
2352                 struct kvm_s390_pv_unp unp = {};
2353
2354                 r = -EINVAL;
2355                 if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2356                         break;
2357
2358                 r = -EFAULT;
2359                 if (copy_from_user(&unp, argp, sizeof(unp)))
2360                         break;
2361
2362                 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2363                                        &cmd->rc, &cmd->rrc);
2364                 break;
2365         }
2366         case KVM_PV_VERIFY: {
2367                 r = -EINVAL;
2368                 if (!kvm_s390_pv_is_protected(kvm))
2369                         break;
2370
2371                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2372                                   UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2373                 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2374                              cmd->rrc);
2375                 break;
2376         }
2377         case KVM_PV_PREP_RESET: {
2378                 r = -EINVAL;
2379                 if (!kvm_s390_pv_is_protected(kvm))
2380                         break;
2381
2382                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2383                                   UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2384                 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2385                              cmd->rc, cmd->rrc);
2386                 break;
2387         }
2388         case KVM_PV_UNSHARE_ALL: {
2389                 r = -EINVAL;
2390                 if (!kvm_s390_pv_is_protected(kvm))
2391                         break;
2392
2393                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2394                                   UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2395                 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2396                              cmd->rc, cmd->rrc);
2397                 break;
2398         }
2399         default:
2400                 r = -ENOTTY;
2401         }
2402         return r;
2403 }
2404
2405 long kvm_arch_vm_ioctl(struct file *filp,
2406                        unsigned int ioctl, unsigned long arg)
2407 {
2408         struct kvm *kvm = filp->private_data;
2409         void __user *argp = (void __user *)arg;
2410         struct kvm_device_attr attr;
2411         int r;
2412
2413         switch (ioctl) {
2414         case KVM_S390_INTERRUPT: {
2415                 struct kvm_s390_interrupt s390int;
2416
2417                 r = -EFAULT;
2418                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2419                         break;
2420                 r = kvm_s390_inject_vm(kvm, &s390int);
2421                 break;
2422         }
2423         case KVM_CREATE_IRQCHIP: {
2424                 struct kvm_irq_routing_entry routing;
2425
2426                 r = -EINVAL;
2427                 if (kvm->arch.use_irqchip) {
2428                         /* Set up dummy routing. */
2429                         memset(&routing, 0, sizeof(routing));
2430                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2431                 }
2432                 break;
2433         }
2434         case KVM_SET_DEVICE_ATTR: {
2435                 r = -EFAULT;
2436                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2437                         break;
2438                 r = kvm_s390_vm_set_attr(kvm, &attr);
2439                 break;
2440         }
2441         case KVM_GET_DEVICE_ATTR: {
2442                 r = -EFAULT;
2443                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2444                         break;
2445                 r = kvm_s390_vm_get_attr(kvm, &attr);
2446                 break;
2447         }
2448         case KVM_HAS_DEVICE_ATTR: {
2449                 r = -EFAULT;
2450                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2451                         break;
2452                 r = kvm_s390_vm_has_attr(kvm, &attr);
2453                 break;
2454         }
2455         case KVM_S390_GET_SKEYS: {
2456                 struct kvm_s390_skeys args;
2457
2458                 r = -EFAULT;
2459                 if (copy_from_user(&args, argp,
2460                                    sizeof(struct kvm_s390_skeys)))
2461                         break;
2462                 r = kvm_s390_get_skeys(kvm, &args);
2463                 break;
2464         }
2465         case KVM_S390_SET_SKEYS: {
2466                 struct kvm_s390_skeys args;
2467
2468                 r = -EFAULT;
2469                 if (copy_from_user(&args, argp,
2470                                    sizeof(struct kvm_s390_skeys)))
2471                         break;
2472                 r = kvm_s390_set_skeys(kvm, &args);
2473                 break;
2474         }
2475         case KVM_S390_GET_CMMA_BITS: {
2476                 struct kvm_s390_cmma_log args;
2477
2478                 r = -EFAULT;
2479                 if (copy_from_user(&args, argp, sizeof(args)))
2480                         break;
2481                 mutex_lock(&kvm->slots_lock);
2482                 r = kvm_s390_get_cmma_bits(kvm, &args);
2483                 mutex_unlock(&kvm->slots_lock);
2484                 if (!r) {
2485                         r = copy_to_user(argp, &args, sizeof(args));
2486                         if (r)
2487                                 r = -EFAULT;
2488                 }
2489                 break;
2490         }
2491         case KVM_S390_SET_CMMA_BITS: {
2492                 struct kvm_s390_cmma_log args;
2493
2494                 r = -EFAULT;
2495                 if (copy_from_user(&args, argp, sizeof(args)))
2496                         break;
2497                 mutex_lock(&kvm->slots_lock);
2498                 r = kvm_s390_set_cmma_bits(kvm, &args);
2499                 mutex_unlock(&kvm->slots_lock);
2500                 break;
2501         }
2502         case KVM_S390_PV_COMMAND: {
2503                 struct kvm_pv_cmd args;
2504
2505                 /* protected virtualization implies user-controlled sigp */
2506                 kvm->arch.user_cpu_state_ctrl = 1;
2507                 r = 0;
2508                 if (!is_prot_virt_host()) {
2509                         r = -EINVAL;
2510                         break;
2511                 }
2512                 if (copy_from_user(&args, argp, sizeof(args))) {
2513                         r = -EFAULT;
2514                         break;
2515                 }
2516                 if (args.flags) {
2517                         r = -EINVAL;
2518                         break;
2519                 }
2520                 mutex_lock(&kvm->lock);
2521                 r = kvm_s390_handle_pv(kvm, &args);
2522                 mutex_unlock(&kvm->lock);
2523                 if (copy_to_user(argp, &args, sizeof(args))) {
2524                         r = -EFAULT;
2525                         break;
2526                 }
2527                 break;
2528         }
2529         default:
2530                 r = -ENOTTY;
2531         }
2532
2533         return r;
2534 }
2535
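/* Check whether the AP extended addressing (APXA) facility is installed. */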
2536 static int kvm_s390_apxa_installed(void)
2537 {
2538         struct ap_config_info info;
2539
2540         if (ap_instructions_available()) {
2541                 if (ap_qci(&info) == 0)
2542                         return info.apxa;
2543         }
2544
2545         return 0;
2546 }
2547
2548 /*
2549  * The format of the crypto control block (CRYCB) is specified in the 3 low
2550  * order bits of the CRYCB designation (CRYCBD) field as follows:
2551  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2552  *           AP extended addressing (APXA) facility are installed.
2553  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2554  * Format 2: Both the APXA and MSAX3 facilities are installed.
2555  */
2556 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2557 {
2558         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2559
2560         /* Clear the CRYCB format bits - i.e., set format 0 by default */
2561         kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2562
2563         /* Check whether MSAX3 is installed */
2564         if (!test_kvm_facility(kvm, 76))
2565                 return;
2566
2567         if (kvm_s390_apxa_installed())
2568                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2569         else
2570                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2571 }
2572
2573 /*
2574  * kvm_arch_crypto_set_masks
2575  *
2576  * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2577  *       to be set.
2578  * @apm: the mask identifying the accessible AP adapters
2579  * @aqm: the mask identifying the accessible AP domains
2580  * @adm: the mask identifying the accessible AP control domains
2581  *
2582  * Set the masks that identify the adapters, domains and control domains to
2583  * which the KVM guest is granted access.
2584  *
2585  * Note: The kvm->lock mutex must be locked by the caller before invoking this
2586  *       function.
2587  */
2588 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2589                                unsigned long *aqm, unsigned long *adm)
2590 {
2591         struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2592
2593         kvm_s390_vcpu_block_all(kvm);
2594
2595         switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2596         case CRYCB_FORMAT2: /* APCB1 uses 256 bits */
2597                 memcpy(crycb->apcb1.apm, apm, 32);
2598                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2599                          apm[0], apm[1], apm[2], apm[3]);
2600                 memcpy(crycb->apcb1.aqm, aqm, 32);
2601                 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2602                          aqm[0], aqm[1], aqm[2], aqm[3]);
2603                 memcpy(crycb->apcb1.adm, adm, 32);
2604                 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2605                          adm[0], adm[1], adm[2], adm[3]);
2606                 break;
2607         case CRYCB_FORMAT1:
2608         case CRYCB_FORMAT0: /* Fall through - both use APCB0 */
2609                 memcpy(crycb->apcb0.apm, apm, 8);
2610                 memcpy(crycb->apcb0.aqm, aqm, 2);
2611                 memcpy(crycb->apcb0.adm, adm, 2);
2612                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2613                          apm[0], *((unsigned short *)aqm),
2614                          *((unsigned short *)adm));
2615                 break;
2616         default:        /* Cannot happen */
2617                 break;
2618         }
2619
2620         /* recreate the shadow crycb for each vcpu */
2621         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2622         kvm_s390_vcpu_unblock_all(kvm);
2623 }
2624 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2625
2626 /*
2627  * kvm_arch_crypto_clear_masks
2628  *
2629  * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2630  *       to be cleared.
2631  *
2632  * Clear the masks that identify the adapters, domains and control domains to
2633  * which the KVM guest is granted access.
2634  *
2635  * Note: The kvm->lock mutex must be locked by the caller before invoking this
2636  *       function.
2637  */
2638 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2639 {
2640         kvm_s390_vcpu_block_all(kvm);
2641
2642         memset(&kvm->arch.crypto.crycb->apcb0, 0,
2643                sizeof(kvm->arch.crypto.crycb->apcb0));
2644         memset(&kvm->arch.crypto.crycb->apcb1, 0,
2645                sizeof(kvm->arch.crypto.crycb->apcb1));
2646
2647         VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2648         /* recreate the shadow crycb for each vcpu */
2649         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2650         kvm_s390_vcpu_unblock_all(kvm);
2651 }
2652 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2653
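/*
 * Use the host CPU id as the initial guest cpuid, with the version field
 * forced to 0xff.
 */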
2654 static u64 kvm_s390_get_initial_cpuid(void)
2655 {
2656         struct cpuid cpuid;
2657
2658         get_cpu_id(&cpuid);
2659         cpuid.version = 0xff;
2660         return *((u64 *) &cpuid);
2661 }
2662
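/*
 * Initialize the crypto state of a new VM: set up the CRYCB and its format
 * and, if the MSAX3 facility is available, enable the AES/DEA protected key
 * functions with freshly generated wrapping key masks.
 */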
2663 static void kvm_s390_crypto_init(struct kvm *kvm)
2664 {
2665         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2666         kvm_s390_set_crycb_format(kvm);
2667         init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);
2668
2669         if (!test_kvm_facility(kvm, 76))
2670                 return;
2671
2672         /* Enable AES/DEA protected key functions by default */
2673         kvm->arch.crypto.aes_kw = 1;
2674         kvm->arch.crypto.dea_kw = 1;
2675         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2676                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2677         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2678                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2679 }
2680
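/* Free the basic or extended system control area (SCA) of the VM. */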
2681 static void sca_dispose(struct kvm *kvm)
2682 {
2683         if (kvm->arch.use_esca)
2684                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2685         else
2686                 free_page((unsigned long)(kvm->arch.sca));
2687         kvm->arch.sca = NULL;
2688 }
2689
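/*
 * Architecture specific part of VM creation: allocate the SCA, the debug
 * feature and the sie_page2, and set up the CPU model, facility lists,
 * crypto and floating interrupt state of the new VM.
 */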
2690 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2691 {
2692         gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
2693         int i, rc;
2694         char debug_name[16];
2695         static unsigned long sca_offset;
2696
2697         rc = -EINVAL;
2698 #ifdef CONFIG_KVM_S390_UCONTROL
2699         if (type & ~KVM_VM_S390_UCONTROL)
2700                 goto out_err;
2701         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2702                 goto out_err;
2703 #else
2704         if (type)
2705                 goto out_err;
2706 #endif
2707
2708         rc = s390_enable_sie();
2709         if (rc)
2710                 goto out_err;
2711
2712         rc = -ENOMEM;
2713
2714         if (!sclp.has_64bscao)
2715                 alloc_flags |= GFP_DMA;
2716         rwlock_init(&kvm->arch.sca_lock);
2717         /* start with basic SCA */
2718         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2719         if (!kvm->arch.sca)
2720                 goto out_err;
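        /*
         * Stagger each VM's basic SCA within its page in 16-byte steps
         * (the static offset is protected by kvm_lock), so that the SCAs
         * of concurrently created VMs do not all start at the same offset.
         */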
2721         mutex_lock(&kvm_lock);
2722         sca_offset += 16;
2723         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2724                 sca_offset = 0;
2725         kvm->arch.sca = (struct bsca_block *)
2726                         ((char *) kvm->arch.sca + sca_offset);
2727         mutex_unlock(&kvm_lock);
2728
2729         sprintf(debug_name, "kvm-%u", current->pid);
2730
2731         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2732         if (!kvm->arch.dbf)
2733                 goto out_err;
2734
2735         BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2736         kvm->arch.sie_page2 =
2737              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
2738         if (!kvm->arch.sie_page2)
2739                 goto out_err;
2740
2741         kvm->arch.sie_page2->kvm = kvm;
2742         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2743
2744         for (i = 0; i < kvm_s390_fac_size(); i++) {
2745                 kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
2746                                               (kvm_s390_fac_base[i] |
2747                                                kvm_s390_fac_ext[i]);
2748                 kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
2749                                               kvm_s390_fac_base[i];
2750         }
2751         kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2752
2753         /* we are always in czam mode - even on pre z14 machines */
2754         set_kvm_facility(kvm->arch.model.fac_mask, 138);
2755         set_kvm_facility(kvm->arch.model.fac_list, 138);
2756         /* we emulate STHYI in kvm */
2757         set_kvm_facility(kvm->arch.model.fac_mask, 74);
2758         set_kvm_facility(kvm->arch.model.fac_list, 74);
2759         if (MACHINE_HAS_TLB_GUEST) {
2760                 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2761                 set_kvm_facility(kvm->arch.model.fac_list, 147);
2762         }
2763
2764         if (css_general_characteristics.aiv && test_facility(65))
2765                 set_kvm_facility(kvm->arch.model.fac_mask, 65);
2766
2767         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2768         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2769
2770         kvm_s390_crypto_init(kvm);
2771
2772         mutex_init(&kvm->arch.float_int.ais_lock);
2773         spin_lock_init(&kvm->arch.float_int.lock);
2774         for (i = 0; i < FIRQ_LIST_COUNT; i++)
2775                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2776         init_waitqueue_head(&kvm->arch.ipte_wq);
2777         mutex_init(&kvm->arch.ipte_mutex);
2778
2779         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2780         VM_EVENT(kvm, 3, "vm created with type %lu", type);
2781
2782         if (type & KVM_VM_S390_UCONTROL) {
2783                 kvm->arch.gmap = NULL;
2784                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2785         } else {
2786                 if (sclp.hamax == U64_MAX)
2787                         kvm->arch.mem_limit = TASK_SIZE_MAX;
2788                 else
2789                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2790                                                     sclp.hamax + 1);
2791                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2792                 if (!kvm->arch.gmap)
2793                         goto out_err;
2794                 kvm->arch.gmap->private = kvm;
2795                 kvm->arch.gmap->pfault_enabled = 0;
2796         }
2797
2798         kvm->arch.use_pfmfi = sclp.has_pfmfi;
2799         kvm->arch.use_skf = sclp.has_skey;
2800         spin_lock_init(&kvm->arch.start_stop_lock);
2801         kvm_s390_vsie_init(kvm);
2802         if (use_gisa)
2803                 kvm_s390_gisa_init(kvm);
2804         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2805
2806         return 0;
2807 out_err:
2808         free_page((unsigned long)kvm->arch.sie_page2);
2809         debug_unregister(kvm->arch.dbf);
2810         sca_dispose(kvm);
2811         KVM_EVENT(3, "creation of vm failed: %d", rc);
2812         return rc;
2813 }
2814
2815 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2816 {
2817         u16 rc, rrc;
2818
2819         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2820         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2821         kvm_s390_clear_local_irqs(vcpu);
2822         kvm_clear_async_pf_completion_queue(vcpu);
2823         if (!kvm_is_ucontrol(vcpu->kvm))
2824                 sca_del_vcpu(vcpu);
2825
2826         if (kvm_is_ucontrol(vcpu->kvm))
2827                 gmap_remove(vcpu->arch.gmap);
2828
2829         if (vcpu->kvm->arch.use_cmma)
2830                 kvm_s390_vcpu_unsetup_cmma(vcpu);
2831         /* We cannot hold the vcpu mutex here; we are already dying */
2832         if (kvm_s390_pv_cpu_get_handle(vcpu))
2833                 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2834         free_page((unsigned long)(vcpu->arch.sie_block));
2835 }
2836
2837 static void kvm_free_vcpus(struct kvm *kvm)
2838 {
2839         unsigned int i;
2840         struct kvm_vcpu *vcpu;
2841
2842         kvm_for_each_vcpu(i, vcpu, kvm)
2843                 kvm_vcpu_destroy(vcpu);
2844
2845         mutex_lock(&kvm->lock);
2846         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2847                 kvm->vcpus[i] = NULL;
2848
2849         atomic_set(&kvm->online_vcpus, 0);
2850         mutex_unlock(&kvm->lock);
2851 }
2852
2853 void kvm_arch_destroy_vm(struct kvm *kvm)
2854 {
2855         u16 rc, rrc;
2856
2857         kvm_free_vcpus(kvm);
2858         sca_dispose(kvm);
2859         kvm_s390_gisa_destroy(kvm);
2860         /*
2861          * We are already at the end of life and kvm->lock is not taken.
2862          * This is ok as the file descriptor is closed by now and nobody
2863          * can mess with the pv state. To avoid lockdep_assert_held from
2864          * complaining we do not use kvm_s390_pv_is_protected.
2865          */
2866         if (kvm_s390_pv_get_handle(kvm))
2867                 kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2868         debug_unregister(kvm->arch.dbf);
2869         free_page((unsigned long)kvm->arch.sie_page2);
2870         if (!kvm_is_ucontrol(kvm))
2871                 gmap_remove(kvm->arch.gmap);
2872         kvm_s390_destroy_adapters(kvm);
2873         kvm_s390_clear_float_irqs(kvm);
2874         kvm_s390_vsie_destroy(kvm);
2875         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2876 }
2877
2878 /* Section: vcpu related */
2879 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2880 {
2881         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2882         if (!vcpu->arch.gmap)
2883                 return -ENOMEM;
2884         vcpu->arch.gmap->private = vcpu->kvm;
2885
2886         return 0;
2887 }
2888
2889 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2890 {
2891         if (!kvm_s390_use_sca_entries())
2892                 return;
2893         read_lock(&vcpu->kvm->arch.sca_lock);
2894         if (vcpu->kvm->arch.use_esca) {
2895                 struct esca_block *sca = vcpu->kvm->arch.sca;
2896
2897                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2898                 sca->cpu[vcpu->vcpu_id].sda = 0;
2899         } else {
2900                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2901
2902                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2903                 sca->cpu[vcpu->vcpu_id].sda = 0;
2904         }
2905         read_unlock(&vcpu->kvm->arch.sca_lock);
2906 }
2907
2908 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2909 {
2910         if (!kvm_s390_use_sca_entries()) {
2911                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2912
2913                 /* we still need the basic sca for the ipte control */
2914                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2915                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2916                 return;
2917         }
2918         read_lock(&vcpu->kvm->arch.sca_lock);
2919         if (vcpu->kvm->arch.use_esca) {
2920                 struct esca_block *sca = vcpu->kvm->arch.sca;
2921
2922                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2923                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2924                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2925                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2926                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2927         } else {
2928                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2929
2930                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2931                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2932                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2933                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2934         }
2935         read_unlock(&vcpu->kvm->arch.sca_lock);
2936 }
2937
2938 /* Basic SCA to Extended SCA data copy routines */
2939 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2940 {
2941         d->sda = s->sda;
2942         d->sigp_ctrl.c = s->sigp_ctrl.c;
2943         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2944 }
2945
2946 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2947 {
2948         int i;
2949
2950         d->ipte_control = s->ipte_control;
2951         d->mcn[0] = s->mcn;
2952         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2953                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2954 }
2955
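/*
 * Replace the basic SCA by an extended SCA so that more than
 * KVM_S390_BSCA_CPU_SLOTS vcpus can be created. All vcpus are blocked and the
 * SCA lock is taken for writing while the SIE control blocks are repointed,
 * so no vcpu can run in SIE with a stale SCA address.
 */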
2956 static int sca_switch_to_extended(struct kvm *kvm)
2957 {
2958         struct bsca_block *old_sca = kvm->arch.sca;
2959         struct esca_block *new_sca;
2960         struct kvm_vcpu *vcpu;
2961         unsigned int vcpu_idx;
2962         u32 scaol, scaoh;
2963
2964         if (kvm->arch.use_esca)
2965                 return 0;
2966
2967         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
2968         if (!new_sca)
2969                 return -ENOMEM;
2970
2971         scaoh = (u32)((u64)(new_sca) >> 32);
2972         scaol = (u32)(u64)(new_sca) & ~0x3fU;
2973
2974         kvm_s390_vcpu_block_all(kvm);
2975         write_lock(&kvm->arch.sca_lock);
2976
2977         sca_copy_b_to_e(new_sca, old_sca);
2978
2979         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2980                 vcpu->arch.sie_block->scaoh = scaoh;
2981                 vcpu->arch.sie_block->scaol = scaol;
2982                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2983         }
2984         kvm->arch.sca = new_sca;
2985         kvm->arch.use_esca = 1;
2986
2987         write_unlock(&kvm->arch.sca_lock);
2988         kvm_s390_vcpu_unblock_all(kvm);
2989
2990         free_page((unsigned long)old_sca);
2991
2992         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2993                  old_sca, kvm->arch.sca);
2994         return 0;
2995 }
2996
2997 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2998 {
2999         int rc;
3000
3001         if (!kvm_s390_use_sca_entries()) {
3002                 if (id < KVM_MAX_VCPUS)
3003                         return true;
3004                 return false;
3005         }
3006         if (id < KVM_S390_BSCA_CPU_SLOTS)
3007                 return true;
3008         if (!sclp.has_esca || !sclp.has_64bscao)
3009                 return false;
3010
3011         mutex_lock(&kvm->lock);
3012         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
3013         mutex_unlock(&kvm->lock);
3014
3015         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
3016 }
3017
3018 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3019 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3020 {
3021         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
3022         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3023         vcpu->arch.cputm_start = get_tod_clock_fast();
3024         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3025 }
3026
3027 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3028 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3029 {
3030         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
3031         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3032         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3033         vcpu->arch.cputm_start = 0;
3034         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3035 }
3036
3037 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3038 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3039 {
3040         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
3041         vcpu->arch.cputm_enabled = true;
3042         __start_cpu_timer_accounting(vcpu);
3043 }
3044
3045 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3046 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3047 {
3048         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
3049         __stop_cpu_timer_accounting(vcpu);
3050         vcpu->arch.cputm_enabled = false;
3051 }
3052
3053 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3054 {
3055         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3056         __enable_cpu_timer_accounting(vcpu);
3057         preempt_enable();
3058 }
3059
3060 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3061 {
3062         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3063         __disable_cpu_timer_accounting(vcpu);
3064         preempt_enable();
3065 }
3066
3067 /* set the cpu timer - may only be called from the VCPU thread itself */
3068 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3069 {
3070         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3071         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3072         if (vcpu->arch.cputm_enabled)
3073                 vcpu->arch.cputm_start = get_tod_clock_fast();
3074         vcpu->arch.sie_block->cputm = cputm;
3075         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3076         preempt_enable();
3077 }
3078
3079 /* update and get the cpu timer - can also be called from other VCPU threads */
3080 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3081 {
3082         unsigned int seq;
3083         __u64 value;
3084
3085         if (unlikely(!vcpu->arch.cputm_enabled))
3086                 return vcpu->arch.sie_block->cputm;
3087
3088         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3089         do {
3090                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3091                 /*
3092                  * If the writer would ever execute a read in the critical
3093                  * section, e.g. in irq context, we have a deadlock.
3094                  */
3095                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3096                 value = vcpu->arch.sie_block->cputm;
3097                 /* if cputm_start is 0, accounting is being started/stopped */
3098                 if (likely(vcpu->arch.cputm_start))
3099                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3100         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3101         preempt_enable();
3102         return value;
3103 }
3104
3105 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3106 {
3108         gmap_enable(vcpu->arch.enabled_gmap);
3109         kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3110         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3111                 __start_cpu_timer_accounting(vcpu);
3112         vcpu->cpu = cpu;
3113 }
3114
3115 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3116 {
3117         vcpu->cpu = -1;
3118         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3119                 __stop_cpu_timer_accounting(vcpu);
3120         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3121         vcpu->arch.enabled_gmap = gmap_get_enabled();
3122         gmap_disable(vcpu->arch.enabled_gmap);
3124 }
3125
3126 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3127 {
3128         mutex_lock(&vcpu->kvm->lock);
3129         preempt_disable();
3130         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3131         vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3132         preempt_enable();
3133         mutex_unlock(&vcpu->kvm->lock);
3134         if (!kvm_is_ucontrol(vcpu->kvm)) {
3135                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3136                 sca_add_vcpu(vcpu);
3137         }
3138         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3139                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3140         /* make vcpu_load load the right gmap on the first trigger */
3141         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3142 }
3143
3144 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3145 {
3146         if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3147             test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3148                 return true;
3149         return false;
3150 }
3151
3152 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3153 {
3154         /* At least one ECC subfunction must be present */
3155         return kvm_has_pckmo_subfunc(kvm, 32) ||
3156                kvm_has_pckmo_subfunc(kvm, 33) ||
3157                kvm_has_pckmo_subfunc(kvm, 34) ||
3158                kvm_has_pckmo_subfunc(kvm, 40) ||
3159                kvm_has_pckmo_subfunc(kvm, 41);
3161 }
3162
3163 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3164 {
3165         /*
3166          * If the AP instructions are not being interpreted and the MSAX3
3167          * facility is not configured for the guest, there is nothing to set up.
3168          */
3169         if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3170                 return;
3171
3172         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3173         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3174         vcpu->arch.sie_block->eca &= ~ECA_APIE;
3175         vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3176
3177         if (vcpu->kvm->arch.crypto.apie)
3178                 vcpu->arch.sie_block->eca |= ECA_APIE;
3179
3180         /* Set up protected key support */
3181         if (vcpu->kvm->arch.crypto.aes_kw) {
3182                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3183                 /* ecc is also wrapped with AES key */
3184                 if (kvm_has_pckmo_ecc(vcpu->kvm))
3185                         vcpu->arch.sie_block->ecd |= ECD_ECC;
3186         }
3187
3188         if (vcpu->kvm->arch.crypto.dea_kw)
3189                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3190 }
3191
3192 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3193 {
3194         free_page(vcpu->arch.sie_block->cbrlo);
3195         vcpu->arch.sie_block->cbrlo = 0;
3196 }
3197
3198 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3199 {
3200         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
3201         if (!vcpu->arch.sie_block->cbrlo)
3202                 return -ENOMEM;
3203         return 0;
3204 }
3205
3206 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3207 {
3208         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3209
3210         vcpu->arch.sie_block->ibc = model->ibc;
3211         if (test_kvm_facility(vcpu->kvm, 7))
3212                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3213 }
3214
3215 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3216 {
3217         int rc = 0;
3218         u16 uvrc, uvrrc;
3219
3220         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3221                                                     CPUSTAT_SM |
3222                                                     CPUSTAT_STOPPED);
3223
3224         if (test_kvm_facility(vcpu->kvm, 78))
3225                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3226         else if (test_kvm_facility(vcpu->kvm, 8))
3227                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3228
3229         kvm_s390_vcpu_setup_model(vcpu);
3230
3231         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3232         if (MACHINE_HAS_ESOP)
3233                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3234         if (test_kvm_facility(vcpu->kvm, 9))
3235                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
3236         if (test_kvm_facility(vcpu->kvm, 73))
3237                 vcpu->arch.sie_block->ecb |= ECB_TE;
3238         if (!kvm_is_ucontrol(vcpu->kvm))
3239                 vcpu->arch.sie_block->ecb |= ECB_SPECI;
3240
3241         if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3242                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3243         if (test_kvm_facility(vcpu->kvm, 130))
3244                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3245         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3246         if (sclp.has_cei)
3247                 vcpu->arch.sie_block->eca |= ECA_CEI;
3248         if (sclp.has_ib)
3249                 vcpu->arch.sie_block->eca |= ECA_IB;
3250         if (sclp.has_siif)
3251                 vcpu->arch.sie_block->eca |= ECA_SII;
3252         if (sclp.has_sigpif)
3253                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
3254         if (test_kvm_facility(vcpu->kvm, 129)) {
3255                 vcpu->arch.sie_block->eca |= ECA_VX;
3256                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3257         }
3258         if (test_kvm_facility(vcpu->kvm, 139))
3259                 vcpu->arch.sie_block->ecd |= ECD_MEF;
3260         if (test_kvm_facility(vcpu->kvm, 156))
3261                 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3262         if (vcpu->arch.sie_block->gd) {
3263                 vcpu->arch.sie_block->eca |= ECA_AIV;
3264                 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3265                            vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3266         }
3267         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3268                                         | SDNXC;
3269         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3270
3271         if (sclp.has_kss)
3272                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3273         else
3274                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3275
3276         if (vcpu->kvm->arch.use_cmma) {
3277                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
3278                 if (rc)
3279                         return rc;
3280         }
3281         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3282         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3283
3284         vcpu->arch.sie_block->hpid = HPID_KVM;
3285
3286         kvm_s390_vcpu_crypto_setup(vcpu);
3287
3288         mutex_lock(&vcpu->kvm->lock);
3289         if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3290                 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3291                 if (rc)
3292                         kvm_s390_vcpu_unsetup_cmma(vcpu);
3293         }
3294         mutex_unlock(&vcpu->kvm->lock);
3295
3296         return rc;
3297 }
3298
3299 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3300 {
3301         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3302                 return -EINVAL;
3303         return 0;
3304 }
3305
3306 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3307 {
3308         struct sie_page *sie_page;
3309         int rc;
3310
3311         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3312         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3313         if (!sie_page)
3314                 return -ENOMEM;
3315
3316         vcpu->arch.sie_block = &sie_page->sie_block;
3317         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3318
3319         /* the real guest size will always be smaller than msl */
3320         vcpu->arch.sie_block->mso = 0;
3321         vcpu->arch.sie_block->msl = sclp.hamax;
3322
3323         vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3324         spin_lock_init(&vcpu->arch.local_int.lock);
3325         vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3326         if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3327                 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3328         seqcount_init(&vcpu->arch.cputm_seqcount);
3329
3330         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3331         kvm_clear_async_pf_completion_queue(vcpu);
3332         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3333                                     KVM_SYNC_GPRS |
3334                                     KVM_SYNC_ACRS |
3335                                     KVM_SYNC_CRS |
3336                                     KVM_SYNC_ARCH0 |
3337                                     KVM_SYNC_PFAULT |
3338                                     KVM_SYNC_DIAG318;
3339         kvm_s390_set_prefix(vcpu, 0);
3340         if (test_kvm_facility(vcpu->kvm, 64))
3341                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3342         if (test_kvm_facility(vcpu->kvm, 82))
3343                 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3344         if (test_kvm_facility(vcpu->kvm, 133))
3345                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3346         if (test_kvm_facility(vcpu->kvm, 156))
3347                 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3348         /* fprs can be synchronized via vrs, even if the guest has no vx. With
3349          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3350          */
3351         if (MACHINE_HAS_VX)
3352                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3353         else
3354                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3355
3356         if (kvm_is_ucontrol(vcpu->kvm)) {
3357                 rc = __kvm_ucontrol_vcpu_init(vcpu);
3358                 if (rc)
3359                         goto out_free_sie_block;
3360         }
3361
3362         VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3363                  vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3364         trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3365
3366         rc = kvm_s390_vcpu_setup(vcpu);
3367         if (rc)
3368                 goto out_ucontrol_uninit;
3369         return 0;
3370
3371 out_ucontrol_uninit:
3372         if (kvm_is_ucontrol(vcpu->kvm))
3373                 gmap_remove(vcpu->arch.gmap);
3374 out_free_sie_block:
3375         free_page((unsigned long)(vcpu->arch.sie_block));
3376         return rc;
3377 }
3378
3379 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3380 {
3381         clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
3382         return kvm_s390_vcpu_has_irq(vcpu, 0);
3383 }
3384
3385 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3386 {
3387         return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3388 }
3389
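/*
 * Setting PROG_BLOCK_SIE in prog20 prevents the vcpu from (re)entering SIE;
 * exit_sie() further below additionally kicks a vcpu that is currently
 * running in SIE and waits until it has left it.
 */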
3390 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3391 {
3392         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3393         exit_sie(vcpu);
3394 }
3395
3396 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3397 {
3398         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3399 }
3400
3401 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3402 {
3403         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3404         exit_sie(vcpu);
3405 }
3406
3407 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3408 {
3409         return atomic_read(&vcpu->arch.sie_block->prog20) &
3410                (PROG_BLOCK_SIE | PROG_REQUEST);
3411 }
3412
3413 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3414 {
3415         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3416 }
3417
3418 /*
3419  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3420  * If the CPU is not running (e.g. waiting as idle) the function will
3421  * return immediately. */
3422 void exit_sie(struct kvm_vcpu *vcpu)
3423 {
3424         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3425         kvm_s390_vsie_kick(vcpu);
3426         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3427                 cpu_relax();
3428 }
3429
3430 /* Kick a guest cpu out of SIE to process a request synchronously */
3431 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3432 {
3433         kvm_make_request(req, vcpu);
3434         kvm_s390_vcpu_request(vcpu);
3435 }
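
/*
 * Illustrative sketch, not part of the original source: forcing a vcpu to
 * re-evaluate state before its next guest entry. The hypothetical helper
 * queues a request and kicks the vcpu out of SIE, so that
 * kvm_s390_handle_requests() further below processes it before re-entry.
 */
static void example_force_guest_tlb_flush(struct kvm_vcpu *vcpu)
{
        kvm_s390_sync_request(KVM_REQ_TLB_FLUSH, vcpu);
}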
3436
3437 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3438                               unsigned long end)
3439 {
3440         struct kvm *kvm = gmap->private;
3441         struct kvm_vcpu *vcpu;
3442         unsigned long prefix;
3443         int i;
3444
3445         if (gmap_is_shadow(gmap))
3446                 return;
3447         if (start >= 1UL << 31)
3448                 /* We are only interested in prefix pages */
3449                 return;
3450         kvm_for_each_vcpu(i, vcpu, kvm) {
3451                 /* match against both prefix pages */
3452                 prefix = kvm_s390_get_prefix(vcpu);
3453                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3454                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3455                                    start, end);
3456                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3457                 }
3458         }
3459 }
3460
3461 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3462 {
3463         /* do not poll with more than halt_poll_max_steal percent of steal time */
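        /*
         * avg_steal_timer is kept in CPU-timer units, where one microsecond
         * corresponds to 4096 units, so TICK_USEC << 12 is one tick expressed
         * in the same units; the quotient thus approximates the percentage of
         * CPU time stolen by the hypervisor.
         */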
3464         if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3465             READ_ONCE(halt_poll_max_steal)) {
3466                 vcpu->stat.halt_no_poll_steal++;
3467                 return true;
3468         }
3469         return false;
3470 }
3471
3472 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3473 {
3474         /* kvm common code refers to this, but never calls it */
3475         BUG();
3476         return 0;
3477 }
3478
3479 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3480                                            struct kvm_one_reg *reg)
3481 {
3482         int r = -EINVAL;
3483
3484         switch (reg->id) {
3485         case KVM_REG_S390_TODPR:
3486                 r = put_user(vcpu->arch.sie_block->todpr,
3487                              (u32 __user *)reg->addr);
3488                 break;
3489         case KVM_REG_S390_EPOCHDIFF:
3490                 r = put_user(vcpu->arch.sie_block->epoch,
3491                              (u64 __user *)reg->addr);
3492                 break;
3493         case KVM_REG_S390_CPU_TIMER:
3494                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3495                              (u64 __user *)reg->addr);
3496                 break;
3497         case KVM_REG_S390_CLOCK_COMP:
3498                 r = put_user(vcpu->arch.sie_block->ckc,
3499                              (u64 __user *)reg->addr);
3500                 break;
3501         case KVM_REG_S390_PFTOKEN:
3502                 r = put_user(vcpu->arch.pfault_token,
3503                              (u64 __user *)reg->addr);
3504                 break;
3505         case KVM_REG_S390_PFCOMPARE:
3506                 r = put_user(vcpu->arch.pfault_compare,
3507                              (u64 __user *)reg->addr);
3508                 break;
3509         case KVM_REG_S390_PFSELECT:
3510                 r = put_user(vcpu->arch.pfault_select,
3511                              (u64 __user *)reg->addr);
3512                 break;
3513         case KVM_REG_S390_PP:
3514                 r = put_user(vcpu->arch.sie_block->pp,
3515                              (u64 __user *)reg->addr);
3516                 break;
3517         case KVM_REG_S390_GBEA:
3518                 r = put_user(vcpu->arch.sie_block->gbea,
3519                              (u64 __user *)reg->addr);
3520                 break;
3521         default:
3522                 break;
3523         }
3524
3525         return r;
3526 }
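
/*
 * Illustrative sketch, not part of the original source: userspace-side code
 * (built against <linux/kvm.h> and <sys/ioctl.h>, not in this file) reading
 * the guest CPU timer through the one-reg interface handled above. The
 * helper name and vcpu_fd parameter are hypothetical.
 */
static int example_get_guest_cpu_timer(int vcpu_fd, __u64 *cputm)
{
        struct kvm_one_reg reg = {
                .id   = KVM_REG_S390_CPU_TIMER,
                .addr = (__u64)(unsigned long)cputm,
        };

        return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
}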
3527
3528 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3529                                            struct kvm_one_reg *reg)
3530 {
3531         int r = -EINVAL;
3532         __u64 val;
3533
3534         switch (reg->id) {
3535         case KVM_REG_S390_TODPR:
3536                 r = get_user(vcpu->arch.sie_block->todpr,
3537                              (u32 __user *)reg->addr);
3538                 break;
3539         case KVM_REG_S390_EPOCHDIFF:
3540                 r = get_user(vcpu->arch.sie_block->epoch,
3541                              (u64 __user *)reg->addr);
3542                 break;
3543         case KVM_REG_S390_CPU_TIMER:
3544                 r = get_user(val, (u64 __user *)reg->addr);
3545                 if (!r)
3546                         kvm_s390_set_cpu_timer(vcpu, val);
3547                 break;
3548         case KVM_REG_S390_CLOCK_COMP:
3549                 r = get_user(vcpu->arch.sie_block->ckc,
3550                              (u64 __user *)reg->addr);
3551                 break;
3552         case KVM_REG_S390_PFTOKEN:
3553                 r = get_user(vcpu->arch.pfault_token,
3554                              (u64 __user *)reg->addr);
3555                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3556                         kvm_clear_async_pf_completion_queue(vcpu);
3557                 break;
3558         case KVM_REG_S390_PFCOMPARE:
3559                 r = get_user(vcpu->arch.pfault_compare,
3560                              (u64 __user *)reg->addr);
3561                 break;
3562         case KVM_REG_S390_PFSELECT:
3563                 r = get_user(vcpu->arch.pfault_select,
3564                              (u64 __user *)reg->addr);
3565                 break;
3566         case KVM_REG_S390_PP:
3567                 r = get_user(vcpu->arch.sie_block->pp,
3568                              (u64 __user *)reg->addr);
3569                 break;
3570         case KVM_REG_S390_GBEA:
3571                 r = get_user(vcpu->arch.sie_block->gbea,
3572                              (u64 __user *)reg->addr);
3573                 break;
3574         default:
3575                 break;
3576         }
3577
3578         return r;
3579 }
3580
3581 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3582 {
3583         vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3584         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3585         memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3586
3587         kvm_clear_async_pf_completion_queue(vcpu);
3588         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3589                 kvm_s390_vcpu_stop(vcpu);
3590         kvm_s390_clear_local_irqs(vcpu);
3591 }
3592
3593 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3594 {
3595         /* Initial reset is a superset of the normal reset */
3596         kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3597
3598         /*
3599          * This equals the initial CPU reset described in the PoP, but we don't
3600          * switch to ESA. We reset not only the internal data, but also ...
3601          */
3602         vcpu->arch.sie_block->gpsw.mask = 0;
3603         vcpu->arch.sie_block->gpsw.addr = 0;
3604         kvm_s390_set_prefix(vcpu, 0);
3605         kvm_s390_set_cpu_timer(vcpu, 0);
3606         vcpu->arch.sie_block->ckc = 0;
3607         memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3608         vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3609         vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3610
3611         /* ... the data in sync regs */
3612         memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3613         vcpu->run->s.regs.ckc = 0;
3614         vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3615         vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3616         vcpu->run->psw_addr = 0;
3617         vcpu->run->psw_mask = 0;
3618         vcpu->run->s.regs.todpr = 0;
3619         vcpu->run->s.regs.cputm = 0;
3620         vcpu->run->s.regs.ckc = 0;
3621         vcpu->run->s.regs.pp = 0;
3622         vcpu->run->s.regs.gbea = 1;
3623         vcpu->run->s.regs.fpc = 0;
3624         /*
3625          * Do not reset these registers in the protected case, as some of
3626          * them are overlaid and they are not accessible in this case
3627          * anyway.
3628          */
3629         if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3630                 vcpu->arch.sie_block->gbea = 1;
3631                 vcpu->arch.sie_block->pp = 0;
3632                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3633                 vcpu->arch.sie_block->todpr = 0;
3634         }
3635 }
3636
3637 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3638 {
3639         struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3640
3641         /* Clear reset is a superset of the initial reset */
3642         kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3643
3644         memset(&regs->gprs, 0, sizeof(regs->gprs));
3645         memset(&regs->vrs, 0, sizeof(regs->vrs));
3646         memset(&regs->acrs, 0, sizeof(regs->acrs));
3647         memset(&regs->gscb, 0, sizeof(regs->gscb));
3648
3649         regs->etoken = 0;
3650         regs->etoken_extension = 0;
3651 }
3652
3653 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3654 {
3655         vcpu_load(vcpu);
3656         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3657         vcpu_put(vcpu);
3658         return 0;
3659 }
3660
3661 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3662 {
3663         vcpu_load(vcpu);
3664         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3665         vcpu_put(vcpu);
3666         return 0;
3667 }
3668
3669 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3670                                   struct kvm_sregs *sregs)
3671 {
3672         vcpu_load(vcpu);
3673
3674         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3675         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3676
3677         vcpu_put(vcpu);
3678         return 0;
3679 }
3680
3681 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3682                                   struct kvm_sregs *sregs)
3683 {
3684         vcpu_load(vcpu);
3685
3686         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3687         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3688
3689         vcpu_put(vcpu);
3690         return 0;
3691 }
3692
3693 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3694 {
3695         int ret = 0;
3696
3697         vcpu_load(vcpu);
3698
3699         if (test_fp_ctl(fpu->fpc)) {
3700                 ret = -EINVAL;
3701                 goto out;
3702         }
3703         vcpu->run->s.regs.fpc = fpu->fpc;
3704         if (MACHINE_HAS_VX)
3705                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3706                                  (freg_t *) fpu->fprs);
3707         else
3708                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3709
3710 out:
3711         vcpu_put(vcpu);
3712         return ret;
3713 }
3714
3715 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3716 {
3717         vcpu_load(vcpu);
3718
3719         /* make sure we have the latest values */
3720         save_fpu_regs();
3721         if (MACHINE_HAS_VX)
3722                 convert_vx_to_fp((freg_t *) fpu->fprs,
3723                                  (__vector128 *) vcpu->run->s.regs.vrs);
3724         else
3725                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3726         fpu->fpc = vcpu->run->s.regs.fpc;
3727
3728         vcpu_put(vcpu);
3729         return 0;
3730 }
3731
3732 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3733 {
3734         int rc = 0;
3735
3736         if (!is_vcpu_stopped(vcpu))
3737                 rc = -EBUSY;
3738         else {
3739                 vcpu->run->psw_mask = psw.mask;
3740                 vcpu->run->psw_addr = psw.addr;
3741         }
3742         return rc;
3743 }
3744
3745 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3746                                   struct kvm_translation *tr)
3747 {
3748         return -EINVAL; /* not implemented yet */
3749 }
3750
3751 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3752                               KVM_GUESTDBG_USE_HW_BP | \
3753                               KVM_GUESTDBG_ENABLE)
3754
3755 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3756                                         struct kvm_guest_debug *dbg)
3757 {
3758         int rc = 0;
3759
3760         vcpu_load(vcpu);
3761
3762         vcpu->guest_debug = 0;
3763         kvm_s390_clear_bp_data(vcpu);
3764
3765         if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3766                 rc = -EINVAL;
3767                 goto out;
3768         }
3769         if (!sclp.has_gpere) {
3770                 rc = -EINVAL;
3771                 goto out;
3772         }
3773
3774         if (dbg->control & KVM_GUESTDBG_ENABLE) {
3775                 vcpu->guest_debug = dbg->control;
3776                 /* enforce guest PER */
3777                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3778
3779                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3780                         rc = kvm_s390_import_bp_data(vcpu, dbg);
3781         } else {
3782                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3783                 vcpu->arch.guestdbg.last_bp = 0;
3784         }
3785
3786         if (rc) {
3787                 vcpu->guest_debug = 0;
3788                 kvm_s390_clear_bp_data(vcpu);
3789                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3790         }
3791
3792 out:
3793         vcpu_put(vcpu);
3794         return rc;
3795 }
3796
3797 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3798                                     struct kvm_mp_state *mp_state)
3799 {
3800         int ret;
3801
3802         vcpu_load(vcpu);
3803
3804         /* CHECK_STOP and LOAD are not supported yet */
3805         ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3806                                       KVM_MP_STATE_OPERATING;
3807
3808         vcpu_put(vcpu);
3809         return ret;
3810 }
3811
3812 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3813                                     struct kvm_mp_state *mp_state)
3814 {
3815         int rc = 0;
3816
3817         vcpu_load(vcpu);
3818
3819         /* user space knows about this interface - let it control the state */
3820         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3821
3822         switch (mp_state->mp_state) {
3823         case KVM_MP_STATE_STOPPED:
3824                 rc = kvm_s390_vcpu_stop(vcpu);
3825                 break;
3826         case KVM_MP_STATE_OPERATING:
3827                 rc = kvm_s390_vcpu_start(vcpu);
3828                 break;
3829         case KVM_MP_STATE_LOAD:
3830                 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3831                         rc = -ENXIO;
3832                         break;
3833                 }
3834                 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3835                 break;
3836         case KVM_MP_STATE_CHECK_STOP:
3837                 fallthrough;    /* CHECK_STOP and LOAD are not supported yet */
3838         default:
3839                 rc = -ENXIO;
3840         }
3841
3842         vcpu_put(vcpu);
3843         return rc;
3844 }
3845
3846 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3847 {
3848         return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3849 }
3850
3851 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3852 {
3853 retry:
3854         kvm_s390_vcpu_request_handled(vcpu);
3855         if (!kvm_request_pending(vcpu))
3856                 return 0;
3857         /*
3858          * We use MMU_RELOAD just to re-arm the ipte notifier for the
3859          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3860          * This ensures that the ipte instruction for this request has
3861          * already finished. We might race against a second unmapper that
3862          * wants to set the blocking bit. Let's just retry the request loop.
3863          */
3864         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3865                 int rc;
3866                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3867                                           kvm_s390_get_prefix(vcpu),
3868                                           PAGE_SIZE * 2, PROT_WRITE);
3869                 if (rc) {
3870                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3871                         return rc;
3872                 }
3873                 goto retry;
3874         }
3875
3876         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3877                 vcpu->arch.sie_block->ihcpu = 0xffff;
3878                 goto retry;
3879         }
3880
3881         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3882                 if (!ibs_enabled(vcpu)) {
3883                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3884                         kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3885                 }
3886                 goto retry;
3887         }
3888
3889         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3890                 if (ibs_enabled(vcpu)) {
3891                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3892                         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3893                 }
3894                 goto retry;
3895         }
3896
3897         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3898                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3899                 goto retry;
3900         }
3901
3902         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3903                 /*
3904                  * Disable CMM virtualization; we will emulate the ESSA
3905                  * instruction manually, in order to provide the additional
3906                  * functionality needed for live migration.
3907                  */
3908                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3909                 goto retry;
3910         }
3911
3912         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3913                 /*
3914                  * Re-enable CMM virtualization if CMMA is available and
3915                  * CMM has been used.
3916                  */
3917                 if ((vcpu->kvm->arch.use_cmma) &&
3918                     (vcpu->kvm->mm->context.uses_cmm))
3919                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3920                 goto retry;
3921         }
3922
3923         /* nothing to do, just clear the request */
3924         kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3925         /* we left the vsie handler, nothing to do, just clear the request */
3926         kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3927
3928         return 0;
3929 }
3930
3931 static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
3932 {
3933         struct kvm_vcpu *vcpu;
3934         union tod_clock clk;
3935         int i;
3936
3937         preempt_disable();
3938
3939         store_tod_clock_ext(&clk);
3940
3941         kvm->arch.epoch = gtod->tod - clk.tod;
3942         kvm->arch.epdx = 0;
3943         if (test_kvm_facility(kvm, 139)) {
3944                 kvm->arch.epdx = gtod->epoch_idx - clk.ei;
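                /* borrow from the epoch index if the epoch subtraction wrapped */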
3945                 if (kvm->arch.epoch > gtod->tod)
3946                         kvm->arch.epdx -= 1;
3947         }
3948
3949         kvm_s390_vcpu_block_all(kvm);
3950         kvm_for_each_vcpu(i, vcpu, kvm) {
3951                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3952                 vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3953         }
3954
3955         kvm_s390_vcpu_unblock_all(kvm);
3956         preempt_enable();
3957 }
3958
3959 int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
3960 {
3961         if (!mutex_trylock(&kvm->lock))
3962                 return 0;
3963         __kvm_s390_set_tod_clock(kvm, gtod);
3964         mutex_unlock(&kvm->lock);
3965         return 1;
3966 }
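
/*
 * Illustrative sketch, not part of the original source: a hypothetical caller
 * of kvm_s390_try_set_tod_clock() has to deal with the trylock failing
 * (return value 0), typically by letting the intercepted instruction be
 * retried instead of sleeping on kvm->lock.
 */
static int example_set_guest_tod(struct kvm_vcpu *vcpu,
                                 const struct kvm_s390_vm_tod_clock *gtod)
{
        if (!kvm_s390_try_set_tod_clock(vcpu->kvm, gtod))
                return -EAGAIN; /* the caller would retry the instruction */
        return 0;
}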
3967
3968 /**
3969  * kvm_arch_fault_in_page - fault-in guest page if necessary
3970  * @vcpu: The corresponding virtual cpu
3971  * @gpa: Guest physical address
3972  * @writable: Whether the page should be writable or not
3973  *
3974  * Make sure that a guest page has been faulted-in on the host.
3975  *
3976  * Return: Zero on success, negative error code otherwise.
3977  */
3978 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3979 {
3980         return gmap_fault(vcpu->arch.gmap, gpa,
3981                           writable ? FAULT_FLAG_WRITE : 0);
3982 }
3983
3984 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3985                                       unsigned long token)
3986 {
3987         struct kvm_s390_interrupt inti;
3988         struct kvm_s390_irq irq;
3989
3990         if (start_token) {
3991                 irq.u.ext.ext_params2 = token;
3992                 irq.type = KVM_S390_INT_PFAULT_INIT;
3993                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3994         } else {
3995                 inti.type = KVM_S390_INT_PFAULT_DONE;
3996                 inti.parm64 = token;
3997                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3998         }
3999 }
4000
4001 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
4002                                      struct kvm_async_pf *work)
4003 {
4004         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
4005         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
4006
4007         return true;
4008 }
4009
4010 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
4011                                  struct kvm_async_pf *work)
4012 {
4013         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
4014         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
4015 }
4016
4017 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
4018                                struct kvm_async_pf *work)
4019 {
4020         /* s390 will always inject the page directly */
4021 }
4022
4023 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
4024 {
4025         /*
4026          * s390 will always inject the page directly,
4027          * but we still want check_async_completion to clean up
4028          */
4029         return true;
4030 }
4031
4032 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
4033 {
4034         hva_t hva;
4035         struct kvm_arch_async_pf arch;
4036
4037         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4038                 return false;
4039         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
4040             vcpu->arch.pfault_compare)
4041                 return false;
4042         if (psw_extint_disabled(vcpu))
4043                 return false;
4044         if (kvm_s390_vcpu_has_irq(vcpu, 0))
4045                 return false;
4046         if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
4047                 return false;
4048         if (!vcpu->arch.gmap->pfault_enabled)
4049                 return false;
4050
4051         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
4052         hva += current->thread.gmap_addr & ~PAGE_MASK;
4053         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
4054                 return false;
4055
4056         return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
4057 }
4058
4059 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
4060 {
4061         int rc, cpuflags;
4062
4063         /*
4064          * On s390 notifications for arriving pages will be delivered directly
4065          * to the guest, but the housekeeping for completed pfaults is
4066          * handled outside the worker.
4067          */
4068         kvm_check_async_pf_completion(vcpu);
4069
4070         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4071         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4072
4073         if (need_resched())
4074                 schedule();
4075
4076         if (!kvm_is_ucontrol(vcpu->kvm)) {
4077                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
4078                 if (rc)
4079                         return rc;
4080         }
4081
4082         rc = kvm_s390_handle_requests(vcpu);
4083         if (rc)
4084                 return rc;
4085
4086         if (guestdbg_enabled(vcpu)) {
4087                 kvm_s390_backup_guest_per_regs(vcpu);
4088                 kvm_s390_patch_guest_per_regs(vcpu);
4089         }
4090
4091         clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
4092
4093         vcpu->arch.sie_block->icptcode = 0;
4094         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4095         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4096         trace_kvm_s390_sie_enter(vcpu, cpuflags);
4097
4098         return 0;
4099 }
4100
4101 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4102 {
4103         struct kvm_s390_pgm_info pgm_info = {
4104                 .code = PGM_ADDRESSING,
4105         };
4106         u8 opcode, ilen;
4107         int rc;
4108
4109         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4110         trace_kvm_s390_sie_fault(vcpu);
4111
4112         /*
4113          * We want to inject an addressing exception, which is defined as a
4114          * suppressing or terminating exception. However, since we came here
4115          * by a DAT access exception, the PSW still points to the faulting
4116          * instruction since DAT exceptions are nullifying. So we've got
4117          * to look up the current opcode to get the length of the instruction
4118          * to be able to forward the PSW.
4119          */
4120         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4121         ilen = insn_length(opcode);
4122         if (rc < 0) {
4123                 return rc;
4124         } else if (rc) {
4125                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
4126                  * Forward by an arbitrary ilc; injection will take care of
4127                  * nullification if necessary.
4128                  */
4129                 pgm_info = vcpu->arch.pgm;
4130                 ilen = 4;
4131         }
4132         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4133         kvm_s390_forward_psw(vcpu, ilen);
4134         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4135 }
4136
4137 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4138 {
4139         struct mcck_volatile_info *mcck_info;
4140         struct sie_page *sie_page;
4141
4142         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4143                    vcpu->arch.sie_block->icptcode);
4144         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4145
4146         if (guestdbg_enabled(vcpu))
4147                 kvm_s390_restore_guest_per_regs(vcpu);
4148
4149         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4150         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4151
4152         if (exit_reason == -EINTR) {
4153                 VCPU_EVENT(vcpu, 3, "%s", "machine check");
4154                 sie_page = container_of(vcpu->arch.sie_block,
4155                                         struct sie_page, sie_block);
4156                 mcck_info = &sie_page->mcck_info;
4157                 kvm_s390_reinject_machine_check(vcpu, mcck_info);
4158                 return 0;
4159         }
4160
4161         if (vcpu->arch.sie_block->icptcode > 0) {
4162                 int rc = kvm_handle_sie_intercept(vcpu);
4163
4164                 if (rc != -EOPNOTSUPP)
4165                         return rc;
4166                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4167                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4168                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4169                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4170                 return -EREMOTE;
4171         } else if (exit_reason != -EFAULT) {
4172                 vcpu->stat.exit_null++;
4173                 return 0;
4174         } else if (kvm_is_ucontrol(vcpu->kvm)) {
4175                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4176                 vcpu->run->s390_ucontrol.trans_exc_code =
4177                                                 current->thread.gmap_addr;
4178                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
4179                 return -EREMOTE;
4180         } else if (current->thread.gmap_pfault) {
4181                 trace_kvm_s390_major_guest_pfault(vcpu);
4182                 current->thread.gmap_pfault = 0;
4183                 if (kvm_arch_setup_async_pf(vcpu))
4184                         return 0;
4185                 vcpu->stat.pfault_sync++;
4186                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4187         }
4188         return vcpu_post_run_fault_in_sie(vcpu);
4189 }
4190
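     /* PSW bits that enable external, I/O and machine check interruptions */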
4191 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4192 static int __vcpu_run(struct kvm_vcpu *vcpu)
4193 {
4194         int rc, exit_reason;
4195         struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4196
4197         /*
4198          * We try to hold kvm->srcu during most of vcpu_run (except when
4199          * running the guest), so that memslots (and other stuff) are protected
4200          */
4201         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4202
4203         do {
4204                 rc = vcpu_pre_run(vcpu);
4205                 if (rc)
4206                         break;
4207
4208                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4209                 /*
4210                  * As PF_VCPU will be used in the fault handler, there must be
4211                  * no uaccess between guest_enter and guest_exit.
4212                  */
4213                 local_irq_disable();
4214                 guest_enter_irqoff();
4215                 __disable_cpu_timer_accounting(vcpu);
4216                 local_irq_enable();
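                      /*
                       * For protected guests, the general purpose registers are
                       * passed to and from SIE via the pv_grregs area of the
                       * SIE page.
                       */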
4217                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4218                         memcpy(sie_page->pv_grregs,
4219                                vcpu->run->s.regs.gprs,
4220                                sizeof(sie_page->pv_grregs));
4221                 }
4222                 if (test_cpu_flag(CIF_FPU))
4223                         load_fpu_regs();
4224                 exit_reason = sie64a(vcpu->arch.sie_block,
4225                                      vcpu->run->s.regs.gprs);
4226                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4227                         memcpy(vcpu->run->s.regs.gprs,
4228                                sie_page->pv_grregs,
4229                                sizeof(sie_page->pv_grregs));
4230                         /*
4231                          * We're not allowed to inject interrupts on intercepts
4232                          * that leave the guest state in an "in-between" state
4233                          * where the next SIE entry will do a continuation.
4234                          * Fence interrupts in our "internal" PSW.
4235                          */
4236                         if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4237                             vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4238                                 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4239                         }
4240                 }
4241                 local_irq_disable();
4242                 __enable_cpu_timer_accounting(vcpu);
4243                 guest_exit_irqoff();
4244                 local_irq_enable();
4245                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4246
4247                 rc = vcpu_post_run(vcpu, exit_reason);
4248         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4249
4250         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4251         return rc;
4252 }
4253
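     /*
      * Sync the parts of the register state that only exist for
      * non-protected guests from kvm_run into the vcpu.
      */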
4254 static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4255 {
4256         struct kvm_run *kvm_run = vcpu->run;
4257         struct runtime_instr_cb *riccb;
4258         struct gs_cb *gscb;
4259
4260         riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4261         gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4262         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4263         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4264         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4265                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4266                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4267                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4268         }
4269         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4270                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4271                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4272                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4273                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4274                         kvm_clear_async_pf_completion_queue(vcpu);
4275         }
4276         if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4277                 vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4278                 vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4279         }
4280         /*
4281          * If userspace sets the riccb (e.g. after migration) to a valid state,
4282          * we should enable RI here instead of doing the lazy enablement.
4283          */
4284         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4285             test_kvm_facility(vcpu->kvm, 64) &&
4286             riccb->v &&
4287             !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4288                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4289                 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4290         }
4291         /*
4292          * If userspace sets the gscb (e.g. after migration) to non-zero,
4293          * we should enable GS here instead of doing the lazy enablement.
4294          */
4295         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4296             test_kvm_facility(vcpu->kvm, 133) &&
4297             gscb->gssm &&
4298             !vcpu->arch.gs_enabled) {
4299                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4300                 vcpu->arch.sie_block->ecb |= ECB_GS;
4301                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4302                 vcpu->arch.gs_enabled = 1;
4303         }
4304         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4305             test_kvm_facility(vcpu->kvm, 82)) {
4306                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4307                 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4308         }
4309         if (MACHINE_HAS_GS) {
4310                 preempt_disable();
4311                 __ctl_set_bit(2, 4);
4312                 if (current->thread.gs_cb) {
4313                         vcpu->arch.host_gscb = current->thread.gs_cb;
4314                         save_gs_cb(vcpu->arch.host_gscb);
4315                 }
4316                 if (vcpu->arch.gs_enabled) {
4317                         current->thread.gs_cb = (struct gs_cb *)
4318                                                 &vcpu->run->s.regs.gscb;
4319                         restore_gs_cb(current->thread.gs_cb);
4320                 }
4321                 preempt_enable();
4322         }
4323         /* SIE will load etoken directly from SDNX and therefore kvm_run */
4324 }
4325
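     /*
      * Transfer the register state that userspace marked dirty in kvm_run
      * into the vcpu and the SIE control block before entering SIE.
      */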
4326 static void sync_regs(struct kvm_vcpu *vcpu)
4327 {
4328         struct kvm_run *kvm_run = vcpu->run;
4329
4330         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4331                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4332         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4333                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4334                 /* some control register changes require a tlb flush */
4335                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4336         }
4337         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4338                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4339                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4340         }
4341         save_access_regs(vcpu->arch.host_acrs);
4342         restore_access_regs(vcpu->run->s.regs.acrs);
4343         /* save host (userspace) fprs/vrs */
4344         save_fpu_regs();
4345         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4346         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4347         if (MACHINE_HAS_VX)
4348                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4349         else
4350                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4351         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4352         if (test_fp_ctl(current->thread.fpu.fpc))
4353                 /* User space provided an invalid FPC, let's clear it */
4354                 current->thread.fpu.fpc = 0;
4355
4356         /* Sync fmt2 only data */
4357         if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4358                 sync_regs_fmt2(vcpu);
4359         } else {
4360                 /*
4361                  * In several places we have to modify our internal view to
4362                  * not do things that are disallowed by the ultravisor. For
4363                  * example we must not inject interrupts after specific exits
4364                  * (e.g. 112 prefix page not secure). We do this by turning
4365                  * off the machine check, external and I/O interrupt bits
4366                  * of our PSW copy. To avoid getting validity intercepts, we
4367                  * only accept the condition code from userspace.
4368                  */
4369                 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4370                 vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4371                                                    PSW_MASK_CC;
4372         }
4373
4374         kvm_run->kvm_dirty_regs = 0;
4375 }
4376
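     /*
      * Store back the parts of the register state that only exist for
      * non-protected guests into kvm_run.
      */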
4377 static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4378 {
4379         struct kvm_run *kvm_run = vcpu->run;
4380
4381         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4382         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4383         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4384         kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4385         kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4386         if (MACHINE_HAS_GS) {
4387                 preempt_disable();
4388                 __ctl_set_bit(2, 4);
4389                 if (vcpu->arch.gs_enabled)
4390                         save_gs_cb(current->thread.gs_cb);
4391                 current->thread.gs_cb = vcpu->arch.host_gscb;
4392                 restore_gs_cb(vcpu->arch.host_gscb);
4393                 if (!vcpu->arch.host_gscb)
4394                         __ctl_clear_bit(2, 4);
4395                 vcpu->arch.host_gscb = NULL;
4396                 preempt_enable();
4397         }
4398         /* SIE will save etoken directly into SDNX and therefore kvm_run */
4399 }
4400
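     /* Copy the current vcpu state back into kvm_run after leaving SIE */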
4401 static void store_regs(struct kvm_vcpu *vcpu)
4402 {
4403         struct kvm_run *kvm_run = vcpu->run;
4404
4405         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4406         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4407         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4408         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4409         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4410         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4411         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4412         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4413         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4414         save_access_regs(vcpu->run->s.regs.acrs);
4415         restore_access_regs(vcpu->arch.host_acrs);
4416         /* Save guest register state */
4417         save_fpu_regs();
4418         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4419         /* Restore will be done lazily at return */
4420         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4421         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4422         if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4423                 store_regs_fmt2(vcpu);
4424 }
4425
4426 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4427 {
4428         struct kvm_run *kvm_run = vcpu->run;
4429         int rc;
4430
4431         if (kvm_run->immediate_exit)
4432                 return -EINTR;
4433
4434         if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4435             kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4436                 return -EINVAL;
4437
4438         vcpu_load(vcpu);
4439
4440         if (guestdbg_exit_pending(vcpu)) {
4441                 kvm_s390_prepare_debug_exit(vcpu);
4442                 rc = 0;
4443                 goto out;
4444         }
4445
4446         kvm_sigset_activate(vcpu);
4447
4448         /*
4449          * no need to check the return value of vcpu_start as it can only
4450          * fail for protvirt, and protvirt implies user cpu state control
4451          */
4452         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4453                 kvm_s390_vcpu_start(vcpu);
4454         } else if (is_vcpu_stopped(vcpu)) {
4455                 pr_err_ratelimited("can't run stopped vcpu %d\n",
4456                                    vcpu->vcpu_id);
4457                 rc = -EINVAL;
4458                 goto out;
4459         }
4460
4461         sync_regs(vcpu);
4462         enable_cpu_timer_accounting(vcpu);
4463
4464         might_fault();
4465         rc = __vcpu_run(vcpu);
4466
4467         if (signal_pending(current) && !rc) {
4468                 kvm_run->exit_reason = KVM_EXIT_INTR;
4469                 rc = -EINTR;
4470         }
4471
4472         if (guestdbg_exit_pending(vcpu) && !rc)  {
4473                 kvm_s390_prepare_debug_exit(vcpu);
4474                 rc = 0;
4475         }
4476
4477         if (rc == -EREMOTE) {
4478                 /* userspace support is needed, kvm_run has been prepared */
4479                 rc = 0;
4480         }
4481
4482         disable_cpu_timer_accounting(vcpu);
4483         store_regs(vcpu);
4484
4485         kvm_sigset_deactivate(vcpu);
4486
4487         vcpu->stat.exit_userspace++;
4488 out:
4489         vcpu_put(vcpu);
4490         return rc;
4491 }
4492
4493 /*
4494  * store status at address
4495  * we have two special cases:
4496  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4497  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4498  */
4499 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4500 {
4501         unsigned char archmode = 1;
4502         freg_t fprs[NUM_FPRS];
4503         unsigned int px;
4504         u64 clkcomp, cputm;
4505         int rc;
4506
4507         px = kvm_s390_get_prefix(vcpu);
4508         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4509                 if (write_guest_abs(vcpu, 163, &archmode, 1))
4510                         return -EFAULT;
4511                 gpa = 0;
4512         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4513                 if (write_guest_real(vcpu, 163, &archmode, 1))
4514                         return -EFAULT;
4515                 gpa = px;
4516         } else
4517                 gpa -= __LC_FPREGS_SAVE_AREA;
4518
4519         /* manually convert vector registers if necessary */
4520         if (MACHINE_HAS_VX) {
4521                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4522                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4523                                      fprs, 128);
4524         } else {
4525                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4526                                      vcpu->run->s.regs.fprs, 128);
4527         }
4528         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4529                               vcpu->run->s.regs.gprs, 128);
4530         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4531                               &vcpu->arch.sie_block->gpsw, 16);
4532         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4533                               &px, 4);
4534         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4535                               &vcpu->run->s.regs.fpc, 4);
4536         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4537                               &vcpu->arch.sie_block->todpr, 4);
4538         cputm = kvm_s390_get_cpu_timer(vcpu);
4539         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4540                               &cputm, 8);
4541         clkcomp = vcpu->arch.sie_block->ckc >> 8;
4542         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4543                               &clkcomp, 8);
4544         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4545                               &vcpu->run->s.regs.acrs, 64);
4546         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4547                               &vcpu->arch.sie_block->gcr, 128);
4548         return rc ? -EFAULT : 0;
4549 }
4550
4551 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4552 {
4553         /*
4554          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4555          * switch in the run ioctl. Let's update our copies before we save
4556          * them into the save area
4557          */
4558         save_fpu_regs();
4559         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4560         save_access_regs(vcpu->run->s.regs.acrs);
4561
4562         return kvm_s390_store_status_unloaded(vcpu, addr);
4563 }
4564
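     /*
      * IBS only speeds up a guest while a single VCPU is running; these
      * helpers queue the corresponding enable/disable requests.
      */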
4565 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4566 {
4567         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4568         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4569 }
4570
4571 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4572 {
4573         unsigned int i;
4574         struct kvm_vcpu *vcpu;
4575
4576         kvm_for_each_vcpu(i, vcpu, kvm) {
4577                 __disable_ibs_on_vcpu(vcpu);
4578         }
4579 }
4580
4581 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4582 {
4583         if (!sclp.has_ibs)
4584                 return;
4585         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4586         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4587 }
4588
4589 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4590 {
4591         int i, online_vcpus, r = 0, started_vcpus = 0;
4592
4593         if (!is_vcpu_stopped(vcpu))
4594                 return 0;
4595
4596         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4597         /* Only one cpu at a time may enter/leave the STOPPED state. */
4598         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4599         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4600
4601         /* Let's tell the UV that we want to change into the operating state */
4602         if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4603                 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4604                 if (r) {
4605                         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4606                         return r;
4607                 }
4608         }
4609
4610         for (i = 0; i < online_vcpus; i++) {
4611                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4612                         started_vcpus++;
4613         }
4614
4615         if (started_vcpus == 0) {
4616                 /* we're the only active VCPU -> speed it up */
4617                 __enable_ibs_on_vcpu(vcpu);
4618         } else if (started_vcpus == 1) {
4619                 /*
4620                  * As we are starting a second VCPU, we have to disable
4621                  * the IBS facility on all VCPUs to remove potentially
4622                  * outstanding ENABLE requests.
4623                  */
4624                 __disable_ibs_on_all_vcpus(vcpu->kvm);
4625         }
4626
4627         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4628         /*
4629          * The real PSW might have changed due to a RESTART interpreted by the
4630          * ultravisor. We block all interrupts and let the next sie exit
4631          * refresh our view.
4632          */
4633         if (kvm_s390_pv_cpu_is_protected(vcpu))
4634                 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4635         /*
4636          * Another VCPU might have used IBS while we were offline.
4637          * Let's play safe and flush the VCPU at startup.
4638          */
4639         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4640         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4641         return 0;
4642 }
4643
4644 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4645 {
4646         int i, online_vcpus, r = 0, started_vcpus = 0;
4647         struct kvm_vcpu *started_vcpu = NULL;
4648
4649         if (is_vcpu_stopped(vcpu))
4650                 return 0;
4651
4652         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4653         /* Only one cpu at a time may enter/leave the STOPPED state. */
4654         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4655         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4656
4657         /* Let's tell the UV that we want to change into the stopped state */
4658         if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4659                 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4660                 if (r) {
4661                         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4662                         return r;
4663                 }
4664         }
4665
4666         /*
4667          * Set the VCPU to STOPPED and THEN clear the interrupt flag,
4668          * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
4669          * have been fully processed. This will ensure that the VCPU
4670          * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
4671          */
4672         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4673         kvm_s390_clear_stop_irq(vcpu);
4674
4675         __disable_ibs_on_vcpu(vcpu);
4676
4677         for (i = 0; i < online_vcpus; i++) {
4678                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4679                         started_vcpus++;
4680                         started_vcpu = vcpu->kvm->vcpus[i];
4681                 }
4682         }
4683
4684         if (started_vcpus == 1) {
4685                 /*
4686                  * As we only have one VCPU left, we want to enable the
4687                  * IBS facility for that VCPU to speed it up.
4688                  */
4689                 __enable_ibs_on_vcpu(started_vcpu);
4690         }
4691
4692         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4693         return 0;
4694 }
4695
4696 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4697                                      struct kvm_enable_cap *cap)
4698 {
4699         int r;
4700
4701         if (cap->flags)
4702                 return -EINVAL;
4703
4704         switch (cap->cap) {
4705         case KVM_CAP_S390_CSS_SUPPORT:
4706                 if (!vcpu->kvm->arch.css_support) {
4707                         vcpu->kvm->arch.css_support = 1;
4708                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4709                         trace_kvm_s390_enable_css(vcpu->kvm);
4710                 }
4711                 r = 0;
4712                 break;
4713         default:
4714                 r = -EINVAL;
4715                 break;
4716         }
4717         return r;
4718 }
4719
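     /*
      * Handle KVM_S390_MEMOP_SIDA_READ/WRITE: copy data between userspace
      * and the secure instruction data area (SIDA) of a protected vcpu.
      */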
4720 static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
4721                                    struct kvm_s390_mem_op *mop)
4722 {
4723         void __user *uaddr = (void __user *)mop->buf;
4724         int r = 0;
4725
4726         if (mop->flags || !mop->size)
4727                 return -EINVAL;
4728         if (mop->size + mop->sida_offset < mop->size)
4729                 return -EINVAL;
4730         if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4731                 return -E2BIG;
4732         if (!kvm_s390_pv_cpu_is_protected(vcpu))
4733                 return -EINVAL;
4734
4735         switch (mop->op) {
4736         case KVM_S390_MEMOP_SIDA_READ:
4737                 if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4738                                  mop->sida_offset), mop->size))
4739                         r = -EFAULT;
4740
4741                 break;
4742         case KVM_S390_MEMOP_SIDA_WRITE:
4743                 if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4744                                    mop->sida_offset), uaddr, mop->size))
4745                         r = -EFAULT;
4746                 break;
4747         }
4748         return r;
4749 }
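
     /*
      * Handle KVM_S390_MEMOP_LOGICAL_READ/WRITE for non-protected guests.
      *
      * A rough userspace sketch (not part of this file, assuming the
      * struct kvm_s390_mem_op layout from <linux/kvm.h>) of a logical read:
      *
      *   struct kvm_s390_mem_op op = {
      *           .gaddr = guest_addr,
      *           .size  = len,
      *           .op    = KVM_S390_MEMOP_LOGICAL_READ,
      *           .buf   = (__u64)(unsigned long)buffer,
      *           .ar    = 0,
      *   };
      *   ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
      */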
4750 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4751                                   struct kvm_s390_mem_op *mop)
4752 {
4753         void __user *uaddr = (void __user *)mop->buf;
4754         void *tmpbuf = NULL;
4755         int r = 0;
4756         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4757                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
4758
4759         if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4760                 return -EINVAL;
4761
4762         if (mop->size > MEM_OP_MAX_SIZE)
4763                 return -E2BIG;
4764
4765         if (kvm_s390_pv_cpu_is_protected(vcpu))
4766                 return -EINVAL;
4767
4768         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4769                 tmpbuf = vmalloc(mop->size);
4770                 if (!tmpbuf)
4771                         return -ENOMEM;
4772         }
4773
4774         switch (mop->op) {
4775         case KVM_S390_MEMOP_LOGICAL_READ:
4776                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4777                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4778                                             mop->size, GACC_FETCH);
4779                         break;
4780                 }
4781                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4782                 if (r == 0) {
4783                         if (copy_to_user(uaddr, tmpbuf, mop->size))
4784                                 r = -EFAULT;
4785                 }
4786                 break;
4787         case KVM_S390_MEMOP_LOGICAL_WRITE:
4788                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4789                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4790                                             mop->size, GACC_STORE);
4791                         break;
4792                 }
4793                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4794                         r = -EFAULT;
4795                         break;
4796                 }
4797                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4798                 break;
4799         }
4800
4801         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4802                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4803
4804         vfree(tmpbuf);
4805         return r;
4806 }
4807
4808 static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
4809                                       struct kvm_s390_mem_op *mop)
4810 {
4811         int r, srcu_idx;
4812
4813         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4814
4815         switch (mop->op) {
4816         case KVM_S390_MEMOP_LOGICAL_READ:
4817         case KVM_S390_MEMOP_LOGICAL_WRITE:
4818                 r = kvm_s390_guest_mem_op(vcpu, mop);
4819                 break;
4820         case KVM_S390_MEMOP_SIDA_READ:
4821         case KVM_S390_MEMOP_SIDA_WRITE:
4822                 /* we are locked against sida going away by the vcpu->mutex */
4823                 r = kvm_s390_guest_sida_op(vcpu, mop);
4824                 break;
4825         default:
4826                 r = -EINVAL;
4827         }
4828
4829         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4830         return r;
4831 }
4832
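     /*
      * Interrupt injection ioctls are handled here without taking the vcpu
      * mutex, so they can be issued while the vcpu is running.
      */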
4833 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4834                                unsigned int ioctl, unsigned long arg)
4835 {
4836         struct kvm_vcpu *vcpu = filp->private_data;
4837         void __user *argp = (void __user *)arg;
4838
4839         switch (ioctl) {
4840         case KVM_S390_IRQ: {
4841                 struct kvm_s390_irq s390irq;
4842
4843                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4844                         return -EFAULT;
4845                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4846         }
4847         case KVM_S390_INTERRUPT: {
4848                 struct kvm_s390_interrupt s390int;
4849                 struct kvm_s390_irq s390irq = {};
4850
4851                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
4852                         return -EFAULT;
4853                 if (s390int_to_s390irq(&s390int, &s390irq))
4854                         return -EINVAL;
4855                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4856         }
4857         }
4858         return -ENOIOCTLCMD;
4859 }
4860
4861 long kvm_arch_vcpu_ioctl(struct file *filp,
4862                          unsigned int ioctl, unsigned long arg)
4863 {
4864         struct kvm_vcpu *vcpu = filp->private_data;
4865         void __user *argp = (void __user *)arg;
4866         int idx;
4867         long r;
4868         u16 rc, rrc;
4869
4870         vcpu_load(vcpu);
4871
4872         switch (ioctl) {
4873         case KVM_S390_STORE_STATUS:
4874                 idx = srcu_read_lock(&vcpu->kvm->srcu);
4875                 r = kvm_s390_store_status_unloaded(vcpu, arg);
4876                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4877                 break;
4878         case KVM_S390_SET_INITIAL_PSW: {
4879                 psw_t psw;
4880
4881                 r = -EFAULT;
4882                 if (copy_from_user(&psw, argp, sizeof(psw)))
4883                         break;
4884                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4885                 break;
4886         }
4887         case KVM_S390_CLEAR_RESET:
4888                 r = 0;
4889                 kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4890                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4891                         r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4892                                           UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
4893                         VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
4894                                    rc, rrc);
4895                 }
4896                 break;
4897         case KVM_S390_INITIAL_RESET:
4898                 r = 0;
4899                 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4900                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4901                         r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4902                                           UVC_CMD_CPU_RESET_INITIAL,
4903                                           &rc, &rrc);
4904                         VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
4905                                    rc, rrc);
4906                 }
4907                 break;
4908         case KVM_S390_NORMAL_RESET:
4909                 r = 0;
4910                 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4911                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4912                         r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4913                                           UVC_CMD_CPU_RESET, &rc, &rrc);
4914                         VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
4915                                    rc, rrc);
4916                 }
4917                 break;
4918         case KVM_SET_ONE_REG:
4919         case KVM_GET_ONE_REG: {
4920                 struct kvm_one_reg reg;
4921                 r = -EINVAL;
4922                 if (kvm_s390_pv_cpu_is_protected(vcpu))
4923                         break;
4924                 r = -EFAULT;
4925                 if (copy_from_user(&reg, argp, sizeof(reg)))
4926                         break;
4927                 if (ioctl == KVM_SET_ONE_REG)
4928                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4929                 else
4930                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4931                 break;
4932         }
4933 #ifdef CONFIG_KVM_S390_UCONTROL
4934         case KVM_S390_UCAS_MAP: {
4935                 struct kvm_s390_ucas_mapping ucasmap;
4936
4937                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4938                         r = -EFAULT;
4939                         break;
4940                 }
4941
4942                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4943                         r = -EINVAL;
4944                         break;
4945                 }
4946
4947                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4948                                      ucasmap.vcpu_addr, ucasmap.length);
4949                 break;
4950         }
4951         case KVM_S390_UCAS_UNMAP: {
4952                 struct kvm_s390_ucas_mapping ucasmap;
4953
4954                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4955                         r = -EFAULT;
4956                         break;
4957                 }
4958
4959                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4960                         r = -EINVAL;
4961                         break;
4962                 }
4963
4964                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4965                         ucasmap.length);
4966                 break;
4967         }
4968 #endif
4969         case KVM_S390_VCPU_FAULT: {
4970                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
4971                 break;
4972         }
4973         case KVM_ENABLE_CAP:
4974         {
4975                 struct kvm_enable_cap cap;
4976                 r = -EFAULT;
4977                 if (copy_from_user(&cap, argp, sizeof(cap)))
4978                         break;
4979                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4980                 break;
4981         }
4982         case KVM_S390_MEM_OP: {
4983                 struct kvm_s390_mem_op mem_op;
4984
4985                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4986                         r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
4987                 else
4988                         r = -EFAULT;
4989                 break;
4990         }
4991         case KVM_S390_SET_IRQ_STATE: {
4992                 struct kvm_s390_irq_state irq_state;
4993
4994                 r = -EFAULT;
4995                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4996                         break;
4997                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4998                     irq_state.len == 0 ||
4999                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
5000                         r = -EINVAL;
5001                         break;
5002                 }
5003                 /* do not use irq_state.flags, it will break old QEMUs */
5004                 r = kvm_s390_set_irq_state(vcpu,
5005                                            (void __user *) irq_state.buf,
5006                                            irq_state.len);
5007                 break;
5008         }
5009         case KVM_S390_GET_IRQ_STATE: {
5010                 struct kvm_s390_irq_state irq_state;
5011
5012                 r = -EFAULT;
5013                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
5014                         break;
5015                 if (irq_state.len == 0) {
5016                         r = -EINVAL;
5017                         break;
5018                 }
5019                 /* do not use irq_state.flags, it will break old QEMUs */
5020                 r = kvm_s390_get_irq_state(vcpu,
5021                                            (__u8 __user *)  irq_state.buf,
5022                                            irq_state.len);
5023                 break;
5024         }
5025         default:
5026                 r = -ENOTTY;
5027         }
5028
5029         vcpu_put(vcpu);
5030         return r;
5031 }
5032
5033 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
5034 {
5035 #ifdef CONFIG_KVM_S390_UCONTROL
5036         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
5037                  && (kvm_is_ucontrol(vcpu->kvm))) {
5038                 vmf->page = virt_to_page(vcpu->arch.sie_block);
5039                 get_page(vmf->page);
5040                 return 0;
5041         }
5042 #endif
5043         return VM_FAULT_SIGBUS;
5044 }
5045
5046 /* Section: memory related */
5047 int kvm_arch_prepare_memory_region(struct kvm *kvm,
5048                                    struct kvm_memory_slot *memslot,
5049                                    const struct kvm_userspace_memory_region *mem,
5050                                    enum kvm_mr_change change)
5051 {
5052         /* A few sanity checks. Memory slots have to start and end at a
5053            segment boundary (1MB). The memory in userland may be fragmented
5054            into various different vmas, and it is okay to mmap() and munmap()
5055            memory in this slot at any time after this call */
5056
5057         if (mem->userspace_addr & 0xffffful)
5058                 return -EINVAL;
5059
5060         if (mem->memory_size & 0xffffful)
5061                 return -EINVAL;
5062
5063         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
5064                 return -EINVAL;
5065
5066         /* When we are protected, we should not change the memory slots */
5067         if (kvm_s390_pv_get_handle(kvm))
5068                 return -EINVAL;
5069         return 0;
5070 }
5071
5072 void kvm_arch_commit_memory_region(struct kvm *kvm,
5073                                 const struct kvm_userspace_memory_region *mem,
5074                                 struct kvm_memory_slot *old,
5075                                 const struct kvm_memory_slot *new,
5076                                 enum kvm_mr_change change)
5077 {
5078         int rc = 0;
5079
5080         switch (change) {
5081         case KVM_MR_DELETE:
5082                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5083                                         old->npages * PAGE_SIZE);
5084                 break;
5085         case KVM_MR_MOVE:
5086                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5087                                         old->npages * PAGE_SIZE);
5088                 if (rc)
5089                         break;
5090                 fallthrough;
5091         case KVM_MR_CREATE:
5092                 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
5093                                       mem->guest_phys_addr, mem->memory_size);
5094                 break;
5095         case KVM_MR_FLAGS_ONLY:
5096                 break;
5097         default:
5098                 WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5099         }
5100         if (rc)
5101                 pr_warn("failed to commit memory region\n");
5102         return;
5103 }
5104
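     /*
      * Derive from the SCLP-reported hmfai value which facility bits of
      * facility doubleword i may be offered to guests (used to build
      * kvm_s390_fac_base in kvm_s390_init() below).
      */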
5105 static inline unsigned long nonhyp_mask(int i)
5106 {
5107         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5108
5109         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5110 }
5111
5112 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
5113 {
5114         vcpu->valid_wakeup = false;
5115 }
5116
5117 static int __init kvm_s390_init(void)
5118 {
5119         int i;
5120
5121         if (!sclp.has_sief2) {
5122                 pr_info("SIE is not available\n");
5123                 return -ENODEV;
5124         }
5125
5126         if (nested && hpage) {
5127                 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5128                 return -EINVAL;
5129         }
5130
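             /* offer only the host facilities that nonhyp_mask() allows */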
5131         for (i = 0; i < 16; i++)
5132                 kvm_s390_fac_base[i] |=
5133                         stfle_fac_list[i] & nonhyp_mask(i);
5134
5135         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5136 }
5137
5138 static void __exit kvm_s390_exit(void)
5139 {
5140         kvm_exit();
5141 }
5142
5143 module_init(kvm_s390_init);
5144 module_exit(kvm_s390_exit);
5145
5146 /*
5147  * Enable autoloading of the kvm module.
5148  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5149  * since x86 takes a different approach.
5150  */
5151 #include <linux/miscdevice.h>
5152 MODULE_ALIAS_MISCDEV(KVM_MINOR);
5153 MODULE_ALIAS("devname:kvm");