arch/s390/kvm/kvm-s390.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * hosting IBM Z kernel virtual machines (s390x)
   4  *
   5  * Copyright IBM Corp. 2008, 2020
   6  *
   7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
   8  *               Christian Borntraeger <borntraeger@de.ibm.com>
   9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
  10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
  11  *               Jason J. Herne <jjherne@us.ibm.com>
  12  */
  13
  14 #define KMSG_COMPONENT "kvm-s390"
  15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  16
  17 #include <linux/compiler.h>
  18 #include <linux/err.h>
  19 #include <linux/fs.h>
  20 #include <linux/hrtimer.h>
  21 #include <linux/init.h>
  22 #include <linux/kvm.h>
  23 #include <linux/kvm_host.h>
  24 #include <linux/mman.h>
  25 #include <linux/module.h>
  26 #include <linux/moduleparam.h>
  27 #include <linux/random.h>
  28 #include <linux/slab.h>
  29 #include <linux/timer.h>
  30 #include <linux/vmalloc.h>
  31 #include <linux/bitmap.h>
  32 #include <linux/sched/signal.h>
  33 #include <linux/string.h>
  34 #include <linux/pgtable.h>
  35
  36 #include <asm/asm-offsets.h>
  37 #include <asm/lowcore.h>
  38 #include <asm/stp.h>
  39 #include <asm/gmap.h>
  40 #include <asm/nmi.h>
  41 #include <asm/switch_to.h>
  42 #include <asm/isc.h>
  43 #include <asm/sclp.h>
  44 #include <asm/cpacf.h>
  45 #include <asm/timex.h>
  46 #include <asm/ap.h>
  47 #include <asm/uv.h>
  48 #include <asm/fpu/api.h>
  49 #include "kvm-s390.h"
  50 #include "gaccess.h"
  51
  52 #define CREATE_TRACE_POINTS
  53 #include "trace.h"
  54 #include "trace-s390.h"
  55
  56 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
  57 #define LOCAL_IRQS 32
  58 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
  59                            (KVM_MAX_VCPUS + LOCAL_IRQS))
  60
  61 const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
  62         KVM_GENERIC_VM_STATS(),
  63         STATS_DESC_COUNTER(VM, inject_io),
  64         STATS_DESC_COUNTER(VM, inject_float_mchk),
  65         STATS_DESC_COUNTER(VM, inject_pfault_done),
  66         STATS_DESC_COUNTER(VM, inject_service_signal),
  67         STATS_DESC_COUNTER(VM, inject_virtio)
  68 };
  69
  70 const struct kvm_stats_header kvm_vm_stats_header = {
  71         .name_size = KVM_STATS_NAME_SIZE,
  72         .num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
  73         .id_offset = sizeof(struct kvm_stats_header),
  74         .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
  75         .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
  76                        sizeof(kvm_vm_stats_desc),
  77 };
  78
  79 const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
  80         KVM_GENERIC_VCPU_STATS(),
  81         STATS_DESC_COUNTER(VCPU, exit_userspace),
  82         STATS_DESC_COUNTER(VCPU, exit_null),
  83         STATS_DESC_COUNTER(VCPU, exit_external_request),
  84         STATS_DESC_COUNTER(VCPU, exit_io_request),
  85         STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
  86         STATS_DESC_COUNTER(VCPU, exit_stop_request),
  87         STATS_DESC_COUNTER(VCPU, exit_validity),
  88         STATS_DESC_COUNTER(VCPU, exit_instruction),
  89         STATS_DESC_COUNTER(VCPU, exit_pei),
  90         STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
  91         STATS_DESC_COUNTER(VCPU, instruction_lctl),
  92         STATS_DESC_COUNTER(VCPU, instruction_lctlg),
  93         STATS_DESC_COUNTER(VCPU, instruction_stctl),
  94         STATS_DESC_COUNTER(VCPU, instruction_stctg),
  95         STATS_DESC_COUNTER(VCPU, exit_program_interruption),
  96         STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
  97         STATS_DESC_COUNTER(VCPU, exit_operation_exception),
  98         STATS_DESC_COUNTER(VCPU, deliver_ckc),
  99         STATS_DESC_COUNTER(VCPU, deliver_cputm),
 100         STATS_DESC_COUNTER(VCPU, deliver_external_call),
 101         STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
 102         STATS_DESC_COUNTER(VCPU, deliver_service_signal),
 103         STATS_DESC_COUNTER(VCPU, deliver_virtio),
 104         STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
 105         STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
 106         STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
 107         STATS_DESC_COUNTER(VCPU, deliver_program),
 108         STATS_DESC_COUNTER(VCPU, deliver_io),
 109         STATS_DESC_COUNTER(VCPU, deliver_machine_check),
 110         STATS_DESC_COUNTER(VCPU, exit_wait_state),
 111         STATS_DESC_COUNTER(VCPU, inject_ckc),
 112         STATS_DESC_COUNTER(VCPU, inject_cputm),
 113         STATS_DESC_COUNTER(VCPU, inject_external_call),
 114         STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
 115         STATS_DESC_COUNTER(VCPU, inject_mchk),
 116         STATS_DESC_COUNTER(VCPU, inject_pfault_init),
 117         STATS_DESC_COUNTER(VCPU, inject_program),
 118         STATS_DESC_COUNTER(VCPU, inject_restart),
 119         STATS_DESC_COUNTER(VCPU, inject_set_prefix),
 120         STATS_DESC_COUNTER(VCPU, inject_stop_signal),
 121         STATS_DESC_COUNTER(VCPU, instruction_epsw),
 122         STATS_DESC_COUNTER(VCPU, instruction_gs),
 123         STATS_DESC_COUNTER(VCPU, instruction_io_other),
 124         STATS_DESC_COUNTER(VCPU, instruction_lpsw),
 125         STATS_DESC_COUNTER(VCPU, instruction_lpswe),
 126         STATS_DESC_COUNTER(VCPU, instruction_pfmf),
 127         STATS_DESC_COUNTER(VCPU, instruction_ptff),
 128         STATS_DESC_COUNTER(VCPU, instruction_sck),
 129         STATS_DESC_COUNTER(VCPU, instruction_sckpf),
 130         STATS_DESC_COUNTER(VCPU, instruction_stidp),
 131         STATS_DESC_COUNTER(VCPU, instruction_spx),
 132         STATS_DESC_COUNTER(VCPU, instruction_stpx),
 133         STATS_DESC_COUNTER(VCPU, instruction_stap),
 134         STATS_DESC_COUNTER(VCPU, instruction_iske),
 135         STATS_DESC_COUNTER(VCPU, instruction_ri),
 136         STATS_DESC_COUNTER(VCPU, instruction_rrbe),
 137         STATS_DESC_COUNTER(VCPU, instruction_sske),
 138         STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
 139         STATS_DESC_COUNTER(VCPU, instruction_stsi),
 140         STATS_DESC_COUNTER(VCPU, instruction_stfl),
 141         STATS_DESC_COUNTER(VCPU, instruction_tb),
 142         STATS_DESC_COUNTER(VCPU, instruction_tpi),
 143         STATS_DESC_COUNTER(VCPU, instruction_tprot),
 144         STATS_DESC_COUNTER(VCPU, instruction_tsch),
 145         STATS_DESC_COUNTER(VCPU, instruction_sie),
 146         STATS_DESC_COUNTER(VCPU, instruction_essa),
 147         STATS_DESC_COUNTER(VCPU, instruction_sthyi),
 148         STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
 149         STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
 150         STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
 151         STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
 152         STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
 153         STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
 154         STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
 155         STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
 156         STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
 157         STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
 158         STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
 159         STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
 160         STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
 161         STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
 162         STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
 163         STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
 164         STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
 165         STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
 166         STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
 167         STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
 168         STATS_DESC_COUNTER(VCPU, diag_9c_forward),
 169         STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
 170         STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
 171         STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
 172         STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
 173         STATS_DESC_COUNTER(VCPU, pfault_sync)
 174 };
 175
 176 const struct kvm_stats_header kvm_vcpu_stats_header = {
 177         .name_size = KVM_STATS_NAME_SIZE,
 178         .num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
 179         .id_offset = sizeof(struct kvm_stats_header),
 180         .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
 181         .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
 182                        sizeof(kvm_vcpu_stats_desc),
 183 };
 184
 185 /* allow nested virtualization in KVM (if enabled by user space) */
 186 static int nested;
 187 module_param(nested, int, S_IRUGO);
 188 MODULE_PARM_DESC(nested, "Nested virtualization support");
 189
 190 /* allow 1m huge page guest backing, if !nested */
 191 static int hpage;
 192 module_param(hpage, int, 0444);
 193 MODULE_PARM_DESC(hpage, "1m huge page backing support");
 194
 195 /* maximum percentage of steal time for polling.  >100 is treated like 100 */
 196 static u8 halt_poll_max_steal = 10;
 197 module_param(halt_poll_max_steal, byte, 0644);
 198 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
 199
 200 /* if set to true, the GISA will be initialized and used if available */
 201 static bool use_gisa  = true;
 202 module_param(use_gisa, bool, 0644);
 203 MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
 204
 205 /* maximum diag9c forwarding per second */
 206 unsigned int diag9c_forwarding_hz;
 207 module_param(diag9c_forwarding_hz, uint, 0644);
 208 MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
 209
/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, this requires changes to code, but the external uapi can stay.
 *
 * kvm_s390_fac_size() checks at build time that this does not exceed the
 * uapi mask/list sizes or the size of stfle_fac_list.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 * Array elements not covered by the FACILITIES_KVM initializer start as zero.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 * Array elements not covered by the initializer start as zero.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
 227
 228 static unsigned long kvm_s390_fac_size(void)
 229 {
 230         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
 231         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
 232         BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
 233                 sizeof(stfle_fac_list));
 234
 235         return SIZE_INTERNAL;
 236 }
 237
/* available cpu features supported by kvm; bits are set through allow_cpu_feat() */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/*
 * available subfunctions indicated via query / "test bit";
 * filled in by kvm_s390_cpu_feat_init()
 */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

/* gmap pte notifiers, registered in kvm_arch_hardware_setup() */
static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
/* debug areas "kvm-trace" and "kvm-uv", registered in kvm_arch_init() */
debug_info_t *kvm_s390_dbf;
debug_info_t *kvm_s390_dbf_uv;
 247
 248 /* Section: not file related */
 249 int kvm_arch_hardware_enable(void)
 250 {
 251         /* every s390 is virtualization enabled ;-) */
 252         return 0;
 253 }
 254
 255 int kvm_arch_check_processor_compat(void *opaque)
 256 {
 257         return 0;
 258 }
 259
 260 /* forward declarations */
 261 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
 262                               unsigned long end);
 263 static int sca_switch_to_extended(struct kvm *kvm);
 264
 265 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
 266 {
 267         u8 delta_idx = 0;
 268
 269         /*
 270          * The TOD jumps by delta, we have to compensate this by adding
 271          * -delta to the epoch.
 272          */
 273         delta = -delta;
 274
 275         /* sign-extension - we're adding to signed values below */
 276         if ((s64)delta < 0)
 277                 delta_idx = -1;
 278
 279         scb->epoch += delta;
 280         if (scb->ecd & ECD_MEF) {
 281                 scb->epdx += delta_idx;
 282                 if (scb->epoch < delta)
 283                         scb->epdx += 1;
 284         }
 285 }
 286
 287 /*
 288  * This callback is executed during stop_machine(). All CPUs are therefore
 289  * temporarily stopped. In order not to change guest behavior, we have to
 290  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 291  * so a CPU won't be stopped while calculating with the epoch.
 292  */
 293 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
 294                           void *v)
 295 {
 296         struct kvm *kvm;
 297         struct kvm_vcpu *vcpu;
 298         int i;
 299         unsigned long long *delta = v;
 300
 301         list_for_each_entry(kvm, &vm_list, vm_list) {
 302                 kvm_for_each_vcpu(i, vcpu, kvm) {
 303                         kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
 304                         if (i == 0) {
 305                                 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
 306                                 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
 307                         }
 308                         if (vcpu->arch.cputm_enabled)
 309                                 vcpu->arch.cputm_start += *delta;
 310                         if (vcpu->arch.vsie_block)
 311                                 kvm_clock_sync_scb(vcpu->arch.vsie_block,
 312                                                    *delta);
 313                 }
 314         }
 315         return NOTIFY_OK;
 316 }
 317
 318 static struct notifier_block kvm_clock_notifier = {
 319         .notifier_call = kvm_clock_sync,
 320 };
 321
 322 int kvm_arch_hardware_setup(void *opaque)
 323 {
 324         gmap_notifier.notifier_call = kvm_gmap_notifier;
 325         gmap_register_pte_notifier(&gmap_notifier);
 326         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
 327         gmap_register_pte_notifier(&vsie_gmap_notifier);
 328         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
 329                                        &kvm_clock_notifier);
 330         return 0;
 331 }
 332
 333 void kvm_arch_hardware_unsetup(void)
 334 {
 335         gmap_unregister_pte_notifier(&gmap_notifier);
 336         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
 337         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
 338                                          &kvm_clock_notifier);
 339 }
 340
 341 static void allow_cpu_feat(unsigned long nr)
 342 {
 343         set_bit_inv(nr, kvm_s390_available_cpu_feat);
 344 }
 345
 346 static inline int plo_test_bit(unsigned char nr)
 347 {
 348         unsigned long function = (unsigned long)nr | 0x100;
 349         int cc;
 350
 351         asm volatile(
 352                 "       lgr     0,%[function]\n"
 353                 /* Parameter registers are ignored for "test bit" */
 354                 "       plo     0,0,0,0(0)\n"
 355                 "       ipm     %0\n"
 356                 "       srl     %0,28\n"
 357                 : "=d" (cc)
 358                 : [function] "d" (function)
 359                 : "cc", "0");
 360         return cc == 0;
 361 }
 362
 363 static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
 364 {
 365         asm volatile(
 366                 "       lghi    0,0\n"
 367                 "       lgr     1,%[query]\n"
 368                 /* Parameter registers are ignored */
 369                 "       .insn   rrf,%[opc] << 16,2,4,6,0\n"
 370                 :
 371                 : [query] "d" ((unsigned long)query), [opc] "i" (opcode)
 372                 : "cc", "memory", "0", "1");
 373 }
 374
 375 #define INSN_SORTL 0xb938
 376 #define INSN_DFLTCC 0xb939
 377
 378 static void kvm_s390_cpu_feat_init(void)
 379 {
 380         int i;
 381
 382         for (i = 0; i < 256; ++i) {
 383                 if (plo_test_bit(i))
 384                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
 385         }
 386
 387         if (test_facility(28)) /* TOD-clock steering */
 388                 ptff(kvm_s390_available_subfunc.ptff,
 389                      sizeof(kvm_s390_available_subfunc.ptff),
 390                      PTFF_QAF);
 391
 392         if (test_facility(17)) { /* MSA */
 393                 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
 394                               kvm_s390_available_subfunc.kmac);
 395                 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
 396                               kvm_s390_available_subfunc.kmc);
 397                 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
 398                               kvm_s390_available_subfunc.km);
 399                 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
 400                               kvm_s390_available_subfunc.kimd);
 401                 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
 402                               kvm_s390_available_subfunc.klmd);
 403         }
 404         if (test_facility(76)) /* MSA3 */
 405                 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
 406                               kvm_s390_available_subfunc.pckmo);
 407         if (test_facility(77)) { /* MSA4 */
 408                 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
 409                               kvm_s390_available_subfunc.kmctr);
 410                 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
 411                               kvm_s390_available_subfunc.kmf);
 412                 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
 413                               kvm_s390_available_subfunc.kmo);
 414                 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
 415                               kvm_s390_available_subfunc.pcc);
 416         }
 417         if (test_facility(57)) /* MSA5 */
 418                 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
 419                               kvm_s390_available_subfunc.ppno);
 420
 421         if (test_facility(146)) /* MSA8 */
 422                 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
 423                               kvm_s390_available_subfunc.kma);
 424
 425         if (test_facility(155)) /* MSA9 */
 426                 __cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
 427                               kvm_s390_available_subfunc.kdsa);
 428
 429         if (test_facility(150)) /* SORTL */
 430                 __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
 431
 432         if (test_facility(151)) /* DFLTCC */
 433                 __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
 434
 435         if (MACHINE_HAS_ESOP)
 436                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
 437         /*
 438          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
 439          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
 440          */
 441         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
 442             !test_facility(3) || !nested)
 443                 return;
 444         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
 445         if (sclp.has_64bscao)
 446                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
 447         if (sclp.has_siif)
 448                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
 449         if (sclp.has_gpere)
 450                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
 451         if (sclp.has_gsls)
 452                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
 453         if (sclp.has_ib)
 454                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
 455         if (sclp.has_cei)
 456                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
 457         if (sclp.has_ibs)
 458                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
 459         if (sclp.has_kss)
 460                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
 461         /*
 462          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
 463          * all skey handling functions read/set the skey from the PGSTE
 464          * instead of the real storage key.
 465          *
 466          * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
 467          * pages being detected as preserved although they are resident.
 468          *
 469          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
 470          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
 471          *
 472          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
 473          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
 474          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
 475          *
 476          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
 477          * cannot easily shadow the SCA because of the ipte lock.
 478          */
 479 }
 480
 481 int kvm_arch_init(void *opaque)
 482 {
 483         int rc = -ENOMEM;
 484
 485         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
 486         if (!kvm_s390_dbf)
 487                 return -ENOMEM;
 488
 489         kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
 490         if (!kvm_s390_dbf_uv)
 491                 goto out;
 492
 493         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
 494             debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
 495                 goto out;
 496
 497         kvm_s390_cpu_feat_init();
 498
 499         /* Register floating interrupt controller interface. */
 500         rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
 501         if (rc) {
 502                 pr_err("A FLIC registration call failed with rc=%d\n", rc);
 503                 goto out;
 504         }
 505
 506         rc = kvm_s390_gib_init(GAL_ISC);
 507         if (rc)
 508                 goto out;
 509
 510         return 0;
 511
 512 out:
 513         kvm_arch_exit();
 514         return rc;
 515 }
 516
 517 void kvm_arch_exit(void)
 518 {
 519         kvm_s390_gib_destroy();
 520         debug_unregister(kvm_s390_dbf);
 521         debug_unregister(kvm_s390_dbf_uv);
 522 }
 523
 524 /* Section: device related */
 525 long kvm_arch_dev_ioctl(struct file *filp,
 526                         unsigned int ioctl, unsigned long arg)
 527 {
 528         if (ioctl == KVM_S390_ENABLE_SIE)
 529                 return s390_enable_sie();
 530         return -EINVAL;
 531 }
 532
 533 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 534 {
 535         int r;
 536
 537         switch (ext) {
 538         case KVM_CAP_S390_PSW:
 539         case KVM_CAP_S390_GMAP:
 540         case KVM_CAP_SYNC_MMU:
 541 #ifdef CONFIG_KVM_S390_UCONTROL
 542         case KVM_CAP_S390_UCONTROL:
 543 #endif
 544         case KVM_CAP_ASYNC_PF:
 545         case KVM_CAP_SYNC_REGS:
 546         case KVM_CAP_ONE_REG:
 547         case KVM_CAP_ENABLE_CAP:
 548         case KVM_CAP_S390_CSS_SUPPORT:
 549         case KVM_CAP_IOEVENTFD:
 550         case KVM_CAP_DEVICE_CTRL:
 551         case KVM_CAP_S390_IRQCHIP:
 552         case KVM_CAP_VM_ATTRIBUTES:
 553         case KVM_CAP_MP_STATE:
 554         case KVM_CAP_IMMEDIATE_EXIT:
 555         case KVM_CAP_S390_INJECT_IRQ:
 556         case KVM_CAP_S390_USER_SIGP:
 557         case KVM_CAP_S390_USER_STSI:
 558         case KVM_CAP_S390_SKEYS:
 559         case KVM_CAP_S390_IRQ_STATE:
 560         case KVM_CAP_S390_USER_INSTR0:
 561         case KVM_CAP_S390_CMMA_MIGRATION:
 562         case KVM_CAP_S390_AIS:
 563         case KVM_CAP_S390_AIS_MIGRATION:
 564         case KVM_CAP_S390_VCPU_RESETS:
 565         case KVM_CAP_SET_GUEST_DEBUG:
 566         case KVM_CAP_S390_DIAG318:
 567                 r = 1;
 568                 break;
 569         case KVM_CAP_SET_GUEST_DEBUG2:
 570                 r = KVM_GUESTDBG_VALID_MASK;
 571                 break;
 572         case KVM_CAP_S390_HPAGE_1M:
 573                 r = 0;
 574                 if (hpage && !kvm_is_ucontrol(kvm))
 575                         r = 1;
 576                 break;
 577         case KVM_CAP_S390_MEM_OP:
 578                 r = MEM_OP_MAX_SIZE;
 579                 break;
 580         case KVM_CAP_NR_VCPUS:
 581         case KVM_CAP_MAX_VCPUS:
 582         case KVM_CAP_MAX_VCPU_ID:
 583                 r = KVM_S390_BSCA_CPU_SLOTS;
 584                 if (!kvm_s390_use_sca_entries())
 585                         r = KVM_MAX_VCPUS;
 586                 else if (sclp.has_esca && sclp.has_64bscao)
 587                         r = KVM_S390_ESCA_CPU_SLOTS;
 588                 break;
 589         case KVM_CAP_S390_COW:
 590                 r = MACHINE_HAS_ESOP;
 591                 break;
 592         case KVM_CAP_S390_VECTOR_REGISTERS:
 593                 r = MACHINE_HAS_VX;
 594                 break;
 595         case KVM_CAP_S390_RI:
 596                 r = test_facility(64);
 597                 break;
 598         case KVM_CAP_S390_GS:
 599                 r = test_facility(133);
 600                 break;
 601         case KVM_CAP_S390_BPB:
 602                 r = test_facility(82);
 603                 break;
 604         case KVM_CAP_S390_PROTECTED:
 605                 r = is_prot_virt_host();
 606                 break;
 607         default:
 608                 r = 0;
 609         }
 610         return r;
 611 }
 612
 613 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
 614 {
 615         int i;
 616         gfn_t cur_gfn, last_gfn;
 617         unsigned long gaddr, vmaddr;
 618         struct gmap *gmap = kvm->arch.gmap;
 619         DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
 620
 621         /* Loop over all guest segments */
 622         cur_gfn = memslot->base_gfn;
 623         last_gfn = memslot->base_gfn + memslot->npages;
 624         for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
 625                 gaddr = gfn_to_gpa(cur_gfn);
 626                 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
 627                 if (kvm_is_error_hva(vmaddr))
 628                         continue;
 629
 630                 bitmap_zero(bitmap, _PAGE_ENTRIES);
 631                 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
 632                 for (i = 0; i < _PAGE_ENTRIES; i++) {
 633                         if (test_bit(i, bitmap))
 634                                 mark_page_dirty(kvm, cur_gfn + i);
 635                 }
 636
 637                 if (fatal_signal_pending(current))
 638                         return;
 639                 cond_resched();
 640         }
 641 }
 642
/* Section: vm related */
/* prototype only, so that sca_del_vcpu() can be called ahead of its definition */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);
 645
 646 /*
 647  * Get (and clear) the dirty memory log for a memory slot.
 648  */
 649 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 650                                struct kvm_dirty_log *log)
 651 {
 652         int r;
 653         unsigned long n;
 654         struct kvm_memory_slot *memslot;
 655         int is_dirty;
 656
 657         if (kvm_is_ucontrol(kvm))
 658                 return -EINVAL;
 659
 660         mutex_lock(&kvm->slots_lock);
 661
 662         r = -EINVAL;
 663         if (log->slot >= KVM_USER_MEM_SLOTS)
 664                 goto out;
 665
 666         r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
 667         if (r)
 668                 goto out;
 669
 670         /* Clear the dirty log */
 671         if (is_dirty) {
 672                 n = kvm_dirty_bitmap_bytes(memslot);
 673                 memset(memslot->dirty_bitmap, 0, n);
 674         }
 675         r = 0;
 676 out:
 677         mutex_unlock(&kvm->slots_lock);
 678         return r;
 679 }
 680
 681 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
 682 {
 683         unsigned int i;
 684         struct kvm_vcpu *vcpu;
 685
 686         kvm_for_each_vcpu(i, vcpu, kvm) {
 687                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
 688         }
 689 }
 690
 691 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 692 {
 693         int r;
 694
 695         if (cap->flags)
 696                 return -EINVAL;
 697
 698         switch (cap->cap) {
 699         case KVM_CAP_S390_IRQCHIP:
 700                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
 701                 kvm->arch.use_irqchip = 1;
 702                 r = 0;
 703                 break;
 704         case KVM_CAP_S390_USER_SIGP:
 705                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
 706                 kvm->arch.user_sigp = 1;
 707                 r = 0;
 708                 break;
 709         case KVM_CAP_S390_VECTOR_REGISTERS:
 710                 mutex_lock(&kvm->lock);
 711                 if (kvm->created_vcpus) {
 712                         r = -EBUSY;
 713                 } else if (MACHINE_HAS_VX) {
 714                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
 715                         set_kvm_facility(kvm->arch.model.fac_list, 129);
 716                         if (test_facility(134)) {
 717                                 set_kvm_facility(kvm->arch.model.fac_mask, 134);
 718                                 set_kvm_facility(kvm->arch.model.fac_list, 134);
 719                         }
 720                         if (test_facility(135)) {
 721                                 set_kvm_facility(kvm->arch.model.fac_mask, 135);
 722                                 set_kvm_facility(kvm->arch.model.fac_list, 135);
 723                         }
 724                         if (test_facility(148)) {
 725                                 set_kvm_facility(kvm->arch.model.fac_mask, 148);
 726                                 set_kvm_facility(kvm->arch.model.fac_list, 148);
 727                         }
 728                         if (test_facility(152)) {
 729                                 set_kvm_facility(kvm->arch.model.fac_mask, 152);
 730                                 set_kvm_facility(kvm->arch.model.fac_list, 152);
 731                         }
 732                         if (test_facility(192)) {
 733                                 set_kvm_facility(kvm->arch.model.fac_mask, 192);
 734                                 set_kvm_facility(kvm->arch.model.fac_list, 192);
 735                         }
 736                         r = 0;
 737                 } else
 738                         r = -EINVAL;
 739                 mutex_unlock(&kvm->lock);
 740                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
 741                          r ? "(not available)" : "(success)");
 742                 break;
 743         case KVM_CAP_S390_RI:
 744                 r = -EINVAL;
 745                 mutex_lock(&kvm->lock);
 746                 if (kvm->created_vcpus) {
 747                         r = -EBUSY;
 748                 } else if (test_facility(64)) {
 749                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
 750                         set_kvm_facility(kvm->arch.model.fac_list, 64);
 751                         r = 0;
 752                 }
 753                 mutex_unlock(&kvm->lock);
 754                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
 755                          r ? "(not available)" : "(success)");
 756                 break;
 757         case KVM_CAP_S390_AIS:
 758                 mutex_lock(&kvm->lock);
 759                 if (kvm->created_vcpus) {
 760                         r = -EBUSY;
 761                 } else {
 762                         set_kvm_facility(kvm->arch.model.fac_mask, 72);
 763                         set_kvm_facility(kvm->arch.model.fac_list, 72);
 764                         r = 0;
 765                 }
 766                 mutex_unlock(&kvm->lock);
 767                 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
 768                          r ? "(not available)" : "(success)");
 769                 break;
 770         case KVM_CAP_S390_GS:
 771                 r = -EINVAL;
 772                 mutex_lock(&kvm->lock);
 773                 if (kvm->created_vcpus) {
 774                         r = -EBUSY;
 775                 } else if (test_facility(133)) {
 776                         set_kvm_facility(kvm->arch.model.fac_mask, 133);
 777                         set_kvm_facility(kvm->arch.model.fac_list, 133);
 778                         r = 0;
 779                 }
 780                 mutex_unlock(&kvm->lock);
 781                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
 782                          r ? "(not available)" : "(success)");
 783                 break;
 784         case KVM_CAP_S390_HPAGE_1M:
 785                 mutex_lock(&kvm->lock);
 786                 if (kvm->created_vcpus)
 787                         r = -EBUSY;
 788                 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
 789                         r = -EINVAL;
 790                 else {
 791                         r = 0;
 792                         mmap_write_lock(kvm->mm);
 793                         kvm->mm->context.allow_gmap_hpage_1m = 1;
 794                         mmap_write_unlock(kvm->mm);
 795                         /*
 796                          * We might have to create fake 4k page
 797                          * tables. To avoid that the hardware works on
 798                          * stale PGSTEs, we emulate these instructions.
 799                          */
 800                         kvm->arch.use_skf = 0;
 801                         kvm->arch.use_pfmfi = 0;
 802                 }
 803                 mutex_unlock(&kvm->lock);
 804                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
 805                          r ? "(not available)" : "(success)");
 806                 break;
 807         case KVM_CAP_S390_USER_STSI:
 808                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
 809                 kvm->arch.user_stsi = 1;
 810                 r = 0;
 811                 break;
 812         case KVM_CAP_S390_USER_INSTR0:
 813                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
 814                 kvm->arch.user_instr0 = 1;
 815                 icpt_operexc_on_all_vcpus(kvm);
 816                 r = 0;
 817                 break;
 818         default:
 819                 r = -EINVAL;
 820                 break;
 821         }
 822         return r;
 823 }
 824
 825 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 826 {
 827         int ret;
 828
 829         switch (attr->attr) {
 830         case KVM_S390_VM_MEM_LIMIT_SIZE:
 831                 ret = 0;
 832                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
 833                          kvm->arch.mem_limit);
 834                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
 835                         ret = -EFAULT;
 836                 break;
 837         default:
 838                 ret = -ENXIO;
 839                 break;
 840         }
 841         return ret;
 842 }
 843
 844 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 845 {
 846         int ret;
 847         unsigned int idx;
 848         switch (attr->attr) {
 849         case KVM_S390_VM_MEM_ENABLE_CMMA:
 850                 ret = -ENXIO;
 851                 if (!sclp.has_cmma)
 852                         break;
 853
 854                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
 855                 mutex_lock(&kvm->lock);
 856                 if (kvm->created_vcpus)
 857                         ret = -EBUSY;
 858                 else if (kvm->mm->context.allow_gmap_hpage_1m)
 859                         ret = -EINVAL;
 860                 else {
 861                         kvm->arch.use_cmma = 1;
 862                         /* Not compatible with cmma. */
 863                         kvm->arch.use_pfmfi = 0;
 864                         ret = 0;
 865                 }
 866                 mutex_unlock(&kvm->lock);
 867                 break;
 868         case KVM_S390_VM_MEM_CLR_CMMA:
 869                 ret = -ENXIO;
 870                 if (!sclp.has_cmma)
 871                         break;
 872                 ret = -EINVAL;
 873                 if (!kvm->arch.use_cmma)
 874                         break;
 875
 876                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
 877                 mutex_lock(&kvm->lock);
 878                 idx = srcu_read_lock(&kvm->srcu);
 879                 s390_reset_cmma(kvm->arch.gmap->mm);
 880                 srcu_read_unlock(&kvm->srcu, idx);
 881                 mutex_unlock(&kvm->lock);
 882                 ret = 0;
 883                 break;
 884         case KVM_S390_VM_MEM_LIMIT_SIZE: {
 885                 unsigned long new_limit;
 886
 887                 if (kvm_is_ucontrol(kvm))
 888                         return -EINVAL;
 889
 890                 if (get_user(new_limit, (u64 __user *)attr->addr))
 891                         return -EFAULT;
 892
 893                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
 894                     new_limit > kvm->arch.mem_limit)
 895                         return -E2BIG;
 896
 897                 if (!new_limit)
 898                         return -EINVAL;
 899
 900                 /* gmap_create takes last usable address */
 901                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
 902                         new_limit -= 1;
 903
 904                 ret = -EBUSY;
 905                 mutex_lock(&kvm->lock);
 906                 if (!kvm->created_vcpus) {
 907                         /* gmap_create will round the limit up */
 908                         struct gmap *new = gmap_create(current->mm, new_limit);
 909
 910                         if (!new) {
 911                                 ret = -ENOMEM;
 912                         } else {
 913                                 gmap_remove(kvm->arch.gmap);
 914                                 new->private = kvm;
 915                                 kvm->arch.gmap = new;
 916                                 ret = 0;
 917                         }
 918                 }
 919                 mutex_unlock(&kvm->lock);
 920                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
 921                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
 922                          (void *) kvm->arch.gmap->asce);
 923                 break;
 924         }
 925         default:
 926                 ret = -ENXIO;
 927                 break;
 928         }
 929         return ret;
 930 }
 931
 932 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
 933
 934 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
 935 {
 936         struct kvm_vcpu *vcpu;
 937         int i;
 938
 939         kvm_s390_vcpu_block_all(kvm);
 940
 941         kvm_for_each_vcpu(i, vcpu, kvm) {
 942                 kvm_s390_vcpu_crypto_setup(vcpu);
 943                 /* recreate the shadow crycb by leaving the VSIE handler */
 944                 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
 945         }
 946
 947         kvm_s390_vcpu_unblock_all(kvm);
 948 }
 949
 950 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
 951 {
 952         mutex_lock(&kvm->lock);
 953         switch (attr->attr) {
 954         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
 955                 if (!test_kvm_facility(kvm, 76)) {
 956                         mutex_unlock(&kvm->lock);
 957                         return -EINVAL;
 958                 }
 959                 get_random_bytes(
 960                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
 961                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 962                 kvm->arch.crypto.aes_kw = 1;
 963                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
 964                 break;
 965         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
 966                 if (!test_kvm_facility(kvm, 76)) {
 967                         mutex_unlock(&kvm->lock);
 968                         return -EINVAL;
 969                 }
 970                 get_random_bytes(
 971                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
 972                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 973                 kvm->arch.crypto.dea_kw = 1;
 974                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
 975                 break;
 976         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
 977                 if (!test_kvm_facility(kvm, 76)) {
 978                         mutex_unlock(&kvm->lock);
 979                         return -EINVAL;
 980                 }
 981                 kvm->arch.crypto.aes_kw = 0;
 982                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
 983                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 984                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
 985                 break;
 986         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
 987                 if (!test_kvm_facility(kvm, 76)) {
 988                         mutex_unlock(&kvm->lock);
 989                         return -EINVAL;
 990                 }
 991                 kvm->arch.crypto.dea_kw = 0;
 992                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
 993                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 994                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
 995                 break;
 996         case KVM_S390_VM_CRYPTO_ENABLE_APIE:
 997                 if (!ap_instructions_available()) {
 998                         mutex_unlock(&kvm->lock);
 999                         return -EOPNOTSUPP;
1000                 }
1001                 kvm->arch.crypto.apie = 1;
1002                 break;
1003         case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1004                 if (!ap_instructions_available()) {
1005                         mutex_unlock(&kvm->lock);
1006                         return -EOPNOTSUPP;
1007                 }
1008                 kvm->arch.crypto.apie = 0;
1009                 break;
1010         default:
1011                 mutex_unlock(&kvm->lock);
1012                 return -ENXIO;
1013         }
1014
1015         kvm_s390_vcpu_crypto_reset_all(kvm);
1016         mutex_unlock(&kvm->lock);
1017         return 0;
1018 }
1019
1020 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
1021 {
1022         int cx;
1023         struct kvm_vcpu *vcpu;
1024
1025         kvm_for_each_vcpu(cx, vcpu, kvm)
1026                 kvm_s390_sync_request(req, vcpu);
1027 }
1028
1029 /*
1030  * Must be called with kvm->srcu held to avoid races on memslots, and with
1031  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1032  */
1033 static int kvm_s390_vm_start_migration(struct kvm *kvm)
1034 {
1035         struct kvm_memory_slot *ms;
1036         struct kvm_memslots *slots;
1037         unsigned long ram_pages = 0;
1038         int slotnr;
1039
1040         /* migration mode already enabled */
1041         if (kvm->arch.migration_mode)
1042                 return 0;
1043         slots = kvm_memslots(kvm);
1044         if (!slots || !slots->used_slots)
1045                 return -EINVAL;
1046
1047         if (!kvm->arch.use_cmma) {
1048                 kvm->arch.migration_mode = 1;
1049                 return 0;
1050         }
1051         /* mark all the pages in active slots as dirty */
1052         for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1053                 ms = slots->memslots + slotnr;
1054                 if (!ms->dirty_bitmap)
1055                         return -EINVAL;
1056                 /*
1057                  * The second half of the bitmap is only used on x86,
1058                  * and would be wasted otherwise, so we put it to good
1059                  * use here to keep track of the state of the storage
1060                  * attributes.
1061                  */
1062                 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1063                 ram_pages += ms->npages;
1064         }
1065         atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1066         kvm->arch.migration_mode = 1;
1067         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1068         return 0;
1069 }
1070
1071 /*
1072  * Must be called with kvm->slots_lock to avoid races with ourselves and
1073  * kvm_s390_vm_start_migration.
1074  */
1075 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1076 {
1077         /* migration mode already disabled */
1078         if (!kvm->arch.migration_mode)
1079                 return 0;
1080         kvm->arch.migration_mode = 0;
1081         if (kvm->arch.use_cmma)
1082                 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1083         return 0;
1084 }
1085
1086 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1087                                      struct kvm_device_attr *attr)
1088 {
1089         int res = -ENXIO;
1090
1091         mutex_lock(&kvm->slots_lock);
1092         switch (attr->attr) {
1093         case KVM_S390_VM_MIGRATION_START:
1094                 res = kvm_s390_vm_start_migration(kvm);
1095                 break;
1096         case KVM_S390_VM_MIGRATION_STOP:
1097                 res = kvm_s390_vm_stop_migration(kvm);
1098                 break;
1099         default:
1100                 break;
1101         }
1102         mutex_unlock(&kvm->slots_lock);
1103
1104         return res;
1105 }
1106
1107 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1108                                      struct kvm_device_attr *attr)
1109 {
1110         u64 mig = kvm->arch.migration_mode;
1111
1112         if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1113                 return -ENXIO;
1114
1115         if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1116                 return -EFAULT;
1117         return 0;
1118 }
1119
1120 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1121 {
1122         struct kvm_s390_vm_tod_clock gtod;
1123
1124         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1125                 return -EFAULT;
1126
1127         if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1128                 return -EINVAL;
1129         kvm_s390_set_tod_clock(kvm, &gtod);
1130
1131         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1132                 gtod.epoch_idx, gtod.tod);
1133
1134         return 0;
1135 }
1136
1137 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1138 {
1139         u8 gtod_high;
1140
1141         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1142                                            sizeof(gtod_high)))
1143                 return -EFAULT;
1144
1145         if (gtod_high != 0)
1146                 return -EINVAL;
1147         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1148
1149         return 0;
1150 }
1151
1152 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1153 {
1154         struct kvm_s390_vm_tod_clock gtod = { 0 };
1155
1156         if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1157                            sizeof(gtod.tod)))
1158                 return -EFAULT;
1159
1160         kvm_s390_set_tod_clock(kvm, &gtod);
1161         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1162         return 0;
1163 }
1164
1165 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1166 {
1167         int ret;
1168
1169         if (attr->flags)
1170                 return -EINVAL;
1171
1172         switch (attr->attr) {
1173         case KVM_S390_VM_TOD_EXT:
1174                 ret = kvm_s390_set_tod_ext(kvm, attr);
1175                 break;
1176         case KVM_S390_VM_TOD_HIGH:
1177                 ret = kvm_s390_set_tod_high(kvm, attr);
1178                 break;
1179         case KVM_S390_VM_TOD_LOW:
1180                 ret = kvm_s390_set_tod_low(kvm, attr);
1181                 break;
1182         default:
1183                 ret = -ENXIO;
1184                 break;
1185         }
1186         return ret;
1187 }
1188
1189 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1190                                    struct kvm_s390_vm_tod_clock *gtod)
1191 {
1192         union tod_clock clk;
1193
1194         preempt_disable();
1195
1196         store_tod_clock_ext(&clk);
1197
1198         gtod->tod = clk.tod + kvm->arch.epoch;
1199         gtod->epoch_idx = 0;
1200         if (test_kvm_facility(kvm, 139)) {
1201                 gtod->epoch_idx = clk.ei + kvm->arch.epdx;
1202                 if (gtod->tod < clk.tod)
1203                         gtod->epoch_idx += 1;
1204         }
1205
1206         preempt_enable();
1207 }
1208
1209 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1210 {
1211         struct kvm_s390_vm_tod_clock gtod;
1212
1213         memset(&gtod, 0, sizeof(gtod));
1214         kvm_s390_get_tod_clock(kvm, &gtod);
1215         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1216                 return -EFAULT;
1217
1218         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1219                 gtod.epoch_idx, gtod.tod);
1220         return 0;
1221 }
1222
1223 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1224 {
1225         u8 gtod_high = 0;
1226
1227         if (copy_to_user((void __user *)attr->addr, &gtod_high,
1228                                          sizeof(gtod_high)))
1229                 return -EFAULT;
1230         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1231
1232         return 0;
1233 }
1234
1235 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1236 {
1237         u64 gtod;
1238
1239         gtod = kvm_s390_get_tod_clock_fast(kvm);
1240         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1241                 return -EFAULT;
1242         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1243
1244         return 0;
1245 }
1246
1247 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1248 {
1249         int ret;
1250
1251         if (attr->flags)
1252                 return -EINVAL;
1253
1254         switch (attr->attr) {
1255         case KVM_S390_VM_TOD_EXT:
1256                 ret = kvm_s390_get_tod_ext(kvm, attr);
1257                 break;
1258         case KVM_S390_VM_TOD_HIGH:
1259                 ret = kvm_s390_get_tod_high(kvm, attr);
1260                 break;
1261         case KVM_S390_VM_TOD_LOW:
1262                 ret = kvm_s390_get_tod_low(kvm, attr);
1263                 break;
1264         default:
1265                 ret = -ENXIO;
1266                 break;
1267         }
1268         return ret;
1269 }
1270
1271 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1272 {
1273         struct kvm_s390_vm_cpu_processor *proc;
1274         u16 lowest_ibc, unblocked_ibc;
1275         int ret = 0;
1276
1277         mutex_lock(&kvm->lock);
1278         if (kvm->created_vcpus) {
1279                 ret = -EBUSY;
1280                 goto out;
1281         }
1282         proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1283         if (!proc) {
1284                 ret = -ENOMEM;
1285                 goto out;
1286         }
1287         if (!copy_from_user(proc, (void __user *)attr->addr,
1288                             sizeof(*proc))) {
1289                 kvm->arch.model.cpuid = proc->cpuid;
1290                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1291                 unblocked_ibc = sclp.ibc & 0xfff;
1292                 if (lowest_ibc && proc->ibc) {
1293                         if (proc->ibc > unblocked_ibc)
1294                                 kvm->arch.model.ibc = unblocked_ibc;
1295                         else if (proc->ibc < lowest_ibc)
1296                                 kvm->arch.model.ibc = lowest_ibc;
1297                         else
1298                                 kvm->arch.model.ibc = proc->ibc;
1299                 }
1300                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1301                        S390_ARCH_FAC_LIST_SIZE_BYTE);
1302                 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1303                          kvm->arch.model.ibc,
1304                          kvm->arch.model.cpuid);
1305                 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1306                          kvm->arch.model.fac_list[0],
1307                          kvm->arch.model.fac_list[1],
1308                          kvm->arch.model.fac_list[2]);
1309         } else
1310                 ret = -EFAULT;
1311         kfree(proc);
1312 out:
1313         mutex_unlock(&kvm->lock);
1314         return ret;
1315 }
1316
1317 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1318                                        struct kvm_device_attr *attr)
1319 {
1320         struct kvm_s390_vm_cpu_feat data;
1321
1322         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1323                 return -EFAULT;
1324         if (!bitmap_subset((unsigned long *) data.feat,
1325                            kvm_s390_available_cpu_feat,
1326                            KVM_S390_VM_CPU_FEAT_NR_BITS))
1327                 return -EINVAL;
1328
1329         mutex_lock(&kvm->lock);
1330         if (kvm->created_vcpus) {
1331                 mutex_unlock(&kvm->lock);
1332                 return -EBUSY;
1333         }
1334         bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1335                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1336         mutex_unlock(&kvm->lock);
1337         VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1338                          data.feat[0],
1339                          data.feat[1],
1340                          data.feat[2]);
1341         return 0;
1342 }
1343
1344 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1345                                           struct kvm_device_attr *attr)
1346 {
1347         mutex_lock(&kvm->lock);
1348         if (kvm->created_vcpus) {
1349                 mutex_unlock(&kvm->lock);
1350                 return -EBUSY;
1351         }
1352
1353         if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1354                            sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1355                 mutex_unlock(&kvm->lock);
1356                 return -EFAULT;
1357         }
1358         mutex_unlock(&kvm->lock);
1359
1360         VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1361                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1362                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1363                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1364                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1365         VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1366                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1367                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1368         VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1369                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1370                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1371         VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1372                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1373                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1374         VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1375                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1376                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1377         VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1378                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1379                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1380         VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1381                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1382                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1383         VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1384                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1385                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1386         VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1387                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1388                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1389         VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1390                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1391                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1392         VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1393                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1394                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1395         VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1396                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1397                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1398         VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1399                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1400                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1401         VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1402                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1403                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1404         VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1405                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1406                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1407         VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1408                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1409                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1410                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1411                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1412         VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1413                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1414                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1415                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1416                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1417
1418         return 0;
1419 }
1420
1421 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1422 {
1423         int ret = -ENXIO;
1424
1425         switch (attr->attr) {
1426         case KVM_S390_VM_CPU_PROCESSOR:
1427                 ret = kvm_s390_set_processor(kvm, attr);
1428                 break;
1429         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1430                 ret = kvm_s390_set_processor_feat(kvm, attr);
1431                 break;
1432         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1433                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1434                 break;
1435         }
1436         return ret;
1437 }
1438
1439 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1440 {
1441         struct kvm_s390_vm_cpu_processor *proc;
1442         int ret = 0;
1443
1444         proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1445         if (!proc) {
1446                 ret = -ENOMEM;
1447                 goto out;
1448         }
1449         proc->cpuid = kvm->arch.model.cpuid;
1450         proc->ibc = kvm->arch.model.ibc;
1451         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1452                S390_ARCH_FAC_LIST_SIZE_BYTE);
1453         VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1454                  kvm->arch.model.ibc,
1455                  kvm->arch.model.cpuid);
1456         VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1457                  kvm->arch.model.fac_list[0],
1458                  kvm->arch.model.fac_list[1],
1459                  kvm->arch.model.fac_list[2]);
1460         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1461                 ret = -EFAULT;
1462         kfree(proc);
1463 out:
1464         return ret;
1465 }
1466
1467 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1468 {
1469         struct kvm_s390_vm_cpu_machine *mach;
1470         int ret = 0;
1471
1472         mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
1473         if (!mach) {
1474                 ret = -ENOMEM;
1475                 goto out;
1476         }
1477         get_cpu_id((struct cpuid *) &mach->cpuid);
1478         mach->ibc = sclp.ibc;
1479         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1480                S390_ARCH_FAC_LIST_SIZE_BYTE);
1481         memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
1482                sizeof(stfle_fac_list));
1483         VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1484                  kvm->arch.model.ibc,
1485                  kvm->arch.model.cpuid);
1486         VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1487                  mach->fac_mask[0],
1488                  mach->fac_mask[1],
1489                  mach->fac_mask[2]);
1490         VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1491                  mach->fac_list[0],
1492                  mach->fac_list[1],
1493                  mach->fac_list[2]);
1494         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1495                 ret = -EFAULT;
1496         kfree(mach);
1497 out:
1498         return ret;
1499 }
1500
1501 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1502                                        struct kvm_device_attr *attr)
1503 {
1504         struct kvm_s390_vm_cpu_feat data;
1505
1506         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1507                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1508         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1509                 return -EFAULT;
1510         VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1511                          data.feat[0],
1512                          data.feat[1],
1513                          data.feat[2]);
1514         return 0;
1515 }
1516
1517 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1518                                      struct kvm_device_attr *attr)
1519 {
1520         struct kvm_s390_vm_cpu_feat data;
1521
1522         bitmap_copy((unsigned long *) data.feat,
1523                     kvm_s390_available_cpu_feat,
1524                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1525         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1526                 return -EFAULT;
1527         VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1528                          data.feat[0],
1529                          data.feat[1],
1530                          data.feat[2]);
1531         return 0;
1532 }
1533
1534 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1535                                           struct kvm_device_attr *attr)
1536 {
1537         if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1538             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1539                 return -EFAULT;
1540
1541         VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1542                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1543                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1544                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1545                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1546         VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1547                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1548                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1549         VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1550                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1551                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1552         VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1553                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1554                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1555         VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1556                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1557                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1558         VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1559                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1560                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1561         VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1562                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1563                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1564         VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1565                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1566                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1567         VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1568                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1569                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1570         VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1571                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1572                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1573         VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1574                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1575                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1576         VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1577                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1578                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1579         VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1580                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1581                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1582         VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1583                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1584                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1585         VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1586                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1587                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1588         VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1589                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1590                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1591                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1592                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1593         VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1594                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1595                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1596                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1597                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1598
1599         return 0;
1600 }
1601
1602 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1603                                         struct kvm_device_attr *attr)
1604 {
1605         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1606             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1607                 return -EFAULT;
1608
1609         VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1610                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1611                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1612                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1613                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1614         VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1615                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1616                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1617         VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1618                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1619                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1620         VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1621                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1622                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1623         VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1624                  ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1625                  ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1626         VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1627                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1628                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1629         VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1630                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1631                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1632         VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1633                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1634                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1635         VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1636                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1637                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1638         VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1639                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1640                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1641         VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1642                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1643                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1644         VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1645                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1646                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1647         VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1648                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1649                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1650         VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1651                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1652                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1653         VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1654                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1655                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1656         VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1657                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1658                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1659                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1660                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1661         VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1662                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1663                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1664                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1665                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1666
1667         return 0;
1668 }
1669
1670 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1671 {
1672         int ret = -ENXIO;
1673
1674         switch (attr->attr) {
1675         case KVM_S390_VM_CPU_PROCESSOR:
1676                 ret = kvm_s390_get_processor(kvm, attr);
1677                 break;
1678         case KVM_S390_VM_CPU_MACHINE:
1679                 ret = kvm_s390_get_machine(kvm, attr);
1680                 break;
1681         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1682                 ret = kvm_s390_get_processor_feat(kvm, attr);
1683                 break;
1684         case KVM_S390_VM_CPU_MACHINE_FEAT:
1685                 ret = kvm_s390_get_machine_feat(kvm, attr);
1686                 break;
1687         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1688                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1689                 break;
1690         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1691                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1692                 break;
1693         }
1694         return ret;
1695 }
1696
1697 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1698 {
1699         int ret;
1700
1701         switch (attr->group) {
1702         case KVM_S390_VM_MEM_CTRL:
1703                 ret = kvm_s390_set_mem_control(kvm, attr);
1704                 break;
1705         case KVM_S390_VM_TOD:
1706                 ret = kvm_s390_set_tod(kvm, attr);
1707                 break;
1708         case KVM_S390_VM_CPU_MODEL:
1709                 ret = kvm_s390_set_cpu_model(kvm, attr);
1710                 break;
1711         case KVM_S390_VM_CRYPTO:
1712                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1713                 break;
1714         case KVM_S390_VM_MIGRATION:
1715                 ret = kvm_s390_vm_set_migration(kvm, attr);
1716                 break;
1717         default:
1718                 ret = -ENXIO;
1719                 break;
1720         }
1721
1722         return ret;
1723 }
1724
1725 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1726 {
1727         int ret;
1728
1729         switch (attr->group) {
1730         case KVM_S390_VM_MEM_CTRL:
1731                 ret = kvm_s390_get_mem_control(kvm, attr);
1732                 break;
1733         case KVM_S390_VM_TOD:
1734                 ret = kvm_s390_get_tod(kvm, attr);
1735                 break;
1736         case KVM_S390_VM_CPU_MODEL:
1737                 ret = kvm_s390_get_cpu_model(kvm, attr);
1738                 break;
1739         case KVM_S390_VM_MIGRATION:
1740                 ret = kvm_s390_vm_get_migration(kvm, attr);
1741                 break;
1742         default:
1743                 ret = -ENXIO;
1744                 break;
1745         }
1746
1747         return ret;
1748 }
1749
1750 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1751 {
1752         int ret;
1753
1754         switch (attr->group) {
1755         case KVM_S390_VM_MEM_CTRL:
1756                 switch (attr->attr) {
1757                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1758                 case KVM_S390_VM_MEM_CLR_CMMA:
1759                         ret = sclp.has_cmma ? 0 : -ENXIO;
1760                         break;
1761                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1762                         ret = 0;
1763                         break;
1764                 default:
1765                         ret = -ENXIO;
1766                         break;
1767                 }
1768                 break;
1769         case KVM_S390_VM_TOD:
1770                 switch (attr->attr) {
1771                 case KVM_S390_VM_TOD_LOW:
1772                 case KVM_S390_VM_TOD_HIGH:
1773                         ret = 0;
1774                         break;
1775                 default:
1776                         ret = -ENXIO;
1777                         break;
1778                 }
1779                 break;
1780         case KVM_S390_VM_CPU_MODEL:
1781                 switch (attr->attr) {
1782                 case KVM_S390_VM_CPU_PROCESSOR:
1783                 case KVM_S390_VM_CPU_MACHINE:
1784                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1785                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1786                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1787                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1788                         ret = 0;
1789                         break;
1790                 default:
1791                         ret = -ENXIO;
1792                         break;
1793                 }
1794                 break;
1795         case KVM_S390_VM_CRYPTO:
1796                 switch (attr->attr) {
1797                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1798                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1799                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1800                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1801                         ret = 0;
1802                         break;
1803                 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1804                 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1805                         ret = ap_instructions_available() ? 0 : -ENXIO;
1806                         break;
1807                 default:
1808                         ret = -ENXIO;
1809                         break;
1810                 }
1811                 break;
1812         case KVM_S390_VM_MIGRATION:
1813                 ret = 0;
1814                 break;
1815         default:
1816                 ret = -ENXIO;
1817                 break;
1818         }
1819
1820         return ret;
1821 }
1822
1823 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1824 {
1825         uint8_t *keys;
1826         uint64_t hva;
1827         int srcu_idx, i, r = 0;
1828
1829         if (args->flags != 0)
1830                 return -EINVAL;
1831
1832         /* Is this guest using storage keys? */
1833         if (!mm_uses_skeys(current->mm))
1834                 return KVM_S390_GET_SKEYS_NONE;
1835
1836         /* Enforce sane limit on memory allocation */
1837         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1838                 return -EINVAL;
1839
1840         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1841         if (!keys)
1842                 return -ENOMEM;
1843
1844         mmap_read_lock(current->mm);
1845         srcu_idx = srcu_read_lock(&kvm->srcu);
1846         for (i = 0; i < args->count; i++) {
1847                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1848                 if (kvm_is_error_hva(hva)) {
1849                         r = -EFAULT;
1850                         break;
1851                 }
1852
1853                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1854                 if (r)
1855                         break;
1856         }
1857         srcu_read_unlock(&kvm->srcu, srcu_idx);
1858         mmap_read_unlock(current->mm);
1859
1860         if (!r) {
1861                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1862                                  sizeof(uint8_t) * args->count);
1863                 if (r)
1864                         r = -EFAULT;
1865         }
1866
1867         kvfree(keys);
1868         return r;
1869 }
1870
1871 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1872 {
1873         uint8_t *keys;
1874         uint64_t hva;
1875         int srcu_idx, i, r = 0;
1876         bool unlocked;
1877
1878         if (args->flags != 0)
1879                 return -EINVAL;
1880
1881         /* Enforce sane limit on memory allocation */
1882         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1883                 return -EINVAL;
1884
1885         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1886         if (!keys)
1887                 return -ENOMEM;
1888
1889         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1890                            sizeof(uint8_t) * args->count);
1891         if (r) {
1892                 r = -EFAULT;
1893                 goto out;
1894         }
1895
1896         /* Enable storage key handling for the guest */
1897         r = s390_enable_skey();
1898         if (r)
1899                 goto out;
1900
1901         i = 0;
1902         mmap_read_lock(current->mm);
1903         srcu_idx = srcu_read_lock(&kvm->srcu);
1904         while (i < args->count) {
1905                 unlocked = false;
1906                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1907                 if (kvm_is_error_hva(hva)) {
1908                         r = -EFAULT;
1909                         break;
1910                 }
1911
1912                 /* Lowest order bit is reserved */
1913                 if (keys[i] & 0x01) {
1914                         r = -EINVAL;
1915                         break;
1916                 }
1917
1918                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1919                 if (r) {
1920                         r = fixup_user_fault(current->mm, hva,
1921                                              FAULT_FLAG_WRITE, &unlocked);
1922                         if (r)
1923                                 break;
1924                 }
1925                 if (!r)
1926                         i++;
1927         }
1928         srcu_read_unlock(&kvm->srcu, srcu_idx);
1929         mmap_read_unlock(current->mm);
1930 out:
1931         kvfree(keys);
1932         return r;
1933 }
1934
/*
 * Base address and length must be sent at the start of each block, therefore
 * it's cheaper to send some clean data, as long as it's less than the size of
 * two longs.
 */
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/*
 * Upper bound for the number of CMMA values handled per request; defined in
 * terms of the storage key limit for consistency.
 */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1943
1944 /*
1945  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1946  * address falls in a hole. In that case the index of one of the memslots
1947  * bordering the hole is returned.
1948  */
1949 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1950 {
1951         int start = 0, end = slots->used_slots;
1952         int slot = atomic_read(&slots->last_used_slot);
1953         struct kvm_memory_slot *memslots = slots->memslots;
1954
1955         if (gfn >= memslots[slot].base_gfn &&
1956             gfn < memslots[slot].base_gfn + memslots[slot].npages)
1957                 return slot;
1958
1959         while (start < end) {
1960                 slot = start + (end - start) / 2;
1961
1962                 if (gfn >= memslots[slot].base_gfn)
1963                         end = slot;
1964                 else
1965                         start = slot + 1;
1966         }
1967
1968         if (start >= slots->used_slots)
1969                 return slots->used_slots - 1;
1970
1971         if (gfn >= memslots[start].base_gfn &&
1972             gfn < memslots[start].base_gfn + memslots[start].npages) {
1973                 atomic_set(&slots->last_used_slot, start);
1974         }
1975
1976         return start;
1977 }
1978
1979 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1980                               u8 *res, unsigned long bufsize)
1981 {
1982         unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1983
1984         args->count = 0;
1985         while (args->count < bufsize) {
1986                 hva = gfn_to_hva(kvm, cur_gfn);
1987                 /*
1988                  * We return an error if the first value was invalid, but we
1989                  * return successfully if at least one value was copied.
1990                  */
1991                 if (kvm_is_error_hva(hva))
1992                         return args->count ? 0 : -EFAULT;
1993                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1994                         pgstev = 0;
1995                 res[args->count++] = (pgstev >> 24) & 0x43;
1996                 cur_gfn++;
1997         }
1998
1999         return 0;
2000 }
2001
2002 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
2003                                               unsigned long cur_gfn)
2004 {
2005         int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
2006         struct kvm_memory_slot *ms = slots->memslots + slotidx;
2007         unsigned long ofs = cur_gfn - ms->base_gfn;
2008
2009         if (ms->base_gfn + ms->npages <= cur_gfn) {
2010                 slotidx--;
2011                 /* If we are above the highest slot, wrap around */
2012                 if (slotidx < 0)
2013                         slotidx = slots->used_slots - 1;
2014
2015                 ms = slots->memslots + slotidx;
2016                 ofs = 0;
2017         }
2018         ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
2019         while ((slotidx > 0) && (ofs >= ms->npages)) {
2020                 slotidx--;
2021                 ms = slots->memslots + slotidx;
2022                 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
2023         }
2024         return ms->base_gfn + ofs;
2025 }
2026
2027 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2028                              u8 *res, unsigned long bufsize)
2029 {
2030         unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2031         struct kvm_memslots *slots = kvm_memslots(kvm);
2032         struct kvm_memory_slot *ms;
2033
2034         if (unlikely(!slots->used_slots))
2035                 return 0;
2036
2037         cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2038         ms = gfn_to_memslot(kvm, cur_gfn);
2039         args->count = 0;
2040         args->start_gfn = cur_gfn;
2041         if (!ms)
2042                 return 0;
2043         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2044         mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2045
2046         while (args->count < bufsize) {
2047                 hva = gfn_to_hva(kvm, cur_gfn);
2048                 if (kvm_is_error_hva(hva))
2049                         return 0;
2050                 /* Decrement only if we actually flipped the bit to 0 */
2051                 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2052                         atomic64_dec(&kvm->arch.cmma_dirty_pages);
2053                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2054                         pgstev = 0;
2055                 /* Save the value */
2056                 res[args->count++] = (pgstev >> 24) & 0x43;
2057                 /* If the next bit is too far away, stop. */
2058                 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2059                         return 0;
2060                 /* If we reached the previous "next", find the next one */
2061                 if (cur_gfn == next_gfn)
2062                         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2063                 /* Reached the end of memory or of the buffer, stop */
2064                 if ((next_gfn >= mem_end) ||
2065                     (next_gfn - args->start_gfn >= bufsize))
2066                         return 0;
2067                 cur_gfn++;
2068                 /* Reached the end of the current memslot, take the next one. */
2069                 if (cur_gfn - ms->base_gfn >= ms->npages) {
2070                         ms = gfn_to_memslot(kvm, cur_gfn);
2071                         if (!ms)
2072                                 return 0;
2073                 }
2074         }
2075         return 0;
2076 }
2077
2078 /*
2079  * This function searches for the next page with dirty CMMA attributes, and
2080  * saves the attributes in the buffer up to either the end of the buffer or
2081  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2082  * no trailing clean bytes are saved.
2083  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2084  * output buffer will indicate 0 as length.
2085  */
2086 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2087                                   struct kvm_s390_cmma_log *args)
2088 {
2089         unsigned long bufsize;
2090         int srcu_idx, peek, ret;
2091         u8 *values;
2092
2093         if (!kvm->arch.use_cmma)
2094                 return -ENXIO;
2095         /* Invalid/unsupported flags were specified */
2096         if (args->flags & ~KVM_S390_CMMA_PEEK)
2097                 return -EINVAL;
2098         /* Migration mode query, and we are not doing a migration */
2099         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2100         if (!peek && !kvm->arch.migration_mode)
2101                 return -EINVAL;
2102         /* CMMA is disabled or was not used, or the buffer has length zero */
2103         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2104         if (!bufsize || !kvm->mm->context.uses_cmm) {
2105                 memset(args, 0, sizeof(*args));
2106                 return 0;
2107         }
2108         /* We are not peeking, and there are no dirty pages */
2109         if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2110                 memset(args, 0, sizeof(*args));
2111                 return 0;
2112         }
2113
2114         values = vmalloc(bufsize);
2115         if (!values)
2116                 return -ENOMEM;
2117
2118         mmap_read_lock(kvm->mm);
2119         srcu_idx = srcu_read_lock(&kvm->srcu);
2120         if (peek)
2121                 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2122         else
2123                 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2124         srcu_read_unlock(&kvm->srcu, srcu_idx);
2125         mmap_read_unlock(kvm->mm);
2126
2127         if (kvm->arch.migration_mode)
2128                 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2129         else
2130                 args->remaining = 0;
2131
2132         if (copy_to_user((void __user *)args->values, values, args->count))
2133                 ret = -EFAULT;
2134
2135         vfree(values);
2136         return ret;
2137 }
2138
2139 /*
2140  * This function sets the CMMA attributes for the given pages. If the input
2141  * buffer has zero length, no action is taken, otherwise the attributes are
2142  * set and the mm->context.uses_cmm flag is set.
2143  */
2144 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2145                                   const struct kvm_s390_cmma_log *args)
2146 {
2147         unsigned long hva, mask, pgstev, i;
2148         uint8_t *bits;
2149         int srcu_idx, r = 0;
2150
2151         mask = args->mask;
2152
2153         if (!kvm->arch.use_cmma)
2154                 return -ENXIO;
2155         /* invalid/unsupported flags */
2156         if (args->flags != 0)
2157                 return -EINVAL;
2158         /* Enforce sane limit on memory allocation */
2159         if (args->count > KVM_S390_CMMA_SIZE_MAX)
2160                 return -EINVAL;
2161         /* Nothing to do */
2162         if (args->count == 0)
2163                 return 0;
2164
2165         bits = vmalloc(array_size(sizeof(*bits), args->count));
2166         if (!bits)
2167                 return -ENOMEM;
2168
2169         r = copy_from_user(bits, (void __user *)args->values, args->count);
2170         if (r) {
2171                 r = -EFAULT;
2172                 goto out;
2173         }
2174
2175         mmap_read_lock(kvm->mm);
2176         srcu_idx = srcu_read_lock(&kvm->srcu);
2177         for (i = 0; i < args->count; i++) {
2178                 hva = gfn_to_hva(kvm, args->start_gfn + i);
2179                 if (kvm_is_error_hva(hva)) {
2180                         r = -EFAULT;
2181                         break;
2182                 }
2183
2184                 pgstev = bits[i];
2185                 pgstev = pgstev << 24;
2186                 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2187                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2188         }
2189         srcu_read_unlock(&kvm->srcu, srcu_idx);
2190         mmap_read_unlock(kvm->mm);
2191
2192         if (!kvm->mm->context.uses_cmm) {
2193                 mmap_write_lock(kvm->mm);
2194                 kvm->mm->context.uses_cmm = 1;
2195                 mmap_write_unlock(kvm->mm);
2196         }
2197 out:
2198         vfree(bits);
2199         return r;
2200 }
2201
2202 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2203 {
2204         struct kvm_vcpu *vcpu;
2205         u16 rc, rrc;
2206         int ret = 0;
2207         int i;
2208
2209         /*
2210          * We ignore failures and try to destroy as many CPUs as possible.
2211          * At the same time we must not free the assigned resources when
2212          * this fails, as the ultravisor has still access to that memory.
2213          * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2214          * behind.
2215          * We want to return the first failure rc and rrc, though.
2216          */
2217         kvm_for_each_vcpu(i, vcpu, kvm) {
2218                 mutex_lock(&vcpu->mutex);
2219                 if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2220                         *rcp = rc;
2221                         *rrcp = rrc;
2222                         ret = -EIO;
2223                 }
2224                 mutex_unlock(&vcpu->mutex);
2225         }
2226         return ret;
2227 }
2228
2229 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2230 {
2231         int i, r = 0;
2232         u16 dummy;
2233
2234         struct kvm_vcpu *vcpu;
2235
2236         kvm_for_each_vcpu(i, vcpu, kvm) {
2237                 mutex_lock(&vcpu->mutex);
2238                 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2239                 mutex_unlock(&vcpu->mutex);
2240                 if (r)
2241                         break;
2242         }
2243         if (r)
2244                 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2245         return r;
2246 }
2247
2248 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2249 {
2250         int r = 0;
2251         u16 dummy;
2252         void __user *argp = (void __user *)cmd->data;
2253
2254         switch (cmd->cmd) {
2255         case KVM_PV_ENABLE: {
2256                 r = -EINVAL;
2257                 if (kvm_s390_pv_is_protected(kvm))
2258                         break;
2259
2260                 /*
2261                  *  FMT 4 SIE needs esca. As we never switch back to bsca from
2262                  *  esca, we need no cleanup in the error cases below
2263                  */
2264                 r = sca_switch_to_extended(kvm);
2265                 if (r)
2266                         break;
2267
2268                 mmap_write_lock(current->mm);
2269                 r = gmap_mark_unmergeable();
2270                 mmap_write_unlock(current->mm);
2271                 if (r)
2272                         break;
2273
2274                 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2275                 if (r)
2276                         break;
2277
2278                 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2279                 if (r)
2280                         kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2281
2282                 /* we need to block service interrupts from now on */
2283                 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2284                 break;
2285         }
2286         case KVM_PV_DISABLE: {
2287                 r = -EINVAL;
2288                 if (!kvm_s390_pv_is_protected(kvm))
2289                         break;
2290
2291                 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2292                 /*
2293                  * If a CPU could not be destroyed, destroy VM will also fail.
2294                  * There is no point in trying to destroy it. Instead return
2295                  * the rc and rrc from the first CPU that failed destroying.
2296                  */
2297                 if (r)
2298                         break;
2299                 r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2300
2301                 /* no need to block service interrupts any more */
2302                 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2303                 break;
2304         }
2305         case KVM_PV_SET_SEC_PARMS: {
2306                 struct kvm_s390_pv_sec_parm parms = {};
2307                 void *hdr;
2308
2309                 r = -EINVAL;
2310                 if (!kvm_s390_pv_is_protected(kvm))
2311                         break;
2312
2313                 r = -EFAULT;
2314                 if (copy_from_user(&parms, argp, sizeof(parms)))
2315                         break;
2316
2317                 /* Currently restricted to 8KB */
2318                 r = -EINVAL;
2319                 if (parms.length > PAGE_SIZE * 2)
2320                         break;
2321
2322                 r = -ENOMEM;
2323                 hdr = vmalloc(parms.length);
2324                 if (!hdr)
2325                         break;
2326
2327                 r = -EFAULT;
2328                 if (!copy_from_user(hdr, (void __user *)parms.origin,
2329                                     parms.length))
2330                         r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2331                                                       &cmd->rc, &cmd->rrc);
2332
2333                 vfree(hdr);
2334                 break;
2335         }
2336         case KVM_PV_UNPACK: {
2337                 struct kvm_s390_pv_unp unp = {};
2338
2339                 r = -EINVAL;
2340                 if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2341                         break;
2342
2343                 r = -EFAULT;
2344                 if (copy_from_user(&unp, argp, sizeof(unp)))
2345                         break;
2346
2347                 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2348                                        &cmd->rc, &cmd->rrc);
2349                 break;
2350         }
2351         case KVM_PV_VERIFY: {
2352                 r = -EINVAL;
2353                 if (!kvm_s390_pv_is_protected(kvm))
2354                         break;
2355
2356                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2357                                   UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2358                 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2359                              cmd->rrc);
2360                 break;
2361         }
2362         case KVM_PV_PREP_RESET: {
2363                 r = -EINVAL;
2364                 if (!kvm_s390_pv_is_protected(kvm))
2365                         break;
2366
2367                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2368                                   UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2369                 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2370                              cmd->rc, cmd->rrc);
2371                 break;
2372         }
2373         case KVM_PV_UNSHARE_ALL: {
2374                 r = -EINVAL;
2375                 if (!kvm_s390_pv_is_protected(kvm))
2376                         break;
2377
2378                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2379                                   UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2380                 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2381                              cmd->rc, cmd->rrc);
2382                 break;
2383         }
2384         default:
2385                 r = -ENOTTY;
2386         }
2387         return r;
2388 }
2389
2390 long kvm_arch_vm_ioctl(struct file *filp,
2391                        unsigned int ioctl, unsigned long arg)
2392 {
2393         struct kvm *kvm = filp->private_data;
2394         void __user *argp = (void __user *)arg;
2395         struct kvm_device_attr attr;
2396         int r;
2397
2398         switch (ioctl) {
2399         case KVM_S390_INTERRUPT: {
2400                 struct kvm_s390_interrupt s390int;
2401
2402                 r = -EFAULT;
2403                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2404                         break;
2405                 r = kvm_s390_inject_vm(kvm, &s390int);
2406                 break;
2407         }
2408         case KVM_CREATE_IRQCHIP: {
2409                 struct kvm_irq_routing_entry routing;
2410
2411                 r = -EINVAL;
2412                 if (kvm->arch.use_irqchip) {
2413                         /* Set up dummy routing. */
2414                         memset(&routing, 0, sizeof(routing));
2415                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2416                 }
2417                 break;
2418         }
2419         case KVM_SET_DEVICE_ATTR: {
2420                 r = -EFAULT;
2421                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2422                         break;
2423                 r = kvm_s390_vm_set_attr(kvm, &attr);
2424                 break;
2425         }
2426         case KVM_GET_DEVICE_ATTR: {
2427                 r = -EFAULT;
2428                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2429                         break;
2430                 r = kvm_s390_vm_get_attr(kvm, &attr);
2431                 break;
2432         }
2433         case KVM_HAS_DEVICE_ATTR: {
2434                 r = -EFAULT;
2435                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2436                         break;
2437                 r = kvm_s390_vm_has_attr(kvm, &attr);
2438                 break;
2439         }
2440         case KVM_S390_GET_SKEYS: {
2441                 struct kvm_s390_skeys args;
2442
2443                 r = -EFAULT;
2444                 if (copy_from_user(&args, argp,
2445                                    sizeof(struct kvm_s390_skeys)))
2446                         break;
2447                 r = kvm_s390_get_skeys(kvm, &args);
2448                 break;
2449         }
2450         case KVM_S390_SET_SKEYS: {
2451                 struct kvm_s390_skeys args;
2452
2453                 r = -EFAULT;
2454                 if (copy_from_user(&args, argp,
2455                                    sizeof(struct kvm_s390_skeys)))
2456                         break;
2457                 r = kvm_s390_set_skeys(kvm, &args);
2458                 break;
2459         }
2460         case KVM_S390_GET_CMMA_BITS: {
2461                 struct kvm_s390_cmma_log args;
2462
2463                 r = -EFAULT;
2464                 if (copy_from_user(&args, argp, sizeof(args)))
2465                         break;
2466                 mutex_lock(&kvm->slots_lock);
2467                 r = kvm_s390_get_cmma_bits(kvm, &args);
2468                 mutex_unlock(&kvm->slots_lock);
2469                 if (!r) {
2470                         r = copy_to_user(argp, &args, sizeof(args));
2471                         if (r)
2472                                 r = -EFAULT;
2473                 }
2474                 break;
2475         }
2476         case KVM_S390_SET_CMMA_BITS: {
2477                 struct kvm_s390_cmma_log args;
2478
2479                 r = -EFAULT;
2480                 if (copy_from_user(&args, argp, sizeof(args)))
2481                         break;
2482                 mutex_lock(&kvm->slots_lock);
2483                 r = kvm_s390_set_cmma_bits(kvm, &args);
2484                 mutex_unlock(&kvm->slots_lock);
2485                 break;
2486         }
2487         case KVM_S390_PV_COMMAND: {
2488                 struct kvm_pv_cmd args;
2489
2490                 /* protvirt means user sigp */
2491                 kvm->arch.user_cpu_state_ctrl = 1;
2492                 r = 0;
2493                 if (!is_prot_virt_host()) {
2494                         r = -EINVAL;
2495                         break;
2496                 }
2497                 if (copy_from_user(&args, argp, sizeof(args))) {
2498                         r = -EFAULT;
2499                         break;
2500                 }
2501                 if (args.flags) {
2502                         r = -EINVAL;
2503                         break;
2504                 }
2505                 mutex_lock(&kvm->lock);
2506                 r = kvm_s390_handle_pv(kvm, &args);
2507                 mutex_unlock(&kvm->lock);
2508                 if (copy_to_user(argp, &args, sizeof(args))) {
2509                         r = -EFAULT;
2510                         break;
2511                 }
2512                 break;
2513         }
2514         default:
2515                 r = -ENOTTY;
2516         }
2517
2518         return r;
2519 }
2520
2521 static int kvm_s390_apxa_installed(void)
2522 {
2523         struct ap_config_info info;
2524
2525         if (ap_instructions_available()) {
2526                 if (ap_qci(&info) == 0)
2527                         return info.apxa;
2528         }
2529
2530         return 0;
2531 }
2532
2533 /*
2534  * The format of the crypto control block (CRYCB) is specified in the 3 low
2535  * order bits of the CRYCB designation (CRYCBD) field as follows:
2536  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2537  *           AP extended addressing (APXA) facility are installed.
2538  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2539  * Format 2: Both the APXA and MSAX3 facilities are installed
2540  */
2541 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2542 {
2543         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2544
2545         /* Clear the CRYCB format bits - i.e., set format 0 by default */
2546         kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2547
2548         /* Check whether MSAX3 is installed */
2549         if (!test_kvm_facility(kvm, 76))
2550                 return;
2551
2552         if (kvm_s390_apxa_installed())
2553                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2554         else
2555                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2556 }
2557
2558 /*
2559  * kvm_arch_crypto_set_masks
2560  *
2561  * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2562  *       to be set.
2563  * @apm: the mask identifying the accessible AP adapters
2564  * @aqm: the mask identifying the accessible AP domains
2565  * @adm: the mask identifying the accessible AP control domains
2566  *
2567  * Set the masks that identify the adapters, domains and control domains to
2568  * which the KVM guest is granted access.
2569  *
2570  * Note: The kvm->lock mutex must be locked by the caller before invoking this
2571  *       function.
2572  */
2573 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2574                                unsigned long *aqm, unsigned long *adm)
2575 {
2576         struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2577
2578         kvm_s390_vcpu_block_all(kvm);
2579
2580         switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2581         case CRYCB_FORMAT2: /* APCB1 use 256 bits */
2582                 memcpy(crycb->apcb1.apm, apm, 32);
2583                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2584                          apm[0], apm[1], apm[2], apm[3]);
2585                 memcpy(crycb->apcb1.aqm, aqm, 32);
2586                 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2587                          aqm[0], aqm[1], aqm[2], aqm[3]);
2588                 memcpy(crycb->apcb1.adm, adm, 32);
2589                 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2590                          adm[0], adm[1], adm[2], adm[3]);
2591                 break;
2592         case CRYCB_FORMAT1:
2593         case CRYCB_FORMAT0: /* Fall through both use APCB0 */
2594                 memcpy(crycb->apcb0.apm, apm, 8);
2595                 memcpy(crycb->apcb0.aqm, aqm, 2);
2596                 memcpy(crycb->apcb0.adm, adm, 2);
2597                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2598                          apm[0], *((unsigned short *)aqm),
2599                          *((unsigned short *)adm));
2600                 break;
2601         default:        /* Can not happen */
2602                 break;
2603         }
2604
2605         /* recreate the shadow crycb for each vcpu */
2606         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2607         kvm_s390_vcpu_unblock_all(kvm);
2608 }
2609 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2610
2611 /*
2612  * kvm_arch_crypto_clear_masks
2613  *
2614  * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2615  *       to be cleared.
2616  *
2617  * Clear the masks that identify the adapters, domains and control domains to
2618  * which the KVM guest is granted access.
2619  *
2620  * Note: The kvm->lock mutex must be locked by the caller before invoking this
2621  *       function.
2622  */
2623 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2624 {
2625         kvm_s390_vcpu_block_all(kvm);
2626
2627         memset(&kvm->arch.crypto.crycb->apcb0, 0,
2628                sizeof(kvm->arch.crypto.crycb->apcb0));
2629         memset(&kvm->arch.crypto.crycb->apcb1, 0,
2630                sizeof(kvm->arch.crypto.crycb->apcb1));
2631
2632         VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2633         /* recreate the shadow crycb for each vcpu */
2634         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2635         kvm_s390_vcpu_unblock_all(kvm);
2636 }
2637 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2638
2639 static u64 kvm_s390_get_initial_cpuid(void)
2640 {
2641         struct cpuid cpuid;
2642
2643         get_cpu_id(&cpuid);
2644         cpuid.version = 0xff;
2645         return *((u64 *) &cpuid);
2646 }
2647
2648 static void kvm_s390_crypto_init(struct kvm *kvm)
2649 {
2650         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2651         kvm_s390_set_crycb_format(kvm);
2652         init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);
2653
2654         if (!test_kvm_facility(kvm, 76))
2655                 return;
2656
2657         /* Enable AES/DEA protected key functions by default */
2658         kvm->arch.crypto.aes_kw = 1;
2659         kvm->arch.crypto.dea_kw = 1;
2660         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2661                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2662         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2663                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2664 }
2665
2666 static void sca_dispose(struct kvm *kvm)
2667 {
2668         if (kvm->arch.use_esca)
2669                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2670         else
2671                 free_page((unsigned long)(kvm->arch.sca));
2672         kvm->arch.sca = NULL;
2673 }
2674
2675 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2676 {
2677         gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
2678         int i, rc;
2679         char debug_name[16];
2680         static unsigned long sca_offset;
2681
2682         rc = -EINVAL;
2683 #ifdef CONFIG_KVM_S390_UCONTROL
2684         if (type & ~KVM_VM_S390_UCONTROL)
2685                 goto out_err;
2686         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2687                 goto out_err;
2688 #else
2689         if (type)
2690                 goto out_err;
2691 #endif
2692
2693         rc = s390_enable_sie();
2694         if (rc)
2695                 goto out_err;
2696
2697         rc = -ENOMEM;
2698
2699         if (!sclp.has_64bscao)
2700                 alloc_flags |= GFP_DMA;
2701         rwlock_init(&kvm->arch.sca_lock);
2702         /* start with basic SCA */
2703         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2704         if (!kvm->arch.sca)
2705                 goto out_err;
2706         mutex_lock(&kvm_lock);
2707         sca_offset += 16;
2708         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2709                 sca_offset = 0;
2710         kvm->arch.sca = (struct bsca_block *)
2711                         ((char *) kvm->arch.sca + sca_offset);
2712         mutex_unlock(&kvm_lock);
2713
2714         sprintf(debug_name, "kvm-%u", current->pid);
2715
2716         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2717         if (!kvm->arch.dbf)
2718                 goto out_err;
2719
2720         BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2721         kvm->arch.sie_page2 =
2722              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
2723         if (!kvm->arch.sie_page2)
2724                 goto out_err;
2725
2726         kvm->arch.sie_page2->kvm = kvm;
2727         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2728
2729         for (i = 0; i < kvm_s390_fac_size(); i++) {
2730                 kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
2731                                               (kvm_s390_fac_base[i] |
2732                                                kvm_s390_fac_ext[i]);
2733                 kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
2734                                               kvm_s390_fac_base[i];
2735         }
2736         kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2737
2738         /* we are always in czam mode - even on pre z14 machines */
2739         set_kvm_facility(kvm->arch.model.fac_mask, 138);
2740         set_kvm_facility(kvm->arch.model.fac_list, 138);
2741         /* we emulate STHYI in kvm */
2742         set_kvm_facility(kvm->arch.model.fac_mask, 74);
2743         set_kvm_facility(kvm->arch.model.fac_list, 74);
2744         if (MACHINE_HAS_TLB_GUEST) {
2745                 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2746                 set_kvm_facility(kvm->arch.model.fac_list, 147);
2747         }
2748
2749         if (css_general_characteristics.aiv && test_facility(65))
2750                 set_kvm_facility(kvm->arch.model.fac_mask, 65);
2751
2752         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2753         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2754
2755         kvm_s390_crypto_init(kvm);
2756
2757         mutex_init(&kvm->arch.float_int.ais_lock);
2758         spin_lock_init(&kvm->arch.float_int.lock);
2759         for (i = 0; i < FIRQ_LIST_COUNT; i++)
2760                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2761         init_waitqueue_head(&kvm->arch.ipte_wq);
2762         mutex_init(&kvm->arch.ipte_mutex);
2763
2764         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2765         VM_EVENT(kvm, 3, "vm created with type %lu", type);
2766
2767         if (type & KVM_VM_S390_UCONTROL) {
2768                 kvm->arch.gmap = NULL;
2769                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2770         } else {
2771                 if (sclp.hamax == U64_MAX)
2772                         kvm->arch.mem_limit = TASK_SIZE_MAX;
2773                 else
2774                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2775                                                     sclp.hamax + 1);
2776                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2777                 if (!kvm->arch.gmap)
2778                         goto out_err;
2779                 kvm->arch.gmap->private = kvm;
2780                 kvm->arch.gmap->pfault_enabled = 0;
2781         }
2782
2783         kvm->arch.use_pfmfi = sclp.has_pfmfi;
2784         kvm->arch.use_skf = sclp.has_skey;
2785         spin_lock_init(&kvm->arch.start_stop_lock);
2786         kvm_s390_vsie_init(kvm);
2787         if (use_gisa)
2788                 kvm_s390_gisa_init(kvm);
2789         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2790
2791         return 0;
2792 out_err:
2793         free_page((unsigned long)kvm->arch.sie_page2);
2794         debug_unregister(kvm->arch.dbf);
2795         sca_dispose(kvm);
2796         KVM_EVENT(3, "creation of vm failed: %d", rc);
2797         return rc;
2798 }
2799
2800 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2801 {
2802         u16 rc, rrc;
2803
2804         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2805         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2806         kvm_s390_clear_local_irqs(vcpu);
2807         kvm_clear_async_pf_completion_queue(vcpu);
2808         if (!kvm_is_ucontrol(vcpu->kvm))
2809                 sca_del_vcpu(vcpu);
2810
2811         if (kvm_is_ucontrol(vcpu->kvm))
2812                 gmap_remove(vcpu->arch.gmap);
2813
2814         if (vcpu->kvm->arch.use_cmma)
2815                 kvm_s390_vcpu_unsetup_cmma(vcpu);
2816         /* We can not hold the vcpu mutex here, we are already dying */
2817         if (kvm_s390_pv_cpu_get_handle(vcpu))
2818                 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2819         free_page((unsigned long)(vcpu->arch.sie_block));
2820 }
2821
2822 static void kvm_free_vcpus(struct kvm *kvm)
2823 {
2824         unsigned int i;
2825         struct kvm_vcpu *vcpu;
2826
2827         kvm_for_each_vcpu(i, vcpu, kvm)
2828                 kvm_vcpu_destroy(vcpu);
2829
2830         mutex_lock(&kvm->lock);
2831         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2832                 kvm->vcpus[i] = NULL;
2833
2834         atomic_set(&kvm->online_vcpus, 0);
2835         mutex_unlock(&kvm->lock);
2836 }
2837
2838 void kvm_arch_destroy_vm(struct kvm *kvm)
2839 {
2840         u16 rc, rrc;
2841
2842         kvm_free_vcpus(kvm);
2843         sca_dispose(kvm);
2844         kvm_s390_gisa_destroy(kvm);
2845         /*
2846          * We are already at the end of life and kvm->lock is not taken.
2847          * This is ok as the file descriptor is closed by now and nobody
2848          * can mess with the pv state. To avoid lockdep_assert_held from
2849          * complaining we do not use kvm_s390_pv_is_protected.
2850          */
2851         if (kvm_s390_pv_get_handle(kvm))
2852                 kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2853         debug_unregister(kvm->arch.dbf);
2854         free_page((unsigned long)kvm->arch.sie_page2);
2855         if (!kvm_is_ucontrol(kvm))
2856                 gmap_remove(kvm->arch.gmap);
2857         kvm_s390_destroy_adapters(kvm);
2858         kvm_s390_clear_float_irqs(kvm);
2859         kvm_s390_vsie_destroy(kvm);
2860         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2861 }
2862
2863 /* Section: vcpu related */
2864 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2865 {
2866         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2867         if (!vcpu->arch.gmap)
2868                 return -ENOMEM;
2869         vcpu->arch.gmap->private = vcpu->kvm;
2870
2871         return 0;
2872 }
2873
2874 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2875 {
2876         if (!kvm_s390_use_sca_entries())
2877                 return;
2878         read_lock(&vcpu->kvm->arch.sca_lock);
2879         if (vcpu->kvm->arch.use_esca) {
2880                 struct esca_block *sca = vcpu->kvm->arch.sca;
2881
2882                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2883                 sca->cpu[vcpu->vcpu_id].sda = 0;
2884         } else {
2885                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2886
2887                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2888                 sca->cpu[vcpu->vcpu_id].sda = 0;
2889         }
2890         read_unlock(&vcpu->kvm->arch.sca_lock);
2891 }
2892
2893 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2894 {
2895         if (!kvm_s390_use_sca_entries()) {
2896                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2897
2898                 /* we still need the basic sca for the ipte control */
2899                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2900                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2901                 return;
2902         }
2903         read_lock(&vcpu->kvm->arch.sca_lock);
2904         if (vcpu->kvm->arch.use_esca) {
2905                 struct esca_block *sca = vcpu->kvm->arch.sca;
2906
2907                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2908                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2909                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2910                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2911                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2912         } else {
2913                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2914
2915                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2916                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2917                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2918                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2919         }
2920         read_unlock(&vcpu->kvm->arch.sca_lock);
2921 }
2922
2923 /* Basic SCA to Extended SCA data copy routines */
2924 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2925 {
2926         d->sda = s->sda;
2927         d->sigp_ctrl.c = s->sigp_ctrl.c;
2928         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2929 }
2930
2931 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2932 {
2933         int i;
2934
2935         d->ipte_control = s->ipte_control;
2936         d->mcn[0] = s->mcn;
2937         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2938                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2939 }
2940
2941 static int sca_switch_to_extended(struct kvm *kvm)
2942 {
2943         struct bsca_block *old_sca = kvm->arch.sca;
2944         struct esca_block *new_sca;
2945         struct kvm_vcpu *vcpu;
2946         unsigned int vcpu_idx;
2947         u32 scaol, scaoh;
2948
2949         if (kvm->arch.use_esca)
2950                 return 0;
2951
2952         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
2953         if (!new_sca)
2954                 return -ENOMEM;
2955
2956         scaoh = (u32)((u64)(new_sca) >> 32);
2957         scaol = (u32)(u64)(new_sca) & ~0x3fU;
2958
2959         kvm_s390_vcpu_block_all(kvm);
2960         write_lock(&kvm->arch.sca_lock);
2961
2962         sca_copy_b_to_e(new_sca, old_sca);
2963
2964         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2965                 vcpu->arch.sie_block->scaoh = scaoh;
2966                 vcpu->arch.sie_block->scaol = scaol;
2967                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2968         }
2969         kvm->arch.sca = new_sca;
2970         kvm->arch.use_esca = 1;
2971
2972         write_unlock(&kvm->arch.sca_lock);
2973         kvm_s390_vcpu_unblock_all(kvm);
2974
2975         free_page((unsigned long)old_sca);
2976
2977         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2978                  old_sca, kvm->arch.sca);
2979         return 0;
2980 }
2981
2982 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2983 {
2984         int rc;
2985
2986         if (!kvm_s390_use_sca_entries()) {
2987                 if (id < KVM_MAX_VCPUS)
2988                         return true;
2989                 return false;
2990         }
2991         if (id < KVM_S390_BSCA_CPU_SLOTS)
2992                 return true;
2993         if (!sclp.has_esca || !sclp.has_64bscao)
2994                 return false;
2995
2996         mutex_lock(&kvm->lock);
2997         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2998         mutex_unlock(&kvm->lock);
2999
3000         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
3001 }
3002
3003 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3004 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3005 {
3006         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
3007         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3008         vcpu->arch.cputm_start = get_tod_clock_fast();
3009         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3010 }
3011
3012 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3013 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3014 {
3015         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
3016         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3017         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3018         vcpu->arch.cputm_start = 0;
3019         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3020 }
3021
3022 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3023 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3024 {
3025         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
3026         vcpu->arch.cputm_enabled = true;
3027         __start_cpu_timer_accounting(vcpu);
3028 }
3029
3030 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3031 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3032 {
3033         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
3034         __stop_cpu_timer_accounting(vcpu);
3035         vcpu->arch.cputm_enabled = false;
3036 }
3037
/* Preemption-safe wrapper around __enable_cpu_timer_accounting(). */
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* protect from TOD sync and vcpu_load/put */
	preempt_disable();
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
3044
/* Preemption-safe wrapper around __disable_cpu_timer_accounting(). */
static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* protect from TOD sync and vcpu_load/put */
	preempt_disable();
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
3051
3052 /* set the cpu timer - may only be called from the VCPU thread itself */
3053 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3054 {
3055         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3056         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3057         if (vcpu->arch.cputm_enabled)
3058                 vcpu->arch.cputm_start = get_tod_clock_fast();
3059         vcpu->arch.sie_block->cputm = cputm;
3060         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3061         preempt_enable();
3062 }
3063
3064 /* update and get the cpu timer - can also be called from other VCPU threads */
3065 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3066 {
3067         unsigned int seq;
3068         __u64 value;
3069
3070         if (unlikely(!vcpu->arch.cputm_enabled))
3071                 return vcpu->arch.sie_block->cputm;
3072
3073         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3074         do {
3075                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3076                 /*
3077                  * If the writer would ever execute a read in the critical
3078                  * section, e.g. in irq context, we have a deadlock.
3079                  */
3080                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3081                 value = vcpu->arch.sie_block->cputm;
3082                 /* if cputm_start is 0, accounting is being started/stopped */
3083                 if (likely(vcpu->arch.cputm_start))
3084                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3085         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3086         preempt_enable();
3087         return value;
3088 }
3089
3090 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3091 {
3092
3093         gmap_enable(vcpu->arch.enabled_gmap);
3094         kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3095         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3096                 __start_cpu_timer_accounting(vcpu);
3097         vcpu->cpu = cpu;
3098 }
3099
3100 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3101 {
3102         vcpu->cpu = -1;
3103         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3104                 __stop_cpu_timer_accounting(vcpu);
3105         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3106         vcpu->arch.enabled_gmap = gmap_get_enabled();
3107         gmap_disable(vcpu->arch.enabled_gmap);
3108
3109 }
3110
3111 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3112 {
3113         mutex_lock(&vcpu->kvm->lock);
3114         preempt_disable();
3115         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3116         vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3117         preempt_enable();
3118         mutex_unlock(&vcpu->kvm->lock);
3119         if (!kvm_is_ucontrol(vcpu->kvm)) {
3120                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3121                 sca_add_vcpu(vcpu);
3122         }
3123         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3124                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3125         /* make vcpu_load load the right gmap on the first trigger */
3126         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3127 }
3128
3129 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3130 {
3131         if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3132             test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3133                 return true;
3134         return false;
3135 }
3136
3137 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3138 {
3139         /* At least one ECC subfunction must be present */
3140         return kvm_has_pckmo_subfunc(kvm, 32) ||
3141                kvm_has_pckmo_subfunc(kvm, 33) ||
3142                kvm_has_pckmo_subfunc(kvm, 34) ||
3143                kvm_has_pckmo_subfunc(kvm, 40) ||
3144                kvm_has_pckmo_subfunc(kvm, 41);
3145
3146 }
3147
3148 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3149 {
3150         /*
3151          * If the AP instructions are not being interpreted and the MSAX3
3152          * facility is not configured for the guest, there is nothing to set up.
3153          */
3154         if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3155                 return;
3156
3157         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3158         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3159         vcpu->arch.sie_block->eca &= ~ECA_APIE;
3160         vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3161
3162         if (vcpu->kvm->arch.crypto.apie)
3163                 vcpu->arch.sie_block->eca |= ECA_APIE;
3164
3165         /* Set up protected key support */
3166         if (vcpu->kvm->arch.crypto.aes_kw) {
3167                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3168                 /* ecc is also wrapped with AES key */
3169                 if (kvm_has_pckmo_ecc(vcpu->kvm))
3170                         vcpu->arch.sie_block->ecd |= ECD_ECC;
3171         }
3172
3173         if (vcpu->kvm->arch.crypto.dea_kw)
3174                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3175 }
3176
3177 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3178 {
3179         free_page(vcpu->arch.sie_block->cbrlo);
3180         vcpu->arch.sie_block->cbrlo = 0;
3181 }
3182
3183 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3184 {
3185         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
3186         if (!vcpu->arch.sie_block->cbrlo)
3187                 return -ENOMEM;
3188         return 0;
3189 }
3190
3191 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3192 {
3193         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3194
3195         vcpu->arch.sie_block->ibc = model->ibc;
3196         if (test_kvm_facility(vcpu->kvm, 7))
3197                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3198 }
3199
3200 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3201 {
3202         int rc = 0;
3203         u16 uvrc, uvrrc;
3204
3205         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3206                                                     CPUSTAT_SM |
3207                                                     CPUSTAT_STOPPED);
3208
3209         if (test_kvm_facility(vcpu->kvm, 78))
3210                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3211         else if (test_kvm_facility(vcpu->kvm, 8))
3212                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3213
3214         kvm_s390_vcpu_setup_model(vcpu);
3215
3216         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3217         if (MACHINE_HAS_ESOP)
3218                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3219         if (test_kvm_facility(vcpu->kvm, 9))
3220                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
3221         if (test_kvm_facility(vcpu->kvm, 73))
3222                 vcpu->arch.sie_block->ecb |= ECB_TE;
3223         if (!kvm_is_ucontrol(vcpu->kvm))
3224                 vcpu->arch.sie_block->ecb |= ECB_SPECI;
3225
3226         if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3227                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3228         if (test_kvm_facility(vcpu->kvm, 130))
3229                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3230         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3231         if (sclp.has_cei)
3232                 vcpu->arch.sie_block->eca |= ECA_CEI;
3233         if (sclp.has_ib)
3234                 vcpu->arch.sie_block->eca |= ECA_IB;
3235         if (sclp.has_siif)
3236                 vcpu->arch.sie_block->eca |= ECA_SII;
3237         if (sclp.has_sigpif)
3238                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
3239         if (test_kvm_facility(vcpu->kvm, 129)) {
3240                 vcpu->arch.sie_block->eca |= ECA_VX;
3241                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3242         }
3243         if (test_kvm_facility(vcpu->kvm, 139))
3244                 vcpu->arch.sie_block->ecd |= ECD_MEF;
3245         if (test_kvm_facility(vcpu->kvm, 156))
3246                 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3247         if (vcpu->arch.sie_block->gd) {
3248                 vcpu->arch.sie_block->eca |= ECA_AIV;
3249                 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3250                            vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3251         }
3252         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3253                                         | SDNXC;
3254         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3255
3256         if (sclp.has_kss)
3257                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3258         else
3259                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3260
3261         if (vcpu->kvm->arch.use_cmma) {
3262                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
3263                 if (rc)
3264                         return rc;
3265         }
3266         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3267         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3268
3269         vcpu->arch.sie_block->hpid = HPID_KVM;
3270
3271         kvm_s390_vcpu_crypto_setup(vcpu);
3272
3273         mutex_lock(&vcpu->kvm->lock);
3274         if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3275                 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3276                 if (rc)
3277                         kvm_s390_vcpu_unsetup_cmma(vcpu);
3278         }
3279         mutex_unlock(&vcpu->kvm->lock);
3280
3281         return rc;
3282 }
3283
3284 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3285 {
3286         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3287                 return -EINVAL;
3288         return 0;
3289 }
3290
3291 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3292 {
3293         struct sie_page *sie_page;
3294         int rc;
3295
3296         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3297         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3298         if (!sie_page)
3299                 return -ENOMEM;
3300
3301         vcpu->arch.sie_block = &sie_page->sie_block;
3302         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3303
3304         /* the real guest size will always be smaller than msl */
3305         vcpu->arch.sie_block->mso = 0;
3306         vcpu->arch.sie_block->msl = sclp.hamax;
3307
3308         vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3309         spin_lock_init(&vcpu->arch.local_int.lock);
3310         vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3311         if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3312                 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3313         seqcount_init(&vcpu->arch.cputm_seqcount);
3314
3315         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3316         kvm_clear_async_pf_completion_queue(vcpu);
3317         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3318                                     KVM_SYNC_GPRS |
3319                                     KVM_SYNC_ACRS |
3320                                     KVM_SYNC_CRS |
3321                                     KVM_SYNC_ARCH0 |
3322                                     KVM_SYNC_PFAULT |
3323                                     KVM_SYNC_DIAG318;
3324         kvm_s390_set_prefix(vcpu, 0);
3325         if (test_kvm_facility(vcpu->kvm, 64))
3326                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3327         if (test_kvm_facility(vcpu->kvm, 82))
3328                 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3329         if (test_kvm_facility(vcpu->kvm, 133))
3330                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3331         if (test_kvm_facility(vcpu->kvm, 156))
3332                 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3333         /* fprs can be synchronized via vrs, even if the guest has no vx. With
3334          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3335          */
3336         if (MACHINE_HAS_VX)
3337                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3338         else
3339                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3340
3341         if (kvm_is_ucontrol(vcpu->kvm)) {
3342                 rc = __kvm_ucontrol_vcpu_init(vcpu);
3343                 if (rc)
3344                         goto out_free_sie_block;
3345         }
3346
3347         VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3348                  vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3349         trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3350
3351         rc = kvm_s390_vcpu_setup(vcpu);
3352         if (rc)
3353                 goto out_ucontrol_uninit;
3354         return 0;
3355
3356 out_ucontrol_uninit:
3357         if (kvm_is_ucontrol(vcpu->kvm))
3358                 gmap_remove(vcpu->arch.gmap);
3359 out_free_sie_block:
3360         free_page((unsigned long)(vcpu->arch.sie_block));
3361         return rc;
3362 }
3363
/* The runnable state is whatever kvm_s390_vcpu_has_irq(vcpu, 0) reports. */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	int has_irq = kvm_s390_vcpu_has_irq(vcpu, 0);

	return has_irq;
}
3368
3369 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3370 {
3371         return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3372 }
3373
3374 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3375 {
3376         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3377         exit_sie(vcpu);
3378 }
3379
3380 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3381 {
3382         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3383 }
3384
3385 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3386 {
3387         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3388         exit_sie(vcpu);
3389 }
3390
3391 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3392 {
3393         return atomic_read(&vcpu->arch.sie_block->prog20) &
3394                (PROG_BLOCK_SIE | PROG_REQUEST);
3395 }
3396
3397 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3398 {
3399         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3400 }
3401
3402 /*
3403  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3404  * If the CPU is not running (e.g. waiting as idle) the function will
3405  * return immediately. */
3406 void exit_sie(struct kvm_vcpu *vcpu)
3407 {
3408         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3409         kvm_s390_vsie_kick(vcpu);
3410         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3411                 cpu_relax();
3412 }
3413
/*
 * Kick a guest cpu out of SIE to process a request synchronously: the
 * request is queued first, then the vcpu is forced out of SIE.
 */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}
3420
3421 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3422                               unsigned long end)
3423 {
3424         struct kvm *kvm = gmap->private;
3425         struct kvm_vcpu *vcpu;
3426         unsigned long prefix;
3427         int i;
3428
3429         if (gmap_is_shadow(gmap))
3430                 return;
3431         if (start >= 1UL << 31)
3432                 /* We are only interested in prefix pages */
3433                 return;
3434         kvm_for_each_vcpu(i, vcpu, kvm) {
3435                 /* match against both prefix pages */
3436                 prefix = kvm_s390_get_prefix(vcpu);
3437                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3438                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3439                                    start, end);
3440                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3441                 }
3442         }
3443 }
3444
3445 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3446 {
3447         /* do not poll with more than halt_poll_max_steal percent of steal time */
3448         if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3449             halt_poll_max_steal) {
3450                 vcpu->stat.halt_no_poll_steal++;
3451                 return true;
3452         }
3453         return false;
3454 }
3455
/*
 * kvm common code refers to this, but never calls it; reaching it is a bug.
 */
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	BUG();
	return 0;
}
3462
3463 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3464                                            struct kvm_one_reg *reg)
3465 {
3466         int r = -EINVAL;
3467
3468         switch (reg->id) {
3469         case KVM_REG_S390_TODPR:
3470                 r = put_user(vcpu->arch.sie_block->todpr,
3471                              (u32 __user *)reg->addr);
3472                 break;
3473         case KVM_REG_S390_EPOCHDIFF:
3474                 r = put_user(vcpu->arch.sie_block->epoch,
3475                              (u64 __user *)reg->addr);
3476                 break;
3477         case KVM_REG_S390_CPU_TIMER:
3478                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3479                              (u64 __user *)reg->addr);
3480                 break;
3481         case KVM_REG_S390_CLOCK_COMP:
3482                 r = put_user(vcpu->arch.sie_block->ckc,
3483                              (u64 __user *)reg->addr);
3484                 break;
3485         case KVM_REG_S390_PFTOKEN:
3486                 r = put_user(vcpu->arch.pfault_token,
3487                              (u64 __user *)reg->addr);
3488                 break;
3489         case KVM_REG_S390_PFCOMPARE:
3490                 r = put_user(vcpu->arch.pfault_compare,
3491                              (u64 __user *)reg->addr);
3492                 break;
3493         case KVM_REG_S390_PFSELECT:
3494                 r = put_user(vcpu->arch.pfault_select,
3495                              (u64 __user *)reg->addr);
3496                 break;
3497         case KVM_REG_S390_PP:
3498                 r = put_user(vcpu->arch.sie_block->pp,
3499                              (u64 __user *)reg->addr);
3500                 break;
3501         case KVM_REG_S390_GBEA:
3502                 r = put_user(vcpu->arch.sie_block->gbea,
3503                              (u64 __user *)reg->addr);
3504                 break;
3505         default:
3506                 break;
3507         }
3508
3509         return r;
3510 }
3511
3512 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3513                                            struct kvm_one_reg *reg)
3514 {
3515         int r = -EINVAL;
3516         __u64 val;
3517
3518         switch (reg->id) {
3519         case KVM_REG_S390_TODPR:
3520                 r = get_user(vcpu->arch.sie_block->todpr,
3521                              (u32 __user *)reg->addr);
3522                 break;
3523         case KVM_REG_S390_EPOCHDIFF:
3524                 r = get_user(vcpu->arch.sie_block->epoch,
3525                              (u64 __user *)reg->addr);
3526                 break;
3527         case KVM_REG_S390_CPU_TIMER:
3528                 r = get_user(val, (u64 __user *)reg->addr);
3529                 if (!r)
3530                         kvm_s390_set_cpu_timer(vcpu, val);
3531                 break;
3532         case KVM_REG_S390_CLOCK_COMP:
3533                 r = get_user(vcpu->arch.sie_block->ckc,
3534                              (u64 __user *)reg->addr);
3535                 break;
3536         case KVM_REG_S390_PFTOKEN:
3537                 r = get_user(vcpu->arch.pfault_token,
3538                              (u64 __user *)reg->addr);
3539                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3540                         kvm_clear_async_pf_completion_queue(vcpu);
3541                 break;
3542         case KVM_REG_S390_PFCOMPARE:
3543                 r = get_user(vcpu->arch.pfault_compare,
3544                              (u64 __user *)reg->addr);
3545                 break;
3546         case KVM_REG_S390_PFSELECT:
3547                 r = get_user(vcpu->arch.pfault_select,
3548                              (u64 __user *)reg->addr);
3549                 break;
3550         case KVM_REG_S390_PP:
3551                 r = get_user(vcpu->arch.sie_block->pp,
3552                              (u64 __user *)reg->addr);
3553                 break;
3554         case KVM_REG_S390_GBEA:
3555                 r = get_user(vcpu->arch.sie_block->gbea,
3556                              (u64 __user *)reg->addr);
3557                 break;
3558         default:
3559                 break;
3560         }
3561
3562         return r;
3563 }
3564
3565 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3566 {
3567         vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3568         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3569         memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3570
3571         kvm_clear_async_pf_completion_queue(vcpu);
3572         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3573                 kvm_s390_vcpu_stop(vcpu);
3574         kvm_s390_clear_local_irqs(vcpu);
3575 }
3576
3577 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3578 {
3579         /* Initial reset is a superset of the normal reset */
3580         kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3581
3582         /*
3583          * This equals initial cpu reset in pop, but we don't switch to ESA.
3584          * We do not only reset the internal data, but also ...
3585          */
3586         vcpu->arch.sie_block->gpsw.mask = 0;
3587         vcpu->arch.sie_block->gpsw.addr = 0;
3588         kvm_s390_set_prefix(vcpu, 0);
3589         kvm_s390_set_cpu_timer(vcpu, 0);
3590         vcpu->arch.sie_block->ckc = 0;
3591         memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3592         vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3593         vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3594
3595         /* ... the data in sync regs */
3596         memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3597         vcpu->run->s.regs.ckc = 0;
3598         vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3599         vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3600         vcpu->run->psw_addr = 0;
3601         vcpu->run->psw_mask = 0;
3602         vcpu->run->s.regs.todpr = 0;
3603         vcpu->run->s.regs.cputm = 0;
3604         vcpu->run->s.regs.ckc = 0;
3605         vcpu->run->s.regs.pp = 0;
3606         vcpu->run->s.regs.gbea = 1;
3607         vcpu->run->s.regs.fpc = 0;
3608         /*
3609          * Do not reset these registers in the protected case, as some of
3610          * them are overlayed and they are not accessible in this case
3611          * anyway.
3612          */
3613         if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3614                 vcpu->arch.sie_block->gbea = 1;
3615                 vcpu->arch.sie_block->pp = 0;
3616                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3617                 vcpu->arch.sie_block->todpr = 0;
3618         }
3619 }
3620
3621 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3622 {
3623         struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3624
3625         /* Clear reset is a superset of the initial reset */
3626         kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3627
3628         memset(&regs->gprs, 0, sizeof(regs->gprs));
3629         memset(&regs->vrs, 0, sizeof(regs->vrs));
3630         memset(&regs->acrs, 0, sizeof(regs->acrs));
3631         memset(&regs->gscb, 0, sizeof(regs->gscb));
3632
3633         regs->etoken = 0;
3634         regs->etoken_extension = 0;
3635 }
3636
3637 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3638 {
3639         vcpu_load(vcpu);
3640         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3641         vcpu_put(vcpu);
3642         return 0;
3643 }
3644
3645 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3646 {
3647         vcpu_load(vcpu);
3648         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3649         vcpu_put(vcpu);
3650         return 0;
3651 }
3652
3653 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3654                                   struct kvm_sregs *sregs)
3655 {
3656         vcpu_load(vcpu);
3657
3658         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3659         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3660
3661         vcpu_put(vcpu);
3662         return 0;
3663 }
3664
3665 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3666                                   struct kvm_sregs *sregs)
3667 {
3668         vcpu_load(vcpu);
3669
3670         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3671         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3672
3673         vcpu_put(vcpu);
3674         return 0;
3675 }
3676
3677 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3678 {
3679         int ret = 0;
3680
3681         vcpu_load(vcpu);
3682
3683         if (test_fp_ctl(fpu->fpc)) {
3684                 ret = -EINVAL;
3685                 goto out;
3686         }
3687         vcpu->run->s.regs.fpc = fpu->fpc;
3688         if (MACHINE_HAS_VX)
3689                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3690                                  (freg_t *) fpu->fprs);
3691         else
3692                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3693
3694 out:
3695         vcpu_put(vcpu);
3696         return ret;
3697 }
3698
3699 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3700 {
3701         vcpu_load(vcpu);
3702
3703         /* make sure we have the latest values */
3704         save_fpu_regs();
3705         if (MACHINE_HAS_VX)
3706                 convert_vx_to_fp((freg_t *) fpu->fprs,
3707                                  (__vector128 *) vcpu->run->s.regs.vrs);
3708         else
3709                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3710         fpu->fpc = vcpu->run->s.regs.fpc;
3711
3712         vcpu_put(vcpu);
3713         return 0;
3714 }
3715
3716 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3717 {
3718         int rc = 0;
3719
3720         if (!is_vcpu_stopped(vcpu))
3721                 rc = -EBUSY;
3722         else {
3723                 vcpu->run->psw_mask = psw.mask;
3724                 vcpu->run->psw_addr = psw.addr;
3725         }
3726         return rc;
3727 }
3728
/* Address translation is not implemented yet; always fails with -EINVAL. */
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	const int not_implemented = -EINVAL;

	return not_implemented;
}
3734
3735 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3736                               KVM_GUESTDBG_USE_HW_BP | \
3737                               KVM_GUESTDBG_ENABLE)
3738
3739 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3740                                         struct kvm_guest_debug *dbg)
3741 {
3742         int rc = 0;
3743
3744         vcpu_load(vcpu);
3745
3746         vcpu->guest_debug = 0;
3747         kvm_s390_clear_bp_data(vcpu);
3748
3749         if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3750                 rc = -EINVAL;
3751                 goto out;
3752         }
3753         if (!sclp.has_gpere) {
3754                 rc = -EINVAL;
3755                 goto out;
3756         }
3757
3758         if (dbg->control & KVM_GUESTDBG_ENABLE) {
3759                 vcpu->guest_debug = dbg->control;
3760                 /* enforce guest PER */
3761                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3762
3763                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3764                         rc = kvm_s390_import_bp_data(vcpu, dbg);
3765         } else {
3766                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3767                 vcpu->arch.guestdbg.last_bp = 0;
3768         }
3769
3770         if (rc) {
3771                 vcpu->guest_debug = 0;
3772                 kvm_s390_clear_bp_data(vcpu);
3773                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3774         }
3775
3776 out:
3777         vcpu_put(vcpu);
3778         return rc;
3779 }
3780
3781 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3782                                     struct kvm_mp_state *mp_state)
3783 {
3784         int ret;
3785
3786         vcpu_load(vcpu);
3787
3788         /* CHECK_STOP and LOAD are not supported yet */
3789         ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3790                                       KVM_MP_STATE_OPERATING;
3791
3792         vcpu_put(vcpu);
3793         return ret;
3794 }
3795
3796 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3797                                     struct kvm_mp_state *mp_state)
3798 {
3799         int rc = 0;
3800
3801         vcpu_load(vcpu);
3802
3803         /* user space knows about this interface - let it control the state */
3804         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3805
3806         switch (mp_state->mp_state) {
3807         case KVM_MP_STATE_STOPPED:
3808                 rc = kvm_s390_vcpu_stop(vcpu);
3809                 break;
3810         case KVM_MP_STATE_OPERATING:
3811                 rc = kvm_s390_vcpu_start(vcpu);
3812                 break;
3813         case KVM_MP_STATE_LOAD:
3814                 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3815                         rc = -ENXIO;
3816                         break;
3817                 }
3818                 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3819                 break;
3820         case KVM_MP_STATE_CHECK_STOP:
3821                 fallthrough;    /* CHECK_STOP and LOAD are not supported yet */
3822         default:
3823                 rc = -ENXIO;
3824         }
3825
3826         vcpu_put(vcpu);
3827         return rc;
3828 }
3829
3830 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3831 {
3832         return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3833 }
3834
/*
 * Process all requests pending for @vcpu.
 *
 * Every request that is found and handled jumps back to "retry", so the
 * pending check is repeated until no request is left.
 *
 * Return: 0 once no request is pending (or only the "nothing to do"
 * requests at the end were cleared), otherwise the non-zero return code
 * of gmap_mprotect_notify(); in that case KVM_REQ_MMU_RELOAD is re-queued
 * before returning.
 */
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
	kvm_s390_vcpu_request_handled(vcpu);
	if (!kvm_request_pending(vcpu))
		return 0;
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Let's just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		int rc;
		/* protect two pages starting at the guest prefix address */
		rc = gmap_mprotect_notify(vcpu->arch.gmap,
					  kvm_s390_get_prefix(vcpu),
					  PAGE_SIZE * 2, PROT_WRITE);
		if (rc) {
			/* keep the request pending so it is retried later */
			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
			return rc;
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
		/* NOTE(review): presumably an invalid ihcpu forces a flush on next SIE entry - confirm */
		vcpu->arch.sie_block->ihcpu = 0xffff;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		/* only trace and set the flag on an actual off -> on transition */
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		/* only trace and clear the flag on an actual on -> off transition */
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
		/*
		 * Disable CMM virtualization; we will emulate the ESSA
		 * instruction manually, in order to provide additional
		 * functionalities needed for live migration.
		 */
		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
		/*
		 * Re-enable CMM virtualization if CMMA is available and
		 * CMM has been used.
		 */
		if ((vcpu->kvm->arch.use_cmma) &&
		    (vcpu->kvm->mm->context.uses_cmm))
			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
		goto retry;
	}

	/* nothing to do, just clear the request */
	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
	/* we left the vsie handler, nothing to do, just clear the request */
	kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);

	return 0;
}
3914
3915 void kvm_s390_set_tod_clock(struct kvm *kvm,
3916                             const struct kvm_s390_vm_tod_clock *gtod)
3917 {
3918         struct kvm_vcpu *vcpu;
3919         union tod_clock clk;
3920         int i;
3921
3922         mutex_lock(&kvm->lock);
3923         preempt_disable();
3924
3925         store_tod_clock_ext(&clk);
3926
3927         kvm->arch.epoch = gtod->tod - clk.tod;
3928         kvm->arch.epdx = 0;
3929         if (test_kvm_facility(kvm, 139)) {
3930                 kvm->arch.epdx = gtod->epoch_idx - clk.ei;
3931                 if (kvm->arch.epoch > gtod->tod)
3932                         kvm->arch.epdx -= 1;
3933         }
3934
3935         kvm_s390_vcpu_block_all(kvm);
3936         kvm_for_each_vcpu(i, vcpu, kvm) {
3937                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3938                 vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3939         }
3940
3941         kvm_s390_vcpu_unblock_all(kvm);
3942         preempt_enable();
3943         mutex_unlock(&kvm->lock);
3944 }
3945
3946 /**
3947  * kvm_arch_fault_in_page - fault-in guest page if necessary
3948  * @vcpu: The corresponding virtual cpu
3949  * @gpa: Guest physical address
3950  * @writable: Whether the page should be writable or not
3951  *
3952  * Make sure that a guest page has been faulted-in on the host.
3953  *
3954  * Return: Zero on success, negative error code otherwise.
3955  */
3956 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3957 {
3958         return gmap_fault(vcpu->arch.gmap, gpa,
3959                           writable ? FAULT_FLAG_WRITE : 0);
3960 }
3961
3962 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3963                                       unsigned long token)
3964 {
3965         struct kvm_s390_interrupt inti;
3966         struct kvm_s390_irq irq;
3967
3968         if (start_token) {
3969                 irq.u.ext.ext_params2 = token;
3970                 irq.type = KVM_S390_INT_PFAULT_INIT;
3971                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3972         } else {
3973                 inti.type = KVM_S390_INT_PFAULT_DONE;
3974                 inti.parm64 = token;
3975                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3976         }
3977 }
3978
3979 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3980                                      struct kvm_async_pf *work)
3981 {
3982         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3983         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3984
3985         return true;
3986 }
3987
3988 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3989                                  struct kvm_async_pf *work)
3990 {
3991         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3992         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3993 }
3994
/*
 * Async-pf "page ready" hook. Intentionally empty: s390 always injects
 * the page directly, so there is nothing left to do here.
 */
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
}
4000
4001 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
4002 {
4003         /*
4004          * s390 will always inject the page directly,
4005          * but we still want check_async_completion to cleanup
4006          */
4007         return true;
4008 }
4009
4010 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
4011 {
4012         hva_t hva;
4013         struct kvm_arch_async_pf arch;
4014
4015         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4016                 return false;
4017         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
4018             vcpu->arch.pfault_compare)
4019                 return false;
4020         if (psw_extint_disabled(vcpu))
4021                 return false;
4022         if (kvm_s390_vcpu_has_irq(vcpu, 0))
4023                 return false;
4024         if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
4025                 return false;
4026         if (!vcpu->arch.gmap->pfault_enabled)
4027                 return false;
4028
4029         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
4030         hva += current->thread.gmap_addr & ~PAGE_MASK;
4031         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
4032                 return false;
4033
4034         return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
4035 }
4036
/*
 * Prepare @vcpu for the next SIE entry.
 *
 * Return: 0 when the vcpu is ready to enter SIE, otherwise the non-zero
 * return code of interrupt delivery or request handling (SIE must not be
 * entered in that case).
 */
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	int rc, cpuflags;

	/*
	 * On s390 notifications for arriving pages will be delivered directly
	 * to the guest but the house keeping for completed pfaults is
	 * handled outside the worker.
	 */
	kvm_check_async_pf_completion(vcpu);

	/* guest gprs 14 and 15 are handed to SIE through the SIE block */
	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];

	if (need_resched())
		schedule();

	/* pending interrupts are not delivered here for ucontrol VMs */
	if (!kvm_is_ucontrol(vcpu->kvm)) {
		rc = kvm_s390_deliver_pending_interrupts(vcpu);
		if (rc)
			return rc;
	}

	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;

	/* with guest debugging active, save the guest PER regs and patch them */
	if (guestdbg_enabled(vcpu)) {
		kvm_s390_backup_guest_per_regs(vcpu);
		kvm_s390_patch_guest_per_regs(vcpu);
	}

	/* this vcpu is about to run: drop its bit from the GISA kicked mask */
	clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.gisa_int.kicked_mask);

	/* reset the intercept code so vcpu_post_run() only sees a fresh one */
	vcpu->arch.sie_block->icptcode = 0;
	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
	trace_kvm_s390_sie_enter(vcpu, cpuflags);

	return 0;
}
4078
4079 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4080 {
4081         struct kvm_s390_pgm_info pgm_info = {
4082                 .code = PGM_ADDRESSING,
4083         };
4084         u8 opcode, ilen;
4085         int rc;
4086
4087         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4088         trace_kvm_s390_sie_fault(vcpu);
4089
4090         /*
4091          * We want to inject an addressing exception, which is defined as a
4092          * suppressing or terminating exception. However, since we came here
4093          * by a DAT access exception, the PSW still points to the faulting
4094          * instruction since DAT exceptions are nullifying. So we've got
4095          * to look up the current opcode to get the length of the instruction
4096          * to be able to forward the PSW.
4097          */
4098         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4099         ilen = insn_length(opcode);
4100         if (rc < 0) {
4101                 return rc;
4102         } else if (rc) {
4103                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
4104                  * Forward by arbitrary ilc, injection will take care of
4105                  * nullification if necessary.
4106                  */
4107                 pgm_info = vcpu->arch.pgm;
4108                 ilen = 4;
4109         }
4110         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4111         kvm_s390_forward_psw(vcpu, ilen);
4112         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4113 }
4114
4115 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4116 {
4117         struct mcck_volatile_info *mcck_info;
4118         struct sie_page *sie_page;
4119
4120         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4121                    vcpu->arch.sie_block->icptcode);
4122         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4123
4124         if (guestdbg_enabled(vcpu))
4125                 kvm_s390_restore_guest_per_regs(vcpu);
4126
4127         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4128         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4129
4130         if (exit_reason == -EINTR) {
4131                 VCPU_EVENT(vcpu, 3, "%s", "machine check");
4132                 sie_page = container_of(vcpu->arch.sie_block,
4133                                         struct sie_page, sie_block);
4134                 mcck_info = &sie_page->mcck_info;
4135                 kvm_s390_reinject_machine_check(vcpu, mcck_info);
4136                 return 0;
4137         }
4138
4139         if (vcpu->arch.sie_block->icptcode > 0) {
4140                 int rc = kvm_handle_sie_intercept(vcpu);
4141
4142                 if (rc != -EOPNOTSUPP)
4143                         return rc;
4144                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4145                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4146                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4147                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4148                 return -EREMOTE;
4149         } else if (exit_reason != -EFAULT) {
4150                 vcpu->stat.exit_null++;
4151                 return 0;
4152         } else if (kvm_is_ucontrol(vcpu->kvm)) {
4153                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4154                 vcpu->run->s390_ucontrol.trans_exc_code =
4155                                                 current->thread.gmap_addr;
4156                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
4157                 return -EREMOTE;
4158         } else if (current->thread.gmap_pfault) {
4159                 trace_kvm_s390_major_guest_pfault(vcpu);
4160                 current->thread.gmap_pfault = 0;
4161                 if (kvm_arch_setup_async_pf(vcpu))
4162                         return 0;
4163                 vcpu->stat.pfault_sync++;
4164                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4165         }
4166         return vcpu_post_run_fault_in_sie(vcpu);
4167 }
4168
/* PSW mask bits for external, I/O and machine-check interrupts */
#define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
/*
 * Run @vcpu in SIE until a signal is pending, a guest debug exit is
 * pending, or vcpu_pre_run()/vcpu_post_run() return non-zero.
 *
 * Return: the non-zero return code that ended the loop, or 0 if the loop
 * ended because of a pending signal or debug exit.
 */
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;
	/* the cast relies on the SIE block being the first member of struct sie_page */
	struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
	 * ning the guest), so that memslots (and other stuff) are protected
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in fault handler, between
		 * guest_enter and guest_exit should be no uaccess.
		 */
		local_irq_disable();
		guest_enter_irqoff();
		__disable_cpu_timer_accounting(vcpu);
		local_irq_enable();
		/* protected guests get their gprs via the SIE page, not kvm_run */
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			memcpy(sie_page->pv_grregs,
			       vcpu->run->s.regs.gprs,
			       sizeof(sie_page->pv_grregs));
		}
		if (test_cpu_flag(CIF_FPU))
			load_fpu_regs();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			/* copy the gprs back from the SIE page after the run */
			memcpy(vcpu->run->s.regs.gprs,
			       sie_page->pv_grregs,
			       sizeof(sie_page->pv_grregs));
			/*
			 * We're not allowed to inject interrupts on intercepts
			 * that leave the guest state in an "in-between" state
			 * where the next SIE entry will do a continuation.
			 * Fence interrupts in our "internal" PSW.
			 */
			if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
			    vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
				vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
			}
		}
		local_irq_disable();
		__enable_cpu_timer_accounting(vcpu);
		guest_exit_irqoff();
		local_irq_enable();
		/* re-take srcu before touching anything it protects */
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}
4231
/*
 * Transfer the register state that is only synced for non-protected
 * vcpus (see the caller, sync_regs()) from kvm_run into the SIE block and
 * vcpu->arch.
 *
 * The PSW is copied unconditionally; all other fields are only taken over
 * when the matching KVM_SYNC_* bit is set in kvm_run->kvm_dirty_regs.
 */
static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;
	struct runtime_instr_cb *riccb;
	struct gs_cb *gscb;

	/* typed views of the riccb and gscb areas inside kvm_run */
	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
		/* an invalid token also empties the async-pf completion queue */
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
		vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
		vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
	}
	/*
	 * If userspace sets the riccb (e.g. after migration) to a valid state,
	 * we should enable RI here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
	    test_kvm_facility(vcpu->kvm, 64) &&
	    riccb->v &&
	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
	}
	/*
	 * If userspace sets the gscb (e.g. after migration) to non-zero,
	 * we should enable GS here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
	    test_kvm_facility(vcpu->kvm, 133) &&
	    gscb->gssm &&
	    !vcpu->arch.gs_enabled) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
		vcpu->arch.sie_block->ecb |= ECB_GS;
		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
		vcpu->arch.gs_enabled = 1;
	}
	/* bpbc is only taken over when facility 82 is available */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
	    test_kvm_facility(vcpu->kvm, 82)) {
		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
	}
	if (MACHINE_HAS_GS) {
		preempt_disable();
		/* NOTE(review): presumably the guarded-storage enable bit in CR2 - confirm */
		__ctl_set_bit(2, 4);
		/* remember and save the host's GS control block, if it has one */
		if (current->thread.gs_cb) {
			vcpu->arch.host_gscb = current->thread.gs_cb;
			save_gs_cb(vcpu->arch.host_gscb);
		}
		/* switch the thread over to the guest's GS control block in kvm_run */
		if (vcpu->arch.gs_enabled) {
			current->thread.gs_cb = (struct gs_cb *)
						&vcpu->run->s.regs.gscb;
			restore_gs_cb(current->thread.gs_cb);
		}
		preempt_enable();
	}
	/* SIE will load etoken directly from SDNX and therefore kvm_run */
}
4303
/*
 * Load guest register state from kvm_run before running the vcpu.
 *
 * Fields guarded by a KVM_SYNC_* bit are only taken over when that bit is
 * set in kvm_run->kvm_dirty_regs. The host access registers and FPU state
 * are saved in vcpu->arch and replaced by the guest's. kvm_dirty_regs is
 * cleared at the end.
 */
static void sync_regs(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;

	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		/* 128 bytes are copied into the guest control register area */
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
	}
	/* swap access registers: save the host's, load the guest's */
	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	/* save host (userspace) fprs/vrs */
	save_fpu_regs();
	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
	/* point the thread's FPU state at the guest registers in kvm_run */
	if (MACHINE_HAS_VX)
		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
	else
		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;

	/* Sync fmt2 only data */
	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
		sync_regs_fmt2(vcpu);
	} else {
		/*
		 * In several places we have to modify our internal view to
		 * not do things that are disallowed by the ultravisor. For
		 * example we must not inject interrupts after specific exits
		 * (e.g. 112 prefix page not secure). We do this by turning
		 * off the machine check, external and I/O interrupt bits
		 * of our PSW copy. To avoid getting validity intercepts, we
		 * only accept the condition code from userspace.
		 */
		vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
		vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
						   PSW_MASK_CC;
	}

	/* everything userspace marked dirty has been consumed */
	kvm_run->kvm_dirty_regs = 0;
}
4354
/*
 * Copy the register state that is only stored for non-protected vcpus
 * (see the caller, store_regs()) back into kvm_run, and switch the
 * guarded-storage control block back to the host's.
 */
static void store_regs_fmt2(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;

	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	/* report bpbc as 0/1 depending on the FPF_BPBC bit */
	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
	kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
	if (MACHINE_HAS_GS) {
		preempt_disable();
		/* NOTE(review): presumably the guarded-storage enable bit in CR2 - confirm */
		__ctl_set_bit(2, 4);
		/* save the guest's GS control block while it is still current */
		if (vcpu->arch.gs_enabled)
			save_gs_cb(current->thread.gs_cb);
		/* switch back to the host's GS control block (may be NULL) */
		current->thread.gs_cb = vcpu->arch.host_gscb;
		restore_gs_cb(vcpu->arch.host_gscb);
		/* the host had no GS control block: clear the control bit again */
		if (!vcpu->arch.host_gscb)
			__ctl_clear_bit(2, 4);
		vcpu->arch.host_gscb = NULL;
		preempt_enable();
	}
	/* SIE will save etoken directly into SDNX and therefore kvm_run */
}
4378
4379 static void store_regs(struct kvm_vcpu *vcpu)
4380 {
4381         struct kvm_run *kvm_run = vcpu->run;
4382
4383         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4384         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4385         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4386         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4387         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4388         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4389         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4390         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4391         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4392         save_access_regs(vcpu->run->s.regs.acrs);
4393         restore_access_regs(vcpu->arch.host_acrs);
4394         /* Save guest register state */
4395         save_fpu_regs();
4396         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4397         /* Restore will be done lazily at return */
4398         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4399         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4400         if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4401                 store_regs_fmt2(vcpu);
4402 }
4403
/*
 * Run @vcpu on behalf of userspace until an exit condition occurs.
 *
 * Return:
 *   -EINTR   if kvm_run->immediate_exit is set, or a signal is pending
 *            after the run loop finished with rc == 0
 *   -EINVAL  if kvm_valid_regs/kvm_dirty_regs contain bits outside
 *            KVM_SYNC_S390_VALID_FIELDS, or userspace controls the cpu
 *            state and the vcpu is stopped
 *   0        on a debug exit or when kvm_run has been prepared for
 *            userspace (-EREMOTE from the run loop is mapped to 0)
 *   otherwise the return code of __vcpu_run()
 */
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;
	int rc;

	if (kvm_run->immediate_exit)
		return -EINTR;

	/* reject sync-register bits this implementation does not know */
	if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
	    kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
		return -EINVAL;

	vcpu_load(vcpu);

	/* a debug exit is already pending: report it without running */
	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
		goto out;
	}

	kvm_sigset_activate(vcpu);

	/*
	 * no need to check the return value of vcpu_start as it can only have
	 * an error for protvirt, but protvirt means user cpu state
	 */
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		rc = -EINVAL;
		goto out;
	}

	sync_regs(vcpu);
	enable_cpu_timer_accounting(vcpu);

	might_fault();
	rc = __vcpu_run(vcpu);

	/* the loop ended without error but a signal is pending */
	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc)  {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}

	disable_cpu_timer_accounting(vcpu);
	store_regs(vcpu);

	kvm_sigset_deactivate(vcpu);

	vcpu->stat.exit_userspace++;
out:
	vcpu_put(vcpu);
	return rc;
}
4470
/*
 * Store the status of @vcpu at guest address @gpa, using the register
 * copies held in kvm_run and the SIE block.
 *
 * There are two special cases for @gpa:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 * For any other value, @gpa is taken as the address of the floating point
 * register save area; all other areas are located relative to it.
 *
 * Return: 0 on success, -EFAULT if any write to guest memory failed.
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	freg_t fprs[NUM_FPRS];
	unsigned int px;
	u64 clkcomp, cputm;
	int rc;

	px = kvm_s390_get_prefix(vcpu);
	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		/* archmode byte goes to absolute address 163 */
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = 0;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		/* archmode byte goes to real address 163 */
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = px;
	} else
		/* turn the save area address into the base the __LC_* offsets apply to */
		gpa -= __LC_FPREGS_SAVE_AREA;

	/* manually convert vector registers if necessary */
	if (MACHINE_HAS_VX) {
		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     fprs, 128);
	} else {
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     vcpu->run->s.regs.fprs, 128);
	}
	/*
	 * All remaining areas are written even if an earlier write failed;
	 * the return codes are OR-ed together and evaluated once at the end.
	 */
	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
			      &vcpu->arch.sie_block->gpsw, 16);
	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
			      &px, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
			      &vcpu->run->s.regs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
			      &vcpu->arch.sie_block->todpr, 4);
	cputm = kvm_s390_get_cpu_timer(vcpu);
	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
			      &cputm, 8);
	/* the clock comparator is stored shifted right by 8 bits */
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}
4528
4529 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4530 {
4531         /*
4532          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4533          * switch in the run ioctl. Let's update our copies before we save
4534          * it into the save area
4535          */
4536         save_fpu_regs();
4537         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4538         save_access_regs(vcpu->run->s.regs.acrs);
4539
4540         return kvm_s390_store_status_unloaded(vcpu, addr);
4541 }
4542
4543 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4544 {
4545         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4546         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4547 }
4548
4549 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4550 {
4551         unsigned int i;
4552         struct kvm_vcpu *vcpu;
4553
4554         kvm_for_each_vcpu(i, vcpu, kvm) {
4555                 __disable_ibs_on_vcpu(vcpu);
4556         }
4557 }
4558
4559 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4560 {
4561         if (!sclp.has_ibs)
4562                 return;
4563         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4564         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4565 }
4566
4567 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4568 {
4569         int i, online_vcpus, r = 0, started_vcpus = 0;
4570
4571         if (!is_vcpu_stopped(vcpu))
4572                 return 0;
4573
4574         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4575         /* Only one cpu at a time may enter/leave the STOPPED state. */
4576         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4577         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4578
4579         /* Let's tell the UV that we want to change into the operating state */
4580         if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4581                 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4582                 if (r) {
4583                         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4584                         return r;
4585                 }
4586         }
4587
4588         for (i = 0; i < online_vcpus; i++) {
4589                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4590                         started_vcpus++;
4591         }
4592
4593         if (started_vcpus == 0) {
4594                 /* we're the only active VCPU -> speed it up */
4595                 __enable_ibs_on_vcpu(vcpu);
4596         } else if (started_vcpus == 1) {
4597                 /*
4598                  * As we are starting a second VCPU, we have to disable
4599                  * the IBS facility on all VCPUs to remove potentially
4600                  * outstanding ENABLE requests.
4601                  */
4602                 __disable_ibs_on_all_vcpus(vcpu->kvm);
4603         }
4604
4605         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4606         /*
4607          * The real PSW might have changed due to a RESTART interpreted by the
4608          * ultravisor. We block all interrupts and let the next sie exit
4609          * refresh our view.
4610          */
4611         if (kvm_s390_pv_cpu_is_protected(vcpu))
4612                 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4613         /*
4614          * Another VCPU might have used IBS while we were offline.
4615          * Let's play safe and flush the VCPU at startup.
4616          */
4617         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4618         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4619         return 0;
4620 }
4621
4622 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4623 {
4624         int i, online_vcpus, r = 0, started_vcpus = 0;
4625         struct kvm_vcpu *started_vcpu = NULL;
4626
4627         if (is_vcpu_stopped(vcpu))
4628                 return 0;
4629
4630         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4631         /* Only one cpu at a time may enter/leave the STOPPED state. */
4632         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4633         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4634
4635         /* Let's tell the UV that we want to change into the stopped state */
4636         if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4637                 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4638                 if (r) {
4639                         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4640                         return r;
4641                 }
4642         }
4643
4644         /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
4645         kvm_s390_clear_stop_irq(vcpu);
4646
4647         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4648         __disable_ibs_on_vcpu(vcpu);
4649
4650         for (i = 0; i < online_vcpus; i++) {
4651                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4652                         started_vcpus++;
4653                         started_vcpu = vcpu->kvm->vcpus[i];
4654                 }
4655         }
4656
4657         if (started_vcpus == 1) {
4658                 /*
4659                  * As we only have one VCPU left, we want to enable the
4660                  * IBS facility for that VCPU to speed it up.
4661                  */
4662                 __enable_ibs_on_vcpu(started_vcpu);
4663         }
4664
4665         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4666         return 0;
4667 }
4668
4669 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4670                                      struct kvm_enable_cap *cap)
4671 {
4672         int r;
4673
4674         if (cap->flags)
4675                 return -EINVAL;
4676
4677         switch (cap->cap) {
4678         case KVM_CAP_S390_CSS_SUPPORT:
4679                 if (!vcpu->kvm->arch.css_support) {
4680                         vcpu->kvm->arch.css_support = 1;
4681                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4682                         trace_kvm_s390_enable_css(vcpu->kvm);
4683                 }
4684                 r = 0;
4685                 break;
4686         default:
4687                 r = -EINVAL;
4688                 break;
4689         }
4690         return r;
4691 }
4692
4693 static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
4694                                    struct kvm_s390_mem_op *mop)
4695 {
4696         void __user *uaddr = (void __user *)mop->buf;
4697         int r = 0;
4698
4699         if (mop->flags || !mop->size)
4700                 return -EINVAL;
4701         if (mop->size + mop->sida_offset < mop->size)
4702                 return -EINVAL;
4703         if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4704                 return -E2BIG;
4705
4706         switch (mop->op) {
4707         case KVM_S390_MEMOP_SIDA_READ:
4708                 if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4709                                  mop->sida_offset), mop->size))
4710                         r = -EFAULT;
4711
4712                 break;
4713         case KVM_S390_MEMOP_SIDA_WRITE:
4714                 if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4715                                    mop->sida_offset), uaddr, mop->size))
4716                         r = -EFAULT;
4717                 break;
4718         }
4719         return r;
4720 }
4721 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4722                                   struct kvm_s390_mem_op *mop)
4723 {
4724         void __user *uaddr = (void __user *)mop->buf;
4725         void *tmpbuf = NULL;
4726         int r = 0;
4727         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4728                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
4729
4730         if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4731                 return -EINVAL;
4732
4733         if (mop->size > MEM_OP_MAX_SIZE)
4734                 return -E2BIG;
4735
4736         if (kvm_s390_pv_cpu_is_protected(vcpu))
4737                 return -EINVAL;
4738
4739         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4740                 tmpbuf = vmalloc(mop->size);
4741                 if (!tmpbuf)
4742                         return -ENOMEM;
4743         }
4744
4745         switch (mop->op) {
4746         case KVM_S390_MEMOP_LOGICAL_READ:
4747                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4748                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4749                                             mop->size, GACC_FETCH);
4750                         break;
4751                 }
4752                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4753                 if (r == 0) {
4754                         if (copy_to_user(uaddr, tmpbuf, mop->size))
4755                                 r = -EFAULT;
4756                 }
4757                 break;
4758         case KVM_S390_MEMOP_LOGICAL_WRITE:
4759                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4760                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4761                                             mop->size, GACC_STORE);
4762                         break;
4763                 }
4764                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4765                         r = -EFAULT;
4766                         break;
4767                 }
4768                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4769                 break;
4770         }
4771
4772         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4773                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4774
4775         vfree(tmpbuf);
4776         return r;
4777 }
4778
4779 static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
4780                                       struct kvm_s390_mem_op *mop)
4781 {
4782         int r, srcu_idx;
4783
4784         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4785
4786         switch (mop->op) {
4787         case KVM_S390_MEMOP_LOGICAL_READ:
4788         case KVM_S390_MEMOP_LOGICAL_WRITE:
4789                 r = kvm_s390_guest_mem_op(vcpu, mop);
4790                 break;
4791         case KVM_S390_MEMOP_SIDA_READ:
4792         case KVM_S390_MEMOP_SIDA_WRITE:
4793                 /* we are locked against sida going away by the vcpu->mutex */
4794                 r = kvm_s390_guest_sida_op(vcpu, mop);
4795                 break;
4796         default:
4797                 r = -EINVAL;
4798         }
4799
4800         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4801         return r;
4802 }
4803
4804 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4805                                unsigned int ioctl, unsigned long arg)
4806 {
4807         struct kvm_vcpu *vcpu = filp->private_data;
4808         void __user *argp = (void __user *)arg;
4809
4810         switch (ioctl) {
4811         case KVM_S390_IRQ: {
4812                 struct kvm_s390_irq s390irq;
4813
4814                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4815                         return -EFAULT;
4816                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4817         }
4818         case KVM_S390_INTERRUPT: {
4819                 struct kvm_s390_interrupt s390int;
4820                 struct kvm_s390_irq s390irq = {};
4821
4822                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
4823                         return -EFAULT;
4824                 if (s390int_to_s390irq(&s390int, &s390irq))
4825                         return -EINVAL;
4826                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4827         }
4828         }
4829         return -ENOIOCTLCMD;
4830 }
4831
4832 long kvm_arch_vcpu_ioctl(struct file *filp,
4833                          unsigned int ioctl, unsigned long arg)
4834 {
4835         struct kvm_vcpu *vcpu = filp->private_data;
4836         void __user *argp = (void __user *)arg;
4837         int idx;
4838         long r;
4839         u16 rc, rrc;
4840
4841         vcpu_load(vcpu);
4842
4843         switch (ioctl) {
4844         case KVM_S390_STORE_STATUS:
4845                 idx = srcu_read_lock(&vcpu->kvm->srcu);
4846                 r = kvm_s390_store_status_unloaded(vcpu, arg);
4847                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4848                 break;
4849         case KVM_S390_SET_INITIAL_PSW: {
4850                 psw_t psw;
4851
4852                 r = -EFAULT;
4853                 if (copy_from_user(&psw, argp, sizeof(psw)))
4854                         break;
4855                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4856                 break;
4857         }
4858         case KVM_S390_CLEAR_RESET:
4859                 r = 0;
4860                 kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4861                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4862                         r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4863                                           UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
4864                         VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
4865                                    rc, rrc);
4866                 }
4867                 break;
4868         case KVM_S390_INITIAL_RESET:
4869                 r = 0;
4870                 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4871                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4872                         r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4873                                           UVC_CMD_CPU_RESET_INITIAL,
4874                                           &rc, &rrc);
4875                         VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
4876                                    rc, rrc);
4877                 }
4878                 break;
4879         case KVM_S390_NORMAL_RESET:
4880                 r = 0;
4881                 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4882                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4883                         r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4884                                           UVC_CMD_CPU_RESET, &rc, &rrc);
4885                         VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
4886                                    rc, rrc);
4887                 }
4888                 break;
4889         case KVM_SET_ONE_REG:
4890         case KVM_GET_ONE_REG: {
4891                 struct kvm_one_reg reg;
4892                 r = -EINVAL;
4893                 if (kvm_s390_pv_cpu_is_protected(vcpu))
4894                         break;
4895                 r = -EFAULT;
4896                 if (copy_from_user(&reg, argp, sizeof(reg)))
4897                         break;
4898                 if (ioctl == KVM_SET_ONE_REG)
4899                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4900                 else
4901                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4902                 break;
4903         }
4904 #ifdef CONFIG_KVM_S390_UCONTROL
4905         case KVM_S390_UCAS_MAP: {
4906                 struct kvm_s390_ucas_mapping ucasmap;
4907
4908                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4909                         r = -EFAULT;
4910                         break;
4911                 }
4912
4913                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4914                         r = -EINVAL;
4915                         break;
4916                 }
4917
4918                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4919                                      ucasmap.vcpu_addr, ucasmap.length);
4920                 break;
4921         }
4922         case KVM_S390_UCAS_UNMAP: {
4923                 struct kvm_s390_ucas_mapping ucasmap;
4924
4925                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4926                         r = -EFAULT;
4927                         break;
4928                 }
4929
4930                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4931                         r = -EINVAL;
4932                         break;
4933                 }
4934
4935                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4936                         ucasmap.length);
4937                 break;
4938         }
4939 #endif
4940         case KVM_S390_VCPU_FAULT: {
4941                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
4942                 break;
4943         }
4944         case KVM_ENABLE_CAP:
4945         {
4946                 struct kvm_enable_cap cap;
4947                 r = -EFAULT;
4948                 if (copy_from_user(&cap, argp, sizeof(cap)))
4949                         break;
4950                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4951                 break;
4952         }
4953         case KVM_S390_MEM_OP: {
4954                 struct kvm_s390_mem_op mem_op;
4955
4956                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4957                         r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
4958                 else
4959                         r = -EFAULT;
4960                 break;
4961         }
4962         case KVM_S390_SET_IRQ_STATE: {
4963                 struct kvm_s390_irq_state irq_state;
4964
4965                 r = -EFAULT;
4966                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4967                         break;
4968                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4969                     irq_state.len == 0 ||
4970                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4971                         r = -EINVAL;
4972                         break;
4973                 }
4974                 /* do not use irq_state.flags, it will break old QEMUs */
4975                 r = kvm_s390_set_irq_state(vcpu,
4976                                            (void __user *) irq_state.buf,
4977                                            irq_state.len);
4978                 break;
4979         }
4980         case KVM_S390_GET_IRQ_STATE: {
4981                 struct kvm_s390_irq_state irq_state;
4982
4983                 r = -EFAULT;
4984                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4985                         break;
4986                 if (irq_state.len == 0) {
4987                         r = -EINVAL;
4988                         break;
4989                 }
4990                 /* do not use irq_state.flags, it will break old QEMUs */
4991                 r = kvm_s390_get_irq_state(vcpu,
4992                                            (__u8 __user *)  irq_state.buf,
4993                                            irq_state.len);
4994                 break;
4995         }
4996         default:
4997                 r = -ENOTTY;
4998         }
4999
5000         vcpu_put(vcpu);
5001         return r;
5002 }
5003
5004 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
5005 {
5006 #ifdef CONFIG_KVM_S390_UCONTROL
5007         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
5008                  && (kvm_is_ucontrol(vcpu->kvm))) {
5009                 vmf->page = virt_to_page(vcpu->arch.sie_block);
5010                 get_page(vmf->page);
5011                 return 0;
5012         }
5013 #endif
5014         return VM_FAULT_SIGBUS;
5015 }
5016
5017 /* Section: memory related */
5018 int kvm_arch_prepare_memory_region(struct kvm *kvm,
5019                                    struct kvm_memory_slot *memslot,
5020                                    const struct kvm_userspace_memory_region *mem,
5021                                    enum kvm_mr_change change)
5022 {
5023         /* A few sanity checks. We can have memory slots which have to be
5024            located/ended at a segment boundary (1MB). The memory in userland is
5025            ok to be fragmented into various different vmas. It is okay to mmap()
5026            and munmap() stuff in this slot after doing this call at any time */
5027
5028         if (mem->userspace_addr & 0xffffful)
5029                 return -EINVAL;
5030
5031         if (mem->memory_size & 0xffffful)
5032                 return -EINVAL;
5033
5034         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
5035                 return -EINVAL;
5036
5037         /* When we are protected, we should not change the memory slots */
5038         if (kvm_s390_pv_get_handle(kvm))
5039                 return -EINVAL;
5040         return 0;
5041 }
5042
5043 void kvm_arch_commit_memory_region(struct kvm *kvm,
5044                                 const struct kvm_userspace_memory_region *mem,
5045                                 struct kvm_memory_slot *old,
5046                                 const struct kvm_memory_slot *new,
5047                                 enum kvm_mr_change change)
5048 {
5049         int rc = 0;
5050
5051         switch (change) {
5052         case KVM_MR_DELETE:
5053                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5054                                         old->npages * PAGE_SIZE);
5055                 break;
5056         case KVM_MR_MOVE:
5057                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5058                                         old->npages * PAGE_SIZE);
5059                 if (rc)
5060                         break;
5061                 fallthrough;
5062         case KVM_MR_CREATE:
5063                 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
5064                                       mem->guest_phys_addr, mem->memory_size);
5065                 break;
5066         case KVM_MR_FLAGS_ONLY:
5067                 break;
5068         default:
5069                 WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5070         }
5071         if (rc)
5072                 pr_warn("failed to commit memory region\n");
5073         return;
5074 }
5075
5076 static inline unsigned long nonhyp_mask(int i)
5077 {
5078         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5079
5080         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5081 }
5082
5083 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
5084 {
5085         vcpu->valid_wakeup = false;
5086 }
5087
5088 static int __init kvm_s390_init(void)
5089 {
5090         int i;
5091
5092         if (!sclp.has_sief2) {
5093                 pr_info("SIE is not available\n");
5094                 return -ENODEV;
5095         }
5096
5097         if (nested && hpage) {
5098                 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5099                 return -EINVAL;
5100         }
5101
5102         for (i = 0; i < 16; i++)
5103                 kvm_s390_fac_base[i] |=
5104                         stfle_fac_list[i] & nonhyp_mask(i);
5105
5106         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5107 }
5108
5109 static void __exit kvm_s390_exit(void)
5110 {
5111         kvm_exit();
5112 }
5113
5114 module_init(kvm_s390_init);
5115 module_exit(kvm_s390_exit);
5116
5117 /*
5118  * Enable autoloading of the kvm module.
5119  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5120  * since x86 takes a different approach.
5121  */
5122 #include <linux/miscdevice.h>
5123 MODULE_ALIAS_MISCDEV(KVM_MINOR);
5124 MODULE_ALIAS("devname:kvm");