// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2020
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */
#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>
#include <linux/pgtable.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include <asm/ap.h>
#include <asm/uv.h>
#include <asm/fpu/api.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"
#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
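/*
 * Example (illustrative only, not from the kernel sources): userspace hits
 * this limit through the KVM_S390_MEM_OP vcpu ioctl, roughly like so,
 * with guest_addr, len, buffer and vcpu_fd assumed:
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr	= guest_addr,
 *		.size	= len,			// must be <= 65536
 *		.op	= KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf	= (__u64)(unsigned long)buffer,
 *	};
 *	if (ioctl(vcpu_fd, KVM_S390_MEM_OP, &op) < 0)
 *		perror("KVM_S390_MEM_OP");
 */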
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))
const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
	KVM_GENERIC_VM_STATS(),
	STATS_DESC_COUNTER(VM, inject_io),
	STATS_DESC_COUNTER(VM, inject_float_mchk),
	STATS_DESC_COUNTER(VM, inject_pfault_done),
	STATS_DESC_COUNTER(VM, inject_service_signal),
	STATS_DESC_COUNTER(VM, inject_virtio)
};
const struct kvm_stats_header kvm_vm_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vm_stats_desc),
};
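/*
 * The offsets above describe the layout of the binary stats file
 * descriptor: the fixed-size header comes first, followed by the id
 * string (KVM_STATS_NAME_SIZE bytes), then the descriptor array, and
 * finally the data values, so userspace can locate every part from the
 * header alone.
 */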
const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
	KVM_GENERIC_VCPU_STATS(),
	STATS_DESC_COUNTER(VCPU, exit_userspace),
	STATS_DESC_COUNTER(VCPU, exit_null),
	STATS_DESC_COUNTER(VCPU, exit_external_request),
	STATS_DESC_COUNTER(VCPU, exit_io_request),
	STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
	STATS_DESC_COUNTER(VCPU, exit_stop_request),
	STATS_DESC_COUNTER(VCPU, exit_validity),
	STATS_DESC_COUNTER(VCPU, exit_instruction),
	STATS_DESC_COUNTER(VCPU, exit_pei),
	STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
	STATS_DESC_COUNTER(VCPU, instruction_lctl),
	STATS_DESC_COUNTER(VCPU, instruction_lctlg),
	STATS_DESC_COUNTER(VCPU, instruction_stctl),
	STATS_DESC_COUNTER(VCPU, instruction_stctg),
	STATS_DESC_COUNTER(VCPU, exit_program_interruption),
	STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
	STATS_DESC_COUNTER(VCPU, exit_operation_exception),
	STATS_DESC_COUNTER(VCPU, deliver_ckc),
	STATS_DESC_COUNTER(VCPU, deliver_cputm),
	STATS_DESC_COUNTER(VCPU, deliver_external_call),
	STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
	STATS_DESC_COUNTER(VCPU, deliver_service_signal),
	STATS_DESC_COUNTER(VCPU, deliver_virtio),
	STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
	STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
	STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
	STATS_DESC_COUNTER(VCPU, deliver_program),
	STATS_DESC_COUNTER(VCPU, deliver_io),
	STATS_DESC_COUNTER(VCPU, deliver_machine_check),
	STATS_DESC_COUNTER(VCPU, exit_wait_state),
	STATS_DESC_COUNTER(VCPU, inject_ckc),
	STATS_DESC_COUNTER(VCPU, inject_cputm),
	STATS_DESC_COUNTER(VCPU, inject_external_call),
	STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
	STATS_DESC_COUNTER(VCPU, inject_mchk),
	STATS_DESC_COUNTER(VCPU, inject_pfault_init),
	STATS_DESC_COUNTER(VCPU, inject_program),
	STATS_DESC_COUNTER(VCPU, inject_restart),
	STATS_DESC_COUNTER(VCPU, inject_set_prefix),
	STATS_DESC_COUNTER(VCPU, inject_stop_signal),
	STATS_DESC_COUNTER(VCPU, instruction_epsw),
	STATS_DESC_COUNTER(VCPU, instruction_gs),
	STATS_DESC_COUNTER(VCPU, instruction_io_other),
	STATS_DESC_COUNTER(VCPU, instruction_lpsw),
	STATS_DESC_COUNTER(VCPU, instruction_lpswe),
	STATS_DESC_COUNTER(VCPU, instruction_pfmf),
	STATS_DESC_COUNTER(VCPU, instruction_ptff),
	STATS_DESC_COUNTER(VCPU, instruction_sck),
	STATS_DESC_COUNTER(VCPU, instruction_sckpf),
	STATS_DESC_COUNTER(VCPU, instruction_stidp),
	STATS_DESC_COUNTER(VCPU, instruction_spx),
	STATS_DESC_COUNTER(VCPU, instruction_stpx),
	STATS_DESC_COUNTER(VCPU, instruction_stap),
	STATS_DESC_COUNTER(VCPU, instruction_iske),
	STATS_DESC_COUNTER(VCPU, instruction_ri),
	STATS_DESC_COUNTER(VCPU, instruction_rrbe),
	STATS_DESC_COUNTER(VCPU, instruction_sske),
	STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
	STATS_DESC_COUNTER(VCPU, instruction_stsi),
	STATS_DESC_COUNTER(VCPU, instruction_stfl),
	STATS_DESC_COUNTER(VCPU, instruction_tb),
	STATS_DESC_COUNTER(VCPU, instruction_tpi),
	STATS_DESC_COUNTER(VCPU, instruction_tprot),
	STATS_DESC_COUNTER(VCPU, instruction_tsch),
	STATS_DESC_COUNTER(VCPU, instruction_sie),
	STATS_DESC_COUNTER(VCPU, instruction_essa),
	STATS_DESC_COUNTER(VCPU, instruction_sthyi),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
	STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
	STATS_DESC_COUNTER(VCPU, diag_9c_forward),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
	STATS_DESC_COUNTER(VCPU, pfault_sync)
};
const struct kvm_stats_header kvm_vcpu_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vcpu_stats_desc),
};
/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");
/* allow 1m huge page guest backing, if !nested */
static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");
/* maximum percentage of steal time for polling.  >100 is treated like 100 */
static u8 halt_poll_max_steal = 10;
module_param(halt_poll_max_steal, byte, 0644);
MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");

/* if set to true, the GISA will be initialized and used if available */
static bool use_gisa = true;
module_param(use_gisa, bool, 0644);
MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");

/* maximum diag9c forwarding per second */
unsigned int diag9c_forwarding_hz;
module_param(diag9c_forwarding_hz, uint, 0644);
MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
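/*
 * The 0644 parameters above are also runtime-tunable through sysfs (path
 * assumed), e.g.: echo 50 > /sys/module/kvm/parameters/halt_poll_max_steal
 */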
/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, this requires changes to code, but the external uapi can stay.
 */
#define SIZE_INTERNAL 16
/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
static unsigned long kvm_s390_fac_size(void)
{
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
		     sizeof(stfle_fac_list));

	return SIZE_INTERNAL;
}
/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
debug_info_t *kvm_s390_dbf_uv;
/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

int kvm_arch_check_processor_compat(void *opaque)
{
	return 0;
}
/* forward declarations */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);
static int sca_switch_to_extended(struct kvm *kvm);
static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
	u8 delta_idx = 0;

	/*
	 * The TOD jumps by delta, we have to compensate this by adding
	 * -delta to the epoch.
	 */
	delta = -delta;

	/* sign-extension - we're adding to signed values below */
	if ((s64)delta < 0)
		delta_idx = -1;

	scb->epoch += delta;
	if (scb->ecd & ECD_MEF) {
		scb->epdx += delta_idx;
		if (scb->epoch < delta)
			scb->epdx += 1;
	}
}
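/*
 * In other words: the guest-visible TOD is host TOD + epoch. When STP
 * steers the host clock forward by delta, adding -delta to the epoch
 * (with a manual carry into the multiple-epoch extension) keeps the
 * guest clock steady.
 */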
/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm_for_each_vcpu(i, vcpu, kvm) {
			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
			if (i == 0) {
				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
			}
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				kvm_clock_sync_scb(vcpu->arch.vsie_block,
						   *delta);
		}
	}
	return NOTIFY_OK;
}
static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};
int kvm_arch_hardware_setup(void *opaque)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}
void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}
static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}
static inline int plo_test_bit(unsigned char nr)
{
	unsigned long function = (unsigned long)nr | 0x100;
	int cc;

	asm volatile(
		"	lgr	0,%[function]\n"
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: [function] "d" (function)
		: "cc", "0");
	return cc;
}
static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
{
	asm volatile(
		"	lghi	0,0\n"
		"	lgr	1,%[query]\n"
		/* Parameter registers are ignored */
		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
		:
		: [query] "d" ((unsigned long)query), [opc] "i" (opcode)
		: "cc", "memory", "0", "1");
}
#define INSN_SORTL 0xb938
#define INSN_DFLTCC 0xb939
static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);

	if (test_facility(155)) /* MSA9 */
		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kdsa);

	if (test_facility(150)) /* SORTL */
		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);

	if (test_facility(151)) /* DFLTCC */
		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}
int kvm_arch_init(void *opaque)
{
	int rc = -ENOMEM;

	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf_uv)
		goto out;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
	    debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
		goto out;

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
	if (rc) {
		pr_err("A FLIC registration call failed with rc=%d\n", rc);
		goto out;
	}

	rc = kvm_s390_gib_init(GAL_ISC);
	if (rc)
		goto out;

	return 0;

out:
	kvm_arch_exit();
	return rc;
}
void kvm_arch_exit(void)
{
	kvm_s390_gib_destroy();
	debug_unregister(kvm_s390_dbf);
	debug_unregister(kvm_s390_dbf_uv);
}
/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_CMMA_MIGRATION:
	case KVM_CAP_S390_AIS:
	case KVM_CAP_S390_AIS_MIGRATION:
	case KVM_CAP_S390_VCPU_RESETS:
	case KVM_CAP_SET_GUEST_DEBUG:
	case KVM_CAP_S390_DIAG318:
		r = 1;
		break;
	case KVM_CAP_SET_GUEST_DEBUG2:
		r = KVM_GUESTDBG_VALID_MASK;
		break;
	case KVM_CAP_S390_HPAGE_1M:
		r = 0;
		if (hpage && !kvm_is_ucontrol(kvm))
			r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
	case KVM_CAP_MAX_VCPU_ID:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	case KVM_CAP_S390_GS:
		r = test_facility(133);
		break;
	case KVM_CAP_S390_BPB:
		r = test_facility(82);
		break;
	case KVM_CAP_S390_PROTECTED:
		r = is_prot_virt_host();
		break;
	default:
		r = 0;
	}
	return r;
}
void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
	int i;
	gfn_t cur_gfn, last_gfn;
	unsigned long gaddr, vmaddr;
	struct gmap *gmap = kvm->arch.gmap;
	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);

	/* Loop over all guest segments */
	cur_gfn = memslot->base_gfn;
	last_gfn = memslot->base_gfn + memslot->npages;
	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
		gaddr = gfn_to_gpa(cur_gfn);
		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
		if (kvm_is_error_hva(vmaddr))
			continue;

		bitmap_zero(bitmap, _PAGE_ENTRIES);
		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
		for (i = 0; i < _PAGE_ENTRIES; i++) {
			if (test_bit(i, bitmap))
				mark_page_dirty(kvm, cur_gfn + i);
		}

		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}
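/*
 * Each gmap_sync_dirty_log_pmd() call above covers _PAGE_ENTRIES 4k pages,
 * i.e. one 1M segment, and the fatal_signal_pending() check lets a dying
 * task abandon the walk early.
 */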
/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);
/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memory_slot *memslot;
	int is_dirty;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}
static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}
int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			}
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
			}
			if (test_facility(148)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 148);
				set_kvm_facility(kvm->arch.model.fac_list, 148);
			}
			if (test_facility(152)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 152);
				set_kvm_facility(kvm->arch.model.fac_list, 152);
			}
			if (test_facility(192)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 192);
				set_kvm_facility(kvm->arch.model.fac_list, 192);
			}
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_AIS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else {
			set_kvm_facility(kvm->arch.model.fac_mask, 72);
			set_kvm_facility(kvm->arch.model.fac_list, 72);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_GS:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(133)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 133);
			set_kvm_facility(kvm->arch.model.fac_list, 133);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_HPAGE_1M:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			r = -EBUSY;
		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
			r = -EINVAL;
		else {
			r = 0;
			mmap_write_lock(kvm->mm);
			kvm->mm->context.allow_gmap_hpage_1m = 1;
			mmap_write_unlock(kvm->mm);
			/*
			 * We might have to create fake 4k page
			 * tables. To avoid that the hardware works on
			 * stale PGSTEs, we emulate these instructions.
			 */
			kvm->arch.use_skf = 0;
			kvm->arch.use_pfmfi = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		icpt_operexc_on_all_vcpus(kvm);
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;
	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			ret = -EBUSY;
		else if (kvm->mm->context.allow_gmap_hpage_1m)
			ret = -EINVAL;
		else {
			kvm->arch.use_cmma = 1;
			/* Not compatible with cmma. */
			kvm->arch.use_pfmfi = 0;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
{
	struct kvm_vcpu *vcpu;
	int i;

	kvm_s390_vcpu_block_all(kvm);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		/* recreate the shadow crycb by leaving the VSIE handler */
		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
	}

	kvm_s390_vcpu_unblock_all(kvm);
}
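/*
 * Handle the KVM_S390_VM_CRYPTO attribute group: toggle the AES/DEA
 * wrapping key masks and the APIE interpretation facility. Every change
 * is propagated to all vCPUs via kvm_s390_vcpu_crypto_reset_all() above.
 */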
static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 1;
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 0;
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_s390_vcpu_crypto_reset_all(kvm);
	mutex_unlock(&kvm->lock);
	return 0;
}
static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
	int cx;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(cx, vcpu, kvm)
		kvm_s390_sync_request(req, vcpu);
}
/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_memory_slot *ms;
	struct kvm_memslots *slots;
	unsigned long ram_pages = 0;
	int slotnr;

	/* migration mode already enabled */
	if (kvm->arch.migration_mode)
		return 0;
	slots = kvm_memslots(kvm);
	if (!slots || !slots->used_slots)
		return -EINVAL;

	if (!kvm->arch.use_cmma) {
		kvm->arch.migration_mode = 1;
		return 0;
	}
	/* mark all the pages in active slots as dirty */
	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
		ms = slots->memslots + slotnr;
		if (!ms->dirty_bitmap)
			return -EINVAL;
		/*
		 * The second half of the bitmap is only used on x86,
		 * and would be wasted otherwise, so we put it to good
		 * use here to keep track of the state of the storage
		 * attributes.
		 */
		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
		ram_pages += ms->npages;
	}
	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
	kvm->arch.migration_mode = 1;
	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	return 0;
}
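/*
 * Starting from a fully set bitmap means that every page's storage
 * attributes count as dirty once, so userspace is guaranteed to see the
 * complete CMMA state at least once per migration.
 */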
/*
 * Must be called with kvm->slots_lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	/* migration mode already disabled */
	if (!kvm->arch.migration_mode)
		return 0;
	kvm->arch.migration_mode = 0;
	if (kvm->arch.use_cmma)
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
	return 0;
}
static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int res = -ENXIO;

	mutex_lock(&kvm->slots_lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		res = kvm_s390_vm_start_migration(kvm);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->slots_lock);

	return res;
}
static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = kvm->arch.migration_mode;

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}
static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);
static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
		return -EINVAL;
	__kvm_s390_set_tod_clock(kvm, &gtod);

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
		gtod.epoch_idx, gtod.tod);

	return 0;
}
static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u32 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}
static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod = { 0 };

	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
			   sizeof(gtod.tod)))
		return -EFAULT;

	__kvm_s390_set_tod_clock(kvm, &gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
	return 0;
}
static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	mutex_lock(&kvm->lock);
	/*
	 * For protected guests, the TOD is managed by the ultravisor, so trying
	 * to change it will never bring the expected results.
	 */
	if (kvm_s390_pv_is_protected(kvm)) {
		ret = -EOPNOTSUPP;
		goto out_unlock;
	}

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_set_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -EINVAL;
		break;
	}

out_unlock:
	mutex_unlock(&kvm->lock);
	return ret;
}
static void kvm_s390_get_tod_clock(struct kvm *kvm,
				   struct kvm_s390_vm_tod_clock *gtod)
{
	union tod_clock clk;

	preempt_disable();

	store_tod_clock_ext(&clk);

	gtod->tod = clk.tod + kvm->arch.epoch;
	gtod->epoch_idx = 0;
	if (test_kvm_facility(kvm, 139)) {
		gtod->epoch_idx = clk.ei + kvm->arch.epdx;
		/* the addition above wrapped: carry into the epoch index */
		if (gtod->tod < clk.tod)
			gtod->epoch_idx += 1;
	}

	preempt_enable();
}
static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	memset(&gtod, 0, sizeof(gtod));
	kvm_s390_get_tod_clock(kvm, &gtod);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);
	return 0;
}
static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u32 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}
static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}
static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_get_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -EINVAL;
		break;
	}
	return ret;
}
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}
static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}
	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	mutex_unlock(&kvm->lock);
	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}
static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}

	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
		mutex_unlock(&kvm->lock);
		return -EFAULT;
	}
	mutex_unlock(&kvm->lock);

	VM_EVENT(kvm, 3, "SET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
	VM_EVENT(kvm, 3, "SET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
	VM_EVENT(kvm, 3, "SET: guest KM subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
	VM_EVENT(kvm, 3, "SET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
	VM_EVENT(kvm, 3, "SET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
	VM_EVENT(kvm, 3, "SET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
	VM_EVENT(kvm, 3, "SET: guest PCC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
	VM_EVENT(kvm, 3, "SET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
	VM_EVENT(kvm, 3, "SET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
	VM_EVENT(kvm, 3, "SET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
	VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);

	return 0;
}
static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 kvm->arch.model.fac_list[0],
		 kvm->arch.model.fac_list[1],
		 kvm->arch.model.fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}
static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
	       sizeof(stfle_fac_list));
	VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_mask[0],
		 mach->fac_mask[1],
		 mach->fac_mask[2]);
	VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_list[0],
		 mach->fac_list[1],
		 mach->fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}
static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}
static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "GET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
	VM_EVENT(kvm, 3, "GET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
	VM_EVENT(kvm, 3, "GET: guest KM subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
	VM_EVENT(kvm, 3, "GET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
	VM_EVENT(kvm, 3, "GET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
	VM_EVENT(kvm, 3, "GET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
	VM_EVENT(kvm, 3, "GET: guest PCC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
	VM_EVENT(kvm, 3, "GET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
	VM_EVENT(kvm, 3, "GET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
	VM_EVENT(kvm, 3, "GET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
	VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);

	return 0;
}
static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "GET: host PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
	VM_EVENT(kvm, 3, "GET: host PTFF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
	VM_EVENT(kvm, 3, "GET: host KMAC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
	VM_EVENT(kvm, 3, "GET: host KMC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
	VM_EVENT(kvm, 3, "GET: host KM subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
	VM_EVENT(kvm, 3, "GET: host KIMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
	VM_EVENT(kvm, 3, "GET: host KLMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
	VM_EVENT(kvm, 3, "GET: host PCKMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
	VM_EVENT(kvm, 3, "GET: host KMCTR subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
	VM_EVENT(kvm, 3, "GET: host KMF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
	VM_EVENT(kvm, 3, "GET: host KMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
	VM_EVENT(kvm, 3, "GET: host PCC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
	VM_EVENT(kvm, 3, "GET: host PPNO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
	VM_EVENT(kvm, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
	VM_EVENT(kvm, 3, "GET: host KDSA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
	VM_EVENT(kvm, 3, "GET: host SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
	VM_EVENT(kvm, 3, "GET: host DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);

	return 0;
}
static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_set_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_get_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
			ret = sclp.has_cmma ? 0 : -ENXIO;
			break;
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
			ret = ap_instructions_available() ? 0 : -ENXIO;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_MIGRATION:
		ret = 0;
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_uses_skeys(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
	if (!keys)
		return -ENOMEM;

	mmap_read_lock(current->mm);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		r = get_guest_storage_key(current->mm, hva, &keys[i]);
		if (r)
			break;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	mmap_read_unlock(current->mm);

	if (!r) {
		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
				 sizeof(uint8_t) * args->count);
		if (r)
			r = -EFAULT;
	}

	kvfree(keys);
	return r;
}
static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;
	bool unlocked;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	i = 0;
	mmap_read_lock(current->mm);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	while (i < args->count) {
		unlocked = false;
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			break;
		}

		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
		if (r) {
			r = fixup_user_fault(current->mm, hva,
					     FAULT_FLAG_WRITE, &unlocked);
			if (r)
				break;
		}
		if (!r)
			i++;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	mmap_read_unlock(current->mm);
out:
	kvfree(keys);
	return r;
}
/*
 * Base address and length must be sent at the start of each block, therefore
 * it's cheaper to send some clean data, as long as it's less than the size of
 * two longs.
 */
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/* for consistency */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
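/*
 * With 8-byte pointers the distance evaluates to 16: a run of up to 16
 * clean values is still cheaper to send inline than to start a new block
 * with its own base address and length.
 */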
/*
 * Similar to gfn_to_memslot, but returns the index of a memslot also when the
 * address falls in a hole. In that case the index of one of the memslots
 * bordering the hole is returned.
 */
static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
{
	int start = 0, end = slots->used_slots;
	int slot = atomic_read(&slots->last_used_slot);
	struct kvm_memory_slot *memslots = slots->memslots;

	if (gfn >= memslots[slot].base_gfn &&
	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
		return slot;

	while (start < end) {
		slot = start + (end - start) / 2;

		if (gfn >= memslots[slot].base_gfn)
			end = slot;
		else
			start = slot + 1;
	}

	if (start >= slots->used_slots)
		return slots->used_slots - 1;

	if (gfn >= memslots[start].base_gfn &&
	    gfn < memslots[start].base_gfn + memslots[start].npages) {
		atomic_set(&slots->last_used_slot, start);
	}

	return start;
}
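/*
 * The search above relies on the memslot array being sorted by descending
 * base_gfn: when gfn is above the middle slot's base, the matching slot
 * can only live at a lower index.
 */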
static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
			      u8 *res, unsigned long bufsize)
{
	unsigned long pgstev, hva, cur_gfn = args->start_gfn;

	args->count = 0;
	while (args->count < bufsize) {
		hva = gfn_to_hva(kvm, cur_gfn);
		/*
		 * We return an error if the first value was invalid, but we
		 * return successfully if at least one value was copied.
		 */
		if (kvm_is_error_hva(hva))
			return args->count ? 0 : -EFAULT;
		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
			pgstev = 0;
		res[args->count++] = (pgstev >> 24) & 0x43;
		cur_gfn++;
	}

	return 0;
}
static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
					      unsigned long cur_gfn)
{
	int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
	struct kvm_memory_slot *ms = slots->memslots + slotidx;
	unsigned long ofs = cur_gfn - ms->base_gfn;

	if (ms->base_gfn + ms->npages <= cur_gfn) {
		slotidx--;
		/* If we are above the highest slot, wrap around */
		if (slotidx < 0)
			slotidx = slots->used_slots - 1;

		ms = slots->memslots + slotidx;
		ofs = 0;
	}
	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
	while ((slotidx > 0) && (ofs >= ms->npages)) {
		slotidx--;
		ms = slots->memslots + slotidx;
		ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
	}
	return ms->base_gfn + ofs;
}
2042 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2043 u8 *res, unsigned long bufsize)
2045 unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2046 struct kvm_memslots *slots = kvm_memslots(kvm);
2047 struct kvm_memory_slot *ms;
2049 if (unlikely(!slots->used_slots))
2052 cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2053 ms = gfn_to_memslot(kvm, cur_gfn);
2055 args->start_gfn = cur_gfn;
2058 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2059 mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2061 while (args->count < bufsize) {
2062 hva = gfn_to_hva(kvm, cur_gfn);
2063 if (kvm_is_error_hva(hva))
2065 /* Decrement only if we actually flipped the bit to 0 */
2066 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2067 atomic64_dec(&kvm->arch.cmma_dirty_pages);
2068 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2070 /* Save the value */
2071 res[args->count++] = (pgstev >> 24) & 0x43;
2072 /* If the next bit is too far away, stop. */
2073 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2074 break;
2075 /* If we reached the previous "next", find the next one */
2076 if (cur_gfn == next_gfn)
2077 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2078 /* Reached the end of memory or of the buffer, stop */
2079 if ((next_gfn >= mem_end) ||
2080 (next_gfn - args->start_gfn >= bufsize))
2081 break;
2082 cur_gfn++;
2083 /* Reached the end of the current memslot, take the next one. */
2084 if (cur_gfn - ms->base_gfn >= ms->npages) {
2085 ms = gfn_to_memslot(kvm, cur_gfn);
2086 if (!ms)
2087 break;
2094 * This function searches for the next page with dirty CMMA attributes, and
2095 * saves the attributes in the buffer up to either the end of the buffer or
2096 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2097 * no trailing clean bytes are saved.
2098 * In case no dirty bits were found, or if CMMA was not enabled or used, the
2099 * output buffer will indicate 0 as length.
2101 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2102 struct kvm_s390_cmma_log *args)
2104 unsigned long bufsize;
2105 int srcu_idx, peek, ret;
2108 if (!kvm->arch.use_cmma)
2110 /* Invalid/unsupported flags were specified */
2111 if (args->flags & ~KVM_S390_CMMA_PEEK)
2113 /* Migration mode query, and we are not doing a migration */
2114 peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2115 if (!peek && !kvm->arch.migration_mode)
2117 /* CMMA is disabled or was not used, or the buffer has length zero */
2118 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2119 if (!bufsize || !kvm->mm->context.uses_cmm) {
2120 memset(args, 0, sizeof(*args));
2123 /* We are not peeking, and there are no dirty pages */
2124 if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2125 memset(args, 0, sizeof(*args));
2129 values = vmalloc(bufsize);
2133 mmap_read_lock(kvm->mm);
2134 srcu_idx = srcu_read_lock(&kvm->srcu);
2136 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2138 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2139 srcu_read_unlock(&kvm->srcu, srcu_idx);
2140 mmap_read_unlock(kvm->mm);
2142 if (kvm->arch.migration_mode)
2143 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2145 args->remaining = 0;
2147 if (copy_to_user((void __user *)args->values, values, args->count))
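/*
 * Illustrative userspace sketch (not part of this file): pulling one
 * batch of CMMA values during migration. "vm_fd" and the omitted error
 * handling are assumptions of the example.
 */
#if 0
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static void get_cmma_batch(int vm_fd, uint8_t *buf, uint32_t buflen,
			   struct kvm_s390_cmma_log *args)
{
	args->values = (uint64_t)(unsigned long)buf;
	args->count = buflen;
	args->flags = 0;	/* or KVM_S390_CMMA_PEEK to read without clearing */
	/*
	 * On return, args->start_gfn points at the first dirty page found,
	 * args->count holds the number of bytes filled and args->remaining
	 * the number of dirty pages still pending.
	 */
	ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, args);
}
#endif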
2155 * This function sets the CMMA attributes for the given pages. If the input
2156 * buffer has zero length, no action is taken, otherwise the attributes are
2157 * set and the mm->context.uses_cmm flag is set.
2159 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2160 const struct kvm_s390_cmma_log *args)
2162 unsigned long hva, mask, pgstev, i;
2164 int srcu_idx, r = 0;
2168 if (!kvm->arch.use_cmma)
2170 /* invalid/unsupported flags */
2171 if (args->flags != 0)
2173 /* Enforce sane limit on memory allocation */
2174 if (args->count > KVM_S390_CMMA_SIZE_MAX)
2177 if (args->count == 0)
2180 bits = vmalloc(array_size(sizeof(*bits), args->count));
2184 r = copy_from_user(bits, (void __user *)args->values, args->count);
2190 mmap_read_lock(kvm->mm);
2191 srcu_idx = srcu_read_lock(&kvm->srcu);
2192 for (i = 0; i < args->count; i++) {
2193 hva = gfn_to_hva(kvm, args->start_gfn + i);
2194 if (kvm_is_error_hva(hva)) {
2199 pgstev = bits[i];
2200 pgstev = pgstev << 24;
2201 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2202 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2204 srcu_read_unlock(&kvm->srcu, srcu_idx);
2205 mmap_read_unlock(kvm->mm);
2207 if (!kvm->mm->context.uses_cmm) {
2208 mmap_write_lock(kvm->mm);
2209 kvm->mm->context.uses_cmm = 1;
2210 mmap_write_unlock(kvm->mm);
2217 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2219 struct kvm_vcpu *vcpu;
2225 * We ignore failures and try to destroy as many CPUs as possible.
2226 * At the same time we must not free the assigned resources when
2227 * this fails, as the ultravisor still has access to that memory.
2228 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2229 * behind.
2230 * We want to return the first failure rc and rrc, though.
2232 kvm_for_each_vcpu(i, vcpu, kvm) {
2233 mutex_lock(&vcpu->mutex);
2234 if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2239 mutex_unlock(&vcpu->mutex);
2244 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2249 struct kvm_vcpu *vcpu;
2251 kvm_for_each_vcpu(i, vcpu, kvm) {
2252 mutex_lock(&vcpu->mutex);
2253 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2254 mutex_unlock(&vcpu->mutex);
2259 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2263 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2267 void __user *argp = (void __user *)cmd->data;
2270 case KVM_PV_ENABLE: {
2272 if (kvm_s390_pv_is_protected(kvm))
2276 * FMT 4 SIE needs esca. As we never switch back to bsca from
2277 * esca, we need no cleanup in the error cases below
2279 r = sca_switch_to_extended(kvm);
2283 mmap_write_lock(current->mm);
2284 r = gmap_mark_unmergeable();
2285 mmap_write_unlock(current->mm);
2289 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2293 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2295 kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2297 /* we need to block service interrupts from now on */
2298 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2301 case KVM_PV_DISABLE: {
2303 if (!kvm_s390_pv_is_protected(kvm))
2306 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2308 * If a CPU could not be destroyed, destroy VM will also fail.
2309 * There is no point in trying to destroy it. Instead return
2310 * the rc and rrc from the first CPU that failed destroying.
2314 r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2316 /* no need to block service interrupts any more */
2317 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2320 case KVM_PV_SET_SEC_PARMS: {
2321 struct kvm_s390_pv_sec_parm parms = {};
2325 if (!kvm_s390_pv_is_protected(kvm))
2329 if (copy_from_user(&parms, argp, sizeof(parms)))
2332 /* Currently restricted to 8KB */
2334 if (parms.length > PAGE_SIZE * 2)
2338 hdr = vmalloc(parms.length);
2343 if (!copy_from_user(hdr, (void __user *)parms.origin,
2344 parms.length))
2345 r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2346 &cmd->rc, &cmd->rrc);
2351 case KVM_PV_UNPACK: {
2352 struct kvm_s390_pv_unp unp = {};
2355 if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2359 if (copy_from_user(&unp, argp, sizeof(unp)))
2362 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2363 &cmd->rc, &cmd->rrc);
2366 case KVM_PV_VERIFY: {
2368 if (!kvm_s390_pv_is_protected(kvm))
2371 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2372 UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2373 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2377 case KVM_PV_PREP_RESET: {
2379 if (!kvm_s390_pv_is_protected(kvm))
2382 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2383 UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2384 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2388 case KVM_PV_UNSHARE_ALL: {
2390 if (!kvm_s390_pv_is_protected(kvm))
2393 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2394 UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2395 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
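/*
 * Illustrative userspace sketch (not part of this file) of the protected
 * virtualization lifecycle driven through the command dispatcher above;
 * "vm_fd" and the omitted error handling are assumptions of the example.
 */
#if 0
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int pv_enable(int vm_fd)
{
	struct kvm_pv_cmd cmd;

	memset(&cmd, 0, sizeof(cmd));
	cmd.cmd = KVM_PV_ENABLE;
	/* on failure, cmd.rc/cmd.rrc carry the ultravisor return codes */
	return ioctl(vm_fd, KVM_S390_PV_COMMAND, &cmd);
}
#endif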
2405 long kvm_arch_vm_ioctl(struct file *filp,
2406 unsigned int ioctl, unsigned long arg)
2408 struct kvm *kvm = filp->private_data;
2409 void __user *argp = (void __user *)arg;
2410 struct kvm_device_attr attr;
2414 case KVM_S390_INTERRUPT: {
2415 struct kvm_s390_interrupt s390int;
2418 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2420 r = kvm_s390_inject_vm(kvm, &s390int);
2423 case KVM_CREATE_IRQCHIP: {
2424 struct kvm_irq_routing_entry routing;
2427 if (kvm->arch.use_irqchip) {
2428 /* Set up dummy routing. */
2429 memset(&routing, 0, sizeof(routing));
2430 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2434 case KVM_SET_DEVICE_ATTR: {
2436 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2438 r = kvm_s390_vm_set_attr(kvm, &attr);
2441 case KVM_GET_DEVICE_ATTR: {
2443 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2445 r = kvm_s390_vm_get_attr(kvm, &attr);
2448 case KVM_HAS_DEVICE_ATTR: {
2450 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2452 r = kvm_s390_vm_has_attr(kvm, &attr);
2455 case KVM_S390_GET_SKEYS: {
2456 struct kvm_s390_skeys args;
2459 if (copy_from_user(&args, argp,
2460 sizeof(struct kvm_s390_skeys)))
2462 r = kvm_s390_get_skeys(kvm, &args);
2465 case KVM_S390_SET_SKEYS: {
2466 struct kvm_s390_skeys args;
2469 if (copy_from_user(&args, argp,
2470 sizeof(struct kvm_s390_skeys)))
2472 r = kvm_s390_set_skeys(kvm, &args);
2475 case KVM_S390_GET_CMMA_BITS: {
2476 struct kvm_s390_cmma_log args;
2479 if (copy_from_user(&args, argp, sizeof(args)))
2481 mutex_lock(&kvm->slots_lock);
2482 r = kvm_s390_get_cmma_bits(kvm, &args);
2483 mutex_unlock(&kvm->slots_lock);
2485 r = copy_to_user(argp, &args, sizeof(args));
2491 case KVM_S390_SET_CMMA_BITS: {
2492 struct kvm_s390_cmma_log args;
2495 if (copy_from_user(&args, argp, sizeof(args)))
2497 mutex_lock(&kvm->slots_lock);
2498 r = kvm_s390_set_cmma_bits(kvm, &args);
2499 mutex_unlock(&kvm->slots_lock);
2502 case KVM_S390_PV_COMMAND: {
2503 struct kvm_pv_cmd args;
2505 /* protvirt means user sigp */
2506 kvm->arch.user_cpu_state_ctrl = 1;
2508 if (!is_prot_virt_host()) {
2512 if (copy_from_user(&args, argp, sizeof(args))) {
2520 mutex_lock(&kvm->lock);
2521 r = kvm_s390_handle_pv(kvm, &args);
2522 mutex_unlock(&kvm->lock);
2523 if (copy_to_user(argp, &args, sizeof(args))) {
2536 static int kvm_s390_apxa_installed(void)
2538 struct ap_config_info info;
2540 if (ap_instructions_available()) {
2541 if (ap_qci(&info) == 0)
2542 return info.apxa;
2549 * The format of the crypto control block (CRYCB) is specified in the 3 low
2550 * order bits of the CRYCB designation (CRYCBD) field as follows:
2551 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2552 * AP extended addressing (APXA) facility are installed.
2553 * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2554 * Format 2: Both the APXA and MSAX3 facilities are installed.
2556 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2558 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2560 /* Clear the CRYCB format bits - i.e., set format 0 by default */
2561 kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2563 /* Check whether MSAX3 is installed */
2564 if (!test_kvm_facility(kvm, 76))
2567 if (kvm_s390_apxa_installed())
2568 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2570 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
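/*
 * The decision above, restated as a pure function for clarity; this is a
 * sketch, not code that is used anywhere in this file.
 */
#if 0
static int crycb_format(bool msax3, bool apxa)
{
	if (!msax3)
		return 0;		/* CRYCB_FORMAT0 */
	return apxa ? 2 : 1;		/* CRYCB_FORMAT2 : CRYCB_FORMAT1 */
}
#endif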
2574 * kvm_arch_crypto_set_masks
2576 * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2578 * @apm: the mask identifying the accessible AP adapters
2579 * @aqm: the mask identifying the accessible AP domains
2580 * @adm: the mask identifying the accessible AP control domains
2582 * Set the masks that identify the adapters, domains and control domains to
2583 * which the KVM guest is granted access.
2585 * Note: The kvm->lock mutex must be locked by the caller before invoking this
2586 * function.
2588 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2589 unsigned long *aqm, unsigned long *adm)
2591 struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2593 kvm_s390_vcpu_block_all(kvm);
2595 switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2596 case CRYCB_FORMAT2: /* APCB1 uses 256 bits */
2597 memcpy(crycb->apcb1.apm, apm, 32);
2598 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2599 apm[0], apm[1], apm[2], apm[3]);
2600 memcpy(crycb->apcb1.aqm, aqm, 32);
2601 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2602 aqm[0], aqm[1], aqm[2], aqm[3]);
2603 memcpy(crycb->apcb1.adm, adm, 32);
2604 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2605 adm[0], adm[1], adm[2], adm[3]);
2607 case CRYCB_FORMAT1:
2608 case CRYCB_FORMAT0: /* fall through - both formats use APCB0 */
2609 memcpy(crycb->apcb0.apm, apm, 8);
2610 memcpy(crycb->apcb0.aqm, aqm, 2);
2611 memcpy(crycb->apcb0.adm, adm, 2);
2612 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2613 apm[0], *((unsigned short *)aqm),
2614 *((unsigned short *)adm));
2616 default: /* Cannot happen */
2620 /* recreate the shadow crycb for each vcpu */
2621 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2622 kvm_s390_vcpu_unblock_all(kvm);
2624 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
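/*
 * A minimal caller sketch (e.g. a driver granting AP queues to a guest);
 * the mask variables are assumptions of the example:
 *
 *	mutex_lock(&kvm->lock);
 *	kvm_arch_crypto_set_masks(kvm, apm, aqm, adm);
 *	mutex_unlock(&kvm->lock);
 */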
2627 * kvm_arch_crypto_clear_masks
2629 * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2632 * Clear the masks that identify the adapters, domains and control domains to
2633 * which the KVM guest is granted access.
2635 * Note: The kvm->lock mutex must be locked by the caller before invoking this
2636 * function.
2638 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2640 kvm_s390_vcpu_block_all(kvm);
2642 memset(&kvm->arch.crypto.crycb->apcb0, 0,
2643 sizeof(kvm->arch.crypto.crycb->apcb0));
2644 memset(&kvm->arch.crypto.crycb->apcb1, 0,
2645 sizeof(kvm->arch.crypto.crycb->apcb1));
2647 VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2648 /* recreate the shadow crycb for each vcpu */
2649 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2650 kvm_s390_vcpu_unblock_all(kvm);
2652 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2654 static u64 kvm_s390_get_initial_cpuid(void)
2656 struct cpuid cpuid;
2658 get_cpu_id(&cpuid);
2659 cpuid.version = 0xff;
2660 return *((u64 *) &cpuid);
2663 static void kvm_s390_crypto_init(struct kvm *kvm)
2665 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2666 kvm_s390_set_crycb_format(kvm);
2667 init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);
2669 if (!test_kvm_facility(kvm, 76))
2672 /* Enable AES/DEA protected key functions by default */
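/*
 * The wrapping key masks below are randomized per VM, so protected keys
 * generated inside one guest are not usable in another guest or on the
 * host.
 */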
2673 kvm->arch.crypto.aes_kw = 1;
2674 kvm->arch.crypto.dea_kw = 1;
2675 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2676 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2677 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2678 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2681 static void sca_dispose(struct kvm *kvm)
2683 if (kvm->arch.use_esca)
2684 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2686 free_page((unsigned long)(kvm->arch.sca));
2687 kvm->arch.sca = NULL;
2690 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2692 gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
2694 char debug_name[16];
2695 static unsigned long sca_offset;
2698 #ifdef CONFIG_KVM_S390_UCONTROL
2699 if (type & ~KVM_VM_S390_UCONTROL)
2701 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2708 rc = s390_enable_sie();
2714 if (!sclp.has_64bscao)
2715 alloc_flags |= GFP_DMA;
2716 rwlock_init(&kvm->arch.sca_lock);
2717 /* start with basic SCA */
2718 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2721 mutex_lock(&kvm_lock);
2722 sca_offset += 16;
2723 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2724 sca_offset = 0;
2725 kvm->arch.sca = (struct bsca_block *)
2726 ((char *) kvm->arch.sca + sca_offset);
2727 mutex_unlock(&kvm_lock);
2729 sprintf(debug_name, "kvm-%u", current->pid);
2731 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2735 BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2736 kvm->arch.sie_page2 =
2737 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
2738 if (!kvm->arch.sie_page2)
2741 kvm->arch.sie_page2->kvm = kvm;
2742 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2744 for (i = 0; i < kvm_s390_fac_size(); i++) {
2745 kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
2746 (kvm_s390_fac_base[i] |
2747 kvm_s390_fac_ext[i]);
2748 kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
2749 kvm_s390_fac_base[i];
2751 kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2753 /* we are always in czam mode - even on pre z14 machines */
2754 set_kvm_facility(kvm->arch.model.fac_mask, 138);
2755 set_kvm_facility(kvm->arch.model.fac_list, 138);
2756 /* we emulate STHYI in kvm */
2757 set_kvm_facility(kvm->arch.model.fac_mask, 74);
2758 set_kvm_facility(kvm->arch.model.fac_list, 74);
2759 if (MACHINE_HAS_TLB_GUEST) {
2760 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2761 set_kvm_facility(kvm->arch.model.fac_list, 147);
2764 if (css_general_characteristics.aiv && test_facility(65))
2765 set_kvm_facility(kvm->arch.model.fac_mask, 65);
2767 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2768 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2770 kvm_s390_crypto_init(kvm);
2772 mutex_init(&kvm->arch.float_int.ais_lock);
2773 spin_lock_init(&kvm->arch.float_int.lock);
2774 for (i = 0; i < FIRQ_LIST_COUNT; i++)
2775 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2776 init_waitqueue_head(&kvm->arch.ipte_wq);
2777 mutex_init(&kvm->arch.ipte_mutex);
2779 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2780 VM_EVENT(kvm, 3, "vm created with type %lu", type);
2782 if (type & KVM_VM_S390_UCONTROL) {
2783 kvm->arch.gmap = NULL;
2784 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2786 if (sclp.hamax == U64_MAX)
2787 kvm->arch.mem_limit = TASK_SIZE_MAX;
2789 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2791 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2792 if (!kvm->arch.gmap)
2794 kvm->arch.gmap->private = kvm;
2795 kvm->arch.gmap->pfault_enabled = 0;
2798 kvm->arch.use_pfmfi = sclp.has_pfmfi;
2799 kvm->arch.use_skf = sclp.has_skey;
2800 spin_lock_init(&kvm->arch.start_stop_lock);
2801 kvm_s390_vsie_init(kvm);
2803 kvm_s390_gisa_init(kvm);
2804 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2808 free_page((unsigned long)kvm->arch.sie_page2);
2809 debug_unregister(kvm->arch.dbf);
2811 KVM_EVENT(3, "creation of vm failed: %d", rc);
2815 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2819 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2820 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2821 kvm_s390_clear_local_irqs(vcpu);
2822 kvm_clear_async_pf_completion_queue(vcpu);
2823 if (!kvm_is_ucontrol(vcpu->kvm))
2826 if (kvm_is_ucontrol(vcpu->kvm))
2827 gmap_remove(vcpu->arch.gmap);
2829 if (vcpu->kvm->arch.use_cmma)
2830 kvm_s390_vcpu_unsetup_cmma(vcpu);
2831 /* We cannot hold the vcpu mutex here; we are already dying */
2832 if (kvm_s390_pv_cpu_get_handle(vcpu))
2833 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2834 free_page((unsigned long)(vcpu->arch.sie_block));
2837 static void kvm_free_vcpus(struct kvm *kvm)
2840 struct kvm_vcpu *vcpu;
2842 kvm_for_each_vcpu(i, vcpu, kvm)
2843 kvm_vcpu_destroy(vcpu);
2845 mutex_lock(&kvm->lock);
2846 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2847 kvm->vcpus[i] = NULL;
2849 atomic_set(&kvm->online_vcpus, 0);
2850 mutex_unlock(&kvm->lock);
2853 void kvm_arch_destroy_vm(struct kvm *kvm)
2857 kvm_free_vcpus(kvm);
2859 kvm_s390_gisa_destroy(kvm);
2861 * We are already at the end of life and kvm->lock is not taken.
2862 * This is ok as the file descriptor is closed by now and nobody
2863 * can mess with the pv state. To avoid lockdep_assert_held from
2864 * complaining we do not use kvm_s390_pv_is_protected.
2866 if (kvm_s390_pv_get_handle(kvm))
2867 kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2868 debug_unregister(kvm->arch.dbf);
2869 free_page((unsigned long)kvm->arch.sie_page2);
2870 if (!kvm_is_ucontrol(kvm))
2871 gmap_remove(kvm->arch.gmap);
2872 kvm_s390_destroy_adapters(kvm);
2873 kvm_s390_clear_float_irqs(kvm);
2874 kvm_s390_vsie_destroy(kvm);
2875 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2878 /* Section: vcpu related */
2879 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2881 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2882 if (!vcpu->arch.gmap)
2884 vcpu->arch.gmap->private = vcpu->kvm;
2889 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2891 if (!kvm_s390_use_sca_entries())
2893 read_lock(&vcpu->kvm->arch.sca_lock);
2894 if (vcpu->kvm->arch.use_esca) {
2895 struct esca_block *sca = vcpu->kvm->arch.sca;
2897 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2898 sca->cpu[vcpu->vcpu_id].sda = 0;
2900 struct bsca_block *sca = vcpu->kvm->arch.sca;
2902 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2903 sca->cpu[vcpu->vcpu_id].sda = 0;
2905 read_unlock(&vcpu->kvm->arch.sca_lock);
2908 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2910 if (!kvm_s390_use_sca_entries()) {
2911 struct bsca_block *sca = vcpu->kvm->arch.sca;
2913 /* we still need the basic sca for the ipte control */
2914 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2915 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2918 read_lock(&vcpu->kvm->arch.sca_lock);
2919 if (vcpu->kvm->arch.use_esca) {
2920 struct esca_block *sca = vcpu->kvm->arch.sca;
2922 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2923 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2924 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2925 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2926 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2928 struct bsca_block *sca = vcpu->kvm->arch.sca;
2930 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2931 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2932 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2933 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2935 read_unlock(&vcpu->kvm->arch.sca_lock);
2938 /* Basic SCA to Extended SCA data copy routines */
2939 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2942 d->sigp_ctrl.c = s->sigp_ctrl.c;
2943 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2946 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2950 d->ipte_control = s->ipte_control;
2952 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2953 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2956 static int sca_switch_to_extended(struct kvm *kvm)
2958 struct bsca_block *old_sca = kvm->arch.sca;
2959 struct esca_block *new_sca;
2960 struct kvm_vcpu *vcpu;
2961 unsigned int vcpu_idx;
2964 if (kvm->arch.use_esca)
2967 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
2971 scaoh = (u32)((u64)(new_sca) >> 32);
2972 scaol = (u32)(u64)(new_sca) & ~0x3fU;
2974 kvm_s390_vcpu_block_all(kvm);
2975 write_lock(&kvm->arch.sca_lock);
2977 sca_copy_b_to_e(new_sca, old_sca);
2979 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2980 vcpu->arch.sie_block->scaoh = scaoh;
2981 vcpu->arch.sie_block->scaol = scaol;
2982 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2984 kvm->arch.sca = new_sca;
2985 kvm->arch.use_esca = 1;
2987 write_unlock(&kvm->arch.sca_lock);
2988 kvm_s390_vcpu_unblock_all(kvm);
2990 free_page((unsigned long)old_sca);
2992 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2993 old_sca, kvm->arch.sca);
2997 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
3001 if (!kvm_s390_use_sca_entries()) {
3002 if (id < KVM_MAX_VCPUS)
3006 if (id < KVM_S390_BSCA_CPU_SLOTS)
3008 if (!sclp.has_esca || !sclp.has_64bscao)
3011 mutex_lock(&kvm->lock);
3012 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
3013 mutex_unlock(&kvm->lock);
3015 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
3018 /* must be called with preemption disabled, to protect against TOD sync and vcpu_load/put */
3019 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3021 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
3022 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3023 vcpu->arch.cputm_start = get_tod_clock_fast();
3024 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3027 /* must be called with preemption disabled, to protect against TOD sync and vcpu_load/put */
3028 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3030 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
3031 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3032 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3033 vcpu->arch.cputm_start = 0;
3034 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3037 /* must be called with preemption disabled, to protect against TOD sync and vcpu_load/put */
3038 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3040 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
3041 vcpu->arch.cputm_enabled = true;
3042 __start_cpu_timer_accounting(vcpu);
3045 /* must be called with preemption disabled, to protect against TOD sync and vcpu_load/put */
3046 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3048 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
3049 __stop_cpu_timer_accounting(vcpu);
3050 vcpu->arch.cputm_enabled = false;
3053 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3055 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3056 __enable_cpu_timer_accounting(vcpu);
3060 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3062 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3063 __disable_cpu_timer_accounting(vcpu);
3067 /* set the cpu timer - may only be called from the VCPU thread itself */
3068 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3070 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3071 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3072 if (vcpu->arch.cputm_enabled)
3073 vcpu->arch.cputm_start = get_tod_clock_fast();
3074 vcpu->arch.sie_block->cputm = cputm;
3075 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3079 /* update and get the cpu timer - can also be called from other VCPU threads */
3080 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3085 if (unlikely(!vcpu->arch.cputm_enabled))
3086 return vcpu->arch.sie_block->cputm;
3088 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3090 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3092 * If the writer would ever execute a read in the critical
3093 * section, e.g. in irq context, we have a deadlock.
3095 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3096 value = vcpu->arch.sie_block->cputm;
3097 /* if cputm_start is 0, accounting is being started/stopped */
3098 if (likely(vcpu->arch.cputm_start))
3099 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3100 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3101 preempt_enable();
3102 return value;
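/*
 * A minimal sketch of the lockless read pattern used above; "sc" and
 * "shared_value" are hypothetical stand-ins for the seqcount and the
 * data it protects.
 */
#if 0
unsigned int seq;
u64 value;

do {
	/* may observe an odd count if an update is in flight */
	seq = raw_read_seqcount(&sc);
	value = shared_value;
	/*
	 * Clearing bit 0 makes the final comparison fail whenever the
	 * first read saw a writer (odd seq), so the loop retries until
	 * it gets a consistent snapshot.
	 */
} while (read_seqcount_retry(&sc, seq & ~1));
#endif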
3105 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3108 gmap_enable(vcpu->arch.enabled_gmap);
3109 kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3110 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3111 __start_cpu_timer_accounting(vcpu);
3115 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3118 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3119 __stop_cpu_timer_accounting(vcpu);
3120 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3121 vcpu->arch.enabled_gmap = gmap_get_enabled();
3122 gmap_disable(vcpu->arch.enabled_gmap);
3126 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3128 mutex_lock(&vcpu->kvm->lock);
3130 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3131 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3133 mutex_unlock(&vcpu->kvm->lock);
3134 if (!kvm_is_ucontrol(vcpu->kvm)) {
3135 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3138 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3139 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3140 /* make vcpu_load load the right gmap on the first trigger */
3141 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3144 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3146 if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3147 test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3152 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3154 /* At least one ECC subfunction must be present */
3155 return kvm_has_pckmo_subfunc(kvm, 32) ||
3156 kvm_has_pckmo_subfunc(kvm, 33) ||
3157 kvm_has_pckmo_subfunc(kvm, 34) ||
3158 kvm_has_pckmo_subfunc(kvm, 40) ||
3159 kvm_has_pckmo_subfunc(kvm, 41);
3163 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3166 * If the AP instructions are not being interpreted and the MSAX3
3167 * facility is not configured for the guest, there is nothing to set up.
3169 if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3172 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3173 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3174 vcpu->arch.sie_block->eca &= ~ECA_APIE;
3175 vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3177 if (vcpu->kvm->arch.crypto.apie)
3178 vcpu->arch.sie_block->eca |= ECA_APIE;
3180 /* Set up protected key support */
3181 if (vcpu->kvm->arch.crypto.aes_kw) {
3182 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3183 /* ecc is also wrapped with AES key */
3184 if (kvm_has_pckmo_ecc(vcpu->kvm))
3185 vcpu->arch.sie_block->ecd |= ECD_ECC;
3188 if (vcpu->kvm->arch.crypto.dea_kw)
3189 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3192 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3194 free_page(vcpu->arch.sie_block->cbrlo);
3195 vcpu->arch.sie_block->cbrlo = 0;
3198 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3200 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
3201 if (!vcpu->arch.sie_block->cbrlo)
3206 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3208 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3210 vcpu->arch.sie_block->ibc = model->ibc;
3211 if (test_kvm_facility(vcpu->kvm, 7))
3212 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3215 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3220 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3224 if (test_kvm_facility(vcpu->kvm, 78))
3225 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3226 else if (test_kvm_facility(vcpu->kvm, 8))
3227 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3229 kvm_s390_vcpu_setup_model(vcpu);
3231 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3232 if (MACHINE_HAS_ESOP)
3233 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3234 if (test_kvm_facility(vcpu->kvm, 9))
3235 vcpu->arch.sie_block->ecb |= ECB_SRSI;
3236 if (test_kvm_facility(vcpu->kvm, 73))
3237 vcpu->arch.sie_block->ecb |= ECB_TE;
3238 if (!kvm_is_ucontrol(vcpu->kvm))
3239 vcpu->arch.sie_block->ecb |= ECB_SPECI;
3241 if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3242 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3243 if (test_kvm_facility(vcpu->kvm, 130))
3244 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3245 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3247 vcpu->arch.sie_block->eca |= ECA_CEI;
3249 vcpu->arch.sie_block->eca |= ECA_IB;
3251 vcpu->arch.sie_block->eca |= ECA_SII;
3252 if (sclp.has_sigpif)
3253 vcpu->arch.sie_block->eca |= ECA_SIGPI;
3254 if (test_kvm_facility(vcpu->kvm, 129)) {
3255 vcpu->arch.sie_block->eca |= ECA_VX;
3256 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3258 if (test_kvm_facility(vcpu->kvm, 139))
3259 vcpu->arch.sie_block->ecd |= ECD_MEF;
3260 if (test_kvm_facility(vcpu->kvm, 156))
3261 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3262 if (vcpu->arch.sie_block->gd) {
3263 vcpu->arch.sie_block->eca |= ECA_AIV;
3264 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3265 vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3267 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3269 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3272 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3274 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3276 if (vcpu->kvm->arch.use_cmma) {
3277 rc = kvm_s390_vcpu_setup_cmma(vcpu);
3281 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3282 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3284 vcpu->arch.sie_block->hpid = HPID_KVM;
3286 kvm_s390_vcpu_crypto_setup(vcpu);
3288 mutex_lock(&vcpu->kvm->lock);
3289 if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3290 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3292 kvm_s390_vcpu_unsetup_cmma(vcpu);
3294 mutex_unlock(&vcpu->kvm->lock);
3299 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3301 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3306 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3308 struct sie_page *sie_page;
3311 BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3312 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3316 vcpu->arch.sie_block = &sie_page->sie_block;
3317 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3319 /* the real guest size will always be smaller than msl */
3320 vcpu->arch.sie_block->mso = 0;
3321 vcpu->arch.sie_block->msl = sclp.hamax;
3323 vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3324 spin_lock_init(&vcpu->arch.local_int.lock);
3325 vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3326 if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3327 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3328 seqcount_init(&vcpu->arch.cputm_seqcount);
3330 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3331 kvm_clear_async_pf_completion_queue(vcpu);
3332 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3339 kvm_s390_set_prefix(vcpu, 0);
3340 if (test_kvm_facility(vcpu->kvm, 64))
3341 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3342 if (test_kvm_facility(vcpu->kvm, 82))
3343 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3344 if (test_kvm_facility(vcpu->kvm, 133))
3345 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3346 if (test_kvm_facility(vcpu->kvm, 156))
3347 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3348 /* fprs can be synchronized via vrs, even if the guest has no vx. With
3349 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3352 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3354 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3356 if (kvm_is_ucontrol(vcpu->kvm)) {
3357 rc = __kvm_ucontrol_vcpu_init(vcpu);
3359 goto out_free_sie_block;
3362 VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3363 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3364 trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3366 rc = kvm_s390_vcpu_setup(vcpu);
3368 goto out_ucontrol_uninit;
3371 out_ucontrol_uninit:
3372 if (kvm_is_ucontrol(vcpu->kvm))
3373 gmap_remove(vcpu->arch.gmap);
3375 free_page((unsigned long)(vcpu->arch.sie_block));
3379 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3381 clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
3382 return kvm_s390_vcpu_has_irq(vcpu, 0);
3385 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3387 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3390 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3392 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3396 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3398 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3401 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3403 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3407 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3409 return atomic_read(&vcpu->arch.sie_block->prog20) &
3410 (PROG_BLOCK_SIE | PROG_REQUEST);
3413 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3415 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3419 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3420 * If the CPU is not running (e.g. waiting as idle) the function will
3421 * return immediately. */
3422 void exit_sie(struct kvm_vcpu *vcpu)
3424 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3425 kvm_s390_vsie_kick(vcpu);
3426 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3427 cpu_relax();
3430 /* Kick a guest cpu out of SIE to process a request synchronously */
3431 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3433 kvm_make_request(req, vcpu);
3434 kvm_s390_vcpu_request(vcpu);
3437 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3440 struct kvm *kvm = gmap->private;
3441 struct kvm_vcpu *vcpu;
3442 unsigned long prefix;
3445 if (gmap_is_shadow(gmap))
3447 if (start >= 1UL << 31)
3448 /* We are only interested in prefix pages */
3450 kvm_for_each_vcpu(i, vcpu, kvm) {
3451 /* match against both prefix pages */
3452 prefix = kvm_s390_get_prefix(vcpu);
3453 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3454 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3456 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3461 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3463 /* do not poll with more than halt_poll_max_steal percent of steal time */
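/*
 * avg_steal_timer is in TOD-clock units (4096 per microsecond, hence the
 * << 12) accumulated per timer tick, and TICK_USEC is the tick length in
 * microseconds, so the quotient below is the percentage of recent CPU
 * time stolen by the host.
 */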
3464 if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3465 READ_ONCE(halt_poll_max_steal)) {
3466 vcpu->stat.halt_no_poll_steal++;
3472 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3474 /* kvm common code refers to this, but never calls it */
3479 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3480 struct kvm_one_reg *reg)
3485 case KVM_REG_S390_TODPR:
3486 r = put_user(vcpu->arch.sie_block->todpr,
3487 (u32 __user *)reg->addr);
3489 case KVM_REG_S390_EPOCHDIFF:
3490 r = put_user(vcpu->arch.sie_block->epoch,
3491 (u64 __user *)reg->addr);
3493 case KVM_REG_S390_CPU_TIMER:
3494 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3495 (u64 __user *)reg->addr);
3497 case KVM_REG_S390_CLOCK_COMP:
3498 r = put_user(vcpu->arch.sie_block->ckc,
3499 (u64 __user *)reg->addr);
3501 case KVM_REG_S390_PFTOKEN:
3502 r = put_user(vcpu->arch.pfault_token,
3503 (u64 __user *)reg->addr);
3505 case KVM_REG_S390_PFCOMPARE:
3506 r = put_user(vcpu->arch.pfault_compare,
3507 (u64 __user *)reg->addr);
3509 case KVM_REG_S390_PFSELECT:
3510 r = put_user(vcpu->arch.pfault_select,
3511 (u64 __user *)reg->addr);
3513 case KVM_REG_S390_PP:
3514 r = put_user(vcpu->arch.sie_block->pp,
3515 (u64 __user *)reg->addr);
3517 case KVM_REG_S390_GBEA:
3518 r = put_user(vcpu->arch.sie_block->gbea,
3519 (u64 __user *)reg->addr);
3528 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3529 struct kvm_one_reg *reg)
3535 case KVM_REG_S390_TODPR:
3536 r = get_user(vcpu->arch.sie_block->todpr,
3537 (u32 __user *)reg->addr);
3539 case KVM_REG_S390_EPOCHDIFF:
3540 r = get_user(vcpu->arch.sie_block->epoch,
3541 (u64 __user *)reg->addr);
3543 case KVM_REG_S390_CPU_TIMER:
3544 r = get_user(val, (u64 __user *)reg->addr);
3546 kvm_s390_set_cpu_timer(vcpu, val);
3548 case KVM_REG_S390_CLOCK_COMP:
3549 r = get_user(vcpu->arch.sie_block->ckc,
3550 (u64 __user *)reg->addr);
3552 case KVM_REG_S390_PFTOKEN:
3553 r = get_user(vcpu->arch.pfault_token,
3554 (u64 __user *)reg->addr);
3555 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3556 kvm_clear_async_pf_completion_queue(vcpu);
3558 case KVM_REG_S390_PFCOMPARE:
3559 r = get_user(vcpu->arch.pfault_compare,
3560 (u64 __user *)reg->addr);
3562 case KVM_REG_S390_PFSELECT:
3563 r = get_user(vcpu->arch.pfault_select,
3564 (u64 __user *)reg->addr);
3566 case KVM_REG_S390_PP:
3567 r = get_user(vcpu->arch.sie_block->pp,
3568 (u64 __user *)reg->addr);
3570 case KVM_REG_S390_GBEA:
3571 r = get_user(vcpu->arch.sie_block->gbea,
3572 (u64 __user *)reg->addr);
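/*
 * Illustrative userspace sketch (not part of this file) of the ONE_REG
 * interface served above; "vcpu_fd" and the omitted error handling are
 * assumptions of the example.
 */
#if 0
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static uint64_t read_cpu_timer(int vcpu_fd)
{
	uint64_t val = 0;
	struct kvm_one_reg reg = {
		.id = KVM_REG_S390_CPU_TIMER,
		.addr = (uint64_t)(unsigned long)&val,
	};

	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
	return val;
}
#endif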
3581 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3583 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3584 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3585 memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3587 kvm_clear_async_pf_completion_queue(vcpu);
3588 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3589 kvm_s390_vcpu_stop(vcpu);
3590 kvm_s390_clear_local_irqs(vcpu);
3593 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3595 /* Initial reset is a superset of the normal reset */
3596 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3599 * This equals the initial cpu reset in the PoP, but we don't switch to ESA.
3600 * We not only reset the internal data, but also ...
3602 vcpu->arch.sie_block->gpsw.mask = 0;
3603 vcpu->arch.sie_block->gpsw.addr = 0;
3604 kvm_s390_set_prefix(vcpu, 0);
3605 kvm_s390_set_cpu_timer(vcpu, 0);
3606 vcpu->arch.sie_block->ckc = 0;
3607 memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3608 vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3609 vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3611 /* ... the data in sync regs */
3612 memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3613 vcpu->run->s.regs.ckc = 0;
3614 vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3615 vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3616 vcpu->run->psw_addr = 0;
3617 vcpu->run->psw_mask = 0;
3618 vcpu->run->s.regs.todpr = 0;
3619 vcpu->run->s.regs.cputm = 0;
3620 vcpu->run->s.regs.ckc = 0;
3621 vcpu->run->s.regs.pp = 0;
3622 vcpu->run->s.regs.gbea = 1;
3623 vcpu->run->s.regs.fpc = 0;
3625 * Do not reset these registers in the protected case, as some of
3626 * them are overlaid and not accessible in this case
3629 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3630 vcpu->arch.sie_block->gbea = 1;
3631 vcpu->arch.sie_block->pp = 0;
3632 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3633 vcpu->arch.sie_block->todpr = 0;
3637 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3639 struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3641 /* Clear reset is a superset of the initial reset */
3642 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3644 memset(&regs->gprs, 0, sizeof(regs->gprs));
3645 memset(&regs->vrs, 0, sizeof(regs->vrs));
3646 memset(&regs->acrs, 0, sizeof(regs->acrs));
3647 memset(&regs->gscb, 0, sizeof(regs->gscb));
3650 regs->etoken_extension = 0;
3653 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3656 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3661 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3664 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3669 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3670 struct kvm_sregs *sregs)
3674 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3675 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3681 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3682 struct kvm_sregs *sregs)
3686 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3687 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3693 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3699 if (test_fp_ctl(fpu->fpc)) {
3703 vcpu->run->s.regs.fpc = fpu->fpc;
3705 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3706 (freg_t *) fpu->fprs);
3708 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3715 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3719 /* make sure we have the latest values */
3722 convert_vx_to_fp((freg_t *) fpu->fprs,
3723 (__vector128 *) vcpu->run->s.regs.vrs);
3725 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3726 fpu->fpc = vcpu->run->s.regs.fpc;
3732 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3736 if (!is_vcpu_stopped(vcpu))
3739 vcpu->run->psw_mask = psw.mask;
3740 vcpu->run->psw_addr = psw.addr;
3745 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3746 struct kvm_translation *tr)
3748 return -EINVAL; /* not implemented yet */
3751 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3752 KVM_GUESTDBG_USE_HW_BP | \
3753 KVM_GUESTDBG_ENABLE)
3755 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3756 struct kvm_guest_debug *dbg)
3762 vcpu->guest_debug = 0;
3763 kvm_s390_clear_bp_data(vcpu);
3765 if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3769 if (!sclp.has_gpere) {
3774 if (dbg->control & KVM_GUESTDBG_ENABLE) {
3775 vcpu->guest_debug = dbg->control;
3776 /* enforce guest PER */
3777 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3779 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3780 rc = kvm_s390_import_bp_data(vcpu, dbg);
3782 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3783 vcpu->arch.guestdbg.last_bp = 0;
3787 vcpu->guest_debug = 0;
3788 kvm_s390_clear_bp_data(vcpu);
3789 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3797 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3798 struct kvm_mp_state *mp_state)
3804 /* CHECK_STOP and LOAD are not supported yet */
3805 ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3806 KVM_MP_STATE_OPERATING;
3812 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3813 struct kvm_mp_state *mp_state)
3819 /* user space knows about this interface - let it control the state */
3820 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3822 switch (mp_state->mp_state) {
3823 case KVM_MP_STATE_STOPPED:
3824 rc = kvm_s390_vcpu_stop(vcpu);
3826 case KVM_MP_STATE_OPERATING:
3827 rc = kvm_s390_vcpu_start(vcpu);
3829 case KVM_MP_STATE_LOAD:
3830 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3834 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3836 case KVM_MP_STATE_CHECK_STOP:
3837 fallthrough; /* CHECK_STOP and LOAD are not supported yet */
3846 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3848 return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3851 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3854 kvm_s390_vcpu_request_handled(vcpu);
3855 if (!kvm_request_pending(vcpu))
3858 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3859 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3860 * This ensures that the ipte instruction for this request has
3861 * already finished. We might race against a second unmapper that
3862 * wants to set the blocking bit. Let's just retry the request loop.
3864 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3866 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3867 kvm_s390_get_prefix(vcpu),
3868 PAGE_SIZE * 2, PROT_WRITE);
3870 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3876 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3877 vcpu->arch.sie_block->ihcpu = 0xffff;
3881 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3882 if (!ibs_enabled(vcpu)) {
3883 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3884 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3889 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3890 if (ibs_enabled(vcpu)) {
3891 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3892 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3897 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3898 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3902 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3904 * Disable CMM virtualization; we will emulate the ESSA
3905 * instruction manually, in order to provide additional
3906 * functionality needed for live migration.
3908 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3912 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3914 * Re-enable CMM virtualization if CMMA is available and
3915 * CMM has been used.
3917 if ((vcpu->kvm->arch.use_cmma) &&
3918 (vcpu->kvm->mm->context.uses_cmm))
3919 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3923 /* nothing to do, just clear the request */
3924 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3925 /* we left the vsie handler, nothing to do, just clear the request */
3926 kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3931 static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
3933 struct kvm_vcpu *vcpu;
3934 union tod_clock clk;
3939 store_tod_clock_ext(&clk);
3941 kvm->arch.epoch = gtod->tod - clk.tod;
3943 if (test_kvm_facility(kvm, 139)) {
3944 kvm->arch.epdx = gtod->epoch_idx - clk.ei;
3945 if (kvm->arch.epoch > gtod->tod)
3946 kvm->arch.epdx -= 1;
3949 kvm_s390_vcpu_block_all(kvm);
3950 kvm_for_each_vcpu(i, vcpu, kvm) {
3951 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3952 vcpu->arch.sie_block->epdx = kvm->arch.epdx;
3955 kvm_s390_vcpu_unblock_all(kvm);
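/*
 * The epoch/epdx pair is a 72-bit offset that SIE adds to the host TOD
 * clock; the subtraction above is plain multiprecision arithmetic. A
 * standalone sketch (struct tod72 is a hypothetical type):
 */
#if 0
struct tod72 {
	unsigned char ei;	/* high 8 bits (epoch index) */
	unsigned long long tod;	/* low 64 bits */
};

/* d = a - b, propagating the borrow out of the low 64 bits */
static struct tod72 tod72_sub(struct tod72 a, struct tod72 b)
{
	struct tod72 d;

	d.tod = a.tod - b.tod;
	d.ei = a.ei - b.ei - (d.tod > a.tod);	/* borrow iff the low part wrapped */
	return d;
}
#endif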
3959 int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
3961 if (!mutex_trylock(&kvm->lock))
3963 __kvm_s390_set_tod_clock(kvm, gtod);
3964 mutex_unlock(&kvm->lock);
3969 * kvm_arch_fault_in_page - fault-in guest page if necessary
3970 * @vcpu: The corresponding virtual cpu
3971 * @gpa: Guest physical address
3972 * @writable: Whether the page should be writable or not
3974 * Make sure that a guest page has been faulted-in on the host.
3976 * Return: Zero on success, negative error code otherwise.
3978 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3980 return gmap_fault(vcpu->arch.gmap, gpa,
3981 writable ? FAULT_FLAG_WRITE : 0);
3984 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3985 unsigned long token)
3987 struct kvm_s390_interrupt inti;
3988 struct kvm_s390_irq irq;
3991 irq.u.ext.ext_params2 = token;
3992 irq.type = KVM_S390_INT_PFAULT_INIT;
3993 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3995 inti.type = KVM_S390_INT_PFAULT_DONE;
3996 inti.parm64 = token;
3997 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
4001 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
4002 struct kvm_async_pf *work)
4004 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
4005 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
4010 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
4011 struct kvm_async_pf *work)
4013 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
4014 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
4017 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
4018 struct kvm_async_pf *work)
4020 /* s390 will always inject the page directly */
4023 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
4026 * s390 will always inject the page directly,
4027 * but we still want check_async_completion to clean up
4032 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
4035 struct kvm_arch_async_pf arch;
4037 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4039 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
4040 vcpu->arch.pfault_compare)
4042 if (psw_extint_disabled(vcpu))
4044 if (kvm_s390_vcpu_has_irq(vcpu, 0))
4046 if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
4048 if (!vcpu->arch.gmap->pfault_enabled)
4051 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
4052 hva += current->thread.gmap_addr & ~PAGE_MASK;
4053 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
4056 return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
4059 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
4064 * On s390 notifications for arriving pages will be delivered directly
4065 * to the guest, but the housekeeping for completed pfaults is
4066 * handled outside the worker.
4068 kvm_check_async_pf_completion(vcpu);
4070 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4071 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4076 if (!kvm_is_ucontrol(vcpu->kvm)) {
4077 rc = kvm_s390_deliver_pending_interrupts(vcpu);
4082 rc = kvm_s390_handle_requests(vcpu);
4086 if (guestdbg_enabled(vcpu)) {
4087 kvm_s390_backup_guest_per_regs(vcpu);
4088 kvm_s390_patch_guest_per_regs(vcpu);
4091 clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
4093 vcpu->arch.sie_block->icptcode = 0;
4094 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4095 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4096 trace_kvm_s390_sie_enter(vcpu, cpuflags);
4101 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4103 struct kvm_s390_pgm_info pgm_info = {
4104 .code = PGM_ADDRESSING,
4109 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4110 trace_kvm_s390_sie_fault(vcpu);
4113 * We want to inject an addressing exception, which is defined as a
4114 * suppressing or terminating exception. However, since we came here
4115 * by a DAT access exception, the PSW still points to the faulting
4116 * instruction since DAT exceptions are nullifying. So we've got
4117 * to look up the current opcode to get the length of the instruction
4118 * to be able to forward the PSW.
4120 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4121 ilen = insn_length(opcode);
4125 /* Instruction-Fetching Exceptions - we can't detect the ilen.
4126 * Forward by arbitrary ilc, injection will take care of
4127 * nullification if necessary.
4129 pgm_info = vcpu->arch.pgm;
4132 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4133 kvm_s390_forward_psw(vcpu, ilen);
4134 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4137 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4139 struct mcck_volatile_info *mcck_info;
4140 struct sie_page *sie_page;
4142 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4143 vcpu->arch.sie_block->icptcode);
4144 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4146 if (guestdbg_enabled(vcpu))
4147 kvm_s390_restore_guest_per_regs(vcpu);
4149 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4150 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4152 if (exit_reason == -EINTR) {
4153 VCPU_EVENT(vcpu, 3, "%s", "machine check");
4154 sie_page = container_of(vcpu->arch.sie_block,
4155 struct sie_page, sie_block);
4156 mcck_info = &sie_page->mcck_info;
4157 kvm_s390_reinject_machine_check(vcpu, mcck_info);
4161 if (vcpu->arch.sie_block->icptcode > 0) {
4162 int rc = kvm_handle_sie_intercept(vcpu);
4164 if (rc != -EOPNOTSUPP)
4166 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4167 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4168 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4169 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4171 } else if (exit_reason != -EFAULT) {
4172 vcpu->stat.exit_null++;
4174 } else if (kvm_is_ucontrol(vcpu->kvm)) {
4175 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4176 vcpu->run->s390_ucontrol.trans_exc_code =
4177 current->thread.gmap_addr;
4178 vcpu->run->s390_ucontrol.pgm_code = 0x10;
4180 } else if (current->thread.gmap_pfault) {
4181 trace_kvm_s390_major_guest_pfault(vcpu);
4182 current->thread.gmap_pfault = 0;
4183 if (kvm_arch_setup_async_pf(vcpu))
4185 vcpu->stat.pfault_sync++;
4186 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4188 return vcpu_post_run_fault_in_sie(vcpu);
4191 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4192 static int __vcpu_run(struct kvm_vcpu *vcpu)
4194 int rc, exit_reason;
4195 struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4198 * We try to hold kvm->srcu during most of vcpu_run (except when
4199 * running the guest), so that memslots (and other stuff) are protected
4201 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4204 rc = vcpu_pre_run(vcpu);
4208 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4210 * As PF_VCPU will be used in the fault handler, there must be
4211 * no uaccess between guest_enter and guest_exit.
4213 local_irq_disable();
4214 guest_enter_irqoff();
4215 __disable_cpu_timer_accounting(vcpu);
4217 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4218 memcpy(sie_page->pv_grregs,
4219 vcpu->run->s.regs.gprs,
4220 sizeof(sie_page->pv_grregs));
4222 if (test_cpu_flag(CIF_FPU))
4224 exit_reason = sie64a(vcpu->arch.sie_block,
4225 vcpu->run->s.regs.gprs);
4226 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4227 memcpy(vcpu->run->s.regs.gprs,
4228 sie_page->pv_grregs,
4229 sizeof(sie_page->pv_grregs));
4231 * We're not allowed to inject interrupts on intercepts
4232 * that leave the guest state in an "in-between" state
4233 * where the next SIE entry will do a continuation.
4234 * Fence interrupts in our "internal" PSW.
4236 if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4237 vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4238 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4241 local_irq_disable();
4242 __enable_cpu_timer_accounting(vcpu);
4243 guest_exit_irqoff();
4245 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4247 rc = vcpu_post_run(vcpu, exit_reason);
4248 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4250 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;
	struct runtime_instr_cb *riccb;
	struct gs_cb *gscb;

	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
		vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
		vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
	}
	/*
	 * If userspace sets the riccb (e.g. after migration) to a valid state,
	 * we should enable RI here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
	    test_kvm_facility(vcpu->kvm, 64) &&
	    riccb->v &&
	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
	}
	/*
	 * If userspace sets the gscb (e.g. after migration) to non-zero,
	 * we should enable GS here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
	    test_kvm_facility(vcpu->kvm, 133) &&
	    gscb->gssm &&
	    !vcpu->arch.gs_enabled) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
		vcpu->arch.sie_block->ecb |= ECB_GS;
		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
		vcpu->arch.gs_enabled = 1;
	}
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
	    test_kvm_facility(vcpu->kvm, 82)) {
		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
	}
	if (MACHINE_HAS_GS) {
		preempt_disable();
		__ctl_set_bit(2, 4);
		if (current->thread.gs_cb) {
			vcpu->arch.host_gscb = current->thread.gs_cb;
			save_gs_cb(vcpu->arch.host_gscb);
		}
		if (vcpu->arch.gs_enabled) {
			current->thread.gs_cb = (struct gs_cb *)
						&vcpu->run->s.regs.gscb;
			restore_gs_cb(current->thread.gs_cb);
		}
		preempt_enable();
	}
	/* SIE will load etoken directly from SDNX and therefore kvm_run */
}

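/*
 * Copy the register state that userspace flagged in kvm_run->kvm_dirty_regs
 * into KVM's internal copies before entering SIE. An illustrative (not
 * verbatim) userspace sequence for this protocol would be:
 *
 *	run->s.regs.prefix = new_prefix;
 *	run->kvm_dirty_regs |= KVM_SYNC_PREFIX;
 *	ioctl(vcpu_fd, KVM_RUN, 0);
 *
 * i.e. userspace marks what it modified and the next KVM_RUN syncs it here.
 */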
static void sync_regs(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;

	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
	}
	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	/* save host (userspace) fprs/vrs */
	save_fpu_regs();
	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
	if (MACHINE_HAS_VX)
		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
	else
		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;

	/* Sync fmt2 only data */
	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
		sync_regs_fmt2(vcpu);
	} else {
		/*
		 * In several places we have to modify our internal view to
		 * not do things that are disallowed by the ultravisor. For
		 * example we must not inject interrupts after specific exits
		 * (e.g. 112 prefix page not secure). We do this by turning
		 * off the machine check, external and I/O interrupt bits
		 * of our PSW copy. To avoid getting validity intercepts, we
		 * only accept the condition code from userspace.
		 */
		vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
		vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
						   PSW_MASK_CC;
	}

	kvm_run->kvm_dirty_regs = 0;
}

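/*
 * Mirror image of sync_regs_fmt2(): copy the format-2 only state back from
 * the SIE control block into kvm_run so userspace sees the current values,
 * and hand the guarded-storage control block back to the host.
 */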
static void store_regs_fmt2(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;

	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
	kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
	if (MACHINE_HAS_GS) {
		preempt_disable();
		__ctl_set_bit(2, 4);
		if (vcpu->arch.gs_enabled)
			save_gs_cb(current->thread.gs_cb);
		current->thread.gs_cb = vcpu->arch.host_gscb;
		restore_gs_cb(vcpu->arch.host_gscb);
		if (!vcpu->arch.host_gscb)
			__ctl_clear_bit(2, 4);
		vcpu->arch.host_gscb = NULL;
		preempt_enable();
	}
	/* SIE will save etoken directly into SDNX and therefore kvm_run */
}

static void store_regs(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;

	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
	save_access_regs(vcpu->run->s.regs.acrs);
	restore_access_regs(vcpu->arch.host_acrs);
	/* Save guest register state */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	/* Restore will be done lazily at return */
	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
		store_regs_fmt2(vcpu);
}

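/*
 * Entry point for the KVM_RUN ioctl. A typical userspace driver loop is,
 * roughly (handle_exit() is a hypothetical helper, error handling omitted):
 *
 *	do {
 *		ret = ioctl(vcpu_fd, KVM_RUN, 0);
 *	} while (ret == 0 && handle_exit(run) == 0);
 *
 * Each iteration syncs dirty registers in, runs the SIE loop, and stores
 * the resulting state back into kvm_run for userspace.
 */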
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;
	int rc;

	if (kvm_run->immediate_exit)
		return -EINTR;

	if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
	    kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
		return -EINVAL;

	vcpu_load(vcpu);

	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
		goto out;
	}

	kvm_sigset_activate(vcpu);

	/*
	 * no need to check the return value of vcpu_start as it can only
	 * fail for protvirt, but protvirt means user cpu state
	 */
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		rc = -EINVAL;
		goto out;
	}

	sync_regs(vcpu);
	enable_cpu_timer_accounting(vcpu);

	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}

	disable_cpu_timer_accounting(vcpu);
	store_regs(vcpu);

	kvm_sigset_deactivate(vcpu);

	vcpu->stat.exit_userspace++;
out:
	vcpu_put(vcpu);
	return rc;
}

/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	freg_t fprs[NUM_FPRS];
	unsigned int px;
	u64 clkcomp, cputm;
	int rc;

	px = kvm_s390_get_prefix(vcpu);
	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = 0x1200;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = px;
	} else
		gpa -= __LC_FPREGS_SAVE_AREA;

	/* manually convert vector registers if necessary */
	if (MACHINE_HAS_VX) {
		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     fprs, 128);
	} else {
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     vcpu->run->s.regs.fprs, 128);
	}
	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
			      &vcpu->arch.sie_block->gpsw, 16);
	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
			      &px, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
			      &vcpu->run->s.regs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
			      &vcpu->arch.sie_block->todpr, 4);
	cputm = kvm_s390_get_cpu_timer(vcpu);
	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
			      &cputm, 8);
	/* only bits 0-55 of the clock comparator are stored */
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}

int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * switch in the run ioctl. Let's update our copies before we save
	 * them into the save area.
	 */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}

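/*
 * IBS is a SIE performance feature that is only used while exactly one
 * VCPU of the VM is running (see kvm_s390_vcpu_start/stop() below). Each
 * helper first cancels a still-pending request for the opposite state via
 * kvm_check_request() and then posts a synced request for the new state,
 * so stale requests cannot survive a state flip.
 */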
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	if (!sclp.has_ibs)
		return;
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}

int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, r = 0, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return 0;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* Let's tell the UV that we want to change into the operating state */
	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
		if (r) {
			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
			return r;
		}
	}

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
	/*
	 * The real PSW might have changed due to a RESTART interpreted by the
	 * ultravisor. We block all interrupts and let the next sie exit
	 * refresh our view.
	 */
	if (kvm_s390_pv_cpu_is_protected(vcpu))
		vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return 0;
}

int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, r = 0, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return 0;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* Let's tell the UV that we want to change into the stopped state */
	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
		if (r) {
			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
			return r;
		}
	}

	/*
	 * Set the VCPU to STOPPED and THEN clear the interrupt flag,
	 * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
	 * have been fully processed. This will ensure that the VCPU
	 * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
	 */
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
	kvm_s390_clear_stop_irq(vcpu);

	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return 0;
}

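/*
 * The only capability that can be enabled on a single VCPU here is
 * KVM_CAP_S390_CSS_SUPPORT, and it really toggles a VM-wide flag: once
 * css_support is set, channel I/O instructions of the guest are passed
 * to userspace instead of being handled in the kernel.
 */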
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
				   struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	int r = 0;

	if (mop->flags || !mop->size)
		return -EINVAL;
	/* reject offsets whose sum with the size would wrap around */
	if (mop->size + mop->sida_offset < mop->size)
		return -EINVAL;
	if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
		return -E2BIG;
	if (!kvm_s390_pv_cpu_is_protected(vcpu))
		return -EINVAL;

	switch (mop->op) {
	case KVM_S390_MEMOP_SIDA_READ:
		if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
				 mop->sida_offset), mop->size))
			r = -EFAULT;
		break;
	case KVM_S390_MEMOP_SIDA_WRITE:
		if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
				   mop->sida_offset), uaddr, mop->size))
			r = -EFAULT;
		break;
	}
	return r;
}

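/*
 * Illustrative userspace invocation of the memop interface below (the
 * field values are placeholders, error handling omitted):
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = guest_addr,
 *		.size  = length,
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)(unsigned long)buffer,
 *		.ar    = 0,
 *	};
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 *
 * Reads and writes go through the regular guest-access machinery, so DAT
 * translation and the selected access register are honoured.
 */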
static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r = 0;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
		return -EINVAL;

	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;

	if (kvm_s390_pv_cpu_is_protected(vcpu))
		return -EINVAL;

	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_FETCH);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_STORE);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	}

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}

static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
				      struct kvm_s390_mem_op *mop)
{
	int r, srcu_idx;

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		r = kvm_s390_guest_mem_op(vcpu, mop);
		break;
	case KVM_S390_MEMOP_SIDA_READ:
	case KVM_S390_MEMOP_SIDA_WRITE:
		/* we are locked against sida going away by the vcpu->mutex */
		r = kvm_s390_guest_sida_op(vcpu, mop);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
	return r;
}

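/*
 * Async ioctls are the ones that may be handled without taking the vcpu
 * mutex: interrupt injection must not have to wait for a vcpu that is
 * busy inside its KVM_RUN loop.
 */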
long kvm_arch_vcpu_async_ioctl(struct file *filp,
			       unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			return -EFAULT;
		return kvm_s390_inject_vcpu(vcpu, &s390irq);
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq = {};

		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			return -EFAULT;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		return kvm_s390_inject_vcpu(vcpu, &s390irq);
	}
	}
	return -ENOIOCTLCMD;
}

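/*
 * All remaining vcpu ioctls run with the vcpu loaded; vcpu_load() and
 * vcpu_put() bracket the whole switch statement below.
 */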
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;
	u16 rc, rrc;

	vcpu_load(vcpu);

	switch (ioctl) {
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_store_status_unloaded(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_CLEAR_RESET:
		r = 0;
		kvm_arch_vcpu_ioctl_clear_reset(vcpu);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
					  UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_S390_INITIAL_RESET:
		r = 0;
		kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
					  UVC_CMD_CPU_RESET_INITIAL,
					  &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_S390_NORMAL_RESET:
		r = 0;
		kvm_arch_vcpu_ioctl_normal_reset(vcpu);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
					  UVC_CMD_CPU_RESET, &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EINVAL;
		if (kvm_s390_pv_cpu_is_protected(vcpu))
			break;
		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}

	vcpu_put(vcpu);
	return r;
}

vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}

/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. Memory slots have to start and end at a
	   segment boundary (1 MB). The memory in userland may be fragmented
	   into multiple VMAs, and it is fine to mmap() and munmap() ranges
	   inside this slot at any time after this call. */

	/* 0xfffff == 1 MB - 1: enforce 1 MB segment alignment */
	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
		return -EINVAL;

	/* When we are protected, we should not change the memory slots */
	if (kvm_s390_pv_get_handle(kvm))
		return -EINVAL;
	return 0;
}

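/*
 * Apply the committed slot change to the guest address space: DELETE and
 * MOVE unmap the old range from the gmap, while CREATE (and the tail of
 * MOVE) maps the new userspace range at the guest physical address.
 */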
void kvm_arch_commit_memory_region(struct kvm *kvm,
				   const struct kvm_userspace_memory_region *mem,
				   struct kvm_memory_slot *old,
				   const struct kvm_memory_slot *new,
				   enum kvm_mr_change change)
{
	int rc = 0;

	switch (change) {
	case KVM_MR_DELETE:
		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
					old->npages * PAGE_SIZE);
		break;
	case KVM_MR_MOVE:
		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
					old->npages * PAGE_SIZE);
		if (rc)
			break;
		fallthrough;
	case KVM_MR_CREATE:
		rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
				      mem->guest_phys_addr, mem->memory_size);
		break;
	case KVM_MR_FLAGS_ONLY:
		break;
	default:
		WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
	}
	if (rc)
		pr_warn("failed to commit memory region\n");
	return;
}

static inline unsigned long nonhyp_mask(int i)
{
	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}

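/*
 * nonhyp_mask() extracts the i-th two-bit indicator from sclp.hmfai and
 * turns it into a mask for the facility bits a KVM guest may use. Worked
 * example: indicator 0 keeps 48 bits (0x0000ffffffffffff), indicator 2
 * keeps 16 bits (0x000000000000ffff) and indicator 3 shifts the mask away
 * completely (0). kvm_s390_init() below applies this mask to the host
 * facility list when building kvm_s390_fac_base.
 */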
void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
	vcpu->valid_wakeup = false;
}

static int __init kvm_s390_init(void)
{
	int i;

	if (!sclp.has_sief2) {
		pr_info("SIE is not available\n");
		return -ENODEV;
	}

	if (nested && hpage) {
		pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
		return -EINVAL;
	}

	for (i = 0; i < 16; i++)
		kvm_s390_fac_base[i] |=
			stfle_fac_list[i] & nonhyp_mask(i);

	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");