/*
 * hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008, 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/pgtable.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"
#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_pei", VCPU_STAT(exit_pei) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
	{ "instruction_sie", VCPU_STAT(instruction_sie) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "diagnose_10", VCPU_STAT(diagnose_10) },
	{ "diagnose_44", VCPU_STAT(diagnose_44) },
	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
	{ "diagnose_258", VCPU_STAT(diagnose_258) },
	{ "diagnose_308", VCPU_STAT(diagnose_308) },
	{ "diagnose_500", VCPU_STAT(diagnose_500) },
	{ NULL }
};
struct kvm_s390_tod_clock_ext {
	__u8 epoch_idx;
	__u64 tod;
	__u8 reserved[7];
} __packed;
/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");
/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };

unsigned long kvm_s390_fac_list_mask_size(void)
{
	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
	return ARRAY_SIZE(kvm_s390_fac_list_mask);
}
/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm->arch.epoch -= *delta;
		kvm_for_each_vcpu(i, vcpu, kvm) {
			vcpu->arch.sie_block->epoch -= *delta;
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				vcpu->arch.vsie_block->epoch -= *delta;
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};
int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}
static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

static inline int plo_test_bit(unsigned char nr)
{
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc;

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc == 0;
}
static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}
int kvm_arch_init(void *opaque)
{
	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
		debug_unregister(kvm_s390_dbf);
		return -ENOMEM;
	}

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}
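/*
 * Illustrative userspace counterpart (a sketch, not part of this file,
 * assuming an open VM fd "vm_fd"): the FLIC device ops registered above are
 * instantiated per VM via the generic device creation ioctl.
 *
 *	struct kvm_create_device cd = { .type = KVM_DEV_TYPE_FLIC };
 *	ioctl(vm_fd, KVM_CREATE_DEVICE, &cd);
 *
 * On success, cd.fd refers to the newly created FLIC device.
 */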
void kvm_arch_exit(void)
{
	debug_unregister(kvm_s390_dbf);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_CMMA_MIGRATION:
	case KVM_CAP_S390_AIS:
	case KVM_CAP_S390_AIS_MIGRATION:
		r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	case KVM_CAP_S390_GS:
		r = test_facility(133);
		break;
	default:
		r = 0;
	}
	return r;
}
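/*
 * Query sketch from userspace (assuming an open VM fd "vm_fd"):
 *
 *	int max = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *
 * A positive return value (MEM_OP_MAX_SIZE above) is the largest transfer
 * size accepted by KVM_S390_MEM_OP.
 */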
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
				    struct kvm_memory_slot *memslot)
{
	gfn_t cur_gfn, last_gfn;
	unsigned long address;
	struct gmap *gmap = kvm->arch.gmap;

	/* Loop over all guest pages */
	last_gfn = memslot->base_gfn + memslot->npages;
	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
		address = gfn_to_hva_memslot(memslot, cur_gfn);

		if (test_and_clear_guest_dirty(gmap->mm, address))
			mark_page_dirty(kvm, cur_gfn);
		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}
/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);
/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int is_dirty = 0;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}
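/*
 * Caller-side sketch (hypothetical userspace snippet, assuming "vm_fd" and
 * a zeroed "bitmap" buffer of memslot->npages / 8 bytes):
 *
 *	struct kvm_dirty_log log = { .slot = 0 };
 *	log.dirty_bitmap = bitmap;
 *	ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);
 *
 * Each set bit in the returned bitmap marks a guest page written since the
 * previous call.
 */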
static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}
static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			}
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
			}
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_AIS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else {
			set_kvm_facility(kvm->arch.model.fac_mask, 72);
			set_kvm_facility(kvm->arch.model.fac_list, 72);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_GS:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus)) {
			r = -EBUSY;
		} else if (test_facility(133)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 133);
			set_kvm_facility(kvm->arch.model.fac_list, 133);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		icpt_operexc_on_all_vcpus(kvm);
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
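/*
 * Example (sketch): enabling one of the VM capabilities handled above,
 * before any VCPU has been created (assuming an open VM fd "vm_fd"):
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 */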
static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;

		ret = -EBUSY;
		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			kvm->arch.use_cmma = 1;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_vcpu *vcpu;
	int i;

	if (!test_kvm_facility(kvm, 76))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		exit_sie(vcpu);
	}
	mutex_unlock(&kvm->lock);
	return 0;
}
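/*
 * Userspace reaches this handler through the device-attribute interface
 * (sketch, assuming an open VM fd "vm_fd"):
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CRYPTO,
 *		.attr  = KVM_S390_VM_CRYPTO_ENABLE_AES_KW,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */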
static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
	int cx;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(cx, vcpu, kvm)
		kvm_s390_sync_request(req, vcpu);
}
/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_s390_migration_state *mgs;
	struct kvm_memory_slot *ms;
	/* should be the only one */
	struct kvm_memslots *slots;
	unsigned long ram_pages;
	int slotnr;

	/* migration mode already enabled */
	if (kvm->arch.migration_state)
		return 0;

	slots = kvm_memslots(kvm);
	if (!slots || !slots->used_slots)
		return -EINVAL;

	mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
	if (!mgs)
		return -ENOMEM;
	kvm->arch.migration_state = mgs;

	if (kvm->arch.use_cmma) {
		/*
		 * Get the last slot. They should be sorted by base_gfn, so the
		 * last slot is also the one at the end of the address space.
		 * We have verified above that at least one slot is present.
		 */
		ms = slots->memslots + slots->used_slots - 1;
		/* round up so we only use full longs */
		ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
		/* allocate enough bytes to store all the bits */
		mgs->pgste_bitmap = vmalloc(ram_pages / 8);
		if (!mgs->pgste_bitmap) {
			kfree(mgs);
			kvm->arch.migration_state = NULL;
			return -ENOMEM;
		}

		mgs->bitmap_size = ram_pages;
		atomic64_set(&mgs->dirty_pages, ram_pages);
		/* mark all the pages in active slots as dirty */
		for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
			ms = slots->memslots + slotnr;
			bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
		}

		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	}
	return 0;
}
/*
 * Must be called with kvm->lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	struct kvm_s390_migration_state *mgs;

	/* migration mode already disabled */
	if (!kvm->arch.migration_state)
		return 0;
	mgs = kvm->arch.migration_state;
	kvm->arch.migration_state = NULL;

	if (kvm->arch.use_cmma) {
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
		vfree(mgs->pgste_bitmap);
	}
	kfree(mgs);
	return 0;
}
static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int idx, res = -ENXIO;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		idx = srcu_read_lock(&kvm->srcu);
		res = kvm_s390_vm_start_migration(kvm);
		srcu_read_unlock(&kvm->srcu, idx);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->lock);

	return res;
}
static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = (kvm->arch.migration_state != NULL);

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}
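/*
 * Migration mode is driven the same way via KVM_SET_DEVICE_ATTR (sketch,
 * assuming an open VM fd "vm_fd"):
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MIGRATION,
 *		.attr  = KVM_S390_VM_MIGRATION_START,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */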
static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	if (test_kvm_facility(kvm, 139))
		kvm_s390_set_tod_clock_ext(kvm, &gtod);
	else if (gtod.epoch_idx == 0)
		kvm_s390_set_tod_clock(kvm, gtod.tod);
	else
		return -EINVAL;

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);

	return 0;
}
static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
	return 0;
}
static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_set_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
				       struct kvm_s390_vm_tod_clock *gtod)
{
	struct kvm_s390_tod_clock_ext htod;

	preempt_disable();

	get_tod_clock_ext((char *)&htod);

	gtod->tod = htod.tod + kvm->arch.epoch;
	gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;

	if (gtod->tod < htod.tod)
		gtod->epoch_idx += 1;

	preempt_enable();
}
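/*
 * Worked example for the carry handling above: with htod.tod close to
 * wrapping (say 0xfffffffffffffff0) and a guest epoch of 0x20, the 64-bit
 * addition overflows and gtod->tod (0x10) ends up smaller than htod.tod,
 * so the carry has to be propagated into epoch_idx by hand.
 */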
static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	memset(&gtod, 0, sizeof(gtod));

	if (test_kvm_facility(kvm, 139))
		kvm_s390_get_tod_clock_ext(kvm, &gtod);
	else
		gtod.tod = kvm_s390_get_tod_clock_fast(kvm);

	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);
	return 0;
}
static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}
static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_get_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}
static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;
	int ret = -EBUSY;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (!atomic_read(&kvm->online_vcpus)) {
		bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
			    KVM_S390_VM_CPU_FEAT_NR_BITS);
		ret = 0;
	}
	mutex_unlock(&kvm->lock);
	return ret;
}
static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once supported by kernel + hw, we have to store the subfunctions
	 * in kvm->arch and remember that user space configured them.
	 */
	return -ENXIO;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 kvm->arch.model.fac_list[0],
		 kvm->arch.model.fac_list[1],
		 kvm->arch.model.fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}
static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_mask[0],
		 mach->fac_mask[1],
		 mach->fac_mask[2]);
	VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_list[0],
		 mach->fac_list[1],
		 mach->fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}
static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	return 0;
}
static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once we can actually configure subfunctions (kernel + hw support),
	 * we have to check if they were already set by user space, if so copy
	 * them from kvm->arch.
	 */
	return -ENXIO;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;
	return 0;
}
static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_set_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_get_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
			ret = sclp.has_cmma ? 0 : -ENXIO;
			break;
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
			ret = 0;
			break;
		/* configuring subfunctions is not supported yet */
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_MIGRATION:
		ret = 0;
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_use_skey(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		r = get_guest_storage_key(current->mm, hva, &keys[i]);
		if (r)
			break;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);

	if (!r) {
		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
				 sizeof(uint8_t) * args->count);
		if (r)
			r = -EFAULT;
	}

	kvfree(keys);
	return r;
}
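/*
 * Retrieval sketch from userspace (hypothetical buffer "keys", assuming an
 * open VM fd "vm_fd"): one storage key byte is returned per guest frame.
 *
 *	struct kvm_s390_skeys args = {
 *		.start_gfn = 0,
 *		.count = 256,
 *		.skeydata_addr = (__u64)(unsigned long) keys,
 *	};
 *	ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 */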
static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			break;
		}

		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
		if (r)
			break;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);
out:
	kvfree(keys);
	return r;
}
/*
 * Base address and length must be sent at the start of each block, therefore
 * it's cheaper to send some clean data, as long as it's less than the size of
 * two longs.
 */
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/* for consistency */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
/*
 * This function searches for the next page with dirty CMMA attributes, and
 * saves the attributes in the buffer up to either the end of the buffer or
 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
 * no trailing clean bytes are saved.
 * In case no dirty bits were found, or if CMMA was not enabled or used, the
 * output buffer will indicate 0 as length.
 */
static int kvm_s390_get_cmma_bits(struct kvm *kvm,
				  struct kvm_s390_cmma_log *args)
{
	struct kvm_s390_migration_state *s = kvm->arch.migration_state;
	unsigned long bufsize, hva, pgstev, i, next, cur;
	int srcu_idx, peek, r = 0, rr;
	u8 *res;

	cur = args->start_gfn;
	i = next = pgstev = 0;

	if (unlikely(!kvm->arch.use_cmma))
		return -ENXIO;
	/* Invalid/unsupported flags were specified */
	if (args->flags & ~KVM_S390_CMMA_PEEK)
		return -EINVAL;
	/* Migration mode query, and we are not doing a migration */
	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
	if (!peek && !s)
		return -EINVAL;
	/* CMMA is disabled or was not used, or the buffer has length zero */
	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
	if (!bufsize || !kvm->mm->context.use_cmma) {
		memset(args, 0, sizeof(*args));
		return 0;
	}

	if (!peek) {
		/* We are not peeking, and there are no dirty pages */
		if (!atomic64_read(&s->dirty_pages)) {
			memset(args, 0, sizeof(*args));
			return 0;
		}
		cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
				    args->start_gfn);
		if (cur >= s->bitmap_size)	/* nothing found, loop back */
			cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
		if (cur >= s->bitmap_size) {	/* again! (very unlikely) */
			memset(args, 0, sizeof(*args));
			return 0;
		}
		next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
	}

	res = vmalloc(bufsize);
	if (!res)
		return -ENOMEM;

	args->start_gfn = cur;

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	while (i < bufsize) {
		hva = gfn_to_hva(kvm, cur);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}
		/* decrement only if we actually flipped the bit to 0 */
		if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
			atomic64_dec(&s->dirty_pages);
		r = get_pgste(kvm->mm, hva, &pgstev);
		if (r < 0)
			pgstev = 0;
		/* save the value */
		res[i++] = (pgstev >> 24) & 0x43;
		/*
		 * if the next bit is too far away, stop.
		 * if we reached the previous "next", find the next one
		 */
		if (!peek) {
			if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
				break;
			if (cur == next)
				next = find_next_bit(s->pgste_bitmap,
						     s->bitmap_size, cur + 1);
			/* reached the end of the bitmap or of the buffer, stop */
			if ((next >= s->bitmap_size) ||
			    (next >= args->start_gfn + bufsize))
				break;
		}
		cur++;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);
	args->count = i;
	args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;

	rr = copy_to_user((void __user *)args->values, res, args->count);
	if (rr)
		r = -EFAULT;

	vfree(res);
	return r;
}
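/*
 * Drain sketch from userspace (hypothetical buffer "buf" of "buflen" bytes,
 * assuming an open VM fd "vm_fd"): values receives one attribute byte per
 * page; remaining reports how many dirty pages are left overall.
 *
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count = buflen,
 *		.values = (__u64)(unsigned long) buf,
 *	};
 *	ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log);
 */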
/*
 * This function sets the CMMA attributes for the given pages. If the input
 * buffer has zero length, no action is taken, otherwise the attributes are
 * set and the mm->context.use_cmma flag is set.
 */
static int kvm_s390_set_cmma_bits(struct kvm *kvm,
				  const struct kvm_s390_cmma_log *args)
{
	unsigned long hva, mask, pgstev, i;
	uint8_t *bits;
	int srcu_idx, r = 0;

	mask = args->mask;

	if (!kvm->arch.use_cmma)
		return -ENXIO;
	/* invalid/unsupported flags */
	if (args->flags != 0)
		return -EINVAL;
	/* Enforce sane limit on memory allocation */
	if (args->count > KVM_S390_CMMA_SIZE_MAX)
		return -EINVAL;
	/* Nothing to do */
	if (args->count == 0)
		return 0;

	bits = vmalloc(sizeof(*bits) * args->count);
	if (!bits)
		return -ENOMEM;

	r = copy_from_user(bits, (void __user *)args->values, args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		pgstev = bits[i];
		pgstev = pgstev << 24;
		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
		set_pgste_bits(kvm->mm, hva, mask, pgstev);
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);

	if (!kvm->mm->context.use_cmma) {
		down_write(&kvm->mm->mmap_sem);
		kvm->mm->context.use_cmma = 1;
		up_write(&kvm->mm->mmap_sem);
	}
out:
	vfree(bits);
	return r;
}
long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
		}
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_set_attr(kvm, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_get_attr(kvm, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_has_attr(kvm, &attr);
		break;
	}
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_get_skeys(kvm, &args);
		break;
	}
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_set_skeys(kvm, &args);
		break;
	}
	case KVM_S390_GET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		r = kvm_s390_get_cmma_bits(kvm, &args);
		if (!r) {
			r = copy_to_user(argp, &args, sizeof(args));
			if (r)
				r = -EFAULT;
		}
		break;
	}
	case KVM_S390_SET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		r = kvm_s390_set_cmma_bits(kvm, &args);
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}
static int kvm_s390_query_ap_config(u8 *config)
{
	u32 fcn_code = 0x04000000UL;
	u32 cc = 0;

	memset(config, 0, 128);
	asm volatile(
		"lgr 0,%1\n"
		"lgr 2,%2\n"
		".long 0xb2af0000\n"		/* PQAP(QCI) */
		"0: ipm %0\n"
		"srl %0,28\n"
		"1:\n"
		EX_TABLE(0b, 1b)
		: "+r" (cc)
		: "r" (fcn_code), "r" (config)
		: "cc", "0", "2", "memory"
	);

	return cc;
}

static int kvm_s390_apxa_installed(void)
{
	u8 config[128];
	int cc;

	if (test_facility(12)) {
		cc = kvm_s390_query_ap_config(config);

		if (cc)
			pr_err("PQAP(QCI) failed with cc=%d", cc);
		else
			return config[0] & 0x40;
	}

	return 0;
}
static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

	if (kvm_s390_apxa_installed())
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
	else
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}

static u64 kvm_s390_get_initial_cpuid(void)
{
	struct cpuid cpuid;

	get_cpu_id(&cpuid);
	cpuid.version = 0xff;
	return *((u64 *) &cpuid);
}
static void kvm_s390_crypto_init(struct kvm *kvm)
{
	if (!test_kvm_facility(kvm, 76))
		return;

	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
	kvm_s390_set_crycb_format(kvm);

	/* Enable AES/DEA protected key functions by default */
	kvm->arch.crypto.aes_kw = 1;
	kvm->arch.crypto.dea_kw = 1;
	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
}

static void sca_dispose(struct kvm *kvm)
{
	if (kvm->arch.use_esca)
		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
	else
		free_page((unsigned long)(kvm->arch.sca));
	kvm->arch.sca = NULL;
}
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	gfp_t alloc_flags = GFP_KERNEL;
	int i, rc;
	char debug_name[16];
	static unsigned long sca_offset;

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	rc = -ENOMEM;

	kvm->arch.use_esca = 0; /* start with basic SCA */
	if (!sclp.has_64bscao)
		alloc_flags |= GFP_DMA;
	rwlock_init(&kvm->arch.sca_lock);
	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
	if (!kvm->arch.sca)
		goto out_err;
	spin_lock(&kvm_lock);
	sca_offset += 16;
	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
		sca_offset = 0;
	kvm->arch.sca = (struct bsca_block *)
			((char *) kvm->arch.sca + sca_offset);
	spin_unlock(&kvm_lock);

	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_err;

	kvm->arch.sie_page2 =
	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.sie_page2)
		goto out_err;

	/* Populate the facility mask initially. */
	memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
		if (i < kvm_s390_fac_list_mask_size())
			kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
		else
			kvm->arch.model.fac_mask[i] = 0UL;
	}

	/* Populate the facility list initially. */
	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
	memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);

	/* we are always in czam mode - even on pre z14 machines */
	set_kvm_facility(kvm->arch.model.fac_mask, 138);
	set_kvm_facility(kvm->arch.model.fac_list, 138);
	/* we emulate STHYI in kvm */
	set_kvm_facility(kvm->arch.model.fac_mask, 74);
	set_kvm_facility(kvm->arch.model.fac_list, 74);
	if (MACHINE_HAS_TLB_GUEST) {
		set_kvm_facility(kvm->arch.model.fac_mask, 147);
		set_kvm_facility(kvm->arch.model.fac_list, 147);
	}

	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
	kvm->arch.model.ibc = sclp.ibc & 0x0fff;

	kvm_s390_crypto_init(kvm);

	mutex_init(&kvm->arch.float_int.ais_lock);
	kvm->arch.float_int.simm = 0;
	kvm->arch.float_int.nimm = 0;
	spin_lock_init(&kvm->arch.float_int.lock);
	for (i = 0; i < FIRQ_LIST_COUNT; i++)
		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
	init_waitqueue_head(&kvm->arch.ipte_wq);
	mutex_init(&kvm->arch.ipte_mutex);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "vm created with type %lu", type);

	if (type & KVM_VM_S390_UCONTROL) {
		kvm->arch.gmap = NULL;
		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
	} else {
		if (sclp.hamax == U64_MAX)
			kvm->arch.mem_limit = TASK_SIZE_MAX;
		else
			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
						    sclp.hamax + 1);
		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
		if (!kvm->arch.gmap)
			goto out_err;
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;
	}

	kvm->arch.css_support = 0;
	kvm->arch.use_irqchip = 0;
	kvm->arch.epoch = 0;

	spin_lock_init(&kvm->arch.start_stop_lock);
	kvm_s390_vsie_init(kvm);
	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);

	return 0;
out_err:
	free_page((unsigned long)kvm->arch.sie_page2);
	debug_unregister(kvm->arch.dbf);
	sca_dispose(kvm);
	KVM_EVENT(3, "creation of vm failed: %d", rc);
	return rc;
}
bool kvm_arch_has_vcpu_debugfs(void)
{
	return false;
}

int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
	return 0;
}
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
	kvm_s390_clear_local_irqs(vcpu);
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_is_ucontrol(vcpu->kvm))
		sca_del_vcpu(vcpu);

	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_remove(vcpu->arch.gmap);

	if (vcpu->kvm->arch.use_cmma)
		kvm_s390_vcpu_unsetup_cmma(vcpu);
	free_page((unsigned long)(vcpu->arch.sie_block));

	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}
static void kvm_free_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_destroy(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);
}
void kvm_arch_destroy_vm(struct kvm *kvm)
{
	kvm_free_vcpus(kvm);
	sca_dispose(kvm);
	debug_unregister(kvm->arch.dbf);
	free_page((unsigned long)kvm->arch.sie_page2);
	if (!kvm_is_ucontrol(kvm))
		gmap_remove(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	kvm_s390_vsie_destroy(kvm);
	if (kvm->arch.migration_state) {
		vfree(kvm->arch.migration_state->pgste_bitmap);
		kfree(kvm->arch.migration_state);
	}
	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
/* Section: vcpu related */
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
	if (!vcpu->arch.gmap)
		return -ENOMEM;
	vcpu->arch.gmap->private = vcpu->kvm;

	return 0;
}
static void sca_del_vcpu(struct kvm_vcpu *vcpu)
{
	if (!kvm_s390_use_sca_entries())
		return;
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}

static void sca_add_vcpu(struct kvm_vcpu *vcpu)
{
	if (!kvm_s390_use_sca_entries()) {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		/* we still need the basic sca for the ipte control */
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		return;
	}
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}
/* Basic SCA to Extended SCA data copy routines */
static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
{
	d->sda = s->sda;
	d->sigp_ctrl.c = s->sigp_ctrl.c;
	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
}

static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
{
	int i;

	d->ipte_control = s->ipte_control;
	d->mcn[0] = s->mcn;
	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
}
static int sca_switch_to_extended(struct kvm *kvm)
{
	struct bsca_block *old_sca = kvm->arch.sca;
	struct esca_block *new_sca;
	struct kvm_vcpu *vcpu;
	unsigned int vcpu_idx;
	u32 scaol, scaoh;

	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
	if (!new_sca)
		return -ENOMEM;

	scaoh = (u32)((u64)(new_sca) >> 32);
	scaol = (u32)(u64)(new_sca) & ~0x3fU;

	kvm_s390_vcpu_block_all(kvm);
	write_lock(&kvm->arch.sca_lock);

	sca_copy_b_to_e(new_sca, old_sca);

	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
		vcpu->arch.sie_block->scaoh = scaoh;
		vcpu->arch.sie_block->scaol = scaol;
		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
	}
	kvm->arch.sca = new_sca;
	kvm->arch.use_esca = 1;

	write_unlock(&kvm->arch.sca_lock);
	kvm_s390_vcpu_unblock_all(kvm);

	free_page((unsigned long)old_sca);

	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
		 old_sca, kvm->arch.sca);
	return 0;
}
static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
{
	int rc;

	if (!kvm_s390_use_sca_entries()) {
		if (id < KVM_MAX_VCPUS)
			return true;
		return false;
	}
	if (id < KVM_S390_BSCA_CPU_SLOTS)
		return true;
	if (!sclp.has_esca || !sclp.has_64bscao)
		return false;

	mutex_lock(&kvm->lock);
	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
	mutex_unlock(&kvm->lock);

	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
}
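/*
 * Sizing note: a basic SCA provides KVM_S390_BSCA_CPU_SLOTS (64) entries,
 * an extended SCA provides KVM_S390_ESCA_CPU_SLOTS (248). The switch to the
 * extended format is performed lazily, only when a VCPU id beyond the basic
 * range is actually requested.
 */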
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
				    KVM_SYNC_GPRS |
				    KVM_SYNC_ACRS |
				    KVM_SYNC_CRS |
				    KVM_SYNC_ARCH0 |
				    KVM_SYNC_PFAULT;
	kvm_s390_set_prefix(vcpu, 0);
	if (test_kvm_facility(vcpu->kvm, 64))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
	if (test_kvm_facility(vcpu->kvm, 133))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
	/* fprs can be synchronized via vrs, even if the guest has no vx. With
	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
	 */
	if (MACHINE_HAS_VX)
		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
	else
		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;

	if (kvm_is_ucontrol(vcpu->kvm))
		return __kvm_ucontrol_vcpu_init(vcpu);

	return 0;
}
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.cputm_start = get_tod_clock_fast();
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	vcpu->arch.cputm_start = 0;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
	vcpu->arch.cputm_enabled = true;
	__start_cpu_timer_accounting(vcpu);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
	__stop_cpu_timer_accounting(vcpu);
	vcpu->arch.cputm_enabled = false;
}
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}

static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}

/* set the cpu timer - may only be called from the VCPU thread itself */
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	if (vcpu->arch.cputm_enabled)
		vcpu->arch.cputm_start = get_tod_clock_fast();
	vcpu->arch.sie_block->cputm = cputm;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
	preempt_enable();
}
/* update and get the cpu timer - can also be called from other VCPU threads */
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
{
	unsigned int seq;
	__u64 value;

	if (unlikely(!vcpu->arch.cputm_enabled))
		return vcpu->arch.sie_block->cputm;

	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	do {
		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
		/*
		 * If the writer would ever execute a read in the critical
		 * section, e.g. in irq context, we have a deadlock.
		 */
		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
		value = vcpu->arch.sie_block->cputm;
		/* if cputm_start is 0, accounting is being started/stopped */
		if (likely(vcpu->arch.cputm_start))
			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
	preempt_enable();
	return value;
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	gmap_enable(vcpu->arch.enabled_gmap);
	atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__start_cpu_timer_accounting(vcpu);
	vcpu->cpu = cpu;
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	vcpu->cpu = -1;
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__stop_cpu_timer_accounting(vcpu);
	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
	vcpu->arch.enabled_gmap = gmap_get_enabled();
	gmap_disable(vcpu->arch.enabled_gmap);
}
static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
{
	/* this equals initial cpu reset in pop, but we don't switch to ESA */
	vcpu->arch.sie_block->gpsw.mask = 0UL;
	vcpu->arch.sie_block->gpsw.addr = 0UL;
	kvm_s390_set_prefix(vcpu, 0);
	kvm_s390_set_cpu_timer(vcpu, 0);
	vcpu->arch.sie_block->ckc = 0UL;
	vcpu->arch.sie_block->todpr = 0;
	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
	vcpu->arch.sie_block->gcr[0] = 0xE0UL;
	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
	/* make sure the new fpc will be lazily loaded */
	save_fpu_regs();
	current->thread.fpu.fpc = 0;
	vcpu->arch.sie_block->gbea = 1;
	vcpu->arch.sie_block->pp = 0;
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
		kvm_s390_vcpu_stop(vcpu);
	kvm_s390_clear_local_irqs(vcpu);
}
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	mutex_lock(&vcpu->kvm->lock);
	preempt_disable();
	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
	preempt_enable();
	mutex_unlock(&vcpu->kvm->lock);
	if (!kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
		sca_add_vcpu(vcpu);
	}
	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
	/* make vcpu_load load the right gmap on the first trigger */
	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
{
	if (!test_kvm_facility(vcpu->kvm, 76))
		return;

	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);

	if (vcpu->kvm->arch.crypto.aes_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
	if (vcpu->kvm->arch.crypto.dea_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;

	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
}

void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
	free_page(vcpu->arch.sie_block->cbrlo);
	vcpu->arch.sie_block->cbrlo = 0;
}

int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
{
	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
	if (!vcpu->arch.sie_block->cbrlo)
		return -ENOMEM;

	vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
	return 0;
}

static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;

	vcpu->arch.sie_block->ibc = model->ibc;
	if (test_kvm_facility(vcpu->kvm, 7))
		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
}

int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
	int rc = 0;

	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
						    CPUSTAT_SM |
						    CPUSTAT_STOPPED);

	if (test_kvm_facility(vcpu->kvm, 78))
		atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
	else if (test_kvm_facility(vcpu->kvm, 8))
		atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);

	kvm_s390_vcpu_setup_model(vcpu);

	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
	if (MACHINE_HAS_ESOP)
		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
	if (test_kvm_facility(vcpu->kvm, 9))
		vcpu->arch.sie_block->ecb |= ECB_SRSI;
	if (test_kvm_facility(vcpu->kvm, 73))
		vcpu->arch.sie_block->ecb |= ECB_TE;

	if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
	if (test_kvm_facility(vcpu->kvm, 130))
		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
	if (sclp.has_cei)
		vcpu->arch.sie_block->eca |= ECA_CEI;
	if (sclp.has_ib)
		vcpu->arch.sie_block->eca |= ECA_IB;
	if (sclp.has_siif)
		vcpu->arch.sie_block->eca |= ECA_SII;
	if (sclp.has_sigpif)
		vcpu->arch.sie_block->eca |= ECA_SIGPI;
	if (test_kvm_facility(vcpu->kvm, 129)) {
		vcpu->arch.sie_block->eca |= ECA_VX;
		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
	}
	if (test_kvm_facility(vcpu->kvm, 139))
		vcpu->arch.sie_block->ecd |= ECD_MEF;

	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
					| SDNXC;
	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;

	if (sclp.has_kss)
		atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
	else
		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;

	if (vcpu->kvm->arch.use_cmma) {
		rc = kvm_s390_vcpu_setup_cmma(vcpu);
		if (rc)
			return rc;
	}
	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;

	kvm_s390_vcpu_crypto_setup(vcpu);

	return rc;
}

struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
				      unsigned int id)
{
	struct kvm_vcpu *vcpu;
	struct sie_page *sie_page;
	int rc = -EINVAL;

	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
		goto out;

	rc = -ENOMEM;

	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!vcpu)
		goto out;

	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
	if (!sie_page)
		goto out_free_cpu;

	vcpu->arch.sie_block = &sie_page->sie_block;
	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

	/* the real guest size will always be smaller than msl */
	vcpu->arch.sie_block->mso = 0;
	vcpu->arch.sie_block->msl = sclp.hamax;

	vcpu->arch.sie_block->icpua = id;
	spin_lock_init(&vcpu->arch.local_int.lock);
	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
	vcpu->arch.local_int.wq = &vcpu->wq;
	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
	seqcount_init(&vcpu->arch.cputm_seqcount);

	rc = kvm_vcpu_init(vcpu, kvm, id);
	if (rc)
		goto out_free_sie_block;
	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
		 vcpu->arch.sie_block);
	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);

	return vcpu;
out_free_sie_block:
	free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
	kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
	return ERR_PTR(rc);
}

int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return kvm_s390_vcpu_has_irq(vcpu, 0);
}

bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
{
	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
}

void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
}

/*
 * Kick a guest cpu out of SIE and wait until SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately.
 */
void exit_sie(struct kvm_vcpu *vcpu)
{
	atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
		cpu_relax();
}

/* Kick a guest cpu out of SIE to process a request synchronously */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}

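/*
 * Note on the request flow above: kvm_s390_sync_request() first queues the
 * request, then kvm_s390_vcpu_request() raises PROG_REQUEST and kicks the
 * VCPU out of SIE. The VCPU cannot reenter SIE before
 * kvm_s390_vcpu_request_handled() has cleared PROG_REQUEST, so the request
 * is guaranteed to be seen before guest execution resumes.
 */
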
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end)
{
	struct kvm *kvm = gmap->private;
	struct kvm_vcpu *vcpu;
	unsigned long prefix;
	int i;

	if (gmap_is_shadow(gmap))
		return;
	if (start >= 1UL << 31)
		/* We are only interested in prefix pages */
		return;
	kvm_for_each_vcpu(i, vcpu, kvm) {
		/* match against both prefix pages */
		prefix = kvm_s390_get_prefix(vcpu);
		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
				   start, end);
			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
		}
	}
}

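/*
 * Worked example for the interval check above: with a prefix of 0x8000 the
 * two prefix pages cover 0x8000..0x9fff (2 * PAGE_SIZE with 4K pages). A
 * notification for start = 0x9000, end = 0x9fff satisfies
 * (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) and therefore queues
 * KVM_REQ_MMU_RELOAD for that VCPU.
 */
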
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}

static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = put_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = put_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = put_user(kvm_s390_get_cpu_timer(vcpu),
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = put_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = put_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = put_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = put_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = put_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = put_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}

static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;
	__u64 val;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = get_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = get_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = get_user(val, (u64 __user *)reg->addr);
		if (!r)
			kvm_s390_set_cpu_timer(vcpu, val);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = get_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = get_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = get_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = get_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = get_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = get_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}

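/*
 * Userspace reaches the two handlers above via the KVM_GET_ONE_REG and
 * KVM_SET_ONE_REG ioctls. A minimal sketch (vcpu_fd and value are
 * hypothetical userspace names; UAPI constants come from <linux/kvm.h>):
 *
 *	__u64 value;
 *	struct kvm_one_reg reg = {
 *		.id   = KVM_REG_S390_CPU_TIMER,
 *		.addr = (__u64)&value,
 *	};
 *	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
 */
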
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
	return 0;
}

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	if (test_fp_ctl(fpu->fpc))
		return -EINVAL;
	vcpu->run->s.regs.fpc = fpu->fpc;
	if (MACHINE_HAS_VX)
		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
				 (freg_t *) fpu->fprs);
	else
		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	/* make sure we have the latest values */
	save_fpu_regs();
	if (MACHINE_HAS_VX)
		convert_vx_to_fp((freg_t *) fpu->fprs,
				 (__vector128 *) vcpu->run->s.regs.vrs);
	else
		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
	fpu->fpc = vcpu->run->s.regs.fpc;
	return 0;
}

static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!is_vcpu_stopped(vcpu))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}

#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_ENABLE)

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	int rc = 0;

	vcpu->guest_debug = 0;
	kvm_s390_clear_bp_data(vcpu);

	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
		return -EINVAL;
	if (!sclp.has_gpere)
		return -EINVAL;

	if (dbg->control & KVM_GUESTDBG_ENABLE) {
		vcpu->guest_debug = dbg->control;
		/* enforce guest PER */
		atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);

		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
			rc = kvm_s390_import_bp_data(vcpu, dbg);
	} else {
		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
		vcpu->arch.guestdbg.last_bp = 0;
	}

	if (rc) {
		vcpu->guest_debug = 0;
		kvm_s390_clear_bp_data(vcpu);
		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
	}

	return rc;
}

int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	/* CHECK_STOP and LOAD are not supported yet */
	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
				       KVM_MP_STATE_OPERATING;
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int rc = 0;

	/* user space knows about this interface - let it control the state */
	vcpu->kvm->arch.user_cpu_state_ctrl = 1;

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_STOPPED:
		kvm_s390_vcpu_stop(vcpu);
		break;
	case KVM_MP_STATE_OPERATING:
		kvm_s390_vcpu_start(vcpu);
		break;
	case KVM_MP_STATE_LOAD:
	case KVM_MP_STATE_CHECK_STOP:
		/* fall through - CHECK_STOP and LOAD are not supported yet */
	default:
		rc = -ENXIO;
	}

	return rc;
}

static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
}

static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
	kvm_s390_vcpu_request_handled(vcpu);
	if (!kvm_request_pending(vcpu))
		return 0;
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Let's just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		int rc;

		rc = gmap_mprotect_notify(vcpu->arch.gmap,
					  kvm_s390_get_prefix(vcpu),
					  PAGE_SIZE * 2, PROT_WRITE);
		if (rc) {
			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
			return rc;
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
		vcpu->arch.sie_block->ihcpu = 0xffff;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			atomic_or(CPUSTAT_IBS,
				  &vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			atomic_andnot(CPUSTAT_IBS,
				      &vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
		/*
		 * Disable CMMA virtualization; we will emulate the ESSA
		 * instruction manually, in order to provide additional
		 * functionalities needed for live migration.
		 */
		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
		/*
		 * Re-enable CMMA virtualization if CMMA is available and
		 * was used.
		 */
		if ((vcpu->kvm->arch.use_cmma) &&
		    (vcpu->kvm->mm->context.use_cmma))
			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
		goto retry;
	}

	/* nothing to do, just clear the request */
	kvm_clear_request(KVM_REQ_UNHALT, vcpu);

	return 0;
}

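/*
 * The goto-retry pattern above is deliberate: each kvm_check_request()
 * consumes exactly one request bit, and restarting at
 * kvm_s390_vcpu_request_handled() ensures that requests racing in while an
 * earlier one is being processed are still observed before the VCPU is
 * allowed back into SIE.
 */
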
void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
				const struct kvm_s390_vm_tod_clock *gtod)
{
	struct kvm_vcpu *vcpu;
	struct kvm_s390_tod_clock_ext htod;
	int i;

	mutex_lock(&kvm->lock);
	preempt_disable();

	get_tod_clock_ext((char *)&htod);

	kvm->arch.epoch = gtod->tod - htod.tod;
	kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;

	if (kvm->arch.epoch > gtod->tod)
		kvm->arch.epdx -= 1;

	kvm_s390_vcpu_block_all(kvm);
	kvm_for_each_vcpu(i, vcpu, kvm) {
		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
		vcpu->arch.sie_block->epdx = kvm->arch.epdx;
	}

	kvm_s390_vcpu_unblock_all(kvm);
	preempt_enable();
	mutex_unlock(&kvm->lock);
}

void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
{
	struct kvm_vcpu *vcpu;
	int i;

	mutex_lock(&kvm->lock);
	preempt_disable();
	kvm->arch.epoch = tod - get_tod_clock();
	kvm_s390_vcpu_block_all(kvm);
	kvm_for_each_vcpu(i, vcpu, kvm)
		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
	kvm_s390_vcpu_unblock_all(kvm);
	preempt_enable();
	mutex_unlock(&kvm->lock);
}

/**
 * kvm_arch_fault_in_page - fault-in guest page if necessary
 * @vcpu: The corresponding virtual cpu
 * @gpa: Guest physical address
 * @writable: Whether the page should be writable or not
 *
 * Make sure that a guest page has been faulted-in on the host.
 *
 * Return: Zero on success, negative error code otherwise.
 */
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
{
	return gmap_fault(vcpu->arch.gmap, gpa,
			  writable ? FAULT_FLAG_WRITE : 0);
}

static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
				      unsigned long token)
{
	struct kvm_s390_interrupt inti;
	struct kvm_s390_irq irq;

	if (start_token) {
		irq.u.ext.ext_params2 = token;
		irq.type = KVM_S390_INT_PFAULT_INIT;
		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
	} else {
		inti.type = KVM_S390_INT_PFAULT_DONE;
		inti.parm64 = token;
		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
	}
}

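/*
 * Sketch of the pfault handshake implemented above: when a host page is not
 * resident, the guest receives a PFAULT_INIT external interrupt carrying the
 * token it registered and may schedule other work; once the page has been
 * faulted in, it receives PFAULT_DONE with the same token. INIT is a
 * VCPU-local interrupt while DONE is floating (VM-wide), which is why the
 * two arms use kvm_s390_inject_vcpu() and kvm_s390_inject_vm() respectively.
 */
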
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly,
	 * but we still want check_async_completion to cleanup
	 */
	return true;
}

static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
	hva_t hva;
	struct kvm_arch_async_pf arch;
	int rc;

	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
		return 0;
	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
	    vcpu->arch.pfault_compare)
		return 0;
	if (psw_extint_disabled(vcpu))
		return 0;
	if (kvm_s390_vcpu_has_irq(vcpu, 0))
		return 0;
	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
		return 0;
	if (!vcpu->arch.gmap->pfault_enabled)
		return 0;

	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
	hva += current->thread.gmap_addr & ~PAGE_MASK;
	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
		return 0;

	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
	return rc;
}

static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	int rc, cpuflags;

	/*
	 * On s390 notifications for arriving pages will be delivered directly
	 * to the guest but the housekeeping for completed pfaults is
	 * handled outside the worker.
	 */
	kvm_check_async_pf_completion(vcpu);

	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];

	if (need_resched())
		schedule();

	if (test_cpu_flag(CIF_MCCK_PENDING))
		s390_handle_mcck();

	if (!kvm_is_ucontrol(vcpu->kvm)) {
		rc = kvm_s390_deliver_pending_interrupts(vcpu);
		if (rc)
			return rc;
	}

	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;

	if (guestdbg_enabled(vcpu)) {
		kvm_s390_backup_guest_per_regs(vcpu);
		kvm_s390_patch_guest_per_regs(vcpu);
	}

	vcpu->arch.sie_block->icptcode = 0;
	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
	trace_kvm_s390_sie_enter(vcpu, cpuflags);

	return 0;
}

static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_pgm_info pgm_info = {
		.code = PGM_ADDRESSING,
	};
	u8 opcode, ilen;
	int rc;

	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
	trace_kvm_s390_sie_fault(vcpu);

	/*
	 * We want to inject an addressing exception, which is defined as a
	 * suppressing or terminating exception. However, since we came here
	 * by a DAT access exception, the PSW still points to the faulting
	 * instruction since DAT exceptions are nullifying. So we've got
	 * to look up the current opcode to get the length of the instruction
	 * to be able to forward the PSW.
	 */
	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
	ilen = insn_length(opcode);
	if (rc < 0) {
		return rc;
	} else if (rc) {
		/* Instruction-Fetching Exceptions - we can't detect the ilen.
		 * Forward by arbitrary ilc, injection will take care of
		 * nullification if necessary.
		 */
		pgm_info = vcpu->arch.pgm;
		ilen = 4;
	}
	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
	kvm_s390_forward_psw(vcpu, ilen);
	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}

static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
	struct mcck_volatile_info *mcck_info;
	struct sie_page *sie_page;

	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

	if (guestdbg_enabled(vcpu))
		kvm_s390_restore_guest_per_regs(vcpu);

	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;

	if (exit_reason == -EINTR) {
		VCPU_EVENT(vcpu, 3, "%s", "machine check");
		sie_page = container_of(vcpu->arch.sie_block,
					struct sie_page, sie_block);
		mcck_info = &sie_page->mcck_info;
		kvm_s390_reinject_machine_check(vcpu, mcck_info);
		return 0;
	}

	if (vcpu->arch.sie_block->icptcode > 0) {
		int rc = kvm_handle_sie_intercept(vcpu);

		if (rc != -EOPNOTSUPP)
			return rc;
		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
		return -EREMOTE;
	} else if (exit_reason != -EFAULT) {
		vcpu->stat.exit_null++;
		return 0;
	} else if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
		vcpu->run->s390_ucontrol.trans_exc_code =
						current->thread.gmap_addr;
		vcpu->run->s390_ucontrol.pgm_code = 0x10;
		return -EREMOTE;
	} else if (current->thread.gmap_pfault) {
		trace_kvm_s390_major_guest_pfault(vcpu);
		current->thread.gmap_pfault = 0;
		if (kvm_arch_setup_async_pf(vcpu))
			return 0;
		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
	}
	return vcpu_post_run_fault_in_sie(vcpu);
}

static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when
	 * running the guest), so that memslots (and other stuff) are protected
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in fault handler, between
		 * guest_enter and guest_exit should be no uaccess.
		 */
		local_irq_disable();
		guest_enter_irqoff();
		__disable_cpu_timer_accounting(vcpu);
		local_irq_enable();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		local_irq_disable();
		__enable_cpu_timer_accounting(vcpu);
		guest_exit_irqoff();
		local_irq_enable();
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}

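/*
 * Ordering note (an interpretation of the loop above, not normative): irqs
 * stay disabled across guest_enter_irqoff()/guest_exit_irqoff() so the
 * context-tracking transitions are atomic, and the software CPU timer
 * accounting is switched off while the hardware accounts guest time under
 * SIE, then switched back on before host code runs on the guest's behalf.
 */
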
static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	struct runtime_instr_cb *riccb;
	struct gs_cb *gscb;

	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	}
	/*
	 * If userspace sets the riccb (e.g. after migration) to a valid state,
	 * we should enable RI here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
	    test_kvm_facility(vcpu->kvm, 64) &&
	    riccb->v &&
	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
	}
	/*
	 * If userspace sets the gscb (e.g. after migration) to non-zero,
	 * we should enable GS here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
	    test_kvm_facility(vcpu->kvm, 133) &&
	    gscb->gssm &&
	    !vcpu->arch.gs_enabled) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
		vcpu->arch.sie_block->ecb |= ECB_GS;
		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
		vcpu->arch.gs_enabled = 1;
	}
	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	/* save host (userspace) fprs/vrs */
	save_fpu_regs();
	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
	if (MACHINE_HAS_VX)
		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
	else
		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;
	if (MACHINE_HAS_GS) {
		preempt_disable();
		__ctl_set_bit(2, 4);
		if (current->thread.gs_cb) {
			vcpu->arch.host_gscb = current->thread.gs_cb;
			save_gs_cb(vcpu->arch.host_gscb);
		}
		if (vcpu->arch.gs_enabled) {
			current->thread.gs_cb = (struct gs_cb *)
						&vcpu->run->s.regs.gscb;
			restore_gs_cb(current->thread.gs_cb);
		}
		preempt_enable();
	}

	kvm_run->kvm_dirty_regs = 0;
}

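/*
 * After sync_regs() the guest FP/vector state lives in current->thread.fpu
 * and is loaded lazily on the next FP use, while the previous userspace
 * state is parked in vcpu->arch.host_fpregs. store_regs() below performs
 * the exact inverse, so the two must bracket every __vcpu_run() invocation.
 */
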
static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
	save_access_regs(vcpu->run->s.regs.acrs);
	restore_access_regs(vcpu->arch.host_acrs);
	/* Save guest register state */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	/* Restore will be done lazily at return */
	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
	if (MACHINE_HAS_GS) {
		__ctl_set_bit(2, 4);
		if (vcpu->arch.gs_enabled)
			save_gs_cb(current->thread.gs_cb);
		preempt_disable();
		current->thread.gs_cb = vcpu->arch.host_gscb;
		restore_gs_cb(vcpu->arch.host_gscb);
		preempt_enable();
		if (!vcpu->arch.host_gscb)
			__ctl_clear_bit(2, 4);
		vcpu->arch.host_gscb = NULL;
	}
}

int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int rc;

	if (kvm_run->immediate_exit)
		return -EINTR;

	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		return 0;
	}

	kvm_sigset_activate(vcpu);

	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		return -EINVAL;
	}

	sync_regs(vcpu, kvm_run);
	enable_cpu_timer_accounting(vcpu);

	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}

	disable_cpu_timer_accounting(vcpu);
	store_regs(vcpu, kvm_run);

	kvm_sigset_deactivate(vcpu);

	vcpu->stat.exit_userspace++;
	return rc;
}

/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	freg_t fprs[NUM_FPRS];
	unsigned int px;
	u64 clkcomp, cputm;
	int rc;

	px = kvm_s390_get_prefix(vcpu);
	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = 0;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = px;
	} else
		gpa -= __LC_FPREGS_SAVE_AREA;

	/* manually convert vector registers if necessary */
	if (MACHINE_HAS_VX) {
		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     fprs, 128);
	} else {
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     vcpu->run->s.regs.fprs, 128);
	}
	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
			      &vcpu->arch.sie_block->gpsw, 16);
	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
			      &px, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
			      &vcpu->run->s.regs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
			      &vcpu->arch.sie_block->todpr, 4);
	cputm = kvm_s390_get_cpu_timer(vcpu);
	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
			      &cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}

int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * switch in the run ioctl. Let's update our copies before we save
	 * it into the save area
	 */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}

static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}

void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
}

void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
	kvm_s390_clear_stop_irq(vcpu);

	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
}

static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r, srcu_idx;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags)
		return -EINVAL;

	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;

	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_FETCH);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_STORE);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}

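/*
 * A minimal userspace sketch of the memop interface above (vcpu_fd and buf
 * are hypothetical names; UAPI types come from <linux/kvm.h>):
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = 0x10000,
 *		.size  = 4096,
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)buf,
 *		.ar    = 0,
 *	};
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 *
 * A positive return value is a program exception code; with
 * KVM_S390_MEMOP_F_INJECT_EXCEPTION set it is also injected into the guest,
 * as done above.
 */
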
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			break;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}
	return r;
}

int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}

int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}

/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. We can have memory slots which have to be
	   located/ended at a segment boundary (1MB). The memory in userland is
	   ok to be fragmented into various different vmas. It is okay to mmap()
	   and munmap() stuff in this slot after doing this call at any time */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
		return -EINVAL;

	return 0;
}

void kvm_arch_commit_memory_region(struct kvm *kvm,
				   const struct kvm_userspace_memory_region *mem,
				   const struct kvm_memory_slot *old,
				   const struct kvm_memory_slot *new,
				   enum kvm_mr_change change)
{
	int rc;

	/* If the basics of the memslot do not change, we do not want
	 * to update the gmap. Every update causes several unnecessary
	 * segment translation exceptions. This is usually handled just
	 * fine by the normal fault handler + gmap, but it will also
	 * cause faults on the prefix page of running guest CPUs.
	 */
	if (old->userspace_addr == mem->userspace_addr &&
	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
	    old->npages * PAGE_SIZE == mem->memory_size)
		return;

	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
			      mem->guest_phys_addr, mem->memory_size);
	if (rc)
		pr_warn("failed to commit memory region\n");
}

static inline unsigned long nonhyp_mask(int i)
{
	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}

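/*
 * Worked example: nonhyp_fai extracts a 2-bit field of sclp.hmfai for
 * facility word i; each increment of that field shifts 16 more bits out of
 * the mask. For i = 0 and a field value of 2:
 *
 *	0x0000ffffffffffffUL >> (2 << 4) == 0xffffUL
 *
 * so only the low 16 bits of that facility word remain settable.
 */
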
void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
	vcpu->valid_wakeup = false;
}

static int __init kvm_s390_init(void)
{
	int i;

	if (!sclp.has_sief2) {
		pr_info("SIE not available\n");
		return -ENODEV;
	}

	for (i = 0; i < 16; i++)
		kvm_s390_fac_list_mask[i] |=
			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);

	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");