1 /*
2  * hosting zSeries kernel virtual machines
3  *
4  * Copyright IBM Corp. 2008, 2009
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License (version 2 only)
8  * as published by the Free Software Foundation.
9  *
10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
11  *               Christian Borntraeger <borntraeger@de.ibm.com>
12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
14  *               Jason J. Herne <jjherne@us.ibm.com>
15  */
16
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/random.h>
27 #include <linux/slab.h>
28 #include <linux/timer.h>
29 #include <linux/vmalloc.h>
30 #include <linux/bitmap.h>
31 #include <asm/asm-offsets.h>
32 #include <asm/lowcore.h>
33 #include <asm/stp.h>
34 #include <asm/pgtable.h>
35 #include <asm/gmap.h>
36 #include <asm/nmi.h>
37 #include <asm/switch_to.h>
38 #include <asm/isc.h>
39 #include <asm/sclp.h>
40 #include <asm/cpacf.h>
41 #include <asm/timex.h>
42 #include "kvm-s390.h"
43 #include "gaccess.h"
44
45 #define KMSG_COMPONENT "kvm-s390"
46 #undef pr_fmt
47 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
48
49 #define CREATE_TRACE_POINTS
50 #include "trace.h"
51 #include "trace-s390.h"
52
53 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
54 #define LOCAL_IRQS 32
55 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
56                            (KVM_MAX_VCPUS + LOCAL_IRQS))
57
58 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
59
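/*
 * Descriptive note: these per-VCPU counters are incremented by the interrupt
 * and instruction handlers and exported by the generic KVM code as one
 * debugfs file per entry, typically under /sys/kernel/debug/kvm/.
 */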
60 struct kvm_stats_debugfs_item debugfs_entries[] = {
61         { "userspace_handled", VCPU_STAT(exit_userspace) },
62         { "exit_null", VCPU_STAT(exit_null) },
63         { "exit_validity", VCPU_STAT(exit_validity) },
64         { "exit_stop_request", VCPU_STAT(exit_stop_request) },
65         { "exit_external_request", VCPU_STAT(exit_external_request) },
66         { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
67         { "exit_instruction", VCPU_STAT(exit_instruction) },
68         { "exit_pei", VCPU_STAT(exit_pei) },
69         { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
70         { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
71         { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
72         { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
73         { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
74         { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
75         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
76         { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
77         { "instruction_lctl", VCPU_STAT(instruction_lctl) },
78         { "instruction_stctl", VCPU_STAT(instruction_stctl) },
79         { "instruction_stctg", VCPU_STAT(instruction_stctg) },
80         { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
81         { "deliver_external_call", VCPU_STAT(deliver_external_call) },
82         { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
83         { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
84         { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
85         { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
86         { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
87         { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
88         { "exit_wait_state", VCPU_STAT(exit_wait_state) },
89         { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
90         { "instruction_stidp", VCPU_STAT(instruction_stidp) },
91         { "instruction_spx", VCPU_STAT(instruction_spx) },
92         { "instruction_stpx", VCPU_STAT(instruction_stpx) },
93         { "instruction_stap", VCPU_STAT(instruction_stap) },
94         { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
95         { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
96         { "instruction_stsch", VCPU_STAT(instruction_stsch) },
97         { "instruction_chsc", VCPU_STAT(instruction_chsc) },
98         { "instruction_essa", VCPU_STAT(instruction_essa) },
99         { "instruction_stsi", VCPU_STAT(instruction_stsi) },
100         { "instruction_stfl", VCPU_STAT(instruction_stfl) },
101         { "instruction_tprot", VCPU_STAT(instruction_tprot) },
102         { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
103         { "instruction_sie", VCPU_STAT(instruction_sie) },
104         { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
105         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
106         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
107         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
108         { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
109         { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
110         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
111         { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
112         { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
113         { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
114         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
115         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
116         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
117         { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
118         { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
119         { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
120         { "diagnose_10", VCPU_STAT(diagnose_10) },
121         { "diagnose_44", VCPU_STAT(diagnose_44) },
122         { "diagnose_9c", VCPU_STAT(diagnose_9c) },
123         { "diagnose_258", VCPU_STAT(diagnose_258) },
124         { "diagnose_308", VCPU_STAT(diagnose_308) },
125         { "diagnose_500", VCPU_STAT(diagnose_500) },
126         { NULL }
127 };
128
129 /* allow nested virtualization in KVM (if enabled by user space) */
130 static int nested;
131 module_param(nested, int, S_IRUGO);
132 MODULE_PARM_DESC(nested, "Nested virtualization support");
133
134 /* upper facilities limit for kvm */
135 unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
136
137 unsigned long kvm_s390_fac_list_mask_size(void)
138 {
139         BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
140         return ARRAY_SIZE(kvm_s390_fac_list_mask);
141 }
142
143 /* available cpu features supported by kvm */
144 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
145 /* available subfunctions indicated via query / "test bit" */
146 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
147
148 static struct gmap_notifier gmap_notifier;
149 static struct gmap_notifier vsie_gmap_notifier;
150 debug_info_t *kvm_s390_dbf;
151
152 /* Section: not file related */
153 int kvm_arch_hardware_enable(void)
154 {
155         /* every s390 is virtualization enabled ;-) */
156         return 0;
157 }
158
159 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
160                               unsigned long end);
161
162 /*
163  * This callback is executed during stop_machine(). All CPUs are therefore
164  * temporarily stopped. In order not to change guest behavior, we have to
165  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
166  * so a CPU won't be stopped while calculating with the epoch.
167  */
168 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
169                           void *v)
170 {
171         struct kvm *kvm;
172         struct kvm_vcpu *vcpu;
173         int i;
174         unsigned long long *delta = v;
175
176         list_for_each_entry(kvm, &vm_list, vm_list) {
177                 kvm->arch.epoch -= *delta;
178                 kvm_for_each_vcpu(i, vcpu, kvm) {
179                         vcpu->arch.sie_block->epoch -= *delta;
180                         if (vcpu->arch.cputm_enabled)
181                                 vcpu->arch.cputm_start += *delta;
182                         if (vcpu->arch.vsie_block)
183                                 vcpu->arch.vsie_block->epoch -= *delta;
184                 }
185         }
186         return NOTIFY_OK;
187 }
188
189 static struct notifier_block kvm_clock_notifier = {
190         .notifier_call = kvm_clock_sync,
191 };
192
193 int kvm_arch_hardware_setup(void)
194 {
195         gmap_notifier.notifier_call = kvm_gmap_notifier;
196         gmap_register_pte_notifier(&gmap_notifier);
197         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
198         gmap_register_pte_notifier(&vsie_gmap_notifier);
199         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
200                                        &kvm_clock_notifier);
201         return 0;
202 }
203
204 void kvm_arch_hardware_unsetup(void)
205 {
206         gmap_unregister_pte_notifier(&gmap_notifier);
207         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
208         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
209                                          &kvm_clock_notifier);
210 }
211
212 static void allow_cpu_feat(unsigned long nr)
213 {
214         set_bit_inv(nr, kvm_s390_available_cpu_feat);
215 }
216
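/*
 * Descriptive note: plo_test_bit() runs PERFORM LOCKED OPERATION in "test
 * bit" mode. With bit 0x100 set in the function code (r0) the operation only
 * queries whether the given PLO function is installed; condition code 0 means
 * it is. kvm_s390_cpu_feat_init() below loops over all 256 function codes to
 * build the PLO subfunction bitmap that is reported to user space.
 */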
217 static inline int plo_test_bit(unsigned char nr)
218 {
219         register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
220         int cc;
221
222         asm volatile(
223                 /* Parameter registers are ignored for "test bit" */
224                 "       plo     0,0,0,0(0)\n"
225                 "       ipm     %0\n"
226                 "       srl     %0,28\n"
227                 : "=d" (cc)
228                 : "d" (r0)
229                 : "cc");
230         return cc == 0;
231 }
232
233 static void kvm_s390_cpu_feat_init(void)
234 {
235         int i;
236
237         for (i = 0; i < 256; ++i) {
238                 if (plo_test_bit(i))
239                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
240         }
241
242         if (test_facility(28)) /* TOD-clock steering */
243                 ptff(kvm_s390_available_subfunc.ptff,
244                      sizeof(kvm_s390_available_subfunc.ptff),
245                      PTFF_QAF);
246
247         if (test_facility(17)) { /* MSA */
248                 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
249                               kvm_s390_available_subfunc.kmac);
250                 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
251                               kvm_s390_available_subfunc.kmc);
252                 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
253                               kvm_s390_available_subfunc.km);
254                 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
255                               kvm_s390_available_subfunc.kimd);
256                 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
257                               kvm_s390_available_subfunc.klmd);
258         }
259         if (test_facility(76)) /* MSA3 */
260                 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
261                               kvm_s390_available_subfunc.pckmo);
262         if (test_facility(77)) { /* MSA4 */
263                 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
264                               kvm_s390_available_subfunc.kmctr);
265                 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
266                               kvm_s390_available_subfunc.kmf);
267                 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
268                               kvm_s390_available_subfunc.kmo);
269                 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
270                               kvm_s390_available_subfunc.pcc);
271         }
272         if (test_facility(57)) /* MSA5 */
273                 __cpacf_query(CPACF_PPNO, (cpacf_mask_t *)
274                               kvm_s390_available_subfunc.ppno);
275
276         if (MACHINE_HAS_ESOP)
277                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
278         /*
279          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
280          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
281          */
282         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
283             !test_facility(3) || !nested)
284                 return;
285         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
286         if (sclp.has_64bscao)
287                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
288         if (sclp.has_siif)
289                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
290         if (sclp.has_gpere)
291                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
292         if (sclp.has_gsls)
293                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
294         if (sclp.has_ib)
295                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
296         if (sclp.has_cei)
297                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
298         if (sclp.has_ibs)
299                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
300         /*
301          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
302          * all skey handling functions read/set the skey from the PGSTE
303          * instead of the real storage key.
304          *
306          * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will cause
307          * pages to be detected as preserved although they are resident.
307          *
308          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
309          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
310          *
311          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
312          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
313          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
314          *
315          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
316          * cannot easily shadow the SCA because of the ipte lock.
317          */
318 }
319
320 int kvm_arch_init(void *opaque)
321 {
322         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
323         if (!kvm_s390_dbf)
324                 return -ENOMEM;
325
326         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
327                 debug_unregister(kvm_s390_dbf);
328                 return -ENOMEM;
329         }
330
331         kvm_s390_cpu_feat_init();
332
333         /* Register floating interrupt controller interface. */
334         return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
335 }
336
337 void kvm_arch_exit(void)
338 {
339         debug_unregister(kvm_s390_dbf);
340 }
341
342 /* Section: device related */
343 long kvm_arch_dev_ioctl(struct file *filp,
344                         unsigned int ioctl, unsigned long arg)
345 {
346         if (ioctl == KVM_S390_ENABLE_SIE)
347                 return s390_enable_sie();
348         return -EINVAL;
349 }
350
351 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
352 {
353         int r;
354
355         switch (ext) {
356         case KVM_CAP_S390_PSW:
357         case KVM_CAP_S390_GMAP:
358         case KVM_CAP_SYNC_MMU:
359 #ifdef CONFIG_KVM_S390_UCONTROL
360         case KVM_CAP_S390_UCONTROL:
361 #endif
362         case KVM_CAP_ASYNC_PF:
363         case KVM_CAP_SYNC_REGS:
364         case KVM_CAP_ONE_REG:
365         case KVM_CAP_ENABLE_CAP:
366         case KVM_CAP_S390_CSS_SUPPORT:
367         case KVM_CAP_IOEVENTFD:
368         case KVM_CAP_DEVICE_CTRL:
369         case KVM_CAP_ENABLE_CAP_VM:
370         case KVM_CAP_S390_IRQCHIP:
371         case KVM_CAP_VM_ATTRIBUTES:
372         case KVM_CAP_MP_STATE:
373         case KVM_CAP_S390_INJECT_IRQ:
374         case KVM_CAP_S390_USER_SIGP:
375         case KVM_CAP_S390_USER_STSI:
376         case KVM_CAP_S390_SKEYS:
377         case KVM_CAP_S390_IRQ_STATE:
378         case KVM_CAP_S390_USER_INSTR0:
379                 r = 1;
380                 break;
381         case KVM_CAP_S390_MEM_OP:
382                 r = MEM_OP_MAX_SIZE;
383                 break;
384         case KVM_CAP_NR_VCPUS:
385         case KVM_CAP_MAX_VCPUS:
386                 r = KVM_S390_BSCA_CPU_SLOTS;
387                 if (!kvm_s390_use_sca_entries())
388                         r = KVM_MAX_VCPUS;
389                 else if (sclp.has_esca && sclp.has_64bscao)
390                         r = KVM_S390_ESCA_CPU_SLOTS;
391                 break;
392         case KVM_CAP_NR_MEMSLOTS:
393                 r = KVM_USER_MEM_SLOTS;
394                 break;
395         case KVM_CAP_S390_COW:
396                 r = MACHINE_HAS_ESOP;
397                 break;
398         case KVM_CAP_S390_VECTOR_REGISTERS:
399                 r = MACHINE_HAS_VX;
400                 break;
401         case KVM_CAP_S390_RI:
402                 r = test_facility(64);
403                 break;
404         default:
405                 r = 0;
406         }
407         return r;
408 }
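/*
 * Illustrative sketch (not kernel code): user space probes the capabilities
 * handled above with the KVM_CHECK_EXTENSION ioctl on /dev/kvm (or on a VM fd
 * via KVM_CAP_CHECK_EXTENSION_VM). A return value > 0 means the capability is
 * available; for some caps the value carries extra information, e.g. the
 * maximum transfer size for KVM_CAP_S390_MEM_OP.
 *
 *	#include <fcntl.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/kvm.h>
 *
 *	int kvm_fd = open("/dev/kvm", O_RDWR);
 *	int max_memop = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *	int max_vcpus = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_MAX_VCPUS);
 */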
409
410 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
411                                         struct kvm_memory_slot *memslot)
412 {
413         gfn_t cur_gfn, last_gfn;
414         unsigned long address;
415         struct gmap *gmap = kvm->arch.gmap;
416
417         /* Loop over all guest pages */
418         last_gfn = memslot->base_gfn + memslot->npages;
419         for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
420                 address = gfn_to_hva_memslot(memslot, cur_gfn);
421
422                 if (test_and_clear_guest_dirty(gmap->mm, address))
423                         mark_page_dirty(kvm, cur_gfn);
424                 if (fatal_signal_pending(current))
425                         return;
426                 cond_resched();
427         }
428 }
429
430 /* Section: vm related */
431 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
432
433 /*
434  * Get (and clear) the dirty memory log for a memory slot.
435  */
436 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
437                                struct kvm_dirty_log *log)
438 {
439         int r;
440         unsigned long n;
441         struct kvm_memslots *slots;
442         struct kvm_memory_slot *memslot;
443         int is_dirty = 0;
444
445         if (kvm_is_ucontrol(kvm))
446                 return -EINVAL;
447
448         mutex_lock(&kvm->slots_lock);
449
450         r = -EINVAL;
451         if (log->slot >= KVM_USER_MEM_SLOTS)
452                 goto out;
453
454         slots = kvm_memslots(kvm);
455         memslot = id_to_memslot(slots, log->slot);
456         r = -ENOENT;
457         if (!memslot->dirty_bitmap)
458                 goto out;
459
460         kvm_s390_sync_dirty_log(kvm, memslot);
461         r = kvm_get_dirty_log(kvm, log, &is_dirty);
462         if (r)
463                 goto out;
464
465         /* Clear the dirty log */
466         if (is_dirty) {
467                 n = kvm_dirty_bitmap_bytes(memslot);
468                 memset(memslot->dirty_bitmap, 0, n);
469         }
470         r = 0;
471 out:
472         mutex_unlock(&kvm->slots_lock);
473         return r;
474 }
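/*
 * Illustrative sketch (not kernel code): user space retrieves and clears the
 * dirty bitmap of a memory slot with KVM_GET_DIRTY_LOG on the VM fd. The
 * buffer (here "bitmap", caller-allocated) must hold one bit per page of the
 * slot, rounded up to a multiple of 64 bits.
 *
 *	struct kvm_dirty_log log = {
 *		.slot = 0,
 *		.dirty_bitmap = bitmap,
 *	};
 *	ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);
 */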
475
476 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
477 {
478         unsigned int i;
479         struct kvm_vcpu *vcpu;
480
481         kvm_for_each_vcpu(i, vcpu, kvm) {
482                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
483         }
484 }
485
486 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
487 {
488         int r;
489
490         if (cap->flags)
491                 return -EINVAL;
492
493         switch (cap->cap) {
494         case KVM_CAP_S390_IRQCHIP:
495                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
496                 kvm->arch.use_irqchip = 1;
497                 r = 0;
498                 break;
499         case KVM_CAP_S390_USER_SIGP:
500                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
501                 kvm->arch.user_sigp = 1;
502                 r = 0;
503                 break;
504         case KVM_CAP_S390_VECTOR_REGISTERS:
505                 mutex_lock(&kvm->lock);
506                 if (kvm->created_vcpus) {
507                         r = -EBUSY;
508                 } else if (MACHINE_HAS_VX) {
509                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
510                         set_kvm_facility(kvm->arch.model.fac_list, 129);
511                         if (test_facility(134)) {
512                                 set_kvm_facility(kvm->arch.model.fac_mask, 134);
513                                 set_kvm_facility(kvm->arch.model.fac_list, 134);
514                         }
515                         if (test_facility(135)) {
516                                 set_kvm_facility(kvm->arch.model.fac_mask, 135);
517                                 set_kvm_facility(kvm->arch.model.fac_list, 135);
518                         }
519                         r = 0;
520                 } else
521                         r = -EINVAL;
522                 mutex_unlock(&kvm->lock);
523                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
524                          r ? "(not available)" : "(success)");
525                 break;
526         case KVM_CAP_S390_RI:
527                 r = -EINVAL;
528                 mutex_lock(&kvm->lock);
529                 if (kvm->created_vcpus) {
530                         r = -EBUSY;
531                 } else if (test_facility(64)) {
532                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
533                         set_kvm_facility(kvm->arch.model.fac_list, 64);
534                         r = 0;
535                 }
536                 mutex_unlock(&kvm->lock);
537                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
538                          r ? "(not available)" : "(success)");
539                 break;
540         case KVM_CAP_S390_USER_STSI:
541                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
542                 kvm->arch.user_stsi = 1;
543                 r = 0;
544                 break;
545         case KVM_CAP_S390_USER_INSTR0:
546                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
547                 kvm->arch.user_instr0 = 1;
548                 icpt_operexc_on_all_vcpus(kvm);
549                 r = 0;
550                 break;
551         default:
552                 r = -EINVAL;
553                 break;
554         }
555         return r;
556 }
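/*
 * Illustrative sketch (not kernel code): the VM-wide capabilities handled
 * above are switched on from user space with KVM_ENABLE_CAP on the VM fd.
 * Caps that modify the facility lists (vector registers, runtime
 * instrumentation) must be enabled before the first VCPU is created.
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 */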
557
558 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
559 {
560         int ret;
561
562         switch (attr->attr) {
563         case KVM_S390_VM_MEM_LIMIT_SIZE:
564                 ret = 0;
565                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
566                          kvm->arch.mem_limit);
567                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
568                         ret = -EFAULT;
569                 break;
570         default:
571                 ret = -ENXIO;
572                 break;
573         }
574         return ret;
575 }
576
577 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
578 {
579         int ret;
580         unsigned int idx;
581         switch (attr->attr) {
582         case KVM_S390_VM_MEM_ENABLE_CMMA:
583                 ret = -ENXIO;
584                 if (!sclp.has_cmma)
585                         break;
586
587                 ret = -EBUSY;
588                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
589                 mutex_lock(&kvm->lock);
590                 if (!kvm->created_vcpus) {
591                         kvm->arch.use_cmma = 1;
592                         ret = 0;
593                 }
594                 mutex_unlock(&kvm->lock);
595                 break;
596         case KVM_S390_VM_MEM_CLR_CMMA:
597                 ret = -ENXIO;
598                 if (!sclp.has_cmma)
599                         break;
600                 ret = -EINVAL;
601                 if (!kvm->arch.use_cmma)
602                         break;
603
604                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
605                 mutex_lock(&kvm->lock);
606                 idx = srcu_read_lock(&kvm->srcu);
607                 s390_reset_cmma(kvm->arch.gmap->mm);
608                 srcu_read_unlock(&kvm->srcu, idx);
609                 mutex_unlock(&kvm->lock);
610                 ret = 0;
611                 break;
612         case KVM_S390_VM_MEM_LIMIT_SIZE: {
613                 unsigned long new_limit;
614
615                 if (kvm_is_ucontrol(kvm))
616                         return -EINVAL;
617
618                 if (get_user(new_limit, (u64 __user *)attr->addr))
619                         return -EFAULT;
620
621                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
622                     new_limit > kvm->arch.mem_limit)
623                         return -E2BIG;
624
625                 if (!new_limit)
626                         return -EINVAL;
627
628                 /* gmap_create takes last usable address */
629                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
630                         new_limit -= 1;
631
632                 ret = -EBUSY;
633                 mutex_lock(&kvm->lock);
634                 if (!kvm->created_vcpus) {
635                         /* gmap_create will round the limit up */
636                         struct gmap *new = gmap_create(current->mm, new_limit);
637
638                         if (!new) {
639                                 ret = -ENOMEM;
640                         } else {
641                                 gmap_remove(kvm->arch.gmap);
642                                 new->private = kvm;
643                                 kvm->arch.gmap = new;
644                                 ret = 0;
645                         }
646                 }
647                 mutex_unlock(&kvm->lock);
648                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
649                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
650                          (void *) kvm->arch.gmap->asce);
651                 break;
652         }
653         default:
654                 ret = -ENXIO;
655                 break;
656         }
657         return ret;
658 }
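/*
 * Illustrative sketch (not kernel code): the memory attributes above are set
 * through KVM_SET_DEVICE_ATTR on the VM fd. For KVM_S390_VM_MEM_LIMIT_SIZE,
 * addr points to a u64 holding the new limit, and the attribute must be set
 * before the first VCPU is created. The 16 GiB value is just an example.
 *
 *	__u64 limit = 16ULL << 30;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MEM_CTRL,
 *		.attr  = KVM_S390_VM_MEM_LIMIT_SIZE,
 *		.addr  = (__u64)(unsigned long)&limit,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */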
659
660 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
661
662 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
663 {
664         struct kvm_vcpu *vcpu;
665         int i;
666
667         if (!test_kvm_facility(kvm, 76))
668                 return -EINVAL;
669
670         mutex_lock(&kvm->lock);
671         switch (attr->attr) {
672         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
673                 get_random_bytes(
674                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
675                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
676                 kvm->arch.crypto.aes_kw = 1;
677                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
678                 break;
679         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
680                 get_random_bytes(
681                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
682                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
683                 kvm->arch.crypto.dea_kw = 1;
684                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
685                 break;
686         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
687                 kvm->arch.crypto.aes_kw = 0;
688                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
689                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
690                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
691                 break;
692         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
693                 kvm->arch.crypto.dea_kw = 0;
694                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
695                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
696                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
697                 break;
698         default:
699                 mutex_unlock(&kvm->lock);
700                 return -ENXIO;
701         }
702
703         kvm_for_each_vcpu(i, vcpu, kvm) {
704                 kvm_s390_vcpu_crypto_setup(vcpu);
705                 exit_sie(vcpu);
706         }
707         mutex_unlock(&kvm->lock);
708         return 0;
709 }
710
711 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
712 {
713         u8 gtod_high;
714
715         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
716                                            sizeof(gtod_high)))
717                 return -EFAULT;
718
719         if (gtod_high != 0)
720                 return -EINVAL;
721         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
722
723         return 0;
724 }
725
726 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
727 {
728         u64 gtod;
729
730         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
731                 return -EFAULT;
732
733         kvm_s390_set_tod_clock(kvm, gtod);
734         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
735         return 0;
736 }
737
738 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
739 {
740         int ret;
741
742         if (attr->flags)
743                 return -EINVAL;
744
745         switch (attr->attr) {
746         case KVM_S390_VM_TOD_HIGH:
747                 ret = kvm_s390_set_tod_high(kvm, attr);
748                 break;
749         case KVM_S390_VM_TOD_LOW:
750                 ret = kvm_s390_set_tod_low(kvm, attr);
751                 break;
752         default:
753                 ret = -ENXIO;
754                 break;
755         }
756         return ret;
757 }
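/*
 * Illustrative sketch (not kernel code): the guest TOD clock is programmed
 * through the KVM_S390_VM_TOD attribute group. Only the low 64 bits are
 * meaningful here; the high part must be written as 0 (see the check above).
 * The tod value below is hypothetical.
 *
 *	__u64 tod = 0x1234567890abcdefULL;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_TOD,
 *		.attr  = KVM_S390_VM_TOD_LOW,
 *		.addr  = (__u64)(unsigned long)&tod,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */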
758
759 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
760 {
761         u8 gtod_high = 0;
762
763         if (copy_to_user((void __user *)attr->addr, &gtod_high,
764                                          sizeof(gtod_high)))
765                 return -EFAULT;
766         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
767
768         return 0;
769 }
770
771 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
772 {
773         u64 gtod;
774
775         gtod = kvm_s390_get_tod_clock_fast(kvm);
776         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
777                 return -EFAULT;
778         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
779
780         return 0;
781 }
782
783 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
784 {
785         int ret;
786
787         if (attr->flags)
788                 return -EINVAL;
789
790         switch (attr->attr) {
791         case KVM_S390_VM_TOD_HIGH:
792                 ret = kvm_s390_get_tod_high(kvm, attr);
793                 break;
794         case KVM_S390_VM_TOD_LOW:
795                 ret = kvm_s390_get_tod_low(kvm, attr);
796                 break;
797         default:
798                 ret = -ENXIO;
799                 break;
800         }
801         return ret;
802 }
803
804 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
805 {
806         struct kvm_s390_vm_cpu_processor *proc;
807         u16 lowest_ibc, unblocked_ibc;
808         int ret = 0;
809
810         mutex_lock(&kvm->lock);
811         if (kvm->created_vcpus) {
812                 ret = -EBUSY;
813                 goto out;
814         }
815         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
816         if (!proc) {
817                 ret = -ENOMEM;
818                 goto out;
819         }
820         if (!copy_from_user(proc, (void __user *)attr->addr,
821                             sizeof(*proc))) {
822                 kvm->arch.model.cpuid = proc->cpuid;
823                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
824                 unblocked_ibc = sclp.ibc & 0xfff;
825                 if (lowest_ibc && proc->ibc) {
826                         if (proc->ibc > unblocked_ibc)
827                                 kvm->arch.model.ibc = unblocked_ibc;
828                         else if (proc->ibc < lowest_ibc)
829                                 kvm->arch.model.ibc = lowest_ibc;
830                         else
831                                 kvm->arch.model.ibc = proc->ibc;
832                 }
833                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
834                        S390_ARCH_FAC_LIST_SIZE_BYTE);
835                 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
836                          kvm->arch.model.ibc,
837                          kvm->arch.model.cpuid);
838                 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
839                          kvm->arch.model.fac_list[0],
840                          kvm->arch.model.fac_list[1],
841                          kvm->arch.model.fac_list[2]);
842         } else
843                 ret = -EFAULT;
844         kfree(proc);
845 out:
846         mutex_unlock(&kvm->lock);
847         return ret;
848 }
849
850 static int kvm_s390_set_processor_feat(struct kvm *kvm,
851                                        struct kvm_device_attr *attr)
852 {
853         struct kvm_s390_vm_cpu_feat data;
854         int ret = -EBUSY;
855
856         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
857                 return -EFAULT;
858         if (!bitmap_subset((unsigned long *) data.feat,
859                            kvm_s390_available_cpu_feat,
860                            KVM_S390_VM_CPU_FEAT_NR_BITS))
861                 return -EINVAL;
862
863         mutex_lock(&kvm->lock);
864         if (!atomic_read(&kvm->online_vcpus)) {
865                 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
866                             KVM_S390_VM_CPU_FEAT_NR_BITS);
867                 ret = 0;
868         }
869         mutex_unlock(&kvm->lock);
870         return ret;
871 }
872
873 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
874                                           struct kvm_device_attr *attr)
875 {
876         /*
877          * Once supported by kernel + hw, we have to store the subfunctions
878          * in kvm->arch and remember that user space configured them.
879          */
880         return -ENXIO;
881 }
882
883 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
884 {
885         int ret = -ENXIO;
886
887         switch (attr->attr) {
888         case KVM_S390_VM_CPU_PROCESSOR:
889                 ret = kvm_s390_set_processor(kvm, attr);
890                 break;
891         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
892                 ret = kvm_s390_set_processor_feat(kvm, attr);
893                 break;
894         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
895                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
896                 break;
897         }
898         return ret;
899 }
900
901 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
902 {
903         struct kvm_s390_vm_cpu_processor *proc;
904         int ret = 0;
905
906         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
907         if (!proc) {
908                 ret = -ENOMEM;
909                 goto out;
910         }
911         proc->cpuid = kvm->arch.model.cpuid;
912         proc->ibc = kvm->arch.model.ibc;
913         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
914                S390_ARCH_FAC_LIST_SIZE_BYTE);
915         VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
916                  kvm->arch.model.ibc,
917                  kvm->arch.model.cpuid);
918         VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
919                  kvm->arch.model.fac_list[0],
920                  kvm->arch.model.fac_list[1],
921                  kvm->arch.model.fac_list[2]);
922         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
923                 ret = -EFAULT;
924         kfree(proc);
925 out:
926         return ret;
927 }
928
929 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
930 {
931         struct kvm_s390_vm_cpu_machine *mach;
932         int ret = 0;
933
934         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
935         if (!mach) {
936                 ret = -ENOMEM;
937                 goto out;
938         }
939         get_cpu_id((struct cpuid *) &mach->cpuid);
940         mach->ibc = sclp.ibc;
941         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
942                S390_ARCH_FAC_LIST_SIZE_BYTE);
943         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
944                sizeof(S390_lowcore.stfle_fac_list));
945         VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
946                  kvm->arch.model.ibc,
947                  kvm->arch.model.cpuid);
948         VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
949                  mach->fac_mask[0],
950                  mach->fac_mask[1],
951                  mach->fac_mask[2]);
952         VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
953                  mach->fac_list[0],
954                  mach->fac_list[1],
955                  mach->fac_list[2]);
956         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
957                 ret = -EFAULT;
958         kfree(mach);
959 out:
960         return ret;
961 }
962
963 static int kvm_s390_get_processor_feat(struct kvm *kvm,
964                                        struct kvm_device_attr *attr)
965 {
966         struct kvm_s390_vm_cpu_feat data;
967
968         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
969                     KVM_S390_VM_CPU_FEAT_NR_BITS);
970         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
971                 return -EFAULT;
972         return 0;
973 }
974
975 static int kvm_s390_get_machine_feat(struct kvm *kvm,
976                                      struct kvm_device_attr *attr)
977 {
978         struct kvm_s390_vm_cpu_feat data;
979
980         bitmap_copy((unsigned long *) data.feat,
981                     kvm_s390_available_cpu_feat,
982                     KVM_S390_VM_CPU_FEAT_NR_BITS);
983         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
984                 return -EFAULT;
985         return 0;
986 }
987
988 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
989                                           struct kvm_device_attr *attr)
990 {
991         /*
992          * Once we can actually configure subfunctions (kernel + hw support),
993          * we have to check if they were already set by user space, if so copy
994          * them from kvm->arch.
995          */
996         return -ENXIO;
997 }
998
999 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1000                                         struct kvm_device_attr *attr)
1001 {
1002         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1003             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1004                 return -EFAULT;
1005         return 0;
1006 }
1007 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1008 {
1009         int ret = -ENXIO;
1010
1011         switch (attr->attr) {
1012         case KVM_S390_VM_CPU_PROCESSOR:
1013                 ret = kvm_s390_get_processor(kvm, attr);
1014                 break;
1015         case KVM_S390_VM_CPU_MACHINE:
1016                 ret = kvm_s390_get_machine(kvm, attr);
1017                 break;
1018         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1019                 ret = kvm_s390_get_processor_feat(kvm, attr);
1020                 break;
1021         case KVM_S390_VM_CPU_MACHINE_FEAT:
1022                 ret = kvm_s390_get_machine_feat(kvm, attr);
1023                 break;
1024         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1025                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1026                 break;
1027         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1028                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1029                 break;
1030         }
1031         return ret;
1032 }
1033
1034 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1035 {
1036         int ret;
1037
1038         switch (attr->group) {
1039         case KVM_S390_VM_MEM_CTRL:
1040                 ret = kvm_s390_set_mem_control(kvm, attr);
1041                 break;
1042         case KVM_S390_VM_TOD:
1043                 ret = kvm_s390_set_tod(kvm, attr);
1044                 break;
1045         case KVM_S390_VM_CPU_MODEL:
1046                 ret = kvm_s390_set_cpu_model(kvm, attr);
1047                 break;
1048         case KVM_S390_VM_CRYPTO:
1049                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1050                 break;
1051         default:
1052                 ret = -ENXIO;
1053                 break;
1054         }
1055
1056         return ret;
1057 }
1058
1059 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1060 {
1061         int ret;
1062
1063         switch (attr->group) {
1064         case KVM_S390_VM_MEM_CTRL:
1065                 ret = kvm_s390_get_mem_control(kvm, attr);
1066                 break;
1067         case KVM_S390_VM_TOD:
1068                 ret = kvm_s390_get_tod(kvm, attr);
1069                 break;
1070         case KVM_S390_VM_CPU_MODEL:
1071                 ret = kvm_s390_get_cpu_model(kvm, attr);
1072                 break;
1073         default:
1074                 ret = -ENXIO;
1075                 break;
1076         }
1077
1078         return ret;
1079 }
1080
1081 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1082 {
1083         int ret;
1084
1085         switch (attr->group) {
1086         case KVM_S390_VM_MEM_CTRL:
1087                 switch (attr->attr) {
1088                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1089                 case KVM_S390_VM_MEM_CLR_CMMA:
1090                         ret = sclp.has_cmma ? 0 : -ENXIO;
1091                         break;
1092                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1093                         ret = 0;
1094                         break;
1095                 default:
1096                         ret = -ENXIO;
1097                         break;
1098                 }
1099                 break;
1100         case KVM_S390_VM_TOD:
1101                 switch (attr->attr) {
1102                 case KVM_S390_VM_TOD_LOW:
1103                 case KVM_S390_VM_TOD_HIGH:
1104                         ret = 0;
1105                         break;
1106                 default:
1107                         ret = -ENXIO;
1108                         break;
1109                 }
1110                 break;
1111         case KVM_S390_VM_CPU_MODEL:
1112                 switch (attr->attr) {
1113                 case KVM_S390_VM_CPU_PROCESSOR:
1114                 case KVM_S390_VM_CPU_MACHINE:
1115                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1116                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1117                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1118                         ret = 0;
1119                         break;
1120                 /* configuring subfunctions is not supported yet */
1121                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1122                 default:
1123                         ret = -ENXIO;
1124                         break;
1125                 }
1126                 break;
1127         case KVM_S390_VM_CRYPTO:
1128                 switch (attr->attr) {
1129                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1130                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1131                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1132                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1133                         ret = 0;
1134                         break;
1135                 default:
1136                         ret = -ENXIO;
1137                         break;
1138                 }
1139                 break;
1140         default:
1141                 ret = -ENXIO;
1142                 break;
1143         }
1144
1145         return ret;
1146 }
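/*
 * Illustrative sketch (not kernel code): user space can probe whether an
 * attribute is supported before using it. KVM_HAS_DEVICE_ATTR succeeds
 * (returns 0) when the attribute exists and fails with ENXIO otherwise.
 * enable_cmma() below is a hypothetical helper on the user-space side.
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MEM_CTRL,
 *		.attr  = KVM_S390_VM_MEM_ENABLE_CMMA,
 *	};
 *	if (ioctl(vm_fd, KVM_HAS_DEVICE_ATTR, &attr) == 0)
 *		enable_cmma();		// attribute is supported on this host
 */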
1147
1148 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1149 {
1150         uint8_t *keys;
1151         uint64_t hva;
1152         int i, r = 0;
1153
1154         if (args->flags != 0)
1155                 return -EINVAL;
1156
1157         /* Is this guest using storage keys? */
1158         if (!mm_use_skey(current->mm))
1159                 return KVM_S390_GET_SKEYS_NONE;
1160
1161         /* Enforce sane limit on memory allocation */
1162         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1163                 return -EINVAL;
1164
1165         keys = kmalloc_array(args->count, sizeof(uint8_t),
1166                              GFP_KERNEL | __GFP_NOWARN);
1167         if (!keys)
1168                 keys = vmalloc(sizeof(uint8_t) * args->count);
1169         if (!keys)
1170                 return -ENOMEM;
1171
1172         down_read(&current->mm->mmap_sem);
1173         for (i = 0; i < args->count; i++) {
1174                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1175                 if (kvm_is_error_hva(hva)) {
1176                         r = -EFAULT;
1177                         break;
1178                 }
1179
1180                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1181                 if (r)
1182                         break;
1183         }
1184         up_read(&current->mm->mmap_sem);
1185
1186         if (!r) {
1187                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1188                                  sizeof(uint8_t) * args->count);
1189                 if (r)
1190                         r = -EFAULT;
1191         }
1192
1193         kvfree(keys);
1194         return r;
1195 }
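/*
 * Illustrative sketch (not kernel code): user space reads a range of guest
 * storage keys with KVM_S390_GET_SKEYS, one key byte per 4K guest page. A
 * return value of KVM_S390_GET_SKEYS_NONE means the guest is not using
 * storage keys at all.
 *
 *	__u8 keys[256];
 *	struct kvm_s390_skeys args = {
 *		.start_gfn = 0,
 *		.count = 256,
 *		.skeydata_addr = (__u64)(unsigned long)keys,
 *	};
 *	ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 */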
1196
1197 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1198 {
1199         uint8_t *keys;
1200         uint64_t hva;
1201         int i, r = 0;
1202
1203         if (args->flags != 0)
1204                 return -EINVAL;
1205
1206         /* Enforce sane limit on memory allocation */
1207         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1208                 return -EINVAL;
1209
1210         keys = kmalloc_array(args->count, sizeof(uint8_t),
1211                              GFP_KERNEL | __GFP_NOWARN);
1212         if (!keys)
1213                 keys = vmalloc(sizeof(uint8_t) * args->count);
1214         if (!keys)
1215                 return -ENOMEM;
1216
1217         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1218                            sizeof(uint8_t) * args->count);
1219         if (r) {
1220                 r = -EFAULT;
1221                 goto out;
1222         }
1223
1224         /* Enable storage key handling for the guest */
1225         r = s390_enable_skey();
1226         if (r)
1227                 goto out;
1228
1229         down_read(&current->mm->mmap_sem);
1230         for (i = 0; i < args->count; i++) {
1231                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1232                 if (kvm_is_error_hva(hva)) {
1233                         r = -EFAULT;
1234                         break;
1235                 }
1236
1237                 /* Lowest order bit is reserved */
1238                 if (keys[i] & 0x01) {
1239                         r = -EINVAL;
1240                         break;
1241                 }
1242
1243                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1244                 if (r)
1245                         break;
1246         }
1247         up_read(&current->mm->mmap_sem);
1248 out:
1249         kvfree(keys);
1250         return r;
1251 }
1252
1253 long kvm_arch_vm_ioctl(struct file *filp,
1254                        unsigned int ioctl, unsigned long arg)
1255 {
1256         struct kvm *kvm = filp->private_data;
1257         void __user *argp = (void __user *)arg;
1258         struct kvm_device_attr attr;
1259         int r;
1260
1261         switch (ioctl) {
1262         case KVM_S390_INTERRUPT: {
1263                 struct kvm_s390_interrupt s390int;
1264
1265                 r = -EFAULT;
1266                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1267                         break;
1268                 r = kvm_s390_inject_vm(kvm, &s390int);
1269                 break;
1270         }
1271         case KVM_ENABLE_CAP: {
1272                 struct kvm_enable_cap cap;
1273                 r = -EFAULT;
1274                 if (copy_from_user(&cap, argp, sizeof(cap)))
1275                         break;
1276                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1277                 break;
1278         }
1279         case KVM_CREATE_IRQCHIP: {
1280                 struct kvm_irq_routing_entry routing;
1281
1282                 r = -EINVAL;
1283                 if (kvm->arch.use_irqchip) {
1284                         /* Set up dummy routing. */
1285                         memset(&routing, 0, sizeof(routing));
1286                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1287                 }
1288                 break;
1289         }
1290         case KVM_SET_DEVICE_ATTR: {
1291                 r = -EFAULT;
1292                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1293                         break;
1294                 r = kvm_s390_vm_set_attr(kvm, &attr);
1295                 break;
1296         }
1297         case KVM_GET_DEVICE_ATTR: {
1298                 r = -EFAULT;
1299                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1300                         break;
1301                 r = kvm_s390_vm_get_attr(kvm, &attr);
1302                 break;
1303         }
1304         case KVM_HAS_DEVICE_ATTR: {
1305                 r = -EFAULT;
1306                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1307                         break;
1308                 r = kvm_s390_vm_has_attr(kvm, &attr);
1309                 break;
1310         }
1311         case KVM_S390_GET_SKEYS: {
1312                 struct kvm_s390_skeys args;
1313
1314                 r = -EFAULT;
1315                 if (copy_from_user(&args, argp,
1316                                    sizeof(struct kvm_s390_skeys)))
1317                         break;
1318                 r = kvm_s390_get_skeys(kvm, &args);
1319                 break;
1320         }
1321         case KVM_S390_SET_SKEYS: {
1322                 struct kvm_s390_skeys args;
1323
1324                 r = -EFAULT;
1325                 if (copy_from_user(&args, argp,
1326                                    sizeof(struct kvm_s390_skeys)))
1327                         break;
1328                 r = kvm_s390_set_skeys(kvm, &args);
1329                 break;
1330         }
1331         default:
1332                 r = -ENOTTY;
1333         }
1334
1335         return r;
1336 }
1337
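/*
 * Descriptive note: kvm_s390_query_ap_config() issues PQAP with the QCI
 * function code (0x04000000) to fill a 128-byte AP configuration info block.
 * kvm_s390_apxa_installed() then tests the APXA bit in that block, which
 * decides the CRYCB format used for the guest (format 2 with APXA, format 1
 * without).
 */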
1338 static int kvm_s390_query_ap_config(u8 *config)
1339 {
1340         u32 fcn_code = 0x04000000UL;
1341         u32 cc = 0;
1342
1343         memset(config, 0, 128);
1344         asm volatile(
1345                 "lgr 0,%1\n"
1346                 "lgr 2,%2\n"
1347                 ".long 0xb2af0000\n"            /* PQAP(QCI) */
1348                 "0: ipm %0\n"
1349                 "srl %0,28\n"
1350                 "1:\n"
1351                 EX_TABLE(0b, 1b)
1352                 : "+r" (cc)
1353                 : "r" (fcn_code), "r" (config)
1354                 : "cc", "0", "2", "memory"
1355         );
1356
1357         return cc;
1358 }
1359
1360 static int kvm_s390_apxa_installed(void)
1361 {
1362         u8 config[128];
1363         int cc;
1364
1365         if (test_facility(12)) {
1366                 cc = kvm_s390_query_ap_config(config);
1367
1368                 if (cc)
1369                         pr_err("PQAP(QCI) failed with cc=%d", cc);
1370                 else
1371                         return config[0] & 0x40;
1372         }
1373
1374         return 0;
1375 }
1376
1377 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1378 {
1379         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1380
1381         if (kvm_s390_apxa_installed())
1382                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1383         else
1384                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1385 }
1386
1387 static u64 kvm_s390_get_initial_cpuid(void)
1388 {
1389         struct cpuid cpuid;
1390
1391         get_cpu_id(&cpuid);
1392         cpuid.version = 0xff;
1393         return *((u64 *) &cpuid);
1394 }
1395
1396 static void kvm_s390_crypto_init(struct kvm *kvm)
1397 {
1398         if (!test_kvm_facility(kvm, 76))
1399                 return;
1400
1401         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1402         kvm_s390_set_crycb_format(kvm);
1403
1404         /* Enable AES/DEA protected key functions by default */
1405         kvm->arch.crypto.aes_kw = 1;
1406         kvm->arch.crypto.dea_kw = 1;
1407         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1408                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1409         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1410                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1411 }
1412
1413 static void sca_dispose(struct kvm *kvm)
1414 {
1415         if (kvm->arch.use_esca)
1416                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1417         else
1418                 free_page((unsigned long)(kvm->arch.sca));
1419         kvm->arch.sca = NULL;
1420 }
1421
1422 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1423 {
1424         gfp_t alloc_flags = GFP_KERNEL;
1425         int i, rc;
1426         char debug_name[16];
1427         static unsigned long sca_offset;
1428
1429         rc = -EINVAL;
1430 #ifdef CONFIG_KVM_S390_UCONTROL
1431         if (type & ~KVM_VM_S390_UCONTROL)
1432                 goto out_err;
1433         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1434                 goto out_err;
1435 #else
1436         if (type)
1437                 goto out_err;
1438 #endif
1439
1440         rc = s390_enable_sie();
1441         if (rc)
1442                 goto out_err;
1443
1444         rc = -ENOMEM;
1445
1446         ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1447
1448         kvm->arch.use_esca = 0; /* start with basic SCA */
1449         if (!sclp.has_64bscao)
1450                 alloc_flags |= GFP_DMA;
1451         rwlock_init(&kvm->arch.sca_lock);
1452         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1453         if (!kvm->arch.sca)
1454                 goto out_err;
1455         spin_lock(&kvm_lock);
1456         sca_offset += 16;
1457         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1458                 sca_offset = 0;
1459         kvm->arch.sca = (struct bsca_block *)
1460                         ((char *) kvm->arch.sca + sca_offset);
1461         spin_unlock(&kvm_lock);
1462
1463         sprintf(debug_name, "kvm-%u", current->pid);
1464
1465         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1466         if (!kvm->arch.dbf)
1467                 goto out_err;
1468
1469         kvm->arch.sie_page2 =
1470              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1471         if (!kvm->arch.sie_page2)
1472                 goto out_err;
1473
1474         /* Populate the facility mask initially. */
1475         memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1476                sizeof(S390_lowcore.stfle_fac_list));
1477         for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1478                 if (i < kvm_s390_fac_list_mask_size())
1479                         kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1480                 else
1481                         kvm->arch.model.fac_mask[i] = 0UL;
1482         }
1483
1484         /* Populate the facility list initially. */
1485         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1486         memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1487                S390_ARCH_FAC_LIST_SIZE_BYTE);
1488
1489         set_kvm_facility(kvm->arch.model.fac_mask, 74);
1490         set_kvm_facility(kvm->arch.model.fac_list, 74);
1491
1492         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1493         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1494
1495         kvm_s390_crypto_init(kvm);
1496
1497         spin_lock_init(&kvm->arch.float_int.lock);
1498         for (i = 0; i < FIRQ_LIST_COUNT; i++)
1499                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1500         init_waitqueue_head(&kvm->arch.ipte_wq);
1501         mutex_init(&kvm->arch.ipte_mutex);
1502
1503         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1504         VM_EVENT(kvm, 3, "vm created with type %lu", type);
1505
1506         if (type & KVM_VM_S390_UCONTROL) {
1507                 kvm->arch.gmap = NULL;
1508                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1509         } else {
1510                 if (sclp.hamax == U64_MAX)
1511                         kvm->arch.mem_limit = TASK_MAX_SIZE;
1512                 else
1513                         kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
1514                                                     sclp.hamax + 1);
1515                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1516                 if (!kvm->arch.gmap)
1517                         goto out_err;
1518                 kvm->arch.gmap->private = kvm;
1519                 kvm->arch.gmap->pfault_enabled = 0;
1520         }
1521
1522         kvm->arch.css_support = 0;
1523         kvm->arch.use_irqchip = 0;
1524         kvm->arch.epoch = 0;
1525
1526         spin_lock_init(&kvm->arch.start_stop_lock);
1527         kvm_s390_vsie_init(kvm);
1528         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1529
1530         return 0;
1531 out_err:
1532         free_page((unsigned long)kvm->arch.sie_page2);
1533         debug_unregister(kvm->arch.dbf);
1534         sca_dispose(kvm);
1535         KVM_EVENT(3, "creation of vm failed: %d", rc);
1536         return rc;
1537 }
1538
1539 bool kvm_arch_has_vcpu_debugfs(void)
1540 {
1541         return false;
1542 }
1543
1544 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
1545 {
1546         return 0;
1547 }
1548
1549 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1550 {
1551         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1552         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1553         kvm_s390_clear_local_irqs(vcpu);
1554         kvm_clear_async_pf_completion_queue(vcpu);
1555         if (!kvm_is_ucontrol(vcpu->kvm))
1556                 sca_del_vcpu(vcpu);
1557
1558         if (kvm_is_ucontrol(vcpu->kvm))
1559                 gmap_remove(vcpu->arch.gmap);
1560
1561         if (vcpu->kvm->arch.use_cmma)
1562                 kvm_s390_vcpu_unsetup_cmma(vcpu);
1563         free_page((unsigned long)(vcpu->arch.sie_block));
1564
1565         kvm_vcpu_uninit(vcpu);
1566         kmem_cache_free(kvm_vcpu_cache, vcpu);
1567 }
1568
1569 static void kvm_free_vcpus(struct kvm *kvm)
1570 {
1571         unsigned int i;
1572         struct kvm_vcpu *vcpu;
1573
1574         kvm_for_each_vcpu(i, vcpu, kvm)
1575                 kvm_arch_vcpu_destroy(vcpu);
1576
1577         mutex_lock(&kvm->lock);
1578         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1579                 kvm->vcpus[i] = NULL;
1580
1581         atomic_set(&kvm->online_vcpus, 0);
1582         mutex_unlock(&kvm->lock);
1583 }
1584
1585 void kvm_arch_destroy_vm(struct kvm *kvm)
1586 {
1587         kvm_free_vcpus(kvm);
1588         sca_dispose(kvm);
1589         debug_unregister(kvm->arch.dbf);
1590         free_page((unsigned long)kvm->arch.sie_page2);
1591         if (!kvm_is_ucontrol(kvm))
1592                 gmap_remove(kvm->arch.gmap);
1593         kvm_s390_destroy_adapters(kvm);
1594         kvm_s390_clear_float_irqs(kvm);
1595         kvm_s390_vsie_destroy(kvm);
1596         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1597 }
1598
1599 /* Section: vcpu related */
1600 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1601 {
1602         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
1603         if (!vcpu->arch.gmap)
1604                 return -ENOMEM;
1605         vcpu->arch.gmap->private = vcpu->kvm;
1606
1607         return 0;
1608 }
1609
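/* Clear a vcpu's entry (SDA and MCN bit) in the basic or extended SCA of its VM. */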
1610 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1611 {
1612         if (!kvm_s390_use_sca_entries())
1613                 return;
1614         read_lock(&vcpu->kvm->arch.sca_lock);
1615         if (vcpu->kvm->arch.use_esca) {
1616                 struct esca_block *sca = vcpu->kvm->arch.sca;
1617
1618                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1619                 sca->cpu[vcpu->vcpu_id].sda = 0;
1620         } else {
1621                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1622
1623                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1624                 sca->cpu[vcpu->vcpu_id].sda = 0;
1625         }
1626         read_unlock(&vcpu->kvm->arch.sca_lock);
1627 }
1628
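/*
 * Publish the vcpu's SIE block in its SCA entry and point the SIE block at
 * the SCA origin. Without per-vcpu SCA entries only the basic SCA origin is
 * set up, as it is still needed for the ipte control.
 */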
1629 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1630 {
1631         if (!kvm_s390_use_sca_entries()) {
1632                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1633
1634                 /* we still need the basic sca for the ipte control */
1635                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1636                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1637         }
1638         read_lock(&vcpu->kvm->arch.sca_lock);
1639         if (vcpu->kvm->arch.use_esca) {
1640                 struct esca_block *sca = vcpu->kvm->arch.sca;
1641
1642                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1643                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1644                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1645                 vcpu->arch.sie_block->ecb2 |= 0x04U;
1646                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1647         } else {
1648                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1649
1650                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1651                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1652                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1653                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1654         }
1655         read_unlock(&vcpu->kvm->arch.sca_lock);
1656 }
1657
1658 /* Basic SCA to Extended SCA data copy routines */
1659 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1660 {
1661         d->sda = s->sda;
1662         d->sigp_ctrl.c = s->sigp_ctrl.c;
1663         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
1664 }
1665
1666 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1667 {
1668         int i;
1669
1670         d->ipte_control = s->ipte_control;
1671         d->mcn[0] = s->mcn;
1672         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1673                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
1674 }
1675
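/*
 * Replace the basic SCA with an extended SCA: copy all entries while every
 * vcpu is blocked, rewire the SIE blocks to the new origin and free the old
 * SCA page afterwards.
 */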
1676 static int sca_switch_to_extended(struct kvm *kvm)
1677 {
1678         struct bsca_block *old_sca = kvm->arch.sca;
1679         struct esca_block *new_sca;
1680         struct kvm_vcpu *vcpu;
1681         unsigned int vcpu_idx;
1682         u32 scaol, scaoh;
1683
1684         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
1685         if (!new_sca)
1686                 return -ENOMEM;
1687
1688         scaoh = (u32)((u64)(new_sca) >> 32);
1689         scaol = (u32)(u64)(new_sca) & ~0x3fU;
1690
1691         kvm_s390_vcpu_block_all(kvm);
1692         write_lock(&kvm->arch.sca_lock);
1693
1694         sca_copy_b_to_e(new_sca, old_sca);
1695
1696         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
1697                 vcpu->arch.sie_block->scaoh = scaoh;
1698                 vcpu->arch.sie_block->scaol = scaol;
1699                 vcpu->arch.sie_block->ecb2 |= 0x04U;
1700         }
1701         kvm->arch.sca = new_sca;
1702         kvm->arch.use_esca = 1;
1703
1704         write_unlock(&kvm->arch.sca_lock);
1705         kvm_s390_vcpu_unblock_all(kvm);
1706
1707         free_page((unsigned long)old_sca);
1708
1709         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
1710                  old_sca, kvm->arch.sca);
1711         return 0;
1712 }
1713
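/*
 * Check whether a vcpu with the given id fits into the current SCA,
 * switching to the extended SCA on demand once the basic slots are exhausted.
 */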
1714 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1715 {
1716         int rc;
1717
1718         if (!kvm_s390_use_sca_entries()) {
1719                 if (id < KVM_MAX_VCPUS)
1720                         return true;
1721                 return false;
1722         }
1723         if (id < KVM_S390_BSCA_CPU_SLOTS)
1724                 return true;
1725         if (!sclp.has_esca || !sclp.has_64bscao)
1726                 return false;
1727
1728         mutex_lock(&kvm->lock);
1729         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1730         mutex_unlock(&kvm->lock);
1731
1732         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1733 }
1734
1735 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1736 {
1737         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1738         kvm_clear_async_pf_completion_queue(vcpu);
1739         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1740                                     KVM_SYNC_GPRS |
1741                                     KVM_SYNC_ACRS |
1742                                     KVM_SYNC_CRS |
1743                                     KVM_SYNC_ARCH0 |
1744                                     KVM_SYNC_PFAULT;
1745         kvm_s390_set_prefix(vcpu, 0);
1746         if (test_kvm_facility(vcpu->kvm, 64))
1747                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
1748         /* fprs can be synchronized via vrs, even if the guest has no vx. With
1749          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
1750          */
1751         if (MACHINE_HAS_VX)
1752                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1753         else
1754                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
1755
1756         if (kvm_is_ucontrol(vcpu->kvm))
1757                 return __kvm_ucontrol_vcpu_init(vcpu);
1758
1759         return 0;
1760 }
1761
1762 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1763 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1764 {
1765         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
1766         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1767         vcpu->arch.cputm_start = get_tod_clock_fast();
1768         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1769 }
1770
1771 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1772 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1773 {
1774         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
1775         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1776         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1777         vcpu->arch.cputm_start = 0;
1778         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1779 }
1780
1781 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1782 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1783 {
1784         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
1785         vcpu->arch.cputm_enabled = true;
1786         __start_cpu_timer_accounting(vcpu);
1787 }
1788
1789 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1790 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1791 {
1792         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
1793         __stop_cpu_timer_accounting(vcpu);
1794         vcpu->arch.cputm_enabled = false;
1795 }
1796
1797 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1798 {
1799         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1800         __enable_cpu_timer_accounting(vcpu);
1801         preempt_enable();
1802 }
1803
1804 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1805 {
1806         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1807         __disable_cpu_timer_accounting(vcpu);
1808         preempt_enable();
1809 }
1810
1811 /* set the cpu timer - may only be called from the VCPU thread itself */
1812 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
1813 {
1814         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1815         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1816         if (vcpu->arch.cputm_enabled)
1817                 vcpu->arch.cputm_start = get_tod_clock_fast();
1818         vcpu->arch.sie_block->cputm = cputm;
1819         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1820         preempt_enable();
1821 }
1822
1823 /* update and get the cpu timer - can also be called from other VCPU threads */
1824 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
1825 {
1826         unsigned int seq;
1827         __u64 value;
1828
1829         if (unlikely(!vcpu->arch.cputm_enabled))
1830                 return vcpu->arch.sie_block->cputm;
1831
1832         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1833         do {
1834                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
1835                 /*
1836                  * If the writer would ever execute a read in the critical
1837                  * section, e.g. in irq context, we have a deadlock.
1838                  */
1839                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
1840                 value = vcpu->arch.sie_block->cputm;
1841                 /* if cputm_start is 0, accounting is being started/stopped */
1842                 if (likely(vcpu->arch.cputm_start))
1843                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1844         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
1845         preempt_enable();
1846         return value;
1847 }
1848
1849 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1850 {
1851
1852         gmap_enable(vcpu->arch.enabled_gmap);
1853         atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1854         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1855                 __start_cpu_timer_accounting(vcpu);
1856         vcpu->cpu = cpu;
1857 }
1858
1859 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1860 {
1861         vcpu->cpu = -1;
1862         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1863                 __stop_cpu_timer_accounting(vcpu);
1864         atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1865         vcpu->arch.enabled_gmap = gmap_get_enabled();
1866         gmap_disable(vcpu->arch.enabled_gmap);
1867
1868 }
1869
1870 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1871 {
 1872         /* this equals initial cpu reset in the POP, but we don't switch to ESA */
1873         vcpu->arch.sie_block->gpsw.mask = 0UL;
1874         vcpu->arch.sie_block->gpsw.addr = 0UL;
1875         kvm_s390_set_prefix(vcpu, 0);
1876         kvm_s390_set_cpu_timer(vcpu, 0);
1877         vcpu->arch.sie_block->ckc       = 0UL;
1878         vcpu->arch.sie_block->todpr     = 0;
1879         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1880         vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
1881         vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1882         /* make sure the new fpc will be lazily loaded */
1883         save_fpu_regs();
1884         current->thread.fpu.fpc = 0;
1885         vcpu->arch.sie_block->gbea = 1;
1886         vcpu->arch.sie_block->pp = 0;
1887         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1888         kvm_clear_async_pf_completion_queue(vcpu);
1889         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1890                 kvm_s390_vcpu_stop(vcpu);
1891         kvm_s390_clear_local_irqs(vcpu);
1892 }
1893
1894 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1895 {
1896         mutex_lock(&vcpu->kvm->lock);
1897         preempt_disable();
1898         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1899         preempt_enable();
1900         mutex_unlock(&vcpu->kvm->lock);
1901         if (!kvm_is_ucontrol(vcpu->kvm)) {
1902                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1903                 sca_add_vcpu(vcpu);
1904         }
1905         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
1906                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
1907         /* make vcpu_load load the right gmap on the first trigger */
1908         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
1909 }
1910
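/*
 * Propagate the VM-wide crypto settings (CRYCB designation, AES/DEA key
 * wrapping) into the vcpu's SIE block.
 */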
1911 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1912 {
1913         if (!test_kvm_facility(vcpu->kvm, 76))
1914                 return;
1915
1916         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1917
1918         if (vcpu->kvm->arch.crypto.aes_kw)
1919                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1920         if (vcpu->kvm->arch.crypto.dea_kw)
1921                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1922
1923         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1924 }
1925
1926 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1927 {
1928         free_page(vcpu->arch.sie_block->cbrlo);
1929         vcpu->arch.sie_block->cbrlo = 0;
1930 }
1931
1932 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1933 {
1934         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1935         if (!vcpu->arch.sie_block->cbrlo)
1936                 return -ENOMEM;
1937
1938         vcpu->arch.sie_block->ecb2 |= 0x80;
1939         vcpu->arch.sie_block->ecb2 &= ~0x08;
1940         return 0;
1941 }
1942
1943 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1944 {
1945         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1946
1947         vcpu->arch.sie_block->ibc = model->ibc;
1948         if (test_kvm_facility(vcpu->kvm, 7))
1949                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
1950 }
1951
1952 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1953 {
1954         int rc = 0;
1955
1956         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1957                                                     CPUSTAT_SM |
1958                                                     CPUSTAT_STOPPED);
1959
1960         if (test_kvm_facility(vcpu->kvm, 78))
1961                 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1962         else if (test_kvm_facility(vcpu->kvm, 8))
1963                 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1964
1965         kvm_s390_vcpu_setup_model(vcpu);
1966
1967         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
1968         if (MACHINE_HAS_ESOP)
1969                 vcpu->arch.sie_block->ecb |= 0x02;
1970         if (test_kvm_facility(vcpu->kvm, 9))
1971                 vcpu->arch.sie_block->ecb |= 0x04;
1972         if (test_kvm_facility(vcpu->kvm, 73))
1973                 vcpu->arch.sie_block->ecb |= 0x10;
1974
1975         if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
1976                 vcpu->arch.sie_block->ecb2 |= 0x08;
1977         if (test_kvm_facility(vcpu->kvm, 130))
1978                 vcpu->arch.sie_block->ecb2 |= 0x20;
1979         vcpu->arch.sie_block->eca = 0x1002000U;
1980         if (sclp.has_cei)
1981                 vcpu->arch.sie_block->eca |= 0x80000000U;
1982         if (sclp.has_ib)
1983                 vcpu->arch.sie_block->eca |= 0x40000000U;
1984         if (sclp.has_siif)
1985                 vcpu->arch.sie_block->eca |= 1;
1986         if (sclp.has_sigpif)
1987                 vcpu->arch.sie_block->eca |= 0x10000000U;
1988         if (test_kvm_facility(vcpu->kvm, 129)) {
1989                 vcpu->arch.sie_block->eca |= 0x00020000;
1990                 vcpu->arch.sie_block->ecd |= 0x20000000;
1991         }
1992         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
1993         vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1994
1995         if (vcpu->kvm->arch.use_cmma) {
1996                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
1997                 if (rc)
1998                         return rc;
1999         }
2000         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2001         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2002
2003         kvm_s390_vcpu_crypto_setup(vcpu);
2004
2005         return rc;
2006 }
2007
2008 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2009                                       unsigned int id)
2010 {
2011         struct kvm_vcpu *vcpu;
2012         struct sie_page *sie_page;
2013         int rc = -EINVAL;
2014
2015         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2016                 goto out;
2017
2018         rc = -ENOMEM;
2019
2020         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2021         if (!vcpu)
2022                 goto out;
2023
2024         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2025         if (!sie_page)
2026                 goto out_free_cpu;
2027
2028         vcpu->arch.sie_block = &sie_page->sie_block;
2029         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2030
2031         /* the real guest size will always be smaller than msl */
2032         vcpu->arch.sie_block->mso = 0;
2033         vcpu->arch.sie_block->msl = sclp.hamax;
2034
2035         vcpu->arch.sie_block->icpua = id;
2036         spin_lock_init(&vcpu->arch.local_int.lock);
2037         vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2038         vcpu->arch.local_int.wq = &vcpu->wq;
2039         vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2040         seqcount_init(&vcpu->arch.cputm_seqcount);
2041
2042         rc = kvm_vcpu_init(vcpu, kvm, id);
2043         if (rc)
2044                 goto out_free_sie_block;
2045         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2046                  vcpu->arch.sie_block);
2047         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2048
2049         return vcpu;
2050 out_free_sie_block:
2051         free_page((unsigned long)(vcpu->arch.sie_block));
2052 out_free_cpu:
2053         kmem_cache_free(kvm_vcpu_cache, vcpu);
2054 out:
2055         return ERR_PTR(rc);
2056 }
2057
2058 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2059 {
2060         return kvm_s390_vcpu_has_irq(vcpu, 0);
2061 }
2062
2063 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2064 {
2065         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2066         exit_sie(vcpu);
2067 }
2068
2069 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2070 {
2071         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2072 }
2073
2074 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2075 {
2076         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2077         exit_sie(vcpu);
2078 }
2079
2080 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2081 {
2082         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2083 }
2084
2085 /*
2086  * Kick a guest cpu out of SIE and wait until SIE is not running.
2087  * If the CPU is not running (e.g. waiting as idle) the function will
2088  * return immediately. */
2089 void exit_sie(struct kvm_vcpu *vcpu)
2090 {
2091         atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2092         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2093                 cpu_relax();
2094 }
2095
2096 /* Kick a guest cpu out of SIE to process a request synchronously */
2097 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2098 {
2099         kvm_make_request(req, vcpu);
2100         kvm_s390_vcpu_request(vcpu);
2101 }
2102
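/*
 * gmap notifier: if a notified range overlaps a vcpu's prefix pages, request
 * an MMU reload so the prefix protection and notification get re-armed.
 */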
2103 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2104                               unsigned long end)
2105 {
2106         struct kvm *kvm = gmap->private;
2107         struct kvm_vcpu *vcpu;
2108         unsigned long prefix;
2109         int i;
2110
2111         if (gmap_is_shadow(gmap))
2112                 return;
2113         if (start >= 1UL << 31)
2114                 /* We are only interested in prefix pages */
2115                 return;
2116         kvm_for_each_vcpu(i, vcpu, kvm) {
2117                 /* match against both prefix pages */
2118                 prefix = kvm_s390_get_prefix(vcpu);
2119                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2120                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2121                                    start, end);
2122                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2123                 }
2124         }
2125 }
2126
2127 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2128 {
2129         /* kvm common code refers to this, but never calls it */
2130         BUG();
2131         return 0;
2132 }
2133
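/*
 * Illustrative userspace usage (not part of this file): the guest CPU timer
 * can be read through the ONE_REG interface, e.g.:
 *
 *	__u64 cputm;
 *	struct kvm_one_reg reg = {
 *		.id   = KVM_REG_S390_CPU_TIMER,
 *		.addr = (__u64)&cputm,
 *	};
 *	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
 *
 * vcpu_fd is an assumed vcpu file descriptor.
 */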
2134 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2135                                            struct kvm_one_reg *reg)
2136 {
2137         int r = -EINVAL;
2138
2139         switch (reg->id) {
2140         case KVM_REG_S390_TODPR:
2141                 r = put_user(vcpu->arch.sie_block->todpr,
2142                              (u32 __user *)reg->addr);
2143                 break;
2144         case KVM_REG_S390_EPOCHDIFF:
2145                 r = put_user(vcpu->arch.sie_block->epoch,
2146                              (u64 __user *)reg->addr);
2147                 break;
2148         case KVM_REG_S390_CPU_TIMER:
2149                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2150                              (u64 __user *)reg->addr);
2151                 break;
2152         case KVM_REG_S390_CLOCK_COMP:
2153                 r = put_user(vcpu->arch.sie_block->ckc,
2154                              (u64 __user *)reg->addr);
2155                 break;
2156         case KVM_REG_S390_PFTOKEN:
2157                 r = put_user(vcpu->arch.pfault_token,
2158                              (u64 __user *)reg->addr);
2159                 break;
2160         case KVM_REG_S390_PFCOMPARE:
2161                 r = put_user(vcpu->arch.pfault_compare,
2162                              (u64 __user *)reg->addr);
2163                 break;
2164         case KVM_REG_S390_PFSELECT:
2165                 r = put_user(vcpu->arch.pfault_select,
2166                              (u64 __user *)reg->addr);
2167                 break;
2168         case KVM_REG_S390_PP:
2169                 r = put_user(vcpu->arch.sie_block->pp,
2170                              (u64 __user *)reg->addr);
2171                 break;
2172         case KVM_REG_S390_GBEA:
2173                 r = put_user(vcpu->arch.sie_block->gbea,
2174                              (u64 __user *)reg->addr);
2175                 break;
2176         default:
2177                 break;
2178         }
2179
2180         return r;
2181 }
2182
2183 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2184                                            struct kvm_one_reg *reg)
2185 {
2186         int r = -EINVAL;
2187         __u64 val;
2188
2189         switch (reg->id) {
2190         case KVM_REG_S390_TODPR:
2191                 r = get_user(vcpu->arch.sie_block->todpr,
2192                              (u32 __user *)reg->addr);
2193                 break;
2194         case KVM_REG_S390_EPOCHDIFF:
2195                 r = get_user(vcpu->arch.sie_block->epoch,
2196                              (u64 __user *)reg->addr);
2197                 break;
2198         case KVM_REG_S390_CPU_TIMER:
2199                 r = get_user(val, (u64 __user *)reg->addr);
2200                 if (!r)
2201                         kvm_s390_set_cpu_timer(vcpu, val);
2202                 break;
2203         case KVM_REG_S390_CLOCK_COMP:
2204                 r = get_user(vcpu->arch.sie_block->ckc,
2205                              (u64 __user *)reg->addr);
2206                 break;
2207         case KVM_REG_S390_PFTOKEN:
2208                 r = get_user(vcpu->arch.pfault_token,
2209                              (u64 __user *)reg->addr);
2210                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2211                         kvm_clear_async_pf_completion_queue(vcpu);
2212                 break;
2213         case KVM_REG_S390_PFCOMPARE:
2214                 r = get_user(vcpu->arch.pfault_compare,
2215                              (u64 __user *)reg->addr);
2216                 break;
2217         case KVM_REG_S390_PFSELECT:
2218                 r = get_user(vcpu->arch.pfault_select,
2219                              (u64 __user *)reg->addr);
2220                 break;
2221         case KVM_REG_S390_PP:
2222                 r = get_user(vcpu->arch.sie_block->pp,
2223                              (u64 __user *)reg->addr);
2224                 break;
2225         case KVM_REG_S390_GBEA:
2226                 r = get_user(vcpu->arch.sie_block->gbea,
2227                              (u64 __user *)reg->addr);
2228                 break;
2229         default:
2230                 break;
2231         }
2232
2233         return r;
2234 }
2235
2236 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2237 {
2238         kvm_s390_vcpu_initial_reset(vcpu);
2239         return 0;
2240 }
2241
2242 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2243 {
2244         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2245         return 0;
2246 }
2247
2248 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2249 {
2250         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2251         return 0;
2252 }
2253
2254 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2255                                   struct kvm_sregs *sregs)
2256 {
2257         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2258         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2259         return 0;
2260 }
2261
2262 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2263                                   struct kvm_sregs *sregs)
2264 {
2265         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2266         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2267         return 0;
2268 }
2269
2270 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2271 {
2272         if (test_fp_ctl(fpu->fpc))
2273                 return -EINVAL;
2274         vcpu->run->s.regs.fpc = fpu->fpc;
2275         if (MACHINE_HAS_VX)
2276                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2277                                  (freg_t *) fpu->fprs);
2278         else
2279                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2280         return 0;
2281 }
2282
2283 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2284 {
2285         /* make sure we have the latest values */
2286         save_fpu_regs();
2287         if (MACHINE_HAS_VX)
2288                 convert_vx_to_fp((freg_t *) fpu->fprs,
2289                                  (__vector128 *) vcpu->run->s.regs.vrs);
2290         else
2291                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2292         fpu->fpc = vcpu->run->s.regs.fpc;
2293         return 0;
2294 }
2295
2296 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2297 {
2298         int rc = 0;
2299
 2300         if (!is_vcpu_stopped(vcpu)) {
 2301                 rc = -EBUSY;
 2302         } else {
 2303                 vcpu->run->psw_mask = psw.mask;
 2304                 vcpu->run->psw_addr = psw.addr;
 2305         }
2306         return rc;
2307 }
2308
2309 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2310                                   struct kvm_translation *tr)
2311 {
2312         return -EINVAL; /* not implemented yet */
2313 }
2314
2315 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2316                               KVM_GUESTDBG_USE_HW_BP | \
2317                               KVM_GUESTDBG_ENABLE)
2318
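/*
 * Illustrative userspace usage (not part of this file): single-stepping can
 * be requested with, e.g.:
 *
 *	struct kvm_guest_debug dbg = {
 *		.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
 *	};
 *	ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
 *
 * vcpu_fd is an assumed vcpu file descriptor.
 */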
2319 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2320                                         struct kvm_guest_debug *dbg)
2321 {
2322         int rc = 0;
2323
2324         vcpu->guest_debug = 0;
2325         kvm_s390_clear_bp_data(vcpu);
2326
2327         if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2328                 return -EINVAL;
2329         if (!sclp.has_gpere)
2330                 return -EINVAL;
2331
2332         if (dbg->control & KVM_GUESTDBG_ENABLE) {
2333                 vcpu->guest_debug = dbg->control;
2334                 /* enforce guest PER */
2335                 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2336
2337                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2338                         rc = kvm_s390_import_bp_data(vcpu, dbg);
2339         } else {
2340                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2341                 vcpu->arch.guestdbg.last_bp = 0;
2342         }
2343
2344         if (rc) {
2345                 vcpu->guest_debug = 0;
2346                 kvm_s390_clear_bp_data(vcpu);
2347                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2348         }
2349
2350         return rc;
2351 }
2352
2353 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2354                                     struct kvm_mp_state *mp_state)
2355 {
2356         /* CHECK_STOP and LOAD are not supported yet */
2357         return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2358                                        KVM_MP_STATE_OPERATING;
2359 }
2360
2361 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2362                                     struct kvm_mp_state *mp_state)
2363 {
2364         int rc = 0;
2365
2366         /* user space knows about this interface - let it control the state */
2367         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2368
2369         switch (mp_state->mp_state) {
2370         case KVM_MP_STATE_STOPPED:
2371                 kvm_s390_vcpu_stop(vcpu);
2372                 break;
2373         case KVM_MP_STATE_OPERATING:
2374                 kvm_s390_vcpu_start(vcpu);
2375                 break;
2376         case KVM_MP_STATE_LOAD:
2377         case KVM_MP_STATE_CHECK_STOP:
2378                 /* fall through - CHECK_STOP and LOAD are not supported yet */
2379         default:
2380                 rc = -ENXIO;
2381         }
2382
2383         return rc;
2384 }
2385
2386 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2387 {
2388         return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2389 }
2390
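/*
 * Process requests pending on the vcpu before (re)entering SIE: re-protect
 * the prefix pages, flush the TLB, toggle IBS and enable operation exception
 * interception as requested.
 */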
2391 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2392 {
2393 retry:
2394         kvm_s390_vcpu_request_handled(vcpu);
2395         if (!vcpu->requests)
2396                 return 0;
2397         /*
2398          * We use MMU_RELOAD just to re-arm the ipte notifier for the
2399          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2400          * This ensures that the ipte instruction for this request has
2401          * already finished. We might race against a second unmapper that
 2402          * wants to set the blocking bit. Let's just retry the request loop.
2403          */
2404         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2405                 int rc;
2406                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2407                                           kvm_s390_get_prefix(vcpu),
2408                                           PAGE_SIZE * 2, PROT_WRITE);
2409                 if (rc) {
2410                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2411                         return rc;
2412                 }
2413                 goto retry;
2414         }
2415
2416         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2417                 vcpu->arch.sie_block->ihcpu = 0xffff;
2418                 goto retry;
2419         }
2420
2421         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2422                 if (!ibs_enabled(vcpu)) {
2423                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2424                         atomic_or(CPUSTAT_IBS,
2425                                         &vcpu->arch.sie_block->cpuflags);
2426                 }
2427                 goto retry;
2428         }
2429
2430         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2431                 if (ibs_enabled(vcpu)) {
2432                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2433                         atomic_andnot(CPUSTAT_IBS,
2434                                           &vcpu->arch.sie_block->cpuflags);
2435                 }
2436                 goto retry;
2437         }
2438
2439         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2440                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2441                 goto retry;
2442         }
2443
2444         /* nothing to do, just clear the request */
2445         clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
2446
2447         return 0;
2448 }
2449
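/*
 * Set the guest TOD clock: compute the new epoch from the requested TOD and
 * propagate it to all vcpus while they are blocked.
 */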
2450 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2451 {
2452         struct kvm_vcpu *vcpu;
2453         int i;
2454
2455         mutex_lock(&kvm->lock);
2456         preempt_disable();
2457         kvm->arch.epoch = tod - get_tod_clock();
2458         kvm_s390_vcpu_block_all(kvm);
2459         kvm_for_each_vcpu(i, vcpu, kvm)
2460                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2461         kvm_s390_vcpu_unblock_all(kvm);
2462         preempt_enable();
2463         mutex_unlock(&kvm->lock);
2464 }
2465
2466 /**
2467  * kvm_arch_fault_in_page - fault-in guest page if necessary
2468  * @vcpu: The corresponding virtual cpu
2469  * @gpa: Guest physical address
2470  * @writable: Whether the page should be writable or not
2471  *
2472  * Make sure that a guest page has been faulted-in on the host.
2473  *
2474  * Return: Zero on success, negative error code otherwise.
2475  */
2476 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2477 {
2478         return gmap_fault(vcpu->arch.gmap, gpa,
2479                           writable ? FAULT_FLAG_WRITE : 0);
2480 }
2481
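/*
 * Inject either a pfault init interrupt on the vcpu or a floating pfault
 * done interrupt on the VM, carrying the given token.
 */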
2482 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2483                                       unsigned long token)
2484 {
2485         struct kvm_s390_interrupt inti;
2486         struct kvm_s390_irq irq;
2487
2488         if (start_token) {
2489                 irq.u.ext.ext_params2 = token;
2490                 irq.type = KVM_S390_INT_PFAULT_INIT;
2491                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2492         } else {
2493                 inti.type = KVM_S390_INT_PFAULT_DONE;
2494                 inti.parm64 = token;
2495                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2496         }
2497 }
2498
2499 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2500                                      struct kvm_async_pf *work)
2501 {
2502         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2503         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2504 }
2505
2506 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2507                                  struct kvm_async_pf *work)
2508 {
2509         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2510         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2511 }
2512
2513 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
2514                                struct kvm_async_pf *work)
2515 {
2516         /* s390 will always inject the page directly */
2517 }
2518
2519 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2520 {
2521         /*
2522          * s390 will always inject the page directly,
 2523          * but we still want check_async_completion to clean up
2524          */
2525         return true;
2526 }
2527
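/*
 * Set up async handling of the current host fault (gmap_addr), provided the
 * guest has pfault handling enabled and can currently take the external
 * interrupt.
 */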
2528 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2529 {
2530         hva_t hva;
2531         struct kvm_arch_async_pf arch;
2532         int rc;
2533
2534         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2535                 return 0;
2536         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2537             vcpu->arch.pfault_compare)
2538                 return 0;
2539         if (psw_extint_disabled(vcpu))
2540                 return 0;
2541         if (kvm_s390_vcpu_has_irq(vcpu, 0))
2542                 return 0;
2543         if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2544                 return 0;
2545         if (!vcpu->arch.gmap->pfault_enabled)
2546                 return 0;
2547
2548         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2549         hva += current->thread.gmap_addr & ~PAGE_MASK;
2550         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2551                 return 0;
2552
2553         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2554         return rc;
2555 }
2556
2557 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2558 {
2559         int rc, cpuflags;
2560
2561         /*
 2562          * On s390, notifications for arriving pages will be delivered directly
 2563          * to the guest, but the housekeeping for completed pfaults is
 2564          * handled outside the worker.
2565          */
2566         kvm_check_async_pf_completion(vcpu);
2567
2568         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2569         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2570
2571         if (need_resched())
2572                 schedule();
2573
2574         if (test_cpu_flag(CIF_MCCK_PENDING))
2575                 s390_handle_mcck();
2576
2577         if (!kvm_is_ucontrol(vcpu->kvm)) {
2578                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2579                 if (rc)
2580                         return rc;
2581         }
2582
2583         rc = kvm_s390_handle_requests(vcpu);
2584         if (rc)
2585                 return rc;
2586
2587         if (guestdbg_enabled(vcpu)) {
2588                 kvm_s390_backup_guest_per_regs(vcpu);
2589                 kvm_s390_patch_guest_per_regs(vcpu);
2590         }
2591
2592         vcpu->arch.sie_block->icptcode = 0;
2593         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2594         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2595         trace_kvm_s390_sie_enter(vcpu, cpuflags);
2596
2597         return 0;
2598 }
2599
2600 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2601 {
2602         struct kvm_s390_pgm_info pgm_info = {
2603                 .code = PGM_ADDRESSING,
2604         };
2605         u8 opcode, ilen;
2606         int rc;
2607
2608         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2609         trace_kvm_s390_sie_fault(vcpu);
2610
2611         /*
2612          * We want to inject an addressing exception, which is defined as a
2613          * suppressing or terminating exception. However, since we came here
2614          * by a DAT access exception, the PSW still points to the faulting
2615          * instruction since DAT exceptions are nullifying. So we've got
2616          * to look up the current opcode to get the length of the instruction
2617          * to be able to forward the PSW.
2618          */
2619         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
2620         ilen = insn_length(opcode);
2621         if (rc < 0) {
2622                 return rc;
2623         } else if (rc) {
2624                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
2625                  * Forward by arbitrary ilc, injection will take care of
2626                  * nullification if necessary.
2627                  */
2628                 pgm_info = vcpu->arch.pgm;
2629                 ilen = 4;
2630         }
2631         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
2632         kvm_s390_forward_psw(vcpu, ilen);
2633         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
2634 }
2635
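/*
 * Handle the result of a SIE exit: dispatch intercepts (unhandled ones are
 * passed to userspace), report ucontrol faults, convert host page faults
 * into async pfaults or a synchronous fault-in, and inject an addressing
 * exception for other faults in SIE.
 */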
2636 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2637 {
2638         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2639                    vcpu->arch.sie_block->icptcode);
2640         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2641
2642         if (guestdbg_enabled(vcpu))
2643                 kvm_s390_restore_guest_per_regs(vcpu);
2644
2645         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
2646         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
2647
2648         if (vcpu->arch.sie_block->icptcode > 0) {
2649                 int rc = kvm_handle_sie_intercept(vcpu);
2650
2651                 if (rc != -EOPNOTSUPP)
2652                         return rc;
2653                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2654                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2655                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2656                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2657                 return -EREMOTE;
2658         } else if (exit_reason != -EFAULT) {
2659                 vcpu->stat.exit_null++;
2660                 return 0;
2661         } else if (kvm_is_ucontrol(vcpu->kvm)) {
2662                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2663                 vcpu->run->s390_ucontrol.trans_exc_code =
2664                                                 current->thread.gmap_addr;
2665                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2666                 return -EREMOTE;
2667         } else if (current->thread.gmap_pfault) {
2668                 trace_kvm_s390_major_guest_pfault(vcpu);
2669                 current->thread.gmap_pfault = 0;
2670                 if (kvm_arch_setup_async_pf(vcpu))
2671                         return 0;
2672                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2673         }
2674         return vcpu_post_run_fault_in_sie(vcpu);
2675 }
2676
2677 static int __vcpu_run(struct kvm_vcpu *vcpu)
2678 {
2679         int rc, exit_reason;
2680
2681         /*
 2682          * We try to hold kvm->srcu during most of vcpu_run (except when
 2683          * running the guest), so that memslots (and other stuff) are protected.
2684          */
2685         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2686
2687         do {
2688                 rc = vcpu_pre_run(vcpu);
2689                 if (rc)
2690                         break;
2691
2692                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2693                 /*
 2694                  * As PF_VCPU will be used in the fault handler, there must be
 2695                  * no uaccess between guest_enter and guest_exit.
2696                  */
2697                 local_irq_disable();
2698                 guest_enter_irqoff();
2699                 __disable_cpu_timer_accounting(vcpu);
2700                 local_irq_enable();
2701                 exit_reason = sie64a(vcpu->arch.sie_block,
2702                                      vcpu->run->s.regs.gprs);
2703                 local_irq_disable();
2704                 __enable_cpu_timer_accounting(vcpu);
2705                 guest_exit_irqoff();
2706                 local_irq_enable();
2707                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2708
2709                 rc = vcpu_post_run(vcpu, exit_reason);
2710         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2711
2712         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2713         return rc;
2714 }
2715
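/*
 * Load the register state that userspace marked dirty in kvm_run into the
 * vcpu and switch access and floating point registers from host to guest
 * context.
 */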
2716 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2717 {
2718         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2719         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2720         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2721                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2722         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2723                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2724                 /* some control register changes require a tlb flush */
2725                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2726         }
2727         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2728                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
2729                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2730                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2731                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2732                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2733         }
2734         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2735                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2736                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2737                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2738                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2739                         kvm_clear_async_pf_completion_queue(vcpu);
2740         }
2741         /*
2742          * If userspace sets the riccb (e.g. after migration) to a valid state,
2743          * we should enable RI here instead of doing the lazy enablement.
2744          */
2745         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
2746             test_kvm_facility(vcpu->kvm, 64)) {
2747                 struct runtime_instr_cb *riccb =
2748                         (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
2749
2750                 if (riccb->valid)
2751                         vcpu->arch.sie_block->ecb3 |= 0x01;
2752         }
2753         save_access_regs(vcpu->arch.host_acrs);
2754         restore_access_regs(vcpu->run->s.regs.acrs);
2755         /* save host (userspace) fprs/vrs */
2756         save_fpu_regs();
2757         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
2758         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
2759         if (MACHINE_HAS_VX)
2760                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
2761         else
2762                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
2763         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
2764         if (test_fp_ctl(current->thread.fpu.fpc))
2765                 /* User space provided an invalid FPC, let's clear it */
2766                 current->thread.fpu.fpc = 0;
2767
2768         kvm_run->kvm_dirty_regs = 0;
2769 }
2770
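/*
 * Write the vcpu register state back into kvm_run and restore the host
 * access and floating point register context.
 */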
2771 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2772 {
2773         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2774         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2775         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2776         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2777         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
2778         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2779         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2780         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2781         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2782         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2783         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2784         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2785         save_access_regs(vcpu->run->s.regs.acrs);
2786         restore_access_regs(vcpu->arch.host_acrs);
2787         /* Save guest register state */
2788         save_fpu_regs();
2789         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2790         /* Restore will be done lazily at return */
2791         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
2792         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
2793
2794 }
2795
2796 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2797 {
2798         int rc;
2799         sigset_t sigsaved;
2800
2801         if (guestdbg_exit_pending(vcpu)) {
2802                 kvm_s390_prepare_debug_exit(vcpu);
2803                 return 0;
2804         }
2805
2806         if (vcpu->sigset_active)
2807                 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2808
2809         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2810                 kvm_s390_vcpu_start(vcpu);
2811         } else if (is_vcpu_stopped(vcpu)) {
2812                 pr_err_ratelimited("can't run stopped vcpu %d\n",
2813                                    vcpu->vcpu_id);
2814                 return -EINVAL;
2815         }
2816
2817         sync_regs(vcpu, kvm_run);
2818         enable_cpu_timer_accounting(vcpu);
2819
2820         might_fault();
2821         rc = __vcpu_run(vcpu);
2822
2823         if (signal_pending(current) && !rc) {
2824                 kvm_run->exit_reason = KVM_EXIT_INTR;
2825                 rc = -EINTR;
2826         }
2827
2828         if (guestdbg_exit_pending(vcpu) && !rc)  {
2829                 kvm_s390_prepare_debug_exit(vcpu);
2830                 rc = 0;
2831         }
2832
2833         if (rc == -EREMOTE) {
2834                 /* userspace support is needed, kvm_run has been prepared */
2835                 rc = 0;
2836         }
2837
2838         disable_cpu_timer_accounting(vcpu);
2839         store_regs(vcpu, kvm_run);
2840
2841         if (vcpu->sigset_active)
2842                 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2843
2844         vcpu->stat.exit_userspace++;
2845         return rc;
2846 }
2847
2848 /*
2849  * store status at address
 2850  * we have two special cases:
2851  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2852  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2853  */
2854 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2855 {
2856         unsigned char archmode = 1;
2857         freg_t fprs[NUM_FPRS];
2858         unsigned int px;
2859         u64 clkcomp, cputm;
2860         int rc;
2861
2862         px = kvm_s390_get_prefix(vcpu);
2863         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2864                 if (write_guest_abs(vcpu, 163, &archmode, 1))
2865                         return -EFAULT;
2866                 gpa = 0;
2867         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2868                 if (write_guest_real(vcpu, 163, &archmode, 1))
2869                         return -EFAULT;
2870                 gpa = px;
2871         } else
2872                 gpa -= __LC_FPREGS_SAVE_AREA;
2873
2874         /* manually convert vector registers if necessary */
2875         if (MACHINE_HAS_VX) {
2876                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
2877                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2878                                      fprs, 128);
2879         } else {
2880                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2881                                      vcpu->run->s.regs.fprs, 128);
2882         }
2883         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
2884                               vcpu->run->s.regs.gprs, 128);
2885         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
2886                               &vcpu->arch.sie_block->gpsw, 16);
2887         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
2888                               &px, 4);
2889         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
2890                               &vcpu->run->s.regs.fpc, 4);
2891         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
2892                               &vcpu->arch.sie_block->todpr, 4);
2893         cputm = kvm_s390_get_cpu_timer(vcpu);
2894         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
2895                               &cputm, 8);
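        /* only bits 0-55 of the clock comparator are stored */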
2896         clkcomp = vcpu->arch.sie_block->ckc >> 8;
2897         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
2898                               &clkcomp, 8);
2899         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
2900                               &vcpu->run->s.regs.acrs, 64);
2901         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
2902                               &vcpu->arch.sie_block->gcr, 128);
2903         return rc ? -EFAULT : 0;
2904 }
2905
2906 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2907 {
2908         /*
2909          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2910          * switch in the run ioctl. Let's update our copies before we save
2911          * it into the save area
2912          * them into the save area.
2913         save_fpu_regs();
2914         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2915         save_access_regs(vcpu->run->s.regs.acrs);
2916
2917         return kvm_s390_store_status_unloaded(vcpu, addr);
2918 }
2919
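/*
 * IBS is only used while a single VCPU is running (see the start/stop
 * code below) and only if the SCLP reports the facility. The helpers
 * below clear a pending opposite request and queue a synced
 * ENABLE/DISABLE request for the target VCPU.
 */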
2920 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2921 {
2922         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2923         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2924 }
2925
2926 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2927 {
2928         unsigned int i;
2929         struct kvm_vcpu *vcpu;
2930
2931         kvm_for_each_vcpu(i, vcpu, kvm) {
2932                 __disable_ibs_on_vcpu(vcpu);
2933         }
2934 }
2935
2936 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2937 {
2938         if (!sclp.has_ibs)
2939                 return;
2940         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2941         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2942 }
2943
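/* Move a VCPU out of the STOPPED state and adjust IBS usage accordingly. */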
2944 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2945 {
2946         int i, online_vcpus, started_vcpus = 0;
2947
2948         if (!is_vcpu_stopped(vcpu))
2949                 return;
2950
2951         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2952         /* Only one cpu at a time may enter/leave the STOPPED state. */
2953         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2954         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2955
2956         for (i = 0; i < online_vcpus; i++) {
2957                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2958                         started_vcpus++;
2959         }
2960
2961         if (started_vcpus == 0) {
2962                 /* we're the only active VCPU -> speed it up */
2963                 __enable_ibs_on_vcpu(vcpu);
2964         } else if (started_vcpus == 1) {
2965                 /*
2966                  * As we are starting a second VCPU, we have to disable
2967                  * the IBS facility on all VCPUs to remove potentially
2968                  * outstanding ENABLE requests.
2969                  */
2970                 __disable_ibs_on_all_vcpus(vcpu->kvm);
2971         }
2972
2973         atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2974         /*
2975          * Another VCPU might have used IBS while we were offline.
2976          * Let's play safe and flush the VCPU at startup.
2977          */
2978         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2979         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2980         return;
2981 }
2982
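/*
 * Put a VCPU into the STOPPED state, clearing any pending stop interrupt
 * and re-enabling IBS if only one started VCPU remains.
 */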
2983 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2984 {
2985         int i, online_vcpus, started_vcpus = 0;
2986         struct kvm_vcpu *started_vcpu = NULL;
2987
2988         if (is_vcpu_stopped(vcpu))
2989                 return;
2990
2991         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2992         /* Only one cpu at a time may enter/leave the STOPPED state. */
2993         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2994         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2995
2996         /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
2997         kvm_s390_clear_stop_irq(vcpu);
2998
2999         atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3000         __disable_ibs_on_vcpu(vcpu);
3001
3002         for (i = 0; i < online_vcpus; i++) {
3003                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3004                         started_vcpus++;
3005                         started_vcpu = vcpu->kvm->vcpus[i];
3006                 }
3007         }
3008
3009         if (started_vcpus == 1) {
3010                 /*
3011                  * As we only have one VCPU left, we want to enable the
3012                  * IBS facility for that VCPU to speed it up.
3013                  */
3014                 __enable_ibs_on_vcpu(started_vcpu);
3015         }
3016
3017         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3018         return;
3019 }
3020
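/*
 * Handler for KVM_ENABLE_CAP on a VCPU fd. The only capability supported
 * here, KVM_CAP_S390_CSS_SUPPORT, enables channel subsystem support for
 * the whole VM.
 */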
3021 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3022                                      struct kvm_enable_cap *cap)
3023 {
3024         int r;
3025
3026         if (cap->flags)
3027                 return -EINVAL;
3028
3029         switch (cap->cap) {
3030         case KVM_CAP_S390_CSS_SUPPORT:
3031                 if (!vcpu->kvm->arch.css_support) {
3032                         vcpu->kvm->arch.css_support = 1;
3033                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3034                         trace_kvm_s390_enable_css(vcpu->kvm);
3035                 }
3036                 r = 0;
3037                 break;
3038         default:
3039                 r = -EINVAL;
3040                 break;
3041         }
3042         return r;
3043 }
3044
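/*
 * Handler for the KVM_S390_MEM_OP vcpu ioctl: validate flags and size,
 * optionally bounce the data through a vmalloc'ed buffer, perform the
 * logical read/write (or just the access check) under the kvm->srcu lock
 * and, if requested, inject the resulting program exception.
 */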
3045 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3046                                   struct kvm_s390_mem_op *mop)
3047 {
3048         void __user *uaddr = (void __user *)mop->buf;
3049         void *tmpbuf = NULL;
3050         int r, srcu_idx;
3051         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3052                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
3053
3054         if (mop->flags & ~supported_flags)
3055                 return -EINVAL;
3056
3057         if (mop->size > MEM_OP_MAX_SIZE)
3058                 return -E2BIG;
3059
3060         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3061                 tmpbuf = vmalloc(mop->size);
3062                 if (!tmpbuf)
3063                         return -ENOMEM;
3064         }
3065
3066         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3067
3068         switch (mop->op) {
3069         case KVM_S390_MEMOP_LOGICAL_READ:
3070                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3071                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3072                                             mop->size, GACC_FETCH);
3073                         break;
3074                 }
3075                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3076                 if (r == 0) {
3077                         if (copy_to_user(uaddr, tmpbuf, mop->size))
3078                                 r = -EFAULT;
3079                 }
3080                 break;
3081         case KVM_S390_MEMOP_LOGICAL_WRITE:
3082                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3083                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3084                                             mop->size, GACC_STORE);
3085                         break;
3086                 }
3087                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3088                         r = -EFAULT;
3089                         break;
3090                 }
3091                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3092                 break;
3093         default:
3094                 r = -EINVAL;
3095         }
3096
3097         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3098
3099         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3100                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3101
3102         vfree(tmpbuf);
3103         return r;
3104 }
3105
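/* Architecture specific handler for VCPU ioctls not covered by common code. */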
3106 long kvm_arch_vcpu_ioctl(struct file *filp,
3107                          unsigned int ioctl, unsigned long arg)
3108 {
3109         struct kvm_vcpu *vcpu = filp->private_data;
3110         void __user *argp = (void __user *)arg;
3111         int idx;
3112         long r;
3113
3114         switch (ioctl) {
3115         case KVM_S390_IRQ: {
3116                 struct kvm_s390_irq s390irq;
3117
3118                 r = -EFAULT;
3119                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3120                         break;
3121                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3122                 break;
3123         }
3124         case KVM_S390_INTERRUPT: {
3125                 struct kvm_s390_interrupt s390int;
3126                 struct kvm_s390_irq s390irq;
3127
3128                 r = -EFAULT;
3129                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3130                         break;
3131                 if (s390int_to_s390irq(&s390int, &s390irq))
3132                         return -EINVAL;
3133                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3134                 break;
3135         }
3136         case KVM_S390_STORE_STATUS:
3137                 idx = srcu_read_lock(&vcpu->kvm->srcu);
3138                 r = kvm_s390_vcpu_store_status(vcpu, arg);
3139                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3140                 break;
3141         case KVM_S390_SET_INITIAL_PSW: {
3142                 psw_t psw;
3143
3144                 r = -EFAULT;
3145                 if (copy_from_user(&psw, argp, sizeof(psw)))
3146                         break;
3147                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3148                 break;
3149         }
3150         case KVM_S390_INITIAL_RESET:
3151                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3152                 break;
3153         case KVM_SET_ONE_REG:
3154         case KVM_GET_ONE_REG: {
3155                 struct kvm_one_reg reg;
3156                 r = -EFAULT;
3157                 if (copy_from_user(&reg, argp, sizeof(reg)))
3158                         break;
3159                 if (ioctl == KVM_SET_ONE_REG)
3160                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3161                 else
3162                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3163                 break;
3164         }
3165 #ifdef CONFIG_KVM_S390_UCONTROL
3166         case KVM_S390_UCAS_MAP: {
3167                 struct kvm_s390_ucas_mapping ucasmap;
3168
3169                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3170                         r = -EFAULT;
3171                         break;
3172                 }
3173
3174                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3175                         r = -EINVAL;
3176                         break;
3177                 }
3178
3179                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3180                                      ucasmap.vcpu_addr, ucasmap.length);
3181                 break;
3182         }
3183         case KVM_S390_UCAS_UNMAP: {
3184                 struct kvm_s390_ucas_mapping ucasmap;
3185
3186                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3187                         r = -EFAULT;
3188                         break;
3189                 }
3190
3191                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3192                         r = -EINVAL;
3193                         break;
3194                 }
3195
3196                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3197                         ucasmap.length);
3198                 break;
3199         }
3200 #endif
3201         case KVM_S390_VCPU_FAULT: {
3202                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3203                 break;
3204         }
3205         case KVM_ENABLE_CAP:
3206         {
3207                 struct kvm_enable_cap cap;
3208                 r = -EFAULT;
3209                 if (copy_from_user(&cap, argp, sizeof(cap)))
3210                         break;
3211                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3212                 break;
3213         }
3214         case KVM_S390_MEM_OP: {
3215                 struct kvm_s390_mem_op mem_op;
3216
3217                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3218                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3219                 else
3220                         r = -EFAULT;
3221                 break;
3222         }
3223         case KVM_S390_SET_IRQ_STATE: {
3224                 struct kvm_s390_irq_state irq_state;
3225
3226                 r = -EFAULT;
3227                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3228                         break;
3229                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3230                     irq_state.len == 0 ||
3231                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3232                         r = -EINVAL;
3233                         break;
3234                 }
3235                 r = kvm_s390_set_irq_state(vcpu,
3236                                            (void __user *) irq_state.buf,
3237                                            irq_state.len);
3238                 break;
3239         }
3240         case KVM_S390_GET_IRQ_STATE: {
3241                 struct kvm_s390_irq_state irq_state;
3242
3243                 r = -EFAULT;
3244                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3245                         break;
3246                 if (irq_state.len == 0) {
3247                         r = -EINVAL;
3248                         break;
3249                 }
3250                 r = kvm_s390_get_irq_state(vcpu,
3251                                            (__u8 __user *)  irq_state.buf,
3252                                            irq_state.len);
3253                 break;
3254         }
3255         default:
3256                 r = -ENOTTY;
3257         }
3258         return r;
3259 }
3260
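/*
 * mmap() on a VCPU fd: user controlled (ucontrol) VMs may map the SIE
 * control block at KVM_S390_SIE_PAGE_OFFSET; all other accesses fault.
 */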
3261 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3262 {
3263 #ifdef CONFIG_KVM_S390_UCONTROL
3264         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3265                  && (kvm_is_ucontrol(vcpu->kvm))) {
3266                 vmf->page = virt_to_page(vcpu->arch.sie_block);
3267                 get_page(vmf->page);
3268                 return 0;
3269         }
3270 #endif
3271         return VM_FAULT_SIGBUS;
3272 }
3273
3274 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3275                             unsigned long npages)
3276 {
3277         return 0;
3278 }
3279
3280 /* Section: memory related */
3281 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3282                                    struct kvm_memory_slot *memslot,
3283                                    const struct kvm_userspace_memory_region *mem,
3284                                    enum kvm_mr_change change)
3285 {
3286         /* A few sanity checks. Memory slots have to start and end on a
3287            segment boundary (1MB). The memory in userland may be fragmented
3288            into various different vmas. It is okay to mmap() and munmap()
3289            memory in this slot at any time after this call. */
3290
3291         if (mem->userspace_addr & 0xffffful)
3292                 return -EINVAL;
3293
3294         if (mem->memory_size & 0xffffful)
3295                 return -EINVAL;
3296
3297         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3298                 return -EINVAL;
3299
3300         return 0;
3301 }
3302
3303 void kvm_arch_commit_memory_region(struct kvm *kvm,
3304                                 const struct kvm_userspace_memory_region *mem,
3305                                 const struct kvm_memory_slot *old,
3306                                 const struct kvm_memory_slot *new,
3307                                 enum kvm_mr_change change)
3308 {
3309         int rc;
3310
3311         /* If the basics of the memslot do not change, we do not want
3312          * to update the gmap. Every update causes several unnecessary
3313          * segment translation exceptions. This is usually handled just
3314          * fine by the normal fault handler + gmap, but it will also
3315          * cause faults on the prefix page of running guest CPUs.
3316          */
3317         if (old->userspace_addr == mem->userspace_addr &&
3318             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3319             old->npages * PAGE_SIZE == mem->memory_size)
3320                 return;
3321
3322         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3323                 mem->guest_phys_addr, mem->memory_size);
3324         if (rc)
3325                 pr_warn("failed to commit memory region\n");
3326         return;
3327 }
3328
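/*
 * Extract the i-th two-bit indication (counting from the most significant
 * bits) out of the hmfai word reported by the SCLP and turn it into a mask
 * for the corresponding facility-list doubleword: indication 0 keeps the
 * low 48 bits, each higher value strips another 16 bits from the top.
 */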
3329 static inline unsigned long nonhyp_mask(int i)
3330 {
3331         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3332
3333         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3334 }
3335
3336 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3337 {
3338         vcpu->valid_wakeup = false;
3339 }
3340
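/*
 * Module init: refuse to load when the SCLP reports no SIE support and
 * extend kvm_s390_fac_list_mask by the host facilities permitted by
 * nonhyp_mask() before registering with the common KVM code.
 */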
3341 static int __init kvm_s390_init(void)
3342 {
3343         int i;
3344
3345         if (!sclp.has_sief2) {
3346                 pr_info("SIE not available\n");
3347                 return -ENODEV;
3348         }
3349
3350         for (i = 0; i < 16; i++)
3351                 kvm_s390_fac_list_mask[i] |=
3352                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3353
3354         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3355 }
3356
3357 static void __exit kvm_s390_exit(void)
3358 {
3359         kvm_exit();
3360 }
3361
3362 module_init(kvm_s390_init);
3363 module_exit(kvm_s390_exit);
3364
3365 /*
3366  * Enable autoloading of the kvm module.
3367  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3368  * since x86 takes a different approach.
3369  */
3370 #include <linux/miscdevice.h>
3371 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3372 MODULE_ALIAS("devname:kvm");