[platform/kernel/linux-rpi.git] / arch/x86/kernel/kvm.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * KVM paravirt_ops implementation
4  *
5  * Copyright (C) 2007, Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
6  * Copyright IBM Corporation, 2007
7  *   Authors: Anthony Liguori <aliguori@us.ibm.com>
8  */
9
10 #include <linux/context_tracking.h>
11 #include <linux/init.h>
12 #include <linux/kernel.h>
13 #include <linux/kvm_para.h>
14 #include <linux/cpu.h>
15 #include <linux/mm.h>
16 #include <linux/highmem.h>
17 #include <linux/hardirq.h>
18 #include <linux/notifier.h>
19 #include <linux/reboot.h>
20 #include <linux/hash.h>
21 #include <linux/sched.h>
22 #include <linux/slab.h>
23 #include <linux/kprobes.h>
24 #include <linux/nmi.h>
25 #include <linux/swait.h>
26 #include <asm/timer.h>
27 #include <asm/cpu.h>
28 #include <asm/traps.h>
29 #include <asm/desc.h>
30 #include <asm/tlbflush.h>
31 #include <asm/apic.h>
32 #include <asm/apicdef.h>
33 #include <asm/hypervisor.h>
34 #include <asm/tlb.h>
35 #include <asm/cpuidle_haltpoll.h>
36
37 DEFINE_STATIC_KEY_FALSE(kvm_async_pf_enabled);
38
39 static int kvmapf = 1;
40
41 static int __init parse_no_kvmapf(char *arg)
42 {
43         kvmapf = 0;
44         return 0;
45 }
46
47 early_param("no-kvmapf", parse_no_kvmapf);
48
49 static int steal_acc = 1;
50 static int __init parse_no_stealacc(char *arg)
51 {
52         steal_acc = 0;
53         return 0;
54 }
55
56 early_param("no-steal-acc", parse_no_stealacc);
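/*
 * Illustrative example (not part of this file): both knobs are ordinary guest
 * kernel command line parameters, so a guest could boot with e.g.
 *
 *     ... no-kvmapf no-steal-acc ...
 *
 * to turn off the async page fault and steal time accounting features
 * registered above.
 */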
57
58 static DEFINE_PER_CPU_DECRYPTED(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
59 DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64) __visible;
60 static int has_steal_clock = 0;
61
62 /*
63  * No need for any "IO delay" on KVM
64  */
65 static void kvm_io_delay(void)
66 {
67 }
68
69 #define KVM_TASK_SLEEP_HASHBITS 8
70 #define KVM_TASK_SLEEP_HASHSIZE (1<<KVM_TASK_SLEEP_HASHBITS)
71
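/*
 * Tasks sleeping on an async #PF are tracked in a small hash table keyed by
 * the token the host delivers with the "page not present" notification; the
 * later "page ready" notification carries the same token and is used to find
 * and wake the sleeping task.
 */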
72 struct kvm_task_sleep_node {
73         struct hlist_node link;
74         struct swait_queue_head wq;
75         u32 token;
76         int cpu;
77 };
78
79 static struct kvm_task_sleep_head {
80         raw_spinlock_t lock;
81         struct hlist_head list;
82 } async_pf_sleepers[KVM_TASK_SLEEP_HASHSIZE];
83
84 static struct kvm_task_sleep_node *_find_apf_task(struct kvm_task_sleep_head *b,
85                                                   u32 token)
86 {
87         struct hlist_node *p;
88
89         hlist_for_each(p, &b->list) {
90                 struct kvm_task_sleep_node *n =
91                         hlist_entry(p, typeof(*n), link);
92                 if (n->token == token)
93                         return n;
94         }
95
96         return NULL;
97 }
98
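/*
 * Queue @n for @token on its hash bucket. Returns false if a wakeup for this
 * token already arrived (a dummy entry is found), in which case the caller
 * does not need to sleep at all.
 */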
99 static bool kvm_async_pf_queue_task(u32 token, struct kvm_task_sleep_node *n)
100 {
101         u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
102         struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
103         struct kvm_task_sleep_node *e;
104
105         raw_spin_lock(&b->lock);
106         e = _find_apf_task(b, token);
107         if (e) {
108                 /* A dummy entry exists -> the wakeup was delivered ahead of the #PF. */
109                 hlist_del(&e->link);
110                 raw_spin_unlock(&b->lock);
111                 kfree(e);
112                 return false;
113         }
114
115         n->token = token;
116         n->cpu = smp_processor_id();
117         init_swait_queue_head(&n->wq);
118         hlist_add_head(&n->link, &b->list);
119         raw_spin_unlock(&b->lock);
120         return true;
121 }
122
123 /*
124  * kvm_async_pf_task_wait_schedule - Wait for pagefault to be handled
125  * @token:      Token to identify the sleep node entry
126  *
127  * Invoked from the async pagefault handling code or from the VM exit page
128  * fault handler. In both cases RCU is watching.
129  */
130 void kvm_async_pf_task_wait_schedule(u32 token)
131 {
132         struct kvm_task_sleep_node n;
133         DECLARE_SWAITQUEUE(wait);
134
135         lockdep_assert_irqs_disabled();
136
137         if (!kvm_async_pf_queue_task(token, &n))
138                 return;
139
140         for (;;) {
141                 prepare_to_swait_exclusive(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
142                 if (hlist_unhashed(&n.link))
143                         break;
144
145                 local_irq_enable();
146                 schedule();
147                 local_irq_disable();
148         }
149         finish_swait(&n.wq, &wait);
150 }
151 EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait_schedule);
152
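/*
 * Wake-up helpers: apf_task_wake_one() expects the bucket lock to be held by
 * the caller; apf_task_wake_all() wakes every waiter queued on the current
 * CPU and is used for the "wake all" token and for CPU offlining.
 */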
153 static void apf_task_wake_one(struct kvm_task_sleep_node *n)
154 {
155         hlist_del_init(&n->link);
156         if (swq_has_sleeper(&n->wq))
157                 swake_up_one(&n->wq);
158 }
159
160 static void apf_task_wake_all(void)
161 {
162         int i;
163
164         for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) {
165                 struct kvm_task_sleep_head *b = &async_pf_sleepers[i];
166                 struct kvm_task_sleep_node *n;
167                 struct hlist_node *p, *next;
168
169                 raw_spin_lock(&b->lock);
170                 hlist_for_each_safe(p, next, &b->list) {
171                         n = hlist_entry(p, typeof(*n), link);
172                         if (n->cpu == smp_processor_id())
173                                 apf_task_wake_one(n);
174                 }
175                 raw_spin_unlock(&b->lock);
176         }
177 }
178
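/*
 * Handle a "page ready" notification: a token of ~0 wakes all waiters on this
 * CPU; otherwise wake the task sleeping on @token, or leave a dummy node
 * behind if the wakeup arrived before the corresponding "page not present"
 * fault was queued.
 */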
179 void kvm_async_pf_task_wake(u32 token)
180 {
181         u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
182         struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
183         struct kvm_task_sleep_node *n;
184
185         if (token == ~0) {
186                 apf_task_wake_all();
187                 return;
188         }
189
190 again:
191         raw_spin_lock(&b->lock);
192         n = _find_apf_task(b, token);
193         if (!n) {
194                 /*
195                  * The async #PF was not yet handled;
196                  * add a dummy entry for the token.
197                  */
198                 n = kzalloc(sizeof(*n), GFP_ATOMIC);
199                 if (!n) {
200                         /*
201                          * Allocation failed! Busy-wait while another CPU
202                          * handles the async #PF.
203                          */
204                         raw_spin_unlock(&b->lock);
205                         cpu_relax();
206                         goto again;
207                 }
208                 n->token = token;
209                 n->cpu = smp_processor_id();
210                 init_swait_queue_head(&n->wq);
211                 hlist_add_head(&n->link, &b->list);
212         } else {
213                 apf_task_wake_one(n);
214         }
215         raw_spin_unlock(&b->lock);
216         return;
217 }
218 EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake);
219
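/*
 * The two helpers below run from the #PF entry path before instrumentation
 * is allowed, hence noinstr: they consume the per-CPU apf_reason area that
 * kvm_guest_cpu_init() shares with the host via MSR_KVM_ASYNC_PF_EN.
 */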
220 noinstr u32 kvm_read_and_reset_apf_flags(void)
221 {
222         u32 flags = 0;
223
224         if (__this_cpu_read(apf_reason.enabled)) {
225                 flags = __this_cpu_read(apf_reason.flags);
226                 __this_cpu_write(apf_reason.flags, 0);
227         }
228
229         return flags;
230 }
231 EXPORT_SYMBOL_GPL(kvm_read_and_reset_apf_flags);
232
233 noinstr bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
234 {
235         u32 reason = kvm_read_and_reset_apf_flags();
236         bool rcu_exit;
237
238         switch (reason) {
239         case KVM_PV_REASON_PAGE_NOT_PRESENT:
240         case KVM_PV_REASON_PAGE_READY:
241                 break;
242         default:
243                 return false;
244         }
245
246         rcu_exit = idtentry_enter_cond_rcu(regs);
247         instrumentation_begin();
248
249         /*
250          * If the host managed to inject an async #PF into an interrupt
251          * disabled region, then die hard as this is not going to end well
252          * and the host side is seriously broken.
253          */
254         if (unlikely(!(regs->flags & X86_EFLAGS_IF)))
255                 panic("Host injected async #PF in interrupt disabled region\n");
256
257         if (reason == KVM_PV_REASON_PAGE_NOT_PRESENT) {
258                 if (unlikely(!(user_mode(regs))))
259                         panic("Host injected async #PF in kernel mode\n");
260                 /* Page is swapped out by the host. */
261                 kvm_async_pf_task_wait_schedule(token);
262         } else {
263                 kvm_async_pf_task_wake(token);
264         }
265
266         instrumentation_end();
267         idtentry_exit_cond_rcu(regs, rcu_exit);
268         return true;
269 }
270
271 static void __init paravirt_ops_setup(void)
272 {
273         pv_info.name = "KVM";
274
275         if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY))
276                 pv_ops.cpu.io_delay = kvm_io_delay;
277
278 #ifdef CONFIG_X86_IO_APIC
279         no_timer_check = 1;
280 #endif
281 }
282
283 static void kvm_register_steal_time(void)
284 {
285         int cpu = smp_processor_id();
286         struct kvm_steal_time *st = &per_cpu(steal_time, cpu);
287
288         if (!has_steal_clock)
289                 return;
290
291         wrmsrl(MSR_KVM_STEAL_TIME, (slow_virt_to_phys(st) | KVM_MSR_ENABLED));
292         pr_info("kvm-stealtime: cpu %d, msr %llx\n",
293                 cpu, (unsigned long long) slow_virt_to_phys(st));
294 }
295
296 static DEFINE_PER_CPU_DECRYPTED(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED;
297
298 static notrace void kvm_guest_apic_eoi_write(u32 reg, u32 val)
299 {
300         /*
301          * This relies on __test_and_clear_bit to modify the memory
302          * in a way that is atomic with respect to the local CPU.
303          * The hypervisor only accesses this memory from the local CPU, so
304          * there is no need for locks or memory barriers.
305          * An optimization barrier is implied in the APIC write.
306          */
307         if (__test_and_clear_bit(KVM_PV_EOI_BIT, this_cpu_ptr(&kvm_apic_eoi)))
308                 return;
309         apic->native_eoi_write(APIC_EOI, APIC_EOI_ACK);
310 }
311
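/*
 * Per-CPU guest setup: hand the physical addresses of the shared apf_reason,
 * kvm_apic_eoi and steal_time areas to the host through MSR_KVM_ASYNC_PF_EN,
 * MSR_KVM_PV_EOI_EN and MSR_KVM_STEAL_TIME, for each feature that is present.
 */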
312 static void kvm_guest_cpu_init(void)
313 {
314         if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) {
315                 u64 pa;
316
317                 WARN_ON_ONCE(!static_branch_likely(&kvm_async_pf_enabled));
318
319                 pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason));
320                 pa |= KVM_ASYNC_PF_ENABLED;
321
322                 if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT))
323                         pa |= KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
324
325                 wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
326                 __this_cpu_write(apf_reason.enabled, 1);
327                 pr_info("KVM setup async PF for cpu %d\n", smp_processor_id());
328         }
329
330         if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) {
331                 unsigned long pa;
332
333                 /* Size alignment is implied but just to make it explicit. */
334                 BUILD_BUG_ON(__alignof__(kvm_apic_eoi) < 4);
335                 __this_cpu_write(kvm_apic_eoi, 0);
336                 pa = slow_virt_to_phys(this_cpu_ptr(&kvm_apic_eoi))
337                         | KVM_MSR_ENABLED;
338                 wrmsrl(MSR_KVM_PV_EOI_EN, pa);
339         }
340
341         if (has_steal_clock)
342                 kvm_register_steal_time();
343 }
344
345 static void kvm_pv_disable_apf(void)
346 {
347         if (!__this_cpu_read(apf_reason.enabled))
348                 return;
349
350         wrmsrl(MSR_KVM_ASYNC_PF_EN, 0);
351         __this_cpu_write(apf_reason.enabled, 0);
352
353         pr_info("Unregister pv shared memory for cpu %d\n", smp_processor_id());
354 }
355
356 static void kvm_pv_guest_cpu_reboot(void *unused)
357 {
358         /*
359          * We disable PV EOI before we load a new kernel by kexec,
360          * since MSR_KVM_PV_EOI_EN stores a pointer into the old kernel's memory.
361          * The new kernel can re-enable it when it boots.
362          */
363         if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
364                 wrmsrl(MSR_KVM_PV_EOI_EN, 0);
365         kvm_pv_disable_apf();
366         kvm_disable_steal_time();
367 }
368
369 static int kvm_pv_reboot_notify(struct notifier_block *nb,
370                                 unsigned long code, void *unused)
371 {
372         if (code == SYS_RESTART)
373                 on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1);
374         return NOTIFY_DONE;
375 }
376
377 static struct notifier_block kvm_pv_reboot_nb = {
378         .notifier_call = kvm_pv_reboot_notify,
379 };
380
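/*
 * The host increments steal_time.version before and after every update, so
 * an odd or changed version below means the read raced with an update and
 * must be retried; the virt_rmb()s order the version reads around the read
 * of the steal value.
 */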
381 static u64 kvm_steal_clock(int cpu)
382 {
383         u64 steal;
384         struct kvm_steal_time *src;
385         int version;
386
387         src = &per_cpu(steal_time, cpu);
388         do {
389                 version = src->version;
390                 virt_rmb();
391                 steal = src->steal;
392                 virt_rmb();
393         } while ((version & 1) || (version != src->version));
394
395         return steal;
396 }
397
398 void kvm_disable_steal_time(void)
399 {
400         if (!has_steal_clock)
401                 return;
402
403         wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
404 }
405
406 static inline void __set_percpu_decrypted(void *ptr, unsigned long size)
407 {
408         early_set_memory_decrypted((unsigned long) ptr, size);
409 }
410
411 /*
412  * Iterate through all possible CPUs and map the memory regions pointed
413  * to by apf_reason, steal_time and kvm_apic_eoi as decrypted at once.
414  *
415  * Note: we iterate through all possible CPUs to ensure that CPUs
416  * hotplugged later will have their per-CPU variables already mapped as
417  * decrypted.
418  */
419 static void __init sev_map_percpu_data(void)
420 {
421         int cpu;
422
423         if (!sev_active())
424                 return;
425
426         for_each_possible_cpu(cpu) {
427                 __set_percpu_decrypted(&per_cpu(apf_reason, cpu), sizeof(apf_reason));
428                 __set_percpu_decrypted(&per_cpu(steal_time, cpu), sizeof(steal_time));
429                 __set_percpu_decrypted(&per_cpu(kvm_apic_eoi, cpu), sizeof(kvm_apic_eoi));
430         }
431 }
432
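/*
 * PV TLB flush also depends on steal time because kvm_flush_tlb_others()
 * reads the vCPU preempted state out of the steal_time area; with the
 * KVM_HINTS_REALTIME hint vCPUs are never preempted, so it would buy nothing.
 */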
433 static bool pv_tlb_flush_supported(void)
434 {
435         return (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
436                 !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
437                 kvm_para_has_feature(KVM_FEATURE_STEAL_TIME));
438 }
439
440 static DEFINE_PER_CPU(cpumask_var_t, __pv_cpu_mask);
441
442 #ifdef CONFIG_SMP
443
444 static bool pv_ipi_supported(void)
445 {
446         return kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI);
447 }
448
449 static bool pv_sched_yield_supported(void)
450 {
451         return (kvm_para_has_feature(KVM_FEATURE_PV_SCHED_YIELD) &&
452                 !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
453             kvm_para_has_feature(KVM_FEATURE_STEAL_TIME));
454 }
455
456 #define KVM_IPI_CLUSTER_SIZE    (2 * BITS_PER_LONG)
457
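/*
 * KVM_HC_SEND_IPI takes a bitmap of destination APIC IDs expressed relative
 * to the lowest APIC ID in the batch ("min") and at most KVM_IPI_CLUSTER_SIZE
 * bits wide, so the loop below groups destinations into such clusters and
 * issues one hypercall per cluster.
 */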
458 static void __send_ipi_mask(const struct cpumask *mask, int vector)
459 {
460         unsigned long flags;
461         int cpu, apic_id, icr;
462         int min = 0, max = 0;
463 #ifdef CONFIG_X86_64
464         __uint128_t ipi_bitmap = 0;
465 #else
466         u64 ipi_bitmap = 0;
467 #endif
468         long ret;
469
470         if (cpumask_empty(mask))
471                 return;
472
473         local_irq_save(flags);
474
475         switch (vector) {
476         default:
477                 icr = APIC_DM_FIXED | vector;
478                 break;
479         case NMI_VECTOR:
480                 icr = APIC_DM_NMI;
481                 break;
482         }
483
484         for_each_cpu(cpu, mask) {
485                 apic_id = per_cpu(x86_cpu_to_apicid, cpu);
486                 if (!ipi_bitmap) {
487                         min = max = apic_id;
488                 } else if (apic_id < min && max - apic_id < KVM_IPI_CLUSTER_SIZE) {
489                         ipi_bitmap <<= min - apic_id;
490                         min = apic_id;
491                 } else if (apic_id < min + KVM_IPI_CLUSTER_SIZE) {
492                         max = apic_id < max ? max : apic_id;
493                 } else {
494                         ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
495                                 (unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
496                         WARN_ONCE(ret < 0, "KVM: failed to send PV IPI: %ld", ret);
497                         min = max = apic_id;
498                         ipi_bitmap = 0;
499                 }
500                 __set_bit(apic_id - min, (unsigned long *)&ipi_bitmap);
501         }
502
503         if (ipi_bitmap) {
504                 ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
505                         (unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
506                 WARN_ONCE(ret < 0, "KVM: failed to send PV IPI: %ld", ret);
507         }
508
509         local_irq_restore(flags);
510 }
511
512 static void kvm_send_ipi_mask(const struct cpumask *mask, int vector)
513 {
514         __send_ipi_mask(mask, vector);
515 }
516
517 static void kvm_send_ipi_mask_allbutself(const struct cpumask *mask, int vector)
518 {
519         unsigned int this_cpu = smp_processor_id();
520         struct cpumask *new_mask = this_cpu_cpumask_var_ptr(__pv_cpu_mask);
521         const struct cpumask *local_mask;
522
523         cpumask_copy(new_mask, mask);
524         cpumask_clear_cpu(this_cpu, new_mask);
525         local_mask = new_mask;
526         __send_ipi_mask(local_mask, vector);
527 }
528
529 /*
530  * Set the IPI entry points
531  */
532 static void kvm_setup_pv_ipi(void)
533 {
534         apic->send_IPI_mask = kvm_send_ipi_mask;
535         apic->send_IPI_mask_allbutself = kvm_send_ipi_mask_allbutself;
536         pr_info("KVM setup pv IPIs\n");
537 }
538
539 static void kvm_smp_send_call_func_ipi(const struct cpumask *mask)
540 {
541         int cpu;
542
543         native_send_call_func_ipi(mask);
544
545         /* Make sure other vCPUs get a chance to run if they need to. */
546         for_each_cpu(cpu, mask) {
547                 if (vcpu_is_preempted(cpu)) {
548                         kvm_hypercall1(KVM_HC_SCHED_YIELD, per_cpu(x86_cpu_to_apicid, cpu));
549                         break;
550                 }
551         }
552 }
553
554 static void __init kvm_smp_prepare_cpus(unsigned int max_cpus)
555 {
556         native_smp_prepare_cpus(max_cpus);
557         if (kvm_para_has_hint(KVM_HINTS_REALTIME))
558                 static_branch_disable(&virt_spin_lock_key);
559 }
560
561 static void __init kvm_smp_prepare_boot_cpu(void)
562 {
563         /*
564          * Map the per-cpu variables as decrypted before kvm_guest_cpu_init()
565          * shares the guest physical address with the hypervisor.
566          */
567         sev_map_percpu_data();
568
569         kvm_guest_cpu_init();
570         native_smp_prepare_boot_cpu();
571         kvm_spinlock_init();
572 }
573
574 static void kvm_guest_cpu_offline(void)
575 {
576         kvm_disable_steal_time();
577         if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
578                 wrmsrl(MSR_KVM_PV_EOI_EN, 0);
579         kvm_pv_disable_apf();
580         apf_task_wake_all();
581 }
582
583 static int kvm_cpu_online(unsigned int cpu)
584 {
585         local_irq_disable();
586         kvm_guest_cpu_init();
587         local_irq_enable();
588         return 0;
589 }
590
591 static int kvm_cpu_down_prepare(unsigned int cpu)
592 {
593         local_irq_disable();
594         kvm_guest_cpu_offline();
595         local_irq_enable();
596         return 0;
597 }
598 #endif
599
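/*
 * Paravirtual remote TLB flush: preempted vCPUs are dropped from the IPI
 * mask and instead get KVM_VCPU_FLUSH_TLB set in their steal_time area, so
 * the host flushes their TLB when they are scheduled back in.
 */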
600 static void kvm_flush_tlb_others(const struct cpumask *cpumask,
601                         const struct flush_tlb_info *info)
602 {
603         u8 state;
604         int cpu;
605         struct kvm_steal_time *src;
606         struct cpumask *flushmask = this_cpu_cpumask_var_ptr(__pv_cpu_mask);
607
608         cpumask_copy(flushmask, cpumask);
609         /*
610          * We only need to flush running vCPUs; for preempted vCPUs, set
611          * KVM_VCPU_FLUSH_TLB so the host flushes on the next VM entry.
612          */
613         for_each_cpu(cpu, flushmask) {
614                 src = &per_cpu(steal_time, cpu);
615                 state = READ_ONCE(src->preempted);
616                 if ((state & KVM_VCPU_PREEMPTED)) {
617                         if (try_cmpxchg(&src->preempted, &state,
618                                         state | KVM_VCPU_FLUSH_TLB))
619                                 __cpumask_clear_cpu(cpu, flushmask);
620                 }
621         }
622
623         native_flush_tlb_others(flushmask, info);
624 }
625
626 static void __init kvm_guest_init(void)
627 {
628         int i;
629
630         paravirt_ops_setup();
631         register_reboot_notifier(&kvm_pv_reboot_nb);
632         for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++)
633                 raw_spin_lock_init(&async_pf_sleepers[i].lock);
634
635         if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
636                 has_steal_clock = 1;
637                 pv_ops.time.steal_clock = kvm_steal_clock;
638         }
639
640         if (pv_tlb_flush_supported()) {
641                 pv_ops.mmu.flush_tlb_others = kvm_flush_tlb_others;
642                 pv_ops.mmu.tlb_remove_table = tlb_remove_table;
643                 pr_info("KVM setup pv remote TLB flush\n");
644         }
645
646         if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
647                 apic_set_eoi_write(kvm_guest_apic_eoi_write);
648
649         if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf)
650                 static_branch_enable(&kvm_async_pf_enabled);
651
652 #ifdef CONFIG_SMP
653         smp_ops.smp_prepare_cpus = kvm_smp_prepare_cpus;
654         smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
655         if (pv_sched_yield_supported()) {
656                 smp_ops.send_call_func_ipi = kvm_smp_send_call_func_ipi;
657                 pr_info("KVM setup pv sched yield\n");
658         }
659         if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/kvm:online",
660                                       kvm_cpu_online, kvm_cpu_down_prepare) < 0)
661                 pr_err("kvm_guest: Failed to install cpu hotplug callbacks\n");
662 #else
663         sev_map_percpu_data();
664         kvm_guest_cpu_init();
665 #endif
666
667         /*
668          * Hard lockup detection is enabled by default. Disable it, as guests
669          * can get false positives too easily, for example if the host is
670          * overcommitted.
671          */
672         hardlockup_detector_disable();
673 }
674
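/*
 * KVM advertises itself with the "KVMKVMKVM\0\0\0" signature in the
 * hypervisor CPUID leaf range; kvm_cpuid_base() caches that base leaf (0 when
 * not running on KVM), and the feature/hint leaves are addressed relative to
 * it via KVM_CPUID_FEATURES below.
 */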
675 static noinline uint32_t __kvm_cpuid_base(void)
676 {
677         if (boot_cpu_data.cpuid_level < 0)
678                 return 0;       /* So we don't blow up on old processors */
679
680         if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
681                 return hypervisor_cpuid_base("KVMKVMKVM\0\0\0", 0);
682
683         return 0;
684 }
685
686 static inline uint32_t kvm_cpuid_base(void)
687 {
688         static int kvm_cpuid_base = -1;
689
690         if (kvm_cpuid_base == -1)
691                 kvm_cpuid_base = __kvm_cpuid_base();
692
693         return kvm_cpuid_base;
694 }
695
696 bool kvm_para_available(void)
697 {
698         return kvm_cpuid_base() != 0;
699 }
700 EXPORT_SYMBOL_GPL(kvm_para_available);
701
702 unsigned int kvm_arch_para_features(void)
703 {
704         return cpuid_eax(kvm_cpuid_base() | KVM_CPUID_FEATURES);
705 }
706
707 unsigned int kvm_arch_para_hints(void)
708 {
709         return cpuid_edx(kvm_cpuid_base() | KVM_CPUID_FEATURES);
710 }
711 EXPORT_SYMBOL_GPL(kvm_arch_para_hints);
712
713 static uint32_t __init kvm_detect(void)
714 {
715         return kvm_cpuid_base();
716 }
717
718 static void __init kvm_apic_init(void)
719 {
720 #if defined(CONFIG_SMP)
721         if (pv_ipi_supported())
722                 kvm_setup_pv_ipi();
723 #endif
724 }
725
726 static void __init kvm_init_platform(void)
727 {
728         kvmclock_init();
729         x86_platform.apic_post_init = kvm_apic_init;
730 }
731
732 const __initconst struct hypervisor_x86 x86_hyper_kvm = {
733         .name                   = "KVM",
734         .detect                 = kvm_detect,
735         .type                   = X86_HYPER_KVM,
736         .init.guest_late_init   = kvm_guest_init,
737         .init.x2apic_available  = kvm_para_available,
738         .init.init_platform     = kvm_init_platform,
739 };
740
741 static __init int activate_jump_labels(void)
742 {
743         if (has_steal_clock) {
744                 static_key_slow_inc(&paravirt_steal_enabled);
745                 if (steal_acc)
746                         static_key_slow_inc(&paravirt_steal_rq_enabled);
747         }
748
749         return 0;
750 }
751 arch_initcall(activate_jump_labels);
752
753 static __init int kvm_alloc_cpumask(void)
754 {
755         int cpu;
756         bool alloc = false;
757
758         if (!kvm_para_available() || nopv)
759                 return 0;
760
761         if (pv_tlb_flush_supported())
762                 alloc = true;
763
764 #if defined(CONFIG_SMP)
765         if (pv_ipi_supported())
766                 alloc = true;
767 #endif
768
769         if (alloc)
770                 for_each_possible_cpu(cpu) {
771                         zalloc_cpumask_var_node(per_cpu_ptr(&__pv_cpu_mask, cpu),
772                                 GFP_KERNEL, cpu_to_node(cpu));
773                 }
774
775         return 0;
776 }
777 arch_initcall(kvm_alloc_cpumask);
778
779 #ifdef CONFIG_PARAVIRT_SPINLOCKS
780
781 /* Kick a CPU by its APIC ID. Used to wake up a halted vCPU. */
782 static void kvm_kick_cpu(int cpu)
783 {
784         int apicid;
785         unsigned long flags = 0;
786
787         apicid = per_cpu(x86_cpu_to_apicid, cpu);
788         kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid);
789 }
790
791 #include <asm/qspinlock.h>
792
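/*
 * pvqspinlock wait hook: halt this vCPU until the lock holder kicks it via
 * kvm_kick_cpu()/KVM_HC_KICK_CPU. *ptr is re-checked with interrupts disabled
 * before halting so that the vCPU does not sleep on a stale value.
 */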
793 static void kvm_wait(u8 *ptr, u8 val)
794 {
795         unsigned long flags;
796
797         if (in_nmi())
798                 return;
799
800         local_irq_save(flags);
801
802         if (READ_ONCE(*ptr) != val)
803                 goto out;
804
805         /*
806          * Halt until it's our turn and we get kicked. Note that we do a safe halt
807          * for the IRQ-enabled case, to avoid a hang when the lock info is overwritten
808          * in the IRQ spinlock slowpath and no spurious interrupt occurs to save us.
809          */
810         if (arch_irqs_disabled_flags(flags))
811                 halt();
812         else
813                 safe_halt();
814
815 out:
816         local_irq_restore(flags);
817 }
818
819 #ifdef CONFIG_X86_32
820 __visible bool __kvm_vcpu_is_preempted(long cpu)
821 {
822         struct kvm_steal_time *src = &per_cpu(steal_time, cpu);
823
824         return !!(src->preempted & KVM_VCPU_PREEMPTED);
825 }
826 PV_CALLEE_SAVE_REGS_THUNK(__kvm_vcpu_is_preempted);
827
828 #else
829
830 #include <asm/asm-offsets.h>
831
832 extern bool __raw_callee_save___kvm_vcpu_is_preempted(long);
833
834 /*
835  * Hand-optimized version for x86-64 that avoids saving and restoring
836  * eight 64-bit registers to/from the stack.
837  */
838 asm(
839 ".pushsection .text;"
840 ".global __raw_callee_save___kvm_vcpu_is_preempted;"
841 ".type __raw_callee_save___kvm_vcpu_is_preempted, @function;"
842 "__raw_callee_save___kvm_vcpu_is_preempted:"
843 "movq   __per_cpu_offset(,%rdi,8), %rax;"
844 "cmpb   $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax);"
845 "setne  %al;"
846 "ret;"
847 ".size __raw_callee_save___kvm_vcpu_is_preempted, .-__raw_callee_save___kvm_vcpu_is_preempted;"
848 ".popsection");
849
850 #endif
851
852 /*
853  * Set up pv_ops.lock to exploit KVM_FEATURE_PV_UNHALT if present.
854  */
855 void __init kvm_spinlock_init(void)
856 {
857         /* Does the host kernel support KVM_FEATURE_PV_UNHALT? */
858         if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
859                 return;
860
861         if (kvm_para_has_hint(KVM_HINTS_REALTIME))
862                 return;
863
864         /* Don't use the pvqspinlock code if there is only 1 vCPU. */
865         if (num_possible_cpus() == 1)
866                 return;
867
868         __pv_init_lock_hash();
869         pv_ops.lock.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
870         pv_ops.lock.queued_spin_unlock =
871                 PV_CALLEE_SAVE(__pv_queued_spin_unlock);
872         pv_ops.lock.wait = kvm_wait;
873         pv_ops.lock.kick = kvm_kick_cpu;
874
875         if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
876                 pv_ops.lock.vcpu_is_preempted =
877                         PV_CALLEE_SAVE(__kvm_vcpu_is_preempted);
878         }
879 }
880
881 #endif  /* CONFIG_PARAVIRT_SPINLOCKS */
882
883 #ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL
884
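/*
 * MSR_KVM_POLL_CONTROL selects who polls on HLT: writing 0 asks the host to
 * stop halt-polling for this vCPU (the guest haltpoll governor polls
 * instead), while writing 1 hands polling back to the host. This matches how
 * arch_haltpoll_enable()/arch_haltpoll_disable() below use the two helpers.
 */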
885 static void kvm_disable_host_haltpoll(void *i)
886 {
887         wrmsrl(MSR_KVM_POLL_CONTROL, 0);
888 }
889
890 static void kvm_enable_host_haltpoll(void *i)
891 {
892         wrmsrl(MSR_KVM_POLL_CONTROL, 1);
893 }
894
895 void arch_haltpoll_enable(unsigned int cpu)
896 {
897         if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL)) {
898                 pr_err_once("kvm: host does not support poll control\n");
899                 pr_err_once("kvm: host upgrade recommended\n");
900                 return;
901         }
902
903         /* Enabling guest halt poll disables host halt poll. */
904         smp_call_function_single(cpu, kvm_disable_host_haltpoll, NULL, 1);
905 }
906 EXPORT_SYMBOL_GPL(arch_haltpoll_enable);
907
908 void arch_haltpoll_disable(unsigned int cpu)
909 {
910         if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL))
911                 return;
912
913         /* Disabling guest halt poll re-enables host halt poll. */
914         smp_call_function_single(cpu, kvm_enable_host_haltpoll, NULL, 1);
915 }
916 EXPORT_SYMBOL_GPL(arch_haltpoll_disable);
917 #endif