Drivers: hv: vmbus: Support fully enlightened TDX guests
[platform/kernel/linux-starfive.git] / drivers/hv/hv.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/hyperv.h>
#include <linux/random.h>
#include <linux/clockchips.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <clocksource/hyperv_timer.h>
#include <asm/mshyperv.h>
#include <linux/set_memory.h>
#include "hyperv_vmbus.h"

/* The one and only */
struct hv_context hv_context;

/*
 * hv_init - Main initialization routine.
 *
 * This routine must be called before any other routines in here are called
 */
int hv_init(void)
{
        hv_context.cpu_context = alloc_percpu(struct hv_per_cpu_context);
        if (!hv_context.cpu_context)
                return -ENOMEM;
        return 0;
}
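/*
 * A note on hv_init(): it only sets up the per-cpu context structures; the
 * SynIC message/event pages and registers are handled later by
 * hv_synic_alloc() and hv_synic_enable_regs().  It is expected to be called
 * once, early in the VMBus bus initialization path.
 */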

/*
 * hv_post_message - Post a message using the hypervisor message IPC.
 *
 * This involves a hypercall.
 */
int hv_post_message(union hv_connection_id connection_id,
                  enum hv_message_type message_type,
                  void *payload, size_t payload_size)
{
        struct hv_input_post_message *aligned_msg;
        unsigned long flags;
        u64 status;

        if (payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT)
                return -EMSGSIZE;

        local_irq_save(flags);

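        /*
         * The hypercall input must sit in memory the hypervisor can access;
         * the pre-allocated per-cpu hyperv_pcpu_input_arg page is reused for
         * that purpose.  Interrupts are kept disabled (local_irq_save()
         * above) so nothing else on this CPU can reuse the page while the
         * message is being built and posted.
         */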
        aligned_msg = *this_cpu_ptr(hyperv_pcpu_input_arg);
        aligned_msg->connectionid = connection_id;
        aligned_msg->reserved = 0;
        aligned_msg->message_type = message_type;
        aligned_msg->payload_size = payload_size;
        memcpy((void *)aligned_msg->payload, payload, payload_size);

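        /*
         * An SEV-SNP isolation VM must issue the hypercall through the GHCB
         * page (hv_ghcb_hypercall()).  Everything else, including a fully
         * enlightened TDX guest, goes through hv_do_hypercall(), which is
         * expected to select the appropriate hypercall mechanism for the
         * platform.
         */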
        if (hv_isolation_type_snp())
                status = hv_ghcb_hypercall(HVCALL_POST_MESSAGE,
                                (void *)aligned_msg, NULL,
                                sizeof(*aligned_msg));
        else
                status = hv_do_hypercall(HVCALL_POST_MESSAGE,
                                aligned_msg, NULL);

        local_irq_restore(flags);

        return hv_result(status);
}

int hv_synic_alloc(void)
{
        int cpu, ret = -ENOMEM;
        struct hv_per_cpu_context *hv_cpu;

        /*
         * First, zero all per-cpu memory areas so hv_synic_free() can
         * detect what memory has been allocated and clean up properly
         * after any failures.
         */
        for_each_present_cpu(cpu) {
                hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu);
                memset(hv_cpu, 0, sizeof(*hv_cpu));
        }

        hv_context.hv_numa_map = kcalloc(nr_node_ids, sizeof(struct cpumask),
                                         GFP_KERNEL);
        if (hv_context.hv_numa_map == NULL) {
                pr_err("Unable to allocate NUMA map\n");
                goto err;
        }

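        /*
         * hv_numa_map holds one cpumask per NUMA node, tracking which CPUs
         * already have a channel interrupt bound to them; the channel
         * CPU-assignment code elsewhere in the driver uses it to spread
         * channel interrupts across each node's CPUs.
         */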
        for_each_present_cpu(cpu) {
                hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu);

                tasklet_init(&hv_cpu->msg_dpc,
                             vmbus_on_msg_dpc, (unsigned long) hv_cpu);

                /*
                 * The SynIC message and event pages are allocated by the
                 * paravisor; skip allocating them here.
                 */
                if (!hv_isolation_type_snp() && !hv_root_partition) {
                        hv_cpu->synic_message_page =
                                (void *)get_zeroed_page(GFP_ATOMIC);
                        if (hv_cpu->synic_message_page == NULL) {
                                pr_err("Unable to allocate SYNIC message page\n");
                                goto err;
                        }

                        hv_cpu->synic_event_page =
                                (void *)get_zeroed_page(GFP_ATOMIC);
                        if (hv_cpu->synic_event_page == NULL) {
                                pr_err("Unable to allocate SYNIC event page\n");

                                free_page((unsigned long)hv_cpu->synic_message_page);
                                hv_cpu->synic_message_page = NULL;
                                goto err;
                        }
                }

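                /*
                 * A fully enlightened SEV-SNP or TDX guest (no paravisor)
                 * allocated the pages above itself, so they start out
                 * private to the guest.  They must be marked decrypted
                 * (shared with the host) before the hypervisor can deliver
                 * messages and events through them.
                 */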
                if (!ms_hyperv.paravisor_present &&
                    (hv_isolation_type_en_snp() || hv_isolation_type_tdx())) {
                        ret = set_memory_decrypted((unsigned long)
                                hv_cpu->synic_message_page, 1);
                        if (ret) {
                                pr_err("Failed to decrypt SYNIC msg page: %d\n", ret);
                                hv_cpu->synic_message_page = NULL;

                                /*
                                 * Free the event page here so that hv_synic_free()
                                 * won't later try to re-encrypt it.
                                 */
                                free_page((unsigned long)hv_cpu->synic_event_page);
                                hv_cpu->synic_event_page = NULL;
                                goto err;
                        }

                        ret = set_memory_decrypted((unsigned long)
                                hv_cpu->synic_event_page, 1);
                        if (ret) {
                                pr_err("Failed to decrypt SYNIC event page: %d\n", ret);
                                hv_cpu->synic_event_page = NULL;
                                goto err;
                        }

                        memset(hv_cpu->synic_message_page, 0, PAGE_SIZE);
                        memset(hv_cpu->synic_event_page, 0, PAGE_SIZE);
                }
        }

        return 0;

err:
        /*
         * Any memory allocations that succeeded will be freed when
         * the caller cleans up by calling hv_synic_free()
         */
        return ret;
}

void hv_synic_free(void)
{
        int cpu, ret;

        for_each_present_cpu(cpu) {
                struct hv_per_cpu_context *hv_cpu
                        = per_cpu_ptr(hv_context.cpu_context, cpu);

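                /*
                 * Pages that were shared with the host must be made private
                 * (encrypted) again before being returned to the page
                 * allocator; otherwise a page still visible to the host
                 * could be recycled for unrelated kernel data.
                 */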
                /* It's better to leak the page if the encryption fails. */
                if (!ms_hyperv.paravisor_present &&
                    (hv_isolation_type_en_snp() || hv_isolation_type_tdx())) {
                        if (hv_cpu->synic_message_page) {
                                ret = set_memory_encrypted((unsigned long)
                                        hv_cpu->synic_message_page, 1);
                                if (ret) {
                                        pr_err("Failed to encrypt SYNIC msg page: %d\n", ret);
                                        hv_cpu->synic_message_page = NULL;
                                }
                        }

                        if (hv_cpu->synic_event_page) {
                                ret = set_memory_encrypted((unsigned long)
                                        hv_cpu->synic_event_page, 1);
                                if (ret) {
                                        pr_err("Failed to encrypt SYNIC event page: %d\n", ret);
                                        hv_cpu->synic_event_page = NULL;
                                }
                        }
                }

                free_page((unsigned long)hv_cpu->synic_event_page);
                free_page((unsigned long)hv_cpu->synic_message_page);
        }

        kfree(hv_context.hv_numa_map);
}

/*
 * hv_synic_init - Initialize the Synthetic Interrupt Controller.
 *
 * If it is already initialized by another entity (i.e., the x2v shim), we
 * need to retrieve the initialized message and event pages.  Otherwise, we
 * create and initialize the message and event pages.
 */
void hv_synic_enable_regs(unsigned int cpu)
{
        struct hv_per_cpu_context *hv_cpu
                = per_cpu_ptr(hv_context.cpu_context, cpu);
        union hv_synic_simp simp;
        union hv_synic_siefp siefp;
        union hv_synic_sint shared_sint;
        union hv_synic_scontrol sctrl;

        /* Setup the Synic's message page */
        simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
        simp.simp_enabled = 1;

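        /*
         * With SNP-vTOM isolation (and in the root partition) the message
         * page is supplied by the paravisor/hypervisor at the GPA already
         * programmed in the SIMP register, so it is mapped with
         * ioremap_cache() instead of being allocated.  For vTOM the
         * shared-GPA-boundary bit is masked off to recover the real GPA;
         * for the root partition that mask should be a no-op.
         */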
        if (hv_isolation_type_snp() || hv_root_partition) {
                /* Mask out vTOM bit. ioremap_cache() maps decrypted */
                u64 base = (simp.base_simp_gpa << HV_HYP_PAGE_SHIFT) &
                                ~ms_hyperv.shared_gpa_boundary;
                hv_cpu->synic_message_page
                        = (void *)ioremap_cache(base, HV_HYP_PAGE_SIZE);
                if (!hv_cpu->synic_message_page)
                        pr_err("Fail to map synic message page.\n");
        } else {
                simp.base_simp_gpa = virt_to_phys(hv_cpu->synic_message_page)
                        >> HV_HYP_PAGE_SHIFT;
        }

        hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);

        /* Setup the Synic's event page */
        siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
        siefp.siefp_enabled = 1;

        if (hv_isolation_type_snp() || hv_root_partition) {
                /* Mask out vTOM bit. ioremap_cache() maps decrypted */
                u64 base = (siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT) &
                                ~ms_hyperv.shared_gpa_boundary;
                hv_cpu->synic_event_page
                        = (void *)ioremap_cache(base, HV_HYP_PAGE_SIZE);
                if (!hv_cpu->synic_event_page)
                        pr_err("Fail to map synic event page.\n");
        } else {
                siefp.base_siefp_gpa = virt_to_phys(hv_cpu->synic_event_page)
                        >> HV_HYP_PAGE_SHIFT;
        }

        hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);

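        /*
         * vmbus_irq is only valid (!= -1) when VMBus interrupts arrive via a
         * per-cpu Linux IRQ, as on arm64; on x86 the dedicated hypervisor
         * callback vector is used instead and vmbus_irq stays -1.
         */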
        /* Setup the shared SINT. */
        if (vmbus_irq != -1)
                enable_percpu_irq(vmbus_irq, 0);
        shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 +
                                        VMBUS_MESSAGE_SINT);

        shared_sint.vector = vmbus_interrupt;
        shared_sint.masked = false;

        /*
         * On architectures where Hyper-V doesn't support AEOI (e.g., ARM64),
         * it doesn't provide a recommendation flag and AEOI must be disabled.
         */
#ifdef HV_DEPRECATING_AEOI_RECOMMENDED
        shared_sint.auto_eoi =
                        !(ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED);
#else
        shared_sint.auto_eoi = 0;
#endif
        hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
                                shared_sint.as_uint64);

        /* Enable the global synic bit */
        sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL);
        sctrl.enable = 1;

        hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
}

int hv_synic_init(unsigned int cpu)
{
        hv_synic_enable_regs(cpu);

        hv_stimer_legacy_init(cpu, VMBUS_MESSAGE_SINT);

        return 0;
}
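/*
 * hv_synic_init() and hv_synic_cleanup() are presumably registered as CPU
 * hotplug (cpuhp) online/offline callbacks by the VMBus driver core, so each
 * runs on the CPU that is coming up or going down and only touches that
 * CPU's SynIC state.
 */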

/*
 * hv_synic_cleanup - Cleanup routine for hv_synic_init().
 */
void hv_synic_disable_regs(unsigned int cpu)
{
        struct hv_per_cpu_context *hv_cpu
                = per_cpu_ptr(hv_context.cpu_context, cpu);
        union hv_synic_sint shared_sint;
        union hv_synic_simp simp;
        union hv_synic_siefp siefp;
        union hv_synic_scontrol sctrl;

        shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 +
                                        VMBUS_MESSAGE_SINT);

        shared_sint.masked = 1;

        /* Need to correctly cleanup in the case of SMP!!! */
        /* Disable the interrupt */
        hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
                                shared_sint.as_uint64);

        simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
        /*
         * In an isolation VM, the SIMP and SIEFP pages are allocated by the
         * paravisor.  These pages will also be used by the kdump kernel, so
         * just reset the enable bit here and keep the page addresses.
         */
        simp.simp_enabled = 0;
        if (hv_isolation_type_snp() || hv_root_partition) {
                iounmap(hv_cpu->synic_message_page);
                hv_cpu->synic_message_page = NULL;
        } else {
                simp.base_simp_gpa = 0;
        }

        hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);

        siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
        siefp.siefp_enabled = 0;

        if (hv_isolation_type_snp() || hv_root_partition) {
                iounmap(hv_cpu->synic_event_page);
                hv_cpu->synic_event_page = NULL;
        } else {
                siefp.base_siefp_gpa = 0;
        }

        hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);

        /* Disable the global synic bit */
        sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL);
        sctrl.enable = 0;
        hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);

        if (vmbus_irq != -1)
                disable_percpu_irq(vmbus_irq);
}

#define HV_MAX_TRIES 3
/*
 * Scan the event flags page of 'this' CPU looking for any bit that is set.  If we find one
 * bit set, then wait for a few milliseconds.  Repeat these steps for a maximum of 3 times.
 * Return 'true' if there is still any set bit after this operation; 'false' otherwise.
 *
 * If a bit is set, that means there is a pending channel interrupt.  The expectation is
 * that the normal interrupt handling mechanism will find and process the channel interrupt
 * "very soon", and in the process clear the bit.
 */
static bool hv_synic_event_pending(void)
{
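        /*
         * The SIEF page is laid out as one hv_synic_event_flags slot per
         * SINT; the slot for VMBUS_MESSAGE_SINT carries one bit per channel
         * relid (HV_EVENT_FLAGS_COUNT bits in all), and that bitmap is what
         * the loop below scans.
         */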
        struct hv_per_cpu_context *hv_cpu = this_cpu_ptr(hv_context.cpu_context);
        union hv_synic_event_flags *event =
                (union hv_synic_event_flags *)hv_cpu->synic_event_page + VMBUS_MESSAGE_SINT;
        unsigned long *recv_int_page = event->flags; /* assumes VMBus version >= VERSION_WIN8 */
        bool pending;
        u32 relid;
        int tries = 0;

retry:
        pending = false;
        for_each_set_bit(relid, recv_int_page, HV_EVENT_FLAGS_COUNT) {
                /* Special case - VMBus channel protocol messages */
                if (relid == 0)
                        continue;
                pending = true;
                break;
        }
        if (pending && tries++ < HV_MAX_TRIES) {
                usleep_range(10000, 20000);
                goto retry;
        }
        return pending;
}

int hv_synic_cleanup(unsigned int cpu)
{
        struct vmbus_channel *channel, *sc;
        bool channel_found = false;

        if (vmbus_connection.conn_state != CONNECTED)
                goto always_cleanup;

        /*
         * Hyper-V does not provide a way to change the connect CPU once
         * it is set; we must prevent the connect CPU from going offline
         * while the VM is running normally. But in the panic or kexec()
         * path where the vmbus is already disconnected, the CPU must be
         * allowed to shut down.
         */
        if (cpu == VMBUS_CONNECT_CPU)
                return -EBUSY;

        /*
         * Search for channels which are bound to the CPU we're about to
         * clean up.  If we find one and vmbus is still connected, we
         * fail; this will effectively prevent CPU offlining.
         *
         * TODO: Re-bind the channels to different CPUs.
         */
        mutex_lock(&vmbus_connection.channel_mutex);
        list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
                if (channel->target_cpu == cpu) {
                        channel_found = true;
                        break;
                }
                list_for_each_entry(sc, &channel->sc_list, sc_list) {
                        if (sc->target_cpu == cpu) {
                                channel_found = true;
                                break;
                        }
                }
                if (channel_found)
                        break;
        }
        mutex_unlock(&vmbus_connection.channel_mutex);

        if (channel_found)
                return -EBUSY;

        /*
         * channel_found == false means that any channels that were previously
         * assigned to the CPU have been reassigned elsewhere with a call of
         * vmbus_send_modifychannel().  Scan the event flags page looking for
         * bits that are set, and wait with a timeout for vmbus_chan_sched()
         * to process such bits.  If bits are still set after this operation
         * and VMBus is connected, fail the CPU offlining operation.
         */
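        /*
         * Channel re-binding via vmbus_send_modifychannel() appears to be
         * available only from VMBus protocol 4.1 onward, which is why the
         * pending-event check is limited to VERSION_WIN10_V4_1 and later.
         */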
        if (vmbus_proto_version >= VERSION_WIN10_V4_1 && hv_synic_event_pending())
                return -EBUSY;

always_cleanup:
        hv_stimer_legacy_cleanup(cpu);

        hv_synic_disable_regs(cpu);

        return 0;
}