// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/hyperv.h>
#include <linux/random.h>
#include <linux/clockchips.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <clocksource/hyperv_timer.h>
#include <asm/mshyperv.h>
#include "hyperv_vmbus.h"
/* The one and only */
struct hv_context hv_context;
/*
 * hv_init - Main initialization routine.
 *
 * This routine must be called before any other routines in here are called.
 */
int hv_init(void)
{
	hv_context.cpu_context = alloc_percpu(struct hv_per_cpu_context);
	if (!hv_context.cpu_context)
		return -ENOMEM;
	return 0;
}
/*
 * Functions for allocating and freeing memory with size and
 * alignment HV_HYP_PAGE_SIZE. These functions are needed because
 * the guest page size may not be the same as the Hyper-V page
 * size. We depend upon kmalloc() aligning power-of-two size
 * allocations to the allocation size boundary, so that the
 * allocated memory appears to Hyper-V as a page of the size
 * it expects.
 */
void *hv_alloc_hyperv_page(void)
{
	BUILD_BUG_ON(PAGE_SIZE < HV_HYP_PAGE_SIZE);

	if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
		return (void *)__get_free_page(GFP_KERNEL);
	else
		return kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
}
void *hv_alloc_hyperv_zeroed_page(void)
{
	if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
		return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
	else
		return kzalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
}
void hv_free_hyperv_page(unsigned long addr)
{
	if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
		free_page(addr);
	else
		kfree((void *)addr);
}
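/*
 * Example of the mismatched-size case (informational): an ARM64 guest
 * built with 16K or 64K pages still talks to Hyper-V in 4K pages
 * (HV_HYP_PAGE_SIZE), so the kmalloc()/kzalloc() path above is taken,
 * and the power-of-two alignment guarantee yields a 4K-aligned block
 * that the hypervisor can treat as a full page.
 */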
/*
 * hv_post_message - Post a message using the hypervisor message IPC.
 *
 * This involves a hypercall.
 */
int hv_post_message(union hv_connection_id connection_id,
		    enum hv_message_type message_type,
		    void *payload, size_t payload_size)
{
	struct hv_input_post_message *aligned_msg;
	struct hv_per_cpu_context *hv_cpu;
	u64 status;

	if (payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT)
		return -EMSGSIZE;
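	/*
	 * get_cpu_ptr() disables preemption, so the per-cpu post_msg_page
	 * can be used as the hypercall input without another thread being
	 * scheduled onto this CPU and overwriting it mid-call.
	 */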
	hv_cpu = get_cpu_ptr(hv_context.cpu_context);
	aligned_msg = hv_cpu->post_msg_page;
	aligned_msg->connectionid = connection_id;
	aligned_msg->reserved = 0;
	aligned_msg->message_type = message_type;
	aligned_msg->payload_size = payload_size;
	memcpy((void *)aligned_msg->payload, payload, payload_size);
	if (hv_isolation_type_snp())
		status = hv_ghcb_hypercall(HVCALL_POST_MESSAGE,
					   (void *)aligned_msg, NULL,
					   sizeof(*aligned_msg));
	else
		status = hv_do_hypercall(HVCALL_POST_MESSAGE,
					 aligned_msg, NULL);
	/* Preemption must remain disabled until after the hypercall
	 * so some other thread can't get scheduled onto this cpu and
	 * corrupt the per-cpu post_msg_page
	 */
	put_cpu_ptr(hv_cpu);

	return hv_result(status);
}
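/*
 * Illustrative caller (a sketch based on drivers/hv/connection.c, not code
 * in this file): vmbus_post_msg() wraps hv_post_message(), retrying with a
 * delay when the hypervisor reports a transient failure:
 *
 *	union hv_connection_id conn_id;
 *
 *	conn_id.asu32 = 0;
 *	conn_id.u.id = vmbus_connection.msg_conn_id;
 *	ret = hv_post_message(conn_id, 1, buffer, buflen);
 */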
int hv_synic_alloc(void)
{
	int cpu;
	struct hv_per_cpu_context *hv_cpu;

	/*
	 * First, zero all per-cpu memory areas so hv_synic_free() can
	 * detect what memory has been allocated and cleanup properly
	 * after any failures.
	 */
	for_each_present_cpu(cpu) {
		hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu);
		memset(hv_cpu, 0, sizeof(*hv_cpu));
	}
	hv_context.hv_numa_map = kcalloc(nr_node_ids, sizeof(struct cpumask),
					 GFP_KERNEL);
	if (hv_context.hv_numa_map == NULL) {
		pr_err("Unable to allocate NUMA map\n");
		goto err;
	}
	for_each_present_cpu(cpu) {
		hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu);
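		/*
		 * The tasklet defers message handling out of the interrupt
		 * path: the interrupt handler schedules msg_dpc, and
		 * vmbus_on_msg_dpc() then drains this CPU's SynIC message
		 * page.
		 */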
		tasklet_init(&hv_cpu->msg_dpc,
			     vmbus_on_msg_dpc, (unsigned long) hv_cpu);
		/*
		 * SynIC message and event pages are allocated by the
		 * paravisor. Skip allocating these pages here.
		 */
		if (!hv_isolation_type_snp()) {
			hv_cpu->synic_message_page =
				(void *)get_zeroed_page(GFP_ATOMIC);
			if (hv_cpu->synic_message_page == NULL) {
				pr_err("Unable to allocate SYNIC message page\n");
				goto err;
			}
			hv_cpu->synic_event_page =
				(void *)get_zeroed_page(GFP_ATOMIC);
			if (hv_cpu->synic_event_page == NULL) {
				pr_err("Unable to allocate SYNIC event page\n");
				goto err;
			}
		}
		hv_cpu->post_msg_page = (void *)get_zeroed_page(GFP_ATOMIC);
		if (hv_cpu->post_msg_page == NULL) {
			pr_err("Unable to allocate post msg page\n");
			goto err;
		}
	}

	return 0;
err:
	/*
	 * Any memory allocations that succeeded will be freed when
	 * the caller cleans up by calling hv_synic_free()
	 */
	return -ENOMEM;
}
void hv_synic_free(void)
{
	int cpu;
	for_each_present_cpu(cpu) {
		struct hv_per_cpu_context *hv_cpu
			= per_cpu_ptr(hv_context.cpu_context, cpu);
		free_page((unsigned long)hv_cpu->synic_event_page);
		free_page((unsigned long)hv_cpu->synic_message_page);
		free_page((unsigned long)hv_cpu->post_msg_page);
	}
	kfree(hv_context.hv_numa_map);
}
/*
 * hv_synic_init - Initialize the Synthetic Interrupt Controller.
 *
 * If it is already initialized by another entity (i.e., the x2v shim), we
 * need to retrieve the initialized message and event pages.  Otherwise, we
 * create and initialize the message and event pages.
 */
void hv_synic_enable_regs(unsigned int cpu)
{
	struct hv_per_cpu_context *hv_cpu
		= per_cpu_ptr(hv_context.cpu_context, cpu);
	union hv_synic_simp simp;
	union hv_synic_siefp siefp;
	union hv_synic_sint shared_sint;
	union hv_synic_scontrol sctrl;

	/* Setup the Synic's message page */
	simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
	simp.simp_enabled = 1;
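	/*
	 * In an SNP isolation VM the paravisor owns the SynIC message page:
	 * its GPA is already present in the SIMP register, so map that page
	 * instead of pointing the register at our own allocation.
	 */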
	if (hv_isolation_type_snp()) {
		hv_cpu->synic_message_page
			= memremap(simp.base_simp_gpa << HV_HYP_PAGE_SHIFT,
				   HV_HYP_PAGE_SIZE, MEMREMAP_WB);
		if (!hv_cpu->synic_message_page)
			pr_err("Failed to map SynIC message page.\n");
	} else {
		simp.base_simp_gpa = virt_to_phys(hv_cpu->synic_message_page)
			>> HV_HYP_PAGE_SHIFT;
	}
	hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);

	/* Setup the Synic's event page */
	siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
	siefp.siefp_enabled = 1;
	if (hv_isolation_type_snp()) {
		hv_cpu->synic_event_page =
			memremap(siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT,
				 HV_HYP_PAGE_SIZE, MEMREMAP_WB);

		if (!hv_cpu->synic_event_page)
			pr_err("Failed to map SynIC event page.\n");
	} else {
		siefp.base_siefp_gpa = virt_to_phys(hv_cpu->synic_event_page)
			>> HV_HYP_PAGE_SHIFT;
	}
	hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);

	/* Setup the shared SINT. */
	if (vmbus_irq != -1)
		enable_percpu_irq(vmbus_irq, 0);
	shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 +
					VMBUS_MESSAGE_SINT);

	shared_sint.vector = vmbus_interrupt;
	shared_sint.masked = false;
	/*
	 * On architectures where Hyper-V doesn't support AEOI (e.g., ARM64),
	 * it doesn't provide a recommendation flag and AEOI must be disabled.
	 */
#ifdef HV_DEPRECATING_AEOI_RECOMMENDED
	shared_sint.auto_eoi =
			!(ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED);
#else
	shared_sint.auto_eoi = 0;
#endif
	hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
				shared_sint.as_uint64);
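	/*
	 * Everything needed by the SynIC is now in place; only after this
	 * point is the global enable bit below flipped, so the SynIC goes
	 * live with the message page, event page, and SINT fully set up.
	 */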
	/* Enable the global synic bit */
	sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL);
	sctrl.enable = 1;

	hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
}
int hv_synic_init(unsigned int cpu)
{
	hv_synic_enable_regs(cpu);

	hv_stimer_legacy_init(cpu, VMBUS_MESSAGE_SINT);

	return 0;
}
/*
 * hv_synic_cleanup - Cleanup routine for hv_synic_init().
 */
void hv_synic_disable_regs(unsigned int cpu)
{
	struct hv_per_cpu_context *hv_cpu
		= per_cpu_ptr(hv_context.cpu_context, cpu);
	union hv_synic_sint shared_sint;
	union hv_synic_simp simp;
	union hv_synic_siefp siefp;
	union hv_synic_scontrol sctrl;

	shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 +
					VMBUS_MESSAGE_SINT);

	shared_sint.masked = 1;
	/* Need to correctly cleanup in the case of SMP!!! */
	/* Disable the interrupt */
	hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
				shared_sint.as_uint64);
	simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
	/*
	 * In Isolation VM, sim and sief pages are allocated by
	 * paravisor. These pages will also be used by the kdump
	 * kernel, so just reset the enable bit here and keep the
	 * page addresses.
	 */
	simp.simp_enabled = 0;
	if (hv_isolation_type_snp())
		memunmap(hv_cpu->synic_message_page);
	else
		simp.base_simp_gpa = 0;

	hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);
	siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
	siefp.siefp_enabled = 0;

	if (hv_isolation_type_snp())
		memunmap(hv_cpu->synic_event_page);
	else
		siefp.base_siefp_gpa = 0;

	hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);
	/* Disable the global synic bit */
	sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL);
	sctrl.enable = 0;
	hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);

	if (vmbus_irq != -1)
		disable_percpu_irq(vmbus_irq);
}
#define HV_MAX_TRIES 3
/*
 * Scan the event flags page of 'this' CPU looking for any bit that is set.  If we find one
 * bit set, then wait 10 to 20 milliseconds.  Repeat these steps a maximum of HV_MAX_TRIES
 * (3) times.  Return 'true' if any bit is still set after this operation; 'false' otherwise.
 *
 * If a bit is set, that means there is a pending channel interrupt.  The expectation is
 * that the normal interrupt handling mechanism will find and process the channel interrupt
 * "very soon", and in the process clear the bit.
 */
static bool hv_synic_event_pending(void)
{
	struct hv_per_cpu_context *hv_cpu = this_cpu_ptr(hv_context.cpu_context);
	union hv_synic_event_flags *event =
		(union hv_synic_event_flags *)hv_cpu->synic_event_page + VMBUS_MESSAGE_SINT;
	unsigned long *recv_int_page = event->flags; /* assumes VMBus version >= VERSION_WIN8 */
	bool pending;
	u32 relid;
	int tries = 0;

retry:
	pending = false;
	for_each_set_bit(relid, recv_int_page, HV_EVENT_FLAGS_COUNT) {
		/* Special case - VMBus channel protocol messages */
		if (relid == 0)
			continue;
		pending = true;
		break;
	}
	if (pending && tries++ < HV_MAX_TRIES) {
		usleep_range(10000, 20000);
		goto retry;
	}
	return pending;
}
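/*
 * Note: with HV_MAX_TRIES == 3 and a 10-20 ms sleep per retry, the scan
 * above gives pending events roughly 30-60 ms in total to drain before
 * the caller decides whether to fail the CPU offlining.
 */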
int hv_synic_cleanup(unsigned int cpu)
{
	struct vmbus_channel *channel, *sc;
	bool channel_found = false;

	if (vmbus_connection.conn_state != CONNECTED)
		goto always_cleanup;
	/*
	 * Hyper-V does not provide a way to change the connect CPU once
	 * it is set; we must prevent the connect CPU from going offline
	 * while the VM is running normally. But in the panic or kexec()
	 * path where the vmbus is already disconnected, the CPU must be
	 * allowed to shut down.
	 */
	if (cpu == VMBUS_CONNECT_CPU)
		return -EBUSY;
	/*
	 * Search for channels which are bound to the CPU we're about to
	 * cleanup.  In case we find one and vmbus is still connected, we
	 * fail; this will effectively prevent CPU offlining.
	 *
	 * TODO: Re-bind the channels to different CPUs.
	 */
	mutex_lock(&vmbus_connection.channel_mutex);
	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
		if (channel->target_cpu == cpu) {
			channel_found = true;
			break;
		}
		list_for_each_entry(sc, &channel->sc_list, sc_list) {
			if (sc->target_cpu == cpu) {
				channel_found = true;
				break;
			}
		}
		if (channel_found)
			break;
	}
	mutex_unlock(&vmbus_connection.channel_mutex);

	if (channel_found)
		return -EBUSY;
	/*
	 * channel_found == false means that any channels that were previously
	 * assigned to the CPU have been reassigned elsewhere with a call of
	 * vmbus_send_modifychannel().  Scan the event flags page looking for
	 * bits that are set and waiting with a timeout for vmbus_chan_sched()
	 * to process such bits.  If bits are still set after this operation
	 * and VMBus is connected, fail the CPU offlining operation.
	 */
	if (vmbus_proto_version >= VERSION_WIN10_V4_1 && hv_synic_event_pending())
		return -EBUSY;

always_cleanup:
	hv_stimer_legacy_cleanup(cpu);

	hv_synic_disable_regs(cpu);

	return 0;
}