Merge tag 'hyperv-next-signed-20210629' of git://git.kernel.org/pub/scm/linux/kernel...
author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 29 Jun 2021 18:21:35 +0000 (11:21 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 29 Jun 2021 18:21:35 +0000 (11:21 -0700)
Pull hyperv updates from Wei Liu:
 "Just a few minor enhancement patches and bug fixes"

* tag 'hyperv-next-signed-20210629' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux:
  PCI: hv: Add check for hyperv_initialized in init_hv_pci_drv()
  Drivers: hv: Move Hyper-V extended capability check to arch neutral code
  drivers: hv: Fix missing error code in vmbus_connect()
  x86/hyperv: fix logical processor creation
  hv_utils: Fix passing zero to 'PTR_ERR' warning
  scsi: storvsc: Use blk_mq_unique_tag() to generate requestIDs
  Drivers: hv: vmbus: Copy packets sent by Hyper-V out of the ring buffer
  hv_balloon: Remove redundant assignment to region_start

20 files changed:
arch/x86/hyperv/hv_init.c
arch/x86/kernel/cpu/mshyperv.c
drivers/Makefile
drivers/hv/Makefile
drivers/hv/channel.c
drivers/hv/connection.c
drivers/hv/hv_balloon.c
drivers/hv/hv_common.c [new file with mode: 0644]
drivers/hv/hv_fcopy.c
drivers/hv/hv_kvp.c
drivers/hv/hv_util.c
drivers/hv/hyperv_vmbus.h
drivers/hv/ring_buffer.c
drivers/net/hyperv/hyperv_net.h
drivers/net/hyperv/netvsc.c
drivers/net/hyperv/rndis_filter.c
drivers/pci/controller/pci-hyperv.c
drivers/scsi/storvsc_drv.c
include/linux/hyperv.h
net/vmw_vsock/hyperv_transport.c

index 256ad0e..6952e21 100644 (file)
@@ -614,50 +614,3 @@ bool hv_is_isolation_supported(void)
        return hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE;
 }
 EXPORT_SYMBOL_GPL(hv_is_isolation_supported);
-
-/* Bit mask of the extended capability to query: see HV_EXT_CAPABILITY_xxx */
-bool hv_query_ext_cap(u64 cap_query)
-{
-       /*
-        * The address of the 'hv_extended_cap' variable will be used as an
-        * output parameter to the hypercall below and so it should be
-        * compatible with 'virt_to_phys'. Which means, it's address should be
-        * directly mapped. Use 'static' to keep it compatible; stack variables
-        * can be virtually mapped, making them incompatible with
-        * 'virt_to_phys'.
-        * Hypercall input/output addresses should also be 8-byte aligned.
-        */
-       static u64 hv_extended_cap __aligned(8);
-       static bool hv_extended_cap_queried;
-       u64 status;
-
-       /*
-        * Querying extended capabilities is an extended hypercall. Check if the
-        * partition supports extended hypercall, first.
-        */
-       if (!(ms_hyperv.priv_high & HV_ENABLE_EXTENDED_HYPERCALLS))
-               return false;
-
-       /* Extended capabilities do not change at runtime. */
-       if (hv_extended_cap_queried)
-               return hv_extended_cap & cap_query;
-
-       status = hv_do_hypercall(HV_EXT_CALL_QUERY_CAPABILITIES, NULL,
-                                &hv_extended_cap);
-
-       /*
-        * The query extended capabilities hypercall should not fail under
-        * any normal circumstances. Avoid repeatedly making the hypercall, on
-        * error.
-        */
-       hv_extended_cap_queried = true;
-       status &= HV_HYPERCALL_RESULT_MASK;
-       if (status != HV_STATUS_SUCCESS) {
-               pr_err("Hyper-V: Extended query capabilities hypercall failed 0x%llx\n",
-                      status);
-               return false;
-       }
-
-       return hv_extended_cap & cap_query;
-}
-EXPORT_SYMBOL_GPL(hv_query_ext_cap);
index c268c27..01ca94f 100644 (file)
@@ -236,7 +236,7 @@ static void __init hv_smp_prepare_cpus(unsigned int max_cpus)
        for_each_present_cpu(i) {
                if (i == 0)
                        continue;
-               ret = hv_call_add_logical_proc(numa_cpu_node(i), i, cpu_physical_id(i));
+               ret = hv_call_add_logical_proc(numa_cpu_node(i), i, i);
                BUG_ON(ret);
        }
 
index f85185f..27c018b 100644 (file)
@@ -160,7 +160,7 @@ obj-$(CONFIG_SOUNDWIRE)             += soundwire/
 
 # Virtualization drivers
 obj-$(CONFIG_VIRT_DRIVERS)     += virt/
-obj-$(CONFIG_HYPERV)           += hv/
+obj-$(subst m,y,$(CONFIG_HYPERV))      += hv/
 
 obj-$(CONFIG_PM_DEVFREQ)       += devfreq/
 obj-$(CONFIG_EXTCON)           += extcon/
index 94daf82..d76df5c 100644 (file)
@@ -11,3 +11,6 @@ hv_vmbus-y := vmbus_drv.o \
                 channel_mgmt.o ring_buffer.o hv_trace.o
 hv_vmbus-$(CONFIG_HYPERV_TESTING)      += hv_debugfs.o
 hv_utils-y := hv_util.o hv_kvp.o hv_snapshot.o hv_fcopy.o hv_utils_transport.o
+
+# Code that must be built-in
+obj-$(subst m,y,$(CONFIG_HYPERV)) += hv_common.o
index c2635e9..f3761c7 100644 (file)
@@ -662,12 +662,15 @@ static int __vmbus_open(struct vmbus_channel *newchannel,
        newchannel->onchannel_callback = onchannelcallback;
        newchannel->channel_callback_context = context;
 
-       err = hv_ringbuffer_init(&newchannel->outbound, page, send_pages);
+       if (!newchannel->max_pkt_size)
+               newchannel->max_pkt_size = VMBUS_DEFAULT_MAX_PKT_SIZE;
+
+       err = hv_ringbuffer_init(&newchannel->outbound, page, send_pages, 0);
        if (err)
                goto error_clean_ring;
 
-       err = hv_ringbuffer_init(&newchannel->inbound,
-                                &page[send_pages], recv_pages);
+       err = hv_ringbuffer_init(&newchannel->inbound, &page[send_pages],
+                                recv_pages, newchannel->max_pkt_size);
        if (err)
                goto error_clean_ring;
 
@@ -1186,15 +1189,14 @@ EXPORT_SYMBOL_GPL(vmbus_recvpacket_raw);
  * vmbus_next_request_id - Returns a new request id. It is also
  * the index at which the guest memory address is stored.
  * Uses a spin lock to avoid race conditions.
- * @rqstor: Pointer to the requestor struct
+ * @channel: Pointer to the VMbus channel struct
  * @rqst_addr: Guest memory address to be stored in the array
  */
-u64 vmbus_next_request_id(struct vmbus_requestor *rqstor, u64 rqst_addr)
+u64 vmbus_next_request_id(struct vmbus_channel *channel, u64 rqst_addr)
 {
+       struct vmbus_requestor *rqstor = &channel->requestor;
        unsigned long flags;
        u64 current_id;
-       const struct vmbus_channel *channel =
-               container_of(rqstor, const struct vmbus_channel, requestor);
 
        /* Check rqstor has been initialized */
        if (!channel->rqstor_size)
@@ -1228,16 +1230,15 @@ EXPORT_SYMBOL_GPL(vmbus_next_request_id);
 /*
  * vmbus_request_addr - Returns the memory address stored at @trans_id
  * in @channel. Uses a spin lock to avoid race conditions.
- * @rqstor: Pointer to the requestor struct
+ * @channel: Pointer to the VMbus channel struct
  * @trans_id: Request id sent back from Hyper-V. Becomes the requestor's
  * next request id.
  */
-u64 vmbus_request_addr(struct vmbus_requestor *rqstor, u64 trans_id)
+u64 vmbus_request_addr(struct vmbus_channel *channel, u64 trans_id)
 {
+       struct vmbus_requestor *rqstor = &channel->requestor;
        unsigned long flags;
        u64 req_addr;
-       const struct vmbus_channel *channel =
-               container_of(rqstor, const struct vmbus_channel, requestor);
 
        /* Check rqstor has been initialized */
        if (!channel->rqstor_size)
index 311cd00..5e479d5 100644 (file)
@@ -232,8 +232,10 @@ int vmbus_connect(void)
         */
 
        for (i = 0; ; i++) {
-               if (i == ARRAY_SIZE(vmbus_versions))
+               if (i == ARRAY_SIZE(vmbus_versions)) {
+                       ret = -EDOM;
                        goto cleanup;
+               }
 
                version = vmbus_versions[i];
                if (version > max_version)
index 58af84e..7f11ea0 100644 (file)
@@ -1010,7 +1010,6 @@ static void hot_add_req(struct work_struct *dummy)
                 * that need to be hot-added while ensuring the alignment
                 * and size requirements of Linux as it relates to hot-add.
                 */
-               region_start = pg_start;
                region_size = (pfn_cnt / HA_CHUNK) * HA_CHUNK;
                if (pfn_cnt % HA_CHUNK)
                        region_size += HA_CHUNK;
diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
new file mode 100644 (file)
index 0000000..7f42da9
--- /dev/null
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Architecture neutral utility routines for interacting with
+ * Hyper-V. This file is specifically for code that must be
+ * built-in to the kernel image when CONFIG_HYPERV is set
+ * (vs. being in a module) because it is called from architecture
+ * specific code under arch/.
+ *
+ * Copyright (C) 2021, Microsoft, Inc.
+ *
+ * Author : Michael Kelley <mikelley@microsoft.com>
+ */
+
+#include <linux/types.h>
+#include <linux/export.h>
+#include <linux/bitfield.h>
+#include <asm/hyperv-tlfs.h>
+#include <asm/mshyperv.h>
+
+
+/* Bit mask of the extended capability to query: see HV_EXT_CAPABILITY_xxx */
+bool hv_query_ext_cap(u64 cap_query)
+{
+       /*
+        * The address of the 'hv_extended_cap' variable will be used as an
+        * output parameter to the hypercall below and so it should be
+        * compatible with 'virt_to_phys'. Which means, its address should be
+        * directly mapped. Use 'static' to keep it compatible; stack variables
+        * can be virtually mapped, making them incompatible with
+        * 'virt_to_phys'.
+        * Hypercall input/output addresses should also be 8-byte aligned.
+        */
+       static u64 hv_extended_cap __aligned(8);
+       static bool hv_extended_cap_queried;
+       u64 status;
+
+       /*
+        * Querying extended capabilities is an extended hypercall. Check if the
+        * partition supports extended hypercall, first.
+        */
+       if (!(ms_hyperv.priv_high & HV_ENABLE_EXTENDED_HYPERCALLS))
+               return false;
+
+       /* Extended capabilities do not change at runtime. */
+       if (hv_extended_cap_queried)
+               return hv_extended_cap & cap_query;
+
+       status = hv_do_hypercall(HV_EXT_CALL_QUERY_CAPABILITIES, NULL,
+                                &hv_extended_cap);
+
+       /*
+        * The query extended capabilities hypercall should not fail under
+        * any normal circumstances. Avoid repeatedly making the hypercall, on
+        * error.
+        */
+       hv_extended_cap_queried = true;
+       if (!hv_result_success(status)) {
+               pr_err("Hyper-V: Extended query capabilities hypercall failed 0x%llx\n",
+                      status);
+               return false;
+       }
+
+       return hv_extended_cap & cap_query;
+}
+EXPORT_SYMBOL_GPL(hv_query_ext_cap);
index 59ce85e..660036d 100644 (file)
@@ -349,6 +349,7 @@ int hv_fcopy_init(struct hv_util_service *srv)
 {
        recv_buffer = srv->recv_buffer;
        fcopy_transaction.recv_channel = srv->channel;
+       fcopy_transaction.recv_channel->max_pkt_size = HV_HYP_PAGE_SIZE * 2;
 
        /*
         * When this driver loads, the user level daemon that
index b49962d..c698592 100644 (file)
@@ -757,6 +757,7 @@ hv_kvp_init(struct hv_util_service *srv)
 {
        recv_buffer = srv->recv_buffer;
        kvp_transaction.recv_channel = srv->channel;
+       kvp_transaction.recv_channel->max_pkt_size = HV_HYP_PAGE_SIZE * 4;
 
        /*
         * When this driver loads, the user level daemon that
index e4aefeb..136576c 100644 (file)
@@ -750,8 +750,8 @@ static int hv_timesync_init(struct hv_util_service *srv)
         */
        hv_ptp_clock = ptp_clock_register(&ptp_hyperv_info, NULL);
        if (IS_ERR_OR_NULL(hv_ptp_clock)) {
-               pr_err("cannot register PTP clock: %ld\n",
-                      PTR_ERR(hv_ptp_clock));
+               pr_err("cannot register PTP clock: %d\n",
+                      PTR_ERR_OR_ZERO(hv_ptp_clock));
                hv_ptp_clock = NULL;
        }
 
index 9416e09..42f3d9d 100644 (file)
@@ -174,7 +174,7 @@ extern int hv_synic_cleanup(unsigned int cpu);
 void hv_ringbuffer_pre_init(struct vmbus_channel *channel);
 
 int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
-                      struct page *pages, u32 pagecnt);
+                      struct page *pages, u32 pagecnt, u32 max_pkt_size);
 
 void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info);
 
index 374f8af..2aee356 100644 (file)
@@ -181,7 +181,7 @@ void hv_ringbuffer_pre_init(struct vmbus_channel *channel)
 
 /* Initialize the ring buffer. */
 int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
-                      struct page *pages, u32 page_cnt)
+                      struct page *pages, u32 page_cnt, u32 max_pkt_size)
 {
        int i;
        struct page **pages_wraparound;
@@ -223,6 +223,14 @@ int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
                sizeof(struct hv_ring_buffer);
        ring_info->priv_read_index = 0;
 
+       /* Initialize buffer that holds copies of incoming packets */
+       if (max_pkt_size) {
+               ring_info->pkt_buffer = kzalloc(max_pkt_size, GFP_KERNEL);
+               if (!ring_info->pkt_buffer)
+                       return -ENOMEM;
+               ring_info->pkt_buffer_size = max_pkt_size;
+       }
+
        spin_lock_init(&ring_info->ring_lock);
 
        return 0;
@@ -235,6 +243,9 @@ void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info)
        vunmap(ring_info->ring_buffer);
        ring_info->ring_buffer = NULL;
        mutex_unlock(&ring_info->ring_buffer_mutex);
+
+       kfree(ring_info->pkt_buffer);
+       ring_info->pkt_buffer_size = 0;
 }
 
 /* Write to the ring buffer. */
@@ -301,10 +312,12 @@ int hv_ringbuffer_write(struct vmbus_channel *channel,
         */
 
        if (desc->flags == VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED) {
-               rqst_id = vmbus_next_request_id(&channel->requestor, requestid);
-               if (rqst_id == VMBUS_RQST_ERROR) {
-                       spin_unlock_irqrestore(&outring_info->ring_lock, flags);
-                       return -EAGAIN;
+               if (channel->next_request_id_callback != NULL) {
+                       rqst_id = channel->next_request_id_callback(channel, requestid);
+                       if (rqst_id == VMBUS_RQST_ERROR) {
+                               spin_unlock_irqrestore(&outring_info->ring_lock, flags);
+                               return -EAGAIN;
+                       }
                }
        }
        desc = hv_get_ring_buffer(outring_info) + old_write;
@@ -332,7 +345,8 @@ int hv_ringbuffer_write(struct vmbus_channel *channel,
        if (channel->rescind) {
                if (rqst_id != VMBUS_NO_RQSTOR) {
                        /* Reclaim request ID to avoid leak of IDs */
-                       vmbus_request_addr(&channel->requestor, rqst_id);
+                       if (channel->request_addr_callback != NULL)
+                               channel->request_addr_callback(channel, rqst_id);
                }
                return -ENODEV;
        }
@@ -375,7 +389,7 @@ int hv_ringbuffer_read(struct vmbus_channel *channel,
        memcpy(buffer, (const char *)desc + offset, packetlen);
 
        /* Advance ring index to next packet descriptor */
-       __hv_pkt_iter_next(channel, desc);
+       __hv_pkt_iter_next(channel, desc, true);
 
        /* Notify host of update */
        hv_pkt_iter_close(channel);
@@ -402,6 +416,22 @@ static u32 hv_pkt_iter_avail(const struct hv_ring_buffer_info *rbi)
 }
 
 /*
+ * Get first vmbus packet without copying it out of the ring buffer
+ */
+struct vmpacket_descriptor *hv_pkt_iter_first_raw(struct vmbus_channel *channel)
+{
+       struct hv_ring_buffer_info *rbi = &channel->inbound;
+
+       hv_debug_delay_test(channel, MESSAGE_DELAY);
+
+       if (hv_pkt_iter_avail(rbi) < sizeof(struct vmpacket_descriptor))
+               return NULL;
+
+       return (struct vmpacket_descriptor *)(hv_get_ring_buffer(rbi) + rbi->priv_read_index);
+}
+EXPORT_SYMBOL_GPL(hv_pkt_iter_first_raw);
+
+/*
  * Get first vmbus packet from ring buffer after read_index
  *
  * If ring buffer is empty, returns NULL and no other action needed.
@@ -409,17 +439,49 @@ static u32 hv_pkt_iter_avail(const struct hv_ring_buffer_info *rbi)
 struct vmpacket_descriptor *hv_pkt_iter_first(struct vmbus_channel *channel)
 {
        struct hv_ring_buffer_info *rbi = &channel->inbound;
-       struct vmpacket_descriptor *desc;
+       struct vmpacket_descriptor *desc, *desc_copy;
+       u32 bytes_avail, pkt_len, pkt_offset;
 
-       hv_debug_delay_test(channel, MESSAGE_DELAY);
-       if (hv_pkt_iter_avail(rbi) < sizeof(struct vmpacket_descriptor))
+       desc = hv_pkt_iter_first_raw(channel);
+       if (!desc)
                return NULL;
 
-       desc = hv_get_ring_buffer(rbi) + rbi->priv_read_index;
-       if (desc)
-               prefetch((char *)desc + (desc->len8 << 3));
+       bytes_avail = min(rbi->pkt_buffer_size, hv_pkt_iter_avail(rbi));
+
+       /*
+        * Ensure the compiler does not use references to incoming Hyper-V values (which
+        * could change at any moment) when reading local variables later in the code
+        */
+       pkt_len = READ_ONCE(desc->len8) << 3;
+       pkt_offset = READ_ONCE(desc->offset8) << 3;
+
+       /*
+        * If pkt_len is invalid, set it to the smaller of hv_pkt_iter_avail() and
+        * rbi->pkt_buffer_size
+        */
+       if (pkt_len < sizeof(struct vmpacket_descriptor) || pkt_len > bytes_avail)
+               pkt_len = bytes_avail;
+
+       /*
+        * If pkt_offset is invalid, arbitrarily set it to
+        * the size of vmpacket_descriptor
+        */
+       if (pkt_offset < sizeof(struct vmpacket_descriptor) || pkt_offset > pkt_len)
+               pkt_offset = sizeof(struct vmpacket_descriptor);
+
+       /* Copy the Hyper-V packet out of the ring buffer */
+       desc_copy = (struct vmpacket_descriptor *)rbi->pkt_buffer;
+       memcpy(desc_copy, desc, pkt_len);
+
+       /*
+        * Hyper-V could still change len8 and offset8 after the earlier read.
+        * Ensure that desc_copy has legal values for len8 and offset8 that
+        * are consistent with the copy we just made
+        */
+       desc_copy->len8 = pkt_len >> 3;
+       desc_copy->offset8 = pkt_offset >> 3;
 
-       return desc;
+       return desc_copy;
 }
 EXPORT_SYMBOL_GPL(hv_pkt_iter_first);
 
@@ -431,7 +493,8 @@ EXPORT_SYMBOL_GPL(hv_pkt_iter_first);
  */
 struct vmpacket_descriptor *
 __hv_pkt_iter_next(struct vmbus_channel *channel,
-                  const struct vmpacket_descriptor *desc)
+                  const struct vmpacket_descriptor *desc,
+                  bool copy)
 {
        struct hv_ring_buffer_info *rbi = &channel->inbound;
        u32 packetlen = desc->len8 << 3;
@@ -444,7 +507,7 @@ __hv_pkt_iter_next(struct vmbus_channel *channel,
                rbi->priv_read_index -= dsize;
 
        /* more data? */
-       return hv_pkt_iter_first(channel);
+       return copy ? hv_pkt_iter_first(channel) : hv_pkt_iter_first_raw(channel);
 }
 EXPORT_SYMBOL_GPL(__hv_pkt_iter_next);
 
index 442c520..b11aa68 100644 (file)
@@ -895,9 +895,16 @@ static inline u32 netvsc_rqstor_size(unsigned long ringbytes)
                ringbytes / NETVSC_MIN_IN_MSG_SIZE;
 }
 
+/* XFER PAGE packets can specify a maximum of 375 ranges for NDIS >= 6.0
+ * and a maximum of 64 ranges for NDIS < 6.0 with no RSC; with RSC, this
+ * limit is raised to 562 (= NVSP_RSC_MAX).
+ */
+#define NETVSC_MAX_XFER_PAGE_RANGES NVSP_RSC_MAX
 #define NETVSC_XFER_HEADER_SIZE(rng_cnt) \
                (offsetof(struct vmtransfer_page_packet_header, ranges) + \
                (rng_cnt) * sizeof(struct vmtransfer_page_range))
+#define NETVSC_MAX_PKT_SIZE (NETVSC_XFER_HEADER_SIZE(NETVSC_MAX_XFER_PAGE_RANGES) + \
+               sizeof(struct nvsp_message) + (sizeof(u32) * VRSS_SEND_TAB_SIZE))
 
 struct multi_send_data {
        struct sk_buff *skb; /* skb containing the pkt */
index 9d07c9c..7bd9354 100644 (file)
@@ -757,7 +757,7 @@ static void netvsc_send_tx_complete(struct net_device *ndev,
        int queue_sends;
        u64 cmd_rqst;
 
-       cmd_rqst = vmbus_request_addr(&channel->requestor, (u64)desc->trans_id);
+       cmd_rqst = channel->request_addr_callback(channel, (u64)desc->trans_id);
        if (cmd_rqst == VMBUS_RQST_ERROR) {
                netdev_err(ndev, "Incorrect transaction id\n");
                return;
@@ -817,8 +817,8 @@ static void netvsc_send_completion(struct net_device *ndev,
 
        /* First check if this is a VMBUS completion without data payload */
        if (!msglen) {
-               cmd_rqst = vmbus_request_addr(&incoming_channel->requestor,
-                                             (u64)desc->trans_id);
+               cmd_rqst = incoming_channel->request_addr_callback(incoming_channel,
+                                                                  (u64)desc->trans_id);
                if (cmd_rqst == VMBUS_RQST_ERROR) {
                        netdev_err(ndev, "Invalid transaction id\n");
                        return;
@@ -1649,7 +1649,11 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device,
                       netvsc_poll, NAPI_POLL_WEIGHT);
 
        /* Open the channel */
+       device->channel->next_request_id_callback = vmbus_next_request_id;
+       device->channel->request_addr_callback = vmbus_request_addr;
        device->channel->rqstor_size = netvsc_rqstor_size(netvsc_ring_bytes);
+       device->channel->max_pkt_size = NETVSC_MAX_PKT_SIZE;
+
        ret = vmbus_open(device->channel, netvsc_ring_bytes,
                         netvsc_ring_bytes,  NULL, 0,
                         netvsc_channel_cb, net_device->chan_table);
index c0e89e1..983bf36 100644 (file)
@@ -1259,7 +1259,11 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc)
        /* Set the channel before opening.*/
        nvchan->channel = new_sc;
 
+       new_sc->next_request_id_callback = vmbus_next_request_id;
+       new_sc->request_addr_callback = vmbus_request_addr;
        new_sc->rqstor_size = netvsc_rqstor_size(netvsc_ring_bytes);
+       new_sc->max_pkt_size = NETVSC_MAX_PKT_SIZE;
+
        ret = vmbus_open(new_sc, netvsc_ring_bytes,
                         netvsc_ring_bytes, NULL, 0,
                         netvsc_channel_cb, nvchan);
index 6511648..bebe3ee 100644 (file)
@@ -3476,6 +3476,9 @@ static void __exit exit_hv_pci_drv(void)
 
 static int __init init_hv_pci_drv(void)
 {
+       if (!hv_is_hyperv_initialized())
+               return -ENODEV;
+
        /* Set the invalid domain number's bit, so it will not be used */
        set_bit(HVPCI_DOM_INVALID, hvpci_dom_map);
 
index e6718a7..4037539 100644 (file)
@@ -406,6 +406,14 @@ static void storvsc_on_channel_callback(void *context);
 #define STORVSC_IDE_MAX_TARGETS                                1
 #define STORVSC_IDE_MAX_CHANNELS                       1
 
+/*
+ * Upper bound on the size of a storvsc packet. vmscsi_size_delta is not
+ * included in the calculation because it is set after STORVSC_MAX_PKT_SIZE
+ * is used in storvsc_connect_to_vsp
+ */
+#define STORVSC_MAX_PKT_SIZE (sizeof(struct vmpacket_descriptor) +\
+                             sizeof(struct vstor_packet))
+
 struct storvsc_cmd_request {
        struct scsi_cmnd *cmd;
 
@@ -688,6 +696,23 @@ old_is_alloced:
        spin_unlock_irqrestore(&stor_device->lock, flags);
 }
 
+static u64 storvsc_next_request_id(struct vmbus_channel *channel, u64 rqst_addr)
+{
+       struct storvsc_cmd_request *request =
+               (struct storvsc_cmd_request *)(unsigned long)rqst_addr;
+
+       if (rqst_addr == VMBUS_RQST_INIT)
+               return VMBUS_RQST_INIT;
+       if (rqst_addr == VMBUS_RQST_RESET)
+               return VMBUS_RQST_RESET;
+
+       /*
+        * Cannot return an ID of 0, which is reserved for an unsolicited
+        * message from Hyper-V.
+        */
+       return (u64)blk_mq_unique_tag(request->cmd->request) + 1;
+}
+
 static void handle_sc_creation(struct vmbus_channel *new_sc)
 {
        struct hv_device *device = new_sc->primary_channel->device_obj;
@@ -701,12 +726,9 @@ static void handle_sc_creation(struct vmbus_channel *new_sc)
                return;
 
        memset(&props, 0, sizeof(struct vmstorage_channel_properties));
+       new_sc->max_pkt_size = STORVSC_MAX_PKT_SIZE;
 
-       /*
-        * The size of vmbus_requestor is an upper bound on the number of requests
-        * that can be in-progress at any one time across all channels.
-        */
-       new_sc->rqstor_size = scsi_driver.can_queue;
+       new_sc->next_request_id_callback = storvsc_next_request_id;
 
        ret = vmbus_open(new_sc,
                         storvsc_ringbuffer_size,
@@ -773,7 +795,7 @@ static void  handle_multichannel_storage(struct hv_device *device, int max_chns)
        ret = vmbus_sendpacket(device->channel, vstor_packet,
                               (sizeof(struct vstor_packet) -
                               stor_device->vmscsi_size_delta),
-                              (unsigned long)request,
+                              VMBUS_RQST_INIT,
                               VM_PKT_DATA_INBAND,
                               VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 
@@ -842,7 +864,7 @@ static int storvsc_execute_vstor_op(struct hv_device *device,
        ret = vmbus_sendpacket(device->channel, vstor_packet,
                               (sizeof(struct vstor_packet) -
                               stor_device->vmscsi_size_delta),
-                              (unsigned long)request,
+                              VMBUS_RQST_INIT,
                               VM_PKT_DATA_INBAND,
                               VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
        if (ret != 0)
@@ -1244,6 +1266,7 @@ static void storvsc_on_channel_callback(void *context)
        const struct vmpacket_descriptor *desc;
        struct hv_device *device;
        struct storvsc_device *stor_device;
+       struct Scsi_Host *shost;
 
        if (channel->primary_channel != NULL)
                device = channel->primary_channel->device_obj;
@@ -1254,20 +1277,12 @@ static void storvsc_on_channel_callback(void *context)
        if (!stor_device)
                return;
 
-       foreach_vmbus_pkt(desc, channel) {
-               void *packet = hv_pkt_data(desc);
-               struct storvsc_cmd_request *request;
-               u64 cmd_rqst;
-
-               cmd_rqst = vmbus_request_addr(&channel->requestor,
-                                             desc->trans_id);
-               if (cmd_rqst == VMBUS_RQST_ERROR) {
-                       dev_err(&device->device,
-                               "Incorrect transaction id\n");
-                       continue;
-               }
+       shost = stor_device->host;
 
-               request = (struct storvsc_cmd_request *)(unsigned long)cmd_rqst;
+       foreach_vmbus_pkt(desc, channel) {
+               struct vstor_packet *packet = hv_pkt_data(desc);
+               struct storvsc_cmd_request *request = NULL;
+               u64 rqst_id = desc->trans_id;
 
                if (hv_pkt_datalen(desc) < sizeof(struct vstor_packet) -
                                stor_device->vmscsi_size_delta) {
@@ -1275,14 +1290,44 @@ static void storvsc_on_channel_callback(void *context)
                        continue;
                }
 
-               if (request == &stor_device->init_request ||
-                   request == &stor_device->reset_request) {
-                       memcpy(&request->vstor_packet, packet,
-                              (sizeof(struct vstor_packet) - stor_device->vmscsi_size_delta));
-                       complete(&request->wait_event);
+               if (rqst_id == VMBUS_RQST_INIT) {
+                       request = &stor_device->init_request;
+               } else if (rqst_id == VMBUS_RQST_RESET) {
+                       request = &stor_device->reset_request;
                } else {
+                       /* Hyper-V can send an unsolicited message with ID of 0 */
+                       if (rqst_id == 0) {
+                               /*
+                                * storvsc_on_receive() looks at the vstor_packet in the message
+                                * from the ring buffer.  If the operation in the vstor_packet is
+                                * COMPLETE_IO, then we call storvsc_on_io_completion(), and
+                                * dereference the guest memory address.  Make sure we don't call
+                                * storvsc_on_io_completion() with a guest memory address that is
+                                * zero if Hyper-V were to construct and send such a bogus packet.
+                                */
+                               if (packet->operation == VSTOR_OPERATION_COMPLETE_IO) {
+                                       dev_err(&device->device, "Invalid packet with ID of 0\n");
+                                       continue;
+                               }
+                       } else {
+                               struct scsi_cmnd *scmnd;
+
+                               /* Transaction 'rqst_id' corresponds to tag 'rqst_id - 1' */
+                               scmnd = scsi_host_find_tag(shost, rqst_id - 1);
+                               if (scmnd == NULL) {
+                                       dev_err(&device->device, "Incorrect transaction ID\n");
+                                       continue;
+                               }
+                               request = (struct storvsc_cmd_request *)scsi_cmd_priv(scmnd);
+                       }
+
                        storvsc_on_receive(stor_device, packet, request);
+                       continue;
                }
+
+               memcpy(&request->vstor_packet, packet,
+                      (sizeof(struct vstor_packet) - stor_device->vmscsi_size_delta));
+               complete(&request->wait_event);
        }
 }
 
@@ -1294,11 +1339,8 @@ static int storvsc_connect_to_vsp(struct hv_device *device, u32 ring_size,
 
        memset(&props, 0, sizeof(struct vmstorage_channel_properties));
 
-       /*
-        * The size of vmbus_requestor is an upper bound on the number of requests
-        * that can be in-progress at any one time across all channels.
-        */
-       device->channel->rqstor_size = scsi_driver.can_queue;
+       device->channel->max_pkt_size = STORVSC_MAX_PKT_SIZE;
+       device->channel->next_request_id_callback = storvsc_next_request_id;
 
        ret = vmbus_open(device->channel,
                         ring_size,
@@ -1624,7 +1666,7 @@ static int storvsc_host_reset_handler(struct scsi_cmnd *scmnd)
        ret = vmbus_sendpacket(device->channel, vstor_packet,
                               (sizeof(struct vstor_packet) -
                                stor_device->vmscsi_size_delta),
-                              (unsigned long)&stor_device->reset_request,
+                              VMBUS_RQST_RESET,
                               VM_PKT_DATA_INBAND,
                               VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
        if (ret != 0)
index d1e59db..2e859d2 100644 (file)
@@ -181,6 +181,10 @@ struct hv_ring_buffer_info {
         * being freed while the ring buffer is being accessed.
         */
        struct mutex ring_buffer_mutex;
+
+       /* Buffer that holds a copy of an incoming host packet */
+       void *pkt_buffer;
+       u32 pkt_buffer_size;
 };
 
 
@@ -790,7 +794,11 @@ struct vmbus_requestor {
 
 #define VMBUS_NO_RQSTOR U64_MAX
 #define VMBUS_RQST_ERROR (U64_MAX - 1)
+/* NetVSC-specific */
 #define VMBUS_RQST_ID_NO_RESPONSE (U64_MAX - 2)
+/* StorVSC-specific */
+#define VMBUS_RQST_INIT (U64_MAX - 2)
+#define VMBUS_RQST_RESET (U64_MAX - 3)
 
 struct vmbus_device {
        u16  dev_type;
@@ -799,6 +807,8 @@ struct vmbus_device {
        bool allowed_in_isolated;
 };
 
+#define VMBUS_DEFAULT_MAX_PKT_SIZE 4096
+
 struct vmbus_channel {
        struct list_head listentry;
 
@@ -1018,13 +1028,21 @@ struct vmbus_channel {
        u32 fuzz_testing_interrupt_delay;
        u32 fuzz_testing_message_delay;
 
+       /* callback to generate a request ID from a request address */
+       u64 (*next_request_id_callback)(struct vmbus_channel *channel, u64 rqst_addr);
+       /* callback to retrieve a request address from a request ID */
+       u64 (*request_addr_callback)(struct vmbus_channel *channel, u64 rqst_id);
+
        /* request/transaction ids for VMBus */
        struct vmbus_requestor requestor;
        u32 rqstor_size;
+
+       /* The max size of a packet on this channel */
+       u32 max_pkt_size;
 };
 
-u64 vmbus_next_request_id(struct vmbus_requestor *rqstor, u64 rqst_addr);
-u64 vmbus_request_addr(struct vmbus_requestor *rqstor, u64 trans_id);
+u64 vmbus_next_request_id(struct vmbus_channel *channel, u64 rqst_addr);
+u64 vmbus_request_addr(struct vmbus_channel *channel, u64 trans_id);
 
 static inline bool is_hvsock_channel(const struct vmbus_channel *c)
 {
@@ -1663,31 +1681,54 @@ static inline u32 hv_pkt_datalen(const struct vmpacket_descriptor *desc)
 
 
 struct vmpacket_descriptor *
+hv_pkt_iter_first_raw(struct vmbus_channel *channel);
+
+struct vmpacket_descriptor *
 hv_pkt_iter_first(struct vmbus_channel *channel);
 
 struct vmpacket_descriptor *
 __hv_pkt_iter_next(struct vmbus_channel *channel,
-                  const struct vmpacket_descriptor *pkt);
+                  const struct vmpacket_descriptor *pkt,
+                  bool copy);
 
 void hv_pkt_iter_close(struct vmbus_channel *channel);
 
-/*
- * Get next packet descriptor from iterator
- * If at end of list, return NULL and update host.
- */
 static inline struct vmpacket_descriptor *
-hv_pkt_iter_next(struct vmbus_channel *channel,
-                const struct vmpacket_descriptor *pkt)
+hv_pkt_iter_next_pkt(struct vmbus_channel *channel,
+                    const struct vmpacket_descriptor *pkt,
+                    bool copy)
 {
        struct vmpacket_descriptor *nxt;
 
-       nxt = __hv_pkt_iter_next(channel, pkt);
+       nxt = __hv_pkt_iter_next(channel, pkt, copy);
        if (!nxt)
                hv_pkt_iter_close(channel);
 
        return nxt;
 }
 
+/*
+ * Get next packet descriptor without copying it out of the ring buffer
+ * If at end of list, return NULL and update host.
+ */
+static inline struct vmpacket_descriptor *
+hv_pkt_iter_next_raw(struct vmbus_channel *channel,
+                    const struct vmpacket_descriptor *pkt)
+{
+       return hv_pkt_iter_next_pkt(channel, pkt, false);
+}
+
+/*
+ * Get next packet descriptor from iterator
+ * If at end of list, return NULL and update host.
+ */
+static inline struct vmpacket_descriptor *
+hv_pkt_iter_next(struct vmbus_channel *channel,
+                const struct vmpacket_descriptor *pkt)
+{
+       return hv_pkt_iter_next_pkt(channel, pkt, true);
+}
+
 #define foreach_vmbus_pkt(pkt, channel) \
        for (pkt = hv_pkt_iter_first(channel); pkt; \
            pkt = hv_pkt_iter_next(channel, pkt))
index cc3bae2..19189cf 100644 (file)
@@ -596,7 +596,7 @@ static ssize_t hvs_stream_dequeue(struct vsock_sock *vsk, struct msghdr *msg,
                return -EOPNOTSUPP;
 
        if (need_refill) {
-               hvs->recv_desc = hv_pkt_iter_first(hvs->chan);
+               hvs->recv_desc = hv_pkt_iter_first_raw(hvs->chan);
                ret = hvs_update_recv_data(hvs);
                if (ret)
                        return ret;
@@ -610,7 +610,7 @@ static ssize_t hvs_stream_dequeue(struct vsock_sock *vsk, struct msghdr *msg,
 
        hvs->recv_data_len -= to_read;
        if (hvs->recv_data_len == 0) {
-               hvs->recv_desc = hv_pkt_iter_next(hvs->chan, hvs->recv_desc);
+               hvs->recv_desc = hv_pkt_iter_next_raw(hvs->chan, hvs->recv_desc);
                if (hvs->recv_desc) {
                        ret = hvs_update_recv_data(hvs);
                        if (ret)