Drivers: hv: Introduce a policy for controlling channel affinity

author K. Y. Srinivasan <kys@exchange.microsoft.com>

Fri, 2 Sep 2016 12:58:23 +0000 (05:58 -0700)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Fri, 2 Sep 2016 15:22:51 +0000 (17:22 +0200)
author K. Y. Srinivasan <kys@exchange.microsoft.com>
Fri, 2 Sep 2016 12:58:23 +0000 (05:58 -0700)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 2 Sep 2016 15:22:51 +0000 (17:22 +0200)
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c

index d8b64ba..bbd812e 100644 (file)
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -356,8 +356,9 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
          * We need to free the bit for init_vp_index() to work in the case
          * of sub-channel, when we reload drivers like hv_netvsc.
          */
-       cpumask_clear_cpu(channel->target_cpu,
-                         &primary_channel->alloced_cpus_in_node);
+       if (channel->affinity_policy == HV_LOCALIZED)
+               cpumask_clear_cpu(channel->target_cpu,
+                                 &primary_channel->alloced_cpus_in_node);
  
         vmbus_release_relid(relid);
  
@@ -548,17 +549,17 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
         }
  
         /*
-        * We distribute primary channels evenly across all the available
-        * NUMA nodes and within the assigned NUMA node we will assign the
-        * first available CPU to the primary channel.
-        * The sub-channels will be assigned to the CPUs available in the
-        * NUMA node evenly.
+        * Based on the channel affinity policy, we will assign the NUMA
+        * nodes.
          */
-       if (!primary) {
+
+       if ((channel->affinity_policy == HV_BALANCED) || (!primary)) {
                 while (true) {
                         next_node = next_numa_node_id++;
-                       if (next_node == nr_node_ids)
+                       if (next_node == nr_node_ids) {
                                 next_node = next_numa_node_id = 0;
+                               continue;
+                       }
                         if (cpumask_empty(cpumask_of_node(next_node)))
                                 continue;
                         break;
@@ -582,15 +583,17 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
  
         cur_cpu = -1;
  
-       /*
-        * Normally Hyper-V host doesn't create more subchannels than there
-        * are VCPUs on the node but it is possible when not all present VCPUs
-        * on the node are initialized by guest. Clear the alloced_cpus_in_node
-        * to start over.
-        */
-       if (cpumask_equal(&primary->alloced_cpus_in_node,
-                         cpumask_of_node(primary->numa_node)))
-               cpumask_clear(&primary->alloced_cpus_in_node);
+       if (primary->affinity_policy == HV_LOCALIZED) {
+               /*
+                * Normally Hyper-V host doesn't create more subchannels
+                * than there are VCPUs on the node but it is possible when not
+                * all present VCPUs on the node are initialized by guest.
+                * Clear the alloced_cpus_in_node to start over.
+                */
+               if (cpumask_equal(&primary->alloced_cpus_in_node,
+                                 cpumask_of_node(primary->numa_node)))
+                       cpumask_clear(&primary->alloced_cpus_in_node);
+       }
  
         while (true) {
                 cur_cpu = cpumask_next(cur_cpu, &available_mask);
@@ -601,17 +604,24 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
                         continue;
                 }
  
-               /*
-                * NOTE: in the case of sub-channel, we clear the sub-channel
-                * related bit(s) in primary->alloced_cpus_in_node in
-                * hv_process_channel_removal(), so when we reload drivers
-                * like hv_netvsc in SMP guest, here we're able to re-allocate
-                * bit from primary->alloced_cpus_in_node.
-                */
-               if (!cpumask_test_cpu(cur_cpu,
-                               &primary->alloced_cpus_in_node)) {
-                       cpumask_set_cpu(cur_cpu,
-                                       &primary->alloced_cpus_in_node);
+               if (primary->affinity_policy == HV_LOCALIZED) {
+                       /*
+                        * NOTE: in the case of sub-channel, we clear the
+                        * sub-channel related bit(s) in
+                        * primary->alloced_cpus_in_node in
+                        * hv_process_channel_removal(), so when we
+                        * reload drivers like hv_netvsc in SMP guest, here
+                        * we're able to re-allocate
+                        * bit from primary->alloced_cpus_in_node.
+                        */
+                       if (!cpumask_test_cpu(cur_cpu,
+                                             &primary->alloced_cpus_in_node)) {
+                               cpumask_set_cpu(cur_cpu,
+                                               &primary->alloced_cpus_in_node);
+                               cpumask_set_cpu(cur_cpu, alloced_mask);
+                               break;
+                       }
+               } else {
                         cpumask_set_cpu(cur_cpu, alloced_mask);
                         break;
                 }
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h

index e6ef571..c877e79 100644 (file)
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -674,6 +674,11 @@ enum hv_signal_policy {
         HV_SIGNAL_POLICY_EXPLICIT,
  };
  
+enum hv_numa_policy {
+       HV_BALANCED = 0,
+       HV_LOCALIZED,
+};
+
  enum vmbus_device_type {
         HV_IDE = 0,
         HV_SCSI,
@@ -876,6 +881,18 @@ struct vmbus_channel {
          */
         bool low_latency;
  
+       /*
+        * NUMA distribution policy:
+        * We support teo policies:
+        * 1) Balanced: Here all performance critical channels are
+        *    distributed evenly amongst all the NUMA nodes.
+        *    This policy will be the default policy.
+        * 2) Localized: All channels of a given instance of a
+        *    performance critical service will be assigned CPUs
+        *    within a selected NUMA node.
+        */
+       enum hv_numa_policy affinity_policy;
+
  };
  
  static inline void set_channel_lock_state(struct vmbus_channel *c, bool state)
@@ -895,6 +912,12 @@ static inline void set_channel_signal_state(struct vmbus_channel *c,
         c->signal_policy = policy;
  }
  
+static inline void set_channel_affinity_state(struct vmbus_channel *c,
+                                             enum hv_numa_policy policy)
+{
+       c->affinity_policy = policy;
+}
+
  static inline void set_channel_read_state(struct vmbus_channel *c, bool state)
  {
         c->batched_reading = state;
author	K. Y. Srinivasan <kys@exchange.microsoft.com>
	Fri, 2 Sep 2016 12:58:23 +0000 (05:58 -0700)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Fri, 2 Sep 2016 15:22:51 +0000 (17:22 +0200)
drivers/hv/channel_mgmt.c		patch \| blob \| history
include/linux/hyperv.h		patch \| blob \| history