* Max I/O, the device can support.
*/
u32 max_transfer_bytes;
+ /*
+ * Number of sub-channels we will open.
+ */
+ u16 num_sc;
+ struct vmbus_channel **stor_chns;
+ /*
+ * Mask of CPUs bound to subchannels.
+ */
+ struct cpumask alloced_cpus;
/* Used for vsc/vsp channel reset process */
struct storvsc_cmd_request init_request;
struct storvsc_cmd_request reset_request;
(void *)&props,
sizeof(struct vmstorage_channel_properties),
storvsc_on_channel_callback, new_sc);
+
+ if (new_sc->state == CHANNEL_OPENED_STATE) {
+ stor_device->stor_chns[new_sc->target_cpu] = new_sc;
+ cpumask_set_cpu(new_sc->target_cpu, &stor_device->alloced_cpus);
+ }
}
static void handle_multichannel_storage(struct hv_device *device, int max_chns)
if (!stor_device)
return;
+ stor_device->num_sc = num_sc;
request = &stor_device->init_request;
vstor_packet = &request->vstor_packet;
* support multi-channel.
*/
max_chns = vstor_packet->storage_channel_properties.max_channel_cnt;
+
+ /*
+ * Allocate state to manage the sub-channels.
+ * We allocate an array based on the numbers of possible CPUs
+ * (Hyper-V does not support cpu online/offline).
+ * This Array will be sparseley populated with unique
+ * channels - primary + sub-channels.
+ * We will however populate all the slots to evenly distribute
+ * the load.
+ */
+ stor_device->stor_chns = kzalloc(sizeof(void *) * num_possible_cpus(),
+ GFP_KERNEL);
+ if (stor_device->stor_chns == NULL)
+ return -ENOMEM;
+
+ stor_device->stor_chns[device->channel->target_cpu] = device->channel;
+ cpumask_set_cpu(device->channel->target_cpu,
+ &stor_device->alloced_cpus);
+
if (vmstor_proto_version >= VMSTOR_PROTO_VERSION_WIN8) {
if (vstor_packet->storage_channel_properties.flags &
STORAGE_CHANNEL_SUPPORTS_MULTI_CHANNEL)
/* Close the channel */
vmbus_close(device->channel);
+ kfree(stor_device->stor_chns);
kfree(stor_device);
return 0;
}
+static struct vmbus_channel *get_og_chn(struct storvsc_device *stor_device,
+ u16 q_num)
+{
+ u16 slot = 0;
+ u16 hash_qnum;
+ struct cpumask alloced_mask;
+ int num_channels, tgt_cpu;
+
+ if (stor_device->num_sc == 0)
+ return stor_device->device->channel;
+
+ /*
+ * Our channel array is sparsley populated and we
+ * initiated I/O on a processor/hw-q that does not
+ * currently have a designated channel. Fix this.
+ * The strategy is simple:
+ * I. Ensure NUMA locality
+ * II. Distribute evenly (best effort)
+ * III. Mapping is persistent.
+ */
+
+ cpumask_and(&alloced_mask, &stor_device->alloced_cpus,
+ cpumask_of_node(cpu_to_node(q_num)));
+
+ num_channels = cpumask_weight(&alloced_mask);
+ if (num_channels == 0)
+ return stor_device->device->channel;
+
+ hash_qnum = q_num;
+ while (hash_qnum >= num_channels)
+ hash_qnum -= num_channels;
+
+ for_each_cpu(tgt_cpu, &alloced_mask) {
+ if (slot == hash_qnum)
+ break;
+ slot++;
+ }
+
+ stor_device->stor_chns[q_num] = stor_device->stor_chns[tgt_cpu];
+
+ return stor_device->stor_chns[q_num];
+}
+
+
static int storvsc_do_io(struct hv_device *device,
- struct storvsc_cmd_request *request)
+ struct storvsc_cmd_request *request, u16 q_num)
{
struct storvsc_device *stor_device;
struct vstor_packet *vstor_packet;
struct vmbus_channel *outgoing_channel;
int ret = 0;
+ struct cpumask alloced_mask;
+ int tgt_cpu;
vstor_packet = &request->vstor_packet;
stor_device = get_out_stor_device(device);
* Select an an appropriate channel to send the request out.
*/
- outgoing_channel = vmbus_get_outgoing_channel(device->channel);
+ if (stor_device->stor_chns[q_num] != NULL) {
+ outgoing_channel = stor_device->stor_chns[q_num];
+ if (outgoing_channel->target_cpu == smp_processor_id()) {
+ /*
+ * Ideally, we want to pick a different channel if
+ * available on the same NUMA node.
+ */
+ cpumask_and(&alloced_mask, &stor_device->alloced_cpus,
+ cpumask_of_node(cpu_to_node(q_num)));
+ for_each_cpu(tgt_cpu, &alloced_mask) {
+ if (tgt_cpu != outgoing_channel->target_cpu) {
+ outgoing_channel =
+ stor_device->stor_chns[tgt_cpu];
+ break;
+ }
+ }
+ }
+ } else {
+ outgoing_channel = get_og_chn(stor_device, q_num);
+ }
vstor_packet->flags |= REQUEST_COMPLETION_FLAG;
cmd_request->payload_sz = payload_sz;
/* Invokes the vsc to start an IO */
- ret = storvsc_do_io(dev, cmd_request);
+ ret = storvsc_do_io(dev, cmd_request, get_cpu());
+ put_cpu();
if (ret == -EAGAIN) {
/* no more space */
* from the host.
*/
host->sg_tablesize = (stor_device->max_transfer_bytes >> PAGE_SHIFT);
+ /*
+ * Set the number of HW queues we are supporting.
+ */
+ if (stor_device->num_sc != 0)
+ host->nr_hw_queues = stor_device->num_sc + 1;
/* Register the HBA and start the scsi bus scan */
ret = scsi_add_host(host, &device->device);
goto err_out0;
err_out1:
+ kfree(stor_device->stor_chns);
kfree(stor_device);
err_out0: