hv_netvsc: Wait for completion on request SWITCH_DATA_PATH
drivers/net/hyperv/netvsc.c (platform/kernel/linux-starfive.git)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2009, Microsoft Corporation.
4  *
5  * Authors:
6  *   Haiyang Zhang <haiyangz@microsoft.com>
7  *   Hank Janssen  <hjanssen@microsoft.com>
8  */
9 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10
11 #include <linux/kernel.h>
12 #include <linux/sched.h>
13 #include <linux/wait.h>
14 #include <linux/mm.h>
15 #include <linux/delay.h>
16 #include <linux/io.h>
17 #include <linux/slab.h>
18 #include <linux/netdevice.h>
19 #include <linux/if_ether.h>
20 #include <linux/vmalloc.h>
21 #include <linux/rtnetlink.h>
22 #include <linux/prefetch.h>
23
24 #include <asm/sync_bitops.h>
25
26 #include "hyperv_net.h"
27 #include "netvsc_trace.h"
28
29 /*
30  * Switch the data path between the synthetic interface and the VF
31  * interface.
32  */
33 void netvsc_switch_datapath(struct net_device *ndev, bool vf)
34 {
35         struct net_device_context *net_device_ctx = netdev_priv(ndev);
36         struct hv_device *dev = net_device_ctx->device_ctx;
37         struct netvsc_device *nv_dev = rtnl_dereference(net_device_ctx->nvdev);
38         struct nvsp_message *init_pkt = &nv_dev->channel_init_pkt;
39
40         /* Block sending traffic to VF if it's about to be gone */
41         if (!vf)
42                 net_device_ctx->data_path_is_vf = vf;
43
44         memset(init_pkt, 0, sizeof(struct nvsp_message));
45         init_pkt->hdr.msg_type = NVSP_MSG4_TYPE_SWITCH_DATA_PATH;
46         if (vf)
47                 init_pkt->msg.v4_msg.active_dp.active_datapath =
48                         NVSP_DATAPATH_VF;
49         else
50                 init_pkt->msg.v4_msg.active_dp.active_datapath =
51                         NVSP_DATAPATH_SYNTHETIC;
52
53         trace_nvsp_send(ndev, init_pkt);
54
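        /* The host acknowledges SWITCH_DATA_PATH with an empty completion
         * (see netvsc_send_completion()); wait for it before recording the
         * new data path so the VF is not used before the switch took effect.
         */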
55         vmbus_sendpacket(dev->channel, init_pkt,
56                                sizeof(struct nvsp_message),
57                                (unsigned long)init_pkt,
58                                VM_PKT_DATA_INBAND,
59                                VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
60         wait_for_completion(&nv_dev->channel_init_wait);
61         net_device_ctx->data_path_is_vf = vf;
62 }
63
64 /* Worker to set up sub channels on initial setup
65  * Initial hotplug event occurs in softirq context
66  * and can't wait for channels.
67  */
68 static void netvsc_subchan_work(struct work_struct *w)
69 {
70         struct netvsc_device *nvdev =
71                 container_of(w, struct netvsc_device, subchan_work);
72         struct rndis_device *rdev;
73         int i, ret;
74
75         /* Avoid deadlock with device removal already under RTNL */
76         if (!rtnl_trylock()) {
77                 schedule_work(w);
78                 return;
79         }
80
81         rdev = nvdev->extension;
82         if (rdev) {
83                 ret = rndis_set_subchannel(rdev->ndev, nvdev, NULL);
84                 if (ret == 0) {
85                         netif_device_attach(rdev->ndev);
86                 } else {
87                         /* fallback to only primary channel */
88                         for (i = 1; i < nvdev->num_chn; i++)
89                                 netif_napi_del(&nvdev->chan_table[i].napi);
90
91                         nvdev->max_chn = 1;
92                         nvdev->num_chn = 1;
93                 }
94         }
95
96         rtnl_unlock();
97 }
98
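/* Allocate a netvsc_device and initialize the wait queues, init completion
 * and sub-channel work used during setup and teardown. TX stays disabled
 * until the data path is ready.
 */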
99 static struct netvsc_device *alloc_net_device(void)
100 {
101         struct netvsc_device *net_device;
102
103         net_device = kzalloc(sizeof(struct netvsc_device), GFP_KERNEL);
104         if (!net_device)
105                 return NULL;
106
107         init_waitqueue_head(&net_device->wait_drain);
108         net_device->destroy = false;
109         net_device->tx_disable = true;
110
111         net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT;
112         net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT;
113
114         init_completion(&net_device->channel_init_wait);
115         init_waitqueue_head(&net_device->subchan_open);
116         INIT_WORK(&net_device->subchan_work, netvsc_subchan_work);
117
118         return net_device;
119 }
120
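/* RCU callback that releases everything attached to a netvsc_device: the
 * RNDIS extension, the receive/send buffers, the send-section bitmap and
 * the per-channel receive-completion slots.
 */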
121 static void free_netvsc_device(struct rcu_head *head)
122 {
123         struct netvsc_device *nvdev
124                 = container_of(head, struct netvsc_device, rcu);
125         int i;
126
127         kfree(nvdev->extension);
128         vfree(nvdev->recv_buf);
129         vfree(nvdev->send_buf);
130         kfree(nvdev->send_section_map);
131
132         for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
133                 xdp_rxq_info_unreg(&nvdev->chan_table[i].xdp_rxq);
134                 vfree(nvdev->chan_table[i].mrc.slots);
135         }
136
137         kfree(nvdev);
138 }
139
140 static void free_netvsc_device_rcu(struct netvsc_device *nvdev)
141 {
142         call_rcu(&nvdev->rcu, free_netvsc_device);
143 }
144
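/* Ask the host to stop using the receive buffer (REVOKE_RECV_BUF). A send
 * failure on a rescinded channel is ignored so teardown can still proceed.
 */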
145 static void netvsc_revoke_recv_buf(struct hv_device *device,
146                                    struct netvsc_device *net_device,
147                                    struct net_device *ndev)
148 {
149         struct nvsp_message *revoke_packet;
150         int ret;
151
152         /*
153          * If we got a section count, it means we received a
154          * SendReceiveBufferComplete msg (ie sent
155          * NvspMessage1TypeSendReceiveBuffer msg) therefore, we need
156          * to send a revoke msg here
157          */
158         if (net_device->recv_section_cnt) {
159                 /* Send the revoke receive buffer */
160                 revoke_packet = &net_device->revoke_packet;
161                 memset(revoke_packet, 0, sizeof(struct nvsp_message));
162
163                 revoke_packet->hdr.msg_type =
164                         NVSP_MSG1_TYPE_REVOKE_RECV_BUF;
165                 revoke_packet->msg.v1_msg.
166                 revoke_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;
167
168                 trace_nvsp_send(ndev, revoke_packet);
169
170                 ret = vmbus_sendpacket(device->channel,
171                                        revoke_packet,
172                                        sizeof(struct nvsp_message),
173                                        VMBUS_RQST_ID_NO_RESPONSE,
174                                        VM_PKT_DATA_INBAND, 0);
175                 /* If the failure is because the channel is rescinded,
176                  * ignore the failure since we cannot send on a rescinded
177                  * channel. This allows us to properly clean up
178                  * even when the channel is rescinded.
179                  */
180                 if (device->channel->rescind)
181                         ret = 0;
182                 /*
183                  * If we failed here, we might as well return and
184                  * have a leak rather than continue and a bugchk
185                  */
186                 if (ret != 0) {
187                         netdev_err(ndev, "unable to send "
188                                 "revoke receive buffer to netvsp\n");
189                         return;
190                 }
191                 net_device->recv_section_cnt = 0;
192         }
193 }
194
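/* Ask the host to stop using the send buffer (REVOKE_SEND_BUF). As with the
 * receive buffer, a failure on a rescinded channel is not treated as fatal.
 */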
195 static void netvsc_revoke_send_buf(struct hv_device *device,
196                                    struct netvsc_device *net_device,
197                                    struct net_device *ndev)
198 {
199         struct nvsp_message *revoke_packet;
200         int ret;
201
202         /* Deal with the send buffer we may have set up.
203          * If we got a send section size, it means we received a
204          * NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE msg (ie sent
205          * NVSP_MSG1_TYPE_SEND_SEND_BUF msg) therefore, we need
206          * to send a revoke msg here
207          */
208         if (net_device->send_section_cnt) {
209                 /* Send the revoke send buffer */
210                 revoke_packet = &net_device->revoke_packet;
211                 memset(revoke_packet, 0, sizeof(struct nvsp_message));
212
213                 revoke_packet->hdr.msg_type =
214                         NVSP_MSG1_TYPE_REVOKE_SEND_BUF;
215                 revoke_packet->msg.v1_msg.revoke_send_buf.id =
216                         NETVSC_SEND_BUFFER_ID;
217
218                 trace_nvsp_send(ndev, revoke_packet);
219
220                 ret = vmbus_sendpacket(device->channel,
221                                        revoke_packet,
222                                        sizeof(struct nvsp_message),
223                                        VMBUS_RQST_ID_NO_RESPONSE,
224                                        VM_PKT_DATA_INBAND, 0);
225
226                 /* If the failure is because the channel is rescinded,
227                  * ignore the failure since we cannot send on a rescinded
228                  * channel. This allows us to properly clean up
229                  * even when the channel is rescinded.
230                  */
231                 if (device->channel->rescind)
232                         ret = 0;
233
234                 /* If we failed here, we might as well return and
235                  * have a leak rather than continue and a bugchk
236                  */
237                 if (ret != 0) {
238                         netdev_err(ndev, "unable to send "
239                                    "revoke send buffer to netvsp\n");
240                         return;
241                 }
242                 net_device->send_section_cnt = 0;
243         }
244 }
245
246 static void netvsc_teardown_recv_gpadl(struct hv_device *device,
247                                        struct netvsc_device *net_device,
248                                        struct net_device *ndev)
249 {
250         int ret;
251
252         if (net_device->recv_buf_gpadl_handle) {
253                 ret = vmbus_teardown_gpadl(device->channel,
254                                            net_device->recv_buf_gpadl_handle);
255
256                 /* If we failed here, we might as well return and have a leak
257                  * rather than continue and a bugchk
258                  */
259                 if (ret != 0) {
260                         netdev_err(ndev,
261                                    "unable to teardown receive buffer's gpadl\n");
262                         return;
263                 }
264                 net_device->recv_buf_gpadl_handle = 0;
265         }
266 }
267
268 static void netvsc_teardown_send_gpadl(struct hv_device *device,
269                                        struct netvsc_device *net_device,
270                                        struct net_device *ndev)
271 {
272         int ret;
273
274         if (net_device->send_buf_gpadl_handle) {
275                 ret = vmbus_teardown_gpadl(device->channel,
276                                            net_device->send_buf_gpadl_handle);
277
278                 /* If we failed here, we might as well return and have a leak
279                  * rather than continue and a bugchk
280                  */
281                 if (ret != 0) {
282                         netdev_err(ndev,
283                                    "unable to teardown send buffer's gpadl\n");
284                         return;
285                 }
286                 net_device->send_buf_gpadl_handle = 0;
287         }
288 }
289
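/* Allocate the receive-completion ring for one queue, preferring memory on
 * the node of the channel's target CPU and falling back to any node.
 */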
290 int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx)
291 {
292         struct netvsc_channel *nvchan = &net_device->chan_table[q_idx];
293         int node = cpu_to_node(nvchan->channel->target_cpu);
294         size_t size;
295
296         size = net_device->recv_completion_cnt * sizeof(struct recv_comp_data);
297         nvchan->mrc.slots = vzalloc_node(size, node);
298         if (!nvchan->mrc.slots)
299                 nvchan->mrc.slots = vzalloc(size);
300
301         return nvchan->mrc.slots ? 0 : -ENOMEM;
302 }
303
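/* Allocate the receive and send buffers, establish a GPADL for each and
 * announce them to the host with SEND_RECV_BUF/SEND_SEND_BUF, waiting for
 * the host's response before parsing the section layout it returns.
 */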
304 static int netvsc_init_buf(struct hv_device *device,
305                            struct netvsc_device *net_device,
306                            const struct netvsc_device_info *device_info)
307 {
308         struct nvsp_1_message_send_receive_buffer_complete *resp;
309         struct net_device *ndev = hv_get_drvdata(device);
310         struct nvsp_message *init_packet;
311         unsigned int buf_size;
312         size_t map_words;
313         int ret = 0;
314
315         /* Get receive buffer area. */
316         buf_size = device_info->recv_sections * device_info->recv_section_size;
317         buf_size = roundup(buf_size, PAGE_SIZE);
318
319         /* Legacy hosts only allow smaller receive buffer */
320         if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_2)
321                 buf_size = min_t(unsigned int, buf_size,
322                                  NETVSC_RECEIVE_BUFFER_SIZE_LEGACY);
323
324         net_device->recv_buf = vzalloc(buf_size);
325         if (!net_device->recv_buf) {
326                 netdev_err(ndev,
327                            "unable to allocate receive buffer of size %u\n",
328                            buf_size);
329                 ret = -ENOMEM;
330                 goto cleanup;
331         }
332
333         net_device->recv_buf_size = buf_size;
334
335         /*
336          * Establish the gpadl handle for this buffer on this
337          * channel.  Note: This call uses the vmbus connection rather
338          * than the channel to establish the gpadl handle.
339          */
340         ret = vmbus_establish_gpadl(device->channel, net_device->recv_buf,
341                                     buf_size,
342                                     &net_device->recv_buf_gpadl_handle);
343         if (ret != 0) {
344                 netdev_err(ndev,
345                         "unable to establish receive buffer's gpadl\n");
346                 goto cleanup;
347         }
348
349         /* Notify the NetVsp of the gpadl handle */
350         init_packet = &net_device->channel_init_pkt;
351         memset(init_packet, 0, sizeof(struct nvsp_message));
352         init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_RECV_BUF;
353         init_packet->msg.v1_msg.send_recv_buf.
354                 gpadl_handle = net_device->recv_buf_gpadl_handle;
355         init_packet->msg.v1_msg.
356                 send_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;
357
358         trace_nvsp_send(ndev, init_packet);
359
360         /* Send the gpadl notification request */
361         ret = vmbus_sendpacket(device->channel, init_packet,
362                                sizeof(struct nvsp_message),
363                                (unsigned long)init_packet,
364                                VM_PKT_DATA_INBAND,
365                                VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
366         if (ret != 0) {
367                 netdev_err(ndev,
368                         "unable to send receive buffer's gpadl to netvsp\n");
369                 goto cleanup;
370         }
371
372         wait_for_completion(&net_device->channel_init_wait);
373
374         /* Check the response */
375         resp = &init_packet->msg.v1_msg.send_recv_buf_complete;
376         if (resp->status != NVSP_STAT_SUCCESS) {
377                 netdev_err(ndev,
378                            "Unable to complete receive buffer initialization with NetVsp - status %d\n",
379                            resp->status);
380                 ret = -EINVAL;
381                 goto cleanup;
382         }
383
384         /* Parse the response */
385         netdev_dbg(ndev, "Receive sections: %u sub_allocs: size %u count: %u\n",
386                    resp->num_sections, resp->sections[0].sub_alloc_size,
387                    resp->sections[0].num_sub_allocs);
388
389         /* There should only be one section for the entire receive buffer */
390         if (resp->num_sections != 1 || resp->sections[0].offset != 0) {
391                 ret = -EINVAL;
392                 goto cleanup;
393         }
394
395         net_device->recv_section_size = resp->sections[0].sub_alloc_size;
396         net_device->recv_section_cnt = resp->sections[0].num_sub_allocs;
397
398         /* Ensure buffer will not overflow */
399         if (net_device->recv_section_size < NETVSC_MTU_MIN || (u64)net_device->recv_section_size *
400             (u64)net_device->recv_section_cnt > (u64)buf_size) {
401                 netdev_err(ndev, "invalid recv_section_size %u\n",
402                            net_device->recv_section_size);
403                 ret = -EINVAL;
404                 goto cleanup;
405         }
406
407         /* Setup receive completion ring.
408          * Add 1 to the recv_section_cnt because at least one entry in a
409          * ring buffer has to be empty.
410          */
411         net_device->recv_completion_cnt = net_device->recv_section_cnt + 1;
412         ret = netvsc_alloc_recv_comp_ring(net_device, 0);
413         if (ret)
414                 goto cleanup;
415
416         /* Now setup the send buffer. */
417         buf_size = device_info->send_sections * device_info->send_section_size;
418         buf_size = round_up(buf_size, PAGE_SIZE);
419
420         net_device->send_buf = vzalloc(buf_size);
421         if (!net_device->send_buf) {
422                 netdev_err(ndev, "unable to allocate send buffer of size %u\n",
423                            buf_size);
424                 ret = -ENOMEM;
425                 goto cleanup;
426         }
427
428         /* Establish the gpadl handle for this buffer on this
429          * channel.  Note: This call uses the vmbus connection rather
430          * than the channel to establish the gpadl handle.
431          */
432         ret = vmbus_establish_gpadl(device->channel, net_device->send_buf,
433                                     buf_size,
434                                     &net_device->send_buf_gpadl_handle);
435         if (ret != 0) {
436                 netdev_err(ndev,
437                            "unable to establish send buffer's gpadl\n");
438                 goto cleanup;
439         }
440
441         /* Notify the NetVsp of the gpadl handle */
442         init_packet = &net_device->channel_init_pkt;
443         memset(init_packet, 0, sizeof(struct nvsp_message));
444         init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_SEND_BUF;
445         init_packet->msg.v1_msg.send_send_buf.gpadl_handle =
446                 net_device->send_buf_gpadl_handle;
447         init_packet->msg.v1_msg.send_send_buf.id = NETVSC_SEND_BUFFER_ID;
448
449         trace_nvsp_send(ndev, init_packet);
450
451         /* Send the gpadl notification request */
452         ret = vmbus_sendpacket(device->channel, init_packet,
453                                sizeof(struct nvsp_message),
454                                (unsigned long)init_packet,
455                                VM_PKT_DATA_INBAND,
456                                VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
457         if (ret != 0) {
458                 netdev_err(ndev,
459                            "unable to send send buffer's gpadl to netvsp\n");
460                 goto cleanup;
461         }
462
463         wait_for_completion(&net_device->channel_init_wait);
464
465         /* Check the response */
466         if (init_packet->msg.v1_msg.
467             send_send_buf_complete.status != NVSP_STAT_SUCCESS) {
468                 netdev_err(ndev, "Unable to complete send buffer "
469                            "initialization with NetVsp - status %d\n",
470                            init_packet->msg.v1_msg.
471                            send_send_buf_complete.status);
472                 ret = -EINVAL;
473                 goto cleanup;
474         }
475
476         /* Parse the response */
477         net_device->send_section_size = init_packet->msg.
478                                 v1_msg.send_send_buf_complete.section_size;
479         if (net_device->send_section_size < NETVSC_MTU_MIN) {
480                 netdev_err(ndev, "invalid send_section_size %u\n",
481                            net_device->send_section_size);
482                 ret = -EINVAL;
483                 goto cleanup;
484         }
485
486         /* Section count is simply the size divided by the section size. */
487         net_device->send_section_cnt = buf_size / net_device->send_section_size;
488
489         netdev_dbg(ndev, "Send section size: %d, Section count:%d\n",
490                    net_device->send_section_size, net_device->send_section_cnt);
491
492         /* Setup state for managing the send buffer. */
493         map_words = DIV_ROUND_UP(net_device->send_section_cnt, BITS_PER_LONG);
494
495         net_device->send_section_map = kcalloc(map_words, sizeof(ulong), GFP_KERNEL);
496         if (net_device->send_section_map == NULL) {
497                 ret = -ENOMEM;
498                 goto cleanup;
499         }
500
501         goto exit;
502
503 cleanup:
504         netvsc_revoke_recv_buf(device, net_device, ndev);
505         netvsc_revoke_send_buf(device, net_device, ndev);
506         netvsc_teardown_recv_gpadl(device, net_device, ndev);
507         netvsc_teardown_send_gpadl(device, net_device, ndev);
508
509 exit:
510         return ret;
511 }
512
513 /* Negotiate NVSP protocol version */
514 static int negotiate_nvsp_ver(struct hv_device *device,
515                               struct netvsc_device *net_device,
516                               struct nvsp_message *init_packet,
517                               u32 nvsp_ver)
518 {
519         struct net_device *ndev = hv_get_drvdata(device);
520         int ret;
521
522         memset(init_packet, 0, sizeof(struct nvsp_message));
523         init_packet->hdr.msg_type = NVSP_MSG_TYPE_INIT;
524         init_packet->msg.init_msg.init.min_protocol_ver = nvsp_ver;
525         init_packet->msg.init_msg.init.max_protocol_ver = nvsp_ver;
526         trace_nvsp_send(ndev, init_packet);
527
528         /* Send the init request */
529         ret = vmbus_sendpacket(device->channel, init_packet,
530                                sizeof(struct nvsp_message),
531                                (unsigned long)init_packet,
532                                VM_PKT_DATA_INBAND,
533                                VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
534
535         if (ret != 0)
536                 return ret;
537
538         wait_for_completion(&net_device->channel_init_wait);
539
540         if (init_packet->msg.init_msg.init_complete.status !=
541             NVSP_STAT_SUCCESS)
542                 return -EINVAL;
543
544         if (nvsp_ver == NVSP_PROTOCOL_VERSION_1)
545                 return 0;
546
547         /* NVSPv2 or later: Send NDIS config */
548         memset(init_packet, 0, sizeof(struct nvsp_message));
549         init_packet->hdr.msg_type = NVSP_MSG2_TYPE_SEND_NDIS_CONFIG;
550         init_packet->msg.v2_msg.send_ndis_config.mtu = ndev->mtu + ETH_HLEN;
551         init_packet->msg.v2_msg.send_ndis_config.capability.ieee8021q = 1;
552
553         if (nvsp_ver >= NVSP_PROTOCOL_VERSION_5) {
554                 init_packet->msg.v2_msg.send_ndis_config.capability.sriov = 1;
555
556                 /* Teaming bit is needed to receive link speed updates */
557                 init_packet->msg.v2_msg.send_ndis_config.capability.teaming = 1;
558         }
559
560         if (nvsp_ver >= NVSP_PROTOCOL_VERSION_61)
561                 init_packet->msg.v2_msg.send_ndis_config.capability.rsc = 1;
562
563         trace_nvsp_send(ndev, init_packet);
564
565         ret = vmbus_sendpacket(device->channel, init_packet,
566                                 sizeof(struct nvsp_message),
567                                 VMBUS_RQST_ID_NO_RESPONSE,
568                                 VM_PKT_DATA_INBAND, 0);
569
570         return ret;
571 }
572
573 static int netvsc_connect_vsp(struct hv_device *device,
574                               struct netvsc_device *net_device,
575                               const struct netvsc_device_info *device_info)
576 {
577         struct net_device *ndev = hv_get_drvdata(device);
578         static const u32 ver_list[] = {
579                 NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2,
580                 NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5,
581                 NVSP_PROTOCOL_VERSION_6, NVSP_PROTOCOL_VERSION_61
582         };
583         struct nvsp_message *init_packet;
584         int ndis_version, i, ret;
585
586         init_packet = &net_device->channel_init_pkt;
587
588         /* Negotiate the latest NVSP protocol supported */
589         for (i = ARRAY_SIZE(ver_list) - 1; i >= 0; i--)
590                 if (negotiate_nvsp_ver(device, net_device, init_packet,
591                                        ver_list[i])  == 0) {
592                         net_device->nvsp_version = ver_list[i];
593                         break;
594                 }
595
596         if (i < 0) {
597                 ret = -EPROTO;
598                 goto cleanup;
599         }
600
601         pr_debug("Negotiated NVSP version:%x\n", net_device->nvsp_version);
602
603         /* Send the ndis version */
604         memset(init_packet, 0, sizeof(struct nvsp_message));
605
606         if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_4)
607                 ndis_version = 0x00060001;
608         else
609                 ndis_version = 0x0006001e;
610
611         init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_NDIS_VER;
612         init_packet->msg.v1_msg.
613                 send_ndis_ver.ndis_major_ver =
614                                 (ndis_version & 0xFFFF0000) >> 16;
615         init_packet->msg.v1_msg.
616                 send_ndis_ver.ndis_minor_ver =
617                                 ndis_version & 0xFFFF;
618
619         trace_nvsp_send(ndev, init_packet);
620
621         /* Send the init request */
622         ret = vmbus_sendpacket(device->channel, init_packet,
623                                 sizeof(struct nvsp_message),
624                                 VMBUS_RQST_ID_NO_RESPONSE,
625                                 VM_PKT_DATA_INBAND, 0);
626         if (ret != 0)
627                 goto cleanup;
628
629
630         ret = netvsc_init_buf(device, net_device, device_info);
631
632 cleanup:
633         return ret;
634 }
635
636 /*
637  * netvsc_device_remove - Callback when the root bus device is removed
638  */
639 void netvsc_device_remove(struct hv_device *device)
640 {
641         struct net_device *ndev = hv_get_drvdata(device);
642         struct net_device_context *net_device_ctx = netdev_priv(ndev);
643         struct netvsc_device *net_device
644                 = rtnl_dereference(net_device_ctx->nvdev);
645         int i;
646
647         /*
648          * Revoke receive buffer. If host is pre-Win2016 then tear down
649          * receive buffer GPADL. Do the same for send buffer.
650          */
651         netvsc_revoke_recv_buf(device, net_device, ndev);
652         if (vmbus_proto_version < VERSION_WIN10)
653                 netvsc_teardown_recv_gpadl(device, net_device, ndev);
654
655         netvsc_revoke_send_buf(device, net_device, ndev);
656         if (vmbus_proto_version < VERSION_WIN10)
657                 netvsc_teardown_send_gpadl(device, net_device, ndev);
658
659         RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
660
661         /* Disable NAPI and disassociate its context from the device. */
662         for (i = 0; i < net_device->num_chn; i++) {
663                 /* See also vmbus_reset_channel_cb(). */
664                 napi_disable(&net_device->chan_table[i].napi);
665                 netif_napi_del(&net_device->chan_table[i].napi);
666         }
667
668         /*
669          * At this point, no one should be accessing net_device
670          * except in here
671          */
672         netdev_dbg(ndev, "net device safe to remove\n");
673
674         /* Now, we can close the channel safely */
675         vmbus_close(device->channel);
676
677         /*
678          * If host is Win2016 or higher then we do the GPADL tear down
679          * here after VMBus is closed.
680         */
681         if (vmbus_proto_version >= VERSION_WIN10) {
682                 netvsc_teardown_recv_gpadl(device, net_device, ndev);
683                 netvsc_teardown_send_gpadl(device, net_device, ndev);
684         }
685
686         /* Release all resources */
687         free_netvsc_device_rcu(net_device);
688 }
689
690 #define RING_AVAIL_PERCENT_HIWATER 20
691 #define RING_AVAIL_PERCENT_LOWATER 10
692
693 static inline void netvsc_free_send_slot(struct netvsc_device *net_device,
694                                          u32 index)
695 {
696         sync_change_bit(index, net_device->send_section_map);
697 }
698
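/* Completion handler for RNDIS data packets: look up the original request,
 * release its send-buffer slot, update per-queue stats, free the skb and
 * wake the transmit queue once enough ring space is available again.
 */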
699 static void netvsc_send_tx_complete(struct net_device *ndev,
700                                     struct netvsc_device *net_device,
701                                     struct vmbus_channel *channel,
702                                     const struct vmpacket_descriptor *desc,
703                                     int budget)
704 {
705         struct net_device_context *ndev_ctx = netdev_priv(ndev);
706         struct sk_buff *skb;
707         u16 q_idx = 0;
708         int queue_sends;
709         u64 cmd_rqst;
710
711         cmd_rqst = vmbus_request_addr(&channel->requestor, (u64)desc->trans_id);
712         if (cmd_rqst == VMBUS_RQST_ERROR) {
713                 netdev_err(ndev, "Incorrect transaction id\n");
714                 return;
715         }
716
717         skb = (struct sk_buff *)(unsigned long)cmd_rqst;
718
719         /* Notify the layer above us */
720         if (likely(skb)) {
721                 const struct hv_netvsc_packet *packet
722                         = (struct hv_netvsc_packet *)skb->cb;
723                 u32 send_index = packet->send_buf_index;
724                 struct netvsc_stats *tx_stats;
725
726                 if (send_index != NETVSC_INVALID_INDEX)
727                         netvsc_free_send_slot(net_device, send_index);
728                 q_idx = packet->q_idx;
729
730                 tx_stats = &net_device->chan_table[q_idx].tx_stats;
731
732                 u64_stats_update_begin(&tx_stats->syncp);
733                 tx_stats->packets += packet->total_packets;
734                 tx_stats->bytes += packet->total_bytes;
735                 u64_stats_update_end(&tx_stats->syncp);
736
737                 napi_consume_skb(skb, budget);
738         }
739
740         queue_sends =
741                 atomic_dec_return(&net_device->chan_table[q_idx].queue_sends);
742
743         if (unlikely(net_device->destroy)) {
744                 if (queue_sends == 0)
745                         wake_up(&net_device->wait_drain);
746         } else {
747                 struct netdev_queue *txq = netdev_get_tx_queue(ndev, q_idx);
748
749                 if (netif_tx_queue_stopped(txq) && !net_device->tx_disable &&
750                     (hv_get_avail_to_write_percent(&channel->outbound) >
751                      RING_AVAIL_PERCENT_HIWATER || queue_sends < 1)) {
752                         netif_tx_wake_queue(txq);
753                         ndev_ctx->eth_stats.wake_queue++;
754                 }
755         }
756 }
757
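/* Dispatch a completion packet from the host. Empty completions (currently
 * only SWITCH_DATA_PATH) just wake the waiter; init/buffer/subchannel
 * responses are copied into channel_init_pkt before waking it; RNDIS packet
 * completions are handed to netvsc_send_tx_complete().
 */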
758 static void netvsc_send_completion(struct net_device *ndev,
759                                    struct netvsc_device *net_device,
760                                    struct vmbus_channel *incoming_channel,
761                                    const struct vmpacket_descriptor *desc,
762                                    int budget)
763 {
764         const struct nvsp_message *nvsp_packet;
765         u32 msglen = hv_pkt_datalen(desc);
766         struct nvsp_message *pkt_rqst;
767         u64 cmd_rqst;
768
769         /* First check if this is a VMBUS completion without data payload */
770         if (!msglen) {
771                 cmd_rqst = vmbus_request_addr(&incoming_channel->requestor,
772                                               (u64)desc->trans_id);
773                 if (cmd_rqst == VMBUS_RQST_ERROR) {
774                         netdev_err(ndev, "Invalid transaction id\n");
775                         return;
776                 }
777
778                 pkt_rqst = (struct nvsp_message *)(uintptr_t)cmd_rqst;
779                 switch (pkt_rqst->hdr.msg_type) {
780                 case NVSP_MSG4_TYPE_SWITCH_DATA_PATH:
781                         complete(&net_device->channel_init_wait);
782                         break;
783
784                 default:
785                         netdev_err(ndev, "Unexpected VMBUS completion!!\n");
786                 }
787                 return;
788         }
789
790         /* Ensure packet is big enough to read header fields */
791         if (msglen < sizeof(struct nvsp_message_header)) {
792                 netdev_err(ndev, "nvsp_message length too small: %u\n", msglen);
793                 return;
794         }
795
796         nvsp_packet = hv_pkt_data(desc);
797         switch (nvsp_packet->hdr.msg_type) {
798         case NVSP_MSG_TYPE_INIT_COMPLETE:
799                 if (msglen < sizeof(struct nvsp_message_header) +
800                                 sizeof(struct nvsp_message_init_complete)) {
801                         netdev_err(ndev, "nvsp_msg length too small: %u\n",
802                                    msglen);
803                         return;
804                 }
805                 fallthrough;
806
807         case NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE:
808                 if (msglen < sizeof(struct nvsp_message_header) +
809                                 sizeof(struct nvsp_1_message_send_receive_buffer_complete)) {
810                         netdev_err(ndev, "nvsp_msg1 length too small: %u\n",
811                                    msglen);
812                         return;
813                 }
814                 fallthrough;
815
816         case NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE:
817                 if (msglen < sizeof(struct nvsp_message_header) +
818                                 sizeof(struct nvsp_1_message_send_send_buffer_complete)) {
819                         netdev_err(ndev, "nvsp_msg1 length too small: %u\n",
820                                    msglen);
821                         return;
822                 }
823                 fallthrough;
824
825         case NVSP_MSG5_TYPE_SUBCHANNEL:
826                 if (msglen < sizeof(struct nvsp_message_header) +
827                                 sizeof(struct nvsp_5_subchannel_complete)) {
828                         netdev_err(ndev, "nvsp_msg5 length too small: %u\n",
829                                    msglen);
830                         return;
831                 }
832                 /* Copy the response back */
833                 memcpy(&net_device->channel_init_pkt, nvsp_packet,
834                        sizeof(struct nvsp_message));
835                 complete(&net_device->channel_init_wait);
836                 break;
837
838         case NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE:
839                 netvsc_send_tx_complete(ndev, net_device, incoming_channel,
840                                         desc, budget);
841                 break;
842
843         default:
844                 netdev_err(ndev,
845                            "Unknown send completion type %d received!!\n",
846                            nvsp_packet->hdr.msg_type);
847         }
848 }
849
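/* Find and atomically claim a free section of the send buffer; returns
 * NETVSC_INVALID_INDEX when no section is available.
 */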
850 static u32 netvsc_get_next_send_section(struct netvsc_device *net_device)
851 {
852         unsigned long *map_addr = net_device->send_section_map;
853         unsigned int i;
854
855         for_each_clear_bit(i, map_addr, net_device->send_section_cnt) {
856                 if (sync_test_and_set_bit(i, map_addr) == 0)
857                         return i;
858         }
859
860         return NETVSC_INVALID_INDEX;
861 }
862
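/* Copy the packet's page buffers into its reserved send-buffer section,
 * padding the RNDIS message up to pkt_align when more packets are expected
 * to be batched behind it (xmit_more).
 */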
863 static void netvsc_copy_to_send_buf(struct netvsc_device *net_device,
864                                     unsigned int section_index,
865                                     u32 pend_size,
866                                     struct hv_netvsc_packet *packet,
867                                     struct rndis_message *rndis_msg,
868                                     struct hv_page_buffer *pb,
869                                     bool xmit_more)
870 {
871         char *start = net_device->send_buf;
872         char *dest = start + (section_index * net_device->send_section_size)
873                      + pend_size;
874         int i;
875         u32 padding = 0;
876         u32 page_count = packet->cp_partial ? packet->rmsg_pgcnt :
877                 packet->page_buf_cnt;
878         u32 remain;
879
880         /* Add padding */
881         remain = packet->total_data_buflen & (net_device->pkt_align - 1);
882         if (xmit_more && remain) {
883                 padding = net_device->pkt_align - remain;
884                 rndis_msg->msg_len += padding;
885                 packet->total_data_buflen += padding;
886         }
887
888         for (i = 0; i < page_count; i++) {
889                 char *src = phys_to_virt(pb[i].pfn << HV_HYP_PAGE_SHIFT);
890                 u32 offset = pb[i].offset;
891                 u32 len = pb[i].len;
892
893                 memcpy(dest, (src + offset), len);
894                 dest += len;
895         }
896
897         if (padding)
898                 memset(dest, 0, padding);
899 }
900
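/* Post a single SEND_RNDIS_PKT message on the channel for this queue, either
 * as a page-buffer packet or inband, and stop the transmit queue when the
 * outbound ring is running low on space.
 */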
901 static inline int netvsc_send_pkt(
902         struct hv_device *device,
903         struct hv_netvsc_packet *packet,
904         struct netvsc_device *net_device,
905         struct hv_page_buffer *pb,
906         struct sk_buff *skb)
907 {
908         struct nvsp_message nvmsg;
909         struct nvsp_1_message_send_rndis_packet *rpkt =
910                 &nvmsg.msg.v1_msg.send_rndis_pkt;
911         struct netvsc_channel * const nvchan =
912                 &net_device->chan_table[packet->q_idx];
913         struct vmbus_channel *out_channel = nvchan->channel;
914         struct net_device *ndev = hv_get_drvdata(device);
915         struct net_device_context *ndev_ctx = netdev_priv(ndev);
916         struct netdev_queue *txq = netdev_get_tx_queue(ndev, packet->q_idx);
917         u64 req_id;
918         int ret;
919         u32 ring_avail = hv_get_avail_to_write_percent(&out_channel->outbound);
920
921         nvmsg.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT;
922         if (skb)
923                 rpkt->channel_type = 0;         /* 0 is RMC_DATA */
924         else
925                 rpkt->channel_type = 1;         /* 1 is RMC_CONTROL */
926
927         rpkt->send_buf_section_index = packet->send_buf_index;
928         if (packet->send_buf_index == NETVSC_INVALID_INDEX)
929                 rpkt->send_buf_section_size = 0;
930         else
931                 rpkt->send_buf_section_size = packet->total_data_buflen;
932
933         req_id = (ulong)skb;
934
935         if (out_channel->rescind)
936                 return -ENODEV;
937
938         trace_nvsp_send_pkt(ndev, out_channel, rpkt);
939
940         if (packet->page_buf_cnt) {
941                 if (packet->cp_partial)
942                         pb += packet->rmsg_pgcnt;
943
944                 ret = vmbus_sendpacket_pagebuffer(out_channel,
945                                                   pb, packet->page_buf_cnt,
946                                                   &nvmsg, sizeof(nvmsg),
947                                                   req_id);
948         } else {
949                 ret = vmbus_sendpacket(out_channel,
950                                        &nvmsg, sizeof(nvmsg),
951                                        req_id, VM_PKT_DATA_INBAND,
952                                        VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
953         }
954
955         if (ret == 0) {
956                 atomic_inc_return(&nvchan->queue_sends);
957
958                 if (ring_avail < RING_AVAIL_PERCENT_LOWATER) {
959                         netif_tx_stop_queue(txq);
960                         ndev_ctx->eth_stats.stop_queue++;
961                 }
962         } else if (ret == -EAGAIN) {
963                 netif_tx_stop_queue(txq);
964                 ndev_ctx->eth_stats.stop_queue++;
965         } else {
966                 netdev_err(ndev,
967                            "Unable to send packet pages %u len %u, ret %d\n",
968                            packet->page_buf_cnt, packet->total_data_buflen,
969                            ret);
970         }
971
972         if (netif_tx_queue_stopped(txq) &&
973             atomic_read(&nvchan->queue_sends) < 1 &&
974             !net_device->tx_disable) {
975                 netif_tx_wake_queue(txq);
976                 ndev_ctx->eth_stats.wake_queue++;
977                 if (ret == -EAGAIN)
978                         ret = -ENOSPC;
979         }
980
981         return ret;
982 }
983
984 /* Move packet out of multi send data (msd), and clear msd */
985 static inline void move_pkt_msd(struct hv_netvsc_packet **msd_send,
986                                 struct sk_buff **msd_skb,
987                                 struct multi_send_data *msdp)
988 {
989         *msd_skb = msdp->skb;
990         *msd_send = msdp->pkt;
991         msdp->skb = NULL;
992         msdp->pkt = NULL;
993         msdp->count = 0;
994 }
995
996 /* RCU already held by caller */
997 int netvsc_send(struct net_device *ndev,
998                 struct hv_netvsc_packet *packet,
999                 struct rndis_message *rndis_msg,
1000                 struct hv_page_buffer *pb,
1001                 struct sk_buff *skb,
1002                 bool xdp_tx)
1003 {
1004         struct net_device_context *ndev_ctx = netdev_priv(ndev);
1005         struct netvsc_device *net_device
1006                 = rcu_dereference_bh(ndev_ctx->nvdev);
1007         struct hv_device *device = ndev_ctx->device_ctx;
1008         int ret = 0;
1009         struct netvsc_channel *nvchan;
1010         u32 pktlen = packet->total_data_buflen, msd_len = 0;
1011         unsigned int section_index = NETVSC_INVALID_INDEX;
1012         struct multi_send_data *msdp;
1013         struct hv_netvsc_packet *msd_send = NULL, *cur_send = NULL;
1014         struct sk_buff *msd_skb = NULL;
1015         bool try_batch, xmit_more;
1016
1017         /* If device is rescinded, return error and packet will get dropped. */
1018         if (unlikely(!net_device || net_device->destroy))
1019                 return -ENODEV;
1020
1021         nvchan = &net_device->chan_table[packet->q_idx];
1022         packet->send_buf_index = NETVSC_INVALID_INDEX;
1023         packet->cp_partial = false;
1024
1025         /* Send a control message or XDP packet directly without accessing
1026          * msd (Multi-Send Data) field which may be changed during data packet
1027          * processing.
1028          */
1029         if (!skb || xdp_tx)
1030                 return netvsc_send_pkt(device, packet, net_device, pb, skb);
1031
1032         /* batch packets in send buffer if possible */
1033         msdp = &nvchan->msd;
1034         if (msdp->pkt)
1035                 msd_len = msdp->pkt->total_data_buflen;
1036
1037         try_batch =  msd_len > 0 && msdp->count < net_device->max_pkt;
1038         if (try_batch && msd_len + pktlen + net_device->pkt_align <
1039             net_device->send_section_size) {
1040                 section_index = msdp->pkt->send_buf_index;
1041
1042         } else if (try_batch && msd_len + packet->rmsg_size <
1043                    net_device->send_section_size) {
1044                 section_index = msdp->pkt->send_buf_index;
1045                 packet->cp_partial = true;
1046
1047         } else if (pktlen + net_device->pkt_align <
1048                    net_device->send_section_size) {
1049                 section_index = netvsc_get_next_send_section(net_device);
1050                 if (unlikely(section_index == NETVSC_INVALID_INDEX)) {
1051                         ++ndev_ctx->eth_stats.tx_send_full;
1052                 } else {
1053                         move_pkt_msd(&msd_send, &msd_skb, msdp);
1054                         msd_len = 0;
1055                 }
1056         }
1057
1058         /* Keep aggregating only if stack says more data is coming
1059          * and not doing mixed modes send and not flow blocked
1060          */
1061         xmit_more = netdev_xmit_more() &&
1062                 !packet->cp_partial &&
1063                 !netif_xmit_stopped(netdev_get_tx_queue(ndev, packet->q_idx));
1064
1065         if (section_index != NETVSC_INVALID_INDEX) {
1066                 netvsc_copy_to_send_buf(net_device,
1067                                         section_index, msd_len,
1068                                         packet, rndis_msg, pb, xmit_more);
1069
1070                 packet->send_buf_index = section_index;
1071
1072                 if (packet->cp_partial) {
1073                         packet->page_buf_cnt -= packet->rmsg_pgcnt;
1074                         packet->total_data_buflen = msd_len + packet->rmsg_size;
1075                 } else {
1076                         packet->page_buf_cnt = 0;
1077                         packet->total_data_buflen += msd_len;
1078                 }
1079
1080                 if (msdp->pkt) {
1081                         packet->total_packets += msdp->pkt->total_packets;
1082                         packet->total_bytes += msdp->pkt->total_bytes;
1083                 }
1084
1085                 if (msdp->skb)
1086                         dev_consume_skb_any(msdp->skb);
1087
1088                 if (xmit_more) {
1089                         msdp->skb = skb;
1090                         msdp->pkt = packet;
1091                         msdp->count++;
1092                 } else {
1093                         cur_send = packet;
1094                         msdp->skb = NULL;
1095                         msdp->pkt = NULL;
1096                         msdp->count = 0;
1097                 }
1098         } else {
1099                 move_pkt_msd(&msd_send, &msd_skb, msdp);
1100                 cur_send = packet;
1101         }
1102
1103         if (msd_send) {
1104                 int m_ret = netvsc_send_pkt(device, msd_send, net_device,
1105                                             NULL, msd_skb);
1106
1107                 if (m_ret != 0) {
1108                         netvsc_free_send_slot(net_device,
1109                                               msd_send->send_buf_index);
1110                         dev_kfree_skb_any(msd_skb);
1111                 }
1112         }
1113
1114         if (cur_send)
1115                 ret = netvsc_send_pkt(device, cur_send, net_device, pb, skb);
1116
1117         if (ret != 0 && section_index != NETVSC_INVALID_INDEX)
1118                 netvsc_free_send_slot(net_device, section_index);
1119
1120         return ret;
1121 }
1122
1123 /* Send pending recv completions */
1124 static int send_recv_completions(struct net_device *ndev,
1125                                  struct netvsc_device *nvdev,
1126                                  struct netvsc_channel *nvchan)
1127 {
1128         struct multi_recv_comp *mrc = &nvchan->mrc;
1129         struct recv_comp_msg {
1130                 struct nvsp_message_header hdr;
1131                 u32 status;
1132         }  __packed;
1133         struct recv_comp_msg msg = {
1134                 .hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE,
1135         };
1136         int ret;
1137
1138         while (mrc->first != mrc->next) {
1139                 const struct recv_comp_data *rcd
1140                         = mrc->slots + mrc->first;
1141
1142                 msg.status = rcd->status;
1143                 ret = vmbus_sendpacket(nvchan->channel, &msg, sizeof(msg),
1144                                        rcd->tid, VM_PKT_COMP, 0);
1145                 if (unlikely(ret)) {
1146                         struct net_device_context *ndev_ctx = netdev_priv(ndev);
1147
1148                         ++ndev_ctx->eth_stats.rx_comp_busy;
1149                         return ret;
1150                 }
1151
1152                 if (++mrc->first == nvdev->recv_completion_cnt)
1153                         mrc->first = 0;
1154         }
1155
1156         /* receive completion ring has been emptied */
1157         if (unlikely(nvdev->destroy))
1158                 wake_up(&nvdev->wait_drain);
1159
1160         return 0;
1161 }
1162
1163 /* Count how many receive completions are outstanding */
1164 static void recv_comp_slot_avail(const struct netvsc_device *nvdev,
1165                                  const struct multi_recv_comp *mrc,
1166                                  u32 *filled, u32 *avail)
1167 {
1168         u32 count = nvdev->recv_completion_cnt;
1169
1170         if (mrc->next >= mrc->first)
1171                 *filled = mrc->next - mrc->first;
1172         else
1173                 *filled = (count - mrc->first) + mrc->next;
1174
1175         *avail = count - *filled - 1;
1176 }
1177
1178 /* Add receive complete to ring to send to host. */
1179 static void enq_receive_complete(struct net_device *ndev,
1180                                  struct netvsc_device *nvdev, u16 q_idx,
1181                                  u64 tid, u32 status)
1182 {
1183         struct netvsc_channel *nvchan = &nvdev->chan_table[q_idx];
1184         struct multi_recv_comp *mrc = &nvchan->mrc;
1185         struct recv_comp_data *rcd;
1186         u32 filled, avail;
1187
1188         recv_comp_slot_avail(nvdev, mrc, &filled, &avail);
1189
1190         if (unlikely(filled > NAPI_POLL_WEIGHT)) {
1191                 send_recv_completions(ndev, nvdev, nvchan);
1192                 recv_comp_slot_avail(nvdev, mrc, &filled, &avail);
1193         }
1194
1195         if (unlikely(!avail)) {
1196                 netdev_err(ndev, "Recv_comp full buf q:%hd, tid:%llx\n",
1197                            q_idx, tid);
1198                 return;
1199         }
1200
1201         rcd = mrc->slots + mrc->next;
1202         rcd->tid = tid;
1203         rcd->status = status;
1204
1205         if (++mrc->next == nvdev->recv_completion_cnt)
1206                 mrc->next = 0;
1207 }
1208
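/* Handle a transfer-page data packet: validate the headers, pass each RNDIS
 * range in the receive buffer to rndis_filter_receive() and queue a receive
 * completion for the host. Returns the number of ranges processed.
 */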
1209 static int netvsc_receive(struct net_device *ndev,
1210                           struct netvsc_device *net_device,
1211                           struct netvsc_channel *nvchan,
1212                           const struct vmpacket_descriptor *desc)
1213 {
1214         struct net_device_context *net_device_ctx = netdev_priv(ndev);
1215         struct vmbus_channel *channel = nvchan->channel;
1216         const struct vmtransfer_page_packet_header *vmxferpage_packet
1217                 = container_of(desc, const struct vmtransfer_page_packet_header, d);
1218         const struct nvsp_message *nvsp = hv_pkt_data(desc);
1219         u32 msglen = hv_pkt_datalen(desc);
1220         u16 q_idx = channel->offermsg.offer.sub_channel_index;
1221         char *recv_buf = net_device->recv_buf;
1222         u32 status = NVSP_STAT_SUCCESS;
1223         int i;
1224         int count = 0;
1225
1226         /* Ensure packet is big enough to read header fields */
1227         if (msglen < sizeof(struct nvsp_message_header)) {
1228                 netif_err(net_device_ctx, rx_err, ndev,
1229                           "invalid nvsp header, length too small: %u\n",
1230                           msglen);
1231                 return 0;
1232         }
1233
1234         /* Make sure this is a valid nvsp packet */
1235         if (unlikely(nvsp->hdr.msg_type != NVSP_MSG1_TYPE_SEND_RNDIS_PKT)) {
1236                 netif_err(net_device_ctx, rx_err, ndev,
1237                           "Unknown nvsp packet type received %u\n",
1238                           nvsp->hdr.msg_type);
1239                 return 0;
1240         }
1241
1242         /* Validate xfer page pkt header */
1243         if ((desc->offset8 << 3) < sizeof(struct vmtransfer_page_packet_header)) {
1244                 netif_err(net_device_ctx, rx_err, ndev,
1245                           "Invalid xfer page pkt, offset too small: %u\n",
1246                           desc->offset8 << 3);
1247                 return 0;
1248         }
1249
1250         if (unlikely(vmxferpage_packet->xfer_pageset_id != NETVSC_RECEIVE_BUFFER_ID)) {
1251                 netif_err(net_device_ctx, rx_err, ndev,
1252                           "Invalid xfer page set id - expecting %x got %x\n",
1253                           NETVSC_RECEIVE_BUFFER_ID,
1254                           vmxferpage_packet->xfer_pageset_id);
1255                 return 0;
1256         }
1257
1258         count = vmxferpage_packet->range_cnt;
1259
1260         /* Check count for a valid value */
1261         if (NETVSC_XFER_HEADER_SIZE(count) > desc->offset8 << 3) {
1262                 netif_err(net_device_ctx, rx_err, ndev,
1263                           "Range count is not valid: %d\n",
1264                           count);
1265                 return 0;
1266         }
1267
1268         /* Each range represents 1 RNDIS pkt that contains 1 ethernet frame */
1269         for (i = 0; i < count; i++) {
1270                 u32 offset = vmxferpage_packet->ranges[i].byte_offset;
1271                 u32 buflen = vmxferpage_packet->ranges[i].byte_count;
1272                 void *data;
1273                 int ret;
1274
1275                 if (unlikely(offset > net_device->recv_buf_size ||
1276                              buflen > net_device->recv_buf_size - offset)) {
1277                         nvchan->rsc.cnt = 0;
1278                         status = NVSP_STAT_FAIL;
1279                         netif_err(net_device_ctx, rx_err, ndev,
1280                                   "Packet offset:%u + len:%u too big\n",
1281                                   offset, buflen);
1282
1283                         continue;
1284                 }
1285
1286                 data = recv_buf + offset;
1287
1288                 nvchan->rsc.is_last = (i == count - 1);
1289
1290                 trace_rndis_recv(ndev, q_idx, data);
1291
1292                 /* Pass it to the upper layer */
1293                 ret = rndis_filter_receive(ndev, net_device,
1294                                            nvchan, data, buflen);
1295
1296                 if (unlikely(ret != NVSP_STAT_SUCCESS))
1297                         status = NVSP_STAT_FAIL;
1298         }
1299
1300         enq_receive_complete(ndev, net_device, q_idx,
1301                              vmxferpage_packet->d.trans_id, status);
1302
1303         return count;
1304 }
1305
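/* Parse a SEND_INDIRECTION_TABLE message and copy the hash-to-queue mapping
 * into tx_table, correcting the table offset for hosts running NVSP <= 6
 * that report it incorrectly.
 */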
1306 static void netvsc_send_table(struct net_device *ndev,
1307                               struct netvsc_device *nvscdev,
1308                               const struct nvsp_message *nvmsg,
1309                               u32 msglen)
1310 {
1311         struct net_device_context *net_device_ctx = netdev_priv(ndev);
1312         u32 count, offset, *tab;
1313         int i;
1314
1315         /* Ensure packet is big enough to read send_table fields */
1316         if (msglen < sizeof(struct nvsp_message_header) +
1317                      sizeof(struct nvsp_5_send_indirect_table)) {
1318                 netdev_err(ndev, "nvsp_v5_msg length too small: %u\n", msglen);
1319                 return;
1320         }
1321
1322         count = nvmsg->msg.v5_msg.send_table.count;
1323         offset = nvmsg->msg.v5_msg.send_table.offset;
1324
1325         if (count != VRSS_SEND_TAB_SIZE) {
1326                 netdev_err(ndev, "Received wrong send-table size:%u\n", count);
1327                 return;
1328         }
1329
1330         /* If negotiated version <= NVSP_PROTOCOL_VERSION_6, the offset may be
1331          * wrong due to a host bug. So fix the offset here.
1332          */
1333         if (nvscdev->nvsp_version <= NVSP_PROTOCOL_VERSION_6 &&
1334             msglen >= sizeof(struct nvsp_message_header) +
1335             sizeof(union nvsp_6_message_uber) + count * sizeof(u32))
1336                 offset = sizeof(struct nvsp_message_header) +
1337                          sizeof(union nvsp_6_message_uber);
1338
1339         /* Boundary check for all versions */
1340         if (offset > msglen - count * sizeof(u32)) {
1341                 netdev_err(ndev, "Received send-table offset too big:%u\n",
1342                            offset);
1343                 return;
1344         }
1345
1346         tab = (void *)nvmsg + offset;
1347
1348         for (i = 0; i < count; i++)
1349                 net_device_ctx->tx_table[i] = tab[i];
1350 }
1351
1352 static void netvsc_send_vf(struct net_device *ndev,
1353                            const struct nvsp_message *nvmsg,
1354                            u32 msglen)
1355 {
1356         struct net_device_context *net_device_ctx = netdev_priv(ndev);
1357
1358         /* Ensure packet is big enough to read its fields */
1359         if (msglen < sizeof(struct nvsp_message_header) +
1360                      sizeof(struct nvsp_4_send_vf_association)) {
1361                 netdev_err(ndev, "nvsp_v4_msg length too small: %u\n", msglen);
1362                 return;
1363         }
1364
1365         net_device_ctx->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated;
1366         net_device_ctx->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial;
1367         netdev_info(ndev, "VF slot %u %s\n",
1368                     net_device_ctx->vf_serial,
1369                     net_device_ctx->vf_alloc ? "added" : "removed");
1370 }
1371
1372 static void netvsc_receive_inband(struct net_device *ndev,
1373                                   struct netvsc_device *nvscdev,
1374                                   const struct vmpacket_descriptor *desc)
1375 {
1376         const struct nvsp_message *nvmsg = hv_pkt_data(desc);
1377         u32 msglen = hv_pkt_datalen(desc);
1378
1379         /* Ensure packet is big enough to read header fields */
1380         if (msglen < sizeof(struct nvsp_message_header)) {
1381                 netdev_err(ndev, "inband nvsp_message length too small: %u\n", msglen);
1382                 return;
1383         }
1384
1385         switch (nvmsg->hdr.msg_type) {
1386         case NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE:
1387                 netvsc_send_table(ndev, nvscdev, nvmsg, msglen);
1388                 break;
1389
1390         case NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION:
1391                 netvsc_send_vf(ndev, nvmsg, msglen);
1392                 break;
1393         }
1394 }
1395
1396 static int netvsc_process_raw_pkt(struct hv_device *device,
1397                                   struct netvsc_channel *nvchan,
1398                                   struct netvsc_device *net_device,
1399                                   struct net_device *ndev,
1400                                   const struct vmpacket_descriptor *desc,
1401                                   int budget)
1402 {
1403         struct vmbus_channel *channel = nvchan->channel;
1404         const struct nvsp_message *nvmsg = hv_pkt_data(desc);
1405
1406         trace_nvsp_recv(ndev, channel, nvmsg);
1407
1408         switch (desc->type) {
1409         case VM_PKT_COMP:
1410                 netvsc_send_completion(ndev, net_device, channel, desc, budget);
1411                 break;
1412
1413         case VM_PKT_DATA_USING_XFER_PAGES:
1414                 return netvsc_receive(ndev, net_device, nvchan, desc);
1416
1417         case VM_PKT_DATA_INBAND:
1418                 netvsc_receive_inband(ndev, net_device, desc);
1419                 break;
1420
1421         default:
1422                 netdev_err(ndev, "unhandled packet type %d, tid %llx\n",
1423                            desc->type, desc->trans_id);
1424                 break;
1425         }
1426
1427         return 0;
1428 }
1429
1430 static struct hv_device *netvsc_channel_to_device(struct vmbus_channel *channel)
1431 {
1432         struct vmbus_channel *primary = channel->primary_channel;
1433
1434         return primary ? primary->device_obj : channel->device_obj;
1435 }
1436
1437 /* Network processing softirq (NAPI poll handler).
1438  * Processes data in the incoming ring buffer from the host.
1439  * Stops when the ring is empty or the budget is met or exceeded.
1440  */
1441 int netvsc_poll(struct napi_struct *napi, int budget)
1442 {
1443         struct netvsc_channel *nvchan
1444                 = container_of(napi, struct netvsc_channel, napi);
1445         struct netvsc_device *net_device = nvchan->net_device;
1446         struct vmbus_channel *channel = nvchan->channel;
1447         struct hv_device *device = netvsc_channel_to_device(channel);
1448         struct net_device *ndev = hv_get_drvdata(device);
1449         int work_done = 0;
1450         int ret;
1451
1452         /* If starting a new interval */
1453         if (!nvchan->desc)
1454                 nvchan->desc = hv_pkt_iter_first(channel);
1455
1456         while (nvchan->desc && work_done < budget) {
1457                 work_done += netvsc_process_raw_pkt(device, nvchan, net_device,
1458                                                     ndev, nvchan->desc, budget);
1459                 nvchan->desc = hv_pkt_iter_next(channel, nvchan->desc);
1460         }
1461
1462         /* Send any pending receive completions */
1463         ret = send_recv_completions(ndev, net_device, nvchan);
1464
1465         /* If this poll did not exhaust the NAPI budget
1466          * and we are not busy polling,
1467          * then re-enable host interrupts,
1468          * and reschedule if the ring is not empty
1469          * or sending the receive completions failed.
1470          */
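        /* hv_end_read() re-enables the host interrupt and reports whether
         * more data arrived in the meantime; if it did (or if the receive
         * completions could not be posted), the interrupt is masked again
         * with hv_begin_read() and NAPI is rescheduled so nothing is missed.
         */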
1471         if (work_done < budget &&
1472             napi_complete_done(napi, work_done) &&
1473             (ret || hv_end_read(&channel->inbound)) &&
1474             napi_schedule_prep(napi)) {
1475                 hv_begin_read(&channel->inbound);
1476                 __napi_schedule(napi);
1477         }
1478
1479         /* The driver may overshoot the budget, since one descriptor can hold multiple packets */
1480         return min(work_done, budget);
1481 }
1482
1483 /* Callback invoked when data is available in the host ring buffer.
1484  * Processing is deferred to the network softirq (NAPI).
1485  */
1486 void netvsc_channel_cb(void *context)
1487 {
1488         struct netvsc_channel *nvchan = context;
1489         struct vmbus_channel *channel = nvchan->channel;
1490         struct hv_ring_buffer_info *rbi = &channel->inbound;
1491
1492         /* preload first vmpacket descriptor */
1493         prefetch(hv_get_ring_buffer(rbi) + rbi->priv_read_index);
1494
1495         if (napi_schedule_prep(&nvchan->napi)) {
1496                 /* disable interrupts from host */
1497                 hv_begin_read(rbi);
1498
1499                 __napi_schedule_irqoff(&nvchan->napi);
1500         }
1501 }
1502
1503 /*
1504  * netvsc_device_add - Callback when the device belonging to this
1505  * driver is added
1506  */
1507 struct netvsc_device *netvsc_device_add(struct hv_device *device,
1508                                 const struct netvsc_device_info *device_info)
1509 {
1510         int i, ret = 0;
1511         struct netvsc_device *net_device;
1512         struct net_device *ndev = hv_get_drvdata(device);
1513         struct net_device_context *net_device_ctx = netdev_priv(ndev);
1514
1515         net_device = alloc_net_device();
1516         if (!net_device)
1517                 return ERR_PTR(-ENOMEM);
1518
1519         for (i = 0; i < VRSS_SEND_TAB_SIZE; i++)
1520                 net_device_ctx->tx_table[i] = 0;
1521
1522         /* Because the device uses NAPI, all interrupt batching and
1523          * control is done via the network softirq, not in the channel callback.
1524          */
1525         set_channel_read_mode(device->channel, HV_CALL_ISR);
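        /* With HV_CALL_ISR the VMBus layer is expected to invoke
         * netvsc_channel_cb() directly from the interrupt path rather than
         * from a tasklet; the callback (above) only masks the channel
         * interrupt and schedules NAPI.
         */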
1526
1527         /* If we're reopening the device, we may have multiple queues; fill the
1528          * chn_table with the default channel so it can be used before the
1529          * subchannels are opened.
1530          * Initialize the channel state before opening, since we can be
1531          * interrupted as soon as the channel is open.
1532          */
1533
1534         for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
1535                 struct netvsc_channel *nvchan = &net_device->chan_table[i];
1536
1537                 nvchan->channel = device->channel;
1538                 nvchan->net_device = net_device;
1539                 u64_stats_init(&nvchan->tx_stats.syncp);
1540                 u64_stats_init(&nvchan->rx_stats.syncp);
1541
1542                 ret = xdp_rxq_info_reg(&nvchan->xdp_rxq, ndev, i, 0);
1543
1544                 if (ret) {
1545                         netdev_err(ndev, "xdp_rxq_info_reg fail: %d\n", ret);
1546                         goto cleanup2;
1547                 }
1548
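                /* MEM_TYPE_PAGE_SHARED is the page-refcount ("split page")
                 * XDP memory model; registering it tells the XDP core how
                 * frames attached to this rxq are eventually freed.
                 */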
1549                 ret = xdp_rxq_info_reg_mem_model(&nvchan->xdp_rxq,
1550                                                  MEM_TYPE_PAGE_SHARED, NULL);
1551
1552                 if (ret) {
1553                         netdev_err(ndev, "xdp reg_mem_model fail: %d\n", ret);
1554                         goto cleanup2;
1555                 }
1556         }
1557
1558         /* Enable NAPI handler before init callbacks */
1559         netif_napi_add(ndev, &net_device->chan_table[0].napi,
1560                        netvsc_poll, NAPI_POLL_WEIGHT);
1561
1562         /* Open the channel */
1563         device->channel->rqstor_size = netvsc_rqstor_size(netvsc_ring_bytes);
1564         ret = vmbus_open(device->channel, netvsc_ring_bytes,
1565                          netvsc_ring_bytes,  NULL, 0,
1566                          netvsc_channel_cb, net_device->chan_table);
1567
1568         if (ret != 0) {
1569                 netdev_err(ndev, "unable to open channel: %d\n", ret);
1570                 goto cleanup;
1571         }
1572
1573         /* Channel is opened */
1574         netdev_dbg(ndev, "hv_netvsc channel opened successfully\n");
1575
1576         napi_enable(&net_device->chan_table[0].napi);
1577
1578         /* Connect with the NetVsp */
1579         ret = netvsc_connect_vsp(device, net_device, device_info);
1580         if (ret != 0) {
1581                 netdev_err(ndev,
1582                         "unable to connect to NetVSP - %d\n", ret);
1583                 goto close;
1584         }
1585
1586         /* Writing the nvdev pointer unlocks netvsc_send(), so make sure
1587          * chn_table is populated first.
1588          */
1589         rcu_assign_pointer(net_device_ctx->nvdev, net_device);
1590
1591         return net_device;
1592
1593 close:
1594         RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
1595         napi_disable(&net_device->chan_table[0].napi);
1596
1597         /* Now, we can close the channel safely */
1598         vmbus_close(device->channel);
1599
1600 cleanup:
1601         netif_napi_del(&net_device->chan_table[0].napi);
1602
1603 cleanup2:
1604         free_netvsc_device(&net_device->rcu);
1605
1606         return ERR_PTR(ret);
1607 }