net: openvswitch: fix upcall counter access before allocation
net/openvswitch/datapath.c (from platform/kernel/linux-starfive.git)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2007-2014 Nicira, Inc.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/module.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/jhash.h>
#include <linux/delay.h>
#include <linux/time.h>
#include <linux/etherdevice.h>
#include <linux/genetlink.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/ethtool.h>
#include <linux/wait.h>
#include <asm/div64.h>
#include <linux/highmem.h>
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv4.h>
#include <linux/inetdevice.h>
#include <linux/list.h>
#include <linux/openvswitch.h>
#include <linux/rculist.h>
#include <linux/dmi.h>
#include <net/genetlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/pkt_cls.h>

#include "datapath.h"
#include "flow.h"
#include "flow_table.h"
#include "flow_netlink.h"
#include "meter.h"
#include "openvswitch_trace.h"
#include "vport-internal_dev.h"
#include "vport-netdev.h"

unsigned int ovs_net_id __read_mostly;

static struct genl_family dp_packet_genl_family;
static struct genl_family dp_flow_genl_family;
static struct genl_family dp_datapath_genl_family;

static const struct nla_policy flow_policy[];

static const struct genl_multicast_group ovs_dp_flow_multicast_group = {
	.name = OVS_FLOW_MCGROUP,
};

static const struct genl_multicast_group ovs_dp_datapath_multicast_group = {
	.name = OVS_DATAPATH_MCGROUP,
};

static const struct genl_multicast_group ovs_dp_vport_multicast_group = {
	.name = OVS_VPORT_MCGROUP,
};
/* Check if we need to build a reply message.
 * OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */
static bool ovs_must_notify(struct genl_family *family, struct genl_info *info,
			    unsigned int group)
{
	return info->nlhdr->nlmsg_flags & NLM_F_ECHO ||
	       genl_has_listeners(family, genl_info_net(info), group);
}

static void ovs_notify(struct genl_family *family,
		       struct sk_buff *skb, struct genl_info *info)
{
	genl_notify(family, skb, info, 0, GFP_KERNEL);
}
/**
 * DOC: Locking:
 *
 * All writes to device state (add/remove datapath or port, set operations
 * on vports, etc.) and to other state (flow table modifications, setting
 * miscellaneous datapath parameters, etc.) are protected by ovs_lock.
 *
 * Reads are protected by RCU.
 *
 * There are a few special cases (mostly stats) that have their own
 * synchronization but they nest under all of the above and don't interact
 * with each other.
 *
 * The RTNL lock nests inside ovs_mutex.
 */

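/* A minimal usage sketch of the two schemes above (illustrative only):
 * writers do
 *
 *	ovs_lock();
 *	dp = get_dp(net, dp_ifindex);
 *	...modify flow table, ports, parameters...
 *	ovs_unlock();
 *
 * while readers do
 *
 *	rcu_read_lock();
 *	dp = get_dp_rcu(net, dp_ifindex);
 *	...read-only lookups...
 *	rcu_read_unlock();
 */
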
static DEFINE_MUTEX(ovs_mutex);

void ovs_lock(void)
{
	mutex_lock(&ovs_mutex);
}

void ovs_unlock(void)
{
	mutex_unlock(&ovs_mutex);
}

#ifdef CONFIG_LOCKDEP
int lockdep_ovsl_is_held(void)
{
	if (debug_locks)
		return lockdep_is_held(&ovs_mutex);
	else
		return 1;
}
#endif

static struct vport *new_vport(const struct vport_parms *);
static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
			     const struct sw_flow_key *,
			     const struct dp_upcall_info *,
			     uint32_t cutlen);
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
				  const struct sw_flow_key *,
				  const struct dp_upcall_info *,
				  uint32_t cutlen);

static void ovs_dp_masks_rebalance(struct work_struct *work);

static int ovs_dp_set_upcall_portids(struct datapath *, const struct nlattr *);

/* Must be called with rcu_read_lock or ovs_mutex. */
const char *ovs_dp_name(const struct datapath *dp)
{
	struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);

	return ovs_vport_name(vport);
}

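/* Return the ifindex of the datapath's local port, or 0 if the local port
 * is already gone; takes the RCU read lock internally.
 */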
static int get_dpifindex(const struct datapath *dp)
{
	struct vport *local;
	int ifindex;

	rcu_read_lock();

	local = ovs_vport_rcu(dp, OVSP_LOCAL);
	if (local)
		ifindex = local->dev->ifindex;
	else
		ifindex = 0;

	rcu_read_unlock();

	return ifindex;
}

static void destroy_dp_rcu(struct rcu_head *rcu)
{
	struct datapath *dp = container_of(rcu, struct datapath, rcu);

	ovs_flow_tbl_destroy(&dp->table);
	free_percpu(dp->stats_percpu);
	kfree(dp->ports);
	ovs_meters_exit(dp);
	kfree(rcu_dereference_raw(dp->upcall_portids));
	kfree(dp);
}

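/* DP_VPORT_HASH_BUCKETS is a power of two, so the mask below folds any
 * port number into a valid bucket index.
 */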
static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
					    u16 port_no)
{
	return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
}

/* Called with ovs_mutex or RCU read lock. */
struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
{
	struct vport *vport;
	struct hlist_head *head;

	head = vport_hash_bucket(dp, port_no);
	hlist_for_each_entry_rcu(vport, head, dp_hash_node,
				 lockdep_ovsl_is_held()) {
		if (vport->port_no == port_no)
			return vport;
	}
	return NULL;
}

/* Called with ovs_mutex. */
static struct vport *new_vport(const struct vport_parms *parms)
{
	struct vport *vport;

	vport = ovs_vport_add(parms);
	if (!IS_ERR(vport)) {
		struct datapath *dp = parms->dp;
		struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);

		hlist_add_head_rcu(&vport->dp_hash_node, head);
	}
	return vport;
}

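/* Update the input vport's per-CPU upcall counters. Only MISS and ACTION
 * upcalls are counted. Per the fix this file carries ("fix upcall counter
 * access before allocation"), a vport's upcall_stats must be allocated
 * before the vport can receive packets, or the dereference below would be
 * a NULL pointer access.
 */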
static void ovs_vport_update_upcall_stats(struct sk_buff *skb,
					  const struct dp_upcall_info *upcall_info,
					  bool upcall_result)
{
	struct vport *p = OVS_CB(skb)->input_vport;
	struct vport_upcall_stats_percpu *stats;

	if (upcall_info->cmd != OVS_PACKET_CMD_MISS &&
	    upcall_info->cmd != OVS_PACKET_CMD_ACTION)
		return;

	stats = this_cpu_ptr(p->upcall_stats);
	u64_stats_update_begin(&stats->syncp);
	if (upcall_result)
		u64_stats_inc(&stats->n_success);
	else
		u64_stats_inc(&stats->n_fail);
	u64_stats_update_end(&stats->syncp);
}

void ovs_dp_detach_port(struct vport *p)
{
	ASSERT_OVSL();

	/* First drop references to device. */
	hlist_del_rcu(&p->dp_hash_node);

	/* Then destroy it. */
	ovs_vport_del(p);
}

/* Must be called with rcu_read_lock. */
void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
{
	const struct vport *p = OVS_CB(skb)->input_vport;
	struct datapath *dp = p->dp;
	struct sw_flow *flow;
	struct sw_flow_actions *sf_acts;
	struct dp_stats_percpu *stats;
	u64 *stats_counter;
	u32 n_mask_hit;
	u32 n_cache_hit;
	int error;

	stats = this_cpu_ptr(dp->stats_percpu);

	/* Look up flow. */
	flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb),
					 &n_mask_hit, &n_cache_hit);
	if (unlikely(!flow)) {
		struct dp_upcall_info upcall;

		memset(&upcall, 0, sizeof(upcall));
		upcall.cmd = OVS_PACKET_CMD_MISS;

		if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU)
			upcall.portid =
			    ovs_dp_get_upcall_portid(dp, smp_processor_id());
		else
			upcall.portid = ovs_vport_find_upcall_portid(p, skb);

		upcall.mru = OVS_CB(skb)->mru;
		error = ovs_dp_upcall(dp, skb, key, &upcall, 0);
		switch (error) {
		case 0:
		case -EAGAIN:
		case -ERESTARTSYS:
		case -EINTR:
			consume_skb(skb);
			break;
		default:
			kfree_skb(skb);
			break;
		}
		stats_counter = &stats->n_missed;
		goto out;
	}

	ovs_flow_stats_update(flow, key->tp.flags, skb);
	sf_acts = rcu_dereference(flow->sf_acts);
	error = ovs_execute_actions(dp, skb, sf_acts, key);
	if (unlikely(error))
		net_dbg_ratelimited("ovs: action execution error on datapath %s: %d\n",
				    ovs_dp_name(dp), error);

	stats_counter = &stats->n_hit;

out:
	/* Update datapath statistics. */
	u64_stats_update_begin(&stats->syncp);
	(*stats_counter)++;
	stats->n_mask_hit += n_mask_hit;
	stats->n_cache_hit += n_cache_hit;
	u64_stats_update_end(&stats->syncp);
}

int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
		  const struct sw_flow_key *key,
		  const struct dp_upcall_info *upcall_info,
		  uint32_t cutlen)
{
	struct dp_stats_percpu *stats;
	int err;

	if (trace_ovs_dp_upcall_enabled())
		trace_ovs_dp_upcall(dp, skb, key, upcall_info);

	if (upcall_info->portid == 0) {
		err = -ENOTCONN;
		goto err;
	}

	if (!skb_is_gso(skb))
		err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
	else
		err = queue_gso_packets(dp, skb, key, upcall_info, cutlen);

	ovs_vport_update_upcall_stats(skb, upcall_info, !err);
	if (err)
		goto err;

	return 0;

err:
	stats = this_cpu_ptr(dp->stats_percpu);

	u64_stats_update_begin(&stats->syncp);
	stats->n_lost++;
	u64_stats_update_end(&stats->syncp);

	return err;
}

static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
			     const struct sw_flow_key *key,
			     const struct dp_upcall_info *upcall_info,
			     uint32_t cutlen)
{
	unsigned int gso_type = skb_shinfo(skb)->gso_type;
	struct sw_flow_key later_key;
	struct sk_buff *segs, *nskb;
	int err;

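	/* OVS_CB() lives at the front of skb->cb; the check below documents
	 * the assumption that it must not overlap the control block the GSO
	 * layer keeps at SKB_GSO_CB_OFFSET during segmentation.
	 */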
	BUILD_BUG_ON(sizeof(*OVS_CB(skb)) > SKB_GSO_CB_OFFSET);
	segs = __skb_gso_segment(skb, NETIF_F_SG, false);
	if (IS_ERR(segs))
		return PTR_ERR(segs);
	if (segs == NULL)
		return -EINVAL;

	if (gso_type & SKB_GSO_UDP) {
		/* The initial flow key extracted by ovs_flow_key_extract()
		 * in this case is for the first fragment, so we need to
		 * properly mark later fragments.
		 */
		later_key = *key;
		later_key.ip.frag = OVS_FRAG_TYPE_LATER;
	}

	/* Queue all of the segments. */
	skb_list_walk_safe(segs, skb, nskb) {
		if (gso_type & SKB_GSO_UDP && skb != segs)
			key = &later_key;

		err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
		if (err)
			break;
	}

	/* Free all of the segments. */
	skb_list_walk_safe(segs, skb, nskb) {
		if (err)
			kfree_skb(skb);
		else
			consume_skb(skb);
	}
	return err;
}

static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
			      unsigned int hdrlen, int actions_attrlen)
{
	size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
		+ nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
		+ nla_total_size(ovs_key_attr_size()) /* OVS_PACKET_ATTR_KEY */
		+ nla_total_size(sizeof(unsigned int)) /* OVS_PACKET_ATTR_LEN */
		+ nla_total_size(sizeof(u64)); /* OVS_PACKET_ATTR_HASH */

	/* OVS_PACKET_ATTR_USERDATA */
	if (upcall_info->userdata)
		size += NLA_ALIGN(upcall_info->userdata->nla_len);

	/* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
	if (upcall_info->egress_tun_info)
		size += nla_total_size(ovs_tun_key_attr_size());

	/* OVS_PACKET_ATTR_ACTIONS */
	if (upcall_info->actions_len)
		size += nla_total_size(actions_attrlen);

	/* OVS_PACKET_ATTR_MRU */
	if (upcall_info->mru)
		size += nla_total_size(sizeof(upcall_info->mru));

	return size;
}

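/* A short worked example of the padding below: NLA_ALIGNTO is 4, so a
 * 61-byte message gets NLA_ALIGN(61) - 61 = 3 trailing zero bytes, unless
 * userspace negotiated OVS_DP_F_UNALIGNED and can accept the unpadded
 * attribute.
 */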
static void pad_packet(struct datapath *dp, struct sk_buff *skb)
{
	if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
		size_t plen = NLA_ALIGN(skb->len) - skb->len;

		if (plen > 0)
			skb_put_zero(skb, plen);
	}
}

static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
				  const struct sw_flow_key *key,
				  const struct dp_upcall_info *upcall_info,
				  uint32_t cutlen)
{
	struct ovs_header *upcall;
	struct sk_buff *nskb = NULL;
	struct sk_buff *user_skb = NULL; /* to be queued to userspace */
	struct nlattr *nla;
	size_t len;
	unsigned int hlen;
	int err, dp_ifindex;
	u64 hash;

	dp_ifindex = get_dpifindex(dp);
	if (!dp_ifindex)
		return -ENODEV;

	if (skb_vlan_tag_present(skb)) {
		nskb = skb_clone(skb, GFP_ATOMIC);
		if (!nskb)
			return -ENOMEM;

		nskb = __vlan_hwaccel_push_inside(nskb);
		if (!nskb)
			return -ENOMEM;

		skb = nskb;
	}

	if (nla_attr_size(skb->len) > USHRT_MAX) {
		err = -EFBIG;
		goto out;
	}

	/* Complete checksum if needed */
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_csum_hwoffload_help(skb, 0)))
		goto out;

	/* Older versions of OVS user space enforce alignment of the last
	 * Netlink attribute to NLA_ALIGNTO which would require extensive
	 * padding logic. Only perform zerocopy if padding is not required.
	 */
	if (dp->user_features & OVS_DP_F_UNALIGNED)
		hlen = skb_zerocopy_headlen(skb);
	else
		hlen = skb->len;

	len = upcall_msg_size(upcall_info, hlen - cutlen,
			      OVS_CB(skb)->acts_origlen);
	user_skb = genlmsg_new(len, GFP_ATOMIC);
	if (!user_skb) {
		err = -ENOMEM;
		goto out;
	}

	upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
			     0, upcall_info->cmd);
	if (!upcall) {
		err = -EINVAL;
		goto out;
	}
	upcall->dp_ifindex = dp_ifindex;

	err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);
	if (err)
		goto out;

	if (upcall_info->userdata)
		__nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
			  nla_len(upcall_info->userdata),
			  nla_data(upcall_info->userdata));

	if (upcall_info->egress_tun_info) {
		nla = nla_nest_start_noflag(user_skb,
					    OVS_PACKET_ATTR_EGRESS_TUN_KEY);
		if (!nla) {
			err = -EMSGSIZE;
			goto out;
		}
		err = ovs_nla_put_tunnel_info(user_skb,
					      upcall_info->egress_tun_info);
		if (err)
			goto out;

		nla_nest_end(user_skb, nla);
	}

	if (upcall_info->actions_len) {
		nla = nla_nest_start_noflag(user_skb, OVS_PACKET_ATTR_ACTIONS);
		if (!nla) {
			err = -EMSGSIZE;
			goto out;
		}
		err = ovs_nla_put_actions(upcall_info->actions,
					  upcall_info->actions_len,
					  user_skb);
		if (!err)
			nla_nest_end(user_skb, nla);
		else
			nla_nest_cancel(user_skb, nla);
	}

	/* Add OVS_PACKET_ATTR_MRU */
	if (upcall_info->mru &&
	    nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU, upcall_info->mru)) {
		err = -ENOBUFS;
		goto out;
	}

	/* Add OVS_PACKET_ATTR_LEN when packet is truncated */
	if (cutlen > 0 &&
	    nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN, skb->len)) {
		err = -ENOBUFS;
		goto out;
	}

	/* Add OVS_PACKET_ATTR_HASH */
	hash = skb_get_hash_raw(skb);
	if (skb->sw_hash)
		hash |= OVS_PACKET_HASH_SW_BIT;

	if (skb->l4_hash)
		hash |= OVS_PACKET_HASH_L4_BIT;

	if (nla_put(user_skb, OVS_PACKET_ATTR_HASH, sizeof(u64), &hash)) {
		err = -ENOBUFS;
		goto out;
	}

	/* Only reserve room for attribute header, packet data is added
	 * in skb_zerocopy() */
	if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
		err = -ENOBUFS;
		goto out;
	}
	nla->nla_len = nla_attr_size(skb->len - cutlen);

	err = skb_zerocopy(user_skb, skb, skb->len - cutlen, hlen);
	if (err)
		goto out;

	/* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
	pad_packet(dp, user_skb);

	((struct nlmsghdr *)user_skb->data)->nlmsg_len = user_skb->len;

	err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
	user_skb = NULL;
out:
	if (err)
		skb_tx_error(skb);
	consume_skb(user_skb);
	consume_skb(nskb);

	return err;
}

static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
{
	struct ovs_header *ovs_header = info->userhdr;
	struct net *net = sock_net(skb->sk);
	struct nlattr **a = info->attrs;
	struct sw_flow_actions *acts;
	struct sk_buff *packet;
	struct sw_flow *flow;
	struct sw_flow_actions *sf_acts;
	struct datapath *dp;
	struct vport *input_vport;
	u16 mru = 0;
	u64 hash;
	int len;
	int err;
	bool log = !a[OVS_PACKET_ATTR_PROBE];

	err = -EINVAL;
	if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
	    !a[OVS_PACKET_ATTR_ACTIONS])
		goto err;

	len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
	packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
	err = -ENOMEM;
	if (!packet)
		goto err;
	skb_reserve(packet, NET_IP_ALIGN);

	nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);

	/* Set packet's mru */
	if (a[OVS_PACKET_ATTR_MRU]) {
		mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]);
		packet->ignore_df = 1;
	}
	OVS_CB(packet)->mru = mru;

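	/* Undo the encoding used by queue_userspace_packet(): the low 32 bits
	 * carry the hash value, the high bits the "software" and "L4" flags.
	 */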
	if (a[OVS_PACKET_ATTR_HASH]) {
		hash = nla_get_u64(a[OVS_PACKET_ATTR_HASH]);

		__skb_set_hash(packet, hash & 0xFFFFFFFFULL,
			       !!(hash & OVS_PACKET_HASH_SW_BIT),
			       !!(hash & OVS_PACKET_HASH_L4_BIT));
	}

	/* Build an sw_flow for sending this packet. */
	flow = ovs_flow_alloc();
	err = PTR_ERR(flow);
	if (IS_ERR(flow))
		goto err_kfree_skb;

	err = ovs_flow_key_extract_userspace(net, a[OVS_PACKET_ATTR_KEY],
					     packet, &flow->key, log);
	if (err)
		goto err_flow_free;

	err = ovs_nla_copy_actions(net, a[OVS_PACKET_ATTR_ACTIONS],
				   &flow->key, &acts, log);
	if (err)
		goto err_flow_free;

	rcu_assign_pointer(flow->sf_acts, acts);
	packet->priority = flow->key.phy.priority;
	packet->mark = flow->key.phy.skb_mark;

	rcu_read_lock();
	dp = get_dp_rcu(net, ovs_header->dp_ifindex);
	err = -ENODEV;
	if (!dp)
		goto err_unlock;

	input_vport = ovs_vport_rcu(dp, flow->key.phy.in_port);
	if (!input_vport)
		input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);

	if (!input_vport)
		goto err_unlock;

	packet->dev = input_vport->dev;
	OVS_CB(packet)->input_vport = input_vport;
	sf_acts = rcu_dereference(flow->sf_acts);

	local_bh_disable();
	err = ovs_execute_actions(dp, packet, sf_acts, &flow->key);
	local_bh_enable();
	rcu_read_unlock();

	ovs_flow_free(flow, false);
	return err;

err_unlock:
	rcu_read_unlock();
err_flow_free:
	ovs_flow_free(flow, false);
err_kfree_skb:
	kfree_skb(packet);
err:
	return err;
}

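/* For OVS_PACKET_ATTR_PACKET only .len is set; with no explicit type this
 * acts as a minimum payload length, i.e. an executed packet must carry at
 * least a full Ethernet header.
 */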
static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
	[OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },
	[OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
	[OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
	[OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
	[OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 },
	[OVS_PACKET_ATTR_HASH] = { .type = NLA_U64 },
};

static const struct genl_small_ops dp_packet_genl_ops[] = {
	{ .cmd = OVS_PACKET_CMD_EXECUTE,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_packet_cmd_execute
	}
};

static struct genl_family dp_packet_genl_family __ro_after_init = {
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_PACKET_FAMILY,
	.version = OVS_PACKET_VERSION,
	.maxattr = OVS_PACKET_ATTR_MAX,
	.policy = packet_policy,
	.netnsok = true,
	.parallel_ops = true,
	.small_ops = dp_packet_genl_ops,
	.n_small_ops = ARRAY_SIZE(dp_packet_genl_ops),
	.resv_start_op = OVS_PACKET_CMD_EXECUTE + 1,
	.module = THIS_MODULE,
};

static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
			 struct ovs_dp_megaflow_stats *mega_stats)
{
	int i;

	memset(mega_stats, 0, sizeof(*mega_stats));

	stats->n_flows = ovs_flow_tbl_count(&dp->table);
	mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table);

	stats->n_hit = stats->n_missed = stats->n_lost = 0;

	for_each_possible_cpu(i) {
		const struct dp_stats_percpu *percpu_stats;
		struct dp_stats_percpu local_stats;
		unsigned int start;

		percpu_stats = per_cpu_ptr(dp->stats_percpu, i);

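		/* Snapshot this CPU's counters inside the u64_stats retry
		 * loop so that 64-bit values read back consistently even on
		 * 32-bit machines.
		 */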
		do {
			start = u64_stats_fetch_begin(&percpu_stats->syncp);
			local_stats = *percpu_stats;
		} while (u64_stats_fetch_retry(&percpu_stats->syncp, start));

		stats->n_hit += local_stats.n_hit;
		stats->n_missed += local_stats.n_missed;
		stats->n_lost += local_stats.n_lost;
		mega_stats->n_mask_hit += local_stats.n_mask_hit;
		mega_stats->n_cache_hit += local_stats.n_cache_hit;
	}
}

static bool should_fill_key(const struct sw_flow_id *sfid, uint32_t ufid_flags)
{
	return ovs_identifier_is_ufid(sfid) &&
	       !(ufid_flags & OVS_UFID_F_OMIT_KEY);
}

static bool should_fill_mask(uint32_t ufid_flags)
{
	return !(ufid_flags & OVS_UFID_F_OMIT_MASK);
}

static bool should_fill_actions(uint32_t ufid_flags)
{
	return !(ufid_flags & OVS_UFID_F_OMIT_ACTIONS);
}

static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,
				    const struct sw_flow_id *sfid,
				    uint32_t ufid_flags)
{
	size_t len = NLMSG_ALIGN(sizeof(struct ovs_header));

	/* OVS_FLOW_ATTR_UFID, or unmasked flow key as fallback
	 * see ovs_nla_put_identifier()
	 */
	if (sfid && ovs_identifier_is_ufid(sfid))
		len += nla_total_size(sfid->ufid_len);
	else
		len += nla_total_size(ovs_key_attr_size());

	/* OVS_FLOW_ATTR_KEY */
	if (!sfid || should_fill_key(sfid, ufid_flags))
		len += nla_total_size(ovs_key_attr_size());

	/* OVS_FLOW_ATTR_MASK */
	if (should_fill_mask(ufid_flags))
		len += nla_total_size(ovs_key_attr_size());

	/* OVS_FLOW_ATTR_ACTIONS */
	if (should_fill_actions(ufid_flags))
		len += nla_total_size(acts->orig_len);

	return len
		+ nla_total_size_64bit(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
		+ nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
		+ nla_total_size_64bit(8); /* OVS_FLOW_ATTR_USED */
}

/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
				   struct sk_buff *skb)
{
	struct ovs_flow_stats stats;
	__be16 tcp_flags;
	unsigned long used;

	ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);

	if (used &&
	    nla_put_u64_64bit(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used),
			      OVS_FLOW_ATTR_PAD))
		return -EMSGSIZE;

	if (stats.n_packets &&
	    nla_put_64bit(skb, OVS_FLOW_ATTR_STATS,
			  sizeof(struct ovs_flow_stats), &stats,
			  OVS_FLOW_ATTR_PAD))
		return -EMSGSIZE;

	if ((u8)ntohs(tcp_flags) &&
	     nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))
		return -EMSGSIZE;

	return 0;
}

/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
				     struct sk_buff *skb, int skb_orig_len)
{
	struct nlattr *start;
	int err;

	/* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
	 * this is the first flow to be dumped into 'skb'.  This is unusual for
	 * Netlink but individual action lists can be longer than
	 * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
	 * The userspace caller can always fetch the actions separately if it
	 * really wants them.  (Most userspace callers in fact don't care.)
	 *
	 * This can only fail for dump operations because the skb is always
	 * properly sized for single flows.
	 */
	start = nla_nest_start_noflag(skb, OVS_FLOW_ATTR_ACTIONS);
	if (start) {
		const struct sw_flow_actions *sf_acts;

		sf_acts = rcu_dereference_ovsl(flow->sf_acts);
		err = ovs_nla_put_actions(sf_acts->actions,
					  sf_acts->actions_len, skb);

		if (!err)
			nla_nest_end(skb, start);
		else {
			if (skb_orig_len)
				return err;

			nla_nest_cancel(skb, start);
		}
	} else if (skb_orig_len) {
		return -EMSGSIZE;
	}

	return 0;
}

/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
				  struct sk_buff *skb, u32 portid,
				  u32 seq, u32 flags, u8 cmd, u32 ufid_flags)
{
	const int skb_orig_len = skb->len;
	struct ovs_header *ovs_header;
	int err;

	ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family,
				 flags, cmd);
	if (!ovs_header)
		return -EMSGSIZE;

	ovs_header->dp_ifindex = dp_ifindex;

	err = ovs_nla_put_identifier(flow, skb);
	if (err)
		goto error;

	if (should_fill_key(&flow->id, ufid_flags)) {
		err = ovs_nla_put_masked_key(flow, skb);
		if (err)
			goto error;
	}

	if (should_fill_mask(ufid_flags)) {
		err = ovs_nla_put_mask(flow, skb);
		if (err)
			goto error;
	}

	err = ovs_flow_cmd_fill_stats(flow, skb);
	if (err)
		goto error;

	if (should_fill_actions(ufid_flags)) {
		err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
		if (err)
			goto error;
	}

	genlmsg_end(skb, ovs_header);
	return 0;

error:
	genlmsg_cancel(skb, ovs_header);
	return err;
}

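/* Returns NULL when no reply is needed (no NLM_F_ECHO and no multicast
 * listeners), a fresh skb on success, or an ERR_PTR() on allocation
 * failure, so callers check with IS_ERR_OR_NULL().
 */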
/* May not be called with RCU read lock. */
static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts,
					       const struct sw_flow_id *sfid,
					       struct genl_info *info,
					       bool always,
					       uint32_t ufid_flags)
{
	struct sk_buff *skb;
	size_t len;

	if (!always && !ovs_must_notify(&dp_flow_genl_family, info, 0))
		return NULL;

	len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags);
	skb = genlmsg_new(len, GFP_KERNEL);
	if (!skb)
		return ERR_PTR(-ENOMEM);

	return skb;
}

/* Called with ovs_mutex. */
static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
					       int dp_ifindex,
					       struct genl_info *info, u8 cmd,
					       bool always, u32 ufid_flags)
{
	struct sk_buff *skb;
	int retval;

	skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts),
				      &flow->id, info, always, ufid_flags);
	if (IS_ERR_OR_NULL(skb))
		return skb;

	retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
					info->snd_portid, info->snd_seq, 0,
					cmd, ufid_flags);
	if (WARN_ON_ONCE(retval < 0)) {
		kfree_skb(skb);
		skb = ERR_PTR(retval);
	}
	return skb;
}

static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct sw_flow *flow = NULL, *new_flow;
	struct sw_flow_mask mask;
	struct sk_buff *reply;
	struct datapath *dp;
	struct sw_flow_key *key;
	struct sw_flow_actions *acts;
	struct sw_flow_match match;
	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
	int error;
	bool log = !a[OVS_FLOW_ATTR_PROBE];

	/* Must have key and actions. */
	error = -EINVAL;
	if (!a[OVS_FLOW_ATTR_KEY]) {
		OVS_NLERR(log, "Flow key attr not present in new flow.");
		goto error;
	}
	if (!a[OVS_FLOW_ATTR_ACTIONS]) {
		OVS_NLERR(log, "Flow actions attr not present in new flow.");
		goto error;
	}

	/* Most of the time we need to allocate a new flow, do it before
	 * locking.
	 */
	new_flow = ovs_flow_alloc();
	if (IS_ERR(new_flow)) {
		error = PTR_ERR(new_flow);
		goto error;
	}

	/* Extract key. */
	key = kzalloc(sizeof(*key), GFP_KERNEL);
	if (!key) {
		error = -ENOMEM;
		goto err_kfree_flow;
	}

	ovs_match_init(&match, key, false, &mask);
	error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
				  a[OVS_FLOW_ATTR_MASK], log);
	if (error)
		goto err_kfree_key;

	ovs_flow_mask_key(&new_flow->key, key, true, &mask);

	/* Extract flow identifier. */
	error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
				       key, log);
	if (error)
		goto err_kfree_key;

	/* Validate actions. */
	error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS],
				     &new_flow->key, &acts, log);
	if (error) {
		OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
		goto err_kfree_key;
	}

	reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false,
					ufid_flags);
	if (IS_ERR(reply)) {
		error = PTR_ERR(reply);
		goto err_kfree_acts;
	}

	ovs_lock();
	dp = get_dp(net, ovs_header->dp_ifindex);
	if (unlikely(!dp)) {
		error = -ENODEV;
		goto err_unlock_ovs;
	}

	/* Check if this is a duplicate flow */
	if (ovs_identifier_is_ufid(&new_flow->id))
		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id);
	if (!flow)
		flow = ovs_flow_tbl_lookup(&dp->table, key);
	if (likely(!flow)) {
		rcu_assign_pointer(new_flow->sf_acts, acts);

		/* Put flow in bucket. */
		error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask);
		if (unlikely(error)) {
			acts = NULL;
			goto err_unlock_ovs;
		}

		if (unlikely(reply)) {
			error = ovs_flow_cmd_fill_info(new_flow,
						       ovs_header->dp_ifindex,
						       reply, info->snd_portid,
						       info->snd_seq, 0,
						       OVS_FLOW_CMD_NEW,
						       ufid_flags);
			BUG_ON(error < 0);
		}
		ovs_unlock();
	} else {
		struct sw_flow_actions *old_acts;

		/* Bail out if we're not allowed to modify an existing flow.
		 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
		 * because Generic Netlink treats the latter as a dump
		 * request.  We also accept NLM_F_EXCL in case that bug ever
		 * gets fixed.
		 */
		if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE
							 | NLM_F_EXCL))) {
			error = -EEXIST;
			goto err_unlock_ovs;
		}
		/* The flow identifier has to be the same for flow updates.
		 * Look for any overlapping flow.
		 */
		if (unlikely(!ovs_flow_cmp(flow, &match))) {
			if (ovs_identifier_is_key(&flow->id))
				flow = ovs_flow_tbl_lookup_exact(&dp->table,
								 &match);
			else /* UFID matches but key is different */
				flow = NULL;
			if (!flow) {
				error = -ENOENT;
				goto err_unlock_ovs;
			}
		}
		/* Update actions. */
		old_acts = ovsl_dereference(flow->sf_acts);
		rcu_assign_pointer(flow->sf_acts, acts);

		if (unlikely(reply)) {
			error = ovs_flow_cmd_fill_info(flow,
						       ovs_header->dp_ifindex,
						       reply, info->snd_portid,
						       info->snd_seq, 0,
						       OVS_FLOW_CMD_NEW,
						       ufid_flags);
			BUG_ON(error < 0);
		}
		ovs_unlock();

		ovs_nla_free_flow_actions_rcu(old_acts);
		ovs_flow_free(new_flow, false);
	}

	if (reply)
		ovs_notify(&dp_flow_genl_family, reply, info);

	kfree(key);
	return 0;

err_unlock_ovs:
	ovs_unlock();
	kfree_skb(reply);
err_kfree_acts:
	ovs_nla_free_flow_actions(acts);
err_kfree_key:
	kfree(key);
err_kfree_flow:
	ovs_flow_free(new_flow, false);
error:
	return error;
}

/* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */
static noinline_for_stack
struct sw_flow_actions *get_flow_actions(struct net *net,
					 const struct nlattr *a,
					 const struct sw_flow_key *key,
					 const struct sw_flow_mask *mask,
					 bool log)
{
	struct sw_flow_actions *acts;
	struct sw_flow_key masked_key;
	int error;

	ovs_flow_mask_key(&masked_key, key, true, mask);
	error = ovs_nla_copy_actions(net, a, &masked_key, &acts, log);
	if (error) {
		OVS_NLERR(log,
			  "Actions may not be safe on all matching packets");
		return ERR_PTR(error);
	}

	return acts;
}

/* Factor out match-init and action-copy to avoid a
 * "Wframe-larger-than=1024" warning. Because the mask is only
 * used to get the actions, a separate function saves some
 * stack space.
 *
 * If neither the key nor the action attribute is present, we
 * return 0 directly; in that case the caller will not use the
 * match either. If the action attribute is present, we try to
 * parse the actions and store them in *acts. Before returning,
 * we reset the match->mask pointer so that we never hand back a
 * match object with a dangling reference to the on-stack mask.
 */
static noinline_for_stack int
ovs_nla_init_match_and_action(struct net *net,
			      struct sw_flow_match *match,
			      struct sw_flow_key *key,
			      struct nlattr **a,
			      struct sw_flow_actions **acts,
			      bool log)
{
	struct sw_flow_mask mask;
	int error = 0;

	if (a[OVS_FLOW_ATTR_KEY]) {
		ovs_match_init(match, key, true, &mask);
		error = ovs_nla_get_match(net, match, a[OVS_FLOW_ATTR_KEY],
					  a[OVS_FLOW_ATTR_MASK], log);
		if (error)
			goto error;
	}

	if (a[OVS_FLOW_ATTR_ACTIONS]) {
		if (!a[OVS_FLOW_ATTR_KEY]) {
			OVS_NLERR(log,
				  "Flow key attribute not present in set flow.");
			error = -EINVAL;
			goto error;
		}

		*acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], key,
					 &mask, log);
		if (IS_ERR(*acts)) {
			error = PTR_ERR(*acts);
			goto error;
		}
	}

	/* On success, error is 0. */
error:
	match->mask = NULL;
	return error;
}

static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct sw_flow_key key;
	struct sw_flow *flow;
	struct sk_buff *reply = NULL;
	struct datapath *dp;
	struct sw_flow_actions *old_acts = NULL, *acts = NULL;
	struct sw_flow_match match;
	struct sw_flow_id sfid;
	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
	int error = 0;
	bool log = !a[OVS_FLOW_ATTR_PROBE];
	bool ufid_present;

	ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log);
	if (!a[OVS_FLOW_ATTR_KEY] && !ufid_present) {
		OVS_NLERR(log,
			  "Flow set message rejected, Key attribute missing.");
		return -EINVAL;
	}

	error = ovs_nla_init_match_and_action(net, &match, &key, a,
					      &acts, log);
	if (error)
		goto error;

	if (acts) {
		/* Can allocate before locking if we have the acts. */
		reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false,
						ufid_flags);
		if (IS_ERR(reply)) {
			error = PTR_ERR(reply);
			goto err_kfree_acts;
		}
	}

	ovs_lock();
	dp = get_dp(net, ovs_header->dp_ifindex);
	if (unlikely(!dp)) {
		error = -ENODEV;
		goto err_unlock_ovs;
	}
	/* Check that the flow exists. */
	if (ufid_present)
		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &sfid);
	else
		flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
	if (unlikely(!flow)) {
		error = -ENOENT;
		goto err_unlock_ovs;
	}

	/* Update actions, if present. */
	if (likely(acts)) {
		old_acts = ovsl_dereference(flow->sf_acts);
		rcu_assign_pointer(flow->sf_acts, acts);

		if (unlikely(reply)) {
			error = ovs_flow_cmd_fill_info(flow,
						       ovs_header->dp_ifindex,
						       reply, info->snd_portid,
						       info->snd_seq, 0,
						       OVS_FLOW_CMD_SET,
						       ufid_flags);
			BUG_ON(error < 0);
		}
	} else {
		/* Could not alloc without acts before locking. */
		reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
						info, OVS_FLOW_CMD_SET, false,
						ufid_flags);

		if (IS_ERR(reply)) {
			error = PTR_ERR(reply);
			goto err_unlock_ovs;
		}
	}

	/* Clear stats. */
	if (a[OVS_FLOW_ATTR_CLEAR])
		ovs_flow_stats_clear(flow);
	ovs_unlock();

	if (reply)
		ovs_notify(&dp_flow_genl_family, reply, info);
	if (old_acts)
		ovs_nla_free_flow_actions_rcu(old_acts);

	return 0;

err_unlock_ovs:
	ovs_unlock();
	kfree_skb(reply);
err_kfree_acts:
	ovs_nla_free_flow_actions(acts);
error:
	return error;
}

static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct net *net = sock_net(skb->sk);
	struct sw_flow_key key;
	struct sk_buff *reply;
	struct sw_flow *flow;
	struct datapath *dp;
	struct sw_flow_match match;
	struct sw_flow_id ufid;
	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
	int err = 0;
	bool log = !a[OVS_FLOW_ATTR_PROBE];
	bool ufid_present;

	ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
	if (a[OVS_FLOW_ATTR_KEY]) {
		ovs_match_init(&match, &key, true, NULL);
		err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], NULL,
					log);
	} else if (!ufid_present) {
		OVS_NLERR(log,
			  "Flow get message rejected, Key attribute missing.");
		err = -EINVAL;
	}
	if (err)
		return err;

	ovs_lock();
	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
	if (!dp) {
		err = -ENODEV;
		goto unlock;
	}

	if (ufid_present)
		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
	else
		flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
	if (!flow) {
		err = -ENOENT;
		goto unlock;
	}

	reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
					OVS_FLOW_CMD_GET, true, ufid_flags);
	if (IS_ERR(reply)) {
		err = PTR_ERR(reply);
		goto unlock;
	}

	ovs_unlock();
	return genlmsg_reply(reply, info);
unlock:
	ovs_unlock();
	return err;
}

static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct net *net = sock_net(skb->sk);
	struct sw_flow_key key;
	struct sk_buff *reply;
	struct sw_flow *flow = NULL;
	struct datapath *dp;
	struct sw_flow_match match;
	struct sw_flow_id ufid;
	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
	int err;
	bool log = !a[OVS_FLOW_ATTR_PROBE];
	bool ufid_present;

	ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
	if (a[OVS_FLOW_ATTR_KEY]) {
		ovs_match_init(&match, &key, true, NULL);
		err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
					NULL, log);
		if (unlikely(err))
			return err;
	}

	ovs_lock();
	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
	if (unlikely(!dp)) {
		err = -ENODEV;
		goto unlock;
	}

	if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) {
		err = ovs_flow_tbl_flush(&dp->table);
		goto unlock;
	}

	if (ufid_present)
		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
	else
		flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
	if (unlikely(!flow)) {
		err = -ENOENT;
		goto unlock;
	}

	ovs_flow_tbl_remove(&dp->table, flow);
	ovs_unlock();

	reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *)flow->sf_acts,
					&flow->id, info, false, ufid_flags);
	if (likely(reply)) {
		if (!IS_ERR(reply)) {
			rcu_read_lock();	/* To keep RCU checker happy. */
1426                         err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
1427                                                      reply, info->snd_portid,
1428                                                      info->snd_seq, 0,
1429                                                      OVS_FLOW_CMD_DEL,
1430                                                      ufid_flags);
1431                         rcu_read_unlock();
1432                         if (WARN_ON_ONCE(err < 0)) {
1433                                 kfree_skb(reply);
1434                                 goto out_free;
1435                         }
1436
1437                         ovs_notify(&dp_flow_genl_family, reply, info);
1438                 } else {
1439                         netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0,
1440                                         PTR_ERR(reply));
1441                 }
1442         }
1443
1444 out_free:
1445         ovs_flow_free(flow, true);
1446         return 0;
1447 unlock:
1448         ovs_unlock();
1449         return err;
1450 }
1451
1452 static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1453 {
1454         struct nlattr *a[__OVS_FLOW_ATTR_MAX];
1455         struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
1456         struct table_instance *ti;
1457         struct datapath *dp;
1458         u32 ufid_flags;
1459         int err;
1460
1461         err = genlmsg_parse_deprecated(cb->nlh, &dp_flow_genl_family, a,
1462                                        OVS_FLOW_ATTR_MAX, flow_policy, NULL);
1463         if (err)
1464                 return err;
1465         ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
1466
1467         rcu_read_lock();
1468         dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
1469         if (!dp) {
1470                 rcu_read_unlock();
1471                 return -ENODEV;
1472         }
1473
1474         ti = rcu_dereference(dp->table.ti);
1475         for (;;) {
1476                 struct sw_flow *flow;
1477                 u32 bucket, obj;
1478
1479                 bucket = cb->args[0];
1480                 obj = cb->args[1];
1481                 flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj);
1482                 if (!flow)
1483                         break;
1484
1485                 if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
1486                                            NETLINK_CB(cb->skb).portid,
1487                                            cb->nlh->nlmsg_seq, NLM_F_MULTI,
1488                                            OVS_FLOW_CMD_GET, ufid_flags) < 0)
1489                         break;
1490
1491                 cb->args[0] = bucket;
1492                 cb->args[1] = obj;
1493         }
1494         rcu_read_unlock();
1495         return skb->len;
1496 }
1497
1498 static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
1499         [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
1500         [OVS_FLOW_ATTR_MASK] = { .type = NLA_NESTED },
1501         [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
1502         [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
1503         [OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG },
1504         [OVS_FLOW_ATTR_UFID] = { .type = NLA_UNSPEC, .len = 1 },
1505         [OVS_FLOW_ATTR_UFID_FLAGS] = { .type = NLA_U32 },
1506 };
1507
1508 static const struct genl_small_ops dp_flow_genl_ops[] = {
1509         { .cmd = OVS_FLOW_CMD_NEW,
1510           .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
1511           .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1512           .doit = ovs_flow_cmd_new
1513         },
1514         { .cmd = OVS_FLOW_CMD_DEL,
1515           .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
1516           .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1517           .doit = ovs_flow_cmd_del
1518         },
1519         { .cmd = OVS_FLOW_CMD_GET,
1520           .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
1521           .flags = 0,               /* OK for unprivileged users. */
1522           .doit = ovs_flow_cmd_get,
1523           .dumpit = ovs_flow_cmd_dump
1524         },
1525         { .cmd = OVS_FLOW_CMD_SET,
1526           .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
1527           .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1528           .doit = ovs_flow_cmd_set,
1529         },
1530 };
1531
1532 static struct genl_family dp_flow_genl_family __ro_after_init = {
1533         .hdrsize = sizeof(struct ovs_header),
1534         .name = OVS_FLOW_FAMILY,
1535         .version = OVS_FLOW_VERSION,
1536         .maxattr = OVS_FLOW_ATTR_MAX,
1537         .policy = flow_policy,
1538         .netnsok = true,
1539         .parallel_ops = true,
1540         .small_ops = dp_flow_genl_ops,
1541         .n_small_ops = ARRAY_SIZE(dp_flow_genl_ops),
1542         .resv_start_op = OVS_FLOW_CMD_SET + 1,
1543         .mcgrps = &ovs_dp_flow_multicast_group,
1544         .n_mcgrps = 1,
1545         .module = THIS_MODULE,
1546 };
1547
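/* Upper bound on the size of a datapath reply message, used to size the
 * reply skb before filling it in.
 */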
1548 static size_t ovs_dp_cmd_msg_size(void)
1549 {
1550         size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));
1551
1552         msgsize += nla_total_size(IFNAMSIZ);
1553         msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_stats));
1554         msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_megaflow_stats));
1555         msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */
1556         msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_MASKS_CACHE_SIZE */
1557         msgsize += nla_total_size(sizeof(u32) * nr_cpu_ids); /* OVS_DP_ATTR_PER_CPU_PIDS */
1558
1559         return msgsize;
1560 }
1561
1562 /* Called with ovs_mutex. */
1563 static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
1564                                 u32 portid, u32 seq, u32 flags, u8 cmd)
1565 {
1566         struct ovs_header *ovs_header;
1567         struct ovs_dp_stats dp_stats;
1568         struct ovs_dp_megaflow_stats dp_megaflow_stats;
1569         struct dp_nlsk_pids *pids = ovsl_dereference(dp->upcall_portids);
1570         int err, pids_len;
1571
1572         ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
1573                                  flags, cmd);
1574         if (!ovs_header)
1575                 goto error;
1576
1577         ovs_header->dp_ifindex = get_dpifindex(dp);
1578
1579         err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
1580         if (err)
1581                 goto nla_put_failure;
1582
1583         get_dp_stats(dp, &dp_stats, &dp_megaflow_stats);
1584         if (nla_put_64bit(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats),
1585                           &dp_stats, OVS_DP_ATTR_PAD))
1586                 goto nla_put_failure;
1587
1588         if (nla_put_64bit(skb, OVS_DP_ATTR_MEGAFLOW_STATS,
1589                           sizeof(struct ovs_dp_megaflow_stats),
1590                           &dp_megaflow_stats, OVS_DP_ATTR_PAD))
1591                 goto nla_put_failure;
1592
1593         if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
1594                 goto nla_put_failure;
1595
1596         if (nla_put_u32(skb, OVS_DP_ATTR_MASKS_CACHE_SIZE,
1597                         ovs_flow_tbl_masks_cache_size(&dp->table)))
1598                 goto nla_put_failure;
1599
1600         if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU && pids) {
1601                 pids_len = min(pids->n_pids, nr_cpu_ids) * sizeof(u32);
1602                 if (nla_put(skb, OVS_DP_ATTR_PER_CPU_PIDS, pids_len, &pids->pids))
1603                         goto nla_put_failure;
1604         }
1605
1606         genlmsg_end(skb, ovs_header);
1607         return 0;
1608
1609 nla_put_failure:
1610         genlmsg_cancel(skb, ovs_header);
1611 error:
1612         return -EMSGSIZE;
1613 }
1614
1615 static struct sk_buff *ovs_dp_cmd_alloc_info(void)
1616 {
1617         return genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL);
1618 }
1619
1620 /* Called with rcu_read_lock or ovs_mutex. */
1621 static struct datapath *lookup_datapath(struct net *net,
1622                                         const struct ovs_header *ovs_header,
1623                                         struct nlattr *a[OVS_DP_ATTR_MAX + 1])
1624 {
1625         struct datapath *dp;
1626
1627         if (!a[OVS_DP_ATTR_NAME])
1628                 dp = get_dp(net, ovs_header->dp_ifindex);
1629         else {
1630                 struct vport *vport;
1631
1632                 vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
1633                 dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
1634         }
1635         return dp ? dp : ERR_PTR(-ENODEV);
1636 }
1637
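/* Drop all user features on the datapath named in the request.  Used when
 * an old userspace (genl version < OVS_DP_VER_FEATURES) re-creates an
 * existing datapath.
 */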
1638 static void ovs_dp_reset_user_features(struct sk_buff *skb,
1639                                        struct genl_info *info)
1640 {
1641         struct datapath *dp;
1642
1643         dp = lookup_datapath(sock_net(skb->sk), info->userhdr,
1644                              info->attrs);
1645         if (IS_ERR(dp))
1646                 return;
1647
1648         pr_warn("%s: Dropping previously announced user features\n",
1649                 ovs_dp_name(dp));
1650         dp->user_features = 0;
1651 }
1652
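/* Called with ovs_mutex.  Replaces the array of per-CPU upcall netlink
 * port IDs under RCU; concurrent readers see either the old or the new
 * array.
 */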
1653 static int ovs_dp_set_upcall_portids(struct datapath *dp,
1654                                      const struct nlattr *ids)
1655 {
1656         struct dp_nlsk_pids *old, *dp_nlsk_pids;
1657
1658         if (!nla_len(ids) || nla_len(ids) % sizeof(u32))
1659                 return -EINVAL;
1660
1661         old = ovsl_dereference(dp->upcall_portids);
1662
1663         dp_nlsk_pids = kmalloc(sizeof(*dp_nlsk_pids) + nla_len(ids),
1664                                GFP_KERNEL);
1665         if (!dp_nlsk_pids)
1666                 return -ENOMEM;
1667
1668         dp_nlsk_pids->n_pids = nla_len(ids) / sizeof(u32);
1669         nla_memcpy(dp_nlsk_pids->pids, ids, nla_len(ids));
1670
1671         rcu_assign_pointer(dp->upcall_portids, dp_nlsk_pids);
1672
1673         kfree_rcu(old, rcu);
1674
1675         return 0;
1676 }
1677
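/* Called with rcu_read_lock.  Returns the netlink port ID that upcalls
 * generated on the given CPU (typically the CPU that received the packet)
 * should be sent to, or 0 if none is configured.
 */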
1678 u32 ovs_dp_get_upcall_portid(const struct datapath *dp, uint32_t cpu_id)
1679 {
1680         struct dp_nlsk_pids *dp_nlsk_pids;
1681
1682         dp_nlsk_pids = rcu_dereference(dp->upcall_portids);
1683
1684         if (dp_nlsk_pids) {
1685                 if (cpu_id < dp_nlsk_pids->n_pids) {
1686                         return dp_nlsk_pids->pids[cpu_id];
1687                 } else if (dp_nlsk_pids->n_pids > 0 &&
1688                            cpu_id >= dp_nlsk_pids->n_pids) {
1689                         /* If the number of netlink PIDs is mismatched with
1690                          * the number of CPUs as seen by the kernel, log this
1691                          * and send the upcall to the socket at
1692                          * (cpu_id % n_pids) so that packets are not dropped.
1693                          */
1694                         pr_info_ratelimited("cpu_id mismatch with handler threads\n");
1695                         return dp_nlsk_pids->pids[cpu_id %
1696                                                   dp_nlsk_pids->n_pids];
1697                 } else {
1698                         return 0;
1699                 }
1700         } else {
1701                 return 0;
1702         }
1703 }
1704
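/* Called with ovs_mutex.  Applies datapath tunables (user features, masks
 * cache size, per-CPU upcall port IDs) from the given netlink attributes.
 */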
1705 static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
1706 {
1707         u32 user_features = 0, old_features = dp->user_features;
1708         int err;
1709
1710         if (a[OVS_DP_ATTR_USER_FEATURES]) {
1711                 user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
1712
1713                 if (user_features & ~(OVS_DP_F_VPORT_PIDS |
1714                                       OVS_DP_F_UNALIGNED |
1715                                       OVS_DP_F_TC_RECIRC_SHARING |
1716                                       OVS_DP_F_DISPATCH_UPCALL_PER_CPU))
1717                         return -EOPNOTSUPP;
1718
1719 #if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
1720                 if (user_features & OVS_DP_F_TC_RECIRC_SHARING)
1721                         return -EOPNOTSUPP;
1722 #endif
1723         }
1724
1725         if (a[OVS_DP_ATTR_MASKS_CACHE_SIZE]) {
1726                 u32 cache_size;
1728
1729                 cache_size = nla_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]);
1730                 err = ovs_flow_tbl_masks_cache_resize(&dp->table, cache_size);
1731                 if (err)
1732                         return err;
1733         }
1734
1735         dp->user_features = user_features;
1736
1737         if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU &&
1738             a[OVS_DP_ATTR_PER_CPU_PIDS]) {
1739                 /* The per-CPU upcall netlink port IDs have been updated. */
1740                 err = ovs_dp_set_upcall_portids(dp,
1741                                                 a[OVS_DP_ATTR_PER_CPU_PIDS]);
1742                 if (err)
1743                         return err;
1744         }
1745
1746         if ((dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) &&
1747             !(old_features & OVS_DP_F_TC_RECIRC_SHARING))
1748                 tc_skb_ext_tc_enable();
1749         else if (!(dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) &&
1750                  (old_features & OVS_DP_F_TC_RECIRC_SHARING))
1751                 tc_skb_ext_tc_disable();
1752
1753         return 0;
1754 }
1755
1756 static int ovs_dp_stats_init(struct datapath *dp)
1757 {
1758         dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
1759         if (!dp->stats_percpu)
1760                 return -ENOMEM;
1761
1762         return 0;
1763 }
1764
1765 static int ovs_dp_vport_init(struct datapath *dp)
1766 {
1767         int i;
1768
1769         dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
1770                                   sizeof(struct hlist_head),
1771                                   GFP_KERNEL);
1772         if (!dp->ports)
1773                 return -ENOMEM;
1774
1775         for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
1776                 INIT_HLIST_HEAD(&dp->ports[i]);
1777
1778         return 0;
1779 }
1780
1781 static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1782 {
1783         struct nlattr **a = info->attrs;
1784         struct vport_parms parms;
1785         struct sk_buff *reply;
1786         struct datapath *dp;
1787         struct vport *vport;
1788         struct ovs_net *ovs_net;
1789         int err;
1790
1791         err = -EINVAL;
1792         if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
1793                 goto err;
1794
1795         reply = ovs_dp_cmd_alloc_info();
1796         if (!reply)
1797                 return -ENOMEM;
1798
1799         err = -ENOMEM;
1800         dp = kzalloc(sizeof(*dp), GFP_KERNEL);
1801         if (dp == NULL)
1802                 goto err_destroy_reply;
1803
1804         ovs_dp_set_net(dp, sock_net(skb->sk));
1805
1806         /* Allocate table. */
1807         err = ovs_flow_tbl_init(&dp->table);
1808         if (err)
1809                 goto err_destroy_dp;
1810
1811         err = ovs_dp_stats_init(dp);
1812         if (err)
1813                 goto err_destroy_table;
1814
1815         err = ovs_dp_vport_init(dp);
1816         if (err)
1817                 goto err_destroy_stats;
1818
1819         err = ovs_meters_init(dp);
1820         if (err)
1821                 goto err_destroy_ports;
1822
1823         /* Set up our datapath device. */
1824         parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
1825         parms.type = OVS_VPORT_TYPE_INTERNAL;
1826         parms.options = NULL;
1827         parms.dp = dp;
1828         parms.port_no = OVSP_LOCAL;
1829         parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
1830         parms.desired_ifindex = a[OVS_DP_ATTR_IFINDEX]
1831                 ? nla_get_u32(a[OVS_DP_ATTR_IFINDEX]) : 0;
1832
1833         /* So far only local changes have been made, now need the lock. */
1834         ovs_lock();
1835
1836         err = ovs_dp_change(dp, a);
1837         if (err)
1838                 goto err_unlock_and_destroy_meters;
1839
1840         vport = new_vport(&parms);
1841         if (IS_ERR(vport)) {
1842                 err = PTR_ERR(vport);
1843                 if (err == -EBUSY)
1844                         err = -EEXIST;
1845
1846                 if (err == -EEXIST) {
1847                         /* An outdated user space instance that does not understand
1848                          * the concept of user_features has attempted to create a new
1849                          * datapath and is likely to reuse it. Drop all user features.
1850                          */
1851                         if (info->genlhdr->version < OVS_DP_VER_FEATURES)
1852                                 ovs_dp_reset_user_features(skb, info);
1853                 }
1854
1855                 goto err_destroy_portids;
1856         }
1857
1858         err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1859                                    info->snd_seq, 0, OVS_DP_CMD_NEW);
1860         BUG_ON(err < 0);
1861
1862         ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
1863         list_add_tail_rcu(&dp->list_node, &ovs_net->dps);
1864
1865         ovs_unlock();
1866
1867         ovs_notify(&dp_datapath_genl_family, reply, info);
1868         return 0;
1869
1870 err_destroy_portids:
1871         kfree(rcu_dereference_raw(dp->upcall_portids));
1872 err_unlock_and_destroy_meters:
1873         ovs_unlock();
1874         ovs_meters_exit(dp);
1875 err_destroy_ports:
1876         kfree(dp->ports);
1877 err_destroy_stats:
1878         free_percpu(dp->stats_percpu);
1879 err_destroy_table:
1880         ovs_flow_tbl_destroy(&dp->table);
1881 err_destroy_dp:
1882         kfree(dp);
1883 err_destroy_reply:
1884         kfree_skb(reply);
1885 err:
1886         return err;
1887 }
1888
1889 /* Called with ovs_mutex. */
1890 static void __dp_destroy(struct datapath *dp)
1891 {
1892         struct flow_table *table = &dp->table;
1893         int i;
1894
1895         if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
1896                 tc_skb_ext_tc_disable();
1897
1898         for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
1899                 struct vport *vport;
1900                 struct hlist_node *n;
1901
1902                 hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node)
1903                         if (vport->port_no != OVSP_LOCAL)
1904                                 ovs_dp_detach_port(vport);
1905         }
1906
1907         list_del_rcu(&dp->list_node);
1908
1909         /* OVSP_LOCAL is the datapath's internal port.  Make sure that all
1910          * other ports in the datapath are destroyed before freeing it.
1911          */
1912         ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
1913
1914         /* Flush sw_flows from the tables here, so that the RCU callback
1915          * only releases resources such as the dp, ports and tables.  This
1916          * avoids triggering RCU usage warnings.
1917          */
1918         table_instance_flow_flush(table, ovsl_dereference(table->ti),
1919                                   ovsl_dereference(table->ufid_ti));
1920
1921         /* RCU destroy the ports, meters and flow tables. */
1922         call_rcu(&dp->rcu, destroy_dp_rcu);
1923 }
1924
1925 static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
1926 {
1927         struct sk_buff *reply;
1928         struct datapath *dp;
1929         int err;
1930
1931         reply = ovs_dp_cmd_alloc_info();
1932         if (!reply)
1933                 return -ENOMEM;
1934
1935         ovs_lock();
1936         dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1937         err = PTR_ERR(dp);
1938         if (IS_ERR(dp))
1939                 goto err_unlock_free;
1940
1941         err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1942                                    info->snd_seq, 0, OVS_DP_CMD_DEL);
1943         BUG_ON(err < 0);
1944
1945         __dp_destroy(dp);
1946         ovs_unlock();
1947
1948         ovs_notify(&dp_datapath_genl_family, reply, info);
1949
1950         return 0;
1951
1952 err_unlock_free:
1953         ovs_unlock();
1954         kfree_skb(reply);
1955         return err;
1956 }
1957
1958 static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
1959 {
1960         struct sk_buff *reply;
1961         struct datapath *dp;
1962         int err;
1963
1964         reply = ovs_dp_cmd_alloc_info();
1965         if (!reply)
1966                 return -ENOMEM;
1967
1968         ovs_lock();
1969         dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1970         err = PTR_ERR(dp);
1971         if (IS_ERR(dp))
1972                 goto err_unlock_free;
1973
1974         err = ovs_dp_change(dp, info->attrs);
1975         if (err)
1976                 goto err_unlock_free;
1977
1978         err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1979                                    info->snd_seq, 0, OVS_DP_CMD_SET);
1980         BUG_ON(err < 0);
1981
1982         ovs_unlock();
1983         ovs_notify(&dp_datapath_genl_family, reply, info);
1984
1985         return 0;
1986
1987 err_unlock_free:
1988         ovs_unlock();
1989         kfree_skb(reply);
1990         return err;
1991 }
1992
1993 static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
1994 {
1995         struct sk_buff *reply;
1996         struct datapath *dp;
1997         int err;
1998
1999         reply = ovs_dp_cmd_alloc_info();
2000         if (!reply)
2001                 return -ENOMEM;
2002
2003         ovs_lock();
2004         dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
2005         if (IS_ERR(dp)) {
2006                 err = PTR_ERR(dp);
2007                 goto err_unlock_free;
2008         }
2009         err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
2010                                    info->snd_seq, 0, OVS_DP_CMD_GET);
2011         BUG_ON(err < 0);
2012         ovs_unlock();
2013
2014         return genlmsg_reply(reply, info);
2015
2016 err_unlock_free:
2017         ovs_unlock();
2018         kfree_skb(reply);
2019         return err;
2020 }
2021
2022 static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
2023 {
2024         struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
2025         struct datapath *dp;
2026         int skip = cb->args[0];
2027         int i = 0;
2028
2029         ovs_lock();
2030         list_for_each_entry(dp, &ovs_net->dps, list_node) {
2031                 if (i >= skip &&
2032                     ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
2033                                          cb->nlh->nlmsg_seq, NLM_F_MULTI,
2034                                          OVS_DP_CMD_GET) < 0)
2035                         break;
2036                 i++;
2037         }
2038         ovs_unlock();
2039
2040         cb->args[0] = i;
2041
2042         return skb->len;
2043 }
2044
2045 static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
2046         [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
2047         [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
2048         [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
2049         [OVS_DP_ATTR_MASKS_CACHE_SIZE] =  NLA_POLICY_RANGE(NLA_U32, 0,
2050                 PCPU_MIN_UNIT_SIZE / sizeof(struct mask_cache_entry)),
2051         [OVS_DP_ATTR_IFINDEX] = { .type = NLA_U32 },
2052 };
2053
2054 static const struct genl_small_ops dp_datapath_genl_ops[] = {
2055         { .cmd = OVS_DP_CMD_NEW,
2056           .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2057           .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2058           .doit = ovs_dp_cmd_new
2059         },
2060         { .cmd = OVS_DP_CMD_DEL,
2061           .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2062           .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2063           .doit = ovs_dp_cmd_del
2064         },
2065         { .cmd = OVS_DP_CMD_GET,
2066           .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2067           .flags = 0,               /* OK for unprivileged users. */
2068           .doit = ovs_dp_cmd_get,
2069           .dumpit = ovs_dp_cmd_dump
2070         },
2071         { .cmd = OVS_DP_CMD_SET,
2072           .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2073           .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2074           .doit = ovs_dp_cmd_set,
2075         },
2076 };
2077
2078 static struct genl_family dp_datapath_genl_family __ro_after_init = {
2079         .hdrsize = sizeof(struct ovs_header),
2080         .name = OVS_DATAPATH_FAMILY,
2081         .version = OVS_DATAPATH_VERSION,
2082         .maxattr = OVS_DP_ATTR_MAX,
2083         .policy = datapath_policy,
2084         .netnsok = true,
2085         .parallel_ops = true,
2086         .small_ops = dp_datapath_genl_ops,
2087         .n_small_ops = ARRAY_SIZE(dp_datapath_genl_ops),
2088         .resv_start_op = OVS_DP_CMD_SET + 1,
2089         .mcgrps = &ovs_dp_datapath_multicast_group,
2090         .n_mcgrps = 1,
2091         .module = THIS_MODULE,
2092 };
2093
2094 /* Called with ovs_mutex or RCU read lock. */
2095 static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
2096                                    struct net *net, u32 portid, u32 seq,
2097                                    u32 flags, u8 cmd, gfp_t gfp)
2098 {
2099         struct ovs_header *ovs_header;
2100         struct ovs_vport_stats vport_stats;
2101         int err;
2102
2103         ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
2104                                  flags, cmd);
2105         if (!ovs_header)
2106                 return -EMSGSIZE;
2107
2108         ovs_header->dp_ifindex = get_dpifindex(vport->dp);
2109
2110         if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
2111             nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
2112             nla_put_string(skb, OVS_VPORT_ATTR_NAME,
2113                            ovs_vport_name(vport)) ||
2114             nla_put_u32(skb, OVS_VPORT_ATTR_IFINDEX, vport->dev->ifindex))
2115                 goto nla_put_failure;
2116
2117         if (!net_eq(net, dev_net(vport->dev))) {
2118                 int id = peernet2id_alloc(net, dev_net(vport->dev), gfp);
2119
2120                 if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id))
2121                         goto nla_put_failure;
2122         }
2123
2124         ovs_vport_get_stats(vport, &vport_stats);
2125         if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS,
2126                           sizeof(struct ovs_vport_stats), &vport_stats,
2127                           OVS_VPORT_ATTR_PAD))
2128                 goto nla_put_failure;
2129
2130         if (ovs_vport_get_upcall_stats(vport, skb))
2131                 goto nla_put_failure;
2132
2133         if (ovs_vport_get_upcall_portids(vport, skb))
2134                 goto nla_put_failure;
2135
2136         err = ovs_vport_get_options(vport, skb);
2137         if (err == -EMSGSIZE)
2138                 goto error;
2139
2140         genlmsg_end(skb, ovs_header);
2141         return 0;
2142
2143 nla_put_failure:
2144         err = -EMSGSIZE;
2145 error:
2146         genlmsg_cancel(skb, ovs_header);
2147         return err;
2148 }
2149
2150 static struct sk_buff *ovs_vport_cmd_alloc_info(void)
2151 {
2152         return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2153 }
2154
2155 /* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
2156 struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
2157                                          u32 portid, u32 seq, u8 cmd)
2158 {
2159         struct sk_buff *skb;
2160         int retval;
2161
2162         skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2163         if (!skb)
2164                 return ERR_PTR(-ENOMEM);
2165
2166         retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd,
2167                                          GFP_KERNEL);
2168         BUG_ON(retval < 0);
2169
2170         return skb;
2171 }
2172
2173 /* Called with ovs_mutex or RCU read lock. */
2174 static struct vport *lookup_vport(struct net *net,
2175                                   const struct ovs_header *ovs_header,
2176                                   struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
2177 {
2178         struct datapath *dp;
2179         struct vport *vport;
2180
2181         if (a[OVS_VPORT_ATTR_IFINDEX])
2182                 return ERR_PTR(-EOPNOTSUPP);
2183         if (a[OVS_VPORT_ATTR_NAME]) {
2184                 vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
2185                 if (!vport)
2186                         return ERR_PTR(-ENODEV);
2187                 if (ovs_header->dp_ifindex &&
2188                     ovs_header->dp_ifindex != get_dpifindex(vport->dp))
2189                         return ERR_PTR(-ENODEV);
2190                 return vport;
2191         } else if (a[OVS_VPORT_ATTR_PORT_NO]) {
2192                 u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
2193
2194                 if (port_no >= DP_MAX_PORTS)
2195                         return ERR_PTR(-EFBIG);
2196
2197                 dp = get_dp(net, ovs_header->dp_ifindex);
2198                 if (!dp)
2199                         return ERR_PTR(-ENODEV);
2200
2201                 vport = ovs_vport_ovsl_rcu(dp, port_no);
2202                 if (!vport)
2203                         return ERR_PTR(-ENODEV);
2204                 return vport;
2205         } else {
2206                 return ERR_PTR(-EINVAL);
2207         }
2208 }
2209
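/* Called with ovs_mutex.  Returns the largest forwarding headroom needed
 * by any device currently attached to the datapath.
 */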
2210 static unsigned int ovs_get_max_headroom(struct datapath *dp)
2211 {
2212         unsigned int dev_headroom, max_headroom = 0;
2213         struct net_device *dev;
2214         struct vport *vport;
2215         int i;
2216
2217         for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
2218                 hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
2219                                          lockdep_ovsl_is_held()) {
2220                         dev = vport->dev;
2221                         dev_headroom = netdev_get_fwd_headroom(dev);
2222                         if (dev_headroom > max_headroom)
2223                                 max_headroom = dev_headroom;
2224                 }
2225         }
2226
2227         return max_headroom;
2228 }
2229
2230 /* Called with ovs_mutex */
2231 static void ovs_update_headroom(struct datapath *dp, unsigned int new_headroom)
2232 {
2233         struct vport *vport;
2234         int i;
2235
2236         dp->max_headroom = new_headroom;
2237         for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
2238                 hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
2239                                          lockdep_ovsl_is_held())
2240                         netdev_set_rx_headroom(vport->dev, new_headroom);
2241         }
2242 }
2243
2244 static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
2245 {
2246         struct nlattr **a = info->attrs;
2247         struct ovs_header *ovs_header = info->userhdr;
2248         struct vport_parms parms;
2249         struct sk_buff *reply;
2250         struct vport *vport;
2251         struct datapath *dp;
2252         unsigned int new_headroom;
2253         u32 port_no;
2254         int err;
2255
2256         if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
2257             !a[OVS_VPORT_ATTR_UPCALL_PID])
2258                 return -EINVAL;
2259
2260         parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
2261
2262         if (a[OVS_VPORT_ATTR_IFINDEX] && parms.type != OVS_VPORT_TYPE_INTERNAL)
2263                 return -EOPNOTSUPP;
2264
2265         port_no = a[OVS_VPORT_ATTR_PORT_NO]
2266                 ? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0;
2267         if (port_no >= DP_MAX_PORTS)
2268                 return -EFBIG;
2269
2270         reply = ovs_vport_cmd_alloc_info();
2271         if (!reply)
2272                 return -ENOMEM;
2273
2274         ovs_lock();
2275 restart:
2276         dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
2277         err = -ENODEV;
2278         if (!dp)
2279                 goto exit_unlock_free;
2280
2281         if (port_no) {
2282                 vport = ovs_vport_ovsl(dp, port_no);
2283                 err = -EBUSY;
2284                 if (vport)
2285                         goto exit_unlock_free;
2286         } else {
2287                 for (port_no = 1; ; port_no++) {
2288                         if (port_no >= DP_MAX_PORTS) {
2289                                 err = -EFBIG;
2290                                 goto exit_unlock_free;
2291                         }
2292                         vport = ovs_vport_ovsl(dp, port_no);
2293                         if (!vport)
2294                                 break;
2295                 }
2296         }
2297
2298         parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
2299         parms.options = a[OVS_VPORT_ATTR_OPTIONS];
2300         parms.dp = dp;
2301         parms.port_no = port_no;
2302         parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];
2303         parms.desired_ifindex = a[OVS_VPORT_ATTR_IFINDEX]
2304                 ? nla_get_u32(a[OVS_VPORT_ATTR_IFINDEX]) : 0;
2305
2306         vport = new_vport(&parms);
2307         err = PTR_ERR(vport);
2308         if (IS_ERR(vport)) {
2309                 if (err == -EAGAIN)
2310                         goto restart;
2311                 goto exit_unlock_free;
2312         }
2313
2314         err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
2315                                       info->snd_portid, info->snd_seq, 0,
2316                                       OVS_VPORT_CMD_NEW, GFP_KERNEL);
2317
2318         new_headroom = netdev_get_fwd_headroom(vport->dev);
2319
2320         if (new_headroom > dp->max_headroom)
2321                 ovs_update_headroom(dp, new_headroom);
2322         else
2323                 netdev_set_rx_headroom(vport->dev, dp->max_headroom);
2324
2325         BUG_ON(err < 0);
2326         ovs_unlock();
2327
2328         ovs_notify(&dp_vport_genl_family, reply, info);
2329         return 0;
2330
2331 exit_unlock_free:
2332         ovs_unlock();
2333         kfree_skb(reply);
2334         return err;
2335 }
2336
2337 static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
2338 {
2339         struct nlattr **a = info->attrs;
2340         struct sk_buff *reply;
2341         struct vport *vport;
2342         int err;
2343
2344         reply = ovs_vport_cmd_alloc_info();
2345         if (!reply)
2346                 return -ENOMEM;
2347
2348         ovs_lock();
2349         vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
2350         err = PTR_ERR(vport);
2351         if (IS_ERR(vport))
2352                 goto exit_unlock_free;
2353
2354         if (a[OVS_VPORT_ATTR_TYPE] &&
2355             nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) {
2356                 err = -EINVAL;
2357                 goto exit_unlock_free;
2358         }
2359
2360         if (a[OVS_VPORT_ATTR_OPTIONS]) {
2361                 err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
2362                 if (err)
2363                         goto exit_unlock_free;
2364         }
2365
2367         if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
2368                 struct nlattr *ids = a[OVS_VPORT_ATTR_UPCALL_PID];
2369
2370                 err = ovs_vport_set_upcall_portids(vport, ids);
2371                 if (err)
2372                         goto exit_unlock_free;
2373         }
2374
2375         err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
2376                                       info->snd_portid, info->snd_seq, 0,
2377                                       OVS_VPORT_CMD_SET, GFP_KERNEL);
2378         BUG_ON(err < 0);
2379
2380         ovs_unlock();
2381         ovs_notify(&dp_vport_genl_family, reply, info);
2382         return 0;
2383
2384 exit_unlock_free:
2385         ovs_unlock();
2386         kfree_skb(reply);
2387         return err;
2388 }
2389
2390 static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
2391 {
2392         bool update_headroom = false;
2393         struct nlattr **a = info->attrs;
2394         struct sk_buff *reply;
2395         struct datapath *dp;
2396         struct vport *vport;
2397         unsigned int new_headroom;
2398         int err;
2399
2400         reply = ovs_vport_cmd_alloc_info();
2401         if (!reply)
2402                 return -ENOMEM;
2403
2404         ovs_lock();
2405         vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
2406         err = PTR_ERR(vport);
2407         if (IS_ERR(vport))
2408                 goto exit_unlock_free;
2409
2410         if (vport->port_no == OVSP_LOCAL) {
2411                 err = -EINVAL;
2412                 goto exit_unlock_free;
2413         }
2414
2415         err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
2416                                       info->snd_portid, info->snd_seq, 0,
2417                                       OVS_VPORT_CMD_DEL, GFP_KERNEL);
2418         BUG_ON(err < 0);
2419
2420         /* The vport deletion may require updating the datapath max_headroom. */
2421         dp = vport->dp;
2422         if (netdev_get_fwd_headroom(vport->dev) == dp->max_headroom)
2423                 update_headroom = true;
2424
2425         netdev_reset_rx_headroom(vport->dev);
2426         ovs_dp_detach_port(vport);
2427
2428         if (update_headroom) {
2429                 new_headroom = ovs_get_max_headroom(dp);
2430
2431                 if (new_headroom < dp->max_headroom)
2432                         ovs_update_headroom(dp, new_headroom);
2433         }
2434         ovs_unlock();
2435
2436         ovs_notify(&dp_vport_genl_family, reply, info);
2437         return 0;
2438
2439 exit_unlock_free:
2440         ovs_unlock();
2441         kfree_skb(reply);
2442         return err;
2443 }
2444
2445 static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
2446 {
2447         struct nlattr **a = info->attrs;
2448         struct ovs_header *ovs_header = info->userhdr;
2449         struct sk_buff *reply;
2450         struct vport *vport;
2451         int err;
2452
2453         reply = ovs_vport_cmd_alloc_info();
2454         if (!reply)
2455                 return -ENOMEM;
2456
2457         rcu_read_lock();
2458         vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
2459         err = PTR_ERR(vport);
2460         if (IS_ERR(vport))
2461                 goto exit_unlock_free;
2462         err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
2463                                       info->snd_portid, info->snd_seq, 0,
2464                                       OVS_VPORT_CMD_GET, GFP_ATOMIC);
2465         BUG_ON(err < 0);
2466         rcu_read_unlock();
2467
2468         return genlmsg_reply(reply, info);
2469
2470 exit_unlock_free:
2471         rcu_read_unlock();
2472         kfree_skb(reply);
2473         return err;
2474 }
2475
2476 static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
2477 {
2478         struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
2479         struct datapath *dp;
2480         int bucket = cb->args[0], skip = cb->args[1];
2481         int i, j = 0;
2482
2483         rcu_read_lock();
2484         dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
2485         if (!dp) {
2486                 rcu_read_unlock();
2487                 return -ENODEV;
2488         }
2489         for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
2490                 struct vport *vport;
2491
2492                 j = 0;
2493                 hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
2494                         if (j >= skip &&
2495                             ovs_vport_cmd_fill_info(vport, skb,
2496                                                     sock_net(skb->sk),
2497                                                     NETLINK_CB(cb->skb).portid,
2498                                                     cb->nlh->nlmsg_seq,
2499                                                     NLM_F_MULTI,
2500                                                     OVS_VPORT_CMD_GET,
2501                                                     GFP_ATOMIC) < 0)
2502                                 goto out;
2503
2504                         j++;
2505                 }
2506                 skip = 0;
2507         }
2508 out:
2509         rcu_read_unlock();
2510
2511         cb->args[0] = i;
2512         cb->args[1] = j;
2513
2514         return skb->len;
2515 }
2516
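/* Periodic work: rebalance the flow-mask usage counters of every datapath
 * in this net namespace, then re-arm the delayed work.
 */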
2517 static void ovs_dp_masks_rebalance(struct work_struct *work)
2518 {
2519         struct ovs_net *ovs_net = container_of(work, struct ovs_net,
2520                                                masks_rebalance.work);
2521         struct datapath *dp;
2522
2523         ovs_lock();
2524
2525         list_for_each_entry(dp, &ovs_net->dps, list_node)
2526                 ovs_flow_masks_rebalance(&dp->table);
2527
2528         ovs_unlock();
2529
2530         schedule_delayed_work(&ovs_net->masks_rebalance,
2531                               msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL));
2532 }
2533
2534 static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
2535         [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
2536         [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
2537         [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
2538         [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
2539         [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_UNSPEC },
2540         [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
2541         [OVS_VPORT_ATTR_IFINDEX] = { .type = NLA_U32 },
2542         [OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 },
2543         [OVS_VPORT_ATTR_UPCALL_STATS] = { .type = NLA_NESTED },
2544 };
2545
2546 static const struct genl_small_ops dp_vport_genl_ops[] = {
2547         { .cmd = OVS_VPORT_CMD_NEW,
2548           .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2549           .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2550           .doit = ovs_vport_cmd_new
2551         },
2552         { .cmd = OVS_VPORT_CMD_DEL,
2553           .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2554           .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2555           .doit = ovs_vport_cmd_del
2556         },
2557         { .cmd = OVS_VPORT_CMD_GET,
2558           .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2559           .flags = 0,               /* OK for unprivileged users. */
2560           .doit = ovs_vport_cmd_get,
2561           .dumpit = ovs_vport_cmd_dump
2562         },
2563         { .cmd = OVS_VPORT_CMD_SET,
2564           .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2565           .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2566           .doit = ovs_vport_cmd_set,
2567         },
2568 };
2569
2570 struct genl_family dp_vport_genl_family __ro_after_init = {
2571         .hdrsize = sizeof(struct ovs_header),
2572         .name = OVS_VPORT_FAMILY,
2573         .version = OVS_VPORT_VERSION,
2574         .maxattr = OVS_VPORT_ATTR_MAX,
2575         .policy = vport_policy,
2576         .netnsok = true,
2577         .parallel_ops = true,
2578         .small_ops = dp_vport_genl_ops,
2579         .n_small_ops = ARRAY_SIZE(dp_vport_genl_ops),
2580         .resv_start_op = OVS_VPORT_CMD_SET + 1,
2581         .mcgrps = &ovs_dp_vport_multicast_group,
2582         .n_mcgrps = 1,
2583         .module = THIS_MODULE,
2584 };
2585
2586 static struct genl_family * const dp_genl_families[] = {
2587         &dp_datapath_genl_family,
2588         &dp_vport_genl_family,
2589         &dp_flow_genl_family,
2590         &dp_packet_genl_family,
2591         &dp_meter_genl_family,
2592 #if     IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
2593         &dp_ct_limit_genl_family,
2594 #endif
2595 };
2596
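/* Unregister the first n_families entries of dp_genl_families[].  Used on
 * module unload and to unwind a partially failed registration.
 */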
2597 static void dp_unregister_genl(int n_families)
2598 {
2599         int i;
2600
2601         for (i = 0; i < n_families; i++)
2602                 genl_unregister_family(dp_genl_families[i]);
2603 }
2604
2605 static int __init dp_register_genl(void)
2606 {
2607         int err;
2608         int i;
2609
2610         for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
2612                 err = genl_register_family(dp_genl_families[i]);
2613                 if (err)
2614                         goto error;
2615         }
2616
2617         return 0;
2618
2619 error:
2620         dp_unregister_genl(i);
2621         return err;
2622 }
2623
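/* Per-net-namespace init: set up the datapath list and work items, then
 * initialize conntrack support and kick off mask rebalancing.
 */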
2624 static int __net_init ovs_init_net(struct net *net)
2625 {
2626         struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2627         int err;
2628
2629         INIT_LIST_HEAD(&ovs_net->dps);
2630         INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
2631         INIT_DELAYED_WORK(&ovs_net->masks_rebalance, ovs_dp_masks_rebalance);
2632
2633         err = ovs_ct_init(net);
2634         if (err)
2635                 return err;
2636
2637         schedule_delayed_work(&ovs_net->masks_rebalance,
2638                               msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL));
2639         return 0;
2640 }
2641
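/* Walk the datapaths of 'net' and collect on 'head' every internal vport
 * whose underlying device lives in the exiting namespace 'dnet'.
 */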
2642 static void __net_exit list_vports_from_net(struct net *net, struct net *dnet,
2643                                             struct list_head *head)
2644 {
2645         struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2646         struct datapath *dp;
2647
2648         list_for_each_entry(dp, &ovs_net->dps, list_node) {
2649                 int i;
2650
2651                 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
2652                         struct vport *vport;
2653
2654                         hlist_for_each_entry(vport, &dp->ports[i], dp_hash_node) {
2655                                 if (vport->ops->type != OVS_VPORT_TYPE_INTERNAL)
2656                                         continue;
2657
2658                                 if (dev_net(vport->dev) == dnet)
2659                                         list_add(&vport->detach_list, head);
2660                         }
2661                 }
2662         }
2663 }
2664
2665 static void __net_exit ovs_exit_net(struct net *dnet)
2666 {
2667         struct datapath *dp, *dp_next;
2668         struct ovs_net *ovs_net = net_generic(dnet, ovs_net_id);
2669         struct vport *vport, *vport_next;
2670         struct net *net;
2671         LIST_HEAD(head);
2672
2673         ovs_lock();
2674
2675         ovs_ct_exit(dnet);
2676
2677         list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
2678                 __dp_destroy(dp);
2679
2680         down_read(&net_rwsem);
2681         for_each_net(net)
2682                 list_vports_from_net(net, dnet, &head);
2683         up_read(&net_rwsem);
2684
2685         /* Detach all vports from given namespace. */
2686         list_for_each_entry_safe(vport, vport_next, &head, detach_list) {
2687                 list_del(&vport->detach_list);
2688                 ovs_dp_detach_port(vport);
2689         }
2690
2691         ovs_unlock();
2692
2693         cancel_delayed_work_sync(&ovs_net->masks_rebalance);
2694         cancel_work_sync(&ovs_net->dp_notify_work);
2695 }
2696
2697 static struct pernet_operations ovs_net_ops = {
2698         .init = ovs_init_net,
2699         .exit = ovs_exit_net,
2700         .id   = &ovs_net_id,
2701         .size = sizeof(struct ovs_net),
2702 };
2703
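/* Module init: bring up the datapath subsystems in dependency order,
 * unwinding in reverse order on failure.
 */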
2704 static int __init dp_init(void)
2705 {
2706         int err;
2707
2708         BUILD_BUG_ON(sizeof(struct ovs_skb_cb) >
2709                      sizeof_field(struct sk_buff, cb));
2710
2711         pr_info("Open vSwitch switching datapath\n");
2712
2713         err = action_fifos_init();
2714         if (err)
2715                 goto error;
2716
2717         err = ovs_internal_dev_rtnl_link_register();
2718         if (err)
2719                 goto error_action_fifos_exit;
2720
2721         err = ovs_flow_init();
2722         if (err)
2723                 goto error_unreg_rtnl_link;
2724
2725         err = ovs_vport_init();
2726         if (err)
2727                 goto error_flow_exit;
2728
2729         err = register_pernet_device(&ovs_net_ops);
2730         if (err)
2731                 goto error_vport_exit;
2732
2733         err = register_netdevice_notifier(&ovs_dp_device_notifier);
2734         if (err)
2735                 goto error_netns_exit;
2736
2737         err = ovs_netdev_init();
2738         if (err)
2739                 goto error_unreg_notifier;
2740
2741         err = dp_register_genl();
2742         if (err < 0)
2743                 goto error_unreg_netdev;
2744
2745         return 0;
2746
2747 error_unreg_netdev:
2748         ovs_netdev_exit();
2749 error_unreg_notifier:
2750         unregister_netdevice_notifier(&ovs_dp_device_notifier);
2751 error_netns_exit:
2752         unregister_pernet_device(&ovs_net_ops);
2753 error_vport_exit:
2754         ovs_vport_exit();
2755 error_flow_exit:
2756         ovs_flow_exit();
2757 error_unreg_rtnl_link:
2758         ovs_internal_dev_rtnl_link_unregister();
2759 error_action_fifos_exit:
2760         action_fifos_exit();
2761 error:
2762         return err;
2763 }
2764
2765 static void dp_cleanup(void)
2766 {
2767         dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
2768         ovs_netdev_exit();
2769         unregister_netdevice_notifier(&ovs_dp_device_notifier);
2770         unregister_pernet_device(&ovs_net_ops);
2771         rcu_barrier();
2772         ovs_vport_exit();
2773         ovs_flow_exit();
2774         ovs_internal_dev_rtnl_link_unregister();
2775         action_fifos_exit();
2776 }
2777
2778 module_init(dp_init);
2779 module_exit(dp_cleanup);
2780
2781 MODULE_DESCRIPTION("Open vSwitch switching datapath");
2782 MODULE_LICENSE("GPL");
2783 MODULE_ALIAS_GENL_FAMILY(OVS_DATAPATH_FAMILY);
2784 MODULE_ALIAS_GENL_FAMILY(OVS_VPORT_FAMILY);
2785 MODULE_ALIAS_GENL_FAMILY(OVS_FLOW_FAMILY);
2786 MODULE_ALIAS_GENL_FAMILY(OVS_PACKET_FAMILY);
2787 MODULE_ALIAS_GENL_FAMILY(OVS_METER_FAMILY);
2788 MODULE_ALIAS_GENL_FAMILY(OVS_CT_LIMIT_FAMILY);