Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
[platform/kernel/linux-rpi.git] / net / openvswitch / flow_netlink.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2007-2017 Nicira, Inc.
4  */
5
6 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7
8 #include "flow.h"
9 #include "datapath.h"
10 #include <linux/uaccess.h>
11 #include <linux/netdevice.h>
12 #include <linux/etherdevice.h>
13 #include <linux/if_ether.h>
14 #include <linux/if_vlan.h>
15 #include <net/llc_pdu.h>
16 #include <linux/kernel.h>
17 #include <linux/jhash.h>
18 #include <linux/jiffies.h>
19 #include <linux/llc.h>
20 #include <linux/module.h>
21 #include <linux/in.h>
22 #include <linux/rcupdate.h>
23 #include <linux/if_arp.h>
24 #include <linux/ip.h>
25 #include <linux/ipv6.h>
26 #include <linux/sctp.h>
27 #include <linux/tcp.h>
28 #include <linux/udp.h>
29 #include <linux/icmp.h>
30 #include <linux/icmpv6.h>
31 #include <linux/rculist.h>
32 #include <net/geneve.h>
33 #include <net/ip.h>
34 #include <net/ipv6.h>
35 #include <net/ndisc.h>
36 #include <net/mpls.h>
37 #include <net/vxlan.h>
38 #include <net/tun_proto.h>
39 #include <net/erspan.h>
40
41 #include "drop.h"
42 #include "flow_netlink.h"
43
/* One entry of an attribute-length table; the tables below are indexed by
 * netlink attribute type.
 *
 * @len:  exact payload length in bytes, or one of the OVS_ATTR_* sentinels
 *        defined below.
 * @next: length table describing the attributes nested inside this one,
 *        or NULL when there is none.
 */
struct ovs_len_tbl {
        int len;
        const struct ovs_len_tbl *next;
};

/* Sentinel values for ovs_len_tbl.len: the attribute has no single fixed
 * payload length, so check_attr_len() accepts any length for it.  The values
 * are parenthesised so they expand safely inside larger expressions.
 */
#define OVS_ATTR_NESTED (-1)
#define OVS_ATTR_VARIABLE (-2)
51
52 static bool actions_may_change_flow(const struct nlattr *actions)
53 {
54         struct nlattr *nla;
55         int rem;
56
57         nla_for_each_nested(nla, actions, rem) {
58                 u16 action = nla_type(nla);
59
60                 switch (action) {
61                 case OVS_ACTION_ATTR_OUTPUT:
62                 case OVS_ACTION_ATTR_RECIRC:
63                 case OVS_ACTION_ATTR_TRUNC:
64                 case OVS_ACTION_ATTR_USERSPACE:
65                 case OVS_ACTION_ATTR_DROP:
66                         break;
67
68                 case OVS_ACTION_ATTR_CT:
69                 case OVS_ACTION_ATTR_CT_CLEAR:
70                 case OVS_ACTION_ATTR_HASH:
71                 case OVS_ACTION_ATTR_POP_ETH:
72                 case OVS_ACTION_ATTR_POP_MPLS:
73                 case OVS_ACTION_ATTR_POP_NSH:
74                 case OVS_ACTION_ATTR_POP_VLAN:
75                 case OVS_ACTION_ATTR_PUSH_ETH:
76                 case OVS_ACTION_ATTR_PUSH_MPLS:
77                 case OVS_ACTION_ATTR_PUSH_NSH:
78                 case OVS_ACTION_ATTR_PUSH_VLAN:
79                 case OVS_ACTION_ATTR_SAMPLE:
80                 case OVS_ACTION_ATTR_SET:
81                 case OVS_ACTION_ATTR_SET_MASKED:
82                 case OVS_ACTION_ATTR_METER:
83                 case OVS_ACTION_ATTR_CHECK_PKT_LEN:
84                 case OVS_ACTION_ATTR_ADD_MPLS:
85                 case OVS_ACTION_ATTR_DEC_TTL:
86                 default:
87                         return true;
88                 }
89         }
90         return false;
91 }
92
93 static void update_range(struct sw_flow_match *match,
94                          size_t offset, size_t size, bool is_mask)
95 {
96         struct sw_flow_key_range *range;
97         size_t start = rounddown(offset, sizeof(long));
98         size_t end = roundup(offset + size, sizeof(long));
99
100         if (!is_mask)
101                 range = &match->range;
102         else
103                 range = &match->mask->range;
104
105         if (range->start == range->end) {
106                 range->start = start;
107                 range->end = end;
108                 return;
109         }
110
111         if (range->start > start)
112                 range->start = start;
113
114         if (range->end < end)
115                 range->end = end;
116 }
117
/* Assign @value to @field of the flow key, or of the mask's key when
 * @is_mask is true, after extending the corresponding range to cover the
 * field.
 */
#define SW_FLOW_KEY_PUT(match, field, value, is_mask)                       \
        do {                                                                \
                update_range(match, offsetof(struct sw_flow_key, field),    \
                             sizeof((match)->key->field), is_mask);         \
                if (!(is_mask))                                             \
                        (match)->key->field = value;                        \
                else                                                        \
                        (match)->mask->key.field = value;                   \
        } while (0)
127
/* Copy @len bytes from @value_p to byte @offset of the flow key, or of the
 * mask's key when @is_mask is true, after extending the corresponding range.
 *
 * @offset is parenthesised where it takes part in pointer arithmetic so that
 * an argument expression cannot re-associate with the cast.  @offset and
 * @len are each expanded more than once, so callers must pass expressions
 * without side effects.
 */
#define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask)     \
        do {                                                                \
                update_range(match, offset, len, is_mask);                  \
                if (is_mask)                                                \
                        memcpy((u8 *)&(match)->mask->key + (offset),        \
                               value_p, len);                               \
                else                                                        \
                        memcpy((u8 *)(match)->key + (offset), value_p,      \
                               len);                                        \
        } while (0)

/* Same as SW_FLOW_KEY_MEMCPY_OFFSET(), with the destination given as a
 * struct sw_flow_key member name instead of a byte offset.
 */
#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask)               \
        SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), \
                                  value_p, len, is_mask)
141
/* Fill every byte of @field with @value in the flow key, or in the mask's
 * key when @is_mask is true, after extending the corresponding range to
 * cover the field.
 */
#define SW_FLOW_KEY_MEMSET_FIELD(match, field, value, is_mask)              \
        do {                                                                \
                update_range(match, offsetof(struct sw_flow_key, field),    \
                             sizeof((match)->key->field), is_mask);         \
                if (!(is_mask))                                             \
                        memset((u8 *)&(match)->key->field, value,           \
                               sizeof((match)->key->field));                \
                else                                                        \
                        memset((u8 *)&(match)->mask->key.field, value,      \
                               sizeof((match)->mask->key.field));           \
        } while (0)
153
154 static bool match_validate(const struct sw_flow_match *match,
155                            u64 key_attrs, u64 mask_attrs, bool log)
156 {
157         u64 key_expected = 0;
158         u64 mask_allowed = key_attrs;  /* At most allow all key attributes */
159
160         /* The following mask attributes allowed only if they
161          * pass the validation tests. */
162         mask_allowed &= ~((1 << OVS_KEY_ATTR_IPV4)
163                         | (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4)
164                         | (1 << OVS_KEY_ATTR_IPV6)
165                         | (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6)
166                         | (1 << OVS_KEY_ATTR_TCP)
167                         | (1 << OVS_KEY_ATTR_TCP_FLAGS)
168                         | (1 << OVS_KEY_ATTR_UDP)
169                         | (1 << OVS_KEY_ATTR_SCTP)
170                         | (1 << OVS_KEY_ATTR_ICMP)
171                         | (1 << OVS_KEY_ATTR_ICMPV6)
172                         | (1 << OVS_KEY_ATTR_ARP)
173                         | (1 << OVS_KEY_ATTR_ND)
174                         | (1 << OVS_KEY_ATTR_MPLS)
175                         | (1 << OVS_KEY_ATTR_NSH));
176
177         /* Always allowed mask fields. */
178         mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL)
179                        | (1 << OVS_KEY_ATTR_IN_PORT)
180                        | (1 << OVS_KEY_ATTR_ETHERTYPE));
181
182         /* Check key attributes. */
183         if (match->key->eth.type == htons(ETH_P_ARP)
184                         || match->key->eth.type == htons(ETH_P_RARP)) {
185                 key_expected |= 1 << OVS_KEY_ATTR_ARP;
186                 if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
187                         mask_allowed |= 1 << OVS_KEY_ATTR_ARP;
188         }
189
190         if (eth_p_mpls(match->key->eth.type)) {
191                 key_expected |= 1 << OVS_KEY_ATTR_MPLS;
192                 if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
193                         mask_allowed |= 1 << OVS_KEY_ATTR_MPLS;
194         }
195
196         if (match->key->eth.type == htons(ETH_P_IP)) {
197                 key_expected |= 1 << OVS_KEY_ATTR_IPV4;
198                 if (match->mask && match->mask->key.eth.type == htons(0xffff)) {
199                         mask_allowed |= 1 << OVS_KEY_ATTR_IPV4;
200                         mask_allowed |= 1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4;
201                 }
202
203                 if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
204                         if (match->key->ip.proto == IPPROTO_UDP) {
205                                 key_expected |= 1 << OVS_KEY_ATTR_UDP;
206                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
207                                         mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
208                         }
209
210                         if (match->key->ip.proto == IPPROTO_SCTP) {
211                                 key_expected |= 1 << OVS_KEY_ATTR_SCTP;
212                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
213                                         mask_allowed |= 1 << OVS_KEY_ATTR_SCTP;
214                         }
215
216                         if (match->key->ip.proto == IPPROTO_TCP) {
217                                 key_expected |= 1 << OVS_KEY_ATTR_TCP;
218                                 key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
219                                 if (match->mask && (match->mask->key.ip.proto == 0xff)) {
220                                         mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
221                                         mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
222                                 }
223                         }
224
225                         if (match->key->ip.proto == IPPROTO_ICMP) {
226                                 key_expected |= 1 << OVS_KEY_ATTR_ICMP;
227                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
228                                         mask_allowed |= 1 << OVS_KEY_ATTR_ICMP;
229                         }
230                 }
231         }
232
233         if (match->key->eth.type == htons(ETH_P_IPV6)) {
234                 key_expected |= 1 << OVS_KEY_ATTR_IPV6;
235                 if (match->mask && match->mask->key.eth.type == htons(0xffff)) {
236                         mask_allowed |= 1 << OVS_KEY_ATTR_IPV6;
237                         mask_allowed |= 1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6;
238                 }
239
240                 if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
241                         if (match->key->ip.proto == IPPROTO_UDP) {
242                                 key_expected |= 1 << OVS_KEY_ATTR_UDP;
243                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
244                                         mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
245                         }
246
247                         if (match->key->ip.proto == IPPROTO_SCTP) {
248                                 key_expected |= 1 << OVS_KEY_ATTR_SCTP;
249                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
250                                         mask_allowed |= 1 << OVS_KEY_ATTR_SCTP;
251                         }
252
253                         if (match->key->ip.proto == IPPROTO_TCP) {
254                                 key_expected |= 1 << OVS_KEY_ATTR_TCP;
255                                 key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
256                                 if (match->mask && (match->mask->key.ip.proto == 0xff)) {
257                                         mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
258                                         mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
259                                 }
260                         }
261
262                         if (match->key->ip.proto == IPPROTO_ICMPV6) {
263                                 key_expected |= 1 << OVS_KEY_ATTR_ICMPV6;
264                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
265                                         mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6;
266
267                                 if (match->key->tp.src ==
268                                                 htons(NDISC_NEIGHBOUR_SOLICITATION) ||
269                                     match->key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
270                                         key_expected |= 1 << OVS_KEY_ATTR_ND;
271                                         /* Original direction conntrack tuple
272                                          * uses the same space as the ND fields
273                                          * in the key, so both are not allowed
274                                          * at the same time.
275                                          */
276                                         mask_allowed &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6);
277                                         if (match->mask && (match->mask->key.tp.src == htons(0xff)))
278                                                 mask_allowed |= 1 << OVS_KEY_ATTR_ND;
279                                 }
280                         }
281                 }
282         }
283
284         if (match->key->eth.type == htons(ETH_P_NSH)) {
285                 key_expected |= 1 << OVS_KEY_ATTR_NSH;
286                 if (match->mask &&
287                     match->mask->key.eth.type == htons(0xffff)) {
288                         mask_allowed |= 1 << OVS_KEY_ATTR_NSH;
289                 }
290         }
291
292         if ((key_attrs & key_expected) != key_expected) {
293                 /* Key attributes check failed. */
294                 OVS_NLERR(log, "Missing key (keys=%llx, expected=%llx)",
295                           (unsigned long long)key_attrs,
296                           (unsigned long long)key_expected);
297                 return false;
298         }
299
300         if ((mask_attrs & mask_allowed) != mask_attrs) {
301                 /* Mask attributes check failed. */
302                 OVS_NLERR(log, "Unexpected mask (mask=%llx, allowed=%llx)",
303                           (unsigned long long)mask_attrs,
304                           (unsigned long long)mask_allowed);
305                 return false;
306         }
307
308         return true;
309 }
310
311 size_t ovs_tun_key_attr_size(void)
312 {
313         /* Whenever adding new OVS_TUNNEL_KEY_ FIELDS, we should consider
314          * updating this function.
315          */
316         return    nla_total_size_64bit(8) /* OVS_TUNNEL_KEY_ATTR_ID */
317                 + nla_total_size(16)   /* OVS_TUNNEL_KEY_ATTR_IPV[46]_SRC */
318                 + nla_total_size(16)   /* OVS_TUNNEL_KEY_ATTR_IPV[46]_DST */
319                 + nla_total_size(1)    /* OVS_TUNNEL_KEY_ATTR_TOS */
320                 + nla_total_size(1)    /* OVS_TUNNEL_KEY_ATTR_TTL */
321                 + nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
322                 + nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_CSUM */
323                 + nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_OAM */
324                 + nla_total_size(256)  /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */
325                 /* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS and
326                  * OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS is mutually exclusive with
327                  * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it.
328                  */
329                 + nla_total_size(2)    /* OVS_TUNNEL_KEY_ATTR_TP_SRC */
330                 + nla_total_size(2);   /* OVS_TUNNEL_KEY_ATTR_TP_DST */
331 }
332
333 static size_t ovs_nsh_key_attr_size(void)
334 {
335         /* Whenever adding new OVS_NSH_KEY_ FIELDS, we should consider
336          * updating this function.
337          */
338         return  nla_total_size(NSH_BASE_HDR_LEN) /* OVS_NSH_KEY_ATTR_BASE */
339                 /* OVS_NSH_KEY_ATTR_MD1 and OVS_NSH_KEY_ATTR_MD2 are
340                  * mutually exclusive, so the bigger one can cover
341                  * the small one.
342                  */
343                 + nla_total_size(NSH_CTX_HDRS_MAX_LEN);
344 }
345
346 size_t ovs_key_attr_size(void)
347 {
348         /* Whenever adding new OVS_KEY_ FIELDS, we should consider
349          * updating this function.
350          */
351         BUILD_BUG_ON(OVS_KEY_ATTR_MAX != 32);
352
353         return    nla_total_size(4)   /* OVS_KEY_ATTR_PRIORITY */
354                 + nla_total_size(0)   /* OVS_KEY_ATTR_TUNNEL */
355                   + ovs_tun_key_attr_size()
356                 + nla_total_size(4)   /* OVS_KEY_ATTR_IN_PORT */
357                 + nla_total_size(4)   /* OVS_KEY_ATTR_SKB_MARK */
358                 + nla_total_size(4)   /* OVS_KEY_ATTR_DP_HASH */
359                 + nla_total_size(4)   /* OVS_KEY_ATTR_RECIRC_ID */
360                 + nla_total_size(4)   /* OVS_KEY_ATTR_CT_STATE */
361                 + nla_total_size(2)   /* OVS_KEY_ATTR_CT_ZONE */
362                 + nla_total_size(4)   /* OVS_KEY_ATTR_CT_MARK */
363                 + nla_total_size(16)  /* OVS_KEY_ATTR_CT_LABELS */
364                 + nla_total_size(40)  /* OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6 */
365                 + nla_total_size(0)   /* OVS_KEY_ATTR_NSH */
366                   + ovs_nsh_key_attr_size()
367                 + nla_total_size(12)  /* OVS_KEY_ATTR_ETHERNET */
368                 + nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
369                 + nla_total_size(4)   /* OVS_KEY_ATTR_VLAN */
370                 + nla_total_size(0)   /* OVS_KEY_ATTR_ENCAP */
371                 + nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
372                 + nla_total_size(40)  /* OVS_KEY_ATTR_IPV6 */
373                 + nla_total_size(2)   /* OVS_KEY_ATTR_ICMPV6 */
374                 + nla_total_size(28)  /* OVS_KEY_ATTR_ND */
375                 + nla_total_size(2);  /* OVS_KEY_ATTR_IPV6_EXTHDRS */
376 }
377
378 static const struct ovs_len_tbl ovs_vxlan_ext_key_lens[OVS_VXLAN_EXT_MAX + 1] = {
379         [OVS_VXLAN_EXT_GBP]         = { .len = sizeof(u32) },
380 };
381
382 static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = {
383         [OVS_TUNNEL_KEY_ATTR_ID]            = { .len = sizeof(u64) },
384         [OVS_TUNNEL_KEY_ATTR_IPV4_SRC]      = { .len = sizeof(u32) },
385         [OVS_TUNNEL_KEY_ATTR_IPV4_DST]      = { .len = sizeof(u32) },
386         [OVS_TUNNEL_KEY_ATTR_TOS]           = { .len = 1 },
387         [OVS_TUNNEL_KEY_ATTR_TTL]           = { .len = 1 },
388         [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = { .len = 0 },
389         [OVS_TUNNEL_KEY_ATTR_CSUM]          = { .len = 0 },
390         [OVS_TUNNEL_KEY_ATTR_TP_SRC]        = { .len = sizeof(u16) },
391         [OVS_TUNNEL_KEY_ATTR_TP_DST]        = { .len = sizeof(u16) },
392         [OVS_TUNNEL_KEY_ATTR_OAM]           = { .len = 0 },
393         [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS]   = { .len = OVS_ATTR_VARIABLE },
394         [OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS]    = { .len = OVS_ATTR_NESTED,
395                                                 .next = ovs_vxlan_ext_key_lens },
396         [OVS_TUNNEL_KEY_ATTR_IPV6_SRC]      = { .len = sizeof(struct in6_addr) },
397         [OVS_TUNNEL_KEY_ATTR_IPV6_DST]      = { .len = sizeof(struct in6_addr) },
398         [OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS]   = { .len = OVS_ATTR_VARIABLE },
399         [OVS_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE]   = { .len = 0 },
400 };
401
402 static const struct ovs_len_tbl
403 ovs_nsh_key_attr_lens[OVS_NSH_KEY_ATTR_MAX + 1] = {
404         [OVS_NSH_KEY_ATTR_BASE] = { .len = sizeof(struct ovs_nsh_key_base) },
405         [OVS_NSH_KEY_ATTR_MD1]  = { .len = sizeof(struct ovs_nsh_key_md1) },
406         [OVS_NSH_KEY_ATTR_MD2]  = { .len = OVS_ATTR_VARIABLE },
407 };
408
409 /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute.  */
410 static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
411         [OVS_KEY_ATTR_ENCAP]     = { .len = OVS_ATTR_NESTED },
412         [OVS_KEY_ATTR_PRIORITY]  = { .len = sizeof(u32) },
413         [OVS_KEY_ATTR_IN_PORT]   = { .len = sizeof(u32) },
414         [OVS_KEY_ATTR_SKB_MARK]  = { .len = sizeof(u32) },
415         [OVS_KEY_ATTR_ETHERNET]  = { .len = sizeof(struct ovs_key_ethernet) },
416         [OVS_KEY_ATTR_VLAN]      = { .len = sizeof(__be16) },
417         [OVS_KEY_ATTR_ETHERTYPE] = { .len = sizeof(__be16) },
418         [OVS_KEY_ATTR_IPV4]      = { .len = sizeof(struct ovs_key_ipv4) },
419         [OVS_KEY_ATTR_IPV6]      = { .len = sizeof(struct ovs_key_ipv6) },
420         [OVS_KEY_ATTR_TCP]       = { .len = sizeof(struct ovs_key_tcp) },
421         [OVS_KEY_ATTR_TCP_FLAGS] = { .len = sizeof(__be16) },
422         [OVS_KEY_ATTR_UDP]       = { .len = sizeof(struct ovs_key_udp) },
423         [OVS_KEY_ATTR_SCTP]      = { .len = sizeof(struct ovs_key_sctp) },
424         [OVS_KEY_ATTR_ICMP]      = { .len = sizeof(struct ovs_key_icmp) },
425         [OVS_KEY_ATTR_ICMPV6]    = { .len = sizeof(struct ovs_key_icmpv6) },
426         [OVS_KEY_ATTR_ARP]       = { .len = sizeof(struct ovs_key_arp) },
427         [OVS_KEY_ATTR_ND]        = { .len = sizeof(struct ovs_key_nd) },
428         [OVS_KEY_ATTR_RECIRC_ID] = { .len = sizeof(u32) },
429         [OVS_KEY_ATTR_DP_HASH]   = { .len = sizeof(u32) },
430         [OVS_KEY_ATTR_TUNNEL]    = { .len = OVS_ATTR_NESTED,
431                                      .next = ovs_tunnel_key_lens, },
432         [OVS_KEY_ATTR_MPLS]      = { .len = OVS_ATTR_VARIABLE },
433         [OVS_KEY_ATTR_CT_STATE]  = { .len = sizeof(u32) },
434         [OVS_KEY_ATTR_CT_ZONE]   = { .len = sizeof(u16) },
435         [OVS_KEY_ATTR_CT_MARK]   = { .len = sizeof(u32) },
436         [OVS_KEY_ATTR_CT_LABELS] = { .len = sizeof(struct ovs_key_ct_labels) },
437         [OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4] = {
438                 .len = sizeof(struct ovs_key_ct_tuple_ipv4) },
439         [OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6] = {
440                 .len = sizeof(struct ovs_key_ct_tuple_ipv6) },
441         [OVS_KEY_ATTR_NSH]       = { .len = OVS_ATTR_NESTED,
442                                      .next = ovs_nsh_key_attr_lens, },
443         [OVS_KEY_ATTR_IPV6_EXTHDRS] = {
444                 .len = sizeof(struct ovs_key_ipv6_exthdrs) },
445 };
446
447 static bool check_attr_len(unsigned int attr_len, unsigned int expected_len)
448 {
449         return expected_len == attr_len ||
450                expected_len == OVS_ATTR_NESTED ||
451                expected_len == OVS_ATTR_VARIABLE;
452 }
453
/* Return true when the @size bytes starting at @fp are all zero.
 *
 * A NULL @fp yields false.  A zero @size with a non-NULL @fp yields true.
 * The index is a size_t so that it has the same type and range as @size.
 */
static bool is_all_zero(const u8 *fp, size_t size)
{
        size_t i;

        if (!fp)
                return false;

        for (i = 0; i < size; i++)
                if (fp[i])
                        return false;

        return true;
}
467
468 static int __parse_flow_nlattrs(const struct nlattr *attr,
469                                 const struct nlattr *a[],
470                                 u64 *attrsp, bool log, bool nz)
471 {
472         const struct nlattr *nla;
473         u64 attrs;
474         int rem;
475
476         attrs = *attrsp;
477         nla_for_each_nested(nla, attr, rem) {
478                 u16 type = nla_type(nla);
479                 int expected_len;
480
481                 if (type > OVS_KEY_ATTR_MAX) {
482                         OVS_NLERR(log, "Key type %d is out of range max %d",
483                                   type, OVS_KEY_ATTR_MAX);
484                         return -EINVAL;
485                 }
486
487                 if (type == OVS_KEY_ATTR_PACKET_TYPE ||
488                     type == OVS_KEY_ATTR_ND_EXTENSIONS ||
489                     type == OVS_KEY_ATTR_TUNNEL_INFO) {
490                         OVS_NLERR(log, "Key type %d is not supported", type);
491                         return -EINVAL;
492                 }
493
494                 if (attrs & (1ULL << type)) {
495                         OVS_NLERR(log, "Duplicate key (type %d).", type);
496                         return -EINVAL;
497                 }
498
499                 expected_len = ovs_key_lens[type].len;
500                 if (!check_attr_len(nla_len(nla), expected_len)) {
501                         OVS_NLERR(log, "Key %d has unexpected len %d expected %d",
502                                   type, nla_len(nla), expected_len);
503                         return -EINVAL;
504                 }
505
506                 if (!nz || !is_all_zero(nla_data(nla), nla_len(nla))) {
507                         attrs |= 1ULL << type;
508                         a[type] = nla;
509                 }
510         }
511         if (rem) {
512                 OVS_NLERR(log, "Message has %d unknown bytes.", rem);
513                 return -EINVAL;
514         }
515
516         *attrsp = attrs;
517         return 0;
518 }
519
520 static int parse_flow_mask_nlattrs(const struct nlattr *attr,
521                                    const struct nlattr *a[], u64 *attrsp,
522                                    bool log)
523 {
524         return __parse_flow_nlattrs(attr, a, attrsp, log, true);
525 }
526
527 int parse_flow_nlattrs(const struct nlattr *attr, const struct nlattr *a[],
528                        u64 *attrsp, bool log)
529 {
530         return __parse_flow_nlattrs(attr, a, attrsp, log, false);
531 }
532
533 static int genev_tun_opt_from_nlattr(const struct nlattr *a,
534                                      struct sw_flow_match *match, bool is_mask,
535                                      bool log)
536 {
537         unsigned long opt_key_offset;
538
539         if (nla_len(a) > sizeof(match->key->tun_opts)) {
540                 OVS_NLERR(log, "Geneve option length err (len %d, max %zu).",
541                           nla_len(a), sizeof(match->key->tun_opts));
542                 return -EINVAL;
543         }
544
545         if (nla_len(a) % 4 != 0) {
546                 OVS_NLERR(log, "Geneve opt len %d is not a multiple of 4.",
547                           nla_len(a));
548                 return -EINVAL;
549         }
550
551         /* We need to record the length of the options passed
552          * down, otherwise packets with the same format but
553          * additional options will be silently matched.
554          */
555         if (!is_mask) {
556                 SW_FLOW_KEY_PUT(match, tun_opts_len, nla_len(a),
557                                 false);
558         } else {
559                 /* This is somewhat unusual because it looks at
560                  * both the key and mask while parsing the
561                  * attributes (and by extension assumes the key
562                  * is parsed first). Normally, we would verify
563                  * that each is the correct length and that the
564                  * attributes line up in the validate function.
565                  * However, that is difficult because this is
566                  * variable length and we won't have the
567                  * information later.
568                  */
569                 if (match->key->tun_opts_len != nla_len(a)) {
570                         OVS_NLERR(log, "Geneve option len %d != mask len %d",
571                                   match->key->tun_opts_len, nla_len(a));
572                         return -EINVAL;
573                 }
574
575                 SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
576         }
577
578         opt_key_offset = TUN_METADATA_OFFSET(nla_len(a));
579         SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a),
580                                   nla_len(a), is_mask);
581         return 0;
582 }
583
584 static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr,
585                                      struct sw_flow_match *match, bool is_mask,
586                                      bool log)
587 {
588         struct nlattr *a;
589         int rem;
590         unsigned long opt_key_offset;
591         struct vxlan_metadata opts;
592
593         BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts));
594
595         memset(&opts, 0, sizeof(opts));
596         nla_for_each_nested(a, attr, rem) {
597                 int type = nla_type(a);
598
599                 if (type > OVS_VXLAN_EXT_MAX) {
600                         OVS_NLERR(log, "VXLAN extension %d out of range max %d",
601                                   type, OVS_VXLAN_EXT_MAX);
602                         return -EINVAL;
603                 }
604
605                 if (!check_attr_len(nla_len(a),
606                                     ovs_vxlan_ext_key_lens[type].len)) {
607                         OVS_NLERR(log, "VXLAN extension %d has unexpected len %d expected %d",
608                                   type, nla_len(a),
609                                   ovs_vxlan_ext_key_lens[type].len);
610                         return -EINVAL;
611                 }
612
613                 switch (type) {
614                 case OVS_VXLAN_EXT_GBP:
615                         opts.gbp = nla_get_u32(a);
616                         break;
617                 default:
618                         OVS_NLERR(log, "Unknown VXLAN extension attribute %d",
619                                   type);
620                         return -EINVAL;
621                 }
622         }
623         if (rem) {
624                 OVS_NLERR(log, "VXLAN extension message has %d unknown bytes.",
625                           rem);
626                 return -EINVAL;
627         }
628
629         if (!is_mask)
630                 SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), false);
631         else
632                 SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
633
634         opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts));
635         SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts),
636                                   is_mask);
637         return 0;
638 }
639
640 static int erspan_tun_opt_from_nlattr(const struct nlattr *a,
641                                       struct sw_flow_match *match, bool is_mask,
642                                       bool log)
643 {
644         unsigned long opt_key_offset;
645
646         BUILD_BUG_ON(sizeof(struct erspan_metadata) >
647                      sizeof(match->key->tun_opts));
648
649         if (nla_len(a) > sizeof(match->key->tun_opts)) {
650                 OVS_NLERR(log, "ERSPAN option length err (len %d, max %zu).",
651                           nla_len(a), sizeof(match->key->tun_opts));
652                 return -EINVAL;
653         }
654
655         if (!is_mask)
656                 SW_FLOW_KEY_PUT(match, tun_opts_len,
657                                 sizeof(struct erspan_metadata), false);
658         else
659                 SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
660
661         opt_key_offset = TUN_METADATA_OFFSET(nla_len(a));
662         SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a),
663                                   nla_len(a), is_mask);
664         return 0;
665 }
666
667 static int ip_tun_from_nlattr(const struct nlattr *attr,
668                               struct sw_flow_match *match, bool is_mask,
669                               bool log)
670 {
671         bool ttl = false, ipv4 = false, ipv6 = false;
672         bool info_bridge_mode = false;
673         __be16 tun_flags = 0;
674         int opts_type = 0;
675         struct nlattr *a;
676         int rem;
677
678         nla_for_each_nested(a, attr, rem) {
679                 int type = nla_type(a);
680                 int err;
681
682                 if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
683                         OVS_NLERR(log, "Tunnel attr %d out of range max %d",
684                                   type, OVS_TUNNEL_KEY_ATTR_MAX);
685                         return -EINVAL;
686                 }
687
688                 if (!check_attr_len(nla_len(a),
689                                     ovs_tunnel_key_lens[type].len)) {
690                         OVS_NLERR(log, "Tunnel attr %d has unexpected len %d expected %d",
691                                   type, nla_len(a), ovs_tunnel_key_lens[type].len);
692                         return -EINVAL;
693                 }
694
695                 switch (type) {
696                 case OVS_TUNNEL_KEY_ATTR_ID:
697                         SW_FLOW_KEY_PUT(match, tun_key.tun_id,
698                                         nla_get_be64(a), is_mask);
699                         tun_flags |= TUNNEL_KEY;
700                         break;
701                 case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
702                         SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.src,
703                                         nla_get_in_addr(a), is_mask);
704                         ipv4 = true;
705                         break;
706                 case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
707                         SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.dst,
708                                         nla_get_in_addr(a), is_mask);
709                         ipv4 = true;
710                         break;
711                 case OVS_TUNNEL_KEY_ATTR_IPV6_SRC:
712                         SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.src,
713                                         nla_get_in6_addr(a), is_mask);
714                         ipv6 = true;
715                         break;
716                 case OVS_TUNNEL_KEY_ATTR_IPV6_DST:
717                         SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.dst,
718                                         nla_get_in6_addr(a), is_mask);
719                         ipv6 = true;
720                         break;
721                 case OVS_TUNNEL_KEY_ATTR_TOS:
722                         SW_FLOW_KEY_PUT(match, tun_key.tos,
723                                         nla_get_u8(a), is_mask);
724                         break;
725                 case OVS_TUNNEL_KEY_ATTR_TTL:
726                         SW_FLOW_KEY_PUT(match, tun_key.ttl,
727                                         nla_get_u8(a), is_mask);
728                         ttl = true;
729                         break;
730                 case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
731                         tun_flags |= TUNNEL_DONT_FRAGMENT;
732                         break;
733                 case OVS_TUNNEL_KEY_ATTR_CSUM:
734                         tun_flags |= TUNNEL_CSUM;
735                         break;
736                 case OVS_TUNNEL_KEY_ATTR_TP_SRC:
737                         SW_FLOW_KEY_PUT(match, tun_key.tp_src,
738                                         nla_get_be16(a), is_mask);
739                         break;
740                 case OVS_TUNNEL_KEY_ATTR_TP_DST:
741                         SW_FLOW_KEY_PUT(match, tun_key.tp_dst,
742                                         nla_get_be16(a), is_mask);
743                         break;
744                 case OVS_TUNNEL_KEY_ATTR_OAM:
745                         tun_flags |= TUNNEL_OAM;
746                         break;
747                 case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
748                         if (opts_type) {
749                                 OVS_NLERR(log, "Multiple metadata blocks provided");
750                                 return -EINVAL;
751                         }
752
753                         err = genev_tun_opt_from_nlattr(a, match, is_mask, log);
754                         if (err)
755                                 return err;
756
757                         tun_flags |= TUNNEL_GENEVE_OPT;
758                         opts_type = type;
759                         break;
760                 case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
761                         if (opts_type) {
762                                 OVS_NLERR(log, "Multiple metadata blocks provided");
763                                 return -EINVAL;
764                         }
765
766                         err = vxlan_tun_opt_from_nlattr(a, match, is_mask, log);
767                         if (err)
768                                 return err;
769
770                         tun_flags |= TUNNEL_VXLAN_OPT;
771                         opts_type = type;
772                         break;
773                 case OVS_TUNNEL_KEY_ATTR_PAD:
774                         break;
775                 case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
776                         if (opts_type) {
777                                 OVS_NLERR(log, "Multiple metadata blocks provided");
778                                 return -EINVAL;
779                         }
780
781                         err = erspan_tun_opt_from_nlattr(a, match, is_mask,
782                                                          log);
783                         if (err)
784                                 return err;
785
786                         tun_flags |= TUNNEL_ERSPAN_OPT;
787                         opts_type = type;
788                         break;
789                 case OVS_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE:
790                         info_bridge_mode = true;
791                         ipv4 = true;
792                         break;
793                 default:
794                         OVS_NLERR(log, "Unknown IP tunnel attribute %d",
795                                   type);
796                         return -EINVAL;
797                 }
798         }
799
800         SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);
801         if (is_mask)
802                 SW_FLOW_KEY_MEMSET_FIELD(match, tun_proto, 0xff, true);
803         else
804                 SW_FLOW_KEY_PUT(match, tun_proto, ipv6 ? AF_INET6 : AF_INET,
805                                 false);
806
807         if (rem > 0) {
808                 OVS_NLERR(log, "IP tunnel attribute has %d unknown bytes.",
809                           rem);
810                 return -EINVAL;
811         }
812
813         if (ipv4 && ipv6) {
814                 OVS_NLERR(log, "Mixed IPv4 and IPv6 tunnel attributes");
815                 return -EINVAL;
816         }
817
818         if (!is_mask) {
819                 if (!ipv4 && !ipv6) {
820                         OVS_NLERR(log, "IP tunnel dst address not specified");
821                         return -EINVAL;
822                 }
823                 if (ipv4) {
824                         if (info_bridge_mode) {
825                                 if (match->key->tun_key.u.ipv4.src ||
826                                     match->key->tun_key.u.ipv4.dst ||
827                                     match->key->tun_key.tp_src ||
828                                     match->key->tun_key.tp_dst ||
829                                     match->key->tun_key.ttl ||
830                                     match->key->tun_key.tos ||
831                                     tun_flags & ~TUNNEL_KEY) {
832                                         OVS_NLERR(log, "IPv4 tun info is not correct");
833                                         return -EINVAL;
834                                 }
835                         } else if (!match->key->tun_key.u.ipv4.dst) {
836                                 OVS_NLERR(log, "IPv4 tunnel dst address is zero");
837                                 return -EINVAL;
838                         }
839                 }
840                 if (ipv6 && ipv6_addr_any(&match->key->tun_key.u.ipv6.dst)) {
841                         OVS_NLERR(log, "IPv6 tunnel dst address is zero");
842                         return -EINVAL;
843                 }
844
845                 if (!ttl && !info_bridge_mode) {
846                         OVS_NLERR(log, "IP tunnel TTL not specified.");
847                         return -EINVAL;
848                 }
849         }
850
851         return opts_type;
852 }
853
854 static int vxlan_opt_to_nlattr(struct sk_buff *skb,
855                                const void *tun_opts, int swkey_tun_opts_len)
856 {
857         const struct vxlan_metadata *opts = tun_opts;
858         struct nlattr *nla;
859
860         nla = nla_nest_start_noflag(skb, OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS);
861         if (!nla)
862                 return -EMSGSIZE;
863
864         if (nla_put_u32(skb, OVS_VXLAN_EXT_GBP, opts->gbp) < 0)
865                 return -EMSGSIZE;
866
867         nla_nest_end(skb, nla);
868         return 0;
869 }
870
871 static int __ip_tun_to_nlattr(struct sk_buff *skb,
872                               const struct ip_tunnel_key *output,
873                               const void *tun_opts, int swkey_tun_opts_len,
874                               unsigned short tun_proto, u8 mode)
875 {
876         if (output->tun_flags & TUNNEL_KEY &&
877             nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id,
878                          OVS_TUNNEL_KEY_ATTR_PAD))
879                 return -EMSGSIZE;
880
881         if (mode & IP_TUNNEL_INFO_BRIDGE)
882                 return nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE)
883                        ? -EMSGSIZE : 0;
884
885         switch (tun_proto) {
886         case AF_INET:
887                 if (output->u.ipv4.src &&
888                     nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC,
889                                     output->u.ipv4.src))
890                         return -EMSGSIZE;
891                 if (output->u.ipv4.dst &&
892                     nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST,
893                                     output->u.ipv4.dst))
894                         return -EMSGSIZE;
895                 break;
896         case AF_INET6:
897                 if (!ipv6_addr_any(&output->u.ipv6.src) &&
898                     nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_SRC,
899                                      &output->u.ipv6.src))
900                         return -EMSGSIZE;
901                 if (!ipv6_addr_any(&output->u.ipv6.dst) &&
902                     nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_DST,
903                                      &output->u.ipv6.dst))
904                         return -EMSGSIZE;
905                 break;
906         }
907         if (output->tos &&
908             nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->tos))
909                 return -EMSGSIZE;
910         if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ttl))
911                 return -EMSGSIZE;
912         if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) &&
913             nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
914                 return -EMSGSIZE;
915         if ((output->tun_flags & TUNNEL_CSUM) &&
916             nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
917                 return -EMSGSIZE;
918         if (output->tp_src &&
919             nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_SRC, output->tp_src))
920                 return -EMSGSIZE;
921         if (output->tp_dst &&
922             nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_DST, output->tp_dst))
923                 return -EMSGSIZE;
924         if ((output->tun_flags & TUNNEL_OAM) &&
925             nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
926                 return -EMSGSIZE;
927         if (swkey_tun_opts_len) {
928                 if (output->tun_flags & TUNNEL_GENEVE_OPT &&
929                     nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
930                             swkey_tun_opts_len, tun_opts))
931                         return -EMSGSIZE;
932                 else if (output->tun_flags & TUNNEL_VXLAN_OPT &&
933                          vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len))
934                         return -EMSGSIZE;
935                 else if (output->tun_flags & TUNNEL_ERSPAN_OPT &&
936                          nla_put(skb, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS,
937                                  swkey_tun_opts_len, tun_opts))
938                         return -EMSGSIZE;
939         }
940
941         return 0;
942 }
943
944 static int ip_tun_to_nlattr(struct sk_buff *skb,
945                             const struct ip_tunnel_key *output,
946                             const void *tun_opts, int swkey_tun_opts_len,
947                             unsigned short tun_proto, u8 mode)
948 {
949         struct nlattr *nla;
950         int err;
951
952         nla = nla_nest_start_noflag(skb, OVS_KEY_ATTR_TUNNEL);
953         if (!nla)
954                 return -EMSGSIZE;
955
956         err = __ip_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len,
957                                  tun_proto, mode);
958         if (err)
959                 return err;
960
961         nla_nest_end(skb, nla);
962         return 0;
963 }
964
965 int ovs_nla_put_tunnel_info(struct sk_buff *skb,
966                             struct ip_tunnel_info *tun_info)
967 {
968         return __ip_tun_to_nlattr(skb, &tun_info->key,
969                                   ip_tunnel_info_opts(tun_info),
970                                   tun_info->options_len,
971                                   ip_tunnel_info_af(tun_info), tun_info->mode);
972 }
973
974 static int encode_vlan_from_nlattrs(struct sw_flow_match *match,
975                                     const struct nlattr *a[],
976                                     bool is_mask, bool inner)
977 {
978         __be16 tci = 0;
979         __be16 tpid = 0;
980
981         if (a[OVS_KEY_ATTR_VLAN])
982                 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
983
984         if (a[OVS_KEY_ATTR_ETHERTYPE])
985                 tpid = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
986
987         if (likely(!inner)) {
988                 SW_FLOW_KEY_PUT(match, eth.vlan.tpid, tpid, is_mask);
989                 SW_FLOW_KEY_PUT(match, eth.vlan.tci, tci, is_mask);
990         } else {
991                 SW_FLOW_KEY_PUT(match, eth.cvlan.tpid, tpid, is_mask);
992                 SW_FLOW_KEY_PUT(match, eth.cvlan.tci, tci, is_mask);
993         }
994         return 0;
995 }
996
997 static int validate_vlan_from_nlattrs(const struct sw_flow_match *match,
998                                       u64 key_attrs, bool inner,
999                                       const struct nlattr **a, bool log)
1000 {
1001         __be16 tci = 0;
1002
1003         if (!((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) &&
1004               (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) &&
1005                eth_type_vlan(nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE])))) {
1006                 /* Not a VLAN. */
1007                 return 0;
1008         }
1009
1010         if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) &&
1011               (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) {
1012                 OVS_NLERR(log, "Invalid %s frame", (inner) ? "C-VLAN" : "VLAN");
1013                 return -EINVAL;
1014         }
1015
1016         if (a[OVS_KEY_ATTR_VLAN])
1017                 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
1018
1019         if (!(tci & htons(VLAN_CFI_MASK))) {
1020                 if (tci) {
1021                         OVS_NLERR(log, "%s TCI does not have VLAN_CFI_MASK bit set.",
1022                                   (inner) ? "C-VLAN" : "VLAN");
1023                         return -EINVAL;
1024                 } else if (nla_len(a[OVS_KEY_ATTR_ENCAP])) {
1025                         /* Corner case for truncated VLAN header. */
1026                         OVS_NLERR(log, "Truncated %s header has non-zero encap attribute.",
1027                                   (inner) ? "C-VLAN" : "VLAN");
1028                         return -EINVAL;
1029                 }
1030         }
1031
1032         return 1;
1033 }
1034
1035 static int validate_vlan_mask_from_nlattrs(const struct sw_flow_match *match,
1036                                            u64 key_attrs, bool inner,
1037                                            const struct nlattr **a, bool log)
1038 {
1039         __be16 tci = 0;
1040         __be16 tpid = 0;
1041         bool encap_valid = !!(match->key->eth.vlan.tci &
1042                               htons(VLAN_CFI_MASK));
1043         bool i_encap_valid = !!(match->key->eth.cvlan.tci &
1044                                 htons(VLAN_CFI_MASK));
1045
1046         if (!(key_attrs & (1 << OVS_KEY_ATTR_ENCAP))) {
1047                 /* Not a VLAN. */
1048                 return 0;
1049         }
1050
1051         if ((!inner && !encap_valid) || (inner && !i_encap_valid)) {
1052                 OVS_NLERR(log, "Encap mask attribute is set for non-%s frame.",
1053                           (inner) ? "C-VLAN" : "VLAN");
1054                 return -EINVAL;
1055         }
1056
1057         if (a[OVS_KEY_ATTR_VLAN])
1058                 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
1059
1060         if (a[OVS_KEY_ATTR_ETHERTYPE])
1061                 tpid = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
1062
1063         if (tpid != htons(0xffff)) {
1064                 OVS_NLERR(log, "Must have an exact match on %s TPID (mask=%x).",
1065                           (inner) ? "C-VLAN" : "VLAN", ntohs(tpid));
1066                 return -EINVAL;
1067         }
1068         if (!(tci & htons(VLAN_CFI_MASK))) {
1069                 OVS_NLERR(log, "%s TCI mask does not have exact match for VLAN_CFI_MASK bit.",
1070                           (inner) ? "C-VLAN" : "VLAN");
1071                 return -EINVAL;
1072         }
1073
1074         return 1;
1075 }
1076
1077 static int __parse_vlan_from_nlattrs(struct sw_flow_match *match,
1078                                      u64 *key_attrs, bool inner,
1079                                      const struct nlattr **a, bool is_mask,
1080                                      bool log)
1081 {
1082         int err;
1083         const struct nlattr *encap;
1084
1085         if (!is_mask)
1086                 err = validate_vlan_from_nlattrs(match, *key_attrs, inner,
1087                                                  a, log);
1088         else
1089                 err = validate_vlan_mask_from_nlattrs(match, *key_attrs, inner,
1090                                                       a, log);
1091         if (err <= 0)
1092                 return err;
1093
1094         err = encode_vlan_from_nlattrs(match, a, is_mask, inner);
1095         if (err)
1096                 return err;
1097
1098         *key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
1099         *key_attrs &= ~(1 << OVS_KEY_ATTR_VLAN);
1100         *key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
1101
1102         encap = a[OVS_KEY_ATTR_ENCAP];
1103
1104         if (!is_mask)
1105                 err = parse_flow_nlattrs(encap, a, key_attrs, log);
1106         else
1107                 err = parse_flow_mask_nlattrs(encap, a, key_attrs, log);
1108
1109         return err;
1110 }
1111
1112 static int parse_vlan_from_nlattrs(struct sw_flow_match *match,
1113                                    u64 *key_attrs, const struct nlattr **a,
1114                                    bool is_mask, bool log)
1115 {
1116         int err;
1117         bool encap_valid = false;
1118
1119         err = __parse_vlan_from_nlattrs(match, key_attrs, false, a,
1120                                         is_mask, log);
1121         if (err)
1122                 return err;
1123
1124         encap_valid = !!(match->key->eth.vlan.tci & htons(VLAN_CFI_MASK));
1125         if (encap_valid) {
1126                 err = __parse_vlan_from_nlattrs(match, key_attrs, true, a,
1127                                                 is_mask, log);
1128                 if (err)
1129                         return err;
1130         }
1131
1132         return 0;
1133 }
1134
1135 static int parse_eth_type_from_nlattrs(struct sw_flow_match *match,
1136                                        u64 *attrs, const struct nlattr **a,
1137                                        bool is_mask, bool log)
1138 {
1139         __be16 eth_type;
1140
1141         eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
1142         if (is_mask) {
1143                 /* Always exact match EtherType. */
1144                 eth_type = htons(0xffff);
1145         } else if (!eth_proto_is_802_3(eth_type)) {
1146                 OVS_NLERR(log, "EtherType %x is less than min %x",
1147                                 ntohs(eth_type), ETH_P_802_3_MIN);
1148                 return -EINVAL;
1149         }
1150
1151         SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
1152         *attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
1153         return 0;
1154 }
1155
1156 static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
1157                                  u64 *attrs, const struct nlattr **a,
1158                                  bool is_mask, bool log)
1159 {
1160         u8 mac_proto = MAC_PROTO_ETHERNET;
1161
1162         if (*attrs & (1 << OVS_KEY_ATTR_DP_HASH)) {
1163                 u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]);
1164
1165                 SW_FLOW_KEY_PUT(match, ovs_flow_hash, hash_val, is_mask);
1166                 *attrs &= ~(1 << OVS_KEY_ATTR_DP_HASH);
1167         }
1168
1169         if (*attrs & (1 << OVS_KEY_ATTR_RECIRC_ID)) {
1170                 u32 recirc_id = nla_get_u32(a[OVS_KEY_ATTR_RECIRC_ID]);
1171
1172                 SW_FLOW_KEY_PUT(match, recirc_id, recirc_id, is_mask);
1173                 *attrs &= ~(1 << OVS_KEY_ATTR_RECIRC_ID);
1174         }
1175
1176         if (*attrs & (1 << OVS_KEY_ATTR_PRIORITY)) {
1177                 SW_FLOW_KEY_PUT(match, phy.priority,
1178                           nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask);
1179                 *attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY);
1180         }
1181
1182         if (*attrs & (1 << OVS_KEY_ATTR_IN_PORT)) {
1183                 u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);
1184
1185                 if (is_mask) {
1186                         in_port = 0xffffffff; /* Always exact match in_port. */
1187                 } else if (in_port >= DP_MAX_PORTS) {
1188                         OVS_NLERR(log, "Port %d exceeds max allowable %d",
1189                                   in_port, DP_MAX_PORTS);
1190                         return -EINVAL;
1191                 }
1192
1193                 SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask);
1194                 *attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT);
1195         } else if (!is_mask) {
1196                 SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask);
1197         }
1198
1199         if (*attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) {
1200                 uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]);
1201
1202                 SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask);
1203                 *attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK);
1204         }
1205         if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) {
1206                 if (ip_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
1207                                        is_mask, log) < 0)
1208                         return -EINVAL;
1209                 *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
1210         }
1211
1212         if (*attrs & (1 << OVS_KEY_ATTR_CT_STATE) &&
1213             ovs_ct_verify(net, OVS_KEY_ATTR_CT_STATE)) {
1214                 u32 ct_state = nla_get_u32(a[OVS_KEY_ATTR_CT_STATE]);
1215
1216                 if (ct_state & ~CT_SUPPORTED_MASK) {
1217                         OVS_NLERR(log, "ct_state flags %08x unsupported",
1218                                   ct_state);
1219                         return -EINVAL;
1220                 }
1221
1222                 SW_FLOW_KEY_PUT(match, ct_state, ct_state, is_mask);
1223                 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_STATE);
1224         }
1225         if (*attrs & (1 << OVS_KEY_ATTR_CT_ZONE) &&
1226             ovs_ct_verify(net, OVS_KEY_ATTR_CT_ZONE)) {
1227                 u16 ct_zone = nla_get_u16(a[OVS_KEY_ATTR_CT_ZONE]);
1228
1229                 SW_FLOW_KEY_PUT(match, ct_zone, ct_zone, is_mask);
1230                 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ZONE);
1231         }
1232         if (*attrs & (1 << OVS_KEY_ATTR_CT_MARK) &&
1233             ovs_ct_verify(net, OVS_KEY_ATTR_CT_MARK)) {
1234                 u32 mark = nla_get_u32(a[OVS_KEY_ATTR_CT_MARK]);
1235
1236                 SW_FLOW_KEY_PUT(match, ct.mark, mark, is_mask);
1237                 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_MARK);
1238         }
1239         if (*attrs & (1 << OVS_KEY_ATTR_CT_LABELS) &&
1240             ovs_ct_verify(net, OVS_KEY_ATTR_CT_LABELS)) {
1241                 const struct ovs_key_ct_labels *cl;
1242
1243                 cl = nla_data(a[OVS_KEY_ATTR_CT_LABELS]);
1244                 SW_FLOW_KEY_MEMCPY(match, ct.labels, cl->ct_labels,
1245                                    sizeof(*cl), is_mask);
1246                 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABELS);
1247         }
1248         if (*attrs & (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4)) {
1249                 const struct ovs_key_ct_tuple_ipv4 *ct;
1250
1251                 ct = nla_data(a[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4]);
1252
1253                 SW_FLOW_KEY_PUT(match, ipv4.ct_orig.src, ct->ipv4_src, is_mask);
1254                 SW_FLOW_KEY_PUT(match, ipv4.ct_orig.dst, ct->ipv4_dst, is_mask);
1255                 SW_FLOW_KEY_PUT(match, ct.orig_tp.src, ct->src_port, is_mask);
1256                 SW_FLOW_KEY_PUT(match, ct.orig_tp.dst, ct->dst_port, is_mask);
1257                 SW_FLOW_KEY_PUT(match, ct_orig_proto, ct->ipv4_proto, is_mask);
1258                 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4);
1259         }
1260         if (*attrs & (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6)) {
1261                 const struct ovs_key_ct_tuple_ipv6 *ct;
1262
1263                 ct = nla_data(a[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6]);
1264
1265                 SW_FLOW_KEY_MEMCPY(match, ipv6.ct_orig.src, &ct->ipv6_src,
1266                                    sizeof(match->key->ipv6.ct_orig.src),
1267                                    is_mask);
1268                 SW_FLOW_KEY_MEMCPY(match, ipv6.ct_orig.dst, &ct->ipv6_dst,
1269                                    sizeof(match->key->ipv6.ct_orig.dst),
1270                                    is_mask);
1271                 SW_FLOW_KEY_PUT(match, ct.orig_tp.src, ct->src_port, is_mask);
1272                 SW_FLOW_KEY_PUT(match, ct.orig_tp.dst, ct->dst_port, is_mask);
1273                 SW_FLOW_KEY_PUT(match, ct_orig_proto, ct->ipv6_proto, is_mask);
1274                 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6);
1275         }
1276
1277         /* For layer 3 packets the Ethernet type is provided
1278          * and treated as metadata but no MAC addresses are provided.
1279          */
1280         if (!(*attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)) &&
1281             (*attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE)))
1282                 mac_proto = MAC_PROTO_NONE;
1283
1284         /* Always exact match mac_proto */
1285         SW_FLOW_KEY_PUT(match, mac_proto, is_mask ? 0xff : mac_proto, is_mask);
1286
1287         if (mac_proto == MAC_PROTO_NONE)
1288                 return parse_eth_type_from_nlattrs(match, attrs, a, is_mask,
1289                                                    log);
1290
1291         return 0;
1292 }
1293
1294 int nsh_hdr_from_nlattr(const struct nlattr *attr,
1295                         struct nshhdr *nh, size_t size)
1296 {
1297         struct nlattr *a;
1298         int rem;
1299         u8 flags = 0;
1300         u8 ttl = 0;
1301         int mdlen = 0;
1302
1303         /* validate_nsh has check this, so we needn't do duplicate check here
1304          */
1305         if (size < NSH_BASE_HDR_LEN)
1306                 return -ENOBUFS;
1307
1308         nla_for_each_nested(a, attr, rem) {
1309                 int type = nla_type(a);
1310
1311                 switch (type) {
1312                 case OVS_NSH_KEY_ATTR_BASE: {
1313                         const struct ovs_nsh_key_base *base = nla_data(a);
1314
1315                         flags = base->flags;
1316                         ttl = base->ttl;
1317                         nh->np = base->np;
1318                         nh->mdtype = base->mdtype;
1319                         nh->path_hdr = base->path_hdr;
1320                         break;
1321                 }
1322                 case OVS_NSH_KEY_ATTR_MD1:
1323                         mdlen = nla_len(a);
1324                         if (mdlen > size - NSH_BASE_HDR_LEN)
1325                                 return -ENOBUFS;
1326                         memcpy(&nh->md1, nla_data(a), mdlen);
1327                         break;
1328
1329                 case OVS_NSH_KEY_ATTR_MD2:
1330                         mdlen = nla_len(a);
1331                         if (mdlen > size - NSH_BASE_HDR_LEN)
1332                                 return -ENOBUFS;
1333                         memcpy(&nh->md2, nla_data(a), mdlen);
1334                         break;
1335
1336                 default:
1337                         return -EINVAL;
1338                 }
1339         }
1340
1341         /* nsh header length  = NSH_BASE_HDR_LEN + mdlen */
1342         nh->ver_flags_ttl_len = 0;
1343         nsh_set_flags_ttl_len(nh, flags, ttl, NSH_BASE_HDR_LEN + mdlen);
1344
1345         return 0;
1346 }
1347
1348 int nsh_key_from_nlattr(const struct nlattr *attr,
1349                         struct ovs_key_nsh *nsh, struct ovs_key_nsh *nsh_mask)
1350 {
1351         struct nlattr *a;
1352         int rem;
1353
1354         /* validate_nsh has check this, so we needn't do duplicate check here
1355          */
1356         nla_for_each_nested(a, attr, rem) {
1357                 int type = nla_type(a);
1358
1359                 switch (type) {
1360                 case OVS_NSH_KEY_ATTR_BASE: {
1361                         const struct ovs_nsh_key_base *base = nla_data(a);
1362                         const struct ovs_nsh_key_base *base_mask = base + 1;
1363
1364                         nsh->base = *base;
1365                         nsh_mask->base = *base_mask;
1366                         break;
1367                 }
1368                 case OVS_NSH_KEY_ATTR_MD1: {
1369                         const struct ovs_nsh_key_md1 *md1 = nla_data(a);
1370                         const struct ovs_nsh_key_md1 *md1_mask = md1 + 1;
1371
1372                         memcpy(nsh->context, md1->context, sizeof(*md1));
1373                         memcpy(nsh_mask->context, md1_mask->context,
1374                                sizeof(*md1_mask));
1375                         break;
1376                 }
1377                 case OVS_NSH_KEY_ATTR_MD2:
1378                         /* Not supported yet */
1379                         return -ENOTSUPP;
1380                 default:
1381                         return -EINVAL;
1382                 }
1383         }
1384
1385         return 0;
1386 }
1387
1388 static int nsh_key_put_from_nlattr(const struct nlattr *attr,
1389                                    struct sw_flow_match *match, bool is_mask,
1390                                    bool is_push_nsh, bool log)
1391 {
1392         struct nlattr *a;
1393         int rem;
1394         bool has_base = false;
1395         bool has_md1 = false;
1396         bool has_md2 = false;
1397         u8 mdtype = 0;
1398         int mdlen = 0;
1399
1400         if (WARN_ON(is_push_nsh && is_mask))
1401                 return -EINVAL;
1402
1403         nla_for_each_nested(a, attr, rem) {
1404                 int type = nla_type(a);
1405                 int i;
1406
1407                 if (type > OVS_NSH_KEY_ATTR_MAX) {
1408                         OVS_NLERR(log, "nsh attr %d is out of range max %d",
1409                                   type, OVS_NSH_KEY_ATTR_MAX);
1410                         return -EINVAL;
1411                 }
1412
1413                 if (!check_attr_len(nla_len(a),
1414                                     ovs_nsh_key_attr_lens[type].len)) {
1415                         OVS_NLERR(
1416                             log,
1417                             "nsh attr %d has unexpected len %d expected %d",
1418                             type,
1419                             nla_len(a),
1420                             ovs_nsh_key_attr_lens[type].len
1421                         );
1422                         return -EINVAL;
1423                 }
1424
1425                 switch (type) {
1426                 case OVS_NSH_KEY_ATTR_BASE: {
1427                         const struct ovs_nsh_key_base *base = nla_data(a);
1428
1429                         has_base = true;
1430                         mdtype = base->mdtype;
1431                         SW_FLOW_KEY_PUT(match, nsh.base.flags,
1432                                         base->flags, is_mask);
1433                         SW_FLOW_KEY_PUT(match, nsh.base.ttl,
1434                                         base->ttl, is_mask);
1435                         SW_FLOW_KEY_PUT(match, nsh.base.mdtype,
1436                                         base->mdtype, is_mask);
1437                         SW_FLOW_KEY_PUT(match, nsh.base.np,
1438                                         base->np, is_mask);
1439                         SW_FLOW_KEY_PUT(match, nsh.base.path_hdr,
1440                                         base->path_hdr, is_mask);
1441                         break;
1442                 }
1443                 case OVS_NSH_KEY_ATTR_MD1: {
1444                         const struct ovs_nsh_key_md1 *md1 = nla_data(a);
1445
1446                         has_md1 = true;
1447                         for (i = 0; i < NSH_MD1_CONTEXT_SIZE; i++)
1448                                 SW_FLOW_KEY_PUT(match, nsh.context[i],
1449                                                 md1->context[i], is_mask);
1450                         break;
1451                 }
1452                 case OVS_NSH_KEY_ATTR_MD2:
1453                         if (!is_push_nsh) /* Not supported MD type 2 yet */
1454                                 return -ENOTSUPP;
1455
1456                         has_md2 = true;
1457                         mdlen = nla_len(a);
1458                         if (mdlen > NSH_CTX_HDRS_MAX_LEN || mdlen <= 0) {
1459                                 OVS_NLERR(
1460                                     log,
1461                                     "Invalid MD length %d for MD type %d",
1462                                     mdlen,
1463                                     mdtype
1464                                 );
1465                                 return -EINVAL;
1466                         }
1467                         break;
1468                 default:
1469                         OVS_NLERR(log, "Unknown nsh attribute %d",
1470                                   type);
1471                         return -EINVAL;
1472                 }
1473         }
1474
1475         if (rem > 0) {
1476                 OVS_NLERR(log, "nsh attribute has %d unknown bytes.", rem);
1477                 return -EINVAL;
1478         }
1479
1480         if (has_md1 && has_md2) {
1481                 OVS_NLERR(
1482                     1,
1483                     "invalid nsh attribute: md1 and md2 are exclusive."
1484                 );
1485                 return -EINVAL;
1486         }
1487
1488         if (!is_mask) {
1489                 if ((has_md1 && mdtype != NSH_M_TYPE1) ||
1490                     (has_md2 && mdtype != NSH_M_TYPE2)) {
1491                         OVS_NLERR(1, "nsh attribute has unmatched MD type %d.",
1492                                   mdtype);
1493                         return -EINVAL;
1494                 }
1495
1496                 if (is_push_nsh &&
1497                     (!has_base || (!has_md1 && !has_md2))) {
1498                         OVS_NLERR(
1499                             1,
1500                             "push_nsh: missing base or metadata attributes"
1501                         );
1502                         return -EINVAL;
1503                 }
1504         }
1505
1506         return 0;
1507 }
1508
1509 static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
1510                                 u64 attrs, const struct nlattr **a,
1511                                 bool is_mask, bool log)
1512 {
1513         int err;
1514
1515         err = metadata_from_nlattrs(net, match, &attrs, a, is_mask, log);
1516         if (err)
1517                 return err;
1518
1519         if (attrs & (1 << OVS_KEY_ATTR_ETHERNET)) {
1520                 const struct ovs_key_ethernet *eth_key;
1521
1522                 eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]);
1523                 SW_FLOW_KEY_MEMCPY(match, eth.src,
1524                                 eth_key->eth_src, ETH_ALEN, is_mask);
1525                 SW_FLOW_KEY_MEMCPY(match, eth.dst,
1526                                 eth_key->eth_dst, ETH_ALEN, is_mask);
1527                 attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET);
1528
1529                 if (attrs & (1 << OVS_KEY_ATTR_VLAN)) {
1530                         /* VLAN attribute is always parsed before getting here since it
1531                          * may occur multiple times.
1532                          */
1533                         OVS_NLERR(log, "VLAN attribute unexpected.");
1534                         return -EINVAL;
1535                 }
1536
1537                 if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
1538                         err = parse_eth_type_from_nlattrs(match, &attrs, a, is_mask,
1539                                                           log);
1540                         if (err)
1541                                 return err;
1542                 } else if (!is_mask) {
1543                         SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
1544                 }
1545         } else if (!match->key->eth.type) {
1546                 OVS_NLERR(log, "Either Ethernet header or EtherType is required.");
1547                 return -EINVAL;
1548         }
1549
1550         if (attrs & (1 << OVS_KEY_ATTR_IPV4)) {
1551                 const struct ovs_key_ipv4 *ipv4_key;
1552
1553                 ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
1554                 if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) {
1555                         OVS_NLERR(log, "IPv4 frag type %d is out of range max %d",
1556                                   ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX);
1557                         return -EINVAL;
1558                 }
1559                 SW_FLOW_KEY_PUT(match, ip.proto,
1560                                 ipv4_key->ipv4_proto, is_mask);
1561                 SW_FLOW_KEY_PUT(match, ip.tos,
1562                                 ipv4_key->ipv4_tos, is_mask);
1563                 SW_FLOW_KEY_PUT(match, ip.ttl,
1564                                 ipv4_key->ipv4_ttl, is_mask);
1565                 SW_FLOW_KEY_PUT(match, ip.frag,
1566                                 ipv4_key->ipv4_frag, is_mask);
1567                 SW_FLOW_KEY_PUT(match, ipv4.addr.src,
1568                                 ipv4_key->ipv4_src, is_mask);
1569                 SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
1570                                 ipv4_key->ipv4_dst, is_mask);
1571                 attrs &= ~(1 << OVS_KEY_ATTR_IPV4);
1572         }
1573
1574         if (attrs & (1 << OVS_KEY_ATTR_IPV6)) {
1575                 const struct ovs_key_ipv6 *ipv6_key;
1576
1577                 ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
1578                 if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) {
1579                         OVS_NLERR(log, "IPv6 frag type %d is out of range max %d",
1580                                   ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX);
1581                         return -EINVAL;
1582                 }
1583
1584                 if (!is_mask && ipv6_key->ipv6_label & htonl(0xFFF00000)) {
1585                         OVS_NLERR(log, "IPv6 flow label %x is out of range (max=%x)",
1586                                   ntohl(ipv6_key->ipv6_label), (1 << 20) - 1);
1587                         return -EINVAL;
1588                 }
1589
1590                 SW_FLOW_KEY_PUT(match, ipv6.label,
1591                                 ipv6_key->ipv6_label, is_mask);
1592                 SW_FLOW_KEY_PUT(match, ip.proto,
1593                                 ipv6_key->ipv6_proto, is_mask);
1594                 SW_FLOW_KEY_PUT(match, ip.tos,
1595                                 ipv6_key->ipv6_tclass, is_mask);
1596                 SW_FLOW_KEY_PUT(match, ip.ttl,
1597                                 ipv6_key->ipv6_hlimit, is_mask);
1598                 SW_FLOW_KEY_PUT(match, ip.frag,
1599                                 ipv6_key->ipv6_frag, is_mask);
1600                 SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src,
1601                                 ipv6_key->ipv6_src,
1602                                 sizeof(match->key->ipv6.addr.src),
1603                                 is_mask);
1604                 SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst,
1605                                 ipv6_key->ipv6_dst,
1606                                 sizeof(match->key->ipv6.addr.dst),
1607                                 is_mask);
1608
1609                 attrs &= ~(1 << OVS_KEY_ATTR_IPV6);
1610         }
1611
1612         if (attrs & (1ULL << OVS_KEY_ATTR_IPV6_EXTHDRS)) {
1613                 const struct ovs_key_ipv6_exthdrs *ipv6_exthdrs_key;
1614
1615                 ipv6_exthdrs_key = nla_data(a[OVS_KEY_ATTR_IPV6_EXTHDRS]);
1616
1617                 SW_FLOW_KEY_PUT(match, ipv6.exthdrs,
1618                                 ipv6_exthdrs_key->hdrs, is_mask);
1619
1620                 attrs &= ~(1ULL << OVS_KEY_ATTR_IPV6_EXTHDRS);
1621         }
1622
1623         if (attrs & (1 << OVS_KEY_ATTR_ARP)) {
1624                 const struct ovs_key_arp *arp_key;
1625
1626                 arp_key = nla_data(a[OVS_KEY_ATTR_ARP]);
1627                 if (!is_mask && (arp_key->arp_op & htons(0xff00))) {
1628                         OVS_NLERR(log, "Unknown ARP opcode (opcode=%d).",
1629                                   arp_key->arp_op);
1630                         return -EINVAL;
1631                 }
1632
1633                 SW_FLOW_KEY_PUT(match, ipv4.addr.src,
1634                                 arp_key->arp_sip, is_mask);
1635                 SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
1636                         arp_key->arp_tip, is_mask);
1637                 SW_FLOW_KEY_PUT(match, ip.proto,
1638                                 ntohs(arp_key->arp_op), is_mask);
1639                 SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha,
1640                                 arp_key->arp_sha, ETH_ALEN, is_mask);
1641                 SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha,
1642                                 arp_key->arp_tha, ETH_ALEN, is_mask);
1643
1644                 attrs &= ~(1 << OVS_KEY_ATTR_ARP);
1645         }
1646
1647         if (attrs & (1 << OVS_KEY_ATTR_NSH)) {
1648                 if (nsh_key_put_from_nlattr(a[OVS_KEY_ATTR_NSH], match,
1649                                             is_mask, false, log) < 0)
1650                         return -EINVAL;
1651                 attrs &= ~(1 << OVS_KEY_ATTR_NSH);
1652         }
1653
1654         if (attrs & (1 << OVS_KEY_ATTR_MPLS)) {
1655                 const struct ovs_key_mpls *mpls_key;
1656                 u32 hdr_len;
1657                 u32 label_count, label_count_mask, i;
1658
1659                 mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
1660                 hdr_len = nla_len(a[OVS_KEY_ATTR_MPLS]);
1661                 label_count = hdr_len / sizeof(struct ovs_key_mpls);
1662
1663                 if (label_count == 0 || label_count > MPLS_LABEL_DEPTH ||
1664                     hdr_len % sizeof(struct ovs_key_mpls))
1665                         return -EINVAL;
1666
1667                 label_count_mask =  GENMASK(label_count - 1, 0);
1668
1669                 for (i = 0 ; i < label_count; i++)
1670                         SW_FLOW_KEY_PUT(match, mpls.lse[i],
1671                                         mpls_key[i].mpls_lse, is_mask);
1672
1673                 SW_FLOW_KEY_PUT(match, mpls.num_labels_mask,
1674                                 label_count_mask, is_mask);
1675
1676                 attrs &= ~(1 << OVS_KEY_ATTR_MPLS);
1677          }
1678
1679         if (attrs & (1 << OVS_KEY_ATTR_TCP)) {
1680                 const struct ovs_key_tcp *tcp_key;
1681
1682                 tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
1683                 SW_FLOW_KEY_PUT(match, tp.src, tcp_key->tcp_src, is_mask);
1684                 SW_FLOW_KEY_PUT(match, tp.dst, tcp_key->tcp_dst, is_mask);
1685                 attrs &= ~(1 << OVS_KEY_ATTR_TCP);
1686         }
1687
1688         if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) {
1689                 SW_FLOW_KEY_PUT(match, tp.flags,
1690                                 nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
1691                                 is_mask);
1692                 attrs &= ~(1 << OVS_KEY_ATTR_TCP_FLAGS);
1693         }
1694
1695         if (attrs & (1 << OVS_KEY_ATTR_UDP)) {
1696                 const struct ovs_key_udp *udp_key;
1697
1698                 udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
1699                 SW_FLOW_KEY_PUT(match, tp.src, udp_key->udp_src, is_mask);
1700                 SW_FLOW_KEY_PUT(match, tp.dst, udp_key->udp_dst, is_mask);
1701                 attrs &= ~(1 << OVS_KEY_ATTR_UDP);
1702         }
1703
1704         if (attrs & (1 << OVS_KEY_ATTR_SCTP)) {
1705                 const struct ovs_key_sctp *sctp_key;
1706
1707                 sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]);
1708                 SW_FLOW_KEY_PUT(match, tp.src, sctp_key->sctp_src, is_mask);
1709                 SW_FLOW_KEY_PUT(match, tp.dst, sctp_key->sctp_dst, is_mask);
1710                 attrs &= ~(1 << OVS_KEY_ATTR_SCTP);
1711         }
1712
1713         if (attrs & (1 << OVS_KEY_ATTR_ICMP)) {
1714                 const struct ovs_key_icmp *icmp_key;
1715
1716                 icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
1717                 SW_FLOW_KEY_PUT(match, tp.src,
1718                                 htons(icmp_key->icmp_type), is_mask);
1719                 SW_FLOW_KEY_PUT(match, tp.dst,
1720                                 htons(icmp_key->icmp_code), is_mask);
1721                 attrs &= ~(1 << OVS_KEY_ATTR_ICMP);
1722         }
1723
1724         if (attrs & (1 << OVS_KEY_ATTR_ICMPV6)) {
1725                 const struct ovs_key_icmpv6 *icmpv6_key;
1726
1727                 icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
1728                 SW_FLOW_KEY_PUT(match, tp.src,
1729                                 htons(icmpv6_key->icmpv6_type), is_mask);
1730                 SW_FLOW_KEY_PUT(match, tp.dst,
1731                                 htons(icmpv6_key->icmpv6_code), is_mask);
1732                 attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6);
1733         }
1734
1735         if (attrs & (1 << OVS_KEY_ATTR_ND)) {
1736                 const struct ovs_key_nd *nd_key;
1737
1738                 nd_key = nla_data(a[OVS_KEY_ATTR_ND]);
1739                 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target,
1740                         nd_key->nd_target,
1741                         sizeof(match->key->ipv6.nd.target),
1742                         is_mask);
1743                 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll,
1744                         nd_key->nd_sll, ETH_ALEN, is_mask);
1745                 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll,
1746                                 nd_key->nd_tll, ETH_ALEN, is_mask);
1747                 attrs &= ~(1 << OVS_KEY_ATTR_ND);
1748         }
1749
1750         if (attrs != 0) {
1751                 OVS_NLERR(log, "Unknown key attributes %llx",
1752                           (unsigned long long)attrs);
1753                 return -EINVAL;
1754         }
1755
1756         return 0;
1757 }
1758
1759 static void nlattr_set(struct nlattr *attr, u8 val,
1760                        const struct ovs_len_tbl *tbl)
1761 {
1762         struct nlattr *nla;
1763         int rem;
1764
1765         /* The nlattr stream should already have been validated */
1766         nla_for_each_nested(nla, attr, rem) {
1767                 if (tbl[nla_type(nla)].len == OVS_ATTR_NESTED)
1768                         nlattr_set(nla, val, tbl[nla_type(nla)].next ? : tbl);
1769                 else
1770                         memset(nla_data(nla), val, nla_len(nla));
1771
1772                 if (nla_type(nla) == OVS_KEY_ATTR_CT_STATE)
1773                         *(u32 *)nla_data(nla) &= CT_SUPPORTED_MASK;
1774         }
1775 }
1776
1777 static void mask_set_nlattr(struct nlattr *attr, u8 val)
1778 {
1779         nlattr_set(attr, val, ovs_key_lens);
1780 }
1781
1782 /**
1783  * ovs_nla_get_match - parses Netlink attributes into a flow key and
1784  * mask. In case the 'mask' is NULL, the flow is treated as exact match
1785  * flow. Otherwise, it is treated as a wildcarded flow, except the mask
1786  * does not include any don't care bit.
1787  * @net: Used to determine per-namespace field support.
1788  * @match: receives the extracted flow match information.
1789  * @nla_key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
1790  * sequence. The fields should of the packet that triggered the creation
1791  * of this flow.
1792  * @nla_mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_*
1793  * Netlink attribute specifies the mask field of the wildcarded flow.
1794  * @log: Boolean to allow kernel error logging.  Normally true, but when
1795  * probing for feature compatibility this should be passed in as false to
1796  * suppress unnecessary error logging.
1797  */
1798 int ovs_nla_get_match(struct net *net, struct sw_flow_match *match,
1799                       const struct nlattr *nla_key,
1800                       const struct nlattr *nla_mask,
1801                       bool log)
1802 {
1803         const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
1804         struct nlattr *newmask = NULL;
1805         u64 key_attrs = 0;
1806         u64 mask_attrs = 0;
1807         int err;
1808
1809         err = parse_flow_nlattrs(nla_key, a, &key_attrs, log);
1810         if (err)
1811                 return err;
1812
1813         err = parse_vlan_from_nlattrs(match, &key_attrs, a, false, log);
1814         if (err)
1815                 return err;
1816
1817         err = ovs_key_from_nlattrs(net, match, key_attrs, a, false, log);
1818         if (err)
1819                 return err;
1820
1821         if (match->mask) {
1822                 if (!nla_mask) {
1823                         /* Create an exact match mask. We need to set to 0xff
1824                          * all the 'match->mask' fields that have been touched
1825                          * in 'match->key'. We cannot simply memset
1826                          * 'match->mask', because padding bytes and fields not
1827                          * specified in 'match->key' should be left to 0.
1828                          * Instead, we use a stream of netlink attributes,
1829                          * copied from 'key' and set to 0xff.
1830                          * ovs_key_from_nlattrs() will take care of filling
1831                          * 'match->mask' appropriately.
1832                          */
1833                         newmask = kmemdup(nla_key,
1834                                           nla_total_size(nla_len(nla_key)),
1835                                           GFP_KERNEL);
1836                         if (!newmask)
1837                                 return -ENOMEM;
1838
1839                         mask_set_nlattr(newmask, 0xff);
1840
1841                         /* The userspace does not send tunnel attributes that
1842                          * are 0, but we should not wildcard them nonetheless.
1843                          */
1844                         if (match->key->tun_proto)
1845                                 SW_FLOW_KEY_MEMSET_FIELD(match, tun_key,
1846                                                          0xff, true);
1847
1848                         nla_mask = newmask;
1849                 }
1850
1851                 err = parse_flow_mask_nlattrs(nla_mask, a, &mask_attrs, log);
1852                 if (err)
1853                         goto free_newmask;
1854
1855                 /* Always match on tci. */
1856                 SW_FLOW_KEY_PUT(match, eth.vlan.tci, htons(0xffff), true);
1857                 SW_FLOW_KEY_PUT(match, eth.cvlan.tci, htons(0xffff), true);
1858
1859                 err = parse_vlan_from_nlattrs(match, &mask_attrs, a, true, log);
1860                 if (err)
1861                         goto free_newmask;
1862
1863                 err = ovs_key_from_nlattrs(net, match, mask_attrs, a, true,
1864                                            log);
1865                 if (err)
1866                         goto free_newmask;
1867         }
1868
1869         if (!match_validate(match, key_attrs, mask_attrs, log))
1870                 err = -EINVAL;
1871
1872 free_newmask:
1873         kfree(newmask);
1874         return err;
1875 }
1876
1877 static size_t get_ufid_len(const struct nlattr *attr, bool log)
1878 {
1879         size_t len;
1880
1881         if (!attr)
1882                 return 0;
1883
1884         len = nla_len(attr);
1885         if (len < 1 || len > MAX_UFID_LENGTH) {
1886                 OVS_NLERR(log, "ufid size %u bytes exceeds the range (1, %d)",
1887                           nla_len(attr), MAX_UFID_LENGTH);
1888                 return 0;
1889         }
1890
1891         return len;
1892 }
1893
1894 /* Initializes 'flow->ufid', returning true if 'attr' contains a valid UFID,
1895  * or false otherwise.
1896  */
1897 bool ovs_nla_get_ufid(struct sw_flow_id *sfid, const struct nlattr *attr,
1898                       bool log)
1899 {
1900         sfid->ufid_len = get_ufid_len(attr, log);
1901         if (sfid->ufid_len)
1902                 memcpy(sfid->ufid, nla_data(attr), sfid->ufid_len);
1903
1904         return sfid->ufid_len;
1905 }
1906
1907 int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid,
1908                            const struct sw_flow_key *key, bool log)
1909 {
1910         struct sw_flow_key *new_key;
1911
1912         if (ovs_nla_get_ufid(sfid, ufid, log))
1913                 return 0;
1914
1915         /* If UFID was not provided, use unmasked key. */
1916         new_key = kmalloc(sizeof(*new_key), GFP_KERNEL);
1917         if (!new_key)
1918                 return -ENOMEM;
1919         memcpy(new_key, key, sizeof(*key));
1920         sfid->unmasked_key = new_key;
1921
1922         return 0;
1923 }
1924
1925 u32 ovs_nla_get_ufid_flags(const struct nlattr *attr)
1926 {
1927         return attr ? nla_get_u32(attr) : 0;
1928 }
1929
1930 /**
1931  * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key.
1932  * @net: Network namespace.
1933  * @key: Receives extracted in_port, priority, tun_key, skb_mark and conntrack
1934  * metadata.
1935  * @a: Array of netlink attributes holding parsed %OVS_KEY_ATTR_* Netlink
1936  * attributes.
1937  * @attrs: Bit mask for the netlink attributes included in @a.
1938  * @log: Boolean to allow kernel error logging.  Normally true, but when
1939  * probing for feature compatibility this should be passed in as false to
1940  * suppress unnecessary error logging.
1941  *
1942  * This parses a series of Netlink attributes that form a flow key, which must
1943  * take the same form accepted by flow_from_nlattrs(), but only enough of it to
1944  * get the metadata, that is, the parts of the flow key that cannot be
1945  * extracted from the packet itself.
1946  *
1947  * This must be called before the packet key fields are filled in 'key'.
1948  */
1949
1950 int ovs_nla_get_flow_metadata(struct net *net,
1951                               const struct nlattr *a[OVS_KEY_ATTR_MAX + 1],
1952                               u64 attrs, struct sw_flow_key *key, bool log)
1953 {
1954         struct sw_flow_match match;
1955
1956         memset(&match, 0, sizeof(match));
1957         match.key = key;
1958
1959         key->ct_state = 0;
1960         key->ct_zone = 0;
1961         key->ct_orig_proto = 0;
1962         memset(&key->ct, 0, sizeof(key->ct));
1963         memset(&key->ipv4.ct_orig, 0, sizeof(key->ipv4.ct_orig));
1964         memset(&key->ipv6.ct_orig, 0, sizeof(key->ipv6.ct_orig));
1965
1966         key->phy.in_port = DP_MAX_PORTS;
1967
1968         return metadata_from_nlattrs(net, &match, &attrs, a, false, log);
1969 }
1970
1971 static int ovs_nla_put_vlan(struct sk_buff *skb, const struct vlan_head *vh,
1972                             bool is_mask)
1973 {
1974         __be16 eth_type = !is_mask ? vh->tpid : htons(0xffff);
1975
1976         if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) ||
1977             nla_put_be16(skb, OVS_KEY_ATTR_VLAN, vh->tci))
1978                 return -EMSGSIZE;
1979         return 0;
1980 }
1981
1982 static int nsh_key_to_nlattr(const struct ovs_key_nsh *nsh, bool is_mask,
1983                              struct sk_buff *skb)
1984 {
1985         struct nlattr *start;
1986
1987         start = nla_nest_start_noflag(skb, OVS_KEY_ATTR_NSH);
1988         if (!start)
1989                 return -EMSGSIZE;
1990
1991         if (nla_put(skb, OVS_NSH_KEY_ATTR_BASE, sizeof(nsh->base), &nsh->base))
1992                 goto nla_put_failure;
1993
1994         if (is_mask || nsh->base.mdtype == NSH_M_TYPE1) {
1995                 if (nla_put(skb, OVS_NSH_KEY_ATTR_MD1,
1996                             sizeof(nsh->context), nsh->context))
1997                         goto nla_put_failure;
1998         }
1999
2000         /* Don't support MD type 2 yet */
2001
2002         nla_nest_end(skb, start);
2003
2004         return 0;
2005
2006 nla_put_failure:
2007         return -EMSGSIZE;
2008 }
2009
2010 static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
2011                              const struct sw_flow_key *output, bool is_mask,
2012                              struct sk_buff *skb)
2013 {
2014         struct ovs_key_ethernet *eth_key;
2015         struct nlattr *nla;
2016         struct nlattr *encap = NULL;
2017         struct nlattr *in_encap = NULL;
2018
2019         if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id))
2020                 goto nla_put_failure;
2021
2022         if (nla_put_u32(skb, OVS_KEY_ATTR_DP_HASH, output->ovs_flow_hash))
2023                 goto nla_put_failure;
2024
2025         if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
2026                 goto nla_put_failure;
2027
2028         if ((swkey->tun_proto || is_mask)) {
2029                 const void *opts = NULL;
2030
2031                 if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT)
2032                         opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len);
2033
2034                 if (ip_tun_to_nlattr(skb, &output->tun_key, opts,
2035                                      swkey->tun_opts_len, swkey->tun_proto, 0))
2036                         goto nla_put_failure;
2037         }
2038
2039         if (swkey->phy.in_port == DP_MAX_PORTS) {
2040                 if (is_mask && (output->phy.in_port == 0xffff))
2041                         if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff))
2042                                 goto nla_put_failure;
2043         } else {
2044                 u16 upper_u16;
2045                 upper_u16 = !is_mask ? 0 : 0xffff;
2046
2047                 if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT,
2048                                 (upper_u16 << 16) | output->phy.in_port))
2049                         goto nla_put_failure;
2050         }
2051
2052         if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
2053                 goto nla_put_failure;
2054
2055         if (ovs_ct_put_key(swkey, output, skb))
2056                 goto nla_put_failure;
2057
2058         if (ovs_key_mac_proto(swkey) == MAC_PROTO_ETHERNET) {
2059                 nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
2060                 if (!nla)
2061                         goto nla_put_failure;
2062
2063                 eth_key = nla_data(nla);
2064                 ether_addr_copy(eth_key->eth_src, output->eth.src);
2065                 ether_addr_copy(eth_key->eth_dst, output->eth.dst);
2066
2067                 if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) {
2068                         if (ovs_nla_put_vlan(skb, &output->eth.vlan, is_mask))
2069                                 goto nla_put_failure;
2070                         encap = nla_nest_start_noflag(skb, OVS_KEY_ATTR_ENCAP);
2071                         if (!swkey->eth.vlan.tci)
2072                                 goto unencap;
2073
2074                         if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) {
2075                                 if (ovs_nla_put_vlan(skb, &output->eth.cvlan, is_mask))
2076                                         goto nla_put_failure;
2077                                 in_encap = nla_nest_start_noflag(skb,
2078                                                                  OVS_KEY_ATTR_ENCAP);
2079                                 if (!swkey->eth.cvlan.tci)
2080                                         goto unencap;
2081                         }
2082                 }
2083
2084                 if (swkey->eth.type == htons(ETH_P_802_2)) {
2085                         /*
2086                         * Ethertype 802.2 is represented in the netlink with omitted
2087                         * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and
2088                         * 0xffff in the mask attribute.  Ethertype can also
2089                         * be wildcarded.
2090                         */
2091                         if (is_mask && output->eth.type)
2092                                 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE,
2093                                                         output->eth.type))
2094                                         goto nla_put_failure;
2095                         goto unencap;
2096                 }
2097         }
2098
2099         if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
2100                 goto nla_put_failure;
2101
2102         if (eth_type_vlan(swkey->eth.type)) {
2103                 /* There are 3 VLAN tags, we don't know anything about the rest
2104                  * of the packet, so truncate here.
2105                  */
2106                 WARN_ON_ONCE(!(encap && in_encap));
2107                 goto unencap;
2108         }
2109
2110         if (swkey->eth.type == htons(ETH_P_IP)) {
2111                 struct ovs_key_ipv4 *ipv4_key;
2112
2113                 nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key));
2114                 if (!nla)
2115                         goto nla_put_failure;
2116                 ipv4_key = nla_data(nla);
2117                 ipv4_key->ipv4_src = output->ipv4.addr.src;
2118                 ipv4_key->ipv4_dst = output->ipv4.addr.dst;
2119                 ipv4_key->ipv4_proto = output->ip.proto;
2120                 ipv4_key->ipv4_tos = output->ip.tos;
2121                 ipv4_key->ipv4_ttl = output->ip.ttl;
2122                 ipv4_key->ipv4_frag = output->ip.frag;
2123         } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
2124                 struct ovs_key_ipv6 *ipv6_key;
2125                 struct ovs_key_ipv6_exthdrs *ipv6_exthdrs_key;
2126
2127                 nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key));
2128                 if (!nla)
2129                         goto nla_put_failure;
2130                 ipv6_key = nla_data(nla);
2131                 memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src,
2132                                 sizeof(ipv6_key->ipv6_src));
2133                 memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst,
2134                                 sizeof(ipv6_key->ipv6_dst));
2135                 ipv6_key->ipv6_label = output->ipv6.label;
2136                 ipv6_key->ipv6_proto = output->ip.proto;
2137                 ipv6_key->ipv6_tclass = output->ip.tos;
2138                 ipv6_key->ipv6_hlimit = output->ip.ttl;
2139                 ipv6_key->ipv6_frag = output->ip.frag;
2140
2141                 nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6_EXTHDRS,
2142                                   sizeof(*ipv6_exthdrs_key));
2143                 if (!nla)
2144                         goto nla_put_failure;
2145                 ipv6_exthdrs_key = nla_data(nla);
2146                 ipv6_exthdrs_key->hdrs = output->ipv6.exthdrs;
2147         } else if (swkey->eth.type == htons(ETH_P_NSH)) {
2148                 if (nsh_key_to_nlattr(&output->nsh, is_mask, skb))
2149                         goto nla_put_failure;
2150         } else if (swkey->eth.type == htons(ETH_P_ARP) ||
2151                    swkey->eth.type == htons(ETH_P_RARP)) {
2152                 struct ovs_key_arp *arp_key;
2153
2154                 nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key));
2155                 if (!nla)
2156                         goto nla_put_failure;
2157                 arp_key = nla_data(nla);
2158                 memset(arp_key, 0, sizeof(struct ovs_key_arp));
2159                 arp_key->arp_sip = output->ipv4.addr.src;
2160                 arp_key->arp_tip = output->ipv4.addr.dst;
2161                 arp_key->arp_op = htons(output->ip.proto);
2162                 ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha);
2163                 ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha);
2164         } else if (eth_p_mpls(swkey->eth.type)) {
2165                 u8 i, num_labels;
2166                 struct ovs_key_mpls *mpls_key;
2167
2168                 num_labels = hweight_long(output->mpls.num_labels_mask);
2169                 nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS,
2170                                   num_labels * sizeof(*mpls_key));
2171                 if (!nla)
2172                         goto nla_put_failure;
2173
2174                 mpls_key = nla_data(nla);
2175                 for (i = 0; i < num_labels; i++)
2176                         mpls_key[i].mpls_lse = output->mpls.lse[i];
2177         }
2178
2179         if ((swkey->eth.type == htons(ETH_P_IP) ||
2180              swkey->eth.type == htons(ETH_P_IPV6)) &&
2181              swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
2182
2183                 if (swkey->ip.proto == IPPROTO_TCP) {
2184                         struct ovs_key_tcp *tcp_key;
2185
2186                         nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key));
2187                         if (!nla)
2188                                 goto nla_put_failure;
2189                         tcp_key = nla_data(nla);
2190                         tcp_key->tcp_src = output->tp.src;
2191                         tcp_key->tcp_dst = output->tp.dst;
2192                         if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS,
2193                                          output->tp.flags))
2194                                 goto nla_put_failure;
2195                 } else if (swkey->ip.proto == IPPROTO_UDP) {
2196                         struct ovs_key_udp *udp_key;
2197
2198                         nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key));
2199                         if (!nla)
2200                                 goto nla_put_failure;
2201                         udp_key = nla_data(nla);
2202                         udp_key->udp_src = output->tp.src;
2203                         udp_key->udp_dst = output->tp.dst;
2204                 } else if (swkey->ip.proto == IPPROTO_SCTP) {
2205                         struct ovs_key_sctp *sctp_key;
2206
2207                         nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key));
2208                         if (!nla)
2209                                 goto nla_put_failure;
2210                         sctp_key = nla_data(nla);
2211                         sctp_key->sctp_src = output->tp.src;
2212                         sctp_key->sctp_dst = output->tp.dst;
2213                 } else if (swkey->eth.type == htons(ETH_P_IP) &&
2214                            swkey->ip.proto == IPPROTO_ICMP) {
2215                         struct ovs_key_icmp *icmp_key;
2216
2217                         nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key));
2218                         if (!nla)
2219                                 goto nla_put_failure;
2220                         icmp_key = nla_data(nla);
2221                         icmp_key->icmp_type = ntohs(output->tp.src);
2222                         icmp_key->icmp_code = ntohs(output->tp.dst);
2223                 } else if (swkey->eth.type == htons(ETH_P_IPV6) &&
2224                            swkey->ip.proto == IPPROTO_ICMPV6) {
2225                         struct ovs_key_icmpv6 *icmpv6_key;
2226
2227                         nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6,
2228                                                 sizeof(*icmpv6_key));
2229                         if (!nla)
2230                                 goto nla_put_failure;
2231                         icmpv6_key = nla_data(nla);
2232                         icmpv6_key->icmpv6_type = ntohs(output->tp.src);
2233                         icmpv6_key->icmpv6_code = ntohs(output->tp.dst);
2234
2235                         if (swkey->tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) ||
2236                             swkey->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
2237                                 struct ovs_key_nd *nd_key;
2238
2239                                 nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key));
2240                                 if (!nla)
2241                                         goto nla_put_failure;
2242                                 nd_key = nla_data(nla);
2243                                 memcpy(nd_key->nd_target, &output->ipv6.nd.target,
2244                                                         sizeof(nd_key->nd_target));
2245                                 ether_addr_copy(nd_key->nd_sll, output->ipv6.nd.sll);
2246                                 ether_addr_copy(nd_key->nd_tll, output->ipv6.nd.tll);
2247                         }
2248                 }
2249         }
2250
2251 unencap:
2252         if (in_encap)
2253                 nla_nest_end(skb, in_encap);
2254         if (encap)
2255                 nla_nest_end(skb, encap);
2256
2257         return 0;
2258
2259 nla_put_failure:
2260         return -EMSGSIZE;
2261 }
2262
2263 int ovs_nla_put_key(const struct sw_flow_key *swkey,
2264                     const struct sw_flow_key *output, int attr, bool is_mask,
2265                     struct sk_buff *skb)
2266 {
2267         int err;
2268         struct nlattr *nla;
2269
2270         nla = nla_nest_start_noflag(skb, attr);
2271         if (!nla)
2272                 return -EMSGSIZE;
2273         err = __ovs_nla_put_key(swkey, output, is_mask, skb);
2274         if (err)
2275                 return err;
2276         nla_nest_end(skb, nla);
2277
2278         return 0;
2279 }
2280
2281 /* Called with ovs_mutex or RCU read lock. */
2282 int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb)
2283 {
2284         if (ovs_identifier_is_ufid(&flow->id))
2285                 return nla_put(skb, OVS_FLOW_ATTR_UFID, flow->id.ufid_len,
2286                                flow->id.ufid);
2287
2288         return ovs_nla_put_key(flow->id.unmasked_key, flow->id.unmasked_key,
2289                                OVS_FLOW_ATTR_KEY, false, skb);
2290 }
2291
2292 /* Called with ovs_mutex or RCU read lock. */
2293 int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb)
2294 {
2295         return ovs_nla_put_key(&flow->key, &flow->key,
2296                                 OVS_FLOW_ATTR_KEY, false, skb);
2297 }
2298
2299 /* Called with ovs_mutex or RCU read lock. */
2300 int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb)
2301 {
2302         return ovs_nla_put_key(&flow->key, &flow->mask->key,
2303                                 OVS_FLOW_ATTR_MASK, true, skb);
2304 }
2305
2306 #define MAX_ACTIONS_BUFSIZE     (32 * 1024)
2307
2308 static struct sw_flow_actions *nla_alloc_flow_actions(int size)
2309 {
2310         struct sw_flow_actions *sfa;
2311
2312         WARN_ON_ONCE(size > MAX_ACTIONS_BUFSIZE);
2313
2314         sfa = kmalloc(kmalloc_size_roundup(sizeof(*sfa) + size), GFP_KERNEL);
2315         if (!sfa)
2316                 return ERR_PTR(-ENOMEM);
2317
2318         sfa->actions_len = 0;
2319         return sfa;
2320 }
2321
2322 static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len);
2323
2324 static void ovs_nla_free_check_pkt_len_action(const struct nlattr *action)
2325 {
2326         const struct nlattr *a;
2327         int rem;
2328
2329         nla_for_each_nested(a, action, rem) {
2330                 switch (nla_type(a)) {
2331                 case OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL:
2332                 case OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER:
2333                         ovs_nla_free_nested_actions(nla_data(a), nla_len(a));
2334                         break;
2335                 }
2336         }
2337 }
2338
2339 static void ovs_nla_free_clone_action(const struct nlattr *action)
2340 {
2341         const struct nlattr *a = nla_data(action);
2342         int rem = nla_len(action);
2343
2344         switch (nla_type(a)) {
2345         case OVS_CLONE_ATTR_EXEC:
2346                 /* The real list of actions follows this attribute. */
2347                 a = nla_next(a, &rem);
2348                 ovs_nla_free_nested_actions(a, rem);
2349                 break;
2350         }
2351 }
2352
2353 static void ovs_nla_free_dec_ttl_action(const struct nlattr *action)
2354 {
2355         const struct nlattr *a = nla_data(action);
2356
2357         switch (nla_type(a)) {
2358         case OVS_DEC_TTL_ATTR_ACTION:
2359                 ovs_nla_free_nested_actions(nla_data(a), nla_len(a));
2360                 break;
2361         }
2362 }
2363
2364 static void ovs_nla_free_sample_action(const struct nlattr *action)
2365 {
2366         const struct nlattr *a = nla_data(action);
2367         int rem = nla_len(action);
2368
2369         switch (nla_type(a)) {
2370         case OVS_SAMPLE_ATTR_ARG:
2371                 /* The real list of actions follows this attribute. */
2372                 a = nla_next(a, &rem);
2373                 ovs_nla_free_nested_actions(a, rem);
2374                 break;
2375         }
2376 }
2377
2378 static void ovs_nla_free_set_action(const struct nlattr *a)
2379 {
2380         const struct nlattr *ovs_key = nla_data(a);
2381         struct ovs_tunnel_info *ovs_tun;
2382
2383         switch (nla_type(ovs_key)) {
2384         case OVS_KEY_ATTR_TUNNEL_INFO:
2385                 ovs_tun = nla_data(ovs_key);
2386                 dst_release((struct dst_entry *)ovs_tun->tun_dst);
2387                 break;
2388         }
2389 }
2390
2391 static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len)
2392 {
2393         const struct nlattr *a;
2394         int rem;
2395
2396         /* Whenever new actions are added, the need to update this
2397          * function should be considered.
2398          */
2399         BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 24);
2400
2401         if (!actions)
2402                 return;
2403
2404         nla_for_each_attr(a, actions, len, rem) {
2405                 switch (nla_type(a)) {
2406                 case OVS_ACTION_ATTR_CHECK_PKT_LEN:
2407                         ovs_nla_free_check_pkt_len_action(a);
2408                         break;
2409
2410                 case OVS_ACTION_ATTR_CLONE:
2411                         ovs_nla_free_clone_action(a);
2412                         break;
2413
2414                 case OVS_ACTION_ATTR_CT:
2415                         ovs_ct_free_action(a);
2416                         break;
2417
2418                 case OVS_ACTION_ATTR_DEC_TTL:
2419                         ovs_nla_free_dec_ttl_action(a);
2420                         break;
2421
2422                 case OVS_ACTION_ATTR_SAMPLE:
2423                         ovs_nla_free_sample_action(a);
2424                         break;
2425
2426                 case OVS_ACTION_ATTR_SET:
2427                         ovs_nla_free_set_action(a);
2428                         break;
2429                 }
2430         }
2431 }
2432
2433 void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
2434 {
2435         if (!sf_acts)
2436                 return;
2437
2438         ovs_nla_free_nested_actions(sf_acts->actions, sf_acts->actions_len);
2439         kfree(sf_acts);
2440 }
2441
2442 static void __ovs_nla_free_flow_actions(struct rcu_head *head)
2443 {
2444         ovs_nla_free_flow_actions(container_of(head, struct sw_flow_actions, rcu));
2445 }
2446
2447 /* Schedules 'sf_acts' to be freed after the next RCU grace period.
2448  * The caller must hold rcu_read_lock for this to be sensible. */
2449 void ovs_nla_free_flow_actions_rcu(struct sw_flow_actions *sf_acts)
2450 {
2451         call_rcu(&sf_acts->rcu, __ovs_nla_free_flow_actions);
2452 }
2453
2454 static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
2455                                        int attr_len, bool log)
2456 {
2457
2458         struct sw_flow_actions *acts;
2459         int new_acts_size;
2460         size_t req_size = NLA_ALIGN(attr_len);
2461         int next_offset = offsetof(struct sw_flow_actions, actions) +
2462                                         (*sfa)->actions_len;
2463
2464         if (req_size <= (ksize(*sfa) - next_offset))
2465                 goto out;
2466
2467         new_acts_size = max(next_offset + req_size, ksize(*sfa) * 2);
2468
2469         if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
2470                 if ((next_offset + req_size) > MAX_ACTIONS_BUFSIZE) {
2471                         OVS_NLERR(log, "Flow action size exceeds max %u",
2472                                   MAX_ACTIONS_BUFSIZE);
2473                         return ERR_PTR(-EMSGSIZE);
2474                 }
2475                 new_acts_size = MAX_ACTIONS_BUFSIZE;
2476         }
2477
2478         acts = nla_alloc_flow_actions(new_acts_size);
2479         if (IS_ERR(acts))
2480                 return (void *)acts;
2481
2482         memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len);
2483         acts->actions_len = (*sfa)->actions_len;
2484         acts->orig_len = (*sfa)->orig_len;
2485         kfree(*sfa);
2486         *sfa = acts;
2487
2488 out:
2489         (*sfa)->actions_len += req_size;
2490         return  (struct nlattr *) ((unsigned char *)(*sfa) + next_offset);
2491 }
2492
2493 static struct nlattr *__add_action(struct sw_flow_actions **sfa,
2494                                    int attrtype, void *data, int len, bool log)
2495 {
2496         struct nlattr *a;
2497
2498         a = reserve_sfa_size(sfa, nla_attr_size(len), log);
2499         if (IS_ERR(a))
2500                 return a;
2501
2502         a->nla_type = attrtype;
2503         a->nla_len = nla_attr_size(len);
2504
2505         if (data)
2506                 memcpy(nla_data(a), data, len);
2507         memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len));
2508
2509         return a;
2510 }
2511
2512 int ovs_nla_add_action(struct sw_flow_actions **sfa, int attrtype, void *data,
2513                        int len, bool log)
2514 {
2515         struct nlattr *a;
2516
2517         a = __add_action(sfa, attrtype, data, len, log);
2518
2519         return PTR_ERR_OR_ZERO(a);
2520 }
2521
2522 static inline int add_nested_action_start(struct sw_flow_actions **sfa,
2523                                           int attrtype, bool log)
2524 {
2525         int used = (*sfa)->actions_len;
2526         int err;
2527
2528         err = ovs_nla_add_action(sfa, attrtype, NULL, 0, log);
2529         if (err)
2530                 return err;
2531
2532         return used;
2533 }
2534
2535 static inline void add_nested_action_end(struct sw_flow_actions *sfa,
2536                                          int st_offset)
2537 {
2538         struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions +
2539                                                                st_offset);
2540
2541         a->nla_len = sfa->actions_len - st_offset;
2542 }
2543
2544 static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
2545                                   const struct sw_flow_key *key,
2546                                   struct sw_flow_actions **sfa,
2547                                   __be16 eth_type, __be16 vlan_tci,
2548                                   u32 mpls_label_count, bool log);
2549
2550 static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
2551                                     const struct sw_flow_key *key,
2552                                     struct sw_flow_actions **sfa,
2553                                     __be16 eth_type, __be16 vlan_tci,
2554                                     u32 mpls_label_count, bool log, bool last)
2555 {
2556         const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
2557         const struct nlattr *probability, *actions;
2558         const struct nlattr *a;
2559         int rem, start, err;
2560         struct sample_arg arg;
2561
2562         memset(attrs, 0, sizeof(attrs));
2563         nla_for_each_nested(a, attr, rem) {
2564                 int type = nla_type(a);
2565                 if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type])
2566                         return -EINVAL;
2567                 attrs[type] = a;
2568         }
2569         if (rem)
2570                 return -EINVAL;
2571
2572         probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY];
2573         if (!probability || nla_len(probability) != sizeof(u32))
2574                 return -EINVAL;
2575
2576         actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
2577         if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
2578                 return -EINVAL;
2579
2580         /* validation done, copy sample action. */
2581         start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log);
2582         if (start < 0)
2583                 return start;
2584
2585         /* When both skb and flow may be changed, put the sample
2586          * into a deferred fifo. On the other hand, if only skb
2587          * may be modified, the actions can be executed in place.
2588          *
2589          * Do this analysis at the flow installation time.
2590          * Set 'clone_action->exec' to true if the actions can be
2591          * executed without being deferred.
2592          *
2593          * If the sample is the last action, it can always be excuted
2594          * rather than deferred.
2595          */
2596         arg.exec = last || !actions_may_change_flow(actions);
2597         arg.probability = nla_get_u32(probability);
2598
2599         err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_ARG, &arg, sizeof(arg),
2600                                  log);
2601         if (err)
2602                 return err;
2603
2604         err = __ovs_nla_copy_actions(net, actions, key, sfa,
2605                                      eth_type, vlan_tci, mpls_label_count, log);
2606
2607         if (err)
2608                 return err;
2609
2610         add_nested_action_end(*sfa, start);
2611
2612         return 0;
2613 }
2614
2615 static int validate_and_copy_dec_ttl(struct net *net,
2616                                      const struct nlattr *attr,
2617                                      const struct sw_flow_key *key,
2618                                      struct sw_flow_actions **sfa,
2619                                      __be16 eth_type, __be16 vlan_tci,
2620                                      u32 mpls_label_count, bool log)
2621 {
2622         const struct nlattr *attrs[OVS_DEC_TTL_ATTR_MAX + 1];
2623         int start, action_start, err, rem;
2624         const struct nlattr *a, *actions;
2625
2626         memset(attrs, 0, sizeof(attrs));
2627         nla_for_each_nested(a, attr, rem) {
2628                 int type = nla_type(a);
2629
2630                 /* Ignore unknown attributes to be future proof. */
2631                 if (type > OVS_DEC_TTL_ATTR_MAX)
2632                         continue;
2633
2634                 if (!type || attrs[type]) {
2635                         OVS_NLERR(log, "Duplicate or invalid key (type %d).",
2636                                   type);
2637                         return -EINVAL;
2638                 }
2639
2640                 attrs[type] = a;
2641         }
2642
2643         if (rem) {
2644                 OVS_NLERR(log, "Message has %d unknown bytes.", rem);
2645                 return -EINVAL;
2646         }
2647
2648         actions = attrs[OVS_DEC_TTL_ATTR_ACTION];
2649         if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN)) {
2650                 OVS_NLERR(log, "Missing valid actions attribute.");
2651                 return -EINVAL;
2652         }
2653
2654         start = add_nested_action_start(sfa, OVS_ACTION_ATTR_DEC_TTL, log);
2655         if (start < 0)
2656                 return start;
2657
2658         action_start = add_nested_action_start(sfa, OVS_DEC_TTL_ATTR_ACTION, log);
2659         if (action_start < 0)
2660                 return action_start;
2661
2662         err = __ovs_nla_copy_actions(net, actions, key, sfa, eth_type,
2663                                      vlan_tci, mpls_label_count, log);
2664         if (err)
2665                 return err;
2666
2667         add_nested_action_end(*sfa, action_start);
2668         add_nested_action_end(*sfa, start);
2669         return 0;
2670 }
2671
2672 static int validate_and_copy_clone(struct net *net,
2673                                    const struct nlattr *attr,
2674                                    const struct sw_flow_key *key,
2675                                    struct sw_flow_actions **sfa,
2676                                    __be16 eth_type, __be16 vlan_tci,
2677                                    u32 mpls_label_count, bool log, bool last)
2678 {
2679         int start, err;
2680         u32 exec;
2681
2682         if (nla_len(attr) && nla_len(attr) < NLA_HDRLEN)
2683                 return -EINVAL;
2684
2685         start = add_nested_action_start(sfa, OVS_ACTION_ATTR_CLONE, log);
2686         if (start < 0)
2687                 return start;
2688
2689         exec = last || !actions_may_change_flow(attr);
2690
2691         err = ovs_nla_add_action(sfa, OVS_CLONE_ATTR_EXEC, &exec,
2692                                  sizeof(exec), log);
2693         if (err)
2694                 return err;
2695
2696         err = __ovs_nla_copy_actions(net, attr, key, sfa,
2697                                      eth_type, vlan_tci, mpls_label_count, log);
2698         if (err)
2699                 return err;
2700
2701         add_nested_action_end(*sfa, start);
2702
2703         return 0;
2704 }
2705
2706 void ovs_match_init(struct sw_flow_match *match,
2707                     struct sw_flow_key *key,
2708                     bool reset_key,
2709                     struct sw_flow_mask *mask)
2710 {
2711         memset(match, 0, sizeof(*match));
2712         match->key = key;
2713         match->mask = mask;
2714
2715         if (reset_key)
2716                 memset(key, 0, sizeof(*key));
2717
2718         if (mask) {
2719                 memset(&mask->key, 0, sizeof(mask->key));
2720                 mask->range.start = mask->range.end = 0;
2721         }
2722 }
2723
2724 static int validate_geneve_opts(struct sw_flow_key *key)
2725 {
2726         struct geneve_opt *option;
2727         int opts_len = key->tun_opts_len;
2728         bool crit_opt = false;
2729
2730         option = (struct geneve_opt *)TUN_METADATA_OPTS(key, key->tun_opts_len);
2731         while (opts_len > 0) {
2732                 int len;
2733
2734                 if (opts_len < sizeof(*option))
2735                         return -EINVAL;
2736
2737                 len = sizeof(*option) + option->length * 4;
2738                 if (len > opts_len)
2739                         return -EINVAL;
2740
2741                 crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE);
2742
2743                 option = (struct geneve_opt *)((u8 *)option + len);
2744                 opts_len -= len;
2745         }
2746
2747         key->tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0;
2748
2749         return 0;
2750 }
2751
2752 static int validate_and_copy_set_tun(const struct nlattr *attr,
2753                                      struct sw_flow_actions **sfa, bool log)
2754 {
2755         struct sw_flow_match match;
2756         struct sw_flow_key key;
2757         struct metadata_dst *tun_dst;
2758         struct ip_tunnel_info *tun_info;
2759         struct ovs_tunnel_info *ovs_tun;
2760         struct nlattr *a;
2761         int err = 0, start, opts_type;
2762         __be16 dst_opt_type;
2763
2764         dst_opt_type = 0;
2765         ovs_match_init(&match, &key, true, NULL);
2766         opts_type = ip_tun_from_nlattr(nla_data(attr), &match, false, log);
2767         if (opts_type < 0)
2768                 return opts_type;
2769
2770         if (key.tun_opts_len) {
2771                 switch (opts_type) {
2772                 case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
2773                         err = validate_geneve_opts(&key);
2774                         if (err < 0)
2775                                 return err;
2776                         dst_opt_type = TUNNEL_GENEVE_OPT;
2777                         break;
2778                 case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
2779                         dst_opt_type = TUNNEL_VXLAN_OPT;
2780                         break;
2781                 case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
2782                         dst_opt_type = TUNNEL_ERSPAN_OPT;
2783                         break;
2784                 }
2785         }
2786
2787         start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log);
2788         if (start < 0)
2789                 return start;
2790
2791         tun_dst = metadata_dst_alloc(key.tun_opts_len, METADATA_IP_TUNNEL,
2792                                      GFP_KERNEL);
2793
2794         if (!tun_dst)
2795                 return -ENOMEM;
2796
2797         err = dst_cache_init(&tun_dst->u.tun_info.dst_cache, GFP_KERNEL);
2798         if (err) {
2799                 dst_release((struct dst_entry *)tun_dst);
2800                 return err;
2801         }
2802
2803         a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL,
2804                          sizeof(*ovs_tun), log);
2805         if (IS_ERR(a)) {
2806                 dst_release((struct dst_entry *)tun_dst);
2807                 return PTR_ERR(a);
2808         }
2809
2810         ovs_tun = nla_data(a);
2811         ovs_tun->tun_dst = tun_dst;
2812
2813         tun_info = &tun_dst->u.tun_info;
2814         tun_info->mode = IP_TUNNEL_INFO_TX;
2815         if (key.tun_proto == AF_INET6)
2816                 tun_info->mode |= IP_TUNNEL_INFO_IPV6;
2817         else if (key.tun_proto == AF_INET && key.tun_key.u.ipv4.dst == 0)
2818                 tun_info->mode |= IP_TUNNEL_INFO_BRIDGE;
2819         tun_info->key = key.tun_key;
2820
2821         /* We need to store the options in the action itself since
2822          * everything else will go away after flow setup. We can append
2823          * it to tun_info and then point there.
2824          */
2825         ip_tunnel_info_opts_set(tun_info,
2826                                 TUN_METADATA_OPTS(&key, key.tun_opts_len),
2827                                 key.tun_opts_len, dst_opt_type);
2828         add_nested_action_end(*sfa, start);
2829
2830         return err;
2831 }
2832
2833 static bool validate_nsh(const struct nlattr *attr, bool is_mask,
2834                          bool is_push_nsh, bool log)
2835 {
2836         struct sw_flow_match match;
2837         struct sw_flow_key key;
2838         int ret = 0;
2839
2840         ovs_match_init(&match, &key, true, NULL);
2841         ret = nsh_key_put_from_nlattr(attr, &match, is_mask,
2842                                       is_push_nsh, log);
2843         return !ret;
2844 }
2845
2846 /* Return false if there are any non-masked bits set.
2847  * Mask follows data immediately, before any netlink padding.
2848  */
2849 static bool validate_masked(u8 *data, int len)
2850 {
2851         u8 *mask = data + len;
2852
2853         while (len--)
2854                 if (*data++ & ~*mask++)
2855                         return false;
2856
2857         return true;
2858 }
2859
2860 static int validate_set(const struct nlattr *a,
2861                         const struct sw_flow_key *flow_key,
2862                         struct sw_flow_actions **sfa, bool *skip_copy,
2863                         u8 mac_proto, __be16 eth_type, bool masked, bool log)
2864 {
2865         const struct nlattr *ovs_key = nla_data(a);
2866         int key_type = nla_type(ovs_key);
2867         size_t key_len;
2868
2869         /* There can be only one key in a action */
2870         if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
2871                 return -EINVAL;
2872
2873         key_len = nla_len(ovs_key);
2874         if (masked)
2875                 key_len /= 2;
2876
2877         if (key_type > OVS_KEY_ATTR_MAX ||
2878             !check_attr_len(key_len, ovs_key_lens[key_type].len))
2879                 return -EINVAL;
2880
2881         if (masked && !validate_masked(nla_data(ovs_key), key_len))
2882                 return -EINVAL;
2883
2884         switch (key_type) {
2885         case OVS_KEY_ATTR_PRIORITY:
2886         case OVS_KEY_ATTR_SKB_MARK:
2887         case OVS_KEY_ATTR_CT_MARK:
2888         case OVS_KEY_ATTR_CT_LABELS:
2889                 break;
2890
2891         case OVS_KEY_ATTR_ETHERNET:
2892                 if (mac_proto != MAC_PROTO_ETHERNET)
2893                         return -EINVAL;
2894                 break;
2895
2896         case OVS_KEY_ATTR_TUNNEL: {
2897                 int err;
2898
2899                 if (masked)
2900                         return -EINVAL; /* Masked tunnel set not supported. */
2901
2902                 *skip_copy = true;
2903                 err = validate_and_copy_set_tun(a, sfa, log);
2904                 if (err)
2905                         return err;
2906                 break;
2907         }
2908         case OVS_KEY_ATTR_IPV4: {
2909                 const struct ovs_key_ipv4 *ipv4_key;
2910
2911                 if (eth_type != htons(ETH_P_IP))
2912                         return -EINVAL;
2913
2914                 ipv4_key = nla_data(ovs_key);
2915
2916                 if (masked) {
2917                         const struct ovs_key_ipv4 *mask = ipv4_key + 1;
2918
2919                         /* Non-writeable fields. */
2920                         if (mask->ipv4_proto || mask->ipv4_frag)
2921                                 return -EINVAL;
2922                 } else {
2923                         if (ipv4_key->ipv4_proto != flow_key->ip.proto)
2924                                 return -EINVAL;
2925
2926                         if (ipv4_key->ipv4_frag != flow_key->ip.frag)
2927                                 return -EINVAL;
2928                 }
2929                 break;
2930         }
2931         case OVS_KEY_ATTR_IPV6: {
2932                 const struct ovs_key_ipv6 *ipv6_key;
2933
2934                 if (eth_type != htons(ETH_P_IPV6))
2935                         return -EINVAL;
2936
2937                 ipv6_key = nla_data(ovs_key);
2938
2939                 if (masked) {
2940                         const struct ovs_key_ipv6 *mask = ipv6_key + 1;
2941
2942                         /* Non-writeable fields. */
2943                         if (mask->ipv6_proto || mask->ipv6_frag)
2944                                 return -EINVAL;
2945
2946                         /* Invalid bits in the flow label mask? */
2947                         if (ntohl(mask->ipv6_label) & 0xFFF00000)
2948                                 return -EINVAL;
2949                 } else {
2950                         if (ipv6_key->ipv6_proto != flow_key->ip.proto)
2951                                 return -EINVAL;
2952
2953                         if (ipv6_key->ipv6_frag != flow_key->ip.frag)
2954                                 return -EINVAL;
2955                 }
2956                 if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
2957                         return -EINVAL;
2958
2959                 break;
2960         }
2961         case OVS_KEY_ATTR_TCP:
2962                 if ((eth_type != htons(ETH_P_IP) &&
2963                      eth_type != htons(ETH_P_IPV6)) ||
2964                     flow_key->ip.proto != IPPROTO_TCP)
2965                         return -EINVAL;
2966
2967                 break;
2968
2969         case OVS_KEY_ATTR_UDP:
2970                 if ((eth_type != htons(ETH_P_IP) &&
2971                      eth_type != htons(ETH_P_IPV6)) ||
2972                     flow_key->ip.proto != IPPROTO_UDP)
2973                         return -EINVAL;
2974
2975                 break;
2976
2977         case OVS_KEY_ATTR_MPLS:
2978                 if (!eth_p_mpls(eth_type))
2979                         return -EINVAL;
2980                 break;
2981
2982         case OVS_KEY_ATTR_SCTP:
2983                 if ((eth_type != htons(ETH_P_IP) &&
2984                      eth_type != htons(ETH_P_IPV6)) ||
2985                     flow_key->ip.proto != IPPROTO_SCTP)
2986                         return -EINVAL;
2987
2988                 break;
2989
2990         case OVS_KEY_ATTR_NSH:
2991                 if (eth_type != htons(ETH_P_NSH))
2992                         return -EINVAL;
2993                 if (!validate_nsh(nla_data(a), masked, false, log))
2994                         return -EINVAL;
2995                 break;
2996
2997         default:
2998                 return -EINVAL;
2999         }
3000
3001         /* Convert non-masked non-tunnel set actions to masked set actions. */
3002         if (!masked && key_type != OVS_KEY_ATTR_TUNNEL) {
3003                 int start, len = key_len * 2;
3004                 struct nlattr *at;
3005
3006                 *skip_copy = true;
3007
3008                 start = add_nested_action_start(sfa,
3009                                                 OVS_ACTION_ATTR_SET_TO_MASKED,
3010                                                 log);
3011                 if (start < 0)
3012                         return start;
3013
3014                 at = __add_action(sfa, key_type, NULL, len, log);
3015                 if (IS_ERR(at))
3016                         return PTR_ERR(at);
3017
3018                 memcpy(nla_data(at), nla_data(ovs_key), key_len); /* Key. */
3019                 memset(nla_data(at) + key_len, 0xff, key_len);    /* Mask. */
3020                 /* Clear non-writeable bits from otherwise writeable fields. */
3021                 if (key_type == OVS_KEY_ATTR_IPV6) {
3022                         struct ovs_key_ipv6 *mask = nla_data(at) + key_len;
3023
3024                         mask->ipv6_label &= htonl(0x000FFFFF);
3025                 }
3026                 add_nested_action_end(*sfa, start);
3027         }
3028
3029         return 0;
3030 }
3031
3032 static int validate_userspace(const struct nlattr *attr)
3033 {
3034         static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = {
3035                 [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
3036                 [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC },
3037                 [OVS_USERSPACE_ATTR_EGRESS_TUN_PORT] = {.type = NLA_U32 },
3038         };
3039         struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
3040         int error;
3041
3042         error = nla_parse_nested_deprecated(a, OVS_USERSPACE_ATTR_MAX, attr,
3043                                             userspace_policy, NULL);
3044         if (error)
3045                 return error;
3046
3047         if (!a[OVS_USERSPACE_ATTR_PID] ||
3048             !nla_get_u32(a[OVS_USERSPACE_ATTR_PID]))
3049                 return -EINVAL;
3050
3051         return 0;
3052 }
3053
3054 static const struct nla_policy cpl_policy[OVS_CHECK_PKT_LEN_ATTR_MAX + 1] = {
3055         [OVS_CHECK_PKT_LEN_ATTR_PKT_LEN] = {.type = NLA_U16 },
3056         [OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER] = {.type = NLA_NESTED },
3057         [OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL] = {.type = NLA_NESTED },
3058 };
3059
3060 static int validate_and_copy_check_pkt_len(struct net *net,
3061                                            const struct nlattr *attr,
3062                                            const struct sw_flow_key *key,
3063                                            struct sw_flow_actions **sfa,
3064                                            __be16 eth_type, __be16 vlan_tci,
3065                                            u32 mpls_label_count,
3066                                            bool log, bool last)
3067 {
3068         const struct nlattr *acts_if_greater, *acts_if_lesser_eq;
3069         struct nlattr *a[OVS_CHECK_PKT_LEN_ATTR_MAX + 1];
3070         struct check_pkt_len_arg arg;
3071         int nested_acts_start;
3072         int start, err;
3073
3074         err = nla_parse_deprecated_strict(a, OVS_CHECK_PKT_LEN_ATTR_MAX,
3075                                           nla_data(attr), nla_len(attr),
3076                                           cpl_policy, NULL);
3077         if (err)
3078                 return err;
3079
3080         if (!a[OVS_CHECK_PKT_LEN_ATTR_PKT_LEN] ||
3081             !nla_get_u16(a[OVS_CHECK_PKT_LEN_ATTR_PKT_LEN]))
3082                 return -EINVAL;
3083
3084         acts_if_lesser_eq = a[OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL];
3085         acts_if_greater = a[OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER];
3086
3087         /* Both the nested action should be present. */
3088         if (!acts_if_greater || !acts_if_lesser_eq)
3089                 return -EINVAL;
3090
3091         /* validation done, copy the nested actions. */
3092         start = add_nested_action_start(sfa, OVS_ACTION_ATTR_CHECK_PKT_LEN,
3093                                         log);
3094         if (start < 0)
3095                 return start;
3096
3097         arg.pkt_len = nla_get_u16(a[OVS_CHECK_PKT_LEN_ATTR_PKT_LEN]);
3098         arg.exec_for_lesser_equal =
3099                 last || !actions_may_change_flow(acts_if_lesser_eq);
3100         arg.exec_for_greater =
3101                 last || !actions_may_change_flow(acts_if_greater);
3102
3103         err = ovs_nla_add_action(sfa, OVS_CHECK_PKT_LEN_ATTR_ARG, &arg,
3104                                  sizeof(arg), log);
3105         if (err)
3106                 return err;
3107
3108         nested_acts_start = add_nested_action_start(sfa,
3109                 OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL, log);
3110         if (nested_acts_start < 0)
3111                 return nested_acts_start;
3112
3113         err = __ovs_nla_copy_actions(net, acts_if_lesser_eq, key, sfa,
3114                                      eth_type, vlan_tci, mpls_label_count, log);
3115
3116         if (err)
3117                 return err;
3118
3119         add_nested_action_end(*sfa, nested_acts_start);
3120
3121         nested_acts_start = add_nested_action_start(sfa,
3122                 OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER, log);
3123         if (nested_acts_start < 0)
3124                 return nested_acts_start;
3125
3126         err = __ovs_nla_copy_actions(net, acts_if_greater, key, sfa,
3127                                      eth_type, vlan_tci, mpls_label_count, log);
3128
3129         if (err)
3130                 return err;
3131
3132         add_nested_action_end(*sfa, nested_acts_start);
3133         add_nested_action_end(*sfa, start);
3134         return 0;
3135 }
3136
3137 static int copy_action(const struct nlattr *from,
3138                        struct sw_flow_actions **sfa, bool log)
3139 {
3140         int totlen = NLA_ALIGN(from->nla_len);
3141         struct nlattr *to;
3142
3143         to = reserve_sfa_size(sfa, from->nla_len, log);
3144         if (IS_ERR(to))
3145                 return PTR_ERR(to);
3146
3147         memcpy(to, from, totlen);
3148         return 0;
3149 }
3150
3151 static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
3152                                   const struct sw_flow_key *key,
3153                                   struct sw_flow_actions **sfa,
3154                                   __be16 eth_type, __be16 vlan_tci,
3155                                   u32 mpls_label_count, bool log)
3156 {
3157         u8 mac_proto = ovs_key_mac_proto(key);
3158         const struct nlattr *a;
3159         int rem, err;
3160
3161         nla_for_each_nested(a, attr, rem) {
3162                 /* Expected argument lengths, (u32)-1 for variable length. */
3163                 static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
3164                         [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
3165                         [OVS_ACTION_ATTR_RECIRC] = sizeof(u32),
3166                         [OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
3167                         [OVS_ACTION_ATTR_PUSH_MPLS] = sizeof(struct ovs_action_push_mpls),
3168                         [OVS_ACTION_ATTR_POP_MPLS] = sizeof(__be16),
3169                         [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
3170                         [OVS_ACTION_ATTR_POP_VLAN] = 0,
3171                         [OVS_ACTION_ATTR_SET] = (u32)-1,
3172                         [OVS_ACTION_ATTR_SET_MASKED] = (u32)-1,
3173                         [OVS_ACTION_ATTR_SAMPLE] = (u32)-1,
3174                         [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash),
3175                         [OVS_ACTION_ATTR_CT] = (u32)-1,
3176                         [OVS_ACTION_ATTR_CT_CLEAR] = 0,
3177                         [OVS_ACTION_ATTR_TRUNC] = sizeof(struct ovs_action_trunc),
3178                         [OVS_ACTION_ATTR_PUSH_ETH] = sizeof(struct ovs_action_push_eth),
3179                         [OVS_ACTION_ATTR_POP_ETH] = 0,
3180                         [OVS_ACTION_ATTR_PUSH_NSH] = (u32)-1,
3181                         [OVS_ACTION_ATTR_POP_NSH] = 0,
3182                         [OVS_ACTION_ATTR_METER] = sizeof(u32),
3183                         [OVS_ACTION_ATTR_CLONE] = (u32)-1,
3184                         [OVS_ACTION_ATTR_CHECK_PKT_LEN] = (u32)-1,
3185                         [OVS_ACTION_ATTR_ADD_MPLS] = sizeof(struct ovs_action_add_mpls),
3186                         [OVS_ACTION_ATTR_DEC_TTL] = (u32)-1,
3187                         [OVS_ACTION_ATTR_DROP] = sizeof(u32),
3188                 };
3189                 const struct ovs_action_push_vlan *vlan;
3190                 int type = nla_type(a);
3191                 bool skip_copy;
3192
3193                 if (type > OVS_ACTION_ATTR_MAX ||
3194                     (action_lens[type] != nla_len(a) &&
3195                      action_lens[type] != (u32)-1))
3196                         return -EINVAL;
3197
3198                 skip_copy = false;
3199                 switch (type) {
3200                 case OVS_ACTION_ATTR_UNSPEC:
3201                         return -EINVAL;
3202
3203                 case OVS_ACTION_ATTR_USERSPACE:
3204                         err = validate_userspace(a);
3205                         if (err)
3206                                 return err;
3207                         break;
3208
3209                 case OVS_ACTION_ATTR_OUTPUT:
3210                         if (nla_get_u32(a) >= DP_MAX_PORTS)
3211                                 return -EINVAL;
3212                         break;
3213
3214                 case OVS_ACTION_ATTR_TRUNC: {
3215                         const struct ovs_action_trunc *trunc = nla_data(a);
3216
3217                         if (trunc->max_len < ETH_HLEN)
3218                                 return -EINVAL;
3219                         break;
3220                 }
3221
3222                 case OVS_ACTION_ATTR_HASH: {
3223                         const struct ovs_action_hash *act_hash = nla_data(a);
3224
3225                         switch (act_hash->hash_alg) {
3226                         case OVS_HASH_ALG_L4:
3227                                 fallthrough;
3228                         case OVS_HASH_ALG_SYM_L4:
3229                                 break;
3230                         default:
3231                                 return  -EINVAL;
3232                         }
3233
3234                         break;
3235                 }
3236
3237                 case OVS_ACTION_ATTR_POP_VLAN:
3238                         if (mac_proto != MAC_PROTO_ETHERNET)
3239                                 return -EINVAL;
3240                         vlan_tci = htons(0);
3241                         break;
3242
3243                 case OVS_ACTION_ATTR_PUSH_VLAN:
3244                         if (mac_proto != MAC_PROTO_ETHERNET)
3245                                 return -EINVAL;
3246                         vlan = nla_data(a);
3247                         if (!eth_type_vlan(vlan->vlan_tpid))
3248                                 return -EINVAL;
3249                         if (!(vlan->vlan_tci & htons(VLAN_CFI_MASK)))
3250                                 return -EINVAL;
3251                         vlan_tci = vlan->vlan_tci;
3252                         break;
3253
3254                 case OVS_ACTION_ATTR_RECIRC:
3255                         break;
3256
3257                 case OVS_ACTION_ATTR_ADD_MPLS: {
3258                         const struct ovs_action_add_mpls *mpls = nla_data(a);
3259
3260                         if (!eth_p_mpls(mpls->mpls_ethertype))
3261                                 return -EINVAL;
3262
3263                         if (mpls->tun_flags & OVS_MPLS_L3_TUNNEL_FLAG_MASK) {
3264                                 if (vlan_tci & htons(VLAN_CFI_MASK) ||
3265                                     (eth_type != htons(ETH_P_IP) &&
3266                                      eth_type != htons(ETH_P_IPV6) &&
3267                                      eth_type != htons(ETH_P_ARP) &&
3268                                      eth_type != htons(ETH_P_RARP) &&
3269                                      !eth_p_mpls(eth_type)))
3270                                         return -EINVAL;
3271                                 mpls_label_count++;
3272                         } else {
3273                                 if (mac_proto == MAC_PROTO_ETHERNET) {
3274                                         mpls_label_count = 1;
3275                                         mac_proto = MAC_PROTO_NONE;
3276                                 } else {
3277                                         mpls_label_count++;
3278                                 }
3279                         }
3280                         eth_type = mpls->mpls_ethertype;
3281                         break;
3282                 }
3283
3284                 case OVS_ACTION_ATTR_PUSH_MPLS: {
3285                         const struct ovs_action_push_mpls *mpls = nla_data(a);
3286
3287                         if (!eth_p_mpls(mpls->mpls_ethertype))
3288                                 return -EINVAL;
3289                         /* Prohibit push MPLS other than to a white list
3290                          * for packets that have a known tag order.
3291                          */
3292                         if (vlan_tci & htons(VLAN_CFI_MASK) ||
3293                             (eth_type != htons(ETH_P_IP) &&
3294                              eth_type != htons(ETH_P_IPV6) &&
3295                              eth_type != htons(ETH_P_ARP) &&
3296                              eth_type != htons(ETH_P_RARP) &&
3297                              !eth_p_mpls(eth_type)))
3298                                 return -EINVAL;
3299                         eth_type = mpls->mpls_ethertype;
3300                         mpls_label_count++;
3301                         break;
3302                 }
3303
3304                 case OVS_ACTION_ATTR_POP_MPLS: {
3305                         __be16  proto;
3306                         if (vlan_tci & htons(VLAN_CFI_MASK) ||
3307                             !eth_p_mpls(eth_type))
3308                                 return -EINVAL;
3309
3310                         /* Disallow subsequent L2.5+ set actions and mpls_pop
3311                          * actions once the last MPLS label in the packet is
3312                          * popped as there is no check here to ensure that
3313                          * the new eth type is valid and thus set actions could
3314                          * write off the end of the packet or otherwise corrupt
3315                          * it.
3316                          *
3317                          * Support for these actions is planned using packet
3318                          * recirculation.
3319                          */
3320                         proto = nla_get_be16(a);
3321
3322                         if (proto == htons(ETH_P_TEB) &&
3323                             mac_proto != MAC_PROTO_NONE)
3324                                 return -EINVAL;
3325
3326                         mpls_label_count--;
3327
3328                         if (!eth_p_mpls(proto) || !mpls_label_count)
3329                                 eth_type = htons(0);
3330                         else
3331                                 eth_type =  proto;
3332
3333                         break;
3334                 }
3335
3336                 case OVS_ACTION_ATTR_SET:
3337                         err = validate_set(a, key, sfa,
3338                                            &skip_copy, mac_proto, eth_type,
3339                                            false, log);
3340                         if (err)
3341                                 return err;
3342                         break;
3343
3344                 case OVS_ACTION_ATTR_SET_MASKED:
3345                         err = validate_set(a, key, sfa,
3346                                            &skip_copy, mac_proto, eth_type,
3347                                            true, log);
3348                         if (err)
3349                                 return err;
3350                         break;
3351
3352                 case OVS_ACTION_ATTR_SAMPLE: {
3353                         bool last = nla_is_last(a, rem);
3354
3355                         err = validate_and_copy_sample(net, a, key, sfa,
3356                                                        eth_type, vlan_tci,
3357                                                        mpls_label_count,
3358                                                        log, last);
3359                         if (err)
3360                                 return err;
3361                         skip_copy = true;
3362                         break;
3363                 }
3364
3365                 case OVS_ACTION_ATTR_CT:
3366                         err = ovs_ct_copy_action(net, a, key, sfa, log);
3367                         if (err)
3368                                 return err;
3369                         skip_copy = true;
3370                         break;
3371
3372                 case OVS_ACTION_ATTR_CT_CLEAR:
3373                         break;
3374
3375                 case OVS_ACTION_ATTR_PUSH_ETH:
3376                         /* Disallow pushing an Ethernet header if one
3377                          * is already present */
3378                         if (mac_proto != MAC_PROTO_NONE)
3379                                 return -EINVAL;
3380                         mac_proto = MAC_PROTO_ETHERNET;
3381                         break;
3382
3383                 case OVS_ACTION_ATTR_POP_ETH:
3384                         if (mac_proto != MAC_PROTO_ETHERNET)
3385                                 return -EINVAL;
3386                         if (vlan_tci & htons(VLAN_CFI_MASK))
3387                                 return -EINVAL;
3388                         mac_proto = MAC_PROTO_NONE;
3389                         break;
3390
3391                 case OVS_ACTION_ATTR_PUSH_NSH:
3392                         if (mac_proto != MAC_PROTO_ETHERNET) {
3393                                 u8 next_proto;
3394
3395                                 next_proto = tun_p_from_eth_p(eth_type);
3396                                 if (!next_proto)
3397                                         return -EINVAL;
3398                         }
3399                         mac_proto = MAC_PROTO_NONE;
3400                         if (!validate_nsh(nla_data(a), false, true, true))
3401                                 return -EINVAL;
3402                         break;
3403
3404                 case OVS_ACTION_ATTR_POP_NSH: {
3405                         __be16 inner_proto;
3406
3407                         if (eth_type != htons(ETH_P_NSH))
3408                                 return -EINVAL;
3409                         inner_proto = tun_p_to_eth_p(key->nsh.base.np);
3410                         if (!inner_proto)
3411                                 return -EINVAL;
3412                         if (key->nsh.base.np == TUN_P_ETHERNET)
3413                                 mac_proto = MAC_PROTO_ETHERNET;
3414                         else
3415                                 mac_proto = MAC_PROTO_NONE;
3416                         break;
3417                 }
3418
3419                 case OVS_ACTION_ATTR_METER:
3420                         /* Non-existent meters are simply ignored.  */
3421                         break;
3422
3423                 case OVS_ACTION_ATTR_CLONE: {
3424                         bool last = nla_is_last(a, rem);
3425
3426                         err = validate_and_copy_clone(net, a, key, sfa,
3427                                                       eth_type, vlan_tci,
3428                                                       mpls_label_count,
3429                                                       log, last);
3430                         if (err)
3431                                 return err;
3432                         skip_copy = true;
3433                         break;
3434                 }
3435
3436                 case OVS_ACTION_ATTR_CHECK_PKT_LEN: {
3437                         bool last = nla_is_last(a, rem);
3438
3439                         err = validate_and_copy_check_pkt_len(net, a, key, sfa,
3440                                                               eth_type,
3441                                                               vlan_tci,
3442                                                               mpls_label_count,
3443                                                               log, last);
3444                         if (err)
3445                                 return err;
3446                         skip_copy = true;
3447                         break;
3448                 }
3449
3450                 case OVS_ACTION_ATTR_DEC_TTL:
3451                         err = validate_and_copy_dec_ttl(net, a, key, sfa,
3452                                                         eth_type, vlan_tci,
3453                                                         mpls_label_count, log);
3454                         if (err)
3455                                 return err;
3456                         skip_copy = true;
3457                         break;
3458
3459                 case OVS_ACTION_ATTR_DROP:
3460                         if (!nla_is_last(a, rem))
3461                                 return -EINVAL;
3462                         break;
3463
3464                 default:
3465                         OVS_NLERR(log, "Unknown Action type %d", type);
3466                         return -EINVAL;
3467                 }
3468                 if (!skip_copy) {
3469                         err = copy_action(a, sfa, log);
3470                         if (err)
3471                                 return err;
3472                 }
3473         }
3474
3475         if (rem > 0)
3476                 return -EINVAL;
3477
3478         return 0;
3479 }
3480
3481 /* 'key' must be the masked key. */
3482 int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
3483                          const struct sw_flow_key *key,
3484                          struct sw_flow_actions **sfa, bool log)
3485 {
3486         int err;
3487         u32 mpls_label_count = 0;
3488
3489         *sfa = nla_alloc_flow_actions(min(nla_len(attr), MAX_ACTIONS_BUFSIZE));
3490         if (IS_ERR(*sfa))
3491                 return PTR_ERR(*sfa);
3492
3493         if (eth_p_mpls(key->eth.type))
3494                 mpls_label_count = hweight_long(key->mpls.num_labels_mask);
3495
3496         (*sfa)->orig_len = nla_len(attr);
3497         err = __ovs_nla_copy_actions(net, attr, key, sfa, key->eth.type,
3498                                      key->eth.vlan.tci, mpls_label_count, log);
3499         if (err)
3500                 ovs_nla_free_flow_actions(*sfa);
3501
3502         return err;
3503 }
3504
3505 static int sample_action_to_attr(const struct nlattr *attr,
3506                                  struct sk_buff *skb)
3507 {
3508         struct nlattr *start, *ac_start = NULL, *sample_arg;
3509         int err = 0, rem = nla_len(attr);
3510         const struct sample_arg *arg;
3511         struct nlattr *actions;
3512
3513         start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_SAMPLE);
3514         if (!start)
3515                 return -EMSGSIZE;
3516
3517         sample_arg = nla_data(attr);
3518         arg = nla_data(sample_arg);
3519         actions = nla_next(sample_arg, &rem);
3520
3521         if (nla_put_u32(skb, OVS_SAMPLE_ATTR_PROBABILITY, arg->probability)) {
3522                 err = -EMSGSIZE;
3523                 goto out;
3524         }
3525
3526         ac_start = nla_nest_start_noflag(skb, OVS_SAMPLE_ATTR_ACTIONS);
3527         if (!ac_start) {
3528                 err = -EMSGSIZE;
3529                 goto out;
3530         }
3531
3532         err = ovs_nla_put_actions(actions, rem, skb);
3533
3534 out:
3535         if (err) {
3536                 nla_nest_cancel(skb, ac_start);
3537                 nla_nest_cancel(skb, start);
3538         } else {
3539                 nla_nest_end(skb, ac_start);
3540                 nla_nest_end(skb, start);
3541         }
3542
3543         return err;
3544 }
3545
3546 static int clone_action_to_attr(const struct nlattr *attr,
3547                                 struct sk_buff *skb)
3548 {
3549         struct nlattr *start;
3550         int err = 0, rem = nla_len(attr);
3551
3552         start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_CLONE);
3553         if (!start)
3554                 return -EMSGSIZE;
3555
3556         /* Skipping the OVS_CLONE_ATTR_EXEC that is always the first attribute. */
3557         attr = nla_next(nla_data(attr), &rem);
3558         err = ovs_nla_put_actions(attr, rem, skb);
3559
3560         if (err)
3561                 nla_nest_cancel(skb, start);
3562         else
3563                 nla_nest_end(skb, start);
3564
3565         return err;
3566 }
3567
3568 static int check_pkt_len_action_to_attr(const struct nlattr *attr,
3569                                         struct sk_buff *skb)
3570 {
3571         struct nlattr *start, *ac_start = NULL;
3572         const struct check_pkt_len_arg *arg;
3573         const struct nlattr *a, *cpl_arg;
3574         int err = 0, rem = nla_len(attr);
3575
3576         start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_CHECK_PKT_LEN);
3577         if (!start)
3578                 return -EMSGSIZE;
3579
3580         /* The first nested attribute in 'attr' is always
3581          * 'OVS_CHECK_PKT_LEN_ATTR_ARG'.
3582          */
3583         cpl_arg = nla_data(attr);
3584         arg = nla_data(cpl_arg);
3585
3586         if (nla_put_u16(skb, OVS_CHECK_PKT_LEN_ATTR_PKT_LEN, arg->pkt_len)) {
3587                 err = -EMSGSIZE;
3588                 goto out;
3589         }
3590
3591         /* Second nested attribute in 'attr' is always
3592          * 'OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL'.
3593          */
3594         a = nla_next(cpl_arg, &rem);
3595         ac_start =  nla_nest_start_noflag(skb,
3596                                           OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL);
3597         if (!ac_start) {
3598                 err = -EMSGSIZE;
3599                 goto out;
3600         }
3601
3602         err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb);
3603         if (err) {
3604                 nla_nest_cancel(skb, ac_start);
3605                 goto out;
3606         } else {
3607                 nla_nest_end(skb, ac_start);
3608         }
3609
3610         /* Third nested attribute in 'attr' is always
3611          * OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER.
3612          */
3613         a = nla_next(a, &rem);
3614         ac_start =  nla_nest_start_noflag(skb,
3615                                           OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER);
3616         if (!ac_start) {
3617                 err = -EMSGSIZE;
3618                 goto out;
3619         }
3620
3621         err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb);
3622         if (err) {
3623                 nla_nest_cancel(skb, ac_start);
3624                 goto out;
3625         } else {
3626                 nla_nest_end(skb, ac_start);
3627         }
3628
3629         nla_nest_end(skb, start);
3630         return 0;
3631
3632 out:
3633         nla_nest_cancel(skb, start);
3634         return err;
3635 }
3636
3637 static int dec_ttl_action_to_attr(const struct nlattr *attr,
3638                                   struct sk_buff *skb)
3639 {
3640         struct nlattr *start, *action_start;
3641         const struct nlattr *a;
3642         int err = 0, rem;
3643
3644         start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_DEC_TTL);
3645         if (!start)
3646                 return -EMSGSIZE;
3647
3648         nla_for_each_attr(a, nla_data(attr), nla_len(attr), rem) {
3649                 switch (nla_type(a)) {
3650                 case OVS_DEC_TTL_ATTR_ACTION:
3651
3652                         action_start = nla_nest_start_noflag(skb, OVS_DEC_TTL_ATTR_ACTION);
3653                         if (!action_start) {
3654                                 err = -EMSGSIZE;
3655                                 goto out;
3656                         }
3657
3658                         err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb);
3659                         if (err)
3660                                 goto out;
3661
3662                         nla_nest_end(skb, action_start);
3663                         break;
3664
3665                 default:
3666                         /* Ignore all other option to be future compatible */
3667                         break;
3668                 }
3669         }
3670
3671         nla_nest_end(skb, start);
3672         return 0;
3673
3674 out:
3675         nla_nest_cancel(skb, start);
3676         return err;
3677 }
3678
3679 static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
3680 {
3681         const struct nlattr *ovs_key = nla_data(a);
3682         int key_type = nla_type(ovs_key);
3683         struct nlattr *start;
3684         int err;
3685
3686         switch (key_type) {
3687         case OVS_KEY_ATTR_TUNNEL_INFO: {
3688                 struct ovs_tunnel_info *ovs_tun = nla_data(ovs_key);
3689                 struct ip_tunnel_info *tun_info = &ovs_tun->tun_dst->u.tun_info;
3690
3691                 start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_SET);
3692                 if (!start)
3693                         return -EMSGSIZE;
3694
3695                 err =  ip_tun_to_nlattr(skb, &tun_info->key,
3696                                         ip_tunnel_info_opts(tun_info),
3697                                         tun_info->options_len,
3698                                         ip_tunnel_info_af(tun_info), tun_info->mode);
3699                 if (err)
3700                         return err;
3701                 nla_nest_end(skb, start);
3702                 break;
3703         }
3704         default:
3705                 if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key))
3706                         return -EMSGSIZE;
3707                 break;
3708         }
3709
3710         return 0;
3711 }
3712
3713 static int masked_set_action_to_set_action_attr(const struct nlattr *a,
3714                                                 struct sk_buff *skb)
3715 {
3716         const struct nlattr *ovs_key = nla_data(a);
3717         struct nlattr *nla;
3718         size_t key_len = nla_len(ovs_key) / 2;
3719
3720         /* Revert the conversion we did from a non-masked set action to
3721          * masked set action.
3722          */
3723         nla = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_SET);
3724         if (!nla)
3725                 return -EMSGSIZE;
3726
3727         if (nla_put(skb, nla_type(ovs_key), key_len, nla_data(ovs_key)))
3728                 return -EMSGSIZE;
3729
3730         nla_nest_end(skb, nla);
3731         return 0;
3732 }
3733
3734 int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
3735 {
3736         const struct nlattr *a;
3737         int rem, err;
3738
3739         nla_for_each_attr(a, attr, len, rem) {
3740                 int type = nla_type(a);
3741
3742                 switch (type) {
3743                 case OVS_ACTION_ATTR_SET:
3744                         err = set_action_to_attr(a, skb);
3745                         if (err)
3746                                 return err;
3747                         break;
3748
3749                 case OVS_ACTION_ATTR_SET_TO_MASKED:
3750                         err = masked_set_action_to_set_action_attr(a, skb);
3751                         if (err)
3752                                 return err;
3753                         break;
3754
3755                 case OVS_ACTION_ATTR_SAMPLE:
3756                         err = sample_action_to_attr(a, skb);
3757                         if (err)
3758                                 return err;
3759                         break;
3760
3761                 case OVS_ACTION_ATTR_CT:
3762                         err = ovs_ct_action_to_attr(nla_data(a), skb);
3763                         if (err)
3764                                 return err;
3765                         break;
3766
3767                 case OVS_ACTION_ATTR_CLONE:
3768                         err = clone_action_to_attr(a, skb);
3769                         if (err)
3770                                 return err;
3771                         break;
3772
3773                 case OVS_ACTION_ATTR_CHECK_PKT_LEN:
3774                         err = check_pkt_len_action_to_attr(a, skb);
3775                         if (err)
3776                                 return err;
3777                         break;
3778
3779                 case OVS_ACTION_ATTR_DEC_TTL:
3780                         err = dec_ttl_action_to_attr(a, skb);
3781                         if (err)
3782                                 return err;
3783                         break;
3784
3785                 default:
3786                         if (nla_put(skb, type, nla_len(a), nla_data(a)))
3787                                 return -EMSGSIZE;
3788                         break;
3789                 }
3790         }
3791
3792         return 0;
3793 }