1 /* SPDX-License-Identifier: GPL-2.0-only */
2 /* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
3 * Patrick Schaaf <bof@bof.de>
4 * Martin Josefsson <gandalf@wlug.westbo.se>
5 * Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org>
11 #include <linux/ipv6.h>
12 #include <linux/netlink.h>
13 #include <linux/netfilter.h>
14 #include <linux/netfilter/x_tables.h>
15 #include <linux/stringify.h>
16 #include <linux/vmalloc.h>
17 #include <net/netlink.h>
18 #include <uapi/linux/netfilter/ipset/ip_set.h>
/* Build the MODULE_DESCRIPTION() of a set type from its name and its
 * revision range. The outer macro stringifies both revision bounds so
 * that the inner one can paste them into the description string.
 */
#define _IP_SET_MODULE_DESC(name, rev_min, rev_max)			\
	MODULE_DESCRIPTION(name " type of IP sets, revisions " rev_min "-" rev_max)
#define IP_SET_MODULE_DESC(name, rev_min, rev_max)			\
	_IP_SET_MODULE_DESC(name, __stringify(rev_min), __stringify(rev_max))
27 IPSET_TYPE_IP_FLAG = 0,
28 IPSET_TYPE_IP = (1 << IPSET_TYPE_IP_FLAG),
29 IPSET_TYPE_PORT_FLAG = 1,
30 IPSET_TYPE_PORT = (1 << IPSET_TYPE_PORT_FLAG),
31 IPSET_TYPE_MAC_FLAG = 2,
32 IPSET_TYPE_MAC = (1 << IPSET_TYPE_MAC_FLAG),
33 IPSET_TYPE_IP2_FLAG = 3,
34 IPSET_TYPE_IP2 = (1 << IPSET_TYPE_IP2_FLAG),
35 IPSET_TYPE_NAME_FLAG = 4,
36 IPSET_TYPE_NAME = (1 << IPSET_TYPE_NAME_FLAG),
37 IPSET_TYPE_IFACE_FLAG = 5,
38 IPSET_TYPE_IFACE = (1 << IPSET_TYPE_IFACE_FLAG),
39 IPSET_TYPE_MARK_FLAG = 6,
40 IPSET_TYPE_MARK = (1 << IPSET_TYPE_MARK_FLAG),
41 IPSET_TYPE_NOMATCH_FLAG = 7,
42 IPSET_TYPE_NOMATCH = (1 << IPSET_TYPE_NOMATCH_FLAG),
43 /* Strictly speaking not a feature, but a flag for dumping:
44 * this settype must be dumped last */
45 IPSET_DUMP_LAST_FLAG = 8,
46 IPSET_DUMP_LAST = (1 << IPSET_DUMP_LAST_FLAG),
50 enum ip_set_extension {
51 IPSET_EXT_BIT_TIMEOUT = 0,
52 IPSET_EXT_TIMEOUT = (1 << IPSET_EXT_BIT_TIMEOUT),
53 IPSET_EXT_BIT_COUNTER = 1,
54 IPSET_EXT_COUNTER = (1 << IPSET_EXT_BIT_COUNTER),
55 IPSET_EXT_BIT_COMMENT = 2,
56 IPSET_EXT_COMMENT = (1 << IPSET_EXT_BIT_COMMENT),
57 IPSET_EXT_BIT_SKBINFO = 3,
58 IPSET_EXT_SKBINFO = (1 << IPSET_EXT_BIT_SKBINFO),
59 /* Mark set with an extension which needs to call destroy */
60 IPSET_EXT_BIT_DESTROY = 7,
61 IPSET_EXT_DESTROY = (1 << IPSET_EXT_BIT_DESTROY),
/* Check whether a set carries a given extension bit in ->extensions, or
 * the forceadd create flag in ->flags. Each macro evaluates to the masked
 * bit itself (zero or the flag value), not to a normalized boolean.
 */
#define SET_WITH_TIMEOUT(set)	((set)->extensions & IPSET_EXT_TIMEOUT)
#define SET_WITH_COUNTER(set)	((set)->extensions & IPSET_EXT_COUNTER)
#define SET_WITH_COMMENT(set)	((set)->extensions & IPSET_EXT_COMMENT)
#define SET_WITH_SKBINFO(set)	((set)->extensions & IPSET_EXT_SKBINFO)
#define SET_WITH_FORCEADD(set)	((set)->flags & IPSET_CREATE_FLAG_FORCEADD)
70 /* Extension id, in size order */
72 IPSET_EXT_ID_COUNTER = 0,
82 struct ip_set_ext_type {
83 /* Destroy extension private data (can be NULL) */
84 void (*destroy)(struct ip_set *set, void *ext);
85 enum ip_set_extension type;
86 enum ipset_cadt_flags flag;
87 /* Size and minimal alignment */
92 extern const struct ip_set_ext_type ip_set_extensions[];
94 struct ip_set_counter {
99 struct ip_set_comment_rcu {
104 struct ip_set_comment {
105 struct ip_set_comment_rcu __rcu *c;
108 struct ip_set_skbinfo {
117 struct ip_set_skbinfo skbinfo;
/* Typed accessors for the extension areas of an element: each macro adds
 * the set's offset for that extension to the element's base address and
 * casts the result to the extension's type.
 */
#define ext_timeout(elem, set)						\
	((unsigned long *)						\
	 ((void *)(elem) + (set)->offset[IPSET_EXT_ID_TIMEOUT]))
#define ext_counter(elem, set)						\
	((struct ip_set_counter *)					\
	 ((void *)(elem) + (set)->offset[IPSET_EXT_ID_COUNTER]))
#define ext_comment(elem, set)						\
	((struct ip_set_comment *)					\
	 ((void *)(elem) + (set)->offset[IPSET_EXT_ID_COMMENT]))
#define ext_skbinfo(elem, set)						\
	((struct ip_set_skbinfo *)					\
	 ((void *)(elem) + (set)->offset[IPSET_EXT_ID_SKBINFO]))
138 typedef int (*ipset_adtfn)(struct ip_set *set, void *value,
139 const struct ip_set_ext *ext,
140 struct ip_set_ext *mext, u32 cmdflags);
142 /* Kernel API function options */
143 struct ip_set_adt_opt {
144 u8 family; /* Actual protocol family */
145 u8 dim; /* Dimension of match/target */
146 u8 flags; /* Direction and negation flags */
147 u32 cmdflags; /* Command-like flags */
148 struct ip_set_ext ext; /* Extensions */
151 /* Set type, variant-specific part */
152 struct ip_set_type_variant {
153 /* Kernelspace: test/add/del entries
154 * returns negative error code,
155 * zero for no match/success to add/delete
156 * positive for matching element */
157 int (*kadt)(struct ip_set *set, const struct sk_buff *skb,
158 const struct xt_action_param *par,
159 enum ipset_adt adt, struct ip_set_adt_opt *opt);
161 /* Userspace: test/add/del entries
162 * returns negative error code,
163 * zero for no match/success to add/delete
164 * positive for matching element */
165 int (*uadt)(struct ip_set *set, struct nlattr *tb[],
166 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried);
168 /* Low level add/del/test functions */
169 ipset_adtfn adt[IPSET_ADT_MAX];
171 /* When adding entries and set is full, try to resize the set */
172 int (*resize)(struct ip_set *set, bool retried);
173 /* Destroy the set */
174 void (*destroy)(struct ip_set *set);
175 /* Flush the elements */
176 void (*flush)(struct ip_set *set);
177 /* Expire entries before listing */
178 void (*expire)(struct ip_set *set);
179 /* List set header data */
180 int (*head)(struct ip_set *set, struct sk_buff *skb);
182 int (*list)(const struct ip_set *set, struct sk_buff *skb,
183 struct netlink_callback *cb);
184 /* Keep listing private when resizing runs parallel */
185 void (*uref)(struct ip_set *set, struct netlink_callback *cb,
188 /* Return true if "b" set is the same as "a"
189 * according to the create set parameters */
190 bool (*same_set)(const struct ip_set *a, const struct ip_set *b);
191 /* Region-locking is used */
195 struct ip_set_region {
196 spinlock_t lock; /* Region lock */
197 size_t ext_size; /* Size of the dynamic extensions */
198 u32 elements; /* Number of elements vs timeout */
201 /* The core set type structure */
203 struct list_head list;
206 char name[IPSET_MAXNAMELEN];
207 /* Protocol version */
209 /* Set type dimension */
212 * Supported family: may be NFPROTO_UNSPEC for both
213 * NFPROTO_IPV4/NFPROTO_IPV6.
217 u8 revision_min, revision_max;
218 /* Set features to control swapping */
222 int (*create)(struct net *net, struct ip_set *set,
223 struct nlattr *tb[], u32 flags);
225 /* Attribute policies */
226 const struct nla_policy create_policy[IPSET_ATTR_CREATE_MAX + 1];
227 const struct nla_policy adt_policy[IPSET_ATTR_ADT_MAX + 1];
229 /* Set this to THIS_MODULE if you are a module, otherwise NULL */
/* Register and unregister a set type */
int ip_set_type_register(struct ip_set_type *set_type);
void ip_set_type_unregister(struct ip_set_type *set_type);
237 /* A generic IP set */
239 /* The name of the set */
240 char name[IPSET_MAXNAMELEN];
241 /* Lock protecting the set data */
243 /* References to the set */
245 /* References to the set for netlink events like dump,
246 * ref can be swapped out by ip_set_swap
249 /* The core set type */
250 struct ip_set_type *type;
251 /* The type variant doing the real job */
252 const struct ip_set_type_variant *variant;
253 /* The actual INET family of the set */
255 /* The type revision */
261 /* Default timeout value, if enabled */
263 /* Number of elements (vs timeout) */
265 /* Size of the dynamic extensions (vs timeout) */
267 /* Element data size */
269 /* Offsets to extensions in elements */
270 size_t offset[IPSET_EXT_ID_MAX];
271 /* The type specific data */
276 ip_set_ext_destroy(struct ip_set *set, void *data)
278 /* Check that the extension is enabled for the set and
 * call its destroy function for its extension part in data.
281 if (SET_WITH_COMMENT(set))
282 ip_set_extensions[IPSET_EXT_ID_COMMENT].destroy(
283 set, ext_comment(data, set));
287 ip_set_put_flags(struct sk_buff *skb, struct ip_set *set)
291 if (SET_WITH_TIMEOUT(set))
292 if (unlikely(nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
293 htonl(set->timeout))))
295 if (SET_WITH_COUNTER(set))
296 cadt_flags |= IPSET_FLAG_WITH_COUNTERS;
297 if (SET_WITH_COMMENT(set))
298 cadt_flags |= IPSET_FLAG_WITH_COMMENT;
299 if (SET_WITH_SKBINFO(set))
300 cadt_flags |= IPSET_FLAG_WITH_SKBINFO;
301 if (SET_WITH_FORCEADD(set))
302 cadt_flags |= IPSET_FLAG_WITH_FORCEADD;
306 return nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(cadt_flags));
309 /* Netlink CB args */
311 IPSET_CB_NET = 0, /* net namespace */
312 IPSET_CB_PROTO, /* ipset protocol */
313 IPSET_CB_DUMP, /* dump single set/all sets */
314 IPSET_CB_INDEX, /* set index */
315 IPSET_CB_PRIVATE, /* set private data */
316 IPSET_CB_ARG0, /* type specific */
319 /* register and unregister set references */
320 extern ip_set_id_t ip_set_get_byname(struct net *net,
321 const char *name, struct ip_set **set);
322 extern void ip_set_put_byindex(struct net *net, ip_set_id_t index);
323 extern void ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name);
324 extern ip_set_id_t ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index);
325 extern void ip_set_nfnl_put(struct net *net, ip_set_id_t index);
327 /* API for iptables set match, and SET target */
329 extern int ip_set_add(ip_set_id_t id, const struct sk_buff *skb,
330 const struct xt_action_param *par,
331 struct ip_set_adt_opt *opt);
332 extern int ip_set_del(ip_set_id_t id, const struct sk_buff *skb,
333 const struct xt_action_param *par,
334 struct ip_set_adt_opt *opt);
335 extern int ip_set_test(ip_set_id_t id, const struct sk_buff *skb,
336 const struct xt_action_param *par,
337 struct ip_set_adt_opt *opt);
339 /* Utility functions */
340 extern void *ip_set_alloc(size_t size);
341 extern void ip_set_free(void *members);
342 extern int ip_set_get_ipaddr4(struct nlattr *nla, __be32 *ipaddr);
343 extern int ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr);
344 extern size_t ip_set_elem_len(struct ip_set *set, struct nlattr *tb[],
345 size_t len, size_t align);
346 extern int ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
347 struct ip_set_ext *ext);
348 extern int ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set,
349 const void *e, bool active);
350 extern bool ip_set_match_extensions(struct ip_set *set,
351 const struct ip_set_ext *ext,
352 struct ip_set_ext *mext,
353 u32 flags, void *data);
356 ip_set_get_hostipaddr4(struct nlattr *nla, u32 *ipaddr)
359 int ret = ip_set_get_ipaddr4(nla, &ip);
367 /* Ignore IPSET_ERR_EXIST errors if asked to do so? */
369 ip_set_eexist(int ret, u32 flags)
371 return ret == -IPSET_ERR_EXIST && (flags & IPSET_FLAG_EXIST);
374 /* Match elements marked with nomatch */
376 ip_set_enomatch(int ret, u32 flags, enum ipset_adt adt, struct ip_set *set)
378 return adt == IPSET_TEST &&
379 (set->type->features & IPSET_TYPE_NOMATCH) &&
380 ((flags >> 16) & IPSET_FLAG_NOMATCH) &&
381 (ret > 0 || ret == -ENOTEMPTY);
384 /* Check the NLA_F_NET_BYTEORDER flag */
386 ip_set_attr_netorder(struct nlattr *tb[], int type)
388 return tb[type] && (tb[type]->nla_type & NLA_F_NET_BYTEORDER);
392 ip_set_optattr_netorder(struct nlattr *tb[], int type)
394 return !tb[type] || (tb[type]->nla_type & NLA_F_NET_BYTEORDER);
397 /* Useful converters */
399 ip_set_get_h32(const struct nlattr *attr)
401 return ntohl(nla_get_be32(attr));
405 ip_set_get_h16(const struct nlattr *attr)
407 return ntohs(nla_get_be16(attr));
410 static inline int nla_put_ipaddr4(struct sk_buff *skb, int type, __be32 ipaddr)
412 struct nlattr *__nested = nla_nest_start(skb, type);
417 ret = nla_put_in_addr(skb, IPSET_ATTR_IPADDR_IPV4, ipaddr);
419 nla_nest_end(skb, __nested);
423 static inline int nla_put_ipaddr6(struct sk_buff *skb, int type,
424 const struct in6_addr *ipaddrptr)
426 struct nlattr *__nested = nla_nest_start(skb, type);
431 ret = nla_put_in6_addr(skb, IPSET_ATTR_IPADDR_IPV6, ipaddrptr);
433 nla_nest_end(skb, __nested);
437 /* Get address from skbuff */
439 ip4addr(const struct sk_buff *skb, bool src)
441 return src ? ip_hdr(skb)->saddr : ip_hdr(skb)->daddr;
445 ip4addrptr(const struct sk_buff *skb, bool src, __be32 *addr)
447 *addr = src ? ip_hdr(skb)->saddr : ip_hdr(skb)->daddr;
451 ip6addrptr(const struct sk_buff *skb, bool src, struct in6_addr *addr)
453 memcpy(addr, src ? &ipv6_hdr(skb)->saddr : &ipv6_hdr(skb)->daddr,
/* How often should the gc be run by default */
#define IPSET_GC_TIME			(3 * 60)

/* Timeout period depending on the timeout value of the given set:
 * one third of the timeout, capped at IPSET_GC_TIME, and never zero.
 * The argument is parenthesized at every use so that an expression
 * argument such as "a + b" is divided as a whole in both places.
 */
#define IPSET_GC_PERIOD(timeout) \
	(((timeout) / 3) ? min_t(u32, (timeout) / 3, IPSET_GC_TIME) : 1)
/* Entry is set with no timeout value */
#define IPSET_ELEM_PERMANENT	0

/* Set is defined with timeout support: timeout value may be 0 */
#define IPSET_NO_TIMEOUT	UINT_MAX

/* Max timeout value, see msecs_to_jiffies() in jiffies.h.
 * The whole expansion is parenthesized so the macro behaves as a single
 * operand next to operators of equal or higher precedence (%, *, /, casts).
 */
#define IPSET_MAX_TIMEOUT	((UINT_MAX >> 1) / MSEC_PER_SEC)

/* Timeout to use for a request: the one carried in the options, unless it
 * is IPSET_NO_TIMEOUT, in which case the timeout of the set is used.
 */
#define ip_set_adt_opt_timeout(opt, set)	\
((opt)->ext.timeout != IPSET_NO_TIMEOUT ? (opt)->ext.timeout : (set)->timeout)
476 static inline unsigned int
477 ip_set_timeout_uget(struct nlattr *tb)
479 unsigned int timeout = ip_set_get_h32(tb);
481 /* Normalize to fit into jiffies */
482 if (timeout > IPSET_MAX_TIMEOUT)
483 timeout = IPSET_MAX_TIMEOUT;
489 ip_set_timeout_expired(const unsigned long *t)
491 return *t != IPSET_ELEM_PERMANENT && time_is_before_jiffies(*t);
495 ip_set_timeout_set(unsigned long *timeout, u32 value)
500 *timeout = IPSET_ELEM_PERMANENT;
504 t = msecs_to_jiffies(value * MSEC_PER_SEC) + jiffies;
505 if (t == IPSET_ELEM_PERMANENT)
512 ip_set_timeout_get(const unsigned long *timeout)
516 if (*timeout == IPSET_ELEM_PERMANENT)
519 t = jiffies_to_msecs(*timeout - jiffies)/MSEC_PER_SEC;
520 /* Zero value in userspace means no timeout */
521 return t == 0 ? 1 : t;
525 ip_set_comment_uget(struct nlattr *tb)
530 /* Called from uadd only, protected by the set spinlock.
531 * The kadt functions don't use the comment extensions in any way.
534 ip_set_init_comment(struct ip_set *set, struct ip_set_comment *comment,
535 const struct ip_set_ext *ext)
537 struct ip_set_comment_rcu *c = rcu_dereference_protected(comment->c, 1);
538 size_t len = ext->comment ? strlen(ext->comment) : 0;
541 set->ext_size -= sizeof(*c) + strlen(c->str) + 1;
543 rcu_assign_pointer(comment->c, NULL);
547 if (unlikely(len > IPSET_MAX_COMMENT_SIZE))
548 len = IPSET_MAX_COMMENT_SIZE;
549 c = kmalloc(sizeof(*c) + len + 1, GFP_ATOMIC);
552 strlcpy(c->str, ext->comment, len + 1);
553 set->ext_size += sizeof(*c) + strlen(c->str) + 1;
554 rcu_assign_pointer(comment->c, c);
557 /* Used only when dumping a set, protected by rcu_read_lock() */
559 ip_set_put_comment(struct sk_buff *skb, const struct ip_set_comment *comment)
561 struct ip_set_comment_rcu *c = rcu_dereference(comment->c);
565 return nla_put_string(skb, IPSET_ATTR_COMMENT, c->str);
568 /* Called from uadd/udel, flush or the garbage collectors protected
569 * by the set spinlock.
570 * Called when the set is destroyed and when there can't be any user
571 * of the set data anymore.
574 ip_set_comment_free(struct ip_set *set, struct ip_set_comment *comment)
576 struct ip_set_comment_rcu *c;
578 c = rcu_dereference_protected(comment->c, 1);
581 set->ext_size -= sizeof(*c) + strlen(c->str) + 1;
583 rcu_assign_pointer(comment->c, NULL);
587 ip_set_add_bytes(u64 bytes, struct ip_set_counter *counter)
589 atomic64_add((long long)bytes, &(counter)->bytes);
593 ip_set_add_packets(u64 packets, struct ip_set_counter *counter)
595 atomic64_add((long long)packets, &(counter)->packets);
599 ip_set_get_bytes(const struct ip_set_counter *counter)
601 return (u64)atomic64_read(&(counter)->bytes);
605 ip_set_get_packets(const struct ip_set_counter *counter)
607 return (u64)atomic64_read(&(counter)->packets);
611 ip_set_match_counter(u64 counter, u64 match, u8 op)
614 case IPSET_COUNTER_NONE:
616 case IPSET_COUNTER_EQ:
617 return counter == match;
618 case IPSET_COUNTER_NE:
619 return counter != match;
620 case IPSET_COUNTER_LT:
621 return counter < match;
622 case IPSET_COUNTER_GT:
623 return counter > match;
629 ip_set_update_counter(struct ip_set_counter *counter,
630 const struct ip_set_ext *ext, u32 flags)
632 if (ext->packets != ULLONG_MAX &&
633 !(flags & IPSET_FLAG_SKIP_COUNTER_UPDATE)) {
634 ip_set_add_bytes(ext->bytes, counter);
635 ip_set_add_packets(ext->packets, counter);
640 ip_set_put_counter(struct sk_buff *skb, const struct ip_set_counter *counter)
642 return nla_put_net64(skb, IPSET_ATTR_BYTES,
643 cpu_to_be64(ip_set_get_bytes(counter)),
645 nla_put_net64(skb, IPSET_ATTR_PACKETS,
646 cpu_to_be64(ip_set_get_packets(counter)),
651 ip_set_init_counter(struct ip_set_counter *counter,
652 const struct ip_set_ext *ext)
654 if (ext->bytes != ULLONG_MAX)
655 atomic64_set(&(counter)->bytes, (long long)(ext->bytes));
656 if (ext->packets != ULLONG_MAX)
657 atomic64_set(&(counter)->packets, (long long)(ext->packets));
661 ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo,
662 const struct ip_set_ext *ext,
663 struct ip_set_ext *mext, u32 flags)
665 mext->skbinfo = *skbinfo;
669 ip_set_put_skbinfo(struct sk_buff *skb, const struct ip_set_skbinfo *skbinfo)
671 /* Send nonzero parameters only */
672 return ((skbinfo->skbmark || skbinfo->skbmarkmask) &&
673 nla_put_net64(skb, IPSET_ATTR_SKBMARK,
674 cpu_to_be64((u64)skbinfo->skbmark << 32 |
675 skbinfo->skbmarkmask),
678 nla_put_net32(skb, IPSET_ATTR_SKBPRIO,
679 cpu_to_be32(skbinfo->skbprio))) ||
680 (skbinfo->skbqueue &&
681 nla_put_net16(skb, IPSET_ATTR_SKBQUEUE,
682 cpu_to_be16(skbinfo->skbqueue)));
686 ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo,
687 const struct ip_set_ext *ext)
689 *skbinfo = ext->skbinfo;
/* Initializer of the extension data for a packet seen in the kernel:
 * accounts one packet of (skb)->len bytes, marks it as coming from a
 * target and picks the timeout via ip_set_adt_opt_timeout().
 */
#define IP_SET_INIT_KEXT(skb, opt, set)				\
	{							\
		.bytes = (skb)->len,				\
		.packets = 1,					\
		.target = true,					\
		.timeout = ip_set_adt_opt_timeout(opt, set)	\
	}

/* Initializer of the extension data for a userspace request: the byte
 * and packet counters start at ULLONG_MAX and the timeout defaults to
 * the timeout of the set.
 */
#define IP_SET_INIT_UEXT(set)					\
	{							\
		.bytes = ULLONG_MAX,				\
		.packets = ULLONG_MAX,				\
		.timeout = (set)->timeout			\
	}

/* Token pasting; IPSET_TOKEN() goes through IPSET_CONCAT() so that its
 * arguments are macro-expanded before being pasted together.
 */
#define IPSET_CONCAT(a, b)	a##b
#define IPSET_TOKEN(a, b)	IPSET_CONCAT(a, b)
703 #endif /*_IP_SET_H */