2 * net/core/fib_rules.c Generic Routing Rules
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation, version 2.
8 * Authors: Thomas Graf <tgraf@suug.ch>
11 #include <linux/types.h>
12 #include <linux/kernel.h>
13 #include <linux/slab.h>
14 #include <linux/list.h>
15 #include <linux/module.h>
16 #include <net/net_namespace.h>
18 #include <net/fib_rules.h>
19 #include <net/ip_tunnels.h>
/*
 * fib_default_rule_add - create a rule and append it to ops->rules_list.
 *
 * The rule is zero-allocated with ops->rule_size bytes, starts with a
 * reference count of 1, uses action FR_ACT_TO_TBL, takes its namespace
 * from ops->fro_net and has both suppress_* fields set to -1.
 *
 * NOTE(review): this listing omits several original lines (see the gaps
 * in the embedded numbering); the allocation-failure path and the stores
 * of pref/table/flags are not visible here - confirm against the full file.
 */
21 int fib_default_rule_add(struct fib_rules_ops *ops,
22 u32 pref, u32 table, u32 flags)
26 r = kzalloc(ops->rule_size, GFP_KERNEL);
30 atomic_set(&r->refcnt, 1);
31 r->action = FR_ACT_TO_TBL;
35 r->fr_net = ops->fro_net;
37 r->suppress_prefixlen = -1;
38 r->suppress_ifgroup = -1;
40 /* The lock is not required here, the list is unreachable
41 * at the moment this function is called */
42 list_add_tail(&r->list, &ops->rules_list);
45 EXPORT_SYMBOL(fib_default_rule_add);
/*
 * fib_default_rule_pref - derive a preference for a rule created without
 * an explicit FRA_PRIORITY (fib_nl_newrule calls this in that case).
 *
 * When the rules list is non-empty and has a second entry, that second
 * entry's pref minus one is returned.
 *
 * NOTE(review): original line 56 (between list_entry() and the return)
 * and the fallback return value are missing from this listing - confirm
 * them in the full file.
 */
47 static u32 fib_default_rule_pref(struct fib_rules_ops *ops)
49 struct list_head *pos;
50 struct fib_rule *rule;
52 if (!list_empty(&ops->rules_list)) {
53 pos = ops->rules_list.next;
54 if (pos->next != &ops->rules_list) {
55 rule = list_entry(pos->next, struct fib_rule, list);
57 return rule->pref - 1;
/*
 * Forward declaration of notify_rule_change(); the definition appears
 * further down in this file. NOTE(review): the final parameter line of
 * this prototype is missing from the listing.
 */
64 static void notify_rule_change(int event, struct fib_rule *rule,
65 struct fib_rules_ops *ops, struct nlmsghdr *nlh,
/*
 * lookup_rules_ops - find the rules ops registered in @net for @family.
 *
 * Walks net->rules_ops with the RCU list iterator and, on a family match,
 * tries to take a reference on the owning module with try_module_get().
 *
 * NOTE(review): the RCU read-side locking, the handling of a failed
 * try_module_get() and the return statements are not visible in this
 * listing - confirm in the full file.
 */
68 static struct fib_rules_ops *lookup_rules_ops(struct net *net, int family)
70 struct fib_rules_ops *ops;
73 list_for_each_entry_rcu(ops, &net->rules_ops, list) {
74 if (ops->family == family) {
75 if (!try_module_get(ops->owner))
/*
 * rules_ops_put - drop the module reference on ops->owner with
 * module_put(); counterpart of the try_module_get() in lookup_rules_ops().
 *
 * NOTE(review): original lines 87-88 are missing from this listing; any
 * guard around the call is not visible here.
 */
86 static void rules_ops_put(struct fib_rules_ops *ops)
89 module_put(ops->owner);
/*
 * flush_route_cache - invoke the family's flush_cache() callback.
 *
 * NOTE(review): original lines 93-94 are missing from this listing, so
 * whether the callback is checked for NULL first cannot be told from here.
 */
92 static void flush_route_cache(struct fib_rules_ops *ops)
95 ops->flush_cache(ops);
/*
 * __fib_rules_register - validate @ops and link it into net->rules_ops.
 *
 * Visible checks: ops->rule_size must be at least sizeof(struct fib_rule),
 * and the match, configure, compare and fill callbacks are tested against
 * NULL. Under net->rules_mod_lock the existing ops are scanned for one with
 * the same family before @ops is appended with list_add_tail_rcu().
 *
 * NOTE(review): the declaration of `net`, the remainder of the callback
 * check, the error codes and the outcome of a family clash are on lines
 * missing from this listing - confirm in the full file.
 */
98 static int __fib_rules_register(struct fib_rules_ops *ops)
101 struct fib_rules_ops *o;
106 if (ops->rule_size < sizeof(struct fib_rule))
109 if (ops->match == NULL || ops->configure == NULL ||
110 ops->compare == NULL || ops->fill == NULL ||
114 spin_lock(&net->rules_mod_lock);
115 list_for_each_entry(o, &net->rules_ops, list)
116 if (ops->family == o->family)
119 list_add_tail_rcu(&ops->list, &net->rules_ops);
122 spin_unlock(&net->rules_mod_lock);
/*
 * fib_rules_register - register a private copy of the template @tmpl.
 *
 * The template is duplicated with kmemdup(), the copy's rules_list is
 * initialised, and the copy is handed to __fib_rules_register().
 * ERR_PTR(-ENOMEM) is one visible return value.
 *
 * NOTE(review): the condition guarding the -ENOMEM return, the use of the
 * @net argument and the handling of `err` are on lines missing from this
 * listing - confirm in the full file.
 */
127 struct fib_rules_ops *
128 fib_rules_register(const struct fib_rules_ops *tmpl, struct net *net)
130 struct fib_rules_ops *ops;
133 ops = kmemdup(tmpl, sizeof(*ops), GFP_KERNEL);
135 return ERR_PTR(-ENOMEM);
137 INIT_LIST_HEAD(&ops->rules_list);
140 err = __fib_rules_register(ops);
148 EXPORT_SYMBOL_GPL(fib_rules_register);
/*
 * fib_rules_cleanup_ops - unlink every rule from ops->rules_list.
 *
 * Uses the _safe list iterator because entries are removed (with
 * list_del_rcu()) while the list is being walked.
 *
 * NOTE(review): whatever is done with each rule after it is unlinked is on
 * lines missing from this listing.
 */
150 static void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
152 struct fib_rule *rule, *tmp;
154 list_for_each_entry_safe(rule, tmp, &ops->rules_list, list) {
155 list_del_rcu(&rule->list);
/*
 * fib_rules_unregister - remove @ops from its namespace and drop its rules.
 *
 * The ops entry is unlinked from the per-namespace list with list_del_rcu()
 * while holding net->rules_mod_lock; afterwards fib_rules_cleanup_ops()
 * unlinks all rules that belonged to it.
 *
 * NOTE(review): how @ops itself is released is not visible in this listing.
 */
162 void fib_rules_unregister(struct fib_rules_ops *ops)
164 struct net *net = ops->fro_net;
166 spin_lock(&net->rules_mod_lock);
167 list_del_rcu(&ops->list);
168 spin_unlock(&net->rules_mod_lock);
170 fib_rules_cleanup_ops(ops);
173 EXPORT_SYMBOL_GPL(fib_rules_unregister);
/*
 * fib_rule_match - test a flow against a rule's generic selectors.
 *
 * Tested in order: input interface index, output interface index, mark
 * under mark_mask, tunnel id, and the l3mdev selector. A selector that is
 * zero on the rule is not compared. The family-specific ops->match()
 * result is then returned, logically negated when the rule carries
 * FIB_RULE_INVERT.
 *
 * NOTE(review): the statements executed when a selector mismatches, and the
 * declaration of `ret`, are on lines missing from this listing.
 */
175 static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
176 struct flowi *fl, int flags,
177 struct fib_lookup_arg *arg)
181 if (rule->iifindex && (rule->iifindex != fl->flowi_iif))
184 if (rule->oifindex && (rule->oifindex != fl->flowi_oif))
187 if ((rule->mark ^ fl->flowi_mark) & rule->mark_mask)
190 if (rule->tun_id && (rule->tun_id != fl->flowi_tun_key.tun_id))
193 if (rule->l3mdev && !l3mdev_fib_rule_match(rule->fr_net, fl, arg))
196 ret = ops->match(rule, fl, flags);
198 return (rule->flags & FIB_RULE_INVERT) ? !ret : ret;
/*
 * fib_rules_lookup - walk the rules of @ops for flow @fl.
 *
 * Visible behaviour: rules are iterated with the RCU list iterator and
 * tested with fib_rule_match(). For a matching rule the action decides what
 * happens next: FR_ACT_GOTO reads rule->ctarget via rcu_dereference() and
 * tests it for NULL, FR_ACT_NOP has its own branch, and otherwise
 * ops->action() is called and its result stored in `err`. When `err` is
 * zero the optional ops->suppress() callback is consulted. When `err` is
 * not -EAGAIN, a further condition holds if FIB_LOOKUP_NOREF is set in
 * arg->flags or atomic_inc_not_zero() on the rule's refcnt succeeds.
 *
 * NOTE(review): the bodies of every branch above (continue/goto/return
 * statements, what is stored into @arg) and the RCU read-side locking are
 * on lines missing from this listing - confirm in the full file.
 */
201 int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl,
202 int flags, struct fib_lookup_arg *arg)
204 struct fib_rule *rule;
209 list_for_each_entry_rcu(rule, &ops->rules_list, list) {
211 if (!fib_rule_match(rule, ops, fl, flags, arg))
214 if (rule->action == FR_ACT_GOTO) {
215 struct fib_rule *target;
217 target = rcu_dereference(rule->ctarget);
218 if (target == NULL) {
224 } else if (rule->action == FR_ACT_NOP)
227 err = ops->action(rule, fl, flags, arg);
229 if (!err && ops->suppress && ops->suppress(rule, arg))
232 if (err != -EAGAIN) {
233 if ((arg->flags & FIB_LOOKUP_NOREF) ||
234 likely(atomic_inc_not_zero(&rule->refcnt))) {
248 EXPORT_SYMBOL_GPL(fib_rules_lookup);
/*
 * validate_rulemsg - sanity-check the source/destination selectors of a
 * rule request.
 *
 * For each of FRA_SRC and FRA_DST the visible test fails when the
 * attribute is absent, when the prefix length in the header exceeds the
 * address width in bits (ops->addr_size * 8), or when the attribute
 * payload is not exactly ops->addr_size bytes long.
 *
 * NOTE(review): the outer conditions that decide when each test applies,
 * and the error code returned, are on lines missing from this listing.
 */
250 static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb,
251 struct fib_rules_ops *ops)
256 if (tb[FRA_SRC] == NULL ||
257 frh->src_len > (ops->addr_size * 8) ||
258 nla_len(tb[FRA_SRC]) != ops->addr_size)
262 if (tb[FRA_DST] == NULL ||
263 frh->dst_len > (ops->addr_size * 8) ||
264 nla_len(tb[FRA_DST]) != ops->addr_size)
/*
 * rule_exists - look for an already-installed rule equal to @rule.
 *
 * Each rule on ops->rules_list is compared with @rule field by field:
 * action, table, pref, iifname and oifname (full IFNAMSIZ bytes via
 * memcmp), mark, mark_mask, tun_id, fr_net and l3mdev. The family-specific
 * part is compared last through ops->compare() against the request
 * header and attributes.
 *
 * NOTE(review): the statements taken on a mismatch, the declaration of `r`
 * and the return values are on lines missing from this listing.
 */
272 static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh,
273 struct nlattr **tb, struct fib_rule *rule)
277 list_for_each_entry(r, &ops->rules_list, list) {
278 if (r->action != rule->action)
281 if (r->table != rule->table)
284 if (r->pref != rule->pref)
287 if (memcmp(r->iifname, rule->iifname, IFNAMSIZ))
290 if (memcmp(r->oifname, rule->oifname, IFNAMSIZ))
293 if (r->mark != rule->mark)
296 if (r->mark_mask != rule->mark_mask)
299 if (r->tun_id != rule->tun_id)
302 if (r->fr_net != rule->fr_net)
305 if (r->l3mdev != rule->l3mdev)
308 if (!ops->compare(r, frh, tb))
/*
 * fib_nl_newrule - rtnetlink handler that creates a rule from a request
 * (registered for RTM_NEWRULE in fib_rules_init()).
 *
 * Steps visible in this listing:
 *  - check that the message is large enough to hold a struct fib_rule_hdr;
 *  - look up the ops for frh->family, parse the attributes with the ops'
 *    policy and run validate_rulemsg();
 *  - allocate a zeroed rule of ops->rule_size bytes;
 *  - take pref from FRA_PRIORITY, or from fib_default_rule_pref() when the
 *    attribute is absent;
 *  - copy FRA_IIFNAME / FRA_OIFNAME into the rule and resolve them with
 *    __dev_get_by_name() to an ifindex;
 *  - read fwmark/fwmask, the tunnel id and the l3mdev flag;
 *  - copy action, flags and table, and the two suppress_* values, which are
 *    set to -1 on an alternative path;
 *  - for goto rules, read FRA_GOTO, compare the target with the rule's own
 *    pref (see the "Backward jumps" comment) and search the list for a rule
 *    whose pref equals the target;
 *  - honour NLM_F_EXCL through rule_exists();
 *  - let ops->configure() fill in the family-specific part;
 *  - insert the rule with list_add_rcu(), after scanning for the first
 *    existing rule whose pref is greater;
 *  - when ops->unresolved_rules is non-zero, point pending goto rules whose
 *    target equals the new pref at the new rule;
 *  - update nr_goto_rules / unresolved_rules, then send the notification
 *    and flush the route cache.
 *
 * NOTE(review): this listing has lost many lines, including all error
 * paths, several guarding conditions and comment delimiters. In
 * particular, the comment opened at original line 374 has no visible
 * closing marker and the text at original lines 457-458 has no visible
 * opening marker. Confirm every step above against the full file.
 */
315 int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh)
317 struct net *net = sock_net(skb->sk);
318 struct fib_rule_hdr *frh = nlmsg_data(nlh);
319 struct fib_rules_ops *ops = NULL;
320 struct fib_rule *rule, *r, *last = NULL;
321 struct nlattr *tb[FRA_MAX+1];
322 int err = -EINVAL, unresolved = 0;
324 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
327 ops = lookup_rules_ops(net, frh->family);
333 err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy);
337 err = validate_rulemsg(frh, tb, ops);
341 rule = kzalloc(ops->rule_size, GFP_KERNEL);
348 rule->pref = tb[FRA_PRIORITY] ? nla_get_u32(tb[FRA_PRIORITY])
349 : fib_default_rule_pref(ops);
351 if (tb[FRA_IIFNAME]) {
352 struct net_device *dev;
355 nla_strlcpy(rule->iifname, tb[FRA_IIFNAME], IFNAMSIZ);
356 dev = __dev_get_by_name(net, rule->iifname);
358 rule->iifindex = dev->ifindex;
361 if (tb[FRA_OIFNAME]) {
362 struct net_device *dev;
365 nla_strlcpy(rule->oifname, tb[FRA_OIFNAME], IFNAMSIZ);
366 dev = __dev_get_by_name(net, rule->oifname);
368 rule->oifindex = dev->ifindex;
371 if (tb[FRA_FWMARK]) {
372 rule->mark = nla_get_u32(tb[FRA_FWMARK]);
374 /* compatibility: if the mark value is non-zero all bits
375 * are compared unless a mask is explicitly specified.
377 rule->mark_mask = 0xFFFFFFFF;
381 rule->mark_mask = nla_get_u32(tb[FRA_FWMASK]);
384 rule->tun_id = nla_get_be64(tb[FRA_TUN_ID]);
386 if (tb[FRA_L3MDEV]) {
387 #ifdef CONFIG_NET_L3_MASTER_DEV
388 rule->l3mdev = nla_get_u8(tb[FRA_L3MDEV]);
389 if (rule->l3mdev != 1)
394 rule->action = frh->action;
395 rule->flags = frh->flags;
396 rule->table = frh_get_table(frh, tb);
397 if (tb[FRA_SUPPRESS_PREFIXLEN])
398 rule->suppress_prefixlen = nla_get_u32(tb[FRA_SUPPRESS_PREFIXLEN]);
400 rule->suppress_prefixlen = -1;
402 if (tb[FRA_SUPPRESS_IFGROUP])
403 rule->suppress_ifgroup = nla_get_u32(tb[FRA_SUPPRESS_IFGROUP]);
405 rule->suppress_ifgroup = -1;
409 if (rule->action != FR_ACT_GOTO)
412 rule->target = nla_get_u32(tb[FRA_GOTO]);
413 /* Backward jumps are prohibited to avoid endless loops */
414 if (rule->target <= rule->pref)
417 list_for_each_entry(r, &ops->rules_list, list) {
418 if (r->pref == rule->target) {
419 RCU_INIT_POINTER(rule->ctarget, r);
424 if (rcu_dereference_protected(rule->ctarget, 1) == NULL)
426 } else if (rule->action == FR_ACT_GOTO)
429 if (rule->l3mdev && rule->table)
432 if ((nlh->nlmsg_flags & NLM_F_EXCL) &&
433 rule_exists(ops, frh, tb, rule)) {
438 err = ops->configure(rule, skb, frh, tb);
442 list_for_each_entry(r, &ops->rules_list, list) {
443 if (r->pref > rule->pref)
451 list_add_rcu(&rule->list, &last->list);
453 list_add_rcu(&rule->list, &ops->rules_list);
455 if (ops->unresolved_rules) {
457 * There are unresolved goto rules in the list, check if
458 * any of them are pointing to this new rule.
460 list_for_each_entry(r, &ops->rules_list, list) {
461 if (r->action == FR_ACT_GOTO &&
462 r->target == rule->pref &&
463 rtnl_dereference(r->ctarget) == NULL) {
464 rcu_assign_pointer(r->ctarget, rule);
465 if (--ops->unresolved_rules == 0)
471 if (rule->action == FR_ACT_GOTO)
472 ops->nr_goto_rules++;
475 ops->unresolved_rules++;
478 ip_tunnel_need_metadata();
480 notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid);
481 flush_route_cache(ops);
491 EXPORT_SYMBOL_GPL(fib_nl_newrule);
/*
 * fib_nl_delrule - rtnetlink handler that deletes a rule matching a request
 * (registered for RTM_DELRULE in fib_rules_init()).
 *
 * Steps visible in this listing:
 *  - check the message length, look up the ops for frh->family, parse the
 *    attributes and run validate_rulemsg();
 *  - walk ops->rules_list and compare each rule with the request. A
 *    selector takes part in the comparison only when the request supplies
 *    it: a non-zero frh->action, a non-zero table, and the attributes
 *    FRA_PRIORITY, FRA_IIFNAME, FRA_OIFNAME, FRA_FWMARK, FRA_FWMASK,
 *    FRA_TUN_ID and FRA_L3MDEV. The family-specific part is compared with
 *    ops->compare();
 *  - a rule flagged FIB_RULE_PERMANENT gets a dedicated branch;
 *  - ops->delete() is called, then the rule is unlinked with
 *    list_del_rcu();
 *  - for a goto rule, nr_goto_rules is decremented, and unresolved_rules
 *    too when the rule had no resolved target;
 *  - when goto rules remain, every rule whose ctarget pointed at the
 *    deleted rule has ctarget cleared and unresolved_rules is incremented;
 *  - the notification is sent and the route cache flushed.
 *
 * NOTE(review): the error paths, the `continue` statements, any guard
 * around ops->delete() and the final release of the rule are on lines
 * missing from this listing. The text at original lines 581-584 has lost
 * its comment delimiters. Confirm against the full file.
 */
493 int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh)
495 struct net *net = sock_net(skb->sk);
496 struct fib_rule_hdr *frh = nlmsg_data(nlh);
497 struct fib_rules_ops *ops = NULL;
498 struct fib_rule *rule, *tmp;
499 struct nlattr *tb[FRA_MAX+1];
502 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
505 ops = lookup_rules_ops(net, frh->family);
511 err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy);
515 err = validate_rulemsg(frh, tb, ops);
519 list_for_each_entry(rule, &ops->rules_list, list) {
520 if (frh->action && (frh->action != rule->action))
523 if (frh_get_table(frh, tb) &&
524 (frh_get_table(frh, tb) != rule->table))
527 if (tb[FRA_PRIORITY] &&
528 (rule->pref != nla_get_u32(tb[FRA_PRIORITY])))
531 if (tb[FRA_IIFNAME] &&
532 nla_strcmp(tb[FRA_IIFNAME], rule->iifname))
535 if (tb[FRA_OIFNAME] &&
536 nla_strcmp(tb[FRA_OIFNAME], rule->oifname))
539 if (tb[FRA_FWMARK] &&
540 (rule->mark != nla_get_u32(tb[FRA_FWMARK])))
543 if (tb[FRA_FWMASK] &&
544 (rule->mark_mask != nla_get_u32(tb[FRA_FWMASK])))
547 if (tb[FRA_TUN_ID] &&
548 (rule->tun_id != nla_get_be64(tb[FRA_TUN_ID])))
551 if (tb[FRA_L3MDEV] &&
552 (rule->l3mdev != nla_get_u8(tb[FRA_L3MDEV])))
555 if (!ops->compare(rule, frh, tb))
558 if (rule->flags & FIB_RULE_PERMANENT) {
564 err = ops->delete(rule);
570 ip_tunnel_unneed_metadata();
572 list_del_rcu(&rule->list);
574 if (rule->action == FR_ACT_GOTO) {
575 ops->nr_goto_rules--;
576 if (rtnl_dereference(rule->ctarget) == NULL)
577 ops->unresolved_rules--;
581 * Check if this rule is a target to any of them. If so,
582 * disable them. As this operation is eventually very
583 * expensive, it is only performed if goto rules have
584 * actually been added.
586 if (ops->nr_goto_rules > 0) {
587 list_for_each_entry(tmp, &ops->rules_list, list) {
588 if (rtnl_dereference(tmp->ctarget) == rule) {
589 RCU_INIT_POINTER(tmp->ctarget, NULL);
590 ops->unresolved_rules++;
595 notify_rule_change(RTM_DELRULE, rule, ops, nlh,
596 NETLINK_CB(skb).portid);
598 flush_route_cache(ops);
608 EXPORT_SYMBOL_GPL(fib_nl_delrule);
/*
 * fib_rule_nlmsg_size - size estimate for a rule notification message.
 *
 * Sums the aligned fib_rule_hdr and one worst-case attribute for each of
 * the generic attributes listed below, then adds the family-specific
 * amount from ops->nlmsg_payload() when that callback is provided.
 * notify_rule_change() uses the result to size its skb.
 *
 * NOTE(review): fib_nl_fill_rule() also emits FRA_GOTO (u32) and
 * FRA_L3MDEV (u8), which have no term in the visible sum - confirm
 * whether they are accounted for in the full file. The return statement
 * is also missing from this listing.
 */
610 static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
611 struct fib_rule *rule)
613 size_t payload = NLMSG_ALIGN(sizeof(struct fib_rule_hdr))
614 + nla_total_size(IFNAMSIZ) /* FRA_IIFNAME */
615 + nla_total_size(IFNAMSIZ) /* FRA_OIFNAME */
616 + nla_total_size(4) /* FRA_PRIORITY */
617 + nla_total_size(4) /* FRA_TABLE */
618 + nla_total_size(4) /* FRA_SUPPRESS_PREFIXLEN */
619 + nla_total_size(4) /* FRA_SUPPRESS_IFGROUP */
620 + nla_total_size(4) /* FRA_FWMARK */
621 + nla_total_size(4) /* FRA_FWMASK */
622 + nla_total_size_64bit(8); /* FRA_TUN_ID */
624 if (ops->nlmsg_payload)
625 payload += ops->nlmsg_payload(rule);
/*
 * fib_nl_fill_rule - serialise @rule into a netlink message on @skb.
 *
 * Visible behaviour: a message header of the given type is started with
 * nlmsg_put() and the fib_rule_hdr is filled with family, table, action
 * and flags. FRA_TABLE and FRA_SUPPRESS_PREFIXLEN are written with
 * nla_put_u32(). A goto rule whose ctarget is NULL is reported with
 * FIB_RULE_UNRESOLVED. Non-empty interface names are emitted as
 * FRA_IIFNAME / FRA_OIFNAME, and an ifindex of -1 adds the matching
 * *_DETACHED flag. FRA_PRIORITY, FRA_FWMARK, FRA_FWMASK, FRA_GOTO,
 * FRA_TUN_ID and FRA_L3MDEV are written from the corresponding rule
 * fields, and FRA_SUPPRESS_IFGROUP only when the value is not -1. The
 * family-specific attributes come from ops->fill(). A failed put jumps to
 * nla_put_failure, and nlmsg_cancel() is the visible undo of the
 * partially built message.
 *
 * NOTE(review): several conditions (including the guards of the combined
 * put expression at original lines 671-682), the success path and the
 * return values are on lines missing from this listing.
 */
630 static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
631 u32 pid, u32 seq, int type, int flags,
632 struct fib_rules_ops *ops)
634 struct nlmsghdr *nlh;
635 struct fib_rule_hdr *frh;
637 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*frh), flags);
641 frh = nlmsg_data(nlh);
642 frh->family = ops->family;
643 frh->table = rule->table;
644 if (nla_put_u32(skb, FRA_TABLE, rule->table))
645 goto nla_put_failure;
646 if (nla_put_u32(skb, FRA_SUPPRESS_PREFIXLEN, rule->suppress_prefixlen))
647 goto nla_put_failure;
650 frh->action = rule->action;
651 frh->flags = rule->flags;
653 if (rule->action == FR_ACT_GOTO &&
654 rcu_access_pointer(rule->ctarget) == NULL)
655 frh->flags |= FIB_RULE_UNRESOLVED;
657 if (rule->iifname[0]) {
658 if (nla_put_string(skb, FRA_IIFNAME, rule->iifname))
659 goto nla_put_failure;
660 if (rule->iifindex == -1)
661 frh->flags |= FIB_RULE_IIF_DETACHED;
664 if (rule->oifname[0]) {
665 if (nla_put_string(skb, FRA_OIFNAME, rule->oifname))
666 goto nla_put_failure;
667 if (rule->oifindex == -1)
668 frh->flags |= FIB_RULE_OIF_DETACHED;
672 nla_put_u32(skb, FRA_PRIORITY, rule->pref)) ||
674 nla_put_u32(skb, FRA_FWMARK, rule->mark)) ||
675 ((rule->mark_mask || rule->mark) &&
676 nla_put_u32(skb, FRA_FWMASK, rule->mark_mask)) ||
678 nla_put_u32(skb, FRA_GOTO, rule->target)) ||
680 nla_put_be64(skb, FRA_TUN_ID, rule->tun_id, FRA_PAD)) ||
682 nla_put_u8(skb, FRA_L3MDEV, rule->l3mdev)))
683 goto nla_put_failure;
685 if (rule->suppress_ifgroup != -1) {
686 if (nla_put_u32(skb, FRA_SUPPRESS_IFGROUP, rule->suppress_ifgroup))
687 goto nla_put_failure;
690 if (ops->fill(rule, skb, frh) < 0)
691 goto nla_put_failure;
697 nlmsg_cancel(skb, nlh);
/*
 * dump_rules - emit the rules of one family into a netlink dump.
 *
 * Rules are walked with the RCU list iterator. A running index `idx` is
 * compared with cb->args[1]; rules below it get a dedicated branch
 * (presumably skipped as already dumped - confirm). The remaining rules
 * are serialised as RTM_NEWRULE messages with fib_nl_fill_rule(), using
 * the requester's portid and sequence number.
 *
 * NOTE(review): the declaration and update of `idx`, the remaining
 * arguments of the fill call, the handling of `err` and the return value
 * are on lines missing from this listing.
 */
701 static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb,
702 struct fib_rules_ops *ops)
705 struct fib_rule *rule;
709 list_for_each_entry_rcu(rule, &ops->rules_list, list) {
710 if (idx < cb->args[1])
713 err = fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).portid,
714 cb->nlh->nlmsg_seq, RTM_NEWRULE,
/*
 * fib_nl_dumprule - rtnetlink dump handler for rules (registered for
 * RTM_GETRULE in fib_rules_init()).
 *
 * When the request names a specific family, only that family's ops are
 * looked up and dumped, and -EAFNOSUPPORT is a visible return value on
 * that path. For AF_UNSPEC every ops entry of the namespace is visited.
 * An entry gets a dedicated branch when its index is below cb->args[0] or
 * when try_module_get() on its owner fails (presumably skipped - confirm);
 * the others are dumped with dump_rules(), whose negative result is
 * tested.
 *
 * NOTE(review): on the visible family-specific line the result of
 * dump_rules() is not used. The declarations of `idx` and `family`, the
 * loop bookkeeping and the return values are on lines missing from this
 * listing.
 */
728 static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
730 struct net *net = sock_net(skb->sk);
731 struct fib_rules_ops *ops;
734 family = rtnl_msg_family(cb->nlh);
735 if (family != AF_UNSPEC) {
736 /* Protocol specific dump request */
737 ops = lookup_rules_ops(net, family);
739 return -EAFNOSUPPORT;
741 dump_rules(skb, cb, ops);
747 list_for_each_entry_rcu(ops, &net->rules_ops, list) {
748 if (idx < cb->args[0] || !try_module_get(ops->owner))
751 if (dump_rules(skb, cb, ops) < 0)
/*
 * notify_rule_change - send a rule add/delete notification to the family's
 * netlink group.
 *
 * An skb sized by fib_rule_nlmsg_size() is allocated and filled with
 * fib_nl_fill_rule() using @event as the message type. An -EMSGSIZE result
 * from the fill triggers a WARN_ON, since it means the size estimate was
 * too small. The message is sent to ops->nlgroup with rtnl_notify(), and
 * rtnl_set_sk_err() reports `err` to that group.
 *
 * NOTE(review): the last parameter line, the declarations of `skb`, `net`
 * and `err`, and the control flow between the success and error paths are
 * on lines missing from this listing.
 */
764 static void notify_rule_change(int event, struct fib_rule *rule,
765 struct fib_rules_ops *ops, struct nlmsghdr *nlh,
773 skb = nlmsg_new(fib_rule_nlmsg_size(ops, rule), GFP_KERNEL);
777 err = fib_nl_fill_rule(skb, rule, pid, nlh->nlmsg_seq, event, 0, ops);
779 /* -EMSGSIZE implies BUG in fib_rule_nlmsg_size() */
780 WARN_ON(err == -EMSGSIZE);
785 rtnl_notify(skb, net, pid, ops->nlgroup, nlh, GFP_KERNEL);
789 rtnl_set_sk_err(net, ops->nlgroup, err);
/*
 * attach_rules - bind rules that name @dev to the device's ifindex.
 *
 * For every rule on @rules, an input or output interface index of -1
 * whose stored interface name equals dev->name is replaced with
 * dev->ifindex. fib_nl_fill_rule() reports an index of -1 as "detached".
 */
792 static void attach_rules(struct list_head *rules, struct net_device *dev)
794 struct fib_rule *rule;
796 list_for_each_entry(rule, rules, list) {
797 if (rule->iifindex == -1 &&
798 strcmp(dev->name, rule->iifname) == 0)
799 rule->iifindex = dev->ifindex;
800 if (rule->oifindex == -1 &&
801 strcmp(dev->name, rule->oifname) == 0)
802 rule->oifindex = dev->ifindex;
/*
 * detach_rules - find rules currently bound to @dev by ifindex.
 *
 * Each rule on @rules has its input and output interface index compared
 * with dev->ifindex.
 *
 * NOTE(review): the statements executed on a match (original lines 812
 * and 814) are missing from this listing; presumably the index is reset
 * to -1, mirroring attach_rules() - confirm in the full file.
 */
806 static void detach_rules(struct list_head *rules, struct net_device *dev)
808 struct fib_rule *rule;
810 list_for_each_entry(rule, rules, list) {
811 if (rule->iifindex == dev->ifindex)
813 if (rule->oifindex == dev->ifindex)
/*
 * fib_rules_event - netdevice notifier callback keeping rule interface
 * bindings in step with device lifetime.
 *
 * For every rules ops in the device's namespace:
 *  - NETDEV_REGISTER:   attach rules that name the device;
 *  - NETDEV_CHANGENAME: detach, then attach again under the new name;
 *  - NETDEV_UNREGISTER: detach rules bound to the device.
 *
 * NOTE(review): the remaining parameter line, the switch statement itself,
 * any locking assertions and the return value are on lines missing from
 * this listing.
 */
819 static int fib_rules_event(struct notifier_block *this, unsigned long event,
822 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
823 struct net *net = dev_net(dev);
824 struct fib_rules_ops *ops;
829 case NETDEV_REGISTER:
830 list_for_each_entry(ops, &net->rules_ops, list)
831 attach_rules(&ops->rules_list, dev);
834 case NETDEV_CHANGENAME:
835 list_for_each_entry(ops, &net->rules_ops, list) {
836 detach_rules(&ops->rules_list, dev);
837 attach_rules(&ops->rules_list, dev);
841 case NETDEV_UNREGISTER:
842 list_for_each_entry(ops, &net->rules_ops, list)
843 detach_rules(&ops->rules_list, dev);
/*
 * Notifier block that routes netdevice events to fib_rules_event();
 * registered from fib_rules_init().
 */
850 static struct notifier_block fib_rules_notifier = {
851 .notifier_call = fib_rules_event,
/*
 * fib_rules_net_init - per-namespace setup: initialise the empty list of
 * registered rules ops and the spinlock that guards changes to it.
 * NOTE(review): the return statement is missing from this listing.
 */
854 static int __net_init fib_rules_net_init(struct net *net)
856 INIT_LIST_HEAD(&net->rules_ops);
857 spin_lock_init(&net->rules_mod_lock);
/*
 * Per-namespace operations: fib_rules_net_init() is the init hook.
 * Registered from fib_rules_init() via register_pernet_subsys().
 */
861 static struct pernet_operations fib_rules_net_ops = {
862 .init = fib_rules_net_init,
/*
 * fib_rules_init - subsystem initialisation, run as a subsys_initcall.
 *
 * Registers the rtnetlink handlers for PF_UNSPEC (RTM_NEWRULE and
 * RTM_DELRULE as doit handlers, RTM_GETRULE as a dump handler), then the
 * per-namespace operations, then the netdevice notifier. The visible
 * unwind code unregisters the per-namespace operations and the three
 * rtnetlink handlers.
 *
 * NOTE(review): the declaration of `err`, the conditions that jump to the
 * unwind labels, the labels themselves and the return statements are on
 * lines missing from this listing.
 */
865 static int __init fib_rules_init(void)
868 rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL, NULL);
869 rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL, NULL);
870 rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule, NULL);
872 err = register_pernet_subsys(&fib_rules_net_ops);
876 err = register_netdevice_notifier(&fib_rules_notifier);
878 goto fail_unregister;
883 unregister_pernet_subsys(&fib_rules_net_ops);
885 rtnl_unregister(PF_UNSPEC, RTM_NEWRULE);
886 rtnl_unregister(PF_UNSPEC, RTM_DELRULE);
887 rtnl_unregister(PF_UNSPEC, RTM_GETRULE);
891 subsys_initcall(fib_rules_init);