// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/inetdevice.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>

#include <net/netfilter/nf_nat_masquerade.h>

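/* One deferred conntrack-table walk: carries the network namespace to
 * scan, the address/ifindex to match against and the per-entry match
 * callback handed to nf_ct_iterate_cleanup_net().
 */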
struct masq_dev_work {
	struct work_struct work;
	struct net *net;
	union nf_inet_addr addr;
	int ifindex;
	int (*iter)(struct nf_conn *i, void *data);
};

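/* Cap on the number of cleanup work items in flight; once it is hit,
 * further events are ignored and stale entries are left to time out.
 */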
#define MAX_MASQ_WORKER_COUNT	16

static DEFINE_MUTEX(masq_mutex);
static unsigned int masq_refcnt __read_mostly;
static atomic_t masq_worker_count __read_mostly;

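/* Set up source NAT for a connection leaving @out: select an address
 * owned by @out for the route towards the packet's destination, bind the
 * connection to it and remember the ifindex for cleanup on device down.
 */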
unsigned int
nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
		       const struct nf_nat_range2 *range,
		       const struct net_device *out)
{
	struct nf_conn *ct;
	struct nf_conn_nat *nat;
	enum ip_conntrack_info ctinfo;
	struct nf_nat_range2 newrange;
	const struct rtable *rt;
	__be32 newsrc, nh;

	WARN_ON(hooknum != NF_INET_POST_ROUTING);

	ct = nf_ct_get(skb, &ctinfo);

	WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
			 ctinfo == IP_CT_RELATED_REPLY)));

	/* Source address is 0.0.0.0 - locally generated packet that is
	 * probably not supposed to be masqueraded.
	 */
	if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0)
		return NF_ACCEPT;

	rt = skb_rtable(skb);
	nh = rt_nexthop(rt, ip_hdr(skb)->daddr);
	newsrc = inet_select_addr(out, nh, RT_SCOPE_UNIVERSE);
	if (!newsrc) {
		pr_info("%s ate my IP address\n", out->name);
		return NF_DROP;
	}

	nat = nf_ct_nat_ext_add(ct);
	if (nat)
		nat->masq_index = out->ifindex;

	/* Transfer from original range. */
	memset(&newrange.min_addr, 0, sizeof(newrange.min_addr));
	memset(&newrange.max_addr, 0, sizeof(newrange.max_addr));
	newrange.flags       = range->flags | NF_NAT_RANGE_MAP_IPS;
	newrange.min_addr.ip = newsrc;
	newrange.max_addr.ip = newsrc;
	newrange.min_proto   = range->min_proto;
	newrange.max_proto   = range->max_proto;

	/* Hand modified range to generic setup. */
	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4);

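/* Typical caller (illustrative sketch, not part of this file): an
 * xtables MASQUERADE target evaluated in POSTROUTING would do roughly
 *
 *	return nf_nat_masquerade_ipv4(skb, xt_hooknum(par), &range,
 *				      xt_out(par));
 *
 * with @range built from the rule's proto range.
 */

/* Work queue callback: walk the conntrack table with the match callback
 * stored in the work item, then drop the references and the worker slot
 * taken in nf_nat_masq_schedule().
 */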
static void iterate_cleanup_work(struct work_struct *work)
{
	struct masq_dev_work *w;

	w = container_of(work, struct masq_dev_work, work);

	nf_ct_iterate_cleanup_net(w->net, w->iter, (void *)w, 0, 0);

	put_net(w->net);
	kfree(w);
	atomic_dec(&masq_worker_count);
	module_put(THIS_MODULE);
}

/* Iterate conntrack table in the background and remove conntrack entries
 * that use the device/address being removed.
 *
 * In case too many work items have been queued already or memory allocation
 * fails, iteration is skipped; conntrack entries will time out eventually.
 */
static void nf_nat_masq_schedule(struct net *net, union nf_inet_addr *addr,
				 int ifindex,
				 int (*iter)(struct nf_conn *i, void *data),
				 gfp_t gfp_flags)
{
	struct masq_dev_work *w;

	if (atomic_read(&masq_worker_count) > MAX_MASQ_WORKER_COUNT)
		return;

	net = maybe_get_net(net);
	if (!net)
		return;

	if (!try_module_get(THIS_MODULE))
		goto err_module;

	w = kzalloc(sizeof(*w), gfp_flags);
	if (w) {
		/* We can overshoot MAX_MASQ_WORKER_COUNT, no big deal */
		atomic_inc(&masq_worker_count);

		INIT_WORK(&w->work, iterate_cleanup_work);
		w->ifindex = ifindex;
		w->net = net;
		w->iter = iter;
		if (addr)
			w->addr = *addr;
		schedule_work(&w->work);
		return;
	}

	module_put(THIS_MODULE);
err_module:
	put_net(net);
}

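/* nf_ct_iterate_cleanup_net() callback: true if the conntrack entry was
 * masqueraded via the interface the work item is cleaning up after.
 */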
static int device_cmp(struct nf_conn *i, void *arg)
{
	const struct nf_conn_nat *nat = nfct_nat(i);
	const struct masq_dev_work *w = arg;

	if (!nat)
		return 0;
	return nat->masq_index == w->ifindex;
}

static int masq_device_event(struct notifier_block *this,
			     unsigned long event, void *ptr)
{
	const struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);

	if (event == NETDEV_DOWN) {
		/* Device was downed.  Search entire table for
		 * conntracks which were associated with that device,
		 * and forget them.
		 */
		nf_nat_masq_schedule(net, NULL, dev->ifindex,
				     device_cmp, GFP_KERNEL);
	}

	return NOTIFY_DONE;
}

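/* As device_cmp(), but also require a match on the masqueraded address:
 * for a masqueraded flow the reply tuple's destination holds the NATed
 * source, so compare it against the address being removed.
 */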
static int inet_cmp(struct nf_conn *ct, void *ptr)
{
	struct nf_conntrack_tuple *tuple;
	struct masq_dev_work *w = ptr;

	if (!device_cmp(ct, ptr))
		return 0;

	tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;

	return nf_inet_addr_cmp(&w->addr, &tuple->dst.u3);
}

static int masq_inet_event(struct notifier_block *this,
			   unsigned long event, void *ptr)
{
	const struct in_ifaddr *ifa = ptr;
	const struct in_device *idev;
	const struct net_device *dev;
	union nf_inet_addr addr;

	if (event != NETDEV_DOWN)
		return NOTIFY_DONE;

	/* The masq_dev_notifier will catch the case of the device going
	 * down.  So if the inetdev is dead and being destroyed we have
	 * no work to do.  Otherwise this is an individual address removal
	 * and we have to perform the flush.
	 */
	idev = ifa->ifa_dev;
	if (idev->dead)
		return NOTIFY_DONE;

	memset(&addr, 0, sizeof(addr));

	addr.ip = ifa->ifa_address;

	dev = idev->dev;
	nf_nat_masq_schedule(dev_net(idev->dev), &addr, dev->ifindex,
			     inet_cmp, GFP_KERNEL);

	return NOTIFY_DONE;
}

static struct notifier_block masq_dev_notifier = {
	.notifier_call	= masq_device_event,
};

static struct notifier_block masq_inet_notifier = {
	.notifier_call	= masq_inet_event,
};

#if IS_ENABLED(CONFIG_IPV6)
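/* With IPv6 built as a module this code cannot call ipv6_dev_get_saddr()
 * directly, so go through nf_ipv6_ops; fail with -EHOSTUNREACH if the
 * IPv6 module is not loaded.
 */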
static int
nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev,
		       const struct in6_addr *daddr, unsigned int srcprefs,
		       struct in6_addr *saddr)
{
#ifdef CONFIG_IPV6_MODULE
	const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();

	if (!v6_ops)
		return -EHOSTUNREACH;

	return v6_ops->dev_get_saddr(net, dev, daddr, srcprefs, saddr);
#else
	return ipv6_dev_get_saddr(net, dev, daddr, srcprefs, saddr);
#endif
}

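/* IPv6 counterpart of nf_nat_masquerade_ipv4(): select a source address
 * for the destination in the IPv6 header and bind the connection to it.
 */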
unsigned int
nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
		       const struct net_device *out)
{
	enum ip_conntrack_info ctinfo;
	struct nf_conn_nat *nat;
	struct in6_addr src;
	struct nf_conn *ct;
	struct nf_nat_range2 newrange;

	ct = nf_ct_get(skb, &ctinfo);
	WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
			 ctinfo == IP_CT_RELATED_REPLY)));

	if (nat_ipv6_dev_get_saddr(nf_ct_net(ct), out,
				   &ipv6_hdr(skb)->daddr, 0, &src) < 0)
		return NF_DROP;

	nat = nf_ct_nat_ext_add(ct);
	if (nat)
		nat->masq_index = out->ifindex;

	newrange.flags		= range->flags | NF_NAT_RANGE_MAP_IPS;
	newrange.min_addr.in6	= src;
	newrange.max_addr.in6	= src;
	newrange.min_proto	= range->min_proto;
	newrange.max_proto	= range->max_proto;

	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6);

/* atomic notifier; can't call nf_ct_iterate_cleanup_net (it can sleep).
 *
 * Defer it to the system workqueue.
 *
 * As we can have 'a lot' of inet_events (depending on amount of ipv6
 * addresses being deleted), we also need to limit work item queue.
 */
static int masq_inet6_event(struct notifier_block *this,
			    unsigned long event, void *ptr)
{
	struct inet6_ifaddr *ifa = ptr;
	const struct net_device *dev;
	union nf_inet_addr addr;

	if (event != NETDEV_DOWN)
		return NOTIFY_DONE;

	dev = ifa->idev->dev;

	memset(&addr, 0, sizeof(addr));

	addr.in6 = ifa->addr;

	/* Atomic notifier context, so the work item must be allocated
	 * with GFP_ATOMIC rather than GFP_KERNEL.
	 */
	nf_nat_masq_schedule(dev_net(dev), &addr, dev->ifindex, inet_cmp,
			     GFP_ATOMIC);

	return NOTIFY_DONE;
}

static struct notifier_block masq_inet6_notifier = {
	.notifier_call	= masq_inet6_event,
};

static int nf_nat_masquerade_ipv6_register_notifier(void)
{
	return register_inet6addr_notifier(&masq_inet6_notifier);
}
#else
static inline int nf_nat_masquerade_ipv6_register_notifier(void) { return 0; }
#endif

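/* Called by users of the masquerade expression/target: the first caller
 * registers the notifiers, later callers only bump masq_refcnt.
 */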
int nf_nat_masquerade_inet_register_notifiers(void)
{
	int ret = 0;

	mutex_lock(&masq_mutex);
	if (WARN_ON_ONCE(masq_refcnt == UINT_MAX)) {
		ret = -EOVERFLOW;
		goto out_unlock;
	}

	/* check if the notifier was already set */
	if (++masq_refcnt > 1)
		goto out_unlock;

	/* Register for device down reports */
	ret = register_netdevice_notifier(&masq_dev_notifier);
	if (ret)
		goto err_dec;
	/* Register IP address change reports */
	ret = register_inetaddr_notifier(&masq_inet_notifier);
	if (ret)
		goto err_unregister;

	ret = nf_nat_masquerade_ipv6_register_notifier();
	if (ret)
		goto err_unreg_inet;

	mutex_unlock(&masq_mutex);
	return ret;
err_unreg_inet:
	unregister_inetaddr_notifier(&masq_inet_notifier);
err_unregister:
	unregister_netdevice_notifier(&masq_dev_notifier);
err_dec:
	masq_refcnt--;
out_unlock:
	mutex_unlock(&masq_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_register_notifiers);

void nf_nat_masquerade_inet_unregister_notifiers(void)
{
	mutex_lock(&masq_mutex);
	/* check if the notifiers still have clients */
	if (--masq_refcnt > 0)
		goto out_unlock;

	unregister_netdevice_notifier(&masq_dev_notifier);
	unregister_inetaddr_notifier(&masq_inet_notifier);
#if IS_ENABLED(CONFIG_IPV6)
	unregister_inet6addr_notifier(&masq_inet6_notifier);
#endif
out_unlock:
	mutex_unlock(&masq_mutex);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_unregister_notifiers);