1 // SPDX-License-Identifier: GPL-2.0
3 #include <linux/types.h>
4 #include <linux/netfilter.h>
5 #include <linux/module.h>
6 #include <linux/slab.h>
7 #include <linux/mutex.h>
8 #include <linux/vmalloc.h>
9 #include <linux/stddef.h>
10 #include <linux/err.h>
11 #include <linux/percpu.h>
12 #include <linux/notifier.h>
13 #include <linux/kernel.h>
14 #include <linux/netdevice.h>
16 #include <net/netfilter/nf_conntrack.h>
17 #include <net/netfilter/nf_conntrack_l4proto.h>
18 #include <net/netfilter/nf_conntrack_core.h>
19 #include <net/netfilter/nf_conntrack_bridge.h>
20 #include <net/netfilter/nf_log.h>
23 #include <linux/icmp.h>
24 #include <linux/sysctl.h>
25 #include <net/route.h>
28 #include <linux/netfilter_ipv4.h>
29 #include <linux/netfilter_ipv6.h>
30 #include <linux/netfilter_ipv6/ip6_tables.h>
31 #include <net/netfilter/nf_conntrack_helper.h>
32 #include <net/netfilter/nf_conntrack_zones.h>
33 #include <net/netfilter/nf_conntrack_seqadj.h>
34 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
35 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
36 #include <net/netfilter/nf_nat_helper.h>
37 #include <net/netfilter/ipv4/nf_defrag_ipv4.h>
38 #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
40 #include <linux/ipv6.h>
41 #include <linux/in6.h>
43 #include <net/inet_frag.h>
/* Taken around the per-netns hook user accounting in nf_ct_netns_do_get() /
 * nf_ct_netns_do_put() and around updates of nf_ct_bridge_info.
 */
45 static DEFINE_MUTEX(nf_ct_proto_mutex);
/* Log a packet considered invalid for L4 protocol 'protonum'.
 *
 * The message is emitted through nf_log_packet() with the prefix
 * "nf_ct_proto_<protonum>: " followed by the caller's formatted text (%pV).
 * The visible condition compares net->ct.sysctl_log_invalid with 'protonum'
 * and IPPROTO_RAW; presumably it bails out when neither matches.
 *
 * NOTE(review): the rest of the parameter list, the local declarations and
 * the statement guarded by the condition are not present in this extract -
 * confirm against the full file.
 */
49 void nf_l4proto_log_invalid(const struct sk_buff *skb,
50 const struct nf_hook_state *state,
54 struct net *net = state->net;
58 if (net->ct.sysctl_log_invalid != protonum &&
59 net->ct.sysctl_log_invalid != IPPROTO_RAW)
66 nf_log_packet(net, state->pf, 0, skb, state->in, state->out,
67 NULL, "nf_ct_proto_%d: %pV ", protonum, &vaf);
70 EXPORT_SYMBOL_GPL(nf_l4proto_log_invalid);
/* Conntrack-entry flavour of nf_l4proto_log_invalid(): the protocol number is
 * taken from the conntrack via nf_ct_protonum(ct) and the already formatted
 * message is forwarded as a single "%pV" argument.
 *
 * sysctl_log_invalid == 0 is marked likely(), i.e. logging being disabled is
 * treated as the expected case.
 *
 * NOTE(review): the remaining parameters, locals and the statement guarded by
 * the likely() test are not present in this extract.
 */
73 void nf_ct_l4proto_log_invalid(const struct sk_buff *skb,
74 const struct nf_conn *ct,
75 const struct nf_hook_state *state,
83 if (likely(net->ct.sysctl_log_invalid == 0))
90 nf_l4proto_log_invalid(skb, state,
91 nf_ct_protonum(ct), "%pV", &vaf);
94 EXPORT_SYMBOL_GPL(nf_ct_l4proto_log_invalid);
/* Map an L4 protocol number to its conntrack protocol tracker.
 *
 * UDP, TCP and ICMP are always handled; DCCP, SCTP, UDPLITE, GRE and ICMPv6
 * only when the corresponding config option is enabled. The final return
 * hands back nf_conntrack_l4proto_generic, so the visible code never returns
 * NULL.
 *
 * NOTE(review): the switch statement itself and most #endif lines are not
 * present in this extract.
 */
97 const struct nf_conntrack_l4proto *nf_ct_l4proto_find(u8 l4proto)
100 case IPPROTO_UDP: return &nf_conntrack_l4proto_udp;
101 case IPPROTO_TCP: return &nf_conntrack_l4proto_tcp;
102 case IPPROTO_ICMP: return &nf_conntrack_l4proto_icmp;
103 #ifdef CONFIG_NF_CT_PROTO_DCCP
104 case IPPROTO_DCCP: return &nf_conntrack_l4proto_dccp;
106 #ifdef CONFIG_NF_CT_PROTO_SCTP
107 case IPPROTO_SCTP: return &nf_conntrack_l4proto_sctp;
109 #ifdef CONFIG_NF_CT_PROTO_UDPLITE
110 case IPPROTO_UDPLITE: return &nf_conntrack_l4proto_udplite;
112 #ifdef CONFIG_NF_CT_PROTO_GRE
113 case IPPROTO_GRE: return &nf_conntrack_l4proto_gre;
115 #if IS_ENABLED(CONFIG_IPV6)
116 case IPPROTO_ICMPV6: return &nf_conntrack_l4proto_icmpv6;
117 #endif /* CONFIG_IPV6 */
120 return &nf_conntrack_l4proto_generic;
122 EXPORT_SYMBOL_GPL(nf_ct_l4proto_find);
/* Used by nf_confirm() to detect the POST_ROUTING hook being run with an L3
 * master device as output device. The test is only compiled in when
 * CONFIG_NET_L3_MASTER_DEV is enabled.
 *
 * NOTE(review): the return statements are not present in this extract;
 * presumably true when the test matches and false otherwise.
 */
124 static bool in_vrf_postrouting(const struct nf_hook_state *state)
126 #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
127 if (state->hook == NF_INET_POST_ROUTING &&
128 netif_is_l3_master(state->out))
134 unsigned int nf_confirm(void *priv,
136 const struct nf_hook_state *state)
138 const struct nf_conn_help *help;
139 enum ip_conntrack_info ctinfo;
140 unsigned int protoff;
146 ct = nf_ct_get(skb, &ctinfo);
147 if (!ct || in_vrf_postrouting(state))
150 help = nfct_help(ct);
152 seqadj_needed = test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && !nf_is_loopback_packet(skb);
153 if (!help && !seqadj_needed)
154 return nf_conntrack_confirm(skb);
156 /* helper->help() do not expect ICMP packets */
157 if (ctinfo == IP_CT_RELATED_REPLY)
158 return nf_conntrack_confirm(skb);
160 switch (nf_ct_l3num(ct)) {
162 protoff = skb_network_offset(skb) + ip_hdrlen(skb);
165 pnum = ipv6_hdr(skb)->nexthdr;
166 protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum, &frag_off);
167 if (protoff < 0 || (frag_off & htons(~0x7)) != 0)
168 return nf_conntrack_confirm(skb);
171 return nf_conntrack_confirm(skb);
175 const struct nf_conntrack_helper *helper;
178 /* rcu_read_lock()ed by nf_hook */
179 helper = rcu_dereference(help->helper);
181 ret = helper->help(skb,
184 if (ret != NF_ACCEPT)
190 !nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) {
191 NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
195 /* We've seen it coming out the other side: confirm it */
196 return nf_conntrack_confirm(skb);
198 EXPORT_SYMBOL_GPL(nf_confirm);
/* IPv4 hook function (listed in ipv4_conntrack_ops with hooknum
 * NF_INET_PRE_ROUTING): passes the packet straight to nf_conntrack_in().
 * NOTE(review): the skb parameter line and braces are not present in this
 * extract.
 */
200 static unsigned int ipv4_conntrack_in(void *priv,
202 const struct nf_hook_state *state)
204 return nf_conntrack_in(skb, state);
/* IPv4 hook function (listed in ipv4_conntrack_ops with hooknum
 * NF_INET_LOCAL_OUT).
 *
 * Fragments are special-cased: the conntrack attached to the skb is fetched
 * and, if it is a template, handled inside the inner branch (the statements
 * of that branch are not visible here). The visible tail of the function
 * hands the packet to nf_conntrack_in().
 *
 * NOTE(review): several lines, including the terminator of the inner
 * comment and the statements after it, are not present in this extract.
 */
207 static unsigned int ipv4_conntrack_local(void *priv,
209 const struct nf_hook_state *state)
211 if (ip_is_fragment(ip_hdr(skb))) { /* IP_NODEFRAG setsockopt set */
212 enum ip_conntrack_info ctinfo;
213 struct nf_conn *tmpl;
215 tmpl = nf_ct_get(skb, &ctinfo);
216 if (tmpl && nf_ct_is_template(tmpl)) {
217 /* when skipping ct, clear templates to avoid fooling
218 * later targets/matches
226 return nf_conntrack_in(skb, state);
/* Connection tracking may drop packets, but never alters them, so
 * make it the first hook.
 */
/* IPv4 hook table registered per netns by nf_ct_netns_do_get().
 *
 * Visible entries: ipv4_conntrack_in at PRE_ROUTING and ipv4_conntrack_local
 * at LOCAL_OUT, both with NF_IP_PRI_CONNTRACK; two further entries at
 * POST_ROUTING and LOCAL_IN with NF_IP_PRI_CONNTRACK_CONFIRM.
 *
 * NOTE(review): the .hook/.pf lines of the last two entries and all braces
 * are not present in this extract.
 */
232 static const struct nf_hook_ops ipv4_conntrack_ops[] = {
234 .hook = ipv4_conntrack_in,
236 .hooknum = NF_INET_PRE_ROUTING,
237 .priority = NF_IP_PRI_CONNTRACK,
240 .hook = ipv4_conntrack_local,
242 .hooknum = NF_INET_LOCAL_OUT,
243 .priority = NF_IP_PRI_CONNTRACK,
248 .hooknum = NF_INET_POST_ROUTING,
249 .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
254 .hooknum = NF_INET_LOCAL_IN,
255 .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
/* Fast function for those who don't want to parse /proc (and I don't
 * blame them).
 * Reversing the socket's dst/src point of view gives us the reply
 * mapping.
 */
/* IPv4 original-destination lookup (debug messages name it SO_ORIGINAL_DST).
 *
 * Builds a conntrack tuple from the socket's receive/destination addresses
 * and ports, looks it up in the default zone and, on a hit, copies the
 * destination of the ORIGINAL direction to user space as a sockaddr_in.
 * Only TCP and SCTP sockets are accepted, and *len must be at least
 * sizeof(struct sockaddr_in).
 *
 * NOTE(review): the return type line, the return statements and the release
 * of the looked-up conntrack are not present in this extract.
 */
265 getorigdst(struct sock *sk, int optval, void __user *user, int *len)
267 const struct inet_sock *inet = inet_sk(sk);
268 const struct nf_conntrack_tuple_hash *h;
269 struct nf_conntrack_tuple tuple;
271 memset(&tuple, 0, sizeof(tuple));
274 tuple.src.u3.ip = inet->inet_rcv_saddr;
275 tuple.src.u.tcp.port = inet->inet_sport;
276 tuple.dst.u3.ip = inet->inet_daddr;
277 tuple.dst.u.tcp.port = inet->inet_dport;
278 tuple.src.l3num = PF_INET;
279 tuple.dst.protonum = sk->sk_protocol;
282 /* We only do TCP and SCTP at the moment: is there a better way? */
283 if (tuple.dst.protonum != IPPROTO_TCP &&
284 tuple.dst.protonum != IPPROTO_SCTP) {
285 pr_debug("SO_ORIGINAL_DST: Not a TCP/SCTP socket\n");
289 if ((unsigned int)*len < sizeof(struct sockaddr_in)) {
290 pr_debug("SO_ORIGINAL_DST: len %d not %zu\n",
291 *len, sizeof(struct sockaddr_in));
295 h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple);
297 struct sockaddr_in sin;
298 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
300 sin.sin_family = AF_INET;
301 sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL]
302 .tuple.dst.u.tcp.port;
303 sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL]
305 memset(sin.sin_zero, 0, sizeof(sin.sin_zero));
307 pr_debug("SO_ORIGINAL_DST: %pI4 %u\n",
308 &sin.sin_addr.s_addr, ntohs(sin.sin_port));
310 if (copy_to_user(user, &sin, sizeof(sin)) != 0)
315 pr_debug("SO_ORIGINAL_DST: Can't find %pI4/%u-%pI4/%u.\n",
316 &tuple.src.u3.ip, ntohs(tuple.src.u.tcp.port),
317 &tuple.dst.u3.ip, ntohs(tuple.dst.u.tcp.port));
/* getsockopt registration covering the option range
 * [SO_ORIGINAL_DST, SO_ORIGINAL_DST + 1); registered in
 * nf_conntrack_proto_init() and removed in nf_conntrack_proto_fini().
 * NOTE(review): the .pf and .get initialisers are not present in this
 * extract; presumably .get is getorigdst.
 */
321 static struct nf_sockopt_ops so_getorigdst = {
323 .get_optmin = SO_ORIGINAL_DST,
324 .get_optmax = SO_ORIGINAL_DST + 1,
326 .owner = THIS_MODULE,
329 #if IS_ENABLED(CONFIG_IPV6)
/* IPv6 counterpart of getorigdst(), installed as .get of so_getorigdst6.
 *
 * Snapshots the socket's addresses, ports, protocol, bound device and flow
 * label into a tuple, accepts only TCP/SCTP and a user buffer of at least
 * sizeof(struct sockaddr_in6), looks the tuple up in the default zone and
 * returns the ORIGINAL-direction destination as a sockaddr_in6. The scope id
 * is derived from the returned address and the bound device.
 *
 * Visible result: 0 on success, -EFAULT if the copy to user space fails.
 *
 * NOTE(review): some locals, the error returns, any locking around the
 * socket reads and the release of the conntrack are not present in this
 * extract.
 */
331 ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
333 struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 };
334 const struct ipv6_pinfo *inet6 = inet6_sk(sk);
335 const struct inet_sock *inet = inet_sk(sk);
336 const struct nf_conntrack_tuple_hash *h;
337 struct sockaddr_in6 sin6;
343 tuple.src.u3.in6 = sk->sk_v6_rcv_saddr;
344 tuple.src.u.tcp.port = inet->inet_sport;
345 tuple.dst.u3.in6 = sk->sk_v6_daddr;
346 tuple.dst.u.tcp.port = inet->inet_dport;
347 tuple.dst.protonum = sk->sk_protocol;
348 bound_dev_if = sk->sk_bound_dev_if;
349 flow_label = inet6->flow_label;
352 if (tuple.dst.protonum != IPPROTO_TCP &&
353 tuple.dst.protonum != IPPROTO_SCTP)
356 if (*len < 0 || (unsigned int)*len < sizeof(sin6))
359 h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple);
361 pr_debug("IP6T_SO_ORIGINAL_DST: Can't find %pI6c/%u-%pI6c/%u.\n",
362 &tuple.src.u3.ip6, ntohs(tuple.src.u.tcp.port),
363 &tuple.dst.u3.ip6, ntohs(tuple.dst.u.tcp.port));
367 ct = nf_ct_tuplehash_to_ctrack(h);
369 sin6.sin6_family = AF_INET6;
370 sin6.sin6_port = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port;
371 sin6.sin6_flowinfo = flow_label & IPV6_FLOWINFO_MASK;
372 memcpy(&sin6.sin6_addr,
373 &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6,
374 sizeof(sin6.sin6_addr));
377 sin6.sin6_scope_id = ipv6_iface_scope_id(&sin6.sin6_addr, bound_dev_if);
378 return copy_to_user(user, &sin6, sizeof(sin6)) ? -EFAULT : 0;
/* getsockopt registration for IP6T_SO_ORIGINAL_DST, served by
 * ipv6_getorigdst(); the option range is
 * [IP6T_SO_ORIGINAL_DST, IP6T_SO_ORIGINAL_DST + 1).
 * NOTE(review): the .pf initialiser is not present in this extract.
 */
381 static struct nf_sockopt_ops so_getorigdst6 = {
383 .get_optmin = IP6T_SO_ORIGINAL_DST,
384 .get_optmax = IP6T_SO_ORIGINAL_DST + 1,
385 .get = ipv6_getorigdst,
386 .owner = THIS_MODULE,
/* IPv6 hook function (listed in ipv6_conntrack_ops with hooknum
 * NF_INET_PRE_ROUTING): passes the packet to nf_conntrack_in().
 */
389 static unsigned int ipv6_conntrack_in(void *priv,
391 const struct nf_hook_state *state)
393 return nf_conntrack_in(skb, state);
/* IPv6 hook function (listed in ipv6_conntrack_ops with hooknum
 * NF_INET_LOCAL_OUT): passes the packet to nf_conntrack_in(). Unlike the
 * IPv4 local hook, no fragment special case is visible here.
 */
396 static unsigned int ipv6_conntrack_local(void *priv,
398 const struct nf_hook_state *state)
400 return nf_conntrack_in(skb, state);
/* IPv6 hook table registered per netns by nf_ct_netns_do_get().
 *
 * Visible entries: ipv6_conntrack_in at PRE_ROUTING and ipv6_conntrack_local
 * at LOCAL_OUT with NF_IP6_PRI_CONNTRACK; two further entries at
 * POST_ROUTING (NF_IP6_PRI_LAST) and LOCAL_IN (NF_IP6_PRI_LAST - 1).
 *
 * NOTE(review): the .hook/.pf lines of the last two entries and all braces
 * are not present in this extract.
 */
403 static const struct nf_hook_ops ipv6_conntrack_ops[] = {
405 .hook = ipv6_conntrack_in,
407 .hooknum = NF_INET_PRE_ROUTING,
408 .priority = NF_IP6_PRI_CONNTRACK,
411 .hook = ipv6_conntrack_local,
413 .hooknum = NF_INET_LOCAL_OUT,
414 .priority = NF_IP6_PRI_CONNTRACK,
419 .hooknum = NF_INET_POST_ROUTING,
420 .priority = NF_IP6_PRI_LAST,
425 .hooknum = NF_INET_LOCAL_IN,
426 .priority = NF_IP6_PRI_LAST - 1,
/* Callback handed to nf_ct_iterate_cleanup_net() by nf_ct_netns_do_get().
 *
 * '_nfproto' carries the L3 family as an integer cast to a pointer. For
 * conntracks of that family that are TCP and in state ESTABLISHED, td_maxwin
 * is reset to 0 for both directions.
 *
 * NOTE(review): the return statements are not present in this extract.
 */
431 static int nf_ct_tcp_fixup(struct nf_conn *ct, void *_nfproto)
433 u8 nfproto = (unsigned long)_nfproto;
435 if (nf_ct_l3num(ct) != nfproto)
438 if (nf_ct_protonum(ct) == IPPROTO_TCP &&
439 ct->proto.tcp.state == TCP_CONNTRACK_ESTABLISHED) {
440 ct->proto.tcp.seen[0].td_maxwin = 0;
441 ct->proto.tcp.seen[1].td_maxwin = 0;
/* Bridge conntrack description (module, hook ops and their count); set by
 * nf_ct_bridge_register(), reset to NULL by nf_ct_bridge_unregister().
 */
447 static struct nf_ct_bridge_info *nf_ct_bridge_info;
/* Take one conntrack hook reference for family 'nfproto' in 'net'.
 *
 * Runs under nf_ct_proto_mutex. Visible per-family steps:
 *  - IPv4 / IPv6: nothing further to do when the user count is already above
 *    one; otherwise enable defragmentation and register the family's hooks.
 *  - bridge: if no bridge info is registered, the mutex is dropped and
 *    "nf_conntrack_bridge" is requested; then a module reference is taken,
 *    users_bridge is incremented and the bridge hooks are registered for the
 *    first user. users_bridge is reset to 0 on the visible error path.
 * After unlocking, nf_ct_iterate_cleanup_net() is run with nf_ct_tcp_fixup
 * and the family as argument (presumably only when fixup_needed was set).
 *
 * NOTE(review): the switch/case structure, the counter increments for
 * IPv4/IPv6, the error handling and the return statement are not present in
 * this extract.
 */
449 static int nf_ct_netns_do_get(struct net *net, u8 nfproto)
451 struct nf_conntrack_net *cnet = nf_ct_pernet(net);
452 bool fixup_needed = false, retry = true;
455 mutex_lock(&nf_ct_proto_mutex);
460 if (cnet->users4 > 1)
462 err = nf_defrag_ipv4_enable(net);
468 err = nf_register_net_hooks(net, ipv4_conntrack_ops,
469 ARRAY_SIZE(ipv4_conntrack_ops));
475 #if IS_ENABLED(CONFIG_IPV6)
478 if (cnet->users6 > 1)
480 err = nf_defrag_ipv6_enable(net);
486 err = nf_register_net_hooks(net, ipv6_conntrack_ops,
487 ARRAY_SIZE(ipv6_conntrack_ops));
495 if (!nf_ct_bridge_info) {
500 mutex_unlock(&nf_ct_proto_mutex);
501 request_module("nf_conntrack_bridge");
505 if (!try_module_get(nf_ct_bridge_info->me)) {
509 cnet->users_bridge++;
510 if (cnet->users_bridge > 1)
513 err = nf_register_net_hooks(net, nf_ct_bridge_info->ops,
514 nf_ct_bridge_info->ops_size);
516 cnet->users_bridge = 0;
525 mutex_unlock(&nf_ct_proto_mutex);
528 struct nf_ct_iter_data iter_data = {
530 .data = (void *)(unsigned long)nfproto,
532 nf_ct_iterate_cleanup_net(nf_ct_tcp_fixup, &iter_data);
/* Drop one conntrack hook reference for family 'nfproto' in 'net'.
 *
 * Runs under nf_ct_proto_mutex. A counter that is already zero is left
 * alone; when a counter drops to zero the family's hooks are unregistered
 * (and, for IPv4/IPv6, defragmentation is disabled). In the bridge branch
 * nothing is done when no bridge info is registered; otherwise the module
 * reference is dropped via module_put().
 *
 * NOTE(review): the switch/case structure and braces are not present in
 * this extract.
 */
538 static void nf_ct_netns_do_put(struct net *net, u8 nfproto)
540 struct nf_conntrack_net *cnet = nf_ct_pernet(net);
542 mutex_lock(&nf_ct_proto_mutex);
545 if (cnet->users4 && (--cnet->users4 == 0)) {
546 nf_unregister_net_hooks(net, ipv4_conntrack_ops,
547 ARRAY_SIZE(ipv4_conntrack_ops));
548 nf_defrag_ipv4_disable(net);
551 #if IS_ENABLED(CONFIG_IPV6)
553 if (cnet->users6 && (--cnet->users6 == 0)) {
554 nf_unregister_net_hooks(net, ipv6_conntrack_ops,
555 ARRAY_SIZE(ipv6_conntrack_ops));
556 nf_defrag_ipv6_disable(net);
561 if (!nf_ct_bridge_info)
563 if (cnet->users_bridge && (--cnet->users_bridge == 0))
564 nf_unregister_net_hooks(net, nf_ct_bridge_info->ops,
565 nf_ct_bridge_info->ops_size);
567 module_put(nf_ct_bridge_info->me);
570 mutex_unlock(&nf_ct_proto_mutex);
/* Take hook references for IPv4 and, when CONFIG_IPV6 is enabled, IPv6.
 * The visible nf_ct_netns_put(net, NFPROTO_IPV4) call presumably undoes the
 * IPv4 reference when the IPv6 step fails.
 * NOTE(review): the error checks and return statements are not present in
 * this extract.
 */
573 static int nf_ct_netns_inet_get(struct net *net)
577 err = nf_ct_netns_do_get(net, NFPROTO_IPV4);
578 #if IS_ENABLED(CONFIG_IPV6)
581 err = nf_ct_netns_do_get(net, NFPROTO_IPV6);
587 nf_ct_netns_put(net, NFPROTO_IPV4);
/* Exported entry point for taking conntrack hook references in 'net'.
 *
 * Visible paths: one that only calls nf_ct_netns_inet_get(); one that takes
 * the bridge reference followed by the inet references and puts the bridge
 * reference again (presumably on failure of the inet step); and a generic
 * path calling nf_ct_netns_do_get() with 'nfproto' unchanged.
 *
 * NOTE(review): the switch/case labels selecting between these paths and
 * the return statement are not present in this extract.
 */
593 int nf_ct_netns_get(struct net *net, u8 nfproto)
599 err = nf_ct_netns_inet_get(net);
602 err = nf_ct_netns_do_get(net, NFPROTO_BRIDGE);
606 err = nf_ct_netns_inet_get(net);
608 nf_ct_netns_put(net, NFPROTO_BRIDGE);
613 err = nf_ct_netns_do_get(net, nfproto);
/* Exported counterpart of nf_ct_netns_get(): releases hook references.
 *
 * Visible calls: a bridge put, a pair of IPv4 + IPv6 puts, and a generic put
 * with 'nfproto' unchanged.
 *
 * NOTE(review): the switch/case labels deciding which of these calls run
 * for a given 'nfproto' are not present in this extract.
 */
620 void nf_ct_netns_put(struct net *net, uint8_t nfproto)
624 nf_ct_netns_do_put(net, NFPROTO_BRIDGE);
627 nf_ct_netns_do_put(net, NFPROTO_IPV4);
628 nf_ct_netns_do_put(net, NFPROTO_IPV6);
631 nf_ct_netns_do_put(net, nfproto);
635 EXPORT_SYMBOL_GPL(nf_ct_netns_put);
/* Publish the bridge conntrack description in nf_ct_bridge_info.
 * Warns if an info is already registered; the WARN_ON check is evaluated
 * before nf_ct_proto_mutex is taken, the assignment itself is done under it.
 */
637 void nf_ct_bridge_register(struct nf_ct_bridge_info *info)
639 WARN_ON(nf_ct_bridge_info);
640 mutex_lock(&nf_ct_proto_mutex);
641 nf_ct_bridge_info = info;
642 mutex_unlock(&nf_ct_proto_mutex);
644 EXPORT_SYMBOL_GPL(nf_ct_bridge_register);
/* Clear nf_ct_bridge_info under nf_ct_proto_mutex.
 * Warns if nothing is registered. 'info' is not compared against the
 * currently registered pointer.
 */
646 void nf_ct_bridge_unregister(struct nf_ct_bridge_info *info)
648 WARN_ON(!nf_ct_bridge_info);
649 mutex_lock(&nf_ct_proto_mutex);
650 nf_ct_bridge_info = NULL;
651 mutex_unlock(&nf_ct_proto_mutex);
653 EXPORT_SYMBOL_GPL(nf_ct_bridge_unregister);
/* Register the original-destination getsockopt handlers: so_getorigdst
 * always, so_getorigdst6 when CONFIG_IPV6 is enabled. The cleanup_sockopt
 * path unregisters so_getorigdst again.
 * NOTE(review): the error checks, labels and return statements are not
 * present in this extract.
 */
655 int nf_conntrack_proto_init(void)
659 ret = nf_register_sockopt(&so_getorigdst);
663 #if IS_ENABLED(CONFIG_IPV6)
664 ret = nf_register_sockopt(&so_getorigdst6);
666 goto cleanup_sockopt;
671 #if IS_ENABLED(CONFIG_IPV6)
673 nf_unregister_sockopt(&so_getorigdst);
/* Undo nf_conntrack_proto_init(): unregister so_getorigdst and, when
 * CONFIG_IPV6 is enabled, so_getorigdst6.
 */
678 void nf_conntrack_proto_fini(void)
680 nf_unregister_sockopt(&so_getorigdst);
681 #if IS_ENABLED(CONFIG_IPV6)
682 nf_unregister_sockopt(&so_getorigdst6);
/* Call the per-netns init routine of each protocol tracker for 'net':
 * generic, UDP, TCP and ICMP unconditionally; ICMPv6, DCCP, SCTP and GRE
 * only when the corresponding config option is enabled.
 * NOTE(review): the #endif lines are not present in this extract.
 */
686 void nf_conntrack_proto_pernet_init(struct net *net)
688 nf_conntrack_generic_init_net(net);
689 nf_conntrack_udp_init_net(net);
690 nf_conntrack_tcp_init_net(net);
691 nf_conntrack_icmp_init_net(net);
692 #if IS_ENABLED(CONFIG_IPV6)
693 nf_conntrack_icmpv6_init_net(net);
695 #ifdef CONFIG_NF_CT_PROTO_DCCP
696 nf_conntrack_dccp_init_net(net);
698 #ifdef CONFIG_NF_CT_PROTO_SCTP
699 nf_conntrack_sctp_init_net(net);
701 #ifdef CONFIG_NF_CT_PROTO_GRE
702 nf_conntrack_gre_init_net(net);
/* "hashsize" module parameter (mode 0600): writes go through
 * nf_conntrack_set_hashsize(), reads through param_get_uint(), and the
 * backing variable is nf_conntrack_htable_size.
 */
706 module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
707 &nf_conntrack_htable_size, 0600);
/* Module aliases: the legacy "ip_conntrack" name and one
 * "nf_conntrack-<family>" alias each for AF_INET and AF_INET6.
 */
709 MODULE_ALIAS("ip_conntrack");
710 MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET));
711 MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6));
712 MODULE_LICENSE("GPL");