3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
10 * linux/net/ipv4/tcp_input.c
11 * linux/net/ipv4/tcp_output.c
14 * Hideaki YOSHIFUJI : sin6_scope_id support
15 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
16 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
17 * a single port at the same time.
18 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
20 * This program is free software; you can redistribute it and/or
21 * modify it under the terms of the GNU General Public License
22 * as published by the Free Software Foundation; either version
23 * 2 of the License, or (at your option) any later version.
26 #include <linux/bottom_half.h>
27 #include <linux/module.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/socket.h>
31 #include <linux/sockios.h>
32 #include <linux/net.h>
33 #include <linux/jiffies.h>
35 #include <linux/in6.h>
36 #include <linux/netdevice.h>
37 #include <linux/init.h>
38 #include <linux/jhash.h>
39 #include <linux/ipsec.h>
40 #include <linux/times.h>
41 #include <linux/slab.h>
43 #include <linux/ipv6.h>
44 #include <linux/icmpv6.h>
45 #include <linux/random.h>
48 #include <net/ndisc.h>
49 #include <net/inet6_hashtables.h>
50 #include <net/inet6_connection_sock.h>
52 #include <net/transp_v6.h>
53 #include <net/addrconf.h>
54 #include <net/ip6_route.h>
55 #include <net/ip6_checksum.h>
56 #include <net/inet_ecn.h>
57 #include <net/protocol.h>
60 #include <net/dsfield.h>
61 #include <net/timewait_sock.h>
62 #include <net/netdma.h>
63 #include <net/inet_common.h>
64 #include <net/secure_seq.h>
65 #include <net/tcp_memcontrol.h>
67 #include <asm/uaccess.h>
69 #include <linux/proc_fs.h>
70 #include <linux/seq_file.h>
72 #include <linux/crypto.h>
73 #include <linux/scatterlist.h>
/* Forward declarations of functions and af_ops tables that are used
 * further down in this file.
 */
75 static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb);
76 static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
77 struct request_sock *req);
79 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
81 static const struct inet_connection_sock_af_ops ipv6_mapped;
82 static const struct inet_connection_sock_af_ops ipv6_specific;
83 #ifdef CONFIG_TCP_MD5SIG
84 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
85 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
87 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
88 const struct in6_addr *addr)
/* Record receive-route information taken from skb on the socket:
 * skb->skb_iif is stored in rx_dst_ifindex and the fn_sernum of the
 * route's rt6i_node is stored in rx_dst_cookie.
 */
94 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
96 struct dst_entry *dst = skb_dst(skb);
97 const struct rt6_info *rt = (const struct rt6_info *)dst;
101 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
103 inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum;
/* Hash a socket that is not in TCP_CLOSE.  Sockets whose icsk_af_ops is
 * &ipv6_mapped are tested for separately (their handling is not shown in
 * this listing); __inet6_hash() is the hashing call visible here.
 */
106 static void tcp_v6_hash(struct sock *sk)
108 if (sk->sk_state != TCP_CLOSE) {
109 if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) {
114 __inet6_hash(sk, NULL);
/* Return an initial sequence number for the connection described by skb,
 * computed by secure_tcpv6_sequence_number() from the IPv6 header
 * addresses (daddr, saddr) and TCP header port field(s) of skb.
 */
119 static __u32 tcp_v6_init_sequence(const struct sk_buff *skb)
121 return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
122 ipv6_hdr(skb)->saddr.s6_addr32,
124 tcp_hdr(skb)->source);
/*
 * tcp_v6_connect - start an active open towards the address in uaddr.
 *
 * Steps visible in this listing:
 *  - uaddr is treated as a sockaddr_in6; addr_len is checked against
 *    SIN6_LEN_RFC2133 and a family other than AF_INET6 yields
 *    -EAFNOSUPPORT.
 *  - a non-zero flow label is resolved with fl6_sock_lookup() and its
 *    destination replaces usin->sin6_addr.
 *  - the unspecified address is rewritten to loopback (last byte set to 1).
 *  - multicast and link-local destinations are checked; for link-local
 *    the scope id may set sk_bound_dev_if, and a bound interface is
 *    required.
 *  - IPv4-mapped destinations are handed to tcp_v4_connect() after the
 *    socket is switched to the ipv6_mapped ops; the ipv6_specific ops are
 *    put back afterwards (NOTE(review): presumably only when the v4
 *    connect fails; the condition is not shown here).
 *  - otherwise a flowi6 is filled in, a route is looked up with
 *    ip6_dst_lookup_flow(), the socket moves to TCP_SYN_SENT, is hashed
 *    with inet6_hash_connect(), gets a write_seq if none is set and
 *    tp->repair is off, and tcp_connect() is called.
 *  - the trailing statements set the state back to TCP_CLOSE and clear
 *    inet_dport and sk_route_caps (NOTE(review): looks like the failure
 *    path; the labels are not shown here).
 */
127 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
130 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
131 struct inet_sock *inet = inet_sk(sk);
132 struct inet_connection_sock *icsk = inet_csk(sk);
133 struct ipv6_pinfo *np = inet6_sk(sk);
134 struct tcp_sock *tp = tcp_sk(sk);
135 struct in6_addr *saddr = NULL, *final_p, final;
138 struct dst_entry *dst;
142 if (addr_len < SIN6_LEN_RFC2133)
145 if (usin->sin6_family != AF_INET6)
146 return -EAFNOSUPPORT;
148 memset(&fl6, 0, sizeof(fl6));
151 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
152 IP6_ECN_flow_init(fl6.flowlabel);
153 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
154 struct ip6_flowlabel *flowlabel;
155 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
156 if (flowlabel == NULL)
158 usin->sin6_addr = flowlabel->dst;
159 fl6_sock_release(flowlabel);
164 * connect() to INADDR_ANY means loopback (BSD'ism).
167 if(ipv6_addr_any(&usin->sin6_addr))
168 usin->sin6_addr.s6_addr[15] = 0x1;
170 addr_type = ipv6_addr_type(&usin->sin6_addr);
172 if(addr_type & IPV6_ADDR_MULTICAST)
175 if (addr_type&IPV6_ADDR_LINKLOCAL) {
176 if (addr_len >= sizeof(struct sockaddr_in6) &&
177 usin->sin6_scope_id) {
178 /* If interface is set while binding, indices
181 if (sk->sk_bound_dev_if &&
182 sk->sk_bound_dev_if != usin->sin6_scope_id)
185 sk->sk_bound_dev_if = usin->sin6_scope_id;
188 /* Connect to link-local address requires an interface */
189 if (!sk->sk_bound_dev_if)
193 if (tp->rx_opt.ts_recent_stamp &&
194 !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
195 tp->rx_opt.ts_recent = 0;
196 tp->rx_opt.ts_recent_stamp = 0;
200 np->daddr = usin->sin6_addr;
201 np->flow_label = fl6.flowlabel;
207 if (addr_type == IPV6_ADDR_MAPPED) {
208 u32 exthdrlen = icsk->icsk_ext_hdr_len;
209 struct sockaddr_in sin;
211 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
213 if (__ipv6_only_sock(sk))
216 sin.sin_family = AF_INET;
217 sin.sin_port = usin->sin6_port;
218 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
220 icsk->icsk_af_ops = &ipv6_mapped;
221 sk->sk_backlog_rcv = tcp_v4_do_rcv;
222 #ifdef CONFIG_TCP_MD5SIG
223 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
226 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
229 icsk->icsk_ext_hdr_len = exthdrlen;
230 icsk->icsk_af_ops = &ipv6_specific;
231 sk->sk_backlog_rcv = tcp_v6_do_rcv;
232 #ifdef CONFIG_TCP_MD5SIG
233 tp->af_specific = &tcp_sock_ipv6_specific;
237 ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
238 ipv6_addr_set_v4mapped(inet->inet_rcv_saddr,
245 if (!ipv6_addr_any(&np->rcv_saddr))
246 saddr = &np->rcv_saddr;
248 fl6.flowi6_proto = IPPROTO_TCP;
249 fl6.daddr = np->daddr;
250 fl6.saddr = saddr ? *saddr : np->saddr;
251 fl6.flowi6_oif = sk->sk_bound_dev_if;
252 fl6.flowi6_mark = sk->sk_mark;
253 fl6.fl6_dport = usin->sin6_port;
254 fl6.fl6_sport = inet->inet_sport;
256 final_p = fl6_update_dst(&fl6, np->opt, &final);
258 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
260 dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true);
268 np->rcv_saddr = *saddr;
271 /* set the source address */
273 inet->inet_rcv_saddr = LOOPBACK4_IPV6;
275 sk->sk_gso_type = SKB_GSO_TCPV6;
276 __ip6_dst_store(sk, dst, NULL, NULL);
278 rt = (struct rt6_info *) dst;
279 if (tcp_death_row.sysctl_tw_recycle &&
280 !tp->rx_opt.ts_recent_stamp &&
281 ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr))
282 tcp_fetch_timewait_stamp(sk, dst);
284 icsk->icsk_ext_hdr_len = 0;
286 icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
289 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
291 inet->inet_dport = usin->sin6_port;
293 tcp_set_state(sk, TCP_SYN_SENT);
294 err = inet6_hash_connect(&tcp_death_row, sk);
298 if (!tp->write_seq && likely(!tp->repair))
299 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
304 err = tcp_connect(sk);
311 tcp_set_state(sk, TCP_CLOSE);
314 inet->inet_dport = 0;
315 sk->sk_route_caps = 0;
/* React to a reduced path MTU stored in tcp_sk(sk)->mtu_info.
 * LISTEN and CLOSE states are tested first (NOTE(review): presumably an
 * early exit; the statement is not shown here).  The route PMTU is then
 * updated with inet6_csk_update_pmtu(); if the cached icsk_pmtu_cookie is
 * larger than the resulting dst MTU, the MSS is resynchronised and
 * tcp_simple_retransmit() is called.
 */
319 static void tcp_v6_mtu_reduced(struct sock *sk)
321 struct dst_entry *dst;
323 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
326 dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);
330 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
331 tcp_sync_mss(sk, dst_mtu(dst));
332 tcp_simple_retransmit(sk);
/*
 * tcp_v6_err - handle an ICMPv6 error that refers to a TCP segment.
 *
 * skb->data is read as the IPv6 header of the offending packet and
 * skb->data + offset as its TCP header; the socket is found with
 * inet6_lookup() using those addresses and ports.
 *
 * Handling visible in this listing:
 *  - a TIME_WAIT socket is released with inet_twsk_put().
 *  - a hop limit below min_hopcount, and (for non-LISTEN sockets) a
 *    sequence number outside snd_una..snd_nxt, are counted in MIB stats.
 *  - NDISC_REDIRECT is passed to dst->ops->redirect().
 *  - ICMPV6_PKT_TOOBIG stores ntohl(info) in tp->mtu_info and calls
 *    tcp_v6_mtu_reduced() when the socket is not owned by the user;
 *    otherwise the TCP_MTU_REDUCED_DEFERRED bit is test-and-set.
 *  - other errors are converted with icmpv6_err_convert() and then
 *    recorded depending on sk->sk_state (sk_err_soft, sk_error_report()).
 */
336 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
337 u8 type, u8 code, int offset, __be32 info)
339 const struct ipv6hdr *hdr = (const struct ipv6hdr*)skb->data;
340 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
341 struct ipv6_pinfo *np;
346 struct net *net = dev_net(skb->dev);
348 sk = inet6_lookup(net, &tcp_hashinfo, &hdr->daddr,
349 th->dest, &hdr->saddr, th->source, skb->dev->ifindex);
352 ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
357 if (sk->sk_state == TCP_TIME_WAIT) {
358 inet_twsk_put(inet_twsk(sk));
363 if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
364 NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
366 if (sk->sk_state == TCP_CLOSE)
369 if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) {
370 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
375 seq = ntohl(th->seq);
376 if (sk->sk_state != TCP_LISTEN &&
377 !between(seq, tp->snd_una, tp->snd_nxt)) {
378 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
384 if (type == NDISC_REDIRECT) {
385 struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
388 dst->ops->redirect(dst, sk, skb);
391 if (type == ICMPV6_PKT_TOOBIG) {
392 tp->mtu_info = ntohl(info);
393 if (!sock_owned_by_user(sk))
394 tcp_v6_mtu_reduced(sk);
395 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
401 icmpv6_err_convert(type, code, &err);
403 /* Might be for a request_sock */
404 switch (sk->sk_state) {
405 struct request_sock *req, **prev;
407 if (sock_owned_by_user(sk))
410 req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
411 &hdr->saddr, inet6_iif(skb));
415 /* ICMPs are not backlogged, hence we cannot get
416 * an established socket here.
418 WARN_ON(req->sk != NULL);
420 if (seq != tcp_rsk(req)->snt_isn) {
421 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
425 inet_csk_reqsk_queue_drop(sk, req, prev);
429 case TCP_SYN_RECV: /* Cannot happen.
430 It can, if SYNs are crossed. --ANK */
431 if (!sock_owned_by_user(sk)) {
433 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
437 sk->sk_err_soft = err;
441 if (!sock_owned_by_user(sk) && np->recverr) {
443 sk->sk_error_report(sk);
445 sk->sk_err_soft = err;
/* Build and transmit a SYN-ACK for the request req.
 * When dst is NULL a route is obtained with inet6_csk_route_req().  The
 * segment comes from tcp_make_synack(), is checksummed against the
 * request's loc_addr/rmt_addr, sent with ip6_xmit() towards
 * treq->rmt_addr, and the transmit result is filtered through
 * net_xmit_eval().
 */
453 static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
455 struct request_sock *req,
456 struct request_values *rvp,
459 struct inet6_request_sock *treq = inet6_rsk(req);
460 struct ipv6_pinfo *np = inet6_sk(sk);
461 struct sk_buff * skb;
464 /* First, grab a route. */
465 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL)
468 skb = tcp_make_synack(sk, dst, req, rvp, NULL);
471 __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr);
473 fl6->daddr = treq->rmt_addr;
474 skb_set_queue_mapping(skb, queue_mapping);
475 err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass);
476 err = net_xmit_eval(err);
/* Retransmit the SYN-ACK for req by calling tcp_v6_send_synack() with no
 * cached route (NULL dst) and queue mapping 0.  TCP_MIB_RETRANSSEGS is
 * bumped here (the guarding condition is not shown in this listing).
 */
483 static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req,
484 struct request_values *rvp)
489 res = tcp_v6_send_synack(sk, NULL, &fl6, req, rvp, 0);
491 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
/* Request-sock destructor: frees the pktopts skb attached to the request
 * with kfree_skb().
 */
495 static void tcp_v6_reqsk_destructor(struct request_sock *req)
497 kfree_skb(inet6_rsk(req)->pktopts);
500 #ifdef CONFIG_TCP_MD5SIG
/* Look up the MD5 key configured on sk for the IPv6 address addr by
 * calling tcp_md5_do_lookup() with family AF_INET6.
 */
501 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
502 const struct in6_addr *addr)
504 return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
/* Look up the MD5 key on sk that matches the destination address (daddr)
 * of addr_sk.
 */
507 static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk,
508 struct sock *addr_sk)
510 return tcp_v6_md5_do_lookup(sk, &inet6_sk(addr_sk)->daddr);
/* Look up the MD5 key on sk that matches the remote address (rmt_addr)
 * recorded in the request sock req.
 */
513 static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk,
514 struct request_sock *req)
516 return tcp_v6_md5_do_lookup(sk, &inet6_rsk(req)->rmt_addr);
/* Parse a struct tcp_md5sig supplied by user space and add or delete the
 * corresponding MD5 key on sk.
 *
 * optlen must cover the whole structure, which is copied in with
 * copy_from_user(); the embedded address must be AF_INET6.  A zero
 * tcpm_keylen requests deletion via tcp_md5_do_del(); otherwise the key
 * length is checked against TCP_MD5SIG_MAXKEYLEN and the key is added
 * with tcp_md5_do_add() using GFP_KERNEL.  For IPv4-mapped addresses the
 * last 32-bit word of the address is used as the key address (family
 * AF_INET on the add path).
 */
519 static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval,
522 struct tcp_md5sig cmd;
523 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
525 if (optlen < sizeof(cmd))
528 if (copy_from_user(&cmd, optval, sizeof(cmd)))
531 if (sin6->sin6_family != AF_INET6)
534 if (!cmd.tcpm_keylen) {
535 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
536 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
538 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
542 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
545 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
546 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
547 AF_INET, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
549 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
550 AF_INET6, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
/* Feed the TCP pseudo-header into the MD5 computation held in hp.
 * The pseudo-header lives in hp->md5_blk.ip6; its protocol field is set
 * to IPPROTO_TCP and its len field to nbytes, both as big-endian 32-bit
 * values, and the whole structure is passed to crypto_hash_update(),
 * whose result is returned.
 */
553 static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
554 const struct in6_addr *daddr,
555 const struct in6_addr *saddr, int nbytes)
557 struct tcp6_pseudohdr *bp;
558 struct scatterlist sg;
560 bp = &hp->md5_blk.ip6;
561 /* 1. TCP pseudo-header (RFC2460) */
564 bp->protocol = cpu_to_be32(IPPROTO_TCP);
565 bp->len = cpu_to_be32(nbytes);
567 sg_init_one(&sg, bp, sizeof(*bp));
568 return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
/* Compute the MD5 signature of a TCP header alone into md5_hash.
 * Using a pool entry from tcp_get_md5sig_pool(), the hash covers the
 * pseudo-header (length th->doff << 2), the TCP header and the key, in
 * that order.  The pool is released with tcp_put_md5sig_pool(); the
 * 16-byte memset of md5_hash appears after the second release
 * (NOTE(review): looks like the failure path; labels are not shown here).
 */
571 static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
572 const struct in6_addr *daddr, struct in6_addr *saddr,
573 const struct tcphdr *th)
575 struct tcp_md5sig_pool *hp;
576 struct hash_desc *desc;
578 hp = tcp_get_md5sig_pool();
580 goto clear_hash_noput;
581 desc = &hp->md5_desc;
583 if (crypto_hash_init(desc))
585 if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
587 if (tcp_md5_hash_header(hp, th))
589 if (tcp_md5_hash_key(hp, key))
591 if (crypto_hash_final(desc, md5_hash))
594 tcp_put_md5sig_pool();
598 tcp_put_md5sig_pool();
600 memset(md5_hash, 0, 16);
/* Compute the MD5 signature of a whole segment (skb) into md5_hash.
 * The address pair comes from one of three places visible below: the
 * socket's saddr/daddr, the request's loc_addr/rmt_addr, or the IPv6
 * header of skb (the selecting conditions are not shown in this listing).
 * The hash covers the pseudo-header (length skb->len), the TCP header,
 * the skb data starting at th->doff << 2, and the key.  The 16-byte
 * memset of md5_hash appears after the second pool release
 * (NOTE(review): looks like the failure path).
 */
604 static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
605 const struct sock *sk,
606 const struct request_sock *req,
607 const struct sk_buff *skb)
609 const struct in6_addr *saddr, *daddr;
610 struct tcp_md5sig_pool *hp;
611 struct hash_desc *desc;
612 const struct tcphdr *th = tcp_hdr(skb);
615 saddr = &inet6_sk(sk)->saddr;
616 daddr = &inet6_sk(sk)->daddr;
618 saddr = &inet6_rsk(req)->loc_addr;
619 daddr = &inet6_rsk(req)->rmt_addr;
621 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
622 saddr = &ip6h->saddr;
623 daddr = &ip6h->daddr;
626 hp = tcp_get_md5sig_pool();
628 goto clear_hash_noput;
629 desc = &hp->md5_desc;
631 if (crypto_hash_init(desc))
634 if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
636 if (tcp_md5_hash_header(hp, th))
638 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
640 if (tcp_md5_hash_key(hp, key))
642 if (crypto_hash_final(desc, md5_hash))
645 tcp_put_md5sig_pool();
649 tcp_put_md5sig_pool();
651 memset(md5_hash, 0, 16);
/* Validate the MD5 signature option of an incoming segment.
 * The key expected for the packet's source address is looked up and the
 * MD5 option is located with tcp_parse_md5sig_option().  Four cases are
 * distinguished: neither present; key expected but no option
 * (LINUX_MIB_TCPMD5NOTFOUND); option present but no key
 * (LINUX_MIB_TCPMD5UNEXPECTED); both present, in which case the hash is
 * recomputed and compared over 16 bytes, with a rate-limited log message
 * on failure or mismatch.  The return values are not shown in this
 * listing.
 */
655 static int tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
657 const __u8 *hash_location = NULL;
658 struct tcp_md5sig_key *hash_expected;
659 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
660 const struct tcphdr *th = tcp_hdr(skb);
664 hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
665 hash_location = tcp_parse_md5sig_option(th);
667 /* We've parsed the options - do we have a hash? */
668 if (!hash_expected && !hash_location)
671 if (hash_expected && !hash_location) {
672 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
676 if (!hash_expected && hash_location) {
677 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
681 /* check the signature */
682 genhash = tcp_v6_md5_hash_skb(newhash,
686 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
687 net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
688 genhash ? "failed" : "mismatch",
689 &ip6h->saddr, ntohs(th->source),
690 &ip6h->daddr, ntohs(th->dest));
/* Request-sock operations for TCP over IPv6: object size plus the
 * callbacks used to retransmit the SYN-ACK, send an ACK, destroy the
 * request, send a reset and handle SYN-ACK timeouts.
 */
697 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
699 .obj_size = sizeof(struct tcp6_request_sock),
700 .rtx_syn_ack = tcp_v6_rtx_synack,
701 .send_ack = tcp_v6_reqsk_send_ack,
702 .destructor = tcp_v6_reqsk_destructor,
703 .send_reset = tcp_v6_send_reset,
704 .syn_ack_timeout = tcp_syn_ack_timeout,
707 #ifdef CONFIG_TCP_MD5SIG
/* MD5-related request-sock callbacks for IPv6: key lookup by request and
 * hash calculation over an skb.
 */
708 static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
709 .md5_lookup = tcp_v6_reqsk_md5_lookup,
710 .calc_md5_hash = tcp_v6_md5_hash_skb,
/*
 * tcp_v6_send_response - build and send a data-less TCP reply to skb.
 *
 * A new buffer is allocated with room for the IPv6 and TCP headers plus
 * optional timestamp and MD5 option space.  Ports and addresses are
 * swapped relative to the incoming segment, seq/ack/win are filled in
 * from the arguments, and the ACK flag is set as (!rst || !th->ack).
 * The reply is routed with ip6_dst_lookup_flow() on the per-net control
 * socket (net->ipv6.tcp_sk) and sent with ip6_xmit() using tclass;
 * TCP_MIB_OUTSEGS and TCP_MIB_OUTRSTS are updated (the condition on the
 * latter is not shown in this listing).
 */
714 static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
715 u32 ts, struct tcp_md5sig_key *key, int rst, u8 tclass)
717 const struct tcphdr *th = tcp_hdr(skb);
719 struct sk_buff *buff;
721 struct net *net = dev_net(skb_dst(skb)->dev);
722 struct sock *ctl_sk = net->ipv6.tcp_sk;
723 unsigned int tot_len = sizeof(struct tcphdr);
724 struct dst_entry *dst;
728 tot_len += TCPOLEN_TSTAMP_ALIGNED;
729 #ifdef CONFIG_TCP_MD5SIG
731 tot_len += TCPOLEN_MD5SIG_ALIGNED;
734 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
739 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
741 t1 = (struct tcphdr *) skb_push(buff, tot_len);
742 skb_reset_transport_header(buff);
744 /* Swap the send and the receive. */
745 memset(t1, 0, sizeof(*t1));
746 t1->dest = th->source;
747 t1->source = th->dest;
748 t1->doff = tot_len / 4;
749 t1->seq = htonl(seq);
750 t1->ack_seq = htonl(ack);
751 t1->ack = !rst || !th->ack;
753 t1->window = htons(win);
755 topt = (__be32 *)(t1 + 1);
758 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
759 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
760 *topt++ = htonl(tcp_time_stamp);
764 #ifdef CONFIG_TCP_MD5SIG
766 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
767 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
768 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
769 &ipv6_hdr(skb)->saddr,
770 &ipv6_hdr(skb)->daddr, t1);
774 memset(&fl6, 0, sizeof(fl6));
775 fl6.daddr = ipv6_hdr(skb)->saddr;
776 fl6.saddr = ipv6_hdr(skb)->daddr;
778 buff->ip_summed = CHECKSUM_PARTIAL;
781 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
783 fl6.flowi6_proto = IPPROTO_TCP;
784 if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL)
785 fl6.flowi6_oif = inet6_iif(skb);
786 fl6.fl6_dport = t1->dest;
787 fl6.fl6_sport = t1->source;
788 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
790 /* Pass a socket to ip6_dst_lookup either it is for RST
791 * Underlying function will use this to retrieve the network
794 dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL, false);
796 skb_dst_set(buff, dst);
797 ip6_xmit(ctl_sk, buff, &fl6, NULL, tclass);
798 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
800 TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
/*
 * tcp_v6_send_reset - answer the segment in skb with a RST.
 *
 * The destination of skb is tested with ipv6_unicast_destination().
 * With CONFIG_TCP_MD5SIG, when no socket is given but the segment carries
 * an MD5 option, a listening socket is looked up to find a key and the
 * segment's hash is verified with tcp_v6_md5_hash_skb(); otherwise the
 * key is taken from sk when sk is non-NULL.  The reply is sent through
 * tcp_v6_send_response() with rst = 1, window 0 and tclass 0, using seq
 * and ack_seq values derived from the incoming header (the selecting
 * condition is not shown in this listing).
 */
807 static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
809 const struct tcphdr *th = tcp_hdr(skb);
810 u32 seq = 0, ack_seq = 0;
811 struct tcp_md5sig_key *key = NULL;
812 #ifdef CONFIG_TCP_MD5SIG
813 const __u8 *hash_location = NULL;
814 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
815 unsigned char newhash[16];
817 struct sock *sk1 = NULL;
823 if (!ipv6_unicast_destination(skb))
826 #ifdef CONFIG_TCP_MD5SIG
827 hash_location = tcp_parse_md5sig_option(th);
828 if (!sk && hash_location) {
830 * active side is lost. Try to find listening socket through
831 * source port, and then find md5 key through listening socket.
832 * we are not loose security here:
833 * Incoming packet is checked with md5 hash with finding key,
834 * no RST generated if md5 hash doesn't match.
836 sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
837 &tcp_hashinfo, &ipv6h->saddr,
838 th->source, &ipv6h->daddr,
839 ntohs(th->source), inet6_iif(skb));
844 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
848 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, NULL, skb);
849 if (genhash || memcmp(hash_location, newhash, 16) != 0)
852 key = sk ? tcp_v6_md5_do_lookup(sk, &ipv6h->saddr) : NULL;
857 seq = ntohl(th->ack_seq);
859 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
862 tcp_v6_send_response(skb, seq, ack_seq, 0, 0, key, 1, 0);
864 #ifdef CONFIG_TCP_MD5SIG
/* Send a plain ACK in reply to skb: thin wrapper that calls
 * tcp_v6_send_response() with rst = 0.
 */
873 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts,
874 struct tcp_md5sig_key *key, u8 tclass)
876 tcp_v6_send_response(skb, seq, ack, win, ts, key, 0, tclass);
/* Send an ACK on behalf of a time-wait socket, using the sequence
 * numbers, scaled receive window, recent timestamp and MD5 key stored in
 * the time-wait structures.
 */
879 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
881 struct inet_timewait_sock *tw = inet_twsk(sk);
882 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
884 tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
885 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
886 tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw),
/* Send an ACK for a pending connection request: seq is snt_isn + 1, ack
 * is rcv_isn + 1, window and timestamp come from req, the MD5 key is
 * looked up by the destination address of skb, and tclass is 0.
 */
892 static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
893 struct request_sock *req)
895 tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent,
896 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0);
/* Find the socket that should process a segment arriving on sk.
 * A pending request matching the segment's source port and addresses is
 * searched first and, per the visible return, handed to tcp_check_req().
 * Next an established socket is looked up with
 * __inet6_lookup_established(); a TIME_WAIT hit is released with
 * inet_twsk_put().  With CONFIG_SYN_COOKIES, cookie_v6_check() may
 * replace sk.  The guarding conditions and remaining returns are not
 * shown in this listing.
 */
900 static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
902 struct request_sock *req, **prev;
903 const struct tcphdr *th = tcp_hdr(skb);
906 /* Find possible connection requests. */
907 req = inet6_csk_search_req(sk, &prev, th->source,
908 &ipv6_hdr(skb)->saddr,
909 &ipv6_hdr(skb)->daddr, inet6_iif(skb));
911 return tcp_check_req(sk, skb, req, prev, false);
913 nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo,
914 &ipv6_hdr(skb)->saddr, th->source,
915 &ipv6_hdr(skb)->daddr, ntohs(th->dest), inet6_iif(skb));
918 if (nsk->sk_state != TCP_TIME_WAIT) {
922 inet_twsk_put(inet_twsk(nsk));
926 #ifdef CONFIG_SYN_COOKIES
928 sk = cookie_v6_check(sk, skb);
933 /* FIXME: this is substantially similar to the ipv4 code.
934 * Can some kind of merge be done? -- erics
/*
 * tcp_v6_conn_request - handle an incoming SYN on a listening socket.
 *
 * Steps visible in this listing:
 *  - IPv4 packets (ETH_P_IP) are passed to tcp_v4_conn_request().
 *  - the destination is tested with ipv6_unicast_destination().
 *  - a full request queue with isn == 0 consults tcp_syn_flood_action();
 *    a full accept queue with more than one young request is tested too.
 *  - a request sock is allocated with inet6_reqsk_alloc() and the TCP
 *    options of the SYN are parsed into tmp_opt.
 *  - TCP cookie extension values are prepared in tmp_ext.
 *  - remote/local addresses, the incoming interface (for link-local
 *    peers on unbound sockets) and, when IPv6 options are wanted, a
 *    reference to skb are stored in the request.
 *  - the initial sequence number comes from cookie_v6_init_sequence() or
 *    tcp_v6_init_sequence(), after the tw_recycle / backlog checks
 *    against tcp_peer_is_proven().
 *  - the SYN-ACK is sent with tcp_v6_send_synack() and the request is
 *    added to the queue with inet6_csk_reqsk_queue_hash_add().
 * The last visible statement returns 0 so that no reset is sent.
 */
936 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
938 struct tcp_extend_values tmp_ext;
939 struct tcp_options_received tmp_opt;
940 const u8 *hash_location;
941 struct request_sock *req;
942 struct inet6_request_sock *treq;
943 struct ipv6_pinfo *np = inet6_sk(sk);
944 struct tcp_sock *tp = tcp_sk(sk);
945 __u32 isn = TCP_SKB_CB(skb)->when;
946 struct dst_entry *dst = NULL;
948 bool want_cookie = false;
950 if (skb->protocol == htons(ETH_P_IP))
951 return tcp_v4_conn_request(sk, skb);
953 if (!ipv6_unicast_destination(skb))
956 if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
957 want_cookie = tcp_syn_flood_action(sk, skb, "TCPv6");
962 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
965 req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
969 #ifdef CONFIG_TCP_MD5SIG
970 tcp_rsk(req)->af_specific = &tcp_request_sock_ipv6_ops;
973 tcp_clear_options(&tmp_opt);
974 tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
975 tmp_opt.user_mss = tp->rx_opt.user_mss;
976 tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
978 if (tmp_opt.cookie_plus > 0 &&
979 tmp_opt.saw_tstamp &&
980 !tp->rx_opt.cookie_out_never &&
981 (sysctl_tcp_cookie_size > 0 ||
982 (tp->cookie_values != NULL &&
983 tp->cookie_values->cookie_desired > 0))) {
986 u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
987 int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
989 if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
992 /* Secret recipe starts with IP addresses */
993 d = (__force u32 *)&ipv6_hdr(skb)->daddr.s6_addr32[0];
998 d = (__force u32 *)&ipv6_hdr(skb)->saddr.s6_addr32[0];
1004 /* plus variable length Initiator Cookie */
1007 *c++ ^= *hash_location++;
1009 want_cookie = false; /* not our kind of cookie */
1010 tmp_ext.cookie_out_never = 0; /* false */
1011 tmp_ext.cookie_plus = tmp_opt.cookie_plus;
1012 } else if (!tp->rx_opt.cookie_in_always) {
1013 /* redundant indications, but ensure initialization. */
1014 tmp_ext.cookie_out_never = 1; /* true */
1015 tmp_ext.cookie_plus = 0;
1019 tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
1021 if (want_cookie && !tmp_opt.saw_tstamp)
1022 tcp_clear_options(&tmp_opt);
1024 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1025 tcp_openreq_init(req, &tmp_opt, skb);
1027 treq = inet6_rsk(req);
1028 treq->rmt_addr = ipv6_hdr(skb)->saddr;
1029 treq->loc_addr = ipv6_hdr(skb)->daddr;
1030 if (!want_cookie || tmp_opt.tstamp_ok)
1031 TCP_ECN_create_request(req, skb, sock_net(sk));
1033 treq->iif = sk->sk_bound_dev_if;
1035 /* So that link locals have meaning */
1036 if (!sk->sk_bound_dev_if &&
1037 ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
1038 treq->iif = inet6_iif(skb);
1041 if (ipv6_opt_accepted(sk, skb) ||
1042 np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
1043 np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
1044 atomic_inc(&skb->users);
1045 treq->pktopts = skb;
1049 isn = cookie_v6_init_sequence(sk, skb, &req->mss);
1050 req->cookie_ts = tmp_opt.tstamp_ok;
1054 /* VJ's idea. We save last timestamp seen
1055 * from the destination in peer table, when entering
1056 * state TIME-WAIT, and check against it before
1057 * accepting new connection request.
1059 * If "isn" is not zero, this request hit alive
1060 * timewait bucket, so that all the necessary checks
1061 * are made in the function processing timewait state.
1063 if (tmp_opt.saw_tstamp &&
1064 tcp_death_row.sysctl_tw_recycle &&
1065 (dst = inet6_csk_route_req(sk, &fl6, req)) != NULL) {
1066 if (!tcp_peer_is_proven(req, dst, true)) {
1067 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
1068 goto drop_and_release;
1071 /* Kill the following clause, if you dislike this way. */
1072 else if (!sysctl_tcp_syncookies &&
1073 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1074 (sysctl_max_syn_backlog >> 2)) &&
1075 !tcp_peer_is_proven(req, dst, false)) {
1076 /* Without syncookies last quarter of
1077 * backlog is filled with destinations,
1078 * proven to be alive.
1079 * It means that we continue to communicate
1080 * to destinations, already remembered
1081 * to the moment of synflood.
1083 LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI6/%u\n",
1084 &treq->rmt_addr, ntohs(tcp_hdr(skb)->source));
1085 goto drop_and_release;
1088 isn = tcp_v6_init_sequence(skb);
1091 tcp_rsk(req)->snt_isn = isn;
1093 if (security_inet_conn_request(sk, skb, req))
1094 goto drop_and_release;
1096 if (tcp_v6_send_synack(sk, dst, &fl6, req,
1097 (struct request_values *)&tmp_ext,
1098 skb_get_queue_mapping(skb)) ||
1102 tcp_rsk(req)->snt_synack = tcp_time_stamp;
1103 tcp_rsk(req)->listener = NULL;
1104 inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1112 return 0; /* don't send reset */
/*
 * tcp_v6_syn_recv_sock - create the child socket for a completed
 * handshake on listening socket sk.
 *
 * Two paths are visible in this listing:
 *  - IPv4 packet (ETH_P_IP): the child comes from tcp_v4_syn_recv_sock();
 *    its ipv6_pinfo is copied from the listener, the addresses are stored
 *    in v4-mapped form, the ipv6_mapped ops and tcp_v4_do_rcv backlog
 *    handler are installed, and the MSS is resynchronised.
 *  - native IPv6: the accept queue is checked, a route is obtained with
 *    inet6_csk_route_req(), the child is created with
 *    tcp_create_openreq_child(), addresses and interface are taken from
 *    the request, packet options saved with the SYN are cloned, the
 *    listener's IPv6 options are duplicated, MSS values are set up, an
 *    MD5 key matching the peer is copied (CONFIG_TCP_MD5SIG), the port is
 *    inherited with __inet_inherit_port() and the child is hashed with
 *    __inet6_hash().
 * The trailing statements bump LINUX_MIB_LISTENOVERFLOWS and
 * LINUX_MIB_LISTENDROPS (NOTE(review): looks like the failure exits; the
 * labels and returns are not shown here).
 */
1115 static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1116 struct request_sock *req,
1117 struct dst_entry *dst)
1119 struct inet6_request_sock *treq;
1120 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1121 struct tcp6_sock *newtcp6sk;
1122 struct inet_sock *newinet;
1123 struct tcp_sock *newtp;
1125 #ifdef CONFIG_TCP_MD5SIG
1126 struct tcp_md5sig_key *key;
1130 if (skb->protocol == htons(ETH_P_IP)) {
1135 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1140 newtcp6sk = (struct tcp6_sock *)newsk;
1141 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1143 newinet = inet_sk(newsk);
1144 newnp = inet6_sk(newsk);
1145 newtp = tcp_sk(newsk);
1147 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1149 ipv6_addr_set_v4mapped(newinet->inet_daddr, &newnp->daddr);
1151 ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr);
1153 newnp->rcv_saddr = newnp->saddr;
1155 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1156 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1157 #ifdef CONFIG_TCP_MD5SIG
1158 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1161 newnp->ipv6_ac_list = NULL;
1162 newnp->ipv6_fl_list = NULL;
1163 newnp->pktoptions = NULL;
1165 newnp->mcast_oif = inet6_iif(skb);
1166 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1167 newnp->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb));
1170 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1171 * here, tcp_create_openreq_child now does this for us, see the comment in
1172 * that function for the gory details. -acme
1175 /* It is tricky place. Until this moment IPv4 tcp
1176 worked with IPv6 icsk.icsk_af_ops.
1179 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1184 treq = inet6_rsk(req);
1186 if (sk_acceptq_is_full(sk))
1190 dst = inet6_csk_route_req(sk, &fl6, req);
1195 newsk = tcp_create_openreq_child(sk, req, skb);
1200 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1201 * count here, tcp_create_openreq_child now does this for us, see the
1202 * comment in that function for the gory details. -acme
1205 newsk->sk_gso_type = SKB_GSO_TCPV6;
1206 __ip6_dst_store(newsk, dst, NULL, NULL);
1207 inet6_sk_rx_dst_set(newsk, skb);
1209 newtcp6sk = (struct tcp6_sock *)newsk;
1210 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1212 newtp = tcp_sk(newsk);
1213 newinet = inet_sk(newsk);
1214 newnp = inet6_sk(newsk);
1216 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1218 newnp->daddr = treq->rmt_addr;
1219 newnp->saddr = treq->loc_addr;
1220 newnp->rcv_saddr = treq->loc_addr;
1221 newsk->sk_bound_dev_if = treq->iif;
1223 /* Now IPv6 options...
1225 First: no IPv4 options.
1227 newinet->inet_opt = NULL;
1228 newnp->ipv6_ac_list = NULL;
1229 newnp->ipv6_fl_list = NULL;
1232 newnp->rxopt.all = np->rxopt.all;
1234 /* Clone pktoptions received with SYN */
1235 newnp->pktoptions = NULL;
1236 if (treq->pktopts != NULL) {
1237 newnp->pktoptions = skb_clone(treq->pktopts,
1238 sk_gfp_atomic(sk, GFP_ATOMIC));
1239 consume_skb(treq->pktopts);
1240 treq->pktopts = NULL;
1241 if (newnp->pktoptions)
1242 skb_set_owner_r(newnp->pktoptions, newsk);
1245 newnp->mcast_oif = inet6_iif(skb);
1246 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1247 newnp->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb));
1249 /* Clone native IPv6 options from listening socket (if any)
1251 Yes, keeping reference count would be much more clever,
1252 but we make one more one thing there: reattach optmem
1256 newnp->opt = ipv6_dup_options(newsk, np->opt);
1258 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1260 inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
1261 newnp->opt->opt_flen);
1263 tcp_mtup_init(newsk);
1264 tcp_sync_mss(newsk, dst_mtu(dst));
1265 newtp->advmss = dst_metric_advmss(dst);
1266 if (tcp_sk(sk)->rx_opt.user_mss &&
1267 tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1268 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1270 tcp_initialize_rcv_mss(newsk);
1271 tcp_synack_rtt_meas(newsk, req);
1272 newtp->total_retrans = req->num_retrans;
1274 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1275 newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1277 #ifdef CONFIG_TCP_MD5SIG
1278 /* Copy over the MD5 key from the original socket */
1279 if ((key = tcp_v6_md5_do_lookup(sk, &newnp->daddr)) != NULL) {
1280 /* We're using one, so create a matching key
1281 * on the newsk structure. If we fail to get
1282 * memory, then we end up not copying the key
1285 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newnp->daddr,
1286 AF_INET6, key->key, key->keylen,
1287 sk_gfp_atomic(sk, GFP_ATOMIC));
1291 if (__inet_inherit_port(sk, newsk) < 0) {
1292 inet_csk_prepare_forced_close(newsk);
1296 __inet6_hash(newsk, NULL);
1301 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1305 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
/* Prepare checksum state for an incoming segment.
 * With CHECKSUM_COMPLETE, a zero result from tcp_v6_check() over
 * skb->csum marks the packet CHECKSUM_UNNECESSARY.  Otherwise skb->csum
 * is seeded with the inverted pseudo-header checksum, and packets of at
 * most 76 bytes are verified right away with __skb_checksum_complete().
 * The remaining return statements are not shown in this listing.
 */
1309 static __sum16 tcp_v6_checksum_init(struct sk_buff *skb)
1311 if (skb->ip_summed == CHECKSUM_COMPLETE) {
1312 if (!tcp_v6_check(skb->len, &ipv6_hdr(skb)->saddr,
1313 &ipv6_hdr(skb)->daddr, skb->csum)) {
1314 skb->ip_summed = CHECKSUM_UNNECESSARY;
1319 skb->csum = ~csum_unfold(tcp_v6_check(skb->len,
1320 &ipv6_hdr(skb)->saddr,
1321 &ipv6_hdr(skb)->daddr, 0));
1323 if (skb->len <= 76) {
1324 return __skb_checksum_complete(skb);
1329 /* The socket must have its spinlock held when we get
1332 * We have a potential double-lock case here, so even when
1333 * doing backlog processing we use the BH locking scheme.
1334 * This is because we cannot sleep with the original spinlock
/*
 * tcp_v6_do_rcv - run one received segment through TCP for socket @sk.
 *
 * Called directly from tcp_v6_rcv() and installed as tcpv6_prot.backlog_rcv.
 *
 *  - IPv4 packets (skb->protocol == ETH_P_IP) are passed to tcp_v4_do_rcv().
 *  - The segment is checked by tcp_v6_inbound_md5_hash() (CONFIG_TCP_MD5SIG)
 *    and sk_filter().
 *  - opt_skb is a clone of @skb taken with skb_clone(); it is what the
 *    IPV6_PKTOPTIONS tail at the end of the function inspects.
 *  - ESTABLISHED sockets: the cached sk->sk_rx_dst is revalidated against
 *    the incoming interface and np->rx_dst_cookie, then the segment goes to
 *    tcp_rcv_established().
 *  - LISTEN sockets: tcp_v6_hnd_req() selects the socket; the segment is
 *    handed to tcp_child_process() on nsk.
 *  - The other visible path ends in tcp_rcv_state_process().
 *  - tcp_v6_send_reset() and the TCP_MIB_INERRS counter serve the error
 *    paths.
 *
 * In the IPV6_PKTOPTIONS tail every header access goes through opt_skb,
 * including the traffic-class read for np->rcv_tclass.  By that point @skb
 * has been handed to tcp_rcv_established()/tcp_rcv_state_process(), which
 * own it from then on and may have released it, so it must not be
 * dereferenced; the clone carries the same IPv6 header.
 *
 * NOTE(review): short lines (braces, labels, several goto/return
 * statements and comment terminators) are missing from this copy of the
 * function; only the statements shown below are described.
 */
1337 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1339 struct ipv6_pinfo *np = inet6_sk(sk);
1340 struct tcp_sock *tp;
1341 struct sk_buff *opt_skb = NULL;
1343 /* Imagine: socket is IPv6. IPv4 packet arrives,
1344 goes to IPv4 receive handler and backlogged.
1345 From backlog it always goes here. Kerboom...
1346 Fortunately, tcp_rcv_established and rcv_established
1347 handle them correctly, but it is not case with
1348 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1351 if (skb->protocol == htons(ETH_P_IP))
1352 return tcp_v4_do_rcv(sk, skb);
1354 #ifdef CONFIG_TCP_MD5SIG
1355 if (tcp_v6_inbound_md5_hash (sk, skb))
1359 if (sk_filter(sk, skb))
1363 * socket locking is here for SMP purposes as backlog rcv
1364 * is currently called with bh processing disabled.
1367 /* Do Stevens' IPV6_PKTOPTIONS.
1369 Yes, guys, it is the only place in our code, where we
1370 may make it not affecting IPv4.
1371 The rest of code is protocol independent,
1372 and I do not like idea to uglify IPv4.
1374 Actually, all the idea behind IPV6_PKTOPTIONS
1375 looks not very well thought. For now we latch
1376 options, received in the last packet, enqueued
1377 by tcp. Feel free to propose better solution.
1381 opt_skb = skb_clone(skb, sk_gfp_atomic(sk, GFP_ATOMIC));
1383 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1384 struct dst_entry *dst = sk->sk_rx_dst;
1386 sock_rps_save_rxhash(sk, skb);
1388 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1389 dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
1391 sk->sk_rx_dst = NULL;
1395 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len))
1398 goto ipv6_pktoptions;
1402 if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1405 if (sk->sk_state == TCP_LISTEN) {
1406 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1411 * Queue it on the new socket if the new socket is active,
1412 * otherwise we just shortcircuit this and continue with
1416 sock_rps_save_rxhash(nsk, skb);
1417 if (tcp_child_process(sk, nsk, skb))
1420 __kfree_skb(opt_skb);
1424 sock_rps_save_rxhash(sk, skb);
1426 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len))
1429 goto ipv6_pktoptions;
1433 tcp_v6_send_reset(sk, skb);
1436 __kfree_skb(opt_skb);
1440 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1445 /* Do you ask, what is it?
1447 1. skb was enqueued by tcp.
1448 2. skb is added to tail of read queue, rather than out of order.
1449 3. socket is not in passive state.
1450 4. Finally, it really contains options, which user wants to receive.
1453 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1454 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1455 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1456 np->mcast_oif = inet6_iif(opt_skb);
1457 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1458 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1459 if (np->rxopt.bits.rxtclass)
1460 np->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(opt_skb));
1461 if (ipv6_opt_accepted(sk, opt_skb)) {
1462 skb_set_owner_r(opt_skb, sk);
1463 opt_skb = xchg(&np->pktoptions, opt_skb);
1465 __kfree_skb(opt_skb);
1466 opt_skb = xchg(&np->pktoptions, NULL);
/*
 * tcp_v6_rcv - receive handler for TCP segments carried over IPv6
 * (registered as tcpv6_protocol.handler).
 *
 * Visible processing steps, in order:
 *  - test skb->pkt_type against PACKET_HOST;
 *  - count the segment in TCP_MIB_INSEGS;
 *  - make the fixed TCP header, then the full header (th->doff * 4),
 *    available with pskb_may_pull(), checking that doff is not smaller
 *    than the fixed header;
 *  - run tcp_v6_checksum_init() unless the checksum is already known good;
 *  - fill in the TCP control block (seq, end_seq, ack_seq, when,
 *    ip_dsfield, sacked);
 *  - look the socket up with __inet6_lookup_skb();
 *  - for a full socket: test the hop limit against min_hopcount (counted
 *    as LINUX_MIB_TCPMINTTLDROP), the xfrm policy and the socket filter,
 *    then, under bh_lock_sock_nested(), either process the segment now via
 *    tcp_v6_do_rcv()/tcp_prequeue() or add it to the backlog bounded by
 *    sk_rcvbuf + sk_sndbuf (overflow counted as LINUX_MIB_TCPBACKLOGDROP);
 *  - the "ret ? -1 : 0" return maps any non-zero result to -1;
 *  - further down, segments without a usable socket are policy- and
 *    checksum-checked and answered with tcp_v6_send_reset(NULL, skb);
 *  - time-wait sockets go through tcp_timewait_state_process(); the
 *    visible cases look for a listener with inet6_lookup_listener(),
 *    deschedule the time-wait socket, or acknowledge with
 *    tcp_v6_timewait_ack().
 *
 * NOTE(review): labels, braces and most goto/return lines are missing from
 * this copy, so the exact branch targets are not documented here.
 */
1474 static int tcp_v6_rcv(struct sk_buff *skb)
1476 const struct tcphdr *th;
1477 const struct ipv6hdr *hdr;
1480 struct net *net = dev_net(skb->dev);
1482 if (skb->pkt_type != PACKET_HOST)
1486 * Count it even if it's bad.
1488 TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1490 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1495 if (th->doff < sizeof(struct tcphdr)/4)
1497 if (!pskb_may_pull(skb, th->doff*4))
1500 if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb))
1504 hdr = ipv6_hdr(skb);
1505 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1506 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1507 skb->len - th->doff*4);
1508 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1509 TCP_SKB_CB(skb)->when = 0;
1510 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1511 TCP_SKB_CB(skb)->sacked = 0;
1513 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1518 if (sk->sk_state == TCP_TIME_WAIT)
1521 if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) {
1522 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
1523 goto discard_and_relse;
1526 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1527 goto discard_and_relse;
1529 if (sk_filter(sk, skb))
1530 goto discard_and_relse;
1534 bh_lock_sock_nested(sk);
1536 if (!sock_owned_by_user(sk)) {
1537 #ifdef CONFIG_NET_DMA
1538 struct tcp_sock *tp = tcp_sk(sk);
1539 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1540 tp->ucopy.dma_chan = net_dma_find_channel();
1541 if (tp->ucopy.dma_chan)
1542 ret = tcp_v6_do_rcv(sk, skb);
1546 if (!tcp_prequeue(sk, skb))
1547 ret = tcp_v6_do_rcv(sk, skb);
1549 } else if (unlikely(sk_add_backlog(sk, skb,
1550 sk->sk_rcvbuf + sk->sk_sndbuf))) {
1552 NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
1553 goto discard_and_relse;
1558 return ret ? -1 : 0;
1561 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1564 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1566 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1568 tcp_v6_send_reset(NULL, skb);
1585 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1586 inet_twsk_put(inet_twsk(sk));
1590 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1591 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1592 inet_twsk_put(inet_twsk(sk));
1596 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1601 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1602 &ipv6_hdr(skb)->saddr, th->source,
1603 &ipv6_hdr(skb)->daddr,
1604 ntohs(th->dest), inet6_iif(skb));
1606 struct inet_timewait_sock *tw = inet_twsk(sk);
1607 inet_twsk_deschedule(tw, &tcp_death_row);
1612 /* Fall through to ACK */
1615 tcp_v6_timewait_ack(sk, skb);
1619 case TCP_TW_SUCCESS:;
/*
 * tcp_v6_early_demux - early socket lookup for an incoming TCP/IPv6 packet
 * (registered as tcpv6_protocol.early_demux).
 *
 * After checking the packet type, pulling the fixed TCP header and
 * validating th->doff, the established-connection table is searched with
 * __inet6_lookup_established().  For a matching socket, sock_edemux is
 * installed as the skb destructor.  If that socket is not in TIME_WAIT,
 * its cached sk_rx_dst is revalidated with dst_check() against
 * rx_dst_cookie and, when the receiving interface matches
 * rx_dst_ifindex, attached to the skb without taking a reference
 * (skb_dst_set_noref()).
 *
 * NOTE(review): the early-return lines and some conditions are missing
 * from this copy; only the visible statements are described.
 */
1624 static void tcp_v6_early_demux(struct sk_buff *skb)
1626 const struct ipv6hdr *hdr;
1627 const struct tcphdr *th;
1630 if (skb->pkt_type != PACKET_HOST)
1633 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1636 hdr = ipv6_hdr(skb);
1639 if (th->doff < sizeof(struct tcphdr) / 4)
1642 sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1643 &hdr->saddr, th->source,
1644 &hdr->daddr, ntohs(th->dest),
1648 skb->destructor = sock_edemux;
1649 if (sk->sk_state != TCP_TIME_WAIT) {
1650 struct dst_entry *dst = sk->sk_rx_dst;
1653 dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
1655 inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1656 skb_dst_set_noref(skb, dst);
/*
 * Time-wait socket operations for TCPv6 (referenced by tcpv6_prot.twsk_prot):
 * objects are sized as struct tcp6_timewait_sock and use the
 * tcp_twsk_unique() and tcp_twsk_destructor() callbacks.
 */
1661 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1662 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
1663 .twsk_unique = tcp_twsk_unique,
1664 .twsk_destructor= tcp_twsk_destructor,
/*
 * Address-family operations for native IPv6 TCP sockets; installed as
 * icsk_af_ops by tcp_v6_init_sock().  Transmit, checksum, header rebuild,
 * rx-dst caching and connection setup use the IPv6 helpers, header sizes
 * are those of struct ipv6hdr / struct frag_hdr, and addresses are
 * reported as struct sockaddr_in6.  The compat sockopt handlers are only
 * present with CONFIG_COMPAT.
 */
1667 static const struct inet_connection_sock_af_ops ipv6_specific = {
1668 .queue_xmit = inet6_csk_xmit,
1669 .send_check = tcp_v6_send_check,
1670 .rebuild_header = inet6_sk_rebuild_header,
1671 .sk_rx_dst_set = inet6_sk_rx_dst_set,
1672 .conn_request = tcp_v6_conn_request,
1673 .syn_recv_sock = tcp_v6_syn_recv_sock,
1674 .net_header_len = sizeof(struct ipv6hdr),
1675 .net_frag_header_len = sizeof(struct frag_hdr),
1676 .setsockopt = ipv6_setsockopt,
1677 .getsockopt = ipv6_getsockopt,
1678 .addr2sockaddr = inet6_csk_addr2sockaddr,
1679 .sockaddr_len = sizeof(struct sockaddr_in6),
1680 .bind_conflict = inet6_csk_bind_conflict,
1681 #ifdef CONFIG_COMPAT
1682 .compat_setsockopt = compat_ipv6_setsockopt,
1683 .compat_getsockopt = compat_ipv6_getsockopt,
/*
 * TCP MD5 signature operations for native IPv6 sockets (built only with
 * CONFIG_TCP_MD5SIG); installed as af_specific by tcp_v6_init_sock().
 */
1687 #ifdef CONFIG_TCP_MD5SIG
1688 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1689 .md5_lookup = tcp_v6_md5_lookup,
1690 .calc_md5_hash = tcp_v6_md5_hash_skb,
1691 .md5_parse = tcp_v6_parse_md5_keys,
1696 * TCP over IPv4 via INET6 API
/*
 * Address-family operations for TCP over IPv4 reached through the INET6
 * API.  Transmit, checksum, header rebuild and rx-dst caching use the IPv4
 * helpers and the IPv4 header length, while connection setup, socket
 * options and address reporting keep the IPv6 entry points and
 * struct sockaddr_in6.
 * NOTE(review): the code that installs this table on a socket is outside
 * this part of the file.
 */
1699 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1700 .queue_xmit = ip_queue_xmit,
1701 .send_check = tcp_v4_send_check,
1702 .rebuild_header = inet_sk_rebuild_header,
1703 .sk_rx_dst_set = inet_sk_rx_dst_set,
1704 .conn_request = tcp_v6_conn_request,
1705 .syn_recv_sock = tcp_v6_syn_recv_sock,
1706 .net_header_len = sizeof(struct iphdr),
1707 .setsockopt = ipv6_setsockopt,
1708 .getsockopt = ipv6_getsockopt,
1709 .addr2sockaddr = inet6_csk_addr2sockaddr,
1710 .sockaddr_len = sizeof(struct sockaddr_in6),
1711 .bind_conflict = inet6_csk_bind_conflict,
1712 #ifdef CONFIG_COMPAT
1713 .compat_setsockopt = compat_ipv6_setsockopt,
1714 .compat_getsockopt = compat_ipv6_getsockopt,
/*
 * TCP MD5 signature operations paired with ipv6_mapped (built only with
 * CONFIG_TCP_MD5SIG): key lookup and hashing use the IPv4 helpers, key
 * parsing uses the IPv6 one.
 */
1718 #ifdef CONFIG_TCP_MD5SIG
1719 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1720 .md5_lookup = tcp_v4_md5_lookup,
1721 .calc_md5_hash = tcp_v4_md5_hash_skb,
1722 .md5_parse = tcp_v6_parse_md5_keys,
1726 /* NOTE: A lot of things set to zero explicitly by call to
1727 * sk_alloc() so need not be done here.
/*
 * tcp_v6_init_sock - socket initialisation hook (tcpv6_prot.init).
 *
 * Selects ipv6_specific as the connection-socket address-family operations
 * and, with CONFIG_TCP_MD5SIG, tcp_sock_ipv6_specific as the MD5 operations.
 *
 * NOTE(review): the remaining statements of this function, including its
 * return, are missing from this copy.
 */
1729 static int tcp_v6_init_sock(struct sock *sk)
1731 struct inet_connection_sock *icsk = inet_csk(sk);
1735 icsk->icsk_af_ops = &ipv6_specific;
1737 #ifdef CONFIG_TCP_MD5SIG
1738 tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
/*
 * tcp_v6_destroy_sock - socket teardown hook (tcpv6_prot.destroy): runs the
 * shared TCP teardown in tcp_v4_destroy_sock() first, then the IPv6-specific
 * teardown in inet6_destroy_sock().
 */
1744 static void tcp_v6_destroy_sock(struct sock *sk)
1746 tcp_v4_destroy_sock(sk);
1747 inet6_destroy_sock(sk);
1750 #ifdef CONFIG_PROC_FS
1751 /* Proc filesystem TCPv6 sock list dumping. */
/*
 * get_openreq6 - format one pending connection request for the TCPv6 proc
 * listing (called from tcp6_seq_show() for TCP_SEQ_STATE_OPENREQ).
 *
 * @seq: seq_file being written
 * @sk:  socket passed by the caller (st->syn_wait_sk)
 * @req: the request sock to print
 * @i:   entry index passed by the caller (st->num)
 * @uid: owner, translated into the reader's user namespace with
 *       from_kuid_munged() before printing
 *
 * Addresses come from inet6_rsk(req), ports from inet_rsk(req) converted
 * with ntohs(), and the expiry is derived from req->expires - jiffies.
 * The uid column uses %5u: uid_t is unsigned, so a signed conversion would
 * print large ids as negative numbers.
 *
 * NOTE(review): the seq_printf() call line and a few argument lines are
 * missing from this copy.
 */
1752 static void get_openreq6(struct seq_file *seq,
1753 const struct sock *sk, struct request_sock *req, int i, kuid_t uid)
1755 int ttd = req->expires - jiffies;
1756 const struct in6_addr *src = &inet6_rsk(req)->loc_addr;
1757 const struct in6_addr *dest = &inet6_rsk(req)->rmt_addr;
1763 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1764 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1766 src->s6_addr32[0], src->s6_addr32[1],
1767 src->s6_addr32[2], src->s6_addr32[3],
1768 ntohs(inet_rsk(req)->loc_port),
1769 dest->s6_addr32[0], dest->s6_addr32[1],
1770 dest->s6_addr32[2], dest->s6_addr32[3],
1771 ntohs(inet_rsk(req)->rmt_port),
1773 0,0, /* could print option size, but that is af dependent. */
1774 1, /* timers active (only the expire timer) */
1775 jiffies_to_clock_t(ttd),
1777 from_kuid_munged(seq_user_ns(seq), uid),
1778 0, /* non standard timer */
1779 0, /* open_requests have no inode */
/*
 * get_tcp6_sock - format one listening or established socket for the TCPv6
 * proc listing (called from tcp6_seq_show()).
 *
 * @seq: seq_file being written
 * @sp:  socket to print
 * @i:   entry index passed by the caller (st->num)
 *
 * Ports are taken from the inet socket and converted with ntohs().  The
 * timer expiry comes from icsk_timeout for the retransmit and zero-window
 * probe timers, from sk_timer when that timer is pending, and is the
 * current jiffies in the last visible case.  The receive-queue column
 * shows sk_ack_backlog for listening sockets and rcv_nxt - copied_seq
 * otherwise.  The uid column uses %5u: the value produced by
 * from_kuid_munged() is an unsigned uid_t, so a signed conversion would
 * print large ids as negative numbers.
 *
 * NOTE(review): the seq_printf() call line, the timer_active assignments
 * and a few argument lines are missing from this copy.
 */
1783 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1785 const struct in6_addr *dest, *src;
1788 unsigned long timer_expires;
1789 const struct inet_sock *inet = inet_sk(sp);
1790 const struct tcp_sock *tp = tcp_sk(sp);
1791 const struct inet_connection_sock *icsk = inet_csk(sp);
1792 const struct ipv6_pinfo *np = inet6_sk(sp);
1795 src = &np->rcv_saddr;
1796 destp = ntohs(inet->inet_dport);
1797 srcp = ntohs(inet->inet_sport);
1799 if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
1801 timer_expires = icsk->icsk_timeout;
1802 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1804 timer_expires = icsk->icsk_timeout;
1805 } else if (timer_pending(&sp->sk_timer)) {
1807 timer_expires = sp->sk_timer.expires;
1810 timer_expires = jiffies;
1814 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1815 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
1817 src->s6_addr32[0], src->s6_addr32[1],
1818 src->s6_addr32[2], src->s6_addr32[3], srcp,
1819 dest->s6_addr32[0], dest->s6_addr32[1],
1820 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1822 tp->write_seq-tp->snd_una,
1823 (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
1825 jiffies_delta_to_clock_t(timer_expires - jiffies),
1826 icsk->icsk_retransmits,
1827 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
1828 icsk->icsk_probes_out,
1830 atomic_read(&sp->sk_refcnt), sp,
1831 jiffies_to_clock_t(icsk->icsk_rto),
1832 jiffies_to_clock_t(icsk->icsk_ack.ato),
1833 (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong,
1835 tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh
/*
 * get_timewait6_sock - format one time-wait socket for the TCPv6 proc
 * listing (called from tcp6_seq_show() for TCP_SEQ_STATE_TIME_WAIT).
 *
 * @seq: seq_file being written
 * @tw:  time-wait socket to print
 * @i:   entry index passed by the caller (st->num)
 *
 * Addresses come from the inet6 time-wait part of @tw, ports are converted
 * with ntohs(), the state column shows tw_substate and the timer column
 * shows the constant 3 together with the remaining time
 * (tw_ttd - jiffies).  Queue sizes, retransmits, uid, timeout and inode are
 * printed as 0, followed by the reference count and the socket pointer.
 *
 * NOTE(review): the seq_printf() call line and a few short lines are
 * missing from this copy.
 */
1839 static void get_timewait6_sock(struct seq_file *seq,
1840 struct inet_timewait_sock *tw, int i)
1842 const struct in6_addr *dest, *src;
1844 const struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
1845 long delta = tw->tw_ttd - jiffies;
1847 dest = &tw6->tw_v6_daddr;
1848 src = &tw6->tw_v6_rcv_saddr;
1849 destp = ntohs(tw->tw_dport);
1850 srcp = ntohs(tw->tw_sport);
1853 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1854 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
1856 src->s6_addr32[0], src->s6_addr32[1],
1857 src->s6_addr32[2], src->s6_addr32[3], srcp,
1858 dest->s6_addr32[0], dest->s6_addr32[1],
1859 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1860 tw->tw_substate, 0, 0,
1861 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
1862 atomic_read(&tw->tw_refcnt), tw);
/*
 * tcp6_seq_show - seq_file show callback for the TCPv6 proc listing
 * (tcp6_seq_afinfo.show).
 *
 * For SEQ_START_TOKEN the column header is emitted.  Otherwise the entry
 * @v is printed according to the iterator state st->state: listening and
 * established sockets via get_tcp6_sock(), pending requests via
 * get_openreq6(), time-wait sockets via get_timewait6_sock().
 *
 * NOTE(review): the assignment of st, the break/return lines and part of
 * the header string are missing from this copy.
 */
1865 static int tcp6_seq_show(struct seq_file *seq, void *v)
1867 struct tcp_iter_state *st;
1869 if (v == SEQ_START_TOKEN) {
1874 "st tx_queue rx_queue tr tm->when retrnsmt"
1875 " uid timeout inode\n");
1880 switch (st->state) {
1881 case TCP_SEQ_STATE_LISTENING:
1882 case TCP_SEQ_STATE_ESTABLISHED:
1883 get_tcp6_sock(seq, v, st->num);
1885 case TCP_SEQ_STATE_OPENREQ:
1886 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
1888 case TCP_SEQ_STATE_TIME_WAIT:
1889 get_timewait6_sock(seq, v, st->num);
/*
 * File operations for the TCPv6 proc entry (referenced by
 * tcp6_seq_afinfo.seq_fops): opened through tcp_seq_open(), seekable with
 * seq_lseek() and released with seq_release_net().
 */
1896 static const struct file_operations tcp6_afinfo_seq_fops = {
1897 .owner = THIS_MODULE,
1898 .open = tcp_seq_open,
1900 .llseek = seq_lseek,
1901 .release = seq_release_net
/*
 * Per-family description of the TCPv6 proc listing, handed to
 * tcp_proc_register()/tcp_proc_unregister(): ties the file operations to
 * the tcp6_seq_show() callback.
 */
1904 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
1907 .seq_fops = &tcp6_afinfo_seq_fops,
1909 .show = tcp6_seq_show,
/*
 * tcp6_proc_init - register the TCPv6 proc listing for namespace @net.
 * Returns the result of tcp_proc_register().
 */
1913 int __net_init tcp6_proc_init(struct net *net)
1915 return tcp_proc_register(net, &tcp6_seq_afinfo);
/*
 * tcp6_proc_exit - remove the TCPv6 proc listing from namespace @net
 * (counterpart of tcp6_proc_init()).
 */
1918 void tcp6_proc_exit(struct net *net)
1920 tcp_proc_unregister(net, &tcp6_seq_afinfo);
/*
 * Protocol descriptor for TCP over IPv6 (referenced by tcpv6_protosw.prot).
 * Most callbacks are the shared TCP / inet connection-socket
 * implementations; the IPv6-specific entries are connect, init, destroy,
 * backlog_rcv, mtu_reduced and hash.  Sockets are sized as
 * struct tcp6_sock, allocated from a SLAB_DESTROY_BY_RCU cache, and share
 * tcp_hashinfo and the TCP memory accounting variables.  The compat and
 * memcg entries depend on CONFIG_COMPAT and CONFIG_MEMCG_KMEM.
 */
1924 struct proto tcpv6_prot = {
1926 .owner = THIS_MODULE,
1928 .connect = tcp_v6_connect,
1929 .disconnect = tcp_disconnect,
1930 .accept = inet_csk_accept,
1932 .init = tcp_v6_init_sock,
1933 .destroy = tcp_v6_destroy_sock,
1934 .shutdown = tcp_shutdown,
1935 .setsockopt = tcp_setsockopt,
1936 .getsockopt = tcp_getsockopt,
1937 .recvmsg = tcp_recvmsg,
1938 .sendmsg = tcp_sendmsg,
1939 .sendpage = tcp_sendpage,
1940 .backlog_rcv = tcp_v6_do_rcv,
1941 .release_cb = tcp_release_cb,
1942 .mtu_reduced = tcp_v6_mtu_reduced,
1943 .hash = tcp_v6_hash,
1944 .unhash = inet_unhash,
1945 .get_port = inet_csk_get_port,
1946 .enter_memory_pressure = tcp_enter_memory_pressure,
1947 .sockets_allocated = &tcp_sockets_allocated,
1948 .memory_allocated = &tcp_memory_allocated,
1949 .memory_pressure = &tcp_memory_pressure,
1950 .orphan_count = &tcp_orphan_count,
1951 .sysctl_wmem = sysctl_tcp_wmem,
1952 .sysctl_rmem = sysctl_tcp_rmem,
1953 .max_header = MAX_TCP_HEADER,
1954 .obj_size = sizeof(struct tcp6_sock),
1955 .slab_flags = SLAB_DESTROY_BY_RCU,
1956 .twsk_prot = &tcp6_timewait_sock_ops,
1957 .rsk_prot = &tcp6_request_sock_ops,
1958 .h.hashinfo = &tcp_hashinfo,
1959 .no_autobind = true,
1960 #ifdef CONFIG_COMPAT
1961 .compat_setsockopt = compat_tcp_setsockopt,
1962 .compat_getsockopt = compat_tcp_getsockopt,
1964 #ifdef CONFIG_MEMCG_KMEM
1965 .proto_cgroup = tcp_proto_cgroup,
/*
 * IPv6 upper-layer protocol hooks for TCP, registered for IPPROTO_TCP in
 * tcpv6_init(): early demux, the receive handler and the error handler.
 */
1969 static const struct inet6_protocol tcpv6_protocol = {
1970 .early_demux = tcp_v6_early_demux,
1971 .handler = tcp_v6_rcv,
1972 .err_handler = tcp_v6_err,
1973 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
/*
 * Socket-switch entry registered in tcpv6_init(): maps
 * SOCK_STREAM / IPPROTO_TCP on the IPv6 family to tcpv6_prot and
 * inet6_stream_ops.
 * NOTE(review): the continuation of the .flags expression is missing from
 * this copy.
 */
1976 static struct inet_protosw tcpv6_protosw = {
1977 .type = SOCK_STREAM,
1978 .protocol = IPPROTO_TCP,
1979 .prot = &tcpv6_prot,
1980 .ops = &inet6_stream_ops,
1982 .flags = INET_PROTOSW_PERMANENT |
/*
 * tcpv6_net_init - per-namespace setup: creates the control socket
 * net->ipv6.tcp_sk (PF_INET6, SOCK_RAW, IPPROTO_TCP) and returns the result
 * of inet_ctl_sock_create().
 */
1986 static int __net_init tcpv6_net_init(struct net *net)
1988 return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
1989 SOCK_RAW, IPPROTO_TCP, net);
/*
 * tcpv6_net_exit - per-namespace teardown: destroys the control socket
 * created by tcpv6_net_init().
 */
1992 static void __net_exit tcpv6_net_exit(struct net *net)
1994 inet_ctl_sock_destroy(net->ipv6.tcp_sk);
/*
 * tcpv6_net_exit_batch - batched namespace teardown: purges AF_INET6
 * time-wait sockets from tcp_hashinfo / tcp_death_row.  The visible call
 * does not use @net_exit_list.
 */
1997 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
1999 inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET6);
/*
 * Per-network-namespace operations for TCPv6, registered in tcpv6_init()
 * and unregistered in tcpv6_exit().
 */
2002 static struct pernet_operations tcpv6_net_ops = {
2003 .init = tcpv6_net_init,
2004 .exit = tcpv6_net_exit,
2005 .exit_batch = tcpv6_net_exit_batch,
/*
 * tcpv6_init - register TCP with the IPv6 stack.
 *
 * Registration order: the inet6 protocol handler for IPPROTO_TCP, the
 * socket-switch entry, then the per-namespace operations.  The error
 * labels unwind in reverse: a failed pernet registration unregisters the
 * protosw entry, and a failed protosw registration removes the protocol
 * handler.
 *
 * NOTE(review): the declaration of ret, the condition lines, labels and
 * the final return are missing from this copy.
 */
2008 int __init tcpv6_init(void)
2012 ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2016 /* register inet6 protocol */
2017 ret = inet6_register_protosw(&tcpv6_protosw);
2019 goto out_tcpv6_protocol;
2021 ret = register_pernet_subsys(&tcpv6_net_ops);
2023 goto out_tcpv6_protosw;
2028 inet6_unregister_protosw(&tcpv6_protosw);
2030 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
/*
 * tcpv6_exit - undo tcpv6_init() in reverse order: per-namespace
 * operations, socket-switch entry, then the inet6 protocol handler.
 */
2034 void tcpv6_exit(void)
2036 unregister_pernet_subsys(&tcpv6_net_ops);
2037 inet6_unregister_protosw(&tcpv6_protosw);
2038 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);