Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
[platform/kernel/linux-rpi.git] / net / ipv4 / tcp_ipv4.c
index fec656f..6873f46 100644 (file)
@@ -91,6 +91,8 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
 struct inet_hashinfo tcp_hashinfo;
 EXPORT_SYMBOL(tcp_hashinfo);
 
+static DEFINE_PER_CPU(struct sock *, ipv4_tcp_sk);
+
 static u32 tcp_v4_init_seq(const struct sk_buff *skb)
 {
        return secure_tcp_seq(ip_hdr(skb)->daddr,
@@ -206,7 +208,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
        struct rtable *rt;
        int err;
        struct ip_options_rcu *inet_opt;
-       struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
+       struct inet_timewait_death_row *tcp_death_row = sock_net(sk)->ipv4.tcp_death_row;
 
        if (addr_len < sizeof(struct sockaddr_in))
                return -EINVAL;
@@ -810,7 +812,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
        arg.tos = ip_hdr(skb)->tos;
        arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
        local_bh_disable();
-       ctl_sk = this_cpu_read(*net->ipv4.tcp_sk);
+       ctl_sk = this_cpu_read(ipv4_tcp_sk);
+       sock_net_set(ctl_sk, net);
        if (sk) {
                ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
                                   inet_twsk(sk)->tw_mark : sk->sk_mark;
@@ -825,6 +828,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
                              transmit_time);
 
        ctl_sk->sk_mark = 0;
+       sock_net_set(ctl_sk, &init_net);
        __TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
        __TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
        local_bh_enable();
@@ -908,7 +912,8 @@ static void tcp_v4_send_ack(const struct sock *sk,
        arg.tos = tos;
        arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL);
        local_bh_disable();
-       ctl_sk = this_cpu_read(*net->ipv4.tcp_sk);
+       ctl_sk = this_cpu_read(ipv4_tcp_sk);
+       sock_net_set(ctl_sk, net);
        ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
                           inet_twsk(sk)->tw_mark : sk->sk_mark;
        ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ?
@@ -921,6 +926,7 @@ static void tcp_v4_send_ack(const struct sock *sk,
                              transmit_time);
 
        ctl_sk->sk_mark = 0;
+       sock_net_set(ctl_sk, &init_net);
        __TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
        local_bh_enable();
 }
@@ -3111,41 +3117,18 @@ EXPORT_SYMBOL(tcp_prot);
 
 static void __net_exit tcp_sk_exit(struct net *net)
 {
-       int cpu;
+       struct inet_timewait_death_row *tcp_death_row = net->ipv4.tcp_death_row;
 
        if (net->ipv4.tcp_congestion_control)
                bpf_module_put(net->ipv4.tcp_congestion_control,
                               net->ipv4.tcp_congestion_control->owner);
-
-       for_each_possible_cpu(cpu)
-               inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
-       free_percpu(net->ipv4.tcp_sk);
+       if (refcount_dec_and_test(&tcp_death_row->tw_refcount))
+               kfree(tcp_death_row);
 }
 
 static int __net_init tcp_sk_init(struct net *net)
 {
-       int res, cpu, cnt;
-
-       net->ipv4.tcp_sk = alloc_percpu(struct sock *);
-       if (!net->ipv4.tcp_sk)
-               return -ENOMEM;
-
-       for_each_possible_cpu(cpu) {
-               struct sock *sk;
-
-               res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
-                                          IPPROTO_TCP, net);
-               if (res)
-                       goto fail;
-               sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
-
-               /* Please enforce IP_DF and IPID==0 for RST and
-                * ACK sent in SYN-RECV and TIME-WAIT state.
-                */
-               inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO;
-
-               *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
-       }
+       int cnt;
 
        net->ipv4.sysctl_tcp_ecn = 2;
        net->ipv4.sysctl_tcp_ecn_fallback = 1;
@@ -3172,9 +3155,13 @@ static int __net_init tcp_sk_init(struct net *net)
        net->ipv4.sysctl_tcp_tw_reuse = 2;
        net->ipv4.sysctl_tcp_no_ssthresh_metrics_save = 1;
 
+       net->ipv4.tcp_death_row = kzalloc(sizeof(struct inet_timewait_death_row), GFP_KERNEL);
+       if (!net->ipv4.tcp_death_row)
+               return -ENOMEM;
+       refcount_set(&net->ipv4.tcp_death_row->tw_refcount, 1);
        cnt = tcp_hashinfo.ehash_mask + 1;
-       net->ipv4.tcp_death_row.sysctl_max_tw_buckets = cnt / 2;
-       net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo;
+       net->ipv4.tcp_death_row->sysctl_max_tw_buckets = cnt / 2;
+       net->ipv4.tcp_death_row->hashinfo = &tcp_hashinfo;
 
        net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 128);
        net->ipv4.sysctl_tcp_sack = 1;
@@ -3229,18 +3216,12 @@ static int __net_init tcp_sk_init(struct net *net)
                net->ipv4.tcp_congestion_control = &tcp_reno;
 
        return 0;
-fail:
-       tcp_sk_exit(net);
-
-       return res;
 }
 
 static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
 {
        struct net *net;
 
-       inet_twsk_purge(&tcp_hashinfo, AF_INET);
-
        list_for_each_entry(net, net_exit_list, exit_list)
                tcp_fastopen_ctx_destroy(net);
 }
@@ -3326,6 +3307,24 @@ static void __init bpf_iter_register(void)
 
 void __init tcp_v4_init(void)
 {
+       int cpu, res;
+
+       for_each_possible_cpu(cpu) {
+               struct sock *sk;
+
+               res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
+                                          IPPROTO_TCP, &init_net);
+               if (res)
+                       panic("Failed to create the TCP control socket.\n");
+               sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
+
+               /* Please enforce IP_DF and IPID==0 for RST and
+                * ACK sent in SYN-RECV and TIME-WAIT state.
+                */
+               inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO;
+
+               per_cpu(ipv4_tcp_sk, cpu) = sk;
+       }
        if (register_pernet_subsys(&tcp_sk_ops))
                panic("Failed to create the TCP control socket.\n");