From aa1330766c49199bdab4d4a9096d98b072df9044 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Wed, 2 Sep 2009 23:45:45 -0700 Subject: [PATCH] tcp: replace hard coded GFP_KERNEL with sk_allocation This fixed a lockdep warning which appeared when doing stress memory tests over NFS: inconsistent {RECLAIM_FS-ON-W} -> {IN-RECLAIM_FS-W} usage. page reclaim => nfs_writepage => tcp_sendmsg => lock sk_lock mount_root => nfs_root_data => tcp_close => lock sk_lock => tcp_send_fin => alloc_skb_fclone => page reclaim David raised a concern that if the allocation fails in tcp_send_fin(), and it's GFP_ATOMIC, we are going to yield() (which sleeps) and loop endlessly waiting for the allocation to succeed. But the fact is, the original GFP_KERNEL also sleeps. GFP_ATOMIC+yield() looks weird, but it is no worse than the implicit sleep inside GFP_KERNEL. Both could loop endlessly under memory pressure. CC: Arnaldo Carvalho de Melo CC: David S. Miller CC: Herbert Xu Signed-off-by: Wu Fengguang Signed-off-by: David S. Miller --- include/net/tcp.h | 2 +- net/ipv4/tcp.c | 10 +++++----- net/ipv4/tcp_ipv4.c | 7 ++++--- net/ipv4/tcp_minisocks.c | 2 +- net/ipv4/tcp_output.c | 5 +++-- net/ipv6/tcp_ipv6.c | 2 +- 6 files changed, 15 insertions(+), 13 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index df50bc4..b71a446 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1186,7 +1186,7 @@ extern int tcp_v4_md5_do_del(struct sock *sk, #define tcp_twsk_md5_key(twsk) NULL #endif -extern struct tcp_md5sig_pool **tcp_alloc_md5sig_pool(void); +extern struct tcp_md5sig_pool **tcp_alloc_md5sig_pool(struct sock *); extern void tcp_free_md5sig_pool(void); extern struct tcp_md5sig_pool *__tcp_get_md5sig_pool(int cpu); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 59f69a6..edeea06 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1839,7 +1839,7 @@ void tcp_close(struct sock *sk, long timeout) /* Unread data was tossed, zap the connection. 
*/ NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE); tcp_set_state(sk, TCP_CLOSE); - tcp_send_active_reset(sk, GFP_KERNEL); + tcp_send_active_reset(sk, sk->sk_allocation); } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { /* Check zero linger _after_ checking for unread data. */ sk->sk_prot->disconnect(sk, 0); @@ -2658,7 +2658,7 @@ void tcp_free_md5sig_pool(void) EXPORT_SYMBOL(tcp_free_md5sig_pool); -static struct tcp_md5sig_pool **__tcp_alloc_md5sig_pool(void) +static struct tcp_md5sig_pool **__tcp_alloc_md5sig_pool(struct sock *sk) { int cpu; struct tcp_md5sig_pool **pool; @@ -2671,7 +2671,7 @@ static struct tcp_md5sig_pool **__tcp_alloc_md5sig_pool(void) struct tcp_md5sig_pool *p; struct crypto_hash *hash; - p = kzalloc(sizeof(*p), GFP_KERNEL); + p = kzalloc(sizeof(*p), sk->sk_allocation); if (!p) goto out_free; *per_cpu_ptr(pool, cpu) = p; @@ -2688,7 +2688,7 @@ out_free: return NULL; } -struct tcp_md5sig_pool **tcp_alloc_md5sig_pool(void) +struct tcp_md5sig_pool **tcp_alloc_md5sig_pool(struct sock *sk) { struct tcp_md5sig_pool **pool; int alloc = 0; @@ -2709,7 +2709,7 @@ retry: if (alloc) { /* we cannot hold spinlock here because this may sleep. 
*/ - struct tcp_md5sig_pool **p = __tcp_alloc_md5sig_pool(); + struct tcp_md5sig_pool **p = __tcp_alloc_md5sig_pool(sk); spin_lock_bh(&tcp_md5sig_pool_lock); if (!p) { tcp_md5sig_users--; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ce7d3b0..0543561 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -886,7 +886,7 @@ int tcp_v4_md5_do_add(struct sock *sk, __be32 addr, } sk->sk_route_caps &= ~NETIF_F_GSO_MASK; } - if (tcp_alloc_md5sig_pool() == NULL) { + if (tcp_alloc_md5sig_pool(sk) == NULL) { kfree(newkey); return -ENOMEM; } @@ -1007,8 +1007,9 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval, if (!tcp_sk(sk)->md5sig_info) { struct tcp_sock *tp = tcp_sk(sk); - struct tcp_md5sig_info *p = kzalloc(sizeof(*p), GFP_KERNEL); + struct tcp_md5sig_info *p; + p = kzalloc(sizeof(*p), sk->sk_allocation); if (!p) return -EINVAL; @@ -1016,7 +1017,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval, sk->sk_route_caps &= ~NETIF_F_GSO_MASK; } - newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); + newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation); if (!newkey) return -ENOMEM; return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr, diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 6c8b422..e48c37d 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -322,7 +322,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) if (key != NULL) { memcpy(&tcptw->tw_md5_key, key->key, key->keylen); tcptw->tw_md5_keylen = key->keylen; - if (tcp_alloc_md5sig_pool() == NULL) + if (tcp_alloc_md5sig_pool(sk) == NULL) BUG(); } } while (0); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 4e00442..5200aab 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2135,7 +2135,8 @@ void tcp_send_fin(struct sock *sk) } else { /* Socket is locked, keep trying until memory is available. 
*/ for (;;) { - skb = alloc_skb_fclone(MAX_TCP_HEADER, GFP_KERNEL); + skb = alloc_skb_fclone(MAX_TCP_HEADER, + sk->sk_allocation); if (skb) break; yield(); @@ -2388,7 +2389,7 @@ int tcp_connect(struct sock *sk) sk->sk_wmem_queued += buff->truesize; sk_mem_charge(sk, buff->truesize); tp->packets_out += tcp_skb_pcount(buff); - tcp_transmit_skb(sk, buff, 1, GFP_KERNEL); + tcp_transmit_skb(sk, buff, 1, sk->sk_allocation); /* We change tp->snd_nxt after the tcp_transmit_skb() call * in order to make this packet get counted in tcpOutSegs. diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index d73617e..65aecf2 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -591,7 +591,7 @@ static int tcp_v6_md5_do_add(struct sock *sk, struct in6_addr *peer, } sk->sk_route_caps &= ~NETIF_F_GSO_MASK; } - if (tcp_alloc_md5sig_pool() == NULL) { + if (tcp_alloc_md5sig_pool(sk) == NULL) { kfree(newkey); return -ENOMEM; } -- 2.7.4