From b416c144f46af1a30ddfa4e4319a8f077381ad63 Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Mon, 21 Oct 2013 13:14:53 +0100
Subject: [PATCH] netfilter: x_tables: fix ordering of jumpstack allocation and table update

During kernel stability testing on an SMP ARMv7 system, Yalin Wang
reported the following panic from the netfilter code:

  1fe0: 0000001c 5e2d3b10 4007e779 4009e110 60000010 00000032 ff565656 ff545454
  [] (ipt_do_table+0x448/0x584) from [] (nf_iterate+0x48/0x7c)
  [] (nf_iterate+0x48/0x7c) from [] (nf_hook_slow+0x58/0x104)
  [] (nf_hook_slow+0x58/0x104) from [] (ip_local_deliver+0x88/0xa8)
  [] (ip_local_deliver+0x88/0xa8) from [] (ip_rcv_finish+0x418/0x43c)
  [] (ip_rcv_finish+0x418/0x43c) from [] (__netif_receive_skb+0x4cc/0x598)
  [] (__netif_receive_skb+0x4cc/0x598) from [] (process_backlog+0x84/0x158)
  [] (process_backlog+0x84/0x158) from [] (net_rx_action+0x70/0x1dc)
  [] (net_rx_action+0x70/0x1dc) from [] (__do_softirq+0x11c/0x27c)
  [] (__do_softirq+0x11c/0x27c) from [] (do_softirq+0x44/0x50)
  [] (do_softirq+0x44/0x50) from [] (local_bh_enable_ip+0x8c/0xd0)
  [] (local_bh_enable_ip+0x8c/0xd0) from [] (inet_stream_connect+0x164/0x298)
  [] (inet_stream_connect+0x164/0x298) from [] (sys_connect+0x88/0xc8)
  [] (sys_connect+0x88/0xc8) from [] (ret_fast_syscall+0x0/0x30)
  Code: 2a000021 e59d2028 e59de01c e59f011c (e7824103)
  ---[ end trace da227214a82491bd ]---
  Kernel panic - not syncing: Fatal exception in interrupt

This comes about because CPU1 is executing xt_replace_table in response
to a setsockopt syscall, resulting in:

	ret = xt_jumpstack_alloc(newinfo);
		--> newinfo->jumpstack = kzalloc(size, GFP_KERNEL);

	[...]

	table->private = newinfo;
	newinfo->initial_entries = private->initial_entries;

Meanwhile, CPU0 is handling the network receive path and ends up in
ipt_do_table, resulting in:

	private = table->private;

	[...]

	jumpstack = (struct ipt_entry **)private->jumpstack[cpu];

On weakly ordered memory architectures, the writes to table->private
and newinfo->jumpstack from CPU1 can be observed out of order by CPU0.
Furthermore, on architectures which don't respect ordering of address
dependencies (i.e. Alpha), the reads from CPU0 can also be re-ordered.

This patch adds an smp_wmb() before the assignment to table->private
(which is essentially publishing newinfo) to ensure that all writes to
newinfo will be observed before plugging it into the table structure.
A dependent-read barrier is also added on the consumer sides, to ensure
the same ordering requirements are also respected there.

Cc: Paul E. McKenney
Reported-by: Wang, Yalin
Tested-by: Wang, Yalin
Signed-off-by: Will Deacon
Acked-by: Eric Dumazet
Signed-off-by: Pablo Neira Ayuso
---
 net/ipv4/netfilter/arp_tables.c | 5 +++++
 net/ipv4/netfilter/ip_tables.c  | 5 +++++
 net/ipv6/netfilter/ip6_tables.c | 5 +++++
 net/netfilter/x_tables.c        | 7 ++++++-
 4 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 85a4f21..59da7cd 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -271,6 +271,11 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 	local_bh_disable();
 	addend = xt_write_recseq_begin();
 	private = table->private;
+	/*
+	 * Ensure we load private-> members after we've fetched the base
+	 * pointer.
+	 */
+	smp_read_barrier_depends();
 	table_base = private->entries[smp_processor_id()];
 
 	e = get_entry(table_base, private->hook_entry[hook]);
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index d23118d9..718dfbd 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -327,6 +327,11 @@ ipt_do_table(struct sk_buff *skb,
 	addend = xt_write_recseq_begin();
 	private = table->private;
 	cpu = smp_processor_id();
+	/*
+	 * Ensure we load private-> members after we've fetched the base
+	 * pointer.
+	 */
+	smp_read_barrier_depends();
 	table_base = private->entries[cpu];
 	jumpstack = (struct ipt_entry **)private->jumpstack[cpu];
 	stackptr = per_cpu_ptr(private->stackptr, cpu);
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 44400c2..710238f 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -349,6 +349,11 @@ ip6t_do_table(struct sk_buff *skb,
 	local_bh_disable();
 	addend = xt_write_recseq_begin();
 	private = table->private;
+	/*
+	 * Ensure we load private-> members after we've fetched the base
+	 * pointer.
+	 */
+	smp_read_barrier_depends();
 	cpu = smp_processor_id();
 	table_base = private->entries[cpu];
 	jumpstack = (struct ip6t_entry **)private->jumpstack[cpu];
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 8b03028..227aa11 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -845,8 +845,13 @@ xt_replace_table(struct xt_table *table,
 		return NULL;
 	}
 
-	table->private = newinfo;
 	newinfo->initial_entries = private->initial_entries;
+	/*
+	 * Ensure contents of newinfo are visible before assigning to
+	 * private.
+	 */
+	smp_wmb();
+	table->private = newinfo;
 
 	/*
 	 * Even though table entries have now been swapped, other CPU's
-- 
2.7.4