We used a global BH disable in LOCAL_OUT hook.
Add _bh suffix to all places that need it and remove
the disabling from LOCAL_OUT and sync code.
Functions like ip_defrag need protection from
BH, so add it. As for nf_nat_mangle_tcp_packet, it needs
RCU lock.
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
unsigned int flag, __u32 seq, int diff)
{
/* spinlock is to keep updating cp->flags atomic */
- spin_lock(&cp->lock);
+ spin_lock_bh(&cp->lock);
if (!(cp->flags & flag) || after(seq, vseq->init_seq)) {
vseq->previous_delta = vseq->delta;
vseq->delta += diff;
vseq->init_seq = seq;
cp->flags |= flag;
}
- spin_unlock(&cp->lock);
+ spin_unlock_bh(&cp->lock);
}
static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
static struct ip_vs_aligned_lock
__ip_vs_conntbl_lock_array[CT_LOCKARRAY_SIZE] __cacheline_aligned;
-static inline void ct_write_lock(unsigned int key)
+static inline void ct_write_lock_bh(unsigned int key)
{
- spin_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
+ spin_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
}
-static inline void ct_write_unlock(unsigned int key)
+static inline void ct_write_unlock_bh(unsigned int key)
{
- spin_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
+ spin_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
}
/* Hash by protocol, client address and port */
hash = ip_vs_conn_hashkey_conn(cp);
- ct_write_lock(hash);
+ ct_write_lock_bh(hash);
spin_lock(&cp->lock);
if (!(cp->flags & IP_VS_CONN_F_HASHED)) {
}
spin_unlock(&cp->lock);
- ct_write_unlock(hash);
+ ct_write_unlock_bh(hash);
return ret;
}
/* unhash it and decrease its reference counter */
hash = ip_vs_conn_hashkey_conn(cp);
- ct_write_lock(hash);
+ ct_write_lock_bh(hash);
spin_lock(&cp->lock);
if (cp->flags & IP_VS_CONN_F_HASHED) {
ret = 0;
spin_unlock(&cp->lock);
- ct_write_unlock(hash);
+ ct_write_unlock_bh(hash);
return ret;
}
hash = ip_vs_conn_hashkey_conn(cp);
- ct_write_lock(hash);
+ ct_write_lock_bh(hash);
spin_lock(&cp->lock);
if (cp->flags & IP_VS_CONN_F_HASHED) {
ret = atomic_read(&cp->refcnt) ? false : true;
spin_unlock(&cp->lock);
- ct_write_unlock(hash);
+ ct_write_unlock_bh(hash);
return ret;
}
void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport)
{
if (ip_vs_conn_unhash(cp)) {
- spin_lock(&cp->lock);
+ spin_lock_bh(&cp->lock);
if (cp->flags & IP_VS_CONN_F_NO_CPORT) {
atomic_dec(&ip_vs_conn_no_cport_cnt);
cp->flags &= ~IP_VS_CONN_F_NO_CPORT;
cp->cport = cport;
}
- spin_unlock(&cp->lock);
+ spin_unlock_bh(&cp->lock);
/* hash on new dport */
ip_vs_conn_hash(cp);
if (dest) {
struct ip_vs_proto_data *pd;
- spin_lock(&cp->lock);
+ spin_lock_bh(&cp->lock);
if (cp->dest) {
- spin_unlock(&cp->lock);
+ spin_unlock_bh(&cp->lock);
rcu_read_unlock();
return;
}
ip_vs_unbind_app(cp);
ip_vs_bind_dest(cp, dest);
- spin_unlock(&cp->lock);
+ spin_unlock_bh(&cp->lock);
/* Update its packet transmitter */
cp->packet_xmit = NULL;
static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
{
- int err = ip_defrag(skb, user);
+ int err;
+ local_bh_disable();
+ err = ip_defrag(skb, user);
+ local_bh_enable();
if (!err)
ip_send_check(ip_hdr(skb));
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- unsigned int verdict;
-
- /* Disable BH in LOCAL_OUT until all places are fixed */
- local_bh_disable();
- verdict = ip_vs_out(hooknum, skb, AF_INET);
- local_bh_enable();
- return verdict;
+ return ip_vs_out(hooknum, skb, AF_INET);
}
#ifdef CONFIG_IP_VS_IPV6
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- unsigned int verdict;
-
- /* Disable BH in LOCAL_OUT until all places are fixed */
- local_bh_disable();
- verdict = ip_vs_out(hooknum, skb, AF_INET6);
- local_bh_enable();
- return verdict;
+ return ip_vs_out(hooknum, skb, AF_INET6);
}
#endif
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- unsigned int verdict;
-
- /* Disable BH in LOCAL_OUT until all places are fixed */
- local_bh_disable();
- verdict = ip_vs_in(hooknum, skb, AF_INET);
- local_bh_enable();
- return verdict;
+ return ip_vs_in(hooknum, skb, AF_INET);
}
#ifdef CONFIG_IP_VS_IPV6
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- unsigned int verdict;
-
- /* Disable BH in LOCAL_OUT until all places are fixed */
- local_bh_disable();
- verdict = ip_vs_in(hooknum, skb, AF_INET6);
- local_bh_enable();
- return verdict;
+ return ip_vs_in(hooknum, skb, AF_INET6);
}
#endif
* hopefully it will succeed on the retransmitted
* packet.
*/
+ rcu_read_lock();
ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
iph->ihl * 4,
start-data, end-start,
buf, buf_len);
+ rcu_read_unlock();
if (ret) {
ip_vs_nfct_expect_related(skb, ct, n_cp,
IPPROTO_TCP, 0, 0);
}
/* If we fail to create a cache entry, we'll just use the valid dest */
- spin_lock(&svc->sched_lock);
+ spin_lock_bh(&svc->sched_lock);
if (!tbl->dead)
ip_vs_lblc_new(tbl, &iph.daddr, dest);
- spin_unlock(&svc->sched_lock);
+ spin_unlock_bh(&svc->sched_lock);
out:
IP_VS_DBG_BUF(6, "LBLC: destination IP address %s --> server %s:%d\n",
if (atomic_read(&en->set.size) > 1 &&
time_after(jiffies, en->set.lastmod +
sysctl_lblcr_expiration(svc))) {
- spin_lock(&svc->sched_lock);
+ spin_lock_bh(&svc->sched_lock);
if (atomic_read(&en->set.size) > 1) {
struct ip_vs_dest *m;
if (m)
ip_vs_dest_set_erase(&en->set, m);
}
- spin_unlock(&svc->sched_lock);
+ spin_unlock_bh(&svc->sched_lock);
}
/* If the destination is not overloaded, use it */
}
/* Update our cache entry */
- spin_lock(&svc->sched_lock);
+ spin_lock_bh(&svc->sched_lock);
if (!tbl->dead)
ip_vs_dest_set_insert(&en->set, dest, true);
- spin_unlock(&svc->sched_lock);
+ spin_unlock_bh(&svc->sched_lock);
goto out;
}
}
/* If we fail to create a cache entry, we'll just use the valid dest */
- spin_lock(&svc->sched_lock);
+ spin_lock_bh(&svc->sched_lock);
if (!tbl->dead)
ip_vs_lblcr_new(tbl, &iph.daddr, dest);
- spin_unlock(&svc->sched_lock);
+ spin_unlock_bh(&svc->sched_lock);
out:
IP_VS_DBG_BUF(6, "LBLCR: destination IP address %s --> server %s:%d\n",
sctp_state_transition(struct ip_vs_conn *cp, int direction,
const struct sk_buff *skb, struct ip_vs_proto_data *pd)
{
- spin_lock(&cp->lock);
+ spin_lock_bh(&cp->lock);
set_sctp_state(pd, cp, direction, skb);
- spin_unlock(&cp->lock);
+ spin_unlock_bh(&cp->lock);
}
static inline __u16 sctp_app_hashkey(__be16 port)
if (th == NULL)
return;
- spin_lock(&cp->lock);
+ spin_lock_bh(&cp->lock);
set_tcp_state(pd, cp, direction, th);
- spin_unlock(&cp->lock);
+ spin_unlock_bh(&cp->lock);
}
static inline __u16 tcp_app_hashkey(__be16 port)
{
struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
- spin_lock(&cp->lock);
+ spin_lock_bh(&cp->lock);
cp->state = IP_VS_TCP_S_LISTEN;
cp->timeout = (pd ? pd->timeout_table[IP_VS_TCP_S_LISTEN]
: tcp_timeouts[IP_VS_TCP_S_LISTEN]);
- spin_unlock(&cp->lock);
+ spin_unlock_bh(&cp->lock);
}
/* ---------------------------------------------
IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
- spin_lock(&svc->sched_lock);
+ spin_lock_bh(&svc->sched_lock);
p = (struct list_head *) svc->sched_data;
last = dest = list_entry(p, struct ip_vs_dest, n_list);
} while (pass < 2 && p != &svc->destinations);
stop:
- spin_unlock(&svc->sched_lock);
+ spin_unlock_bh(&svc->sched_lock);
ip_vs_scheduler_err(svc, "no destination available");
return NULL;
out:
svc->sched_data = &dest->n_list;
- spin_unlock(&svc->sched_lock);
+ spin_unlock_bh(&svc->sched_lock);
IP_VS_DBG_BUF(6, "RR: server %s:%u "
"activeconns %d refcnt %d weight %d\n",
IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port),
if (!ip_vs_sync_conn_needed(ipvs, cp, pkts))
return;
- spin_lock(&ipvs->sync_buff_lock);
+ spin_lock_bh(&ipvs->sync_buff_lock);
if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
- spin_unlock(&ipvs->sync_buff_lock);
+ spin_unlock_bh(&ipvs->sync_buff_lock);
return;
}
if (!buff) {
buff = ip_vs_sync_buff_create_v0(ipvs);
if (!buff) {
- spin_unlock(&ipvs->sync_buff_lock);
+ spin_unlock_bh(&ipvs->sync_buff_lock);
pr_err("ip_vs_sync_buff_create failed.\n");
return;
}
sb_queue_tail(ipvs, ms);
ms->sync_buff = NULL;
}
- spin_unlock(&ipvs->sync_buff_lock);
+ spin_unlock_bh(&ipvs->sync_buff_lock);
/* synchronize its controller if it has */
cp = cp->control;
pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN);
}
- spin_lock(&ipvs->sync_buff_lock);
+ spin_lock_bh(&ipvs->sync_buff_lock);
if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
- spin_unlock(&ipvs->sync_buff_lock);
+ spin_unlock_bh(&ipvs->sync_buff_lock);
return;
}
if (!buff) {
buff = ip_vs_sync_buff_create(ipvs);
if (!buff) {
- spin_unlock(&ipvs->sync_buff_lock);
+ spin_unlock_bh(&ipvs->sync_buff_lock);
pr_err("ip_vs_sync_buff_create failed.\n");
return;
}
}
}
- spin_unlock(&ipvs->sync_buff_lock);
+ spin_unlock_bh(&ipvs->sync_buff_lock);
control:
/* synchronize its controller if it has */
kfree(param->pe_data);
dest = cp->dest;
- spin_lock(&cp->lock);
+ spin_lock_bh(&cp->lock);
if ((cp->flags ^ flags) & IP_VS_CONN_F_INACTIVE &&
!(flags & IP_VS_CONN_F_TEMPLATE) && dest) {
if (flags & IP_VS_CONN_F_INACTIVE) {
flags &= IP_VS_CONN_F_BACKUP_UPD_MASK;
flags |= cp->flags & ~IP_VS_CONN_F_BACKUP_UPD_MASK;
cp->flags = flags;
- spin_unlock(&cp->lock);
+ spin_unlock_bh(&cp->lock);
if (!dest)
ip_vs_try_bind_dest(cp);
} else {
break;
}
- /* disable bottom half, because it accesses the data
- shared by softirq while getting/creating conns */
- local_bh_disable();
ip_vs_process_message(tinfo->net, tinfo->buf, len);
- local_bh_enable();
}
}
IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
- spin_lock(&svc->sched_lock);
+ spin_lock_bh(&svc->sched_lock);
dest = mark->cl;
/* No available dests? */
if (mark->mw == 0)
mark->cl = dest;
out:
- spin_unlock(&svc->sched_lock);
+ spin_unlock_bh(&svc->sched_lock);
return dest;
err_noavail:
rt = (struct rtable *) dest_dst->dst_cache;
else {
dest_dst = ip_vs_dest_dst_alloc();
- spin_lock(&dest->dst_lock);
+ spin_lock_bh(&dest->dst_lock);
if (!dest_dst) {
__ip_vs_dst_set(dest, NULL, NULL, 0);
- spin_unlock(&dest->dst_lock);
+ spin_unlock_bh(&dest->dst_lock);
goto err_unreach;
}
rt = do_output_route4(net, dest->addr.ip, rt_mode,
&dest_dst->dst_saddr.ip);
if (!rt) {
__ip_vs_dst_set(dest, NULL, NULL, 0);
- spin_unlock(&dest->dst_lock);
+ spin_unlock_bh(&dest->dst_lock);
ip_vs_dest_dst_free(dest_dst);
goto err_unreach;
}
__ip_vs_dst_set(dest, dest_dst, &rt->dst, 0);
- spin_unlock(&dest->dst_lock);
+ spin_unlock_bh(&dest->dst_lock);
IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d\n",
&dest->addr.ip, &dest_dst->dst_saddr.ip,
atomic_read(&rt->dst.__refcnt));
u32 cookie;
dest_dst = ip_vs_dest_dst_alloc();
- spin_lock(&dest->dst_lock);
+ spin_lock_bh(&dest->dst_lock);
if (!dest_dst) {
__ip_vs_dst_set(dest, NULL, NULL, 0);
- spin_unlock(&dest->dst_lock);
+ spin_unlock_bh(&dest->dst_lock);
goto err_unreach;
}
dst = __ip_vs_route_output_v6(net, &dest->addr.in6,
do_xfrm);
if (!dst) {
__ip_vs_dst_set(dest, NULL, NULL, 0);
- spin_unlock(&dest->dst_lock);
+ spin_unlock_bh(&dest->dst_lock);
ip_vs_dest_dst_free(dest_dst);
goto err_unreach;
}
rt = (struct rt6_info *) dst;
cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
__ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie);
- spin_unlock(&dest->dst_lock);
+ spin_unlock_bh(&dest->dst_lock);
IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n",
&dest->addr.in6, &dest_dst->dst_saddr.in6,
atomic_read(&rt->dst.__refcnt));