From baebdf48c360080710f80699eea3affbb13d6c65 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Sat, 12 Feb 2022 00:38:38 +0100 Subject: [PATCH] net: dev: Makes sure netif_rx() can be invoked in any context. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Dave suggested a while ago (eleven years by now) "Let's make netif_rx() work in all contexts and get rid of netif_rx_ni()". Eric agreed and pointed out that modern devices should use netif_receive_skb() to avoid the overhead. In the meantime someone added another variant, netif_rx_any_context(), which behaves as suggested. netif_rx() must be invoked with disabled bottom halves to ensure that pending softirqs, which were raised within the function, are handled. netif_rx_ni() can be invoked only from process context (bottom halves must be enabled) because the function handles pending softirqs without checking if bottom halves were disabled or not. netif_rx_any_context() invokes on the former functions by checking in_interrupts(). netif_rx() could be taught to handle both cases (disabled and enabled bottom halves) by simply disabling bottom halves while invoking netif_rx_internal(). The local_bh_enable() invocation will then invoke pending softirqs only if the BH-disable counter drops to zero. Eric is concerned about the overhead of BH-disable+enable especially in regard to the loopback driver. As critical as this driver is, it will receive a shortcut to avoid the additional overhead which is not needed. Add a local_bh_disable() section in netif_rx() to ensure softirqs are handled if needed. Provide __netif_rx() which does not disable BH and has a lockdep assert to ensure that interrupts are disabled. Use this shortcut in the loopback driver and in drivers/net/*.c. Make netif_rx_ni() and netif_rx_any_context() invoke netif_rx() so they can be removed once they are no more users left. Link: https://lkml.kernel.org/r/20100415.020246.218622820.davem@davemloft.net Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Eric Dumazet Reviewed-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller --- drivers/net/amt.c | 4 +-- drivers/net/geneve.c | 4 +-- drivers/net/gtp.c | 2 +- drivers/net/loopback.c | 4 +-- drivers/net/macsec.c | 6 ++--- drivers/net/macvlan.c | 4 +-- drivers/net/mhi_net.c | 2 +- drivers/net/ntb_netdev.c | 2 +- drivers/net/rionet.c | 2 +- drivers/net/sb1000.c | 2 +- drivers/net/veth.c | 2 +- drivers/net/vrf.c | 2 +- drivers/net/vxlan.c | 2 +- include/linux/netdevice.h | 14 ++++++++-- include/trace/events/net.h | 14 ---------- net/core/dev.c | 67 ++++++++++++++++++++-------------------------- 16 files changed, 60 insertions(+), 73 deletions(-) diff --git a/drivers/net/amt.c b/drivers/net/amt.c index f1a36d7..10455c9 100644 --- a/drivers/net/amt.c +++ b/drivers/net/amt.c @@ -2373,7 +2373,7 @@ static bool amt_membership_query_handler(struct amt_dev *amt, skb->pkt_type = PACKET_MULTICAST; skb->ip_summed = CHECKSUM_NONE; len = skb->len; - if (netif_rx(skb) == NET_RX_SUCCESS) { + if (__netif_rx(skb) == NET_RX_SUCCESS) { amt_update_gw_status(amt, AMT_STATUS_RECEIVED_QUERY, true); dev_sw_netstats_rx_add(amt->dev, len); } else { @@ -2470,7 +2470,7 @@ report: skb->pkt_type = PACKET_MULTICAST; skb->ip_summed = CHECKSUM_NONE; len = skb->len; - if (netif_rx(skb) == NET_RX_SUCCESS) { + if (__netif_rx(skb) == NET_RX_SUCCESS) { amt_update_relay_status(tunnel, AMT_STATUS_RECEIVED_UPDATE, true); dev_sw_netstats_rx_add(amt->dev, len); diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index c1fdd72..a895ff7 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -925,7 +925,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, } skb->protocol = eth_type_trans(skb, geneve->dev); - netif_rx(skb); + __netif_rx(skb); dst_release(&rt->dst); return -EMSGSIZE; } @@ -1021,7 +1021,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, } skb->protocol = eth_type_trans(skb, geneve->dev); - netif_rx(skb); + __netif_rx(skb); dst_release(dst); return -EMSGSIZE; } diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 24e5c54..bf087171 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -207,7 +207,7 @@ static int gtp_rx(struct pdp_ctx *pctx, struct sk_buff *skb, dev_sw_netstats_rx_add(pctx->dev, skb->len); - netif_rx(skb); + __netif_rx(skb); return 0; err: diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index ed0edf5..d05f86f 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -78,7 +78,7 @@ static netdev_tx_t loopback_xmit(struct sk_buff *skb, skb_orphan(skb); - /* Before queueing this packet to netif_rx(), + /* Before queueing this packet to __netif_rx(), * make sure dst is refcounted. */ skb_dst_force(skb); @@ -86,7 +86,7 @@ static netdev_tx_t loopback_xmit(struct sk_buff *skb, skb->protocol = eth_type_trans(skb, dev); len = skb->len; - if (likely(netif_rx(skb) == NET_RX_SUCCESS)) + if (likely(__netif_rx(skb) == NET_RX_SUCCESS)) dev_lstats_add(dev, len); return NETDEV_TX_OK; diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 3d08743..832f09a 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -1033,7 +1033,7 @@ static enum rx_handler_result handle_not_macsec(struct sk_buff *skb) else nskb->pkt_type = PACKET_MULTICAST; - netif_rx(nskb); + __netif_rx(nskb); } continue; } @@ -1056,7 +1056,7 @@ static enum rx_handler_result handle_not_macsec(struct sk_buff *skb) nskb->dev = ndev; - if (netif_rx(nskb) == NET_RX_SUCCESS) { + if (__netif_rx(nskb) == NET_RX_SUCCESS) { u64_stats_update_begin(&secy_stats->syncp); secy_stats->stats.InPktsUntagged++; u64_stats_update_end(&secy_stats->syncp); @@ -1288,7 +1288,7 @@ nosci: macsec_reset_skb(nskb, macsec->secy.netdev); - ret = netif_rx(nskb); + ret = __netif_rx(nskb); if (ret == NET_RX_SUCCESS) { u64_stats_update_begin(&secy_stats->syncp); secy_stats->stats.InPktsUnknownSCI++; diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 6ef5f77..d87c06c 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -410,7 +410,7 @@ static void macvlan_forward_source_one(struct sk_buff *skb, if (ether_addr_equal_64bits(eth_hdr(skb)->h_dest, dev->dev_addr)) nskb->pkt_type = PACKET_HOST; - ret = netif_rx(nskb); + ret = __netif_rx(nskb); macvlan_count_rx(vlan, len, ret == NET_RX_SUCCESS, false); } @@ -468,7 +468,7 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb) /* forward to original port. */ vlan = src; ret = macvlan_broadcast_one(skb, vlan, eth, 0) ?: - netif_rx(skb); + __netif_rx(skb); handle_res = RX_HANDLER_CONSUMED; goto out; } diff --git a/drivers/net/mhi_net.c b/drivers/net/mhi_net.c index aaa628f..0b1b6f6 100644 --- a/drivers/net/mhi_net.c +++ b/drivers/net/mhi_net.c @@ -225,7 +225,7 @@ static void mhi_net_dl_callback(struct mhi_device *mhi_dev, u64_stats_inc(&mhi_netdev->stats.rx_packets); u64_stats_add(&mhi_netdev->stats.rx_bytes, skb->len); u64_stats_update_end(&mhi_netdev->stats.rx_syncp); - netif_rx(skb); + __netif_rx(skb); } /* Refill if RX buffers queue becomes low */ diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c index 98ca6b184..80bdc07 100644 --- a/drivers/net/ntb_netdev.c +++ b/drivers/net/ntb_netdev.c @@ -119,7 +119,7 @@ static void ntb_netdev_rx_handler(struct ntb_transport_qp *qp, void *qp_data, skb->protocol = eth_type_trans(skb, ndev); skb->ip_summed = CHECKSUM_NONE; - if (netif_rx(skb) == NET_RX_DROP) { + if (__netif_rx(skb) == NET_RX_DROP) { ndev->stats.rx_errors++; ndev->stats.rx_dropped++; } else { diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c index 1a95f3b..39e61e0 100644 --- a/drivers/net/rionet.c +++ b/drivers/net/rionet.c @@ -109,7 +109,7 @@ static int rionet_rx_clean(struct net_device *ndev) skb_put(rnet->rx_skb[i], RIO_MAX_MSG_SIZE); rnet->rx_skb[i]->protocol = eth_type_trans(rnet->rx_skb[i], ndev); - error = netif_rx(rnet->rx_skb[i]); + error = __netif_rx(rnet->rx_skb[i]); if (error == NET_RX_DROP) { ndev->stats.rx_dropped++; diff --git a/drivers/net/sb1000.c b/drivers/net/sb1000.c index 57a6d59..c3f8020 100644 --- a/drivers/net/sb1000.c +++ b/drivers/net/sb1000.c @@ -872,7 +872,7 @@ printk("cm0: IP identification: %02x%02x fragment offset: %02x%02x\n", buffer[3 /* datagram completed: send to upper level */ skb_trim(skb, dlen); - netif_rx(skb); + __netif_rx(skb); stats->rx_bytes+=dlen; stats->rx_packets++; lp->rx_skb[ns] = NULL; diff --git a/drivers/net/veth.c b/drivers/net/veth.c index d29fb97..58b20ea 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -287,7 +287,7 @@ static int veth_forward_skb(struct net_device *dev, struct sk_buff *skb, { return __dev_forward_skb(dev, skb) ?: xdp ? veth_xdp_rx(rq, skb) : - netif_rx(skb); + __netif_rx(skb); } /* return true if the specified skb has chances of GRO aggregation diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index e0b1ab9..714cafc 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -418,7 +418,7 @@ static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev, skb->protocol = eth_type_trans(skb, dev); - if (likely(netif_rx(skb) == NET_RX_SUCCESS)) + if (likely(__netif_rx(skb) == NET_RX_SUCCESS)) vrf_rx_stats(dev, len); else this_cpu_inc(dev->dstats->rx_drps); diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 359d167..d0dc90d 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2541,7 +2541,7 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan, tx_stats->tx_bytes += len; u64_stats_update_end(&tx_stats->syncp); - if (netif_rx(skb) == NET_RX_SUCCESS) { + if (__netif_rx(skb) == NET_RX_SUCCESS) { u64_stats_update_begin(&rx_stats->syncp); rx_stats->rx_packets++; rx_stats->rx_bytes += len; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5f6e2c0..e99db83 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3672,8 +3672,18 @@ u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp, void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog); int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb); int netif_rx(struct sk_buff *skb); -int netif_rx_ni(struct sk_buff *skb); -int netif_rx_any_context(struct sk_buff *skb); +int __netif_rx(struct sk_buff *skb); + +static inline int netif_rx_ni(struct sk_buff *skb) +{ + return netif_rx(skb); +} + +static inline int netif_rx_any_context(struct sk_buff *skb) +{ + return netif_rx(skb); +} + int netif_receive_skb(struct sk_buff *skb); int netif_receive_skb_core(struct sk_buff *skb); void netif_receive_skb_list_internal(struct list_head *head); diff --git a/include/trace/events/net.h b/include/trace/events/net.h index 78c448c..032b431 100644 --- a/include/trace/events/net.h +++ b/include/trace/events/net.h @@ -260,13 +260,6 @@ DEFINE_EVENT(net_dev_rx_verbose_template, netif_rx_entry, TP_ARGS(skb) ); -DEFINE_EVENT(net_dev_rx_verbose_template, netif_rx_ni_entry, - - TP_PROTO(const struct sk_buff *skb), - - TP_ARGS(skb) -); - DECLARE_EVENT_CLASS(net_dev_rx_exit_template, TP_PROTO(int ret), @@ -312,13 +305,6 @@ DEFINE_EVENT(net_dev_rx_exit_template, netif_rx_exit, TP_ARGS(ret) ); -DEFINE_EVENT(net_dev_rx_exit_template, netif_rx_ni_exit, - - TP_PROTO(int ret), - - TP_ARGS(ret) -); - DEFINE_EVENT(net_dev_rx_exit_template, netif_receive_skb_list_exit, TP_PROTO(int ret), diff --git a/net/core/dev.c b/net/core/dev.c index 441bacf..ce5047e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4816,65 +4816,56 @@ static int netif_rx_internal(struct sk_buff *skb) } /** + * __netif_rx - Slightly optimized version of netif_rx + * @skb: buffer to post + * + * This behaves as netif_rx except that it does not disable bottom halves. + * As a result this function may only be invoked from the interrupt context + * (either hard or soft interrupt). + */ +int __netif_rx(struct sk_buff *skb) +{ + int ret; + + lockdep_assert_once(hardirq_count() | softirq_count()); + + trace_netif_rx_entry(skb); + ret = netif_rx_internal(skb); + trace_netif_rx_exit(ret); + return ret; +} +EXPORT_SYMBOL(__netif_rx); + +/** * netif_rx - post buffer to the network code * @skb: buffer to post * * This function receives a packet from a device driver and queues it for - * the upper (protocol) levels to process. It always succeeds. The buffer - * may be dropped during processing for congestion control or by the - * protocol layers. + * the upper (protocol) levels to process via the backlog NAPI device. It + * always succeeds. The buffer may be dropped during processing for + * congestion control or by the protocol layers. + * The network buffer is passed via the backlog NAPI device. Modern NIC + * driver should use NAPI and GRO. + * This function can used from any context. * * return values: * NET_RX_SUCCESS (no congestion) * NET_RX_DROP (packet was dropped) * */ - int netif_rx(struct sk_buff *skb) { int ret; + local_bh_disable(); trace_netif_rx_entry(skb); - ret = netif_rx_internal(skb); trace_netif_rx_exit(ret); - + local_bh_enable(); return ret; } EXPORT_SYMBOL(netif_rx); -int netif_rx_ni(struct sk_buff *skb) -{ - int err; - - trace_netif_rx_ni_entry(skb); - - preempt_disable(); - err = netif_rx_internal(skb); - if (local_softirq_pending()) - do_softirq(); - preempt_enable(); - trace_netif_rx_ni_exit(err); - - return err; -} -EXPORT_SYMBOL(netif_rx_ni); - -int netif_rx_any_context(struct sk_buff *skb) -{ - /* - * If invoked from contexts which do not invoke bottom half - * processing either at return from interrupt or when softrqs are - * reenabled, use netif_rx_ni() which invokes bottomhalf processing - * directly. - */ - if (in_interrupt()) - return netif_rx(skb); - else - return netif_rx_ni(skb); -} -EXPORT_SYMBOL(netif_rx_any_context); - static __latent_entropy void net_tx_action(struct softirq_action *h) { struct softnet_data *sd = this_cpu_ptr(&softnet_data); -- 2.7.4