From: Brett Creeley Date: Wed, 26 Jun 2019 09:20:19 +0000 (-0700) Subject: ice: Only bump Rx tail and release buffers once per napi_poll X-Git-Tag: v5.15~5506^2~311^2~8 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=cb7db35641c9a508247bdcd73831c855c8758cd3;p=platform%2Fkernel%2Flinux-starfive.git ice: Only bump Rx tail and release buffers once per napi_poll Currently we bump the Rx tail and release/give buffers to hardware every 16 descriptors. This causes us to bump Rx tail up to 4 times per napi_poll call. Also we are always bumping tail on an odd index and this is a problem because hardware ignores the lower 3 bits in the QRX_TAIL register. This is making it so hardware sees tail bumps only every 8 descriptors. Instead lets only bump Rx tail once per napi_poll if the value aligns with hardware's expectations of the lower 3 bits being cleared. Also only release/give Rx buffers once per napi_poll call. Signed-off-by: Brett Creeley Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index dd7392f..0c45930 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -377,18 +377,28 @@ err: */ static void ice_release_rx_desc(struct ice_ring *rx_ring, u32 val) { + u16 prev_ntu = rx_ring->next_to_use; + rx_ring->next_to_use = val; /* update next to alloc since we have filled the ring */ rx_ring->next_to_alloc = val; - /* Force memory writes to complete before letting h/w - * know there are new descriptors to fetch. (Only - * applicable for weak-ordered memory model archs, - * such as IA-64). + /* QRX_TAIL will be updated with any tail value, but hardware ignores + * the lower 3 bits. This makes it so we only bump tail on meaningful + * boundaries. Also, this allows us to bump tail on intervals of 8 up to + * the budget depending on the current traffic load. */ - wmb(); - writel(val, rx_ring->tail); + val &= ~0x7; + if (prev_ntu != val) { + /* Force memory writes to complete before letting h/w + * know there are new descriptors to fetch. (Only + * applicable for weak-ordered memory model archs, + * such as IA-64). + */ + wmb(); + writel(val, rx_ring->tail); + } } /** @@ -445,7 +455,13 @@ ice_alloc_mapped_page(struct ice_ring *rx_ring, struct ice_rx_buf *bi) * @rx_ring: ring to place buffers on * @cleaned_count: number of buffers to replace * - * Returns false if all allocations were successful, true if any fail + * Returns false if all allocations were successful, true if any fail. Returning + * true signals to the caller that we didn't replace cleaned_count buffers and + * there is more work to do. + * + * First, try to clean "cleaned_count" Rx buffers. Then refill the cleaned Rx + * buffers. Then bump tail at most one time. Grouping like this lets us avoid + * multiple tail writes per call. */ bool ice_alloc_rx_bufs(struct ice_ring *rx_ring, u16 cleaned_count) { @@ -990,7 +1006,7 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) { unsigned int total_rx_bytes = 0, total_rx_pkts = 0; u16 cleaned_count = ICE_DESC_UNUSED(rx_ring); - bool failure = false; + bool failure; /* start the loop to process Rx packets bounded by 'budget' */ while (likely(total_rx_pkts < (unsigned int)budget)) { @@ -1002,13 +1018,6 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) u16 vlan_tag = 0; u8 rx_ptype; - /* return some buffers to hardware, one at a time is too slow */ - if (cleaned_count >= ICE_RX_BUF_WRITE) { - failure = failure || - ice_alloc_rx_bufs(rx_ring, cleaned_count); - cleaned_count = 0; - } - /* get the Rx desc from Rx ring based on 'next_to_clean' */ rx_desc = ICE_RX_DESC(rx_ring, rx_ring->next_to_clean); @@ -1085,6 +1094,9 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) total_rx_pkts++; } + /* return up to cleaned_count buffers to hardware */ + failure = ice_alloc_rx_bufs(rx_ring, cleaned_count); + /* update queue and vector specific stats */ u64_stats_update_begin(&rx_ring->syncp); rx_ring->stats.pkts += total_rx_pkts;