From c1b3bdb2ffa9a343ded117e01d6b4b43b9df53f8 Mon Sep 17 00:00:00 2001 From: Suresh Reddy Date: Tue, 31 Jul 2018 11:39:42 -0400 Subject: [PATCH] be2net: gather debug info and reset adapter (only for Lancer) on a tx-timeout This patch handles a TX-timeout as follows: 1) This patch gathers and prints the following info that can help in diagnosing the cause of a TX-timeout. a) TX queue and completion queue entries. b) SKB and TCP/UDP header details. 2) For Lancer NICs (TX-timeout recovery is not supported for BE3/Skyhawk-R NICs), it recovers from the TX timeout as follows: a) On a TX-timeout, driver sets the PHYSDEV_CONTROL_FW_RESET_MASK bit in the PHYSDEV_CONTROL register. Lancer firmware goes into an error state and indicates this back to the driver via a bit in a doorbell register. b) Driver detects this and calls be_err_recover(). DMA is disabled, all pending TX skbs are unmapped and freed (be_close()). All rings are destroyed (be_clear()). c) The driver waits for the FW to re-initialize and re-creates all rings along with other data structs (be_resume()) Signed-off-by: Suresh Reddy Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 80 ++++++++++++++++++++++++++++- 1 file changed, 79 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 05e4c0b..580cdec 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -1412,6 +1412,83 @@ drop: return NETDEV_TX_OK; } +static void be_tx_timeout(struct net_device *netdev) +{ + struct be_adapter *adapter = netdev_priv(netdev); + struct device *dev = &adapter->pdev->dev; + struct be_tx_obj *txo; + struct sk_buff *skb; + struct tcphdr *tcphdr; + struct udphdr *udphdr; + u32 *entry; + int status; + int i, j; + + for_all_tx_queues(adapter, txo, i) { + dev_info(dev, "TXQ Dump: %d H: %d T: %d used: %d, qid: 0x%x\n", + i, txo->q.head, txo->q.tail, + atomic_read(&txo->q.used), txo->q.id); + + entry = txo->q.dma_mem.va; + for (j = 0; j < TX_Q_LEN * 4; j += 4) { + if (entry[j] != 0 || entry[j + 1] != 0 || + entry[j + 2] != 0 || entry[j + 3] != 0) { + dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n", + j, entry[j], entry[j + 1], + entry[j + 2], entry[j + 3]); + } + } + + entry = txo->cq.dma_mem.va; + dev_info(dev, "TXCQ Dump: %d H: %d T: %d used: %d\n", + i, txo->cq.head, txo->cq.tail, + atomic_read(&txo->cq.used)); + for (j = 0; j < TX_CQ_LEN * 4; j += 4) { + if (entry[j] != 0 || entry[j + 1] != 0 || + entry[j + 2] != 0 || entry[j + 3] != 0) { + dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n", + j, entry[j], entry[j + 1], + entry[j + 2], entry[j + 3]); + } + } + + for (j = 0; j < TX_Q_LEN; j++) { + if (txo->sent_skb_list[j]) { + skb = txo->sent_skb_list[j]; + if (ip_hdr(skb)->protocol == IPPROTO_TCP) { + tcphdr = tcp_hdr(skb); + dev_info(dev, "TCP source port %d\n", + ntohs(tcphdr->source)); + dev_info(dev, "TCP dest port %d\n", + ntohs(tcphdr->dest)); + dev_info(dev, "TCP seqence num %d\n", + ntohs(tcphdr->seq)); + dev_info(dev, "TCP ack_seq %d\n", + ntohs(tcphdr->ack_seq)); + } else if (ip_hdr(skb)->protocol == + IPPROTO_UDP) { + udphdr = udp_hdr(skb); + dev_info(dev, "UDP source port %d\n", + ntohs(udphdr->source)); + dev_info(dev, "UDP dest port %d\n", + ntohs(udphdr->dest)); + } + dev_info(dev, "skb[%d] %p len %d proto 0x%x\n", + j, skb, skb->len, skb->protocol); + } + } + } + + if (lancer_chip(adapter)) { + dev_info(dev, "Initiating reset due to tx timeout\n"); + dev_info(dev, "Resetting adapter\n"); + status = lancer_physdev_ctrl(adapter, + PHYSDEV_CONTROL_FW_RESET_MASK); + if (status) + dev_err(dev, "Reset failed .. Reboot server\n"); + } +} + static inline bool be_in_all_promisc(struct be_adapter *adapter) { return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) == @@ -3274,7 +3351,7 @@ void be_detect_error(struct be_adapter *adapter) /* Do not log error messages if its a FW reset */ if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 && sliport_err2 == SLIPORT_ERROR_FW_RESET2) { - dev_info(dev, "Firmware update in progress\n"); + dev_info(dev, "Reset is in progress\n"); } else { dev_err(dev, "Error detected in the card\n"); dev_err(dev, "ERR: sliport status 0x%x\n", @@ -5218,6 +5295,7 @@ static const struct net_device_ops be_netdev_ops = { .ndo_get_vf_config = be_get_vf_config, .ndo_set_vf_link_state = be_set_vf_link_state, .ndo_set_vf_spoofchk = be_set_vf_spoofchk, + .ndo_tx_timeout = be_tx_timeout, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = be_netpoll, #endif -- 2.7.4