octeontx2-pf: Use hardware register for CQE count
authorGeetha sowjanya <gakula@marvell.com>
Tue, 28 Sep 2021 05:55:26 +0000 (11:25 +0530)
committerDavid S. Miller <davem@davemloft.net>
Tue, 28 Sep 2021 13:10:24 +0000 (14:10 +0100)
Currently the driver uses a software CQ head pointer and polls on the CQE
header in memory to determine whether a CQE is valid. Software needs
to make sure that reads of the CQE do not get re-ordered
so much that it ends up with an inconsistent view of the CQE.
To ensure that, a DMB barrier is needed after reading the first CQE
cacheline and before reading the rest of the CQE.
But having a barrier for every CQE read will impact performance;
instead, use the hardware CQ head and tail pointers to find the
number of valid CQEs.

Signed-off-by: Geetha sowjanya <gakula@marvell.com>
Signed-off-by: Sunil Kovvuri Goutham <sgoutham@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
include/linux/soc/marvell/octeontx2/asm.h

index 78df173e6df240f6bd5b65f84516defcac380b94..4c3dbade8cfb569ef771e45d21f02f87fc81f544 100644 (file)
@@ -1006,6 +1006,9 @@ int otx2_config_nix_queues(struct otx2_nic *pfvf)
                        return err;
        }
 
+       pfvf->cq_op_addr = (__force u64 *)otx2_get_regaddr(pfvf,
+                                                          NIX_LF_CQ_OP_STATUS);
+
        /* Initialize work queue for receive buffer refill */
        pfvf->refill_wrk = devm_kcalloc(pfvf->dev, pfvf->qset.cq_cnt,
                                        sizeof(struct refill_work), GFP_KERNEL);
index 0a792fce55f18a3eaaa98aca865a3a16ca316729..069d1b925102030854ac1d2903a33ab5450eab95 100644 (file)
@@ -343,6 +343,7 @@ struct otx2_nic {
 #define OTX2_FLAG_TC_MATCHALL_INGRESS_ENABLED  BIT_ULL(13)
 #define OTX2_FLAG_DMACFLTR_SUPPORT             BIT_ULL(14)
        u64                     flags;
+       u64                     *cq_op_addr;
 
        struct otx2_qset        qset;
        struct otx2_hw          hw;
index f42b1d4e0c679ea0555de7260d3d0f96631e8d71..3f3ec8ffc4ddfc95a2f1f9f53f241a1e70f8d363 100644 (file)
 
 #define CQE_ADDR(CQ, idx) ((CQ)->cqe_base + ((CQ)->cqe_size * (idx)))
 
+/* otx2_nix_cq_op_status - snapshot HW CQ head/tail pointers for @cq.
+ *
+ * Performs an atomic fetch-add on the NIX_LF_CQ_OP_STATUS register
+ * (pfvf->cq_op_addr) with the CQ index encoded in bits [63:32] of the
+ * operand; the value returned by hardware carries the tail pointer in
+ * bits [19:0], the head pointer in bits [39:20], and error flags
+ * (CQ_OP_STAT_OP_ERR = bit 63, CQ_OP_STAT_CQ_ERR = bit 46).
+ *
+ * On success, updates cq->cq_head, cq->cq_tail and cq->pend_cqe (the
+ * count of valid pending CQEs, accounting for ring wrap-around when
+ * tail < head) and returns 0.  Returns -EINVAL if the CQ reported an
+ * error condition.
+ */
+static int otx2_nix_cq_op_status(struct otx2_nic *pfvf,
+                                struct otx2_cq_queue *cq)
+{
+       /* CQ index selects which CQ's status HW returns (bits [63:32]) */
+       u64 incr = (u64)(cq->cq_idx) << 32;
+       u64 status;
+
+       status = otx2_atomic64_fetch_add(incr, pfvf->cq_op_addr);
+
+       if (unlikely(status & BIT_ULL(CQ_OP_STAT_OP_ERR) ||
+                    status & BIT_ULL(CQ_OP_STAT_CQ_ERR))) {
+               dev_err(pfvf->dev, "CQ stopped due to error");
+               return -EINVAL;
+       }
+
+       /* Extract 20-bit tail (bits [19:0]) and head (bits [39:20]) */
+       cq->cq_tail = status & 0xFFFFF;
+       cq->cq_head = (status >> 20) & 0xFFFFF;
+       /* Tail behind head means the ring wrapped; count across the wrap */
+       if (cq->cq_tail < cq->cq_head)
+               cq->pend_cqe = (cq->cqe_cnt - cq->cq_head) +
+                               cq->cq_tail;
+       else
+               cq->pend_cqe = cq->cq_tail - cq->cq_head;
+
+       return 0;
+}
+
 static struct nix_cqe_hdr_s *otx2_get_next_cqe(struct otx2_cq_queue *cq)
 {
        struct nix_cqe_hdr_s *cqe_hdr;
@@ -318,7 +343,14 @@ static int otx2_rx_napi_handler(struct otx2_nic *pfvf,
        struct nix_cqe_rx_s *cqe;
        int processed_cqe = 0;
 
-       while (likely(processed_cqe < budget)) {
+       if (cq->pend_cqe >= budget)
+               goto process_cqe;
+
+       if (otx2_nix_cq_op_status(pfvf, cq) || !cq->pend_cqe)
+               return 0;
+
+process_cqe:
+       while (likely(processed_cqe < budget) && cq->pend_cqe) {
                cqe = (struct nix_cqe_rx_s *)CQE_ADDR(cq, cq->cq_head);
                if (cqe->hdr.cqe_type == NIX_XQE_TYPE_INVALID ||
                    !cqe->sg.seg_addr) {
@@ -334,6 +366,7 @@ static int otx2_rx_napi_handler(struct otx2_nic *pfvf,
                cqe->hdr.cqe_type = NIX_XQE_TYPE_INVALID;
                cqe->sg.seg_addr = 0x00;
                processed_cqe++;
+               cq->pend_cqe--;
        }
 
        /* Free CQEs to HW */
@@ -368,7 +401,14 @@ static int otx2_tx_napi_handler(struct otx2_nic *pfvf,
        struct nix_cqe_tx_s *cqe;
        int processed_cqe = 0;
 
-       while (likely(processed_cqe < budget)) {
+       if (cq->pend_cqe >= budget)
+               goto process_cqe;
+
+       if (otx2_nix_cq_op_status(pfvf, cq) || !cq->pend_cqe)
+               return 0;
+
+process_cqe:
+       while (likely(processed_cqe < budget) && cq->pend_cqe) {
                cqe = (struct nix_cqe_tx_s *)otx2_get_next_cqe(cq);
                if (unlikely(!cqe)) {
                        if (!processed_cqe)
@@ -380,6 +420,7 @@ static int otx2_tx_napi_handler(struct otx2_nic *pfvf,
 
                cqe->hdr.cqe_type = NIX_XQE_TYPE_INVALID;
                processed_cqe++;
+               cq->pend_cqe--;
        }
 
        /* Free CQEs to HW */
@@ -936,10 +977,16 @@ void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq)
        int processed_cqe = 0;
        u64 iova, pa;
 
-       while ((cqe = (struct nix_cqe_rx_s *)otx2_get_next_cqe(cq))) {
-               if (!cqe->sg.subdc)
-                       continue;
+       if (otx2_nix_cq_op_status(pfvf, cq) || !cq->pend_cqe)
+               return;
+
+       while (cq->pend_cqe) {
+               cqe = (struct nix_cqe_rx_s *)otx2_get_next_cqe(cq);
                processed_cqe++;
+               cq->pend_cqe--;
+
+               if (!cqe)
+                       continue;
                if (cqe->sg.segs > 1) {
                        otx2_free_rcv_seg(pfvf, cqe, cq->cq_idx);
                        continue;
@@ -965,7 +1012,16 @@ void otx2_cleanup_tx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq)
 
        sq = &pfvf->qset.sq[cq->cint_idx];
 
-       while ((cqe = (struct nix_cqe_tx_s *)otx2_get_next_cqe(cq))) {
+       if (otx2_nix_cq_op_status(pfvf, cq) || !cq->pend_cqe)
+               return;
+
+       while (cq->pend_cqe) {
+               cqe = (struct nix_cqe_tx_s *)otx2_get_next_cqe(cq);
+               processed_cqe++;
+               cq->pend_cqe--;
+
+               if (!cqe)
+                       continue;
                sg = &sq->sg[cqe->comp.sqe_id];
                skb = (struct sk_buff *)sg->skb;
                if (skb) {
@@ -973,7 +1029,6 @@ void otx2_cleanup_tx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq)
                        dev_kfree_skb_any(skb);
                        sg->skb = (u64)NULL;
                }
-               processed_cqe++;
        }
 
        /* Free CQEs to HW */
index 3ff1ad79c0011deb5fda36e04c7a6128ec88e606..6a97631ff2269be672b1c385d9f8723e634df091 100644 (file)
@@ -56,6 +56,9 @@
  */
 #define CQ_QCOUNT_DEFAULT      1
 
+#define CQ_OP_STAT_OP_ERR       63
+#define CQ_OP_STAT_CQ_ERR       46
+
 struct queue_stats {
        u64     bytes;
        u64     pkts;
@@ -122,6 +125,8 @@ struct otx2_cq_queue {
        u16                     pool_ptrs;
        u32                     cqe_cnt;
        u32                     cq_head;
+       u32                     cq_tail;
+       u32                     pend_cqe;
        void                    *cqe_base;
        struct qmem             *cqe;
        struct otx2_pool        *rbpool;
index fa1d6af0164ee987bc52bd88c2c234ace7be3382..0f79fd7f81a1ce10a2688d982b8c99dbd970d9a2 100644 (file)
                         : [rf] "+r"(val)               \
                         : [rs] "r"(addr));             \
 })
+
+/* otx2_atomic64_fetch_add - atomic 64-bit add returning the OLD value.
+ *
+ * Uses the arm64 LSE LDADDA instruction (atomic load-add with acquire
+ * semantics): *ptr += incr, and the value of *ptr prior to the add is
+ * returned.  When @ptr targets an octeontx2 CSR such as
+ * NIX_LF_CQ_OP_STATUS, the "add" operand carries the request (e.g. the
+ * CQ index) and the returned value is the hardware response.
+ *
+ * ".cpu generic+lse" lets the assembler accept the LSE mnemonic even if
+ * the toolchain's baseline arch lacks the atomics extension.
+ */
+static inline u64 otx2_atomic64_fetch_add(u64 incr, u64 *ptr)
+{
+       u64 result;
+
+       asm volatile (".cpu  generic+lse\n"
+                     "ldadda %x[i], %x[r], [%[b]]"
+                     : [r] "=r" (result), "+m" (*ptr)
+                     : [i] "r" (incr), [b] "r" (ptr)
+                     : "memory");
+       return result;
+}
+
 #else
 #define otx2_lmt_flush(ioaddr)          ({ 0; })
 #define cn10k_lmt_flush(val, addr)     ({ addr = val; })
+#define otx2_atomic64_fetch_add(incr, ptr)     ({ incr; })
 #endif
 
 #endif /* __SOC_OTX2_ASM_H */