IB/hfi1: Remove static tracing from SDMA hot path
author Michael J. Ruhl <michael.j.ruhl@intel.com>
Mon, 28 Aug 2017 18:23:27 +0000 (11:23 -0700)
committer Doug Ledford <dledford@redhat.com>
Mon, 28 Aug 2017 23:12:27 +0000 (19:12 -0400)
The hfi1_cdbg() macro can be instantiated in the hot path even when it
is not in use.  This shows up on perf profiles.

Rework the macros (for SDMA and MMU), to use the trace interface directly
to eliminate this performance hit.

Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
drivers/infiniband/hw/hfi1/file_ops.c
drivers/infiniband/hw/hfi1/mmu_rb.c
drivers/infiniband/hw/hfi1/trace.h
drivers/infiniband/hw/hfi1/trace_mmu.h [new file with mode: 0644]
drivers/infiniband/hw/hfi1/trace_tx.h
drivers/infiniband/hw/hfi1/user_sdma.c
drivers/infiniband/hw/hfi1/user_sdma.h

index d893582..2bc8926 100644 (file)
@@ -430,8 +430,7 @@ static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from)
        if (!iter_is_iovec(from) || !dim)
                return -EINVAL;
 
-       hfi1_cdbg(SDMA, "SDMA request from %u:%u (%lu)",
-                 fd->uctxt->ctxt, fd->subctxt, dim);
+       trace_hfi1_sdma_request(fd->dd, fd->uctxt->ctxt, fd->subctxt, dim);
 
        if (atomic_read(&pq->n_reqs) == pq->n_max_reqs)
                return -ENOSPC;
index d41fd87..13dcef0 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2016 Intel Corporation.
+ * Copyright(c) 2016 - 2017 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -172,9 +172,8 @@ int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler,
        unsigned long flags;
        int ret = 0;
 
+       trace_hfi1_mmu_rb_insert(mnode->addr, mnode->len);
        spin_lock_irqsave(&handler->lock, flags);
-       hfi1_cdbg(MMU, "Inserting node addr 0x%llx, len %u", mnode->addr,
-                 mnode->len);
        node = __mmu_rb_search(handler, mnode->addr, mnode->len);
        if (node) {
                ret = -EINVAL;
@@ -200,7 +199,7 @@ static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler,
 {
        struct mmu_rb_node *node = NULL;
 
-       hfi1_cdbg(MMU, "Searching for addr 0x%llx, len %u", addr, len);
+       trace_hfi1_mmu_rb_search(addr, len);
        if (!handler->ops->filter) {
                node = __mmu_int_rb_iter_first(&handler->root, addr,
                                               (addr + len) - 1);
@@ -281,8 +280,7 @@ void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler,
        unsigned long flags;
 
        /* Validity of handler and node pointers has been checked by caller. */
-       hfi1_cdbg(MMU, "Removing node addr 0x%llx, len %u", node->addr,
-                 node->len);
+       trace_hfi1_mmu_rb_remove(node->addr, node->len);
        spin_lock_irqsave(&handler->lock, flags);
        __mmu_int_rb_remove(node, &handler->root);
        list_del(&node->list); /* remove from LRU list */
@@ -321,8 +319,7 @@ static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn,
             node; node = ptr) {
                /* Guard against node removal. */
                ptr = __mmu_int_rb_iter_next(node, start, end - 1);
-               hfi1_cdbg(MMU, "Invalidating node addr 0x%llx, len %u",
-                         node->addr, node->len);
+               trace_hfi1_mmu_mem_invalidate(node->addr, node->len);
                if (handler->ops->invalidate(handler->ops_arg, node)) {
                        __mmu_int_rb_remove(node, root);
                        /* move from LRU list to delete list */
index 92dc88f..af50c07 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -51,3 +51,4 @@
 #include "trace_rc.h"
 #include "trace_rx.h"
 #include "trace_tx.h"
+#include "trace_mmu.h"
diff --git a/drivers/infiniband/hw/hfi1/trace_mmu.h b/drivers/infiniband/hw/hfi1/trace_mmu.h
new file mode 100644 (file)
index 0000000..3b7abbc
--- /dev/null
@@ -0,0 +1,95 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#if !defined(__HFI1_TRACE_MMU_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __HFI1_TRACE_MMU_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include "hfi.h"
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM hfi1_mmu
+
+DECLARE_EVENT_CLASS(hfi1_mmu_rb_template,
+                   TP_PROTO(unsigned long addr, unsigned long len),
+                   TP_ARGS(addr, len),
+                   TP_STRUCT__entry(__field(unsigned long, addr)
+                                    __field(unsigned long, len)
+                           ),
+                   TP_fast_assign(__entry->addr = addr;
+                                  __entry->len = len;
+                           ),
+                   TP_printk("MMU node addr 0x%lx, len %lu",
+                             __entry->addr,
+                             __entry->len
+                           )
+);
+
+DEFINE_EVENT(hfi1_mmu_rb_template, hfi1_mmu_rb_insert,
+            TP_PROTO(unsigned long addr, unsigned long len),
+            TP_ARGS(addr, len));
+
+DEFINE_EVENT(hfi1_mmu_rb_template, hfi1_mmu_rb_search,
+            TP_PROTO(unsigned long addr, unsigned long len),
+            TP_ARGS(addr, len));
+
+DEFINE_EVENT(hfi1_mmu_rb_template, hfi1_mmu_rb_remove,
+            TP_PROTO(unsigned long addr, unsigned long len),
+            TP_ARGS(addr, len));
+
+DEFINE_EVENT(hfi1_mmu_rb_template, hfi1_mmu_mem_invalidate,
+            TP_PROTO(unsigned long addr, unsigned long len),
+            TP_ARGS(addr, len));
+
+#endif /* __HFI1_TRACE_MMU_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_mmu
+#include <trace/define_trace.h>
index c59809a..c57af3b 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -198,6 +198,140 @@ TRACE_EVENT(hfi1_sdma_engine_select,
                      )
 );
 
+TRACE_EVENT(hfi1_sdma_user_free_queues,
+           TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u16 subctxt),
+           TP_ARGS(dd, ctxt, subctxt),
+           TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+                            __field(u16, ctxt)
+                            __field(u16, subctxt)
+                            ),
+           TP_fast_assign(DD_DEV_ASSIGN(dd);
+                          __entry->ctxt = ctxt;
+                          __entry->subctxt = subctxt;
+                          ),
+           TP_printk("[%s] SDMA [%u:%u] Freeing user SDMA queues",
+                     __get_str(dev),
+                     __entry->ctxt,
+                     __entry->subctxt
+                     )
+);
+
+TRACE_EVENT(hfi1_sdma_user_process_request,
+           TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u16 subctxt,
+                    u16 comp_idx),
+           TP_ARGS(dd, ctxt, subctxt, comp_idx),
+           TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+                            __field(u16, ctxt)
+                            __field(u16, subctxt)
+                            __field(u16, comp_idx)
+                            ),
+           TP_fast_assign(DD_DEV_ASSIGN(dd);
+                          __entry->ctxt = ctxt;
+                          __entry->subctxt = subctxt;
+                          __entry->comp_idx = comp_idx;
+                          ),
+           TP_printk("[%s] SDMA [%u:%u] Using req/comp entry: %u",
+                     __get_str(dev),
+                     __entry->ctxt,
+                     __entry->subctxt,
+                     __entry->comp_idx
+                     )
+);
+
+DECLARE_EVENT_CLASS(
+       hfi1_sdma_value_template,
+       TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u16 subctxt, u16 comp_idx,
+                u32 value),
+       TP_ARGS(dd, ctxt, subctxt, comp_idx, value),
+       TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+                        __field(u16, ctxt)
+                        __field(u16, subctxt)
+                        __field(u16, comp_idx)
+                        __field(u32, value)
+               ),
+       TP_fast_assign(DD_DEV_ASSIGN(dd);
+                      __entry->ctxt = ctxt;
+                      __entry->subctxt = subctxt;
+                      __entry->comp_idx = comp_idx;
+                      __entry->value = value;
+               ),
+       TP_printk("[%s] SDMA [%u:%u:%u] value: %u",
+                 __get_str(dev),
+                 __entry->ctxt,
+                 __entry->subctxt,
+                 __entry->comp_idx,
+                 __entry->value
+               )
+);
+
+DEFINE_EVENT(hfi1_sdma_value_template, hfi1_sdma_user_initial_tidoffset,
+            TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u16 subctxt,
+                     u16 comp_idx, u32 tidoffset),
+            TP_ARGS(dd, ctxt, subctxt, comp_idx, tidoffset));
+
+DEFINE_EVENT(hfi1_sdma_value_template, hfi1_sdma_user_data_length,
+            TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u16 subctxt,
+                     u16 comp_idx, u32 data_len),
+            TP_ARGS(dd, ctxt, subctxt, comp_idx, data_len));
+
+DEFINE_EVENT(hfi1_sdma_value_template, hfi1_sdma_user_compute_length,
+            TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u16 subctxt,
+                     u16 comp_idx, u32 data_len),
+            TP_ARGS(dd, ctxt, subctxt, comp_idx, data_len));
+
+TRACE_EVENT(hfi1_sdma_user_tid_info,
+           TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u16 subctxt,
+                    u16 comp_idx, u32 tidoffset, u32 units, u8 shift),
+           TP_ARGS(dd, ctxt, subctxt, comp_idx, tidoffset, units, shift),
+           TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+                            __field(u16, ctxt)
+                            __field(u16, subctxt)
+                            __field(u16, comp_idx)
+                            __field(u32, tidoffset)
+                            __field(u32, units)
+                            __field(u8, shift)
+                            ),
+           TP_fast_assign(DD_DEV_ASSIGN(dd);
+                          __entry->ctxt = ctxt;
+                          __entry->subctxt = subctxt;
+                          __entry->comp_idx = comp_idx;
+                          __entry->tidoffset = tidoffset;
+                          __entry->units = units;
+                          __entry->shift = shift;
+                          ),
+           TP_printk("[%s] SDMA [%u:%u:%u] TID offset %ubytes %uunits om %u",
+                     __get_str(dev),
+                     __entry->ctxt,
+                     __entry->subctxt,
+                     __entry->comp_idx,
+                     __entry->tidoffset,
+                     __entry->units,
+                     __entry->shift
+                     )
+);
+
+TRACE_EVENT(hfi1_sdma_request,
+           TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u16 subctxt,
+                    unsigned long dim),
+           TP_ARGS(dd, ctxt, subctxt, dim),
+           TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+                            __field(u16, ctxt)
+                            __field(u16, subctxt)
+                            __field(unsigned long, dim)
+                            ),
+           TP_fast_assign(DD_DEV_ASSIGN(dd);
+                          __entry->ctxt = ctxt;
+                          __entry->subctxt = subctxt;
+                          __entry->dim = dim;
+                          ),
+           TP_printk("[%s] SDMA from %u:%u (%lu)",
+                     __get_str(dev),
+                     __entry->ctxt,
+                     __entry->subctxt,
+                     __entry->dim
+                     )
+);
+
 DECLARE_EVENT_CLASS(hfi1_sdma_engine_class,
                    TP_PROTO(struct sdma_engine *sde, u64 status),
                    TP_ARGS(sde, status),
index dacb0fc..c0c0e04 100644 (file)
@@ -266,8 +266,8 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
 {
        struct hfi1_user_sdma_pkt_q *pq;
 
-       hfi1_cdbg(SDMA, "[%u:%u:%u] Freeing user SDMA queues", uctxt->dd->unit,
-                 uctxt->ctxt, fd->subctxt);
+       trace_hfi1_sdma_user_free_queues(uctxt->dd, uctxt->ctxt, fd->subctxt);
+
        pq = fd->pq;
        if (pq) {
                if (pq->handler)
@@ -349,7 +349,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
 
        trace_hfi1_sdma_user_reqinfo(dd, uctxt->ctxt, fd->subctxt,
                                     (u16 *)&info);
-
        if (info.comp_idx >= hfi1_sdma_comp_ring_size) {
                hfi1_cdbg(SDMA,
                          "[%u:%u:%u:%u] Invalid comp index",
@@ -386,8 +385,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
        /*
         * All safety checks have been done and this request has been claimed.
         */
-       hfi1_cdbg(SDMA, "[%u:%u:%u] Using req/comp entry %u\n", dd->unit,
-                 uctxt->ctxt, fd->subctxt, info.comp_idx);
+       trace_hfi1_sdma_user_process_request(dd, uctxt->ctxt, fd->subctxt,
+                                            info.comp_idx);
        req = pq->reqs + info.comp_idx;
        req->data_iovs = req_iovcnt(info.ctrl) - 1; /* subtract header vector */
        req->data_len  = 0;
@@ -487,7 +486,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
        req->tidoffset = KDETH_GET(req->hdr.kdeth.ver_tid_offset, OFFSET) *
                (KDETH_GET(req->hdr.kdeth.ver_tid_offset, OM) ?
                 KDETH_OM_LARGE : KDETH_OM_SMALL);
-       SDMA_DBG(req, "Initial TID offset %u", req->tidoffset);
+       trace_hfi1_sdma_user_initial_tidoffset(dd, uctxt->ctxt, fd->subctxt,
+                                              info.comp_idx, req->tidoffset);
        idx++;
 
        /* Save all the IO vector structures */
@@ -505,8 +505,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
                }
                req->data_len += req->iovs[i].iov.iov_len;
        }
-       SDMA_DBG(req, "total data length %u", req->data_len);
-
+       trace_hfi1_sdma_user_data_length(dd, uctxt->ctxt, fd->subctxt,
+                                        info.comp_idx, req->data_len);
        if (pcount > req->info.npkts)
                pcount = req->info.npkts;
        /*
@@ -661,7 +661,11 @@ static inline u32 compute_data_length(struct user_sdma_request *req,
        } else {
                len = min(req->data_len - req->sent, (u32)req->info.fragsize);
        }
-       SDMA_DBG(req, "Data Length = %u", len);
+       trace_hfi1_sdma_user_compute_length(req->pq->dd,
+                                           req->pq->ctxt,
+                                           req->pq->subctxt,
+                                           req->info.comp_idx,
+                                           len);
        return len;
 }
 
@@ -1231,9 +1235,10 @@ static int set_txreq_header(struct user_sdma_request *req,
                 * Set the KDETH.OFFSET and KDETH.OM based on size of
                 * transfer.
                 */
-               SDMA_DBG(req, "TID offset %ubytes %uunits om%u",
-                        req->tidoffset, req->tidoffset >> omfactor,
-                        omfactor != KDETH_OM_SMALL_SHIFT);
+               trace_hfi1_sdma_user_tid_info(
+                       pq->dd, pq->ctxt, pq->subctxt, req->info.comp_idx,
+                       req->tidoffset, req->tidoffset >> omfactor,
+                       omfactor != KDETH_OM_SMALL_SHIFT);
                KDETH_SET(hdr->kdeth.ver_tid_offset, OFFSET,
                          req->tidoffset >> omfactor);
                KDETH_SET(hdr->kdeth.ver_tid_offset, OM,
@@ -1441,8 +1446,6 @@ static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq,
                                  u16 idx, enum hfi1_sdma_comp_state state,
                                  int ret)
 {
-       hfi1_cdbg(SDMA, "[%u:%u:%u:%u] Setting completion status %u %d",
-                 pq->dd->unit, pq->ctxt, pq->subctxt, idx, state, ret);
        if (state == ERROR)
                cq->comps[idx].errcode = -ret;
        smp_wmb(); /* make sure errcode is visible first */
index 6c10484..9b8bb56 100644 (file)
        hfi1_cdbg(SDMA, "[%u:%u:%u:%u] " fmt, (req)->pq->dd->unit, \
                 (req)->pq->ctxt, (req)->pq->subctxt, (req)->info.comp_idx, \
                 ##__VA_ARGS__)
-#define SDMA_Q_DBG(pq, fmt, ...)                        \
-       hfi1_cdbg(SDMA, "[%u:%u:%u] " fmt, (pq)->dd->unit, (pq)->ctxt, \
-                (pq)->subctxt, ##__VA_ARGS__)
 
 extern uint extended_psn;