Merge drm/drm-next into drm-intel-gt-next
Author:     Tvrtko Ursulin <tvrtko.ursulin@intel.com>
AuthorDate: Mon, 5 Jun 2023 10:10:23 +0000 (11:10 +0100)
Commit:     Tvrtko Ursulin <tvrtko.ursulin@intel.com>
CommitDate: Mon, 5 Jun 2023 10:10:23 +0000 (11:10 +0100)
For conflict avoidance we need the following commit:

  c9a9f18d3ad8 drm/i915/huc: use const struct bus_type pointers

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
16 files changed:
drivers/gpu/drm/i915/Kconfig.debug
drivers/gpu/drm/i915/gem/i915_gem_context.c
drivers/gpu/drm/i915/gt/gen8_engine_cs.c
drivers/gpu/drm/i915/gt/intel_ggtt.c
drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c
drivers/gpu/drm/i915/gt/selftest_execlists.c
drivers/gpu/drm/i915/gt/selftest_tlb.c
drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h
drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h
drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c
drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
drivers/gpu/drm/i915/i915_perf.c
drivers/gpu/drm/i915/i915_pmu.c
drivers/gpu/drm/i915/i915_pmu.h
drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c

diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
index 47e8453..2d21930 100644
@@ -157,6 +157,7 @@ config DRM_I915_SW_FENCE_CHECK_DAG
 config DRM_I915_DEBUG_GUC
        bool "Enable additional driver debugging for GuC"
        depends on DRM_I915
+       select STACKDEPOT
        default n
        help
          Choose this option to turn on extra driver debugging that may affect
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 5402a7b..9a9ff84 100644
@@ -964,7 +964,11 @@ static int intel_context_set_gem(struct intel_context *ce,
        RCU_INIT_POINTER(ce->gem_context, ctx);
 
        GEM_BUG_ON(intel_context_is_pinned(ce));
-       ce->ring_size = SZ_16K;
+
+       if (ce->engine->class == COMPUTE_CLASS)
+               ce->ring_size = SZ_512K;
+       else
+               ce->ring_size = SZ_16K;
 
        i915_vm_put(ce->vm);
        ce->vm = i915_gem_context_get_eb_vm(ctx);
diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
index e1c76e5..23857cc 100644
@@ -177,14 +177,40 @@ u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv
        return cs;
 }
 
+static int mtl_dummy_pipe_control(struct i915_request *rq)
+{
+       /* Wa_14016712196 */
+       if (IS_MTL_GRAPHICS_STEP(rq->engine->i915, M, STEP_A0, STEP_B0) ||
+           IS_MTL_GRAPHICS_STEP(rq->engine->i915, P, STEP_A0, STEP_B0)) {
+               u32 *cs;
+
+               /* dummy PIPE_CONTROL + depth flush */
+               cs = intel_ring_begin(rq, 6);
+               if (IS_ERR(cs))
+                       return PTR_ERR(cs);
+               cs = gen12_emit_pipe_control(cs,
+                                            0,
+                                            PIPE_CONTROL_DEPTH_CACHE_FLUSH,
+                                            LRC_PPHWSP_SCRATCH_ADDR);
+               intel_ring_advance(rq, cs);
+       }
+
+       return 0;
+}
+
 int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
 {
        struct intel_engine_cs *engine = rq->engine;
 
        if (mode & EMIT_FLUSH) {
                u32 flags = 0;
+               int err;
                u32 *cs;
 
+               err = mtl_dummy_pipe_control(rq);
+               if (err)
+                       return err;
+
                flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
                flags |= PIPE_CONTROL_FLUSH_L3;
                flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
@@ -217,6 +243,11 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
        if (mode & EMIT_INVALIDATE) {
                u32 flags = 0;
                u32 *cs, count;
+               int err;
+
+               err = mtl_dummy_pipe_control(rq);
+               if (err)
+                       return err;
 
                flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_TLB_INVALIDATE;
@@ -733,6 +764,13 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
                     PIPE_CONTROL_DC_FLUSH_ENABLE |
                     PIPE_CONTROL_FLUSH_ENABLE);
 
+       /* Wa_14016712196 */
+       if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
+           IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0))
+               /* dummy PIPE_CONTROL + depth flush */
+               cs = gen12_emit_pipe_control(cs, 0,
+                                            PIPE_CONTROL_DEPTH_CACHE_FLUSH, 0);
+
        if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
                /* Wa_1409600907 */
                flags |= PIPE_CONTROL_DEPTH_STALL;
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 2a7942f..1221977 100644
@@ -1015,16 +1015,16 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
 
 /*
  * For pre-gen8 platforms pat_index is the same as enum i915_cache_level,
- * so these PTE encode functions are left with using cache_level.
+ * so the switch-case statements in these PTE encode functions are still valid.
  * See translation table LEGACY_CACHELEVEL.
  */
 static u64 snb_pte_encode(dma_addr_t addr,
-                         enum i915_cache_level level,
+                         unsigned int pat_index,
                          u32 flags)
 {
        gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
 
-       switch (level) {
+       switch (pat_index) {
        case I915_CACHE_L3_LLC:
        case I915_CACHE_LLC:
                pte |= GEN6_PTE_CACHE_LLC;
@@ -1033,19 +1033,19 @@ static u64 snb_pte_encode(dma_addr_t addr,
                pte |= GEN6_PTE_UNCACHED;
                break;
        default:
-               MISSING_CASE(level);
+               MISSING_CASE(pat_index);
        }
 
        return pte;
 }
 
 static u64 ivb_pte_encode(dma_addr_t addr,
-                         enum i915_cache_level level,
+                         unsigned int pat_index,
                          u32 flags)
 {
        gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
 
-       switch (level) {
+       switch (pat_index) {
        case I915_CACHE_L3_LLC:
                pte |= GEN7_PTE_CACHE_L3_LLC;
                break;
@@ -1056,14 +1056,14 @@ static u64 ivb_pte_encode(dma_addr_t addr,
                pte |= GEN6_PTE_UNCACHED;
                break;
        default:
-               MISSING_CASE(level);
+               MISSING_CASE(pat_index);
        }
 
        return pte;
 }
 
 static u64 byt_pte_encode(dma_addr_t addr,
-                         enum i915_cache_level level,
+                         unsigned int pat_index,
                          u32 flags)
 {
        gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
@@ -1071,31 +1071,31 @@ static u64 byt_pte_encode(dma_addr_t addr,
        if (!(flags & PTE_READ_ONLY))
                pte |= BYT_PTE_WRITEABLE;
 
-       if (level != I915_CACHE_NONE)
+       if (pat_index != I915_CACHE_NONE)
                pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
 
        return pte;
 }
 
 static u64 hsw_pte_encode(dma_addr_t addr,
-                         enum i915_cache_level level,
+                         unsigned int pat_index,
                          u32 flags)
 {
        gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
 
-       if (level != I915_CACHE_NONE)
+       if (pat_index != I915_CACHE_NONE)
                pte |= HSW_WB_LLC_AGE3;
 
        return pte;
 }
 
 static u64 iris_pte_encode(dma_addr_t addr,
-                          enum i915_cache_level level,
+                          unsigned int pat_index,
                           u32 flags)
 {
        gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
 
-       switch (level) {
+       switch (pat_index) {
        case I915_CACHE_NONE:
                break;
        case I915_CACHE_WT:
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c b/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c
index d6a74ae..866c416 100644
 static void gmch_ggtt_insert_page(struct i915_address_space *vm,
                                  dma_addr_t addr,
                                  u64 offset,
-                                 enum i915_cache_level cache_level,
+                                 unsigned int pat_index,
                                  u32 unused)
 {
-       unsigned int flags = (cache_level == I915_CACHE_NONE) ?
+       unsigned int flags = (pat_index == I915_CACHE_NONE) ?
                AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
 
        intel_gmch_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
@@ -29,10 +29,10 @@ static void gmch_ggtt_insert_page(struct i915_address_space *vm,
 
 static void gmch_ggtt_insert_entries(struct i915_address_space *vm,
                                     struct i915_vma_resource *vma_res,
-                                    enum i915_cache_level cache_level,
+                                    unsigned int pat_index,
                                     u32 unused)
 {
-       unsigned int flags = (cache_level == I915_CACHE_NONE) ?
+       unsigned int flags = (pat_index == I915_CACHE_NONE) ?
                AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
 
        intel_gmch_gtt_insert_sg_entries(vma_res->bi.pages, vma_res->start >> PAGE_SHIFT,
diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c
index 736b89a..4202df5 100644
@@ -1530,8 +1530,8 @@ static int live_busywait_preempt(void *arg)
        struct drm_i915_gem_object *obj;
        struct i915_vma *vma;
        enum intel_engine_id id;
-       int err = -ENOMEM;
        u32 *map;
+       int err;
 
        /*
         * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
@@ -1539,13 +1539,17 @@ static int live_busywait_preempt(void *arg)
         */
 
        ctx_hi = kernel_context(gt->i915, NULL);
-       if (!ctx_hi)
-               return -ENOMEM;
+       if (IS_ERR(ctx_hi))
+               return PTR_ERR(ctx_hi);
+
        ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
 
        ctx_lo = kernel_context(gt->i915, NULL);
-       if (!ctx_lo)
+       if (IS_ERR(ctx_lo)) {
+               err = PTR_ERR(ctx_lo);
                goto err_ctx_hi;
+       }
+
        ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
 
        obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
diff --git a/drivers/gpu/drm/i915/gt/selftest_tlb.c b/drivers/gpu/drm/i915/gt/selftest_tlb.c
index 4493c85..3bd6b54 100644
@@ -190,11 +190,18 @@ out:
 
 static struct drm_i915_gem_object *create_lmem(struct intel_gt *gt)
 {
+       struct intel_memory_region *mr = gt->i915->mm.regions[INTEL_REGION_LMEM_0];
+       resource_size_t size = SZ_1G;
+
        /*
         * Allocation of largest possible page size allows to test all types
-        * of pages.
+        * of pages. To succeed with both allocations, especially in case of Small
+        * BAR, try to allocate no more than quarter of mappable memory.
         */
-       return i915_gem_object_create_lmem(gt->i915, SZ_1G, I915_BO_ALLOC_CONTIGUOUS);
+       if (mr && size > mr->io_size / 4)
+               size = mr->io_size / 4;
+
+       return i915_gem_object_create_lmem(gt->i915, size, I915_BO_ALLOC_CONTIGUOUS);
 }
 
 static struct drm_i915_gem_object *create_smem(struct intel_gt *gt)
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h
index 28b8387..f7d70db 100644
@@ -167,25 +167,4 @@ static_assert(sizeof(struct guc_ct_buffer_desc) == 64);
  * - **flags**, holds various bits to control message handling
  */
 
-/*
- * Definition of the command transport message header (DW0)
- *
- * bit[4..0]   message len (in dwords)
- * bit[7..5]   reserved
- * bit[8]      response (G2H only)
- * bit[8]      write fence to desc (H2G only)
- * bit[9]      write status to H2G buff (H2G only)
- * bit[10]     send status back via G2H (H2G only)
- * bit[15..11] reserved
- * bit[31..16] action code
- */
-#define GUC_CT_MSG_LEN_SHIFT                   0
-#define GUC_CT_MSG_LEN_MASK                    0x1F
-#define GUC_CT_MSG_IS_RESPONSE                 (1 << 8)
-#define GUC_CT_MSG_WRITE_FENCE_TO_DESC         (1 << 8)
-#define GUC_CT_MSG_WRITE_STATUS_TO_BUFF                (1 << 9)
-#define GUC_CT_MSG_SEND_STATUS                 (1 << 10)
-#define GUC_CT_MSG_ACTION_SHIFT                        16
-#define GUC_CT_MSG_ACTION_MASK                 0xFFFF
-
 #endif /* _ABI_GUC_COMMUNICATION_CTB_ABI_H */
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h
index 7d5ba4d..98eb4f4 100644
@@ -24,6 +24,7 @@
  *  |   | 30:28 | **TYPE** - message type                                      |
  *  |   |       |   - _`GUC_HXG_TYPE_REQUEST` = 0                              |
  *  |   |       |   - _`GUC_HXG_TYPE_EVENT` = 1                                |
+ *  |   |       |   - _`GUC_HXG_TYPE_FAST_REQUEST` = 2                         |
  *  |   |       |   - _`GUC_HXG_TYPE_NO_RESPONSE_BUSY` = 3                     |
  *  |   |       |   - _`GUC_HXG_TYPE_NO_RESPONSE_RETRY` = 5                    |
  *  |   |       |   - _`GUC_HXG_TYPE_RESPONSE_FAILURE` = 6                     |
@@ -46,6 +47,7 @@
 #define GUC_HXG_MSG_0_TYPE                     (0x7 << 28)
 #define   GUC_HXG_TYPE_REQUEST                 0u
 #define   GUC_HXG_TYPE_EVENT                   1u
+#define   GUC_HXG_TYPE_FAST_REQUEST            2u
 #define   GUC_HXG_TYPE_NO_RESPONSE_BUSY                3u
 #define   GUC_HXG_TYPE_NO_RESPONSE_RETRY       5u
 #define   GUC_HXG_TYPE_RESPONSE_FAILURE                6u
 #define GUC_HXG_REQUEST_MSG_n_DATAn            GUC_HXG_MSG_n_PAYLOAD
 
 /**
+ * DOC: HXG Fast Request
+ *
+ * The `HXG Request`_ message should be used to initiate asynchronous activity
+ * for which confirmation or return data is not expected.
+ *
+ * If confirmation is required then `HXG Request`_ shall be used instead.
+ *
+ * The recipient of this message may only use `HXG Failure`_ message if it was
+ * unable to accept this request (like invalid data).
+ *
+ * Format of `HXG Fast Request`_ message is same as `HXG Request`_ except @TYPE.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN - see `HXG Message`_                                  |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = `GUC_HXG_TYPE_FAST_REQUEST`_                          |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 27:16 | DATA0 - see `HXG Request`_                                   |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | ACTION - see `HXG Request`_                                  |
+ *  +---+-------+--------------------------------------------------------------+
+ *  |...|       | DATAn - see `HXG Request`_                                   |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+/**
  * DOC: HXG Event
  *
  * The `HXG Event`_ message should be used to initiate asynchronous activity
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c
index ebee0b5..5f138de 100644
@@ -5,8 +5,8 @@
 
 #include <linux/component.h>
 
-#include "drm/i915_component.h"
-#include "drm/i915_gsc_proxy_mei_interface.h"
+#include <drm/i915_component.h>
+#include <drm/i915_gsc_proxy_mei_interface.h>
 
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_print.h"
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index a22e33f..f28a3a8 100644
@@ -376,6 +376,24 @@ void intel_guc_ct_disable(struct intel_guc_ct *ct)
        }
 }
 
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+static void ct_track_lost_and_found(struct intel_guc_ct *ct, u32 fence, u32 action)
+{
+       unsigned int lost = fence % ARRAY_SIZE(ct->requests.lost_and_found);
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC)
+       unsigned long entries[SZ_32];
+       unsigned int n;
+
+       n = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
+
+       /* May be called under spinlock, so avoid sleeping */
+       ct->requests.lost_and_found[lost].stack = stack_depot_save(entries, n, GFP_NOWAIT);
+#endif
+       ct->requests.lost_and_found[lost].fence = fence;
+       ct->requests.lost_and_found[lost].action = action;
+}
+#endif
+
 static u32 ct_get_next_fence(struct intel_guc_ct *ct)
 {
        /* For now it's trivial */
@@ -426,11 +444,11 @@ static int ct_write(struct intel_guc_ct *ct,
                 FIELD_PREP(GUC_CTB_MSG_0_NUM_DWORDS, len) |
                 FIELD_PREP(GUC_CTB_MSG_0_FENCE, fence);
 
-       type = (flags & INTEL_GUC_CT_SEND_NB) ? GUC_HXG_TYPE_EVENT :
+       type = (flags & INTEL_GUC_CT_SEND_NB) ? GUC_HXG_TYPE_FAST_REQUEST :
                GUC_HXG_TYPE_REQUEST;
        hxg = FIELD_PREP(GUC_HXG_MSG_0_TYPE, type) |
-               FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION |
-                          GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
+               FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION |
+                          GUC_HXG_REQUEST_MSG_0_DATA0, action[0]);
 
        CT_DEBUG(ct, "writing (tail %u) %*ph %*ph %*ph\n",
                 tail, 4, &header, 4, &hxg, 4 * (len - 1), &action[1]);
@@ -447,6 +465,11 @@ static int ct_write(struct intel_guc_ct *ct,
        }
        GEM_BUG_ON(tail > size);
 
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+       ct_track_lost_and_found(ct, fence,
+                               FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, action[0]));
+#endif
+
        /*
         * make sure H2G buffer update and LRC tail update (if this triggering a
         * submission) are visible before updating the descriptor tail
@@ -675,7 +698,7 @@ static int ct_send(struct intel_guc_ct *ct,
 
        GEM_BUG_ON(!ct->enabled);
        GEM_BUG_ON(!len);
-       GEM_BUG_ON(len & ~GUC_CT_MSG_LEN_MASK);
+       GEM_BUG_ON(len > GUC_CTB_HXG_MSG_MAX_LEN - GUC_CTB_HDR_LEN);
        GEM_BUG_ON(!response_buf && response_buf_size);
        might_sleep();
 
@@ -953,6 +976,43 @@ corrupted:
        return -EPIPE;
 }
 
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+static bool ct_check_lost_and_found(struct intel_guc_ct *ct, u32 fence)
+{
+       unsigned int n;
+       char *buf = NULL;
+       bool found = false;
+
+       lockdep_assert_held(&ct->requests.lock);
+
+       for (n = 0; n < ARRAY_SIZE(ct->requests.lost_and_found); n++) {
+               if (ct->requests.lost_and_found[n].fence != fence)
+                       continue;
+               found = true;
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC)
+               buf = kmalloc(SZ_4K, GFP_NOWAIT);
+               if (buf && stack_depot_snprint(ct->requests.lost_and_found[n].stack,
+                                              buf, SZ_4K, 0)) {
+                       CT_ERROR(ct, "Fence %u was used by action %#04x sent at\n%s",
+                                fence, ct->requests.lost_and_found[n].action, buf);
+                       break;
+               }
+#endif
+               CT_ERROR(ct, "Fence %u was used by action %#04x\n",
+                        fence, ct->requests.lost_and_found[n].action);
+               break;
+       }
+       kfree(buf);
+       return found;
+}
+#else
+static bool ct_check_lost_and_found(struct intel_guc_ct *ct, u32 fence)
+{
+       return false;
+}
+#endif
+
 static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *response)
 {
        u32 len = FIELD_GET(GUC_CTB_MSG_0_NUM_DWORDS, response->msg[0]);
@@ -994,12 +1054,13 @@ static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *r
                break;
        }
        if (!found) {
-               CT_ERROR(ct, "Unsolicited response (fence %u)\n", fence);
-               CT_ERROR(ct, "Could not find fence=%u, last_fence=%u\n", fence,
-                        ct->requests.last_fence);
-               list_for_each_entry(req, &ct->requests.pending, link)
-                       CT_ERROR(ct, "request %u awaits response\n",
-                                req->fence);
+               CT_ERROR(ct, "Unsolicited response message: len %u, data %#x (fence %u, last %u)\n",
+                        len, hxg[0], fence, ct->requests.last_fence);
+               if (!ct_check_lost_and_found(ct, fence)) {
+                       list_for_each_entry(req, &ct->requests.pending, link)
+                               CT_ERROR(ct, "request %u awaits response\n",
+                                        req->fence);
+               }
                err = -ENOKEY;
        }
        spin_unlock_irqrestore(&ct->requests.lock, flags);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
index 818415b..58e4290 100644
@@ -8,6 +8,7 @@
 
 #include <linux/interrupt.h>
 #include <linux/spinlock.h>
+#include <linux/stackdepot.h>
 #include <linux/workqueue.h>
 #include <linux/ktime.h>
 #include <linux/wait.h>
@@ -81,6 +82,16 @@ struct intel_guc_ct {
 
                struct list_head incoming; /* incoming requests */
                struct work_struct worker; /* handler for incoming requests */
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+               struct {
+                       u16 fence;
+                       u16 action;
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC)
+                       depot_stack_handle_t stack;
+#endif
+               } lost_and_found[SZ_16];
+#endif
        } requests;
 
        /** @stall_time: time of first time a CTB submission is stalled */
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 19d5652..5828415 100644
@@ -877,12 +877,17 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
                        stream->oa_buffer.last_ctx_id = ctx_id;
                }
 
-               /*
-                * Clear out the report id and timestamp as a means to detect unlanded
-                * reports.
-                */
-               oa_report_id_clear(stream, report32);
-               oa_timestamp_clear(stream, report32);
+               if (is_power_of_2(report_size)) {
+                       /*
+                        * Clear out the report id and timestamp as a means
+                        * to detect unlanded reports.
+                        */
+                       oa_report_id_clear(stream, report32);
+                       oa_timestamp_clear(stream, report32);
+               } else {
+                       /* Zero out the entire report */
+                       memset(report32, 0, report_size);
+               }
        }
 
        if (start_offset != *offset) {
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index a814583..f96fe92 100644
@@ -139,7 +139,7 @@ static u32 frequency_enabled_mask(void)
        return mask;
 }
 
-static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
+static bool pmu_needs_timer(struct i915_pmu *pmu)
 {
        struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
        u32 enable;
@@ -158,16 +158,10 @@ static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
        enable &= frequency_enabled_mask() | ENGINE_SAMPLE_MASK;
 
        /*
-        * When the GPU is idle per-engine counters do not need to be
-        * running so clear those bits out.
-        */
-       if (!gpu_active)
-               enable &= ~ENGINE_SAMPLE_MASK;
-       /*
         * Also there is software busyness tracking available we do not
         * need the timer for I915_SAMPLE_BUSY counter.
         */
-       else if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS)
+       if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS)
                enable &= ~BIT(I915_SAMPLE_BUSY);
 
        /*
@@ -197,31 +191,21 @@ static inline s64 ktime_since_raw(const ktime_t kt)
        return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
 }
 
-static unsigned int
-__sample_idx(struct i915_pmu *pmu, unsigned int gt_id, int sample)
-{
-       unsigned int idx = gt_id * __I915_NUM_PMU_SAMPLERS + sample;
-
-       GEM_BUG_ON(idx >= ARRAY_SIZE(pmu->sample));
-
-       return idx;
-}
-
 static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample)
 {
-       return pmu->sample[__sample_idx(pmu, gt_id, sample)].cur;
+       return pmu->sample[gt_id][sample].cur;
 }
 
 static void
 store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val)
 {
-       pmu->sample[__sample_idx(pmu, gt_id, sample)].cur = val;
+       pmu->sample[gt_id][sample].cur = val;
 }
 
 static void
 add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val, u32 mul)
 {
-       pmu->sample[__sample_idx(pmu, gt_id, sample)].cur += mul_u32_u32(val, mul);
+       pmu->sample[gt_id][sample].cur += mul_u32_u32(val, mul);
 }
 
 static u64 get_rc6(struct intel_gt *gt)
@@ -295,7 +279,7 @@ static void park_rc6(struct intel_gt *gt)
 
 static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
 {
-       if (!pmu->timer_enabled && pmu_needs_timer(pmu, true)) {
+       if (!pmu->timer_enabled && pmu_needs_timer(pmu)) {
                pmu->timer_enabled = true;
                pmu->timer_last = ktime_get();
                hrtimer_start_range_ns(&pmu->timer,
@@ -321,7 +305,7 @@ void i915_pmu_gt_parked(struct intel_gt *gt)
         */
        pmu->unparked &= ~BIT(gt->info.id);
        if (pmu->unparked == 0)
-               pmu->timer_enabled = pmu_needs_timer(pmu, false);
+               pmu->timer_enabled = false;
 
        spin_unlock_irq(&pmu->lock);
 }
@@ -827,7 +811,7 @@ static void i915_pmu_disable(struct perf_event *event)
         */
        if (--pmu->enable_count[bit] == 0) {
                pmu->enable &= ~BIT(bit);
-               pmu->timer_enabled &= pmu_needs_timer(pmu, true);
+               pmu->timer_enabled &= pmu_needs_timer(pmu);
        }
 
        spin_unlock_irqrestore(&pmu->lock, flags);
diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
index 33d80fb..d20592e 100644
@@ -127,7 +127,7 @@ struct i915_pmu {
         * Only global counters are held here, while the per-engine ones are in
         * struct intel_engine_cs.
         */
-       struct i915_pmu_sample sample[I915_PMU_MAX_GTS * __I915_NUM_PMU_SAMPLERS];
+       struct i915_pmu_sample sample[I915_PMU_MAX_GTS][__I915_NUM_PMU_SAMPLERS];
        /**
         * @sleep_last: Last time GT parked for RC6 estimation.
         */
diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c
index 8dc41de..a217821 100644
@@ -143,7 +143,7 @@ gsccs_send_message(struct intel_pxp *pxp,
 
        reply_size = header->message_size - sizeof(*header);
        if (reply_size > msg_out_size_max) {
-               drm_warn(&i915->drm, "caller with insufficient PXP reply size %u (%ld)\n",
+               drm_warn(&i915->drm, "caller with insufficient PXP reply size %u (%zu)\n",
                         reply_size, msg_out_size_max);
                reply_size = msg_out_size_max;
        }