*/
#include <linux/anon_inodes.h>
+#include <linux/nospec.h>
#include <linux/sizes.h>
#include <linux/uuid.h>
[I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
[I915_OAR_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
[I915_OA_FORMAT_A24u40_A14u32_B8_C8] = { 5, 256 },
+ [I915_OAM_FORMAT_MPEC8u64_B8_C8] = { 1, 192, TYPE_OAM, HDR_64_BIT },
+ [I915_OAM_FORMAT_MPEC8u32_B8_C8] = { 2, 128, TYPE_OAM, HDR_64_BIT },
+};
+
+static const u32 mtl_oa_base[] = {
+ [PERF_GROUP_OAM_SAMEDIA_0] = 0x393000,
};
#define SAMPLE_OA_REPORT (1<<0)
kfree(oa_bo);
}
+static inline const
+struct i915_perf_regs *__oa_regs(struct i915_perf_stream *stream)
+{
+ return &stream->engine->oa_group->regs;
+}
+
static u32 gen12_oa_hw_tail_read(struct i915_perf_stream *stream)
{
struct intel_uncore *uncore = stream->uncore;
- return intel_uncore_read(uncore, GEN12_OAG_OATAILPTR) &
+ return intel_uncore_read(uncore, __oa_regs(stream)->oa_tail_ptr) &
GEN12_OAG_OATAILPTR_MASK;
}
i915_reg_t oaheadptr;
oaheadptr = GRAPHICS_VER(stream->perf->i915) == 12 ?
- GEN12_OAG_OAHEADPTR : GEN8_OAHEADPTR;
+ __oa_regs(stream)->oa_head_ptr :
+ GEN8_OAHEADPTR;
spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
return -EIO;
oastatus_reg = GRAPHICS_VER(stream->perf->i915) == 12 ?
- GEN12_OAG_OASTATUS : GEN8_OASTATUS;
+ __oa_regs(stream)->oa_status :
+ GEN8_OASTATUS;
oastatus = intel_uncore_read(uncore, oastatus_reg);
return engine->oa_group;
}
+static bool engine_supports_oa_format(struct intel_engine_cs *engine, int type)
+{
+ return engine->oa_group && engine->oa_group->type == type;
+}
+
static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
{
struct i915_perf *perf = stream->perf;
spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
- intel_uncore_write(uncore, GEN12_OAG_OASTATUS, 0);
- intel_uncore_write(uncore, GEN12_OAG_OAHEADPTR,
+ intel_uncore_write(uncore, __oa_regs(stream)->oa_status, 0);
+ intel_uncore_write(uncore, __oa_regs(stream)->oa_head_ptr,
gtt_offset & GEN12_OAG_OAHEADPTR_MASK);
stream->oa_buffer.head = gtt_offset;
* to enable proper functionality of the overflow
* bit."
*/
- intel_uncore_write(uncore, GEN12_OAG_OABUFFER, gtt_offset |
+ intel_uncore_write(uncore, __oa_regs(stream)->oa_buffer, gtt_offset |
OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT);
- intel_uncore_write(uncore, GEN12_OAG_OATAILPTR,
+ intel_uncore_write(uncore, __oa_regs(stream)->oa_tail_ptr,
gtt_offset & GEN12_OAG_OATAILPTR_MASK);
/* Mark that we need updated tail pointers to read from... */
return err;
}
-static int gen8_configure_context(struct i915_gem_context *ctx,
+static int gen8_configure_context(struct i915_perf_stream *stream,
+ struct i915_gem_context *ctx,
struct flex *flex, unsigned int count)
{
struct i915_gem_engines_iter it;
spin_unlock(&i915->gem.contexts.lock);
- err = gen8_configure_context(ctx, regs, num_regs);
+ err = gen8_configure_context(stream, ctx, regs, num_regs);
if (err) {
i915_gem_context_put(ctx);
return err;
},
};
+ if (stream->engine->class != RENDER_CLASS)
+ return 0;
+
return oa_configure_all_contexts(stream,
regs, ARRAY_SIZE(regs),
active);
_MASKED_BIT_ENABLE(GEN12_DISABLE_DOP_GATING));
}
- intel_uncore_write(uncore, GEN12_OAG_OA_DEBUG,
+ intel_uncore_write(uncore, __oa_regs(stream)->oa_debug,
/* Disable clk ratio reports, like previous Gens. */
_MASKED_BIT_ENABLE(GEN12_OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS |
GEN12_OAG_OA_DEBUG_INCLUDE_CLK_RATIO) |
*/
oag_report_ctx_switches(stream));
- intel_uncore_write(uncore, GEN12_OAG_OAGLBCTXCTRL, periodic ?
+ intel_uncore_write(uncore, __oa_regs(stream)->oa_ctx_ctrl, periodic ?
(GEN12_OAG_OAGLBCTXCTRL_COUNTER_RESUME |
GEN12_OAG_OAGLBCTXCTRL_TIMER_ENABLE |
(period_exponent << GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT))
static void gen12_oa_enable(struct i915_perf_stream *stream)
{
- struct intel_uncore *uncore = stream->uncore;
- u32 report_format = stream->oa_buffer.format->format;
+ const struct i915_perf_regs *regs;
+ u32 val;
/*
* If we don't want OA reports from the OA buffer, then we don't even
gen12_init_oa_buffer(stream);
- intel_uncore_write(uncore, GEN12_OAG_OACONTROL,
- (report_format << GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT) |
- GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE);
+ regs = __oa_regs(stream);
+ val = (stream->oa_buffer.format->format << regs->oa_ctrl_counter_format_shift) |
+ GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE;
+
+ intel_uncore_write(stream->uncore, regs->oa_ctrl, val);
}
/**
{
struct intel_uncore *uncore = stream->uncore;
- intel_uncore_write(uncore, GEN12_OAG_OACONTROL, 0);
+ intel_uncore_write(uncore, __oa_regs(stream)->oa_ctrl, 0);
if (intel_wait_for_register(uncore,
- GEN12_OAG_OACONTROL,
+ __oa_regs(stream)->oa_ctrl,
GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE, 0,
50))
drm_err(&stream->perf->i915->drm,
struct perf_open_properties *props)
{
struct drm_i915_gem_context_param_sseu user_sseu;
+ const struct i915_oa_format *f;
u64 __user *uprop = uprops;
bool config_instance = false;
bool config_class = false;
return -EINVAL;
}
+ i = array_index_nospec(props->oa_format, I915_OA_FORMAT_MAX);
+ f = &perf->oa_formats[i];
+ if (!engine_supports_oa_format(props->engine, f->type)) {
+ drm_dbg(&perf->i915->drm,
+ "Invalid OA format %d for class %d\n",
+ f->type, props->engine->class);
+ return -EINVAL;
+ }
+
if (config_sseu) {
ret = get_sseu_config(&props->sseu, props->engine, &user_sseu);
if (ret) {
{}
};
+static const struct i915_range mtl_oam_b_counters[] = {
+ { .start = 0x393000, .end = 0x39301c }, /* GEN12_OAM_STARTTRIG1[1-8] */
+ { .start = 0x393020, .end = 0x39303c }, /* GEN12_OAM_REPORTTRIG1[1-8] */
+ { .start = 0x393040, .end = 0x39307c }, /* GEN12_OAM_CEC[0-7][0-1] */
+ { .start = 0x393200, .end = 0x39323C }, /* MPES[0-7] */
+ {}
+};
+
static const struct i915_range xehp_oa_b_counters[] = {
{ .start = 0xdc48, .end = 0xdc48 }, /* OAA_ENABLE_REG */
{ .start = 0xdd00, .end = 0xdd48 }, /* OAG_LCE0_0 - OAA_LENABLE_REG */
{ .start = 0x0d0c, .end = 0x0d2c }, /* NOA_CONFIG[0-8] */
{ .start = 0x9840, .end = 0x9840 }, /* GDT_CHICKEN_BITS */
{ .start = 0x9884, .end = 0x9888 }, /* NOA_WRITE */
+ { .start = 0x38d100, .end = 0x38d114}, /* VISACTL */
+ {}
};
static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
return reg_in_range_table(addr, gen12_oa_b_counters);
}
+static bool mtl_is_valid_oam_b_counter_addr(struct i915_perf *perf, u32 addr)
+{
+ if (HAS_OAM(perf->i915) &&
+ GRAPHICS_VER_FULL(perf->i915) >= IP_VER(12, 70))
+ return reg_in_range_table(addr, mtl_oam_b_counters);
+
+ return false;
+}
+
static bool xehp_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
{
return reg_in_range_table(addr, xehp_oa_b_counters) ||
- reg_in_range_table(addr, gen12_oa_b_counters);
+ reg_in_range_table(addr, gen12_oa_b_counters) ||
+ mtl_is_valid_oam_b_counter_addr(perf, addr);
}
static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
return 1;
}
+static u32 __oam_engine_group(struct intel_engine_cs *engine)
+{
+ if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 70)) {
+ /*
+ * There's 1 SAMEDIA gt and 1 OAM per SAMEDIA gt. All media slices
+ * within the gt use the same OAM. All MTL SKUs list 1 SA MEDIA.
+ */
+ drm_WARN_ON(&engine->i915->drm,
+ engine->gt->type != GT_MEDIA);
+
+ return PERF_GROUP_OAM_SAMEDIA_0;
+ }
+
+ return PERF_GROUP_INVALID;
+}
+
static u32 __oa_engine_group(struct intel_engine_cs *engine)
{
- if (engine->class == RENDER_CLASS)
+ switch (engine->class) {
+ case RENDER_CLASS:
return PERF_GROUP_OAG;
- else
+
+ case VIDEO_DECODE_CLASS:
+ case VIDEO_ENHANCEMENT_CLASS:
+ return __oam_engine_group(engine);
+
+ default:
return PERF_GROUP_INVALID;
+ }
+}
+
+static struct i915_perf_regs __oam_regs(u32 base)
+{
+ return (struct i915_perf_regs) {
+ base,
+ GEN12_OAM_HEAD_POINTER(base),
+ GEN12_OAM_TAIL_POINTER(base),
+ GEN12_OAM_BUFFER(base),
+ GEN12_OAM_CONTEXT_CONTROL(base),
+ GEN12_OAM_CONTROL(base),
+ GEN12_OAM_DEBUG(base),
+ GEN12_OAM_STATUS(base),
+ GEN12_OAM_CONTROL_COUNTER_FORMAT_SHIFT,
+ };
+}
+
+static struct i915_perf_regs __oag_regs(void)
+{
+ return (struct i915_perf_regs) {
+ 0,
+ GEN12_OAG_OAHEADPTR,
+ GEN12_OAG_OATAILPTR,
+ GEN12_OAG_OABUFFER,
+ GEN12_OAG_OAGLBCTXCTRL,
+ GEN12_OAG_OACONTROL,
+ GEN12_OAG_OA_DEBUG,
+ GEN12_OAG_OASTATUS,
+ GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT,
+ };
+}
+
+static void oa_init_groups(struct intel_gt *gt)
+{
+ int i, num_groups = gt->perf.num_perf_groups;
+
+ for (i = 0; i < num_groups; i++) {
+ struct i915_perf_group *g = >->perf.group[i];
+
+ /* Fused off engines can result in a group with num_engines == 0 */
+ if (g->num_engines == 0)
+ continue;
+
+ if (i == PERF_GROUP_OAG && gt->type != GT_MEDIA) {
+ g->regs = __oag_regs();
+ g->type = TYPE_OAG;
+ } else if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) {
+ g->regs = __oam_regs(mtl_oa_base[i]);
+ g->type = TYPE_OAM;
+ }
+ }
}
static int oa_init_gt(struct intel_gt *gt)
gt->perf.num_perf_groups = num_groups;
gt->perf.group = g;
+ oa_init_groups(gt);
+
return 0;
}
break;
case INTEL_DG2:
+ oa_format_add(perf, I915_OAR_FORMAT_A32u40_A4u32_B8_C8);
+ oa_format_add(perf, I915_OA_FORMAT_A24u40_A14u32_B8_C8);
+ break;
+
case INTEL_METEORLAKE:
oa_format_add(perf, I915_OAR_FORMAT_A32u40_A4u32_B8_C8);
oa_format_add(perf, I915_OA_FORMAT_A24u40_A14u32_B8_C8);
+ oa_format_add(perf, I915_OAM_FORMAT_MPEC8u64_B8_C8);
+ oa_format_add(perf, I915_OAM_FORMAT_MPEC8u32_B8_C8);
break;
default:
*
* 6: Add DRM_I915_PERF_PROP_OA_ENGINE_CLASS and
* DRM_I915_PERF_PROP_OA_ENGINE_INSTANCE
+ *
+ * 7: Add support for video decode and enhancement classes.
*/
- return 6;
+ return 7;
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#define GEN12_SQCNT1_PMON_ENABLE REG_BIT(30)
#define GEN12_SQCNT1_OABPC REG_BIT(29)
+/* Gen12 OAM unit */
+#define GEN12_OAM_HEAD_POINTER_OFFSET (0x1a0)
+#define GEN12_OAM_HEAD_POINTER_MASK 0xffffffc0
+
+#define GEN12_OAM_TAIL_POINTER_OFFSET (0x1a4)
+#define GEN12_OAM_TAIL_POINTER_MASK 0xffffffc0
+
+#define GEN12_OAM_BUFFER_OFFSET (0x1a8)
+#define GEN12_OAM_BUFFER_SIZE_MASK (0x7)
+#define GEN12_OAM_BUFFER_SIZE_SHIFT (3)
+#define GEN12_OAM_BUFFER_MEMORY_SELECT REG_BIT(0) /* 0: PPGTT, 1: GGTT */
+
+#define GEN12_OAM_CONTEXT_CONTROL_OFFSET (0x1bc)
+#define GEN12_OAM_CONTEXT_CONTROL_TIMER_PERIOD_SHIFT 2
+#define GEN12_OAM_CONTEXT_CONTROL_TIMER_ENABLE REG_BIT(1)
+#define GEN12_OAM_CONTEXT_CONTROL_COUNTER_RESUME REG_BIT(0)
+
+#define GEN12_OAM_CONTROL_OFFSET (0x194)
+#define GEN12_OAM_CONTROL_COUNTER_FORMAT_SHIFT 1
+#define GEN12_OAM_CONTROL_COUNTER_ENABLE REG_BIT(0)
+
+#define GEN12_OAM_DEBUG_OFFSET (0x198)
+#define GEN12_OAM_DEBUG_BUFFER_SIZE_SELECT REG_BIT(12)
+#define GEN12_OAM_DEBUG_INCLUDE_CLK_RATIO REG_BIT(6)
+#define GEN12_OAM_DEBUG_DISABLE_CLK_RATIO_REPORTS REG_BIT(5)
+#define GEN12_OAM_DEBUG_DISABLE_GO_1_0_REPORTS REG_BIT(2)
+#define GEN12_OAM_DEBUG_DISABLE_CTX_SWITCH_REPORTS REG_BIT(1)
+
+#define GEN12_OAM_STATUS_OFFSET (0x19c)
+#define GEN12_OAM_STATUS_COUNTER_OVERFLOW REG_BIT(2)
+#define GEN12_OAM_STATUS_BUFFER_OVERFLOW REG_BIT(1)
+#define GEN12_OAM_STATUS_REPORT_LOST REG_BIT(0)
+
+#define GEN12_OAM_MMIO_TRG_OFFSET (0x1d0)
+
+#define GEN12_OAM_MMIO_TRG(base) \
+ _MMIO((base) + GEN12_OAM_MMIO_TRG_OFFSET)
+
+#define GEN12_OAM_HEAD_POINTER(base) \
+ _MMIO((base) + GEN12_OAM_HEAD_POINTER_OFFSET)
+#define GEN12_OAM_TAIL_POINTER(base) \
+ _MMIO((base) + GEN12_OAM_TAIL_POINTER_OFFSET)
+#define GEN12_OAM_BUFFER(base) \
+ _MMIO((base) + GEN12_OAM_BUFFER_OFFSET)
+#define GEN12_OAM_CONTEXT_CONTROL(base) \
+ _MMIO((base) + GEN12_OAM_CONTEXT_CONTROL_OFFSET)
+#define GEN12_OAM_CONTROL(base) \
+ _MMIO((base) + GEN12_OAM_CONTROL_OFFSET)
+#define GEN12_OAM_DEBUG(base) \
+ _MMIO((base) + GEN12_OAM_DEBUG_OFFSET)
+#define GEN12_OAM_STATUS(base) \
+ _MMIO((base) + GEN12_OAM_STATUS_OFFSET)
+
+#define GEN12_OAM_CEC0_0_OFFSET (0x40)
+#define GEN12_OAM_CEC7_1_OFFSET (0x7c)
+#define GEN12_OAM_CEC0_0(base) \
+ _MMIO((base) + GEN12_OAM_CEC0_0_OFFSET)
+#define GEN12_OAM_CEC7_1(base) \
+ _MMIO((base) + GEN12_OAM_CEC7_1_OFFSET)
+
+#define GEN12_OAM_STARTTRIG1_OFFSET (0x00)
+#define GEN12_OAM_STARTTRIG8_OFFSET (0x1c)
+#define GEN12_OAM_STARTTRIG1(base) \
+ _MMIO((base) + GEN12_OAM_STARTTRIG1_OFFSET)
+#define GEN12_OAM_STARTTRIG8(base) \
+ _MMIO((base) + GEN12_OAM_STARTTRIG8_OFFSET)
+
+#define GEN12_OAM_REPORTTRIG1_OFFSET (0x20)
+#define GEN12_OAM_REPORTTRIG8_OFFSET (0x3c)
+#define GEN12_OAM_REPORTTRIG1(base) \
+ _MMIO((base) + GEN12_OAM_REPORTTRIG1_OFFSET)
+#define GEN12_OAM_REPORTTRIG8(base) \
+ _MMIO((base) + GEN12_OAM_REPORTTRIG8_OFFSET)
+
+#define GEN12_OAM_PERF_COUNTER_B0_OFFSET (0x84)
+#define GEN12_OAM_PERF_COUNTER_B(base, idx) \
+ _MMIO((base) + GEN12_OAM_PERF_COUNTER_B0_OFFSET + 4 * (idx))
+
#endif /* __INTEL_PERF_OA_REGS__ */