Merge drm/drm-next into drm-intel-gt-next
author Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Tue, 12 Apr 2022 08:28:42 +0000 (11:28 +0300)
committer Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Tue, 12 Apr 2022 08:28:42 +0000 (11:28 +0300)
Pull in TTM changes needed for DG2 CCS enabling from Ram.

Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
126 files changed:
Documentation/gpu/drm-usage-stats.rst [new file with mode: 0644]
Documentation/gpu/i915.rst
Documentation/gpu/index.rst
drivers/gpu/drm/i915/Kconfig
drivers/gpu/drm/i915/Makefile
drivers/gpu/drm/i915/display/intel_dpt.c
drivers/gpu/drm/i915/display/intel_fb.c
drivers/gpu/drm/i915/display/intel_fb_pin.c
drivers/gpu/drm/i915/display/intel_fbdev.c
drivers/gpu/drm/i915/display/intel_plane_initial.c
drivers/gpu/drm/i915/gem/i915_gem_context.c
drivers/gpu/drm/i915/gem/i915_gem_context_types.h
drivers/gpu/drm/i915/gem/i915_gem_create.c
drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
drivers/gpu/drm/i915/gem/i915_gem_lmem.c
drivers/gpu/drm/i915/gem/i915_gem_mman.c
drivers/gpu/drm/i915/gem/i915_gem_object_types.h
drivers/gpu/drm/i915/gem/i915_gem_region.c
drivers/gpu/drm/i915/gem/i915_gem_region.h
drivers/gpu/drm/i915/gem/i915_gem_shmem.c
drivers/gpu/drm/i915/gem/i915_gem_stolen.c
drivers/gpu/drm/i915/gem/i915_gem_stolen.h
drivers/gpu/drm/i915/gem/i915_gem_ttm.c
drivers/gpu/drm/i915/gem/i915_gem_ttm.h
drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
drivers/gpu/drm/i915/gem/selftests/mock_context.c
drivers/gpu/drm/i915/gt/gen6_ppgtt.c
drivers/gpu/drm/i915/gt/gen8_engine_cs.c
drivers/gpu/drm/i915/gt/gen8_engine_cs.h
drivers/gpu/drm/i915/gt/gen8_ppgtt.c
drivers/gpu/drm/i915/gt/intel_context.c
drivers/gpu/drm/i915/gt/intel_context.h
drivers/gpu/drm/i915/gt/intel_context_types.h
drivers/gpu/drm/i915/gt/intel_engine.h
drivers/gpu/drm/i915/gt/intel_engine_cs.c
drivers/gpu/drm/i915/gt/intel_engine_types.h
drivers/gpu/drm/i915/gt/intel_engine_user.c
drivers/gpu/drm/i915/gt/intel_execlists_submission.c
drivers/gpu/drm/i915/gt/intel_ggtt.c
drivers/gpu/drm/i915/gt/intel_gpu_commands.h
drivers/gpu/drm/i915/gt/intel_gt.c
drivers/gpu/drm/i915/gt/intel_gt.h
drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c
drivers/gpu/drm/i915/gt/intel_gt_debugfs.c
drivers/gpu/drm/i915/gt/intel_gt_debugfs.h
drivers/gpu/drm/i915/gt/intel_gt_gmch.c [new file with mode: 0644]
drivers/gpu/drm/i915/gt/intel_gt_gmch.h [new file with mode: 0644]
drivers/gpu/drm/i915/gt/intel_gt_pm.c
drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.h
drivers/gpu/drm/i915/gt/intel_gt_regs.h
drivers/gpu/drm/i915/gt/intel_gt_sysfs.c [new file with mode: 0644]
drivers/gpu/drm/i915/gt/intel_gt_sysfs.h [new file with mode: 0644]
drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c [new file with mode: 0644]
drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.h [new file with mode: 0644]
drivers/gpu/drm/i915/gt/intel_gt_types.h
drivers/gpu/drm/i915/gt/intel_gtt.c
drivers/gpu/drm/i915/gt/intel_gtt.h
drivers/gpu/drm/i915/gt/intel_hwconfig.h [new file with mode: 0644]
drivers/gpu/drm/i915/gt/intel_lrc.c
drivers/gpu/drm/i915/gt/intel_lrc.h
drivers/gpu/drm/i915/gt/intel_migrate.c
drivers/gpu/drm/i915/gt/intel_ppgtt.c
drivers/gpu/drm/i915/gt/intel_rc6.c
drivers/gpu/drm/i915/gt/intel_region_lmem.c
drivers/gpu/drm/i915/gt/intel_reset.c
drivers/gpu/drm/i915/gt/intel_ring_submission.c
drivers/gpu/drm/i915/gt/intel_rps.c
drivers/gpu/drm/i915/gt/intel_rps.h
drivers/gpu/drm/i915/gt/intel_rps_types.h
drivers/gpu/drm/i915/gt/intel_sseu.c
drivers/gpu/drm/i915/gt/intel_sseu.h
drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c
drivers/gpu/drm/i915/gt/intel_workarounds.c
drivers/gpu/drm/i915/gt/selftest_execlists.c
drivers/gpu/drm/i915/gt/selftest_lrc.c
drivers/gpu/drm/i915/gt/selftest_migrate.c
drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h
drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h [new file with mode: 0644]
drivers/gpu/drm/i915/gt/uc/intel_guc.c
drivers/gpu/drm/i915/gt/uc/intel_guc.h
drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c [new file with mode: 0644]
drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h [new file with mode: 0644]
drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c [new file with mode: 0644]
drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
drivers/gpu/drm/i915/gt/uc/selftest_guc.c
drivers/gpu/drm/i915/i915_debugfs.c
drivers/gpu/drm/i915/i915_driver.c
drivers/gpu/drm/i915/i915_drm_client.c [new file with mode: 0644]
drivers/gpu/drm/i915/i915_drm_client.h [new file with mode: 0644]
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_file_private.h
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_gpu_error.c
drivers/gpu/drm/i915/i915_gpu_error.h
drivers/gpu/drm/i915/i915_params.c
drivers/gpu/drm/i915/i915_params.h
drivers/gpu/drm/i915/i915_query.c
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/i915/i915_reg_defs.h
drivers/gpu/drm/i915/i915_sysfs.c
drivers/gpu/drm/i915/i915_sysfs.h
drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
drivers/gpu/drm/i915/i915_vma.c
drivers/gpu/drm/i915/i915_vma.h
drivers/gpu/drm/i915/i915_vma_resource.c
drivers/gpu/drm/i915/i915_vma_resource.h
drivers/gpu/drm/i915/i915_vma_types.h
drivers/gpu/drm/i915/intel_memory_region.c
drivers/gpu/drm/i915/intel_memory_region.h
drivers/gpu/drm/i915/intel_region_ttm.c
drivers/gpu/drm/i915/intel_region_ttm.h
drivers/gpu/drm/i915/intel_uncore.c
drivers/gpu/drm/i915/intel_uncore.h
drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
drivers/gpu/drm/i915/selftests/mock_gem_device.c
drivers/gpu/drm/i915/selftests/mock_region.c
include/uapi/drm/i915_drm.h

diff --git a/Documentation/gpu/drm-usage-stats.rst b/Documentation/gpu/drm-usage-stats.rst
new file mode 100644 (file)
index 0000000..6c9f166
--- /dev/null
@@ -0,0 +1,112 @@
+.. _drm-client-usage-stats:
+
+======================
+DRM client usage stats
+======================
+
+DRM drivers can choose to export partly standardised text output via the
+`fops->show_fdinfo()` as part of the driver specific file operations registered
+in the `struct drm_driver` object registered with the DRM core.
+
+One purpose of this output is to enable writing userspace monitoring tools,
+similar to `top(1)`, which are as generic as practically feasible.
+
+Given the differences between various DRM drivers, the specification of the
+output is split into common and driver specific parts. Having said that,
+wherever possible an effort should still be made to standardise as much of
+the output as feasible.
+
+File format specification
+=========================
+
+- File shall contain one key value pair per line of text.
+- Colon character (`:`) must be used to delimit keys and values.
+- All keys shall be prefixed with `drm-`.
+- Whitespace between the delimiter and first non-whitespace character shall be
+  ignored when parsing.
+- Neither keys nor values are allowed to contain whitespace characters.
+- Numerical key value pairs can end with an optional unit string.
+- The data type of each value is fixed as defined in the specification.
+
+Key types
+---------
+
+1. Mandatory, fully standardised.
+2. Optional, fully standardised.
+3. Driver specific.
+
+Data types
+----------
+
+- <uint> - Unsigned integer without defining the maximum value.
+- <str> - String excluding any above defined reserved characters or whitespace.
+
+Mandatory fully standardised keys
+---------------------------------
+
+- drm-driver: <str>
+
+The string shall contain the name the driver registered via the respective
+`struct drm_driver` data structure.
+
+Optional fully standardised keys
+--------------------------------
+
+- drm-pdev: <aaaa:bb.cc.d>
+
+For PCI devices this should contain the PCI slot address of the device in
+question.
+
+- drm-client-id: <uint>
+
+Unique value relating to the open DRM file descriptor used to distinguish
+duplicated and shared file descriptors. Conceptually the value should map 1:1
+to the in-kernel representation of `struct drm_file` instances.
+
+The value shall be either globally unique, or unique within the scope of each
+device, in which case `drm-pdev` shall be present as well.
+
+Userspace should use the above described criteria to associate data with
+individual clients and make sure not to double account any usage statistics.
+
+- drm-engine-<str>: <uint> ns
+
+GPUs usually contain multiple execution engines. Each shall be given a stable
+and unique name (str), with possible values documented in the driver specific
+documentation.
+
+The value shall be the time, in the specified units, which the respective GPU
+engine spent busy executing workloads belonging to this client.
+
+Values are not required to be monotonic at all times if that makes the driver
+implementation easier, but are required to catch up with the previously reported
+larger value within a reasonable period. Upon observing a value lower than what
+was previously read, userspace is expected to stay with the larger previous
+value until a monotonic update is seen.
+
+- drm-engine-capacity-<str>: <uint>
+
+The engine identifier string must be the same as the one specified in the
+drm-engine-<str> tag, and the value shall be a number greater than zero in
+case the exported engine corresponds to a group of identical hardware engines.
+
+In the absence of this tag the parser shall assume a capacity of one. Zero
+capacity is not allowed.
+
+- drm-memory-<str>: <uint> [KiB|MiB]
+
+Each possible memory type which can be used to store buffer objects by the
+GPU in question shall be given a stable and unique name to be returned as the
+string here.
+
+The value shall reflect the amount of storage currently consumed by the
+buffer objects belonging to this client, in the respective memory region.
+
+Default unit shall be bytes with optional unit specifiers of 'KiB' or 'MiB'
+indicating kibi- or mebi-bytes.
+
+===============================
+Driver specific implementations
+===============================
+
+:ref:`i915-usage-stats`
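As a companion to the specification above, a minimal sketch of a userspace sampler that reads one fdinfo file and extracts the standardised keys. This is illustrative only and not part of this series; the path handling, the struct layout, the helper name and the selection of keys are assumptions:

    /*
     * Illustrative sketch, not part of this series: sample the
     * standardised "drm-*" keys from a single fdinfo file. The struct
     * layout and choice of keys are assumptions only.
     */
    #include <stdio.h>
    #include <string.h>

    struct drm_client_sample {
            char driver[64];
            char pdev[64];
            unsigned long long client_id;
            unsigned long long render_ns;   /* drm-engine-render busy time */
    };

    static int sample_fdinfo(const char *path, struct drm_client_sample *s)
    {
            FILE *f = fopen(path, "r");
            char line[256];

            if (!f)
                    return -1;

            memset(s, 0, sizeof(*s));
            while (fgets(line, sizeof(line), f)) {
                    /* One "key: value" pair per line; whitespace after ':' is ignored. */
                    sscanf(line, "drm-driver: %63s", s->driver);
                    sscanf(line, "drm-pdev: %63s", s->pdev);
                    sscanf(line, "drm-client-id: %llu", &s->client_id);
                    sscanf(line, "drm-engine-render: %llu ns", &s->render_ns);
            }
            fclose(f);
            return 0;
    }

Two such samples taken a known time apart, matched on drm-client-id (plus drm-pdev when ids are only unique per device), give the per-client busy-time delta without double accounting shared descriptors.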
index bcaefc9..cfc64f5 100644 (file)
@@ -709,3 +709,31 @@ The style guide for ``i915_reg.h``.
 
 .. kernel-doc:: drivers/gpu/drm/i915/i915_reg.h
    :doc: The i915 register macro definition style guide
+
+.. _i915-usage-stats:
+
+i915 DRM client usage stats implementation
+==========================================
+
+The drm/i915 driver implements the DRM client usage stats specification as
+documented in :ref:`drm-client-usage-stats`.
+
+Example of the output showing the implemented key value pairs and the entirety
+of the currently possible format options:
+
+::
+
+      pos:    0
+      flags:  0100002
+      mnt_id: 21
+      drm-driver: i915
+      drm-pdev:   0000:00:02.0
+      drm-client-id:      7
+      drm-engine-render:  9288864723 ns
+      drm-engine-copy:    2035071108 ns
+      drm-engine-video:   0 ns
+      drm-engine-capacity-video:   2
+      drm-engine-video-enhance:   0 ns
+
+Possible `drm-engine-` key names are: `render`, `copy`, `video` and
+`video-enhance`.
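Building on the example output above, a minimal sketch of how a monitoring tool might combine two samples of a drm-engine-<str> value with drm-engine-capacity-<str> into a busyness percentage, clamping the occasional backwards step as the specification allows. The helper is hypothetical and not part of the i915 changes:

    /*
     * Illustrative helper, not part of the i915 changes: turn two samples
     * of one drm-engine-<str> value, taken wall_ns apart, into a busyness
     * percentage. 'capacity' is the matching drm-engine-capacity-<str>
     * value and defaults to 1 when the key is absent.
     */
    static double engine_busy_pct(unsigned long long prev_ns,
                                  unsigned long long curr_ns,
                                  unsigned long long wall_ns,
                                  unsigned int capacity)
    {
            /* Values may transiently go backwards; treat that as no progress. */
            unsigned long long delta = curr_ns > prev_ns ? curr_ns - prev_ns : 0;

            if (!wall_ns || !capacity)
                    return 0.0;

            return 100.0 * (double)delta / ((double)wall_ns * capacity);
    }

With the example above, a render value advancing by 500000000 ns over a one second window reports 50% busy, while the video class would additionally divide by its capacity of 2.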
index b9c1214..b99dede 100644 (file)
@@ -10,6 +10,7 @@ Linux GPU Driver Developer's Guide
    drm-kms
    drm-kms-helpers
    drm-uapi
+   drm-usage-stats
    driver-uapi
    drm-client
    drivers
index 98c5450..b8d45d2 100644 (file)
@@ -4,7 +4,7 @@ config DRM_I915
        depends on DRM
        depends on X86 && PCI
        depends on !PREEMPT_RT
-       select INTEL_GTT
+       select INTEL_GTT if X86
        select INTERVAL_TREE
        # we need shmfs for the swappable backing store, and in particular
        # the shmem_readpage() which depends upon tmpfs
index 7df74a7..024b69b 100644 (file)
@@ -33,6 +33,7 @@ subdir-ccflags-y += -I$(srctree)/$(src)
 
 # core driver code
 i915-y += i915_driver.o \
+         i915_drm_client.o \
          i915_config.o \
          i915_getparam.o \
          i915_ioctl.o \
@@ -106,6 +107,8 @@ gt-y += \
        gt/intel_gt_pm_debugfs.o \
        gt/intel_gt_pm_irq.o \
        gt/intel_gt_requests.o \
+       gt/intel_gt_sysfs.o \
+       gt/intel_gt_sysfs_pm.o \
        gt/intel_gtt.o \
        gt/intel_llc.o \
        gt/intel_lrc.o \
@@ -125,6 +128,8 @@ gt-y += \
        gt/intel_workarounds.o \
        gt/shmem_utils.o \
        gt/sysfs_engines.o
+# x86 intel-gtt module support
+gt-$(CONFIG_X86) += gt/intel_gt_gmch.o
 # autogenerated null render state
 gt-y += \
        gt/gen6_renderstate.o \
@@ -185,9 +190,11 @@ i915-y += gt/uc/intel_uc.o \
          gt/uc/intel_uc_fw.o \
          gt/uc/intel_guc.o \
          gt/uc/intel_guc_ads.o \
+         gt/uc/intel_guc_capture.o \
          gt/uc/intel_guc_ct.o \
          gt/uc/intel_guc_debugfs.o \
          gt/uc/intel_guc_fw.o \
+         gt/uc/intel_guc_hwconfig.o \
          gt/uc/intel_guc_log.o \
          gt/uc/intel_guc_log_debugfs.o \
          gt/uc/intel_guc_rc.o \
index 05dd7db..3af4930 100644 (file)
@@ -300,5 +300,5 @@ void intel_dpt_destroy(struct i915_address_space *vm)
 {
        struct i915_dpt *dpt = i915_vm_to_dpt(vm);
 
-       i915_vm_close(&dpt->vm);
+       i915_vm_put(&dpt->vm);
 }
index 23cfe2e..421f723 100644 (file)
@@ -1981,7 +1981,7 @@ intel_user_framebuffer_create(struct drm_device *dev,
 
        /* object is backed with LMEM for discrete */
        i915 = to_i915(obj->base.dev);
-       if (HAS_LMEM(i915) && !i915_gem_object_can_migrate(obj, INTEL_REGION_LMEM)) {
+       if (HAS_LMEM(i915) && !i915_gem_object_can_migrate(obj, INTEL_REGION_LMEM_0)) {
                /* object is "remote", not in local memory */
                i915_gem_object_put(obj);
                return ERR_PTR(-EREMOTE);
index a307b49..bd6e7c9 100644 (file)
@@ -140,7 +140,7 @@ retry:
        if (!ret && phys_cursor)
                ret = i915_gem_object_attach_phys(obj, alignment);
        else if (!ret && HAS_LMEM(dev_priv))
-               ret = i915_gem_object_migrate(obj, &ww, INTEL_REGION_LMEM);
+               ret = i915_gem_object_migrate(obj, &ww, INTEL_REGION_LMEM_0);
        /* TODO: Do we need to sync when migration becomes async? */
        if (!ret)
                ret = i915_gem_object_pin_pages(obj);
index 2cd62a1..2213361 100644 (file)
@@ -279,7 +279,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
                /* Our framebuffer is the entirety of fbdev's system memory */
                info->fix.smem_start =
                        (unsigned long)(ggtt->gmadr.start + vma->node.start);
-               info->fix.smem_len = vma->node.size;
+               info->fix.smem_len = vma->size;
        }
 
        vaddr = i915_vma_pin_iomap(vma);
@@ -290,7 +290,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
                goto out_unpin;
        }
        info->screen_base = vaddr;
-       info->screen_size = vma->node.size;
+       info->screen_size = vma->size;
 
        drm_fb_helper_fill_info(info, &ifbdev->helper, sizes);
 
index d7b1de4..41ff7bf 100644 (file)
@@ -3,6 +3,7 @@
  * Copyright © 2021 Intel Corporation
  */
 
+#include "gem/i915_gem_region.h"
 #include "i915_drv.h"
 #include "intel_atomic_plane.h"
 #include "intel_display.h"
@@ -46,16 +47,55 @@ static struct i915_vma *
 initial_plane_vma(struct drm_i915_private *i915,
                  struct intel_initial_plane_config *plane_config)
 {
-       struct intel_memory_region *mem = i915->mm.stolen_region;
+       struct intel_memory_region *mem;
        struct drm_i915_gem_object *obj;
        struct i915_vma *vma;
+       resource_size_t phys_base;
        u32 base, size;
+       u64 pinctl;
 
-       if (!mem || plane_config->size == 0)
+       if (plane_config->size == 0)
+               return NULL;
+
+       base = round_down(plane_config->base, I915_GTT_MIN_ALIGNMENT);
+       if (IS_DGFX(i915)) {
+               gen8_pte_t __iomem *gte = to_gt(i915)->ggtt->gsm;
+               gen8_pte_t pte;
+
+               gte += base / I915_GTT_PAGE_SIZE;
+
+               pte = ioread64(gte);
+               if (!(pte & GEN12_GGTT_PTE_LM)) {
+                       drm_err(&i915->drm,
+                               "Initial plane programming missing PTE_LM bit\n");
+                       return NULL;
+               }
+
+               phys_base = pte & I915_GTT_PAGE_MASK;
+               mem = i915->mm.regions[INTEL_REGION_LMEM_0];
+
+               /*
+                * We don't currently expect this to ever be placed in the
+                * stolen portion.
+                */
+               if (phys_base >= resource_size(&mem->region)) {
+                       drm_err(&i915->drm,
+                               "Initial plane programming using invalid range, phys_base=%pa\n",
+                               &phys_base);
+                       return NULL;
+               }
+
+               drm_dbg(&i915->drm,
+                       "Using phys_base=%pa, based on initial plane programming\n",
+                       &phys_base);
+       } else {
+               phys_base = base;
+               mem = i915->mm.stolen_region;
+       }
+
+       if (!mem)
                return NULL;
 
-       base = round_down(plane_config->base,
-                         I915_GTT_MIN_ALIGNMENT);
        size = round_up(plane_config->base + plane_config->size,
                        mem->min_page_size);
        size -= base;
@@ -66,10 +106,11 @@ initial_plane_vma(struct drm_i915_private *i915,
         * features.
         */
        if (IS_ENABLED(CONFIG_FRAMEBUFFER_CONSOLE) &&
+           mem == i915->mm.stolen_region &&
            size * 2 > i915->stolen_usable_size)
                return NULL;
 
-       obj = i915_gem_object_create_stolen_for_preallocated(i915, base, size);
+       obj = i915_gem_object_create_region_at(mem, phys_base, size, 0);
        if (IS_ERR(obj))
                return NULL;
 
@@ -99,7 +140,10 @@ initial_plane_vma(struct drm_i915_private *i915,
        if (IS_ERR(vma))
                goto err_obj;
 
-       if (i915_ggtt_pin(vma, NULL, 0, PIN_MAPPABLE | PIN_OFFSET_FIXED | base))
+       pinctl = PIN_GLOBAL | PIN_OFFSET_FIXED | base;
+       if (HAS_GMCH(i915))
+               pinctl |= PIN_MAPPABLE;
+       if (i915_vma_pin(vma, 0, 0, pinctl))
                goto err_obj;
 
        if (i915_gem_object_is_tiled(obj) &&
index 9ae294e..aa4f623 100644 (file)
@@ -1030,23 +1030,44 @@ static void free_engines_rcu(struct rcu_head *rcu)
        free_engines(engines);
 }
 
+static void accumulate_runtime(struct i915_drm_client *client,
+                              struct i915_gem_engines *engines)
+{
+       struct i915_gem_engines_iter it;
+       struct intel_context *ce;
+
+       if (!client)
+               return;
+
+       /* Transfer accumulated runtime to the parent GEM context. */
+       for_each_gem_engine(ce, engines, it) {
+               unsigned int class = ce->engine->uabi_class;
+
+               GEM_BUG_ON(class >= ARRAY_SIZE(client->past_runtime));
+               atomic64_add(intel_context_get_total_runtime_ns(ce),
+                            &client->past_runtime[class]);
+       }
+}
+
 static int
 engines_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 {
        struct i915_gem_engines *engines =
                container_of(fence, typeof(*engines), fence);
+       struct i915_gem_context *ctx = engines->ctx;
 
        switch (state) {
        case FENCE_COMPLETE:
                if (!list_empty(&engines->link)) {
-                       struct i915_gem_context *ctx = engines->ctx;
                        unsigned long flags;
 
                        spin_lock_irqsave(&ctx->stale.lock, flags);
                        list_del(&engines->link);
                        spin_unlock_irqrestore(&ctx->stale.lock, flags);
                }
-               i915_gem_context_put(engines->ctx);
+               accumulate_runtime(ctx->client, engines);
+               i915_gem_context_put(ctx);
+
                break;
 
        case FENCE_FREE:
@@ -1256,6 +1277,9 @@ static void i915_gem_context_release_work(struct work_struct *work)
        if (ctx->pxp_wakeref)
                intel_runtime_pm_put(&ctx->i915->runtime_pm, ctx->pxp_wakeref);
 
+       if (ctx->client)
+               i915_drm_client_put(ctx->client);
+
        mutex_destroy(&ctx->engines_mutex);
        mutex_destroy(&ctx->lut_mutex);
 
@@ -1466,7 +1490,7 @@ static void set_closed_name(struct i915_gem_context *ctx)
 
 static void context_close(struct i915_gem_context *ctx)
 {
-       struct i915_address_space *vm;
+       struct i915_drm_client *client;
 
        /* Flush any concurrent set_engines() */
        mutex_lock(&ctx->engines_mutex);
@@ -1479,19 +1503,6 @@ static void context_close(struct i915_gem_context *ctx)
 
        set_closed_name(ctx);
 
-       vm = ctx->vm;
-       if (vm) {
-               /* i915_vm_close drops the final reference, which is a bit too
-                * early and could result in surprises with concurrent
-                * operations racing with thist ctx close. Keep a full reference
-                * until the end.
-                */
-               i915_vm_get(vm);
-               i915_vm_close(vm);
-       }
-
-       ctx->file_priv = ERR_PTR(-EBADF);
-
        /*
         * The LUT uses the VMA as a backpointer to unref the object,
         * so we need to clear the LUT before we close all the VMA (inside
@@ -1499,10 +1510,19 @@ static void context_close(struct i915_gem_context *ctx)
         */
        lut_close(ctx);
 
+       ctx->file_priv = ERR_PTR(-EBADF);
+
        spin_lock(&ctx->i915->gem.contexts.lock);
        list_del(&ctx->link);
        spin_unlock(&ctx->i915->gem.contexts.lock);
 
+       client = ctx->client;
+       if (client) {
+               spin_lock(&client->ctx_lock);
+               list_del_rcu(&ctx->client_link);
+               spin_unlock(&client->ctx_lock);
+       }
+
        mutex_unlock(&ctx->mutex);
 
        /*
@@ -1597,12 +1617,8 @@ i915_gem_create_context(struct drm_i915_private *i915,
                }
                vm = &ppgtt->vm;
        }
-       if (vm) {
-               ctx->vm = i915_vm_open(vm);
-
-               /* i915_vm_open() takes a reference */
-               i915_vm_put(vm);
-       }
+       if (vm)
+               ctx->vm = vm;
 
        mutex_init(&ctx->engines_mutex);
        if (pc->num_user_engines >= 0) {
@@ -1652,7 +1668,7 @@ err_engines:
        free_engines(e);
 err_vm:
        if (ctx->vm)
-               i915_vm_close(ctx->vm);
+               i915_vm_put(ctx->vm);
 err_ctx:
        kfree(ctx);
        return ERR_PTR(err);
@@ -1679,6 +1695,8 @@ static void gem_context_register(struct i915_gem_context *ctx,
        ctx->file_priv = fpriv;
 
        ctx->pid = get_task_pid(current, PIDTYPE_PID);
+       ctx->client = i915_drm_client_get(fpriv->client);
+
        snprintf(ctx->name, sizeof(ctx->name), "%s[%d]",
                 current->comm, pid_nr(ctx->pid));
 
@@ -1686,6 +1704,10 @@ static void gem_context_register(struct i915_gem_context *ctx,
        old = xa_store(&fpriv->context_xa, id, ctx, GFP_KERNEL);
        WARN_ON(old);
 
+       spin_lock(&ctx->client->ctx_lock);
+       list_add_tail_rcu(&ctx->client_link, &ctx->client->ctx_list);
+       spin_unlock(&ctx->client->ctx_lock);
+
        spin_lock(&i915->gem.contexts.lock);
        list_add_tail(&ctx->link, &i915->gem.contexts.list);
        spin_unlock(&i915->gem.contexts.lock);
@@ -1836,7 +1858,7 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv,
        if (err)
                return err;
 
-       i915_vm_open(vm);
+       i915_vm_get(vm);
 
        GEM_BUG_ON(id == 0); /* reserved for invalid/unassigned ppgtt */
        args->value = id;
index 282cdb8..cb78214 100644 (file)
@@ -293,6 +293,12 @@ struct i915_gem_context {
        /** @link: place with &drm_i915_private.context_list */
        struct list_head link;
 
+       /** @client: struct i915_drm_client */
+       struct i915_drm_client *client;
+
+       /** @client_link: for linking onto &i915_drm_client.ctx_list */
+       struct list_head client_link;
+
        /**
         * @ref: reference count
         *
index c6eb023..5802692 100644 (file)
@@ -123,7 +123,7 @@ __i915_gem_object_create_user_ext(struct drm_i915_private *i915, u64 size,
         */
        flags = I915_BO_ALLOC_USER;
 
-       ret = mr->ops->init_object(mr, obj, size, 0, flags);
+       ret = mr->ops->init_object(mr, obj, I915_BO_INVALID_OFFSET, size, 0, flags);
        if (ret)
                goto object_free;
 
index 1391723..f5062d0 100644 (file)
@@ -66,15 +66,6 @@ err:
        return ERR_PTR(ret);
 }
 
-static void i915_gem_unmap_dma_buf(struct dma_buf_attachment *attachment,
-                                  struct sg_table *sg,
-                                  enum dma_data_direction dir)
-{
-       dma_unmap_sgtable(attachment->dev, sg, dir, DMA_ATTR_SKIP_CPU_SYNC);
-       sg_free_table(sg);
-       kfree(sg);
-}
-
 static int i915_gem_dmabuf_vmap(struct dma_buf *dma_buf,
                                struct iosys_map *map)
 {
@@ -102,11 +93,15 @@ static void i915_gem_dmabuf_vunmap(struct dma_buf *dma_buf,
 static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma)
 {
        struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
+       struct drm_i915_private *i915 = to_i915(obj->base.dev);
        int ret;
 
        if (obj->base.size < vma->vm_end - vma->vm_start)
                return -EINVAL;
 
+       if (HAS_LMEM(i915))
+               return drm_gem_prime_mmap(&obj->base, vma);
+
        if (!obj->base.filp)
                return -ENODEV;
 
@@ -209,7 +204,7 @@ static const struct dma_buf_ops i915_dmabuf_ops =  {
        .attach = i915_gem_dmabuf_attach,
        .detach = i915_gem_dmabuf_detach,
        .map_dma_buf = i915_gem_map_dma_buf,
-       .unmap_dma_buf = i915_gem_unmap_dma_buf,
+       .unmap_dma_buf = drm_gem_unmap_dma_buf,
        .release = drm_gem_dmabuf_release,
        .mmap = i915_gem_dmabuf_mmap,
        .vmap = i915_gem_dmabuf_vmap,
index 78f8797..0d79ba8 100644 (file)
@@ -1320,10 +1320,8 @@ static void *reloc_vaddr(struct i915_vma *vma,
 static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
 {
        if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) {
-               if (flushes & CLFLUSH_BEFORE) {
-                       clflushopt(addr);
-                       mb();
-               }
+               if (flushes & CLFLUSH_BEFORE)
+                       drm_clflush_virt_range(addr, sizeof(*addr));
 
                *addr = value;
 
@@ -1335,7 +1333,7 @@ static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
                 * to ensure ordering of clflush wrt to the system.
                 */
                if (flushes & CLFLUSH_AFTER)
-                       clflushopt(addr);
+                       drm_clflush_virt_range(addr, sizeof(*addr));
        } else
                *addr = value;
 }
@@ -2689,6 +2687,11 @@ eb_select_engine(struct i915_execbuffer *eb)
        if (err)
                goto err;
 
+       if (!i915_vm_tryget(ce->vm)) {
+               err = -ENOENT;
+               goto err;
+       }
+
        eb->context = ce;
        eb->gt = ce->engine->gt;
 
@@ -2712,6 +2715,7 @@ eb_put_engine(struct i915_execbuffer *eb)
 {
        struct intel_context *child;
 
+       i915_vm_put(eb->context->vm);
        intel_gt_pm_put(eb->gt);
        for_each_child(eb->context, child)
                intel_context_put(child);
index 444f826..47e43dc 100644 (file)
@@ -100,7 +100,7 @@ __i915_gem_object_create_lmem_with_ps(struct drm_i915_private *i915,
                                      resource_size_t page_size,
                                      unsigned int flags)
 {
-       return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_LMEM],
+       return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_LMEM_0],
                                             size, page_size, flags);
 }
 
@@ -135,6 +135,6 @@ i915_gem_object_create_lmem(struct drm_i915_private *i915,
                            resource_size_t size,
                            unsigned int flags)
 {
-       return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_LMEM],
+       return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_LMEM_0],
                                             size, 0, flags);
 }
index c3ea243..0c5c438 100644 (file)
@@ -70,7 +70,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
         * mmap ioctl is disallowed for all discrete platforms,
         * and for all platforms with GRAPHICS_VER > 12.
         */
-       if (IS_DGFX(i915) || GRAPHICS_VER(i915) > 12)
+       if (IS_DGFX(i915) || GRAPHICS_VER_FULL(i915) > IP_VER(12, 0))
                return -EOPNOTSUPP;
 
        if (args->flags & ~(I915_MMAP_WC))
index fd54eb8..2c88bdb 100644 (file)
@@ -631,6 +631,8 @@ struct drm_i915_gem_object {
 
                struct drm_mm_node *stolen;
 
+               resource_size_t bo_offset;
+
                unsigned long scratch;
                u64 encode;
 
index 6cf9446..3428ddf 100644 (file)
@@ -27,11 +27,12 @@ void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj)
        mutex_unlock(&mem->objects.lock);
 }
 
-struct drm_i915_gem_object *
-i915_gem_object_create_region(struct intel_memory_region *mem,
-                             resource_size_t size,
-                             resource_size_t page_size,
-                             unsigned int flags)
+static struct drm_i915_gem_object *
+__i915_gem_object_create_region(struct intel_memory_region *mem,
+                               resource_size_t offset,
+                               resource_size_t size,
+                               resource_size_t page_size,
+                               unsigned int flags)
 {
        struct drm_i915_gem_object *obj;
        resource_size_t default_page_size;
@@ -62,6 +63,9 @@ i915_gem_object_create_region(struct intel_memory_region *mem,
 
        size = round_up(size, default_page_size);
 
+       if (default_page_size == size)
+               flags |= I915_BO_ALLOC_CONTIGUOUS;
+
        GEM_BUG_ON(!size);
        GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_MIN_ALIGNMENT));
 
@@ -83,7 +87,7 @@ i915_gem_object_create_region(struct intel_memory_region *mem,
        if (default_page_size < mem->min_page_size)
                flags |= I915_BO_ALLOC_PM_EARLY;
 
-       err = mem->ops->init_object(mem, obj, size, page_size, flags);
+       err = mem->ops->init_object(mem, obj, offset, size, page_size, flags);
        if (err)
                goto err_object_free;
 
@@ -95,6 +99,40 @@ err_object_free:
        return ERR_PTR(err);
 }
 
+struct drm_i915_gem_object *
+i915_gem_object_create_region(struct intel_memory_region *mem,
+                             resource_size_t size,
+                             resource_size_t page_size,
+                             unsigned int flags)
+{
+       return __i915_gem_object_create_region(mem, I915_BO_INVALID_OFFSET,
+                                              size, page_size, flags);
+}
+
+struct drm_i915_gem_object *
+i915_gem_object_create_region_at(struct intel_memory_region *mem,
+                                resource_size_t offset,
+                                resource_size_t size,
+                                unsigned int flags)
+{
+       GEM_BUG_ON(offset == I915_BO_INVALID_OFFSET);
+
+       if (GEM_WARN_ON(!IS_ALIGNED(size, mem->min_page_size)) ||
+           GEM_WARN_ON(!IS_ALIGNED(offset, mem->min_page_size)))
+               return ERR_PTR(-EINVAL);
+
+       if (range_overflows(offset, size, resource_size(&mem->region)))
+               return ERR_PTR(-EINVAL);
+
+       if (!(flags & I915_BO_ALLOC_GPU_ONLY) &&
+           offset + size > mem->io_size &&
+           !i915_ggtt_has_aperture(to_gt(mem->i915)->ggtt))
+               return ERR_PTR(-ENOSPC);
+
+       return __i915_gem_object_create_region(mem, offset, size, 0,
+                                              flags | I915_BO_ALLOC_CONTIGUOUS);
+}
+
 /**
  * i915_gem_process_region - Iterate over all objects of a region using ops
  * to process and optionally skip objects
index fcaa12d..2dfcc41 100644 (file)
@@ -14,6 +14,8 @@ struct sg_table;
 
 struct i915_gem_apply_to_region;
 
+#define I915_BO_INVALID_OFFSET ((resource_size_t)-1)
+
 /**
  * struct i915_gem_apply_to_region_ops - ops to use when iterating over all
  * region objects.
@@ -56,6 +58,11 @@ i915_gem_object_create_region(struct intel_memory_region *mem,
                              resource_size_t size,
                              resource_size_t page_size,
                              unsigned int flags);
+struct drm_i915_gem_object *
+i915_gem_object_create_region_at(struct intel_memory_region *mem,
+                                resource_size_t offset,
+                                resource_size_t size,
+                                unsigned int flags);
 
 int i915_gem_process_region(struct intel_memory_region *mr,
                            struct i915_gem_apply_to_region *apply);
index 3a1c782..9e5faf0 100644 (file)
@@ -552,6 +552,7 @@ static int __create_shmem(struct drm_i915_private *i915,
 
 static int shmem_object_init(struct intel_memory_region *mem,
                             struct drm_i915_gem_object *obj,
+                            resource_size_t offset,
                             resource_size_t size,
                             resource_size_t page_size,
                             unsigned int flags)
index 0bf8f61..143f61a 100644 (file)
@@ -12,6 +12,8 @@
 
 #include "gem/i915_gem_lmem.h"
 #include "gem/i915_gem_region.h"
+#include "gt/intel_gt.h"
+#include "gt/intel_region_lmem.h"
 #include "i915_drv.h"
 #include "i915_gem_stolen.h"
 #include "i915_reg.h"
@@ -492,7 +494,7 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem)
 
        /* Exclude the reserved region from driver use */
        mem->region.end = reserved_base - 1;
-       mem->io_size = resource_size(&mem->region);
+       mem->io_size = min(mem->io_size, resource_size(&mem->region));
 
        /* It is possible for the reserved area to end before the end of stolen
         * memory, so just consider the start. */
@@ -679,6 +681,7 @@ static int __i915_gem_object_create_stolen(struct intel_memory_region *mem,
 
 static int _i915_gem_object_stolen_init(struct intel_memory_region *mem,
                                        struct drm_i915_gem_object *obj,
+                                       resource_size_t offset,
                                        resource_size_t size,
                                        resource_size_t page_size,
                                        unsigned int flags)
@@ -693,12 +696,32 @@ static int _i915_gem_object_stolen_init(struct intel_memory_region *mem,
        if (size == 0)
                return -EINVAL;
 
+       /*
+        * With discrete devices, where we lack a mappable aperture there is no
+        * possible way to ever access this memory on the CPU side.
+        */
+       if (mem->type == INTEL_MEMORY_STOLEN_LOCAL && !mem->io_size &&
+           !(flags & I915_BO_ALLOC_GPU_ONLY))
+               return -ENOSPC;
+
        stolen = kzalloc(sizeof(*stolen), GFP_KERNEL);
        if (!stolen)
                return -ENOMEM;
 
-       ret = i915_gem_stolen_insert_node(i915, stolen, size,
-                                         mem->min_page_size);
+       if (offset != I915_BO_INVALID_OFFSET) {
+               drm_dbg(&i915->drm,
+                       "creating preallocated stolen object: stolen_offset=%pa, size=%pa\n",
+                       &offset, &size);
+
+               stolen->start = offset;
+               stolen->size = size;
+               mutex_lock(&i915->mm.stolen_lock);
+               ret = drm_mm_reserve_node(&i915->mm.stolen, stolen);
+               mutex_unlock(&i915->mm.stolen_lock);
+       } else {
+               ret = i915_gem_stolen_insert_node(i915, stolen, size,
+                                                 mem->min_page_size);
+       }
        if (ret)
                goto err_free;
 
@@ -750,11 +773,6 @@ static int init_stolen_lmem(struct intel_memory_region *mem)
        if (GEM_WARN_ON(resource_size(&mem->region) == 0))
                return -ENODEV;
 
-       if (!io_mapping_init_wc(&mem->iomap,
-                               mem->io_start,
-                               mem->io_size))
-               return -EIO;
-
        /*
         * TODO: For stolen lmem we mostly just care about populating the dsm
         * related bits and setting up the drm_mm allocator for the range.
@@ -762,18 +780,26 @@ static int init_stolen_lmem(struct intel_memory_region *mem)
         */
        err = i915_gem_init_stolen(mem);
        if (err)
-               goto err_fini;
+               return err;
+
+       if (mem->io_size && !io_mapping_init_wc(&mem->iomap,
+                                               mem->io_start,
+                                               mem->io_size)) {
+               err = -EIO;
+               goto err_cleanup;
+       }
 
        return 0;
 
-err_fini:
-       io_mapping_fini(&mem->iomap);
+err_cleanup:
+       i915_gem_cleanup_stolen(mem->i915);
        return err;
 }
 
 static int release_stolen_lmem(struct intel_memory_region *mem)
 {
-       io_mapping_fini(&mem->iomap);
+       if (mem->io_size)
+               io_mapping_fini(&mem->iomap);
        i915_gem_cleanup_stolen(mem->i915);
        return 0;
 }
@@ -790,25 +816,43 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
 {
        struct intel_uncore *uncore = &i915->uncore;
        struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
+       resource_size_t dsm_size, dsm_base, lmem_size;
        struct intel_memory_region *mem;
+       resource_size_t io_start, io_size;
        resource_size_t min_page_size;
-       resource_size_t io_start;
-       resource_size_t lmem_size;
-       u64 lmem_base;
 
-       lmem_base = intel_uncore_read64(uncore, GEN12_DSMBASE);
-       if (GEM_WARN_ON(lmem_base >= pci_resource_len(pdev, 2)))
+       if (WARN_ON_ONCE(instance))
                return ERR_PTR(-ENODEV);
 
-       lmem_size = pci_resource_len(pdev, 2) - lmem_base;
-       io_start = pci_resource_start(pdev, 2) + lmem_base;
+       /* Use DSM base address instead for stolen memory */
+       dsm_base = intel_uncore_read64(uncore, GEN12_DSMBASE);
+       if (IS_DG1(uncore->i915)) {
+               lmem_size = pci_resource_len(pdev, 2);
+               if (WARN_ON(lmem_size < dsm_base))
+                       return ERR_PTR(-ENODEV);
+       } else {
+               resource_size_t lmem_range;
+
+               lmem_range = intel_gt_read_register(&i915->gt0, XEHPSDV_TILE0_ADDR_RANGE) & 0xFFFF;
+               lmem_size = lmem_range >> XEHPSDV_TILE_LMEM_RANGE_SHIFT;
+               lmem_size *= SZ_1G;
+       }
+
+       dsm_size = lmem_size - dsm_base;
+       if (pci_resource_len(pdev, 2) < lmem_size) {
+               io_start = 0;
+               io_size = 0;
+       } else {
+               io_start = pci_resource_start(pdev, 2) + dsm_base;
+               io_size = dsm_size;
+       }
 
        min_page_size = HAS_64K_PAGES(i915) ? I915_GTT_PAGE_SIZE_64K :
                                                I915_GTT_PAGE_SIZE_4K;
 
-       mem = intel_memory_region_create(i915, lmem_base, lmem_size,
+       mem = intel_memory_region_create(i915, dsm_base, dsm_size,
                                         min_page_size,
-                                        io_start, lmem_size,
+                                        io_start, io_size,
                                         type, instance,
                                         &i915_region_stolen_lmem_ops);
        if (IS_ERR(mem))
@@ -822,6 +866,7 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
 
        drm_dbg(&i915->drm, "Stolen Local memory IO start: %pa\n",
                &mem->io_start);
+       drm_dbg(&i915->drm, "Stolen Local DSM base: %pa\n", &dsm_base);
 
        intel_memory_region_set_name(mem, "stolen-local");
 
@@ -850,63 +895,6 @@ i915_gem_stolen_smem_setup(struct drm_i915_private *i915, u16 type,
        return mem;
 }
 
-struct drm_i915_gem_object *
-i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *i915,
-                                              resource_size_t stolen_offset,
-                                              resource_size_t size)
-{
-       struct intel_memory_region *mem = i915->mm.stolen_region;
-       struct drm_i915_gem_object *obj;
-       struct drm_mm_node *stolen;
-       int ret;
-
-       if (!drm_mm_initialized(&i915->mm.stolen))
-               return ERR_PTR(-ENODEV);
-
-       drm_dbg(&i915->drm,
-               "creating preallocated stolen object: stolen_offset=%pa, size=%pa\n",
-               &stolen_offset, &size);
-
-       /* KISS and expect everything to be page-aligned */
-       if (GEM_WARN_ON(size == 0) ||
-           GEM_WARN_ON(!IS_ALIGNED(size, mem->min_page_size)) ||
-           GEM_WARN_ON(!IS_ALIGNED(stolen_offset, mem->min_page_size)))
-               return ERR_PTR(-EINVAL);
-
-       stolen = kzalloc(sizeof(*stolen), GFP_KERNEL);
-       if (!stolen)
-               return ERR_PTR(-ENOMEM);
-
-       stolen->start = stolen_offset;
-       stolen->size = size;
-       mutex_lock(&i915->mm.stolen_lock);
-       ret = drm_mm_reserve_node(&i915->mm.stolen, stolen);
-       mutex_unlock(&i915->mm.stolen_lock);
-       if (ret)
-               goto err_free;
-
-       obj = i915_gem_object_alloc();
-       if (!obj) {
-               ret = -ENOMEM;
-               goto err_stolen;
-       }
-
-       ret = __i915_gem_object_create_stolen(mem, obj, stolen);
-       if (ret)
-               goto err_object_free;
-
-       i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE);
-       return obj;
-
-err_object_free:
-       i915_gem_object_free(obj);
-err_stolen:
-       i915_gem_stolen_remove_node(i915, stolen);
-err_free:
-       kfree(stolen);
-       return ERR_PTR(ret);
-}
-
 bool i915_gem_object_is_stolen(const struct drm_i915_gem_object *obj)
 {
        return obj->ops == &i915_gem_object_stolen_ops;
index ccdf7be..d5005a3 100644 (file)
@@ -31,10 +31,6 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
 struct drm_i915_gem_object *
 i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
                              resource_size_t size);
-struct drm_i915_gem_object *
-i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv,
-                                              resource_size_t stolen_offset,
-                                              resource_size_t size);
 
 bool i915_gem_object_is_stolen(const struct drm_i915_gem_object *obj);
 
index b9ae6b0..a878910 100644 (file)
@@ -126,14 +126,22 @@ i915_ttm_select_tt_caching(const struct drm_i915_gem_object *obj)
 static void
 i915_ttm_place_from_region(const struct intel_memory_region *mr,
                           struct ttm_place *place,
+                          resource_size_t offset,
+                          resource_size_t size,
                           unsigned int flags)
 {
        memset(place, 0, sizeof(*place));
        place->mem_type = intel_region_to_ttm_type(mr);
 
+       if (mr->type == INTEL_MEMORY_SYSTEM)
+               return;
+
        if (flags & I915_BO_ALLOC_CONTIGUOUS)
                place->flags |= TTM_PL_FLAG_CONTIGUOUS;
-       if (mr->io_size && mr->io_size < mr->total) {
+       if (offset != I915_BO_INVALID_OFFSET) {
+               place->fpfn = offset >> PAGE_SHIFT;
+               place->lpfn = place->fpfn + (size >> PAGE_SHIFT);
+       } else if (mr->io_size && mr->io_size < mr->total) {
                if (flags & I915_BO_ALLOC_GPU_ONLY) {
                        place->flags |= TTM_PL_FLAG_TOPDOWN;
                } else {
@@ -155,12 +163,14 @@ i915_ttm_placement_from_obj(const struct drm_i915_gem_object *obj,
 
        placement->num_placement = 1;
        i915_ttm_place_from_region(num_allowed ? obj->mm.placements[0] :
-                                  obj->mm.region, requested, flags);
+                                  obj->mm.region, requested, obj->bo_offset,
+                                  obj->base.size, flags);
 
        /* Cache this on object? */
        placement->num_busy_placement = num_allowed;
        for (i = 0; i < placement->num_busy_placement; ++i)
-               i915_ttm_place_from_region(obj->mm.placements[i], busy + i, flags);
+               i915_ttm_place_from_region(obj->mm.placements[i], busy + i,
+                                          obj->bo_offset, obj->base.size, flags);
 
        if (num_allowed == 0) {
                *busy = *requested;
@@ -802,7 +812,8 @@ static int __i915_ttm_migrate(struct drm_i915_gem_object *obj,
        struct ttm_placement placement;
        int ret;
 
-       i915_ttm_place_from_region(mr, &requested, flags);
+       i915_ttm_place_from_region(mr, &requested, obj->bo_offset,
+                                  obj->base.size, flags);
        placement.num_placement = 1;
        placement.num_busy_placement = 1;
        placement.placement = &requested;
@@ -1142,6 +1153,7 @@ void i915_ttm_bo_destroy(struct ttm_buffer_object *bo)
  */
 int __i915_gem_ttm_object_init(struct intel_memory_region *mem,
                               struct drm_i915_gem_object *obj,
+                              resource_size_t offset,
                               resource_size_t size,
                               resource_size_t page_size,
                               unsigned int flags)
@@ -1158,6 +1170,8 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem,
        drm_gem_private_object_init(&i915->drm, &obj->base, size);
        i915_gem_object_init(obj, &i915_gem_ttm_obj_ops, &lock_class, flags);
 
+       obj->bo_offset = offset;
+
        /* Don't put on a region list until we're either locked or fully initialized. */
        obj->mm.region = mem;
        INIT_LIST_HEAD(&obj->mm.region_link);
index 9d698ad..73e371a 100644 (file)
@@ -45,6 +45,7 @@ i915_ttm_to_gem(struct ttm_buffer_object *bo)
 
 int __i915_gem_ttm_object_init(struct intel_memory_region *mem,
                               struct drm_i915_gem_object *obj,
+                              resource_size_t offset,
                               resource_size_t size,
                               resource_size_t page_size,
                               unsigned int flags);
index b071a58..a342fd3 100644 (file)
@@ -88,7 +88,7 @@ out:
 static int igt_dmabuf_import_same_driver_lmem(void *arg)
 {
        struct drm_i915_private *i915 = arg;
-       struct intel_memory_region *lmem = i915->mm.regions[INTEL_REGION_LMEM];
+       struct intel_memory_region *lmem = i915->mm.regions[INTEL_REGION_LMEM_0];
        struct drm_i915_gem_object *obj;
        struct drm_gem_object *import;
        struct dma_buf *dmabuf;
@@ -252,10 +252,10 @@ static int igt_dmabuf_import_same_driver_lmem_smem(void *arg)
        struct drm_i915_private *i915 = arg;
        struct intel_memory_region *regions[2];
 
-       if (!i915->mm.regions[INTEL_REGION_LMEM])
+       if (!i915->mm.regions[INTEL_REGION_LMEM_0])
                return 0;
 
-       regions[0] = i915->mm.regions[INTEL_REGION_LMEM];
+       regions[0] = i915->mm.regions[INTEL_REGION_LMEM_0];
        regions[1] = i915->mm.regions[INTEL_REGION_SMEM];
        return igt_dmabuf_import_same_driver(i915, regions, 2);
 }
index 0e52eb8..9922ac9 100644 (file)
@@ -92,17 +92,17 @@ static int igt_create_migrate(struct intel_gt *gt, enum intel_region_id src,
 
 static int igt_smem_create_migrate(void *arg)
 {
-       return igt_create_migrate(arg, INTEL_REGION_LMEM, INTEL_REGION_SMEM);
+       return igt_create_migrate(arg, INTEL_REGION_LMEM_0, INTEL_REGION_SMEM);
 }
 
 static int igt_lmem_create_migrate(void *arg)
 {
-       return igt_create_migrate(arg, INTEL_REGION_SMEM, INTEL_REGION_LMEM);
+       return igt_create_migrate(arg, INTEL_REGION_SMEM, INTEL_REGION_LMEM_0);
 }
 
 static int igt_same_create_migrate(void *arg)
 {
-       return igt_create_migrate(arg, INTEL_REGION_LMEM, INTEL_REGION_LMEM);
+       return igt_create_migrate(arg, INTEL_REGION_LMEM_0, INTEL_REGION_LMEM_0);
 }
 
 static int lmem_pages_migrate_one(struct i915_gem_ww_ctx *ww,
@@ -152,7 +152,7 @@ static int lmem_pages_migrate_one(struct i915_gem_ww_ctx *ww,
                }
 
        } else {
-               err = i915_gem_object_migrate(obj, ww, INTEL_REGION_LMEM);
+               err = i915_gem_object_migrate(obj, ww, INTEL_REGION_LMEM_0);
                if (err) {
                        pr_err("Object failed migration to lmem\n");
                        if (err)
index 6d6082b..8ac6726 100644 (file)
@@ -42,8 +42,7 @@ mock_context(struct drm_i915_private *i915,
                if (!ppgtt)
                        goto err_free;
 
-               ctx->vm = i915_vm_open(&ppgtt->vm);
-               i915_vm_put(&ppgtt->vm);
+               ctx->vm = &ppgtt->vm;
        }
 
        mutex_init(&ctx->engines_mutex);
@@ -59,7 +58,7 @@ mock_context(struct drm_i915_private *i915,
 
 err_vm:
        if (ctx->vm)
-               i915_vm_close(ctx->vm);
+               i915_vm_put(ctx->vm);
 err_free:
        kfree(ctx);
        return NULL;
index 871fe7b..1bb766c 100644 (file)
@@ -322,7 +322,7 @@ int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww)
        struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
        int err;
 
-       GEM_BUG_ON(!atomic_read(&ppgtt->base.vm.open));
+       GEM_BUG_ON(!kref_read(&ppgtt->base.vm.ref));
 
        /*
         * Workaround the limited maximum vma->pin_count and the aliasing_ppgtt
index b1b9c3f..047b5a7 100644 (file)
@@ -6,7 +6,6 @@
 #include "gen8_engine_cs.h"
 #include "i915_drv.h"
 #include "intel_gpu_commands.h"
-#include "intel_gt_regs.h"
 #include "intel_lrc.h"
 #include "intel_ring.h"
 
@@ -165,33 +164,9 @@ static u32 preparser_disable(bool state)
        return MI_ARB_CHECK | 1 << 8 | state;
 }
 
-static i915_reg_t aux_inv_reg(const struct intel_engine_cs *engine)
+u32 *gen12_emit_aux_table_inv(u32 *cs, const i915_reg_t inv_reg)
 {
-       static const i915_reg_t vd[] = {
-               GEN12_VD0_AUX_NV,
-               GEN12_VD1_AUX_NV,
-               GEN12_VD2_AUX_NV,
-               GEN12_VD3_AUX_NV,
-       };
-
-       static const i915_reg_t ve[] = {
-               GEN12_VE0_AUX_NV,
-               GEN12_VE1_AUX_NV,
-       };
-
-       if (engine->class == VIDEO_DECODE_CLASS)
-               return vd[engine->instance];
-
-       if (engine->class == VIDEO_ENHANCEMENT_CLASS)
-               return ve[engine->instance];
-
-       GEM_BUG_ON("unknown aux_inv reg\n");
-       return INVALID_MMIO_REG;
-}
-
-static u32 *gen12_emit_aux_table_inv(const i915_reg_t inv_reg, u32 *cs)
-{
-       *cs++ = MI_LOAD_REGISTER_IMM(1);
+       *cs++ = MI_LOAD_REGISTER_IMM(1) | MI_LRI_MMIO_REMAP_EN;
        *cs++ = i915_mmio_reg_offset(inv_reg);
        *cs++ = AUX_INV;
        *cs++ = MI_NOOP;
@@ -236,7 +211,7 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
 
        if (mode & EMIT_INVALIDATE) {
                u32 flags = 0;
-               u32 *cs;
+               u32 *cs, count;
 
                flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_TLB_INVALIDATE;
@@ -254,7 +229,12 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
                if (engine->class == COMPUTE_CLASS)
                        flags &= ~PIPE_CONTROL_3D_FLAGS;
 
-               cs = intel_ring_begin(rq, 8 + 4);
+               if (!HAS_FLAT_CCS(rq->engine->i915))
+                       count = 8 + 4;
+               else
+                       count = 8;
+
+               cs = intel_ring_begin(rq, count);
                if (IS_ERR(cs))
                        return PTR_ERR(cs);
 
@@ -267,8 +247,10 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
 
                cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
 
-               /* hsdes: 1809175790 */
-               cs = gen12_emit_aux_table_inv(GEN12_GFX_CCS_AUX_NV, cs);
+               if (!HAS_FLAT_CCS(rq->engine->i915)) {
+                       /* hsdes: 1809175790 */
+                       cs = gen12_emit_aux_table_inv(cs, GEN12_GFX_CCS_AUX_NV);
+               }
 
                *cs++ = preparser_disable(false);
                intel_ring_advance(rq, cs);
@@ -283,12 +265,17 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
        u32 cmd, *cs;
 
        cmd = 4;
-       if (mode & EMIT_INVALIDATE)
+       if (mode & EMIT_INVALIDATE) {
                cmd += 2;
-       if (mode & EMIT_INVALIDATE)
-               aux_inv = rq->engine->mask & ~BIT(BCS0);
-       if (aux_inv)
-               cmd += 2 * hweight32(aux_inv) + 2;
+
+               if (!HAS_FLAT_CCS(rq->engine->i915) &&
+                   (rq->engine->class == VIDEO_DECODE_CLASS ||
+                    rq->engine->class == VIDEO_ENHANCEMENT_CLASS)) {
+                       aux_inv = rq->engine->mask & ~BIT(BCS0);
+                       if (aux_inv)
+                               cmd += 4;
+               }
+       }
 
        cs = intel_ring_begin(rq, cmd);
        if (IS_ERR(cs))
@@ -319,15 +306,10 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
        *cs++ = 0; /* value */
 
        if (aux_inv) { /* hsdes: 1809175790 */
-               struct intel_engine_cs *engine;
-               unsigned int tmp;
-
-               *cs++ = MI_LOAD_REGISTER_IMM(hweight32(aux_inv));
-               for_each_engine_masked(engine, rq->engine->gt, aux_inv, tmp) {
-                       *cs++ = i915_mmio_reg_offset(aux_inv_reg(engine));
-                       *cs++ = AUX_INV;
-               }
-               *cs++ = MI_NOOP;
+               if (rq->engine->class == VIDEO_DECODE_CLASS)
+                       cs = gen12_emit_aux_table_inv(cs, GEN12_VD0_AUX_NV);
+               else
+                       cs = gen12_emit_aux_table_inv(cs, GEN12_VE0_AUX_NV);
        }
 
        if (mode & EMIT_INVALIDATE)
index cc6e21d..107ab42 100644 (file)
@@ -10,7 +10,7 @@
 #include <linux/types.h>
 
 #include "i915_gem.h" /* GEM_BUG_ON */
-
+#include "intel_gt_regs.h"
 #include "intel_gpu_commands.h"
 
 struct i915_request;
@@ -38,6 +38,8 @@ u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs);
 u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs);
 u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs);
 
+u32 *gen12_emit_aux_table_inv(u32 *cs, const i915_reg_t inv_reg);
+
 static inline u32 *
 __gen8_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset)
 {
index f574da0..c7bd5d7 100644 (file)
@@ -454,11 +454,11 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
                                pd = pdp->entry[gen8_pd_index(idx, 2)];
                        }
 
-                       clflush_cache_range(vaddr, PAGE_SIZE);
+                       drm_clflush_virt_range(vaddr, PAGE_SIZE);
                        vaddr = px_vaddr(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
                }
        } while (1);
-       clflush_cache_range(vaddr, PAGE_SIZE);
+       drm_clflush_virt_range(vaddr, PAGE_SIZE);
 
        return idx;
 }
@@ -631,7 +631,7 @@ static void gen8_ppgtt_insert_huge(struct i915_address_space *vm,
                        }
                } while (rem >= page_size && index < I915_PDES);
 
-               clflush_cache_range(vaddr, PAGE_SIZE);
+               drm_clflush_virt_range(vaddr, PAGE_SIZE);
 
                /*
                 * Is it safe to mark the 2M block as 64K? -- Either we have
@@ -647,7 +647,7 @@ static void gen8_ppgtt_insert_huge(struct i915_address_space *vm,
                                              I915_GTT_PAGE_SIZE_2M)))) {
                        vaddr = px_vaddr(pd);
                        vaddr[maybe_64K] |= GEN8_PDE_IPS_64K;
-                       clflush_cache_range(vaddr, PAGE_SIZE);
+                       drm_clflush_virt_range(vaddr, PAGE_SIZE);
                        page_size = I915_GTT_PAGE_SIZE_64K;
 
                        /*
@@ -668,7 +668,7 @@ static void gen8_ppgtt_insert_huge(struct i915_address_space *vm,
                                for (i = 1; i < index; i += 16)
                                        memset64(vaddr + i, encode, 15);
 
-                               clflush_cache_range(vaddr, PAGE_SIZE);
+                               drm_clflush_virt_range(vaddr, PAGE_SIZE);
                        }
                }
 
@@ -722,7 +722,7 @@ static void gen8_ppgtt_insert_entry(struct i915_address_space *vm,
 
        vaddr = px_vaddr(pt);
        vaddr[gen8_pd_index(idx, 0)] = gen8_pte_encode(addr, level, flags);
-       clflush_cache_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr));
+       drm_clflush_virt_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr));
 }
 
 static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm,
index 5d0ec7c..4070cb5 100644 (file)
@@ -386,7 +386,7 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
        ce->ring = NULL;
        ce->ring_size = SZ_4K;
 
-       ewma_runtime_init(&ce->runtime.avg);
+       ewma_runtime_init(&ce->stats.runtime.avg);
 
        ce->vm = i915_vm_get(engine->gt->vm);
 
@@ -400,7 +400,7 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
        INIT_LIST_HEAD(&ce->guc_state.fences);
        INIT_LIST_HEAD(&ce->guc_state.requests);
 
-       ce->guc_id.id = GUC_INVALID_LRC_ID;
+       ce->guc_id.id = GUC_INVALID_CONTEXT_ID;
        INIT_LIST_HEAD(&ce->guc_id.link);
 
        INIT_LIST_HEAD(&ce->destroyed_link);
@@ -576,6 +576,31 @@ void intel_context_bind_parent_child(struct intel_context *parent,
        child->parallel.parent = parent;
 }
 
+u64 intel_context_get_total_runtime_ns(const struct intel_context *ce)
+{
+       u64 total, active;
+
+       total = ce->stats.runtime.total;
+       if (ce->ops->flags & COPS_RUNTIME_CYCLES)
+               total *= ce->engine->gt->clock_period_ns;
+
+       active = READ_ONCE(ce->stats.active);
+       if (active)
+               active = intel_context_clock() - active;
+
+       return total + active;
+}
+
+u64 intel_context_get_avg_runtime_ns(struct intel_context *ce)
+{
+       u64 avg = ewma_runtime_read(&ce->stats.runtime.avg);
+
+       if (ce->ops->flags & COPS_RUNTIME_CYCLES)
+               avg *= ce->engine->gt->clock_period_ns;
+
+       return avg;
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftest_context.c"
 #endif
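
Note: the two accessors added above combine the accumulated runtime with the still-open slice of a context that is currently active, and convert CS cycles to nanoseconds via the GT clock period when the backend sets COPS_RUNTIME_CYCLES. A standalone sketch of that arithmetic with plain integers (the struct and field names below are illustrative, not the i915 ones):

#include <stdint.h>
#include <stdio.h>

typedef uint64_t u64;

struct ctx_stats {
	u64 runtime_total;	/* accumulated runtime, in CS cycles or ns */
	u64 active;		/* CPU timestamp at schedule-in, 0 if idle */
};

/* Total runtime in ns: accumulated time plus the currently open slice. */
static u64 total_runtime_ns(const struct ctx_stats *stats,
			    int runtime_in_cycles, u64 clock_period_ns,
			    u64 now_ns)
{
	u64 total = stats->runtime_total;

	if (runtime_in_cycles)
		total *= clock_period_ns;	/* convert CS cycles to ns */

	if (stats->active)
		total += now_ns - stats->active; /* still running: add open slice */

	return total;
}

int main(void)
{
	struct ctx_stats stats = { .runtime_total = 1000, .active = 5000 };

	/* e.g. a 52ns CTX_TIMESTAMP period, sampled at now = 6000ns */
	printf("%llu ns\n",
	       (unsigned long long)total_runtime_ns(&stats, 1, 52, 6000));
	return 0;
}
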
index d8c74bb..b7d3214 100644 (file)
@@ -351,18 +351,13 @@ intel_context_clear_nopreempt(struct intel_context *ce)
        clear_bit(CONTEXT_NOPREEMPT, &ce->flags);
 }
 
-static inline u64 intel_context_get_total_runtime_ns(struct intel_context *ce)
-{
-       const u32 period = ce->engine->gt->clock_period_ns;
-
-       return READ_ONCE(ce->runtime.total) * period;
-}
+u64 intel_context_get_total_runtime_ns(const struct intel_context *ce);
+u64 intel_context_get_avg_runtime_ns(struct intel_context *ce);
 
-static inline u64 intel_context_get_avg_runtime_ns(struct intel_context *ce)
+static inline u64 intel_context_clock(void)
 {
-       const u32 period = ce->engine->gt->clock_period_ns;
-
-       return mul_u32_u32(ewma_runtime_read(&ce->runtime.avg), period);
+       /* As we mix CS cycles with CPU clocks, use the raw monotonic clock. */
+       return ktime_get_raw_fast_ns();
 }
 
 #endif /* __INTEL_CONTEXT_H__ */
index 30cd81a..09f8254 100644 (file)
@@ -35,6 +35,9 @@ struct intel_context_ops {
 #define COPS_HAS_INFLIGHT_BIT 0
 #define COPS_HAS_INFLIGHT BIT(COPS_HAS_INFLIGHT_BIT)
 
+#define COPS_RUNTIME_CYCLES_BIT 1
+#define COPS_RUNTIME_CYCLES BIT(COPS_RUNTIME_CYCLES_BIT)
+
        int (*alloc)(struct intel_context *ce);
 
        void (*ban)(struct intel_context *ce, struct i915_request *rq);
@@ -134,14 +137,19 @@ struct intel_context {
        } lrc;
        u32 tag; /* cookie passed to HW to track this context on submission */
 
-       /* Time on GPU as tracked by the hw. */
-       struct {
-               struct ewma_runtime avg;
-               u64 total;
-               u32 last;
-               I915_SELFTEST_DECLARE(u32 num_underflow);
-               I915_SELFTEST_DECLARE(u32 max_underflow);
-       } runtime;
+       /** stats: Context GPU engine busyness tracking. */
+       struct intel_context_stats {
+               u64 active;
+
+               /* Time on GPU as tracked by the hw. */
+               struct {
+                       struct ewma_runtime avg;
+                       u64 total;
+                       u32 last;
+                       I915_SELFTEST_DECLARE(u32 num_underflow);
+                       I915_SELFTEST_DECLARE(u32 max_underflow);
+               } runtime;
+       } stats;
 
        unsigned int active_count; /* protected by timeline->mutex */
 
index 1c0ab05..1431f1e 100644 (file)
@@ -4,6 +4,7 @@
 
 #include <asm/cacheflush.h>
 #include <drm/drm_util.h>
+#include <drm/drm_cache.h>
 
 #include <linux/hashtable.h>
 #include <linux/irq_work.h>
@@ -143,15 +144,9 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
         * of extra paranoia to try and ensure that the HWS takes the value
         * we give and that it doesn't end up trapped inside the CPU!
         */
-       if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
-               mb();
-               clflush(&engine->status_page.addr[reg]);
-               engine->status_page.addr[reg] = value;
-               clflush(&engine->status_page.addr[reg]);
-               mb();
-       } else {
-               WRITE_ONCE(engine->status_page.addr[reg], value);
-       }
+       drm_clflush_virt_range(&engine->status_page.addr[reg], sizeof(value));
+       WRITE_ONCE(engine->status_page.addr[reg], value);
+       drm_clflush_virt_range(&engine->status_page.addr[reg], sizeof(value));
 }
 
 /*
index e1aa78b..a3035a7 100644 (file)
@@ -434,6 +434,11 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
        if (GRAPHICS_VER(i915) == 12 && engine->class == RENDER_CLASS)
                engine->props.preempt_timeout_ms = 0;
 
+       if ((engine->class == COMPUTE_CLASS && !RCS_MASK(engine->gt) &&
+            __ffs(CCS_MASK(engine->gt)) == engine->instance) ||
+            engine->class == RENDER_CLASS)
+               engine->flags |= I915_ENGINE_FIRST_RENDER_COMPUTE;
+
        /* features common between engines sharing EUs */
        if (engine->class == RENDER_CLASS || engine->class == COMPUTE_CLASS) {
                engine->flags |= I915_ENGINE_HAS_RCS_REG_STATE;
@@ -724,12 +729,24 @@ static void populate_logical_ids(struct intel_gt *gt, u8 *logical_ids,
 
 static void setup_logical_ids(struct intel_gt *gt, u8 *logical_ids, u8 class)
 {
-       int i;
-       u8 map[MAX_ENGINE_INSTANCE + 1];
+       /*
+        * Logical to physical mapping is needed to properly support
+        * the split-frame feature.
+        */
+       if (MEDIA_VER(gt->i915) >= 11 && class == VIDEO_DECODE_CLASS) {
+               const u8 map[] = { 0, 2, 4, 6, 1, 3, 5, 7 };
 
-       for (i = 0; i < MAX_ENGINE_INSTANCE + 1; ++i)
-               map[i] = i;
-       populate_logical_ids(gt, logical_ids, class, map, ARRAY_SIZE(map));
+               populate_logical_ids(gt, logical_ids, class,
+                                    map, ARRAY_SIZE(map));
+       } else {
+               int i;
+               u8 map[MAX_ENGINE_INSTANCE + 1];
+
+               for (i = 0; i < MAX_ENGINE_INSTANCE + 1; ++i)
+                       map[i] = i;
+               populate_logical_ids(gt, logical_ids, class,
+                                    map, ARRAY_SIZE(map));
+       }
 }
 
 /**
@@ -1695,9 +1712,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
                drm_printf(m, "\tIPEHR: 0x%08x\n", ENGINE_READ(engine, IPEHR));
        }
 
-       if (intel_engine_uses_guc(engine)) {
-               /* nothing to print yet */
-       } else if (HAS_EXECLISTS(dev_priv)) {
+       if (HAS_EXECLISTS(dev_priv) && !intel_engine_uses_guc(engine)) {
                struct i915_request * const *port, *rq;
                const u32 *hws =
                        &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
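
Note: for media version 11+, the new VIDEO_DECODE_CLASS path above feeds a fixed map into populate_logical_ids() so that even-numbered VCS engines receive the lowest logical IDs, which the split-frame/SFC pairing relies on. A small sketch of how such a map turns physical instances into logical IDs (the helper and the "present" array are illustrative; the driver derives presence from the engine mask):

#include <stdint.h>
#include <stdio.h>

typedef uint8_t u8;

#define MAX_INSTANCE 8

/*
 * Walk the map in order and hand out the next logical id to every physical
 * instance that is actually present, mirroring the shape of
 * populate_logical_ids() in the hunk above.
 */
static void assign_logical_ids(const u8 *map, int map_len,
			       const int *present, u8 *logical_ids)
{
	u8 next = 0;

	for (int i = 0; i < map_len; i++) {
		u8 instance = map[i];

		if (present[instance])
			logical_ids[instance] = next++;
	}
}

int main(void)
{
	/* Even VCS engines first, then odd: the map from the diff above. */
	static const u8 map[] = { 0, 2, 4, 6, 1, 3, 5, 7 };
	int present[MAX_INSTANCE] = { 1, 1, 1, 0, 0, 0, 0, 0 }; /* vcs0-2 */
	u8 logical[MAX_INSTANCE] = { 0 };

	assign_logical_ids(map, 8, present, logical);

	for (int i = 0; i < 3; i++)
		printf("vcs%d -> logical %u\n", i, logical[i]);
	return 0;
}
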
index 19ff875..eac2011 100644 (file)
@@ -96,7 +96,9 @@ struct i915_ctx_workarounds {
 
 #define I915_MAX_VCS   8
 #define I915_MAX_VECS  4
+#define I915_MAX_SFC   (I915_MAX_VCS / 2)
 #define I915_MAX_CCS   4
+#define I915_MAX_RCS   1
 
 /*
  * Engine IDs definitions.
@@ -526,6 +528,7 @@ struct intel_engine_cs {
 #define I915_ENGINE_WANT_FORCED_PREEMPTION BIT(8)
 #define I915_ENGINE_HAS_RCS_REG_STATE  BIT(9)
 #define I915_ENGINE_HAS_EU_PRIORITY    BIT(10)
+#define I915_ENGINE_FIRST_RENDER_COMPUTE BIT(11)
        unsigned int flags;
 
        /*
@@ -643,7 +646,7 @@ intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine)
 
 #define for_each_instdone_gslice_dss_xehp(dev_priv_, sseu_, iter_, gslice_, dss_) \
        for ((iter_) = 0, (gslice_) = 0, (dss_) = 0; \
-            (iter_) < GEN_MAX_SUBSLICES; \
+            (iter_) < GEN_SS_MASK_SIZE; \
             (iter_)++, (gslice_) = (iter_) / GEN_DSS_PER_GSLICE, \
             (dss_) = (iter_) % GEN_DSS_PER_GSLICE) \
                for_each_if(intel_sseu_has_subslice((sseu_), 0, (iter_)))
index b8c9b6b..0f6cd96 100644 (file)
@@ -193,7 +193,6 @@ static void add_legacy_ring(struct legacy_ring *ring,
 void intel_engines_driver_register(struct drm_i915_private *i915)
 {
        struct legacy_ring ring = {};
-       u8 uabi_instances[5] = {};
        struct list_head *it, *next;
        struct rb_node **p, *prev;
        LIST_HEAD(engines);
@@ -214,8 +213,10 @@ void intel_engines_driver_register(struct drm_i915_private *i915)
                GEM_BUG_ON(engine->class >= ARRAY_SIZE(uabi_classes));
                engine->uabi_class = uabi_classes[engine->class];
 
-               GEM_BUG_ON(engine->uabi_class >= ARRAY_SIZE(uabi_instances));
-               engine->uabi_instance = uabi_instances[engine->uabi_class]++;
+               GEM_BUG_ON(engine->uabi_class >=
+                          ARRAY_SIZE(i915->engine_uabi_class_count));
+               engine->uabi_instance =
+                       i915->engine_uabi_class_count[engine->uabi_class]++;
 
                /* Replace the internal name with the final user facing name */
                memcpy(old, engine->name, sizeof(engine->name));
@@ -245,8 +246,8 @@ void intel_engines_driver_register(struct drm_i915_private *i915)
                int class, inst;
                int errors = 0;
 
-               for (class = 0; class < ARRAY_SIZE(uabi_instances); class++) {
-                       for (inst = 0; inst < uabi_instances[class]; inst++) {
+               for (class = 0; class < ARRAY_SIZE(i915->engine_uabi_class_count); class++) {
+                       for (inst = 0; inst < i915->engine_uabi_class_count[class]; inst++) {
                                engine = intel_engine_lookup_user(i915,
                                                                  class, inst);
                                if (!engine) {
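
Note: the per-class instance counters move from a local uabi_instances[] array into i915->engine_uabi_class_count[], so the totals remain available after engine registration. The numbering scheme itself is unchanged; a minimal model of it:

#include <stdint.h>
#include <stdio.h>

#define NUM_CLASSES 5

struct engine {
	unsigned int uabi_class;
	unsigned int uabi_instance;
};

int main(void)
{
	/* Per-class counters, as now kept in i915->engine_uabi_class_count[]. */
	uint8_t count[NUM_CLASSES] = { 0 };
	struct engine engines[] = {
		{ .uabi_class = 0 },	/* render */
		{ .uabi_class = 2 },	/* video */
		{ .uabi_class = 2 },	/* video */
		{ .uabi_class = 3 },	/* video enhance */
	};

	for (unsigned int i = 0; i < sizeof(engines) / sizeof(engines[0]); i++) {
		struct engine *e = &engines[i];

		/* Each class hands out instances 0, 1, 2, ... in turn. */
		e->uabi_instance = count[e->uabi_class]++;
		printf("class %u instance %u\n", e->uabi_class, e->uabi_instance);
	}
	return 0;
}
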
index 3e0c81f..94d41a0 100644 (file)
@@ -624,8 +624,6 @@ static void __execlists_schedule_out(struct i915_request * const rq,
                GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag));
                __set_bit(ccid - 1, &engine->context_tag);
        }
-
-       lrc_update_runtime(ce);
        intel_engine_context_out(engine);
        execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
        if (engine->fw_domain && !--engine->fw_active)
@@ -1650,12 +1648,6 @@ cancel_port_requests(struct intel_engine_execlists * const execlists,
        return inactive;
 }
 
-static void invalidate_csb_entries(const u64 *first, const u64 *last)
-{
-       clflush((void *)first);
-       clflush((void *)last);
-}
-
 /*
  * Starting with Gen12, the status has a new format:
  *
@@ -2003,15 +1995,30 @@ process_csb(struct intel_engine_cs *engine, struct i915_request **inactive)
         * the wash as hardware, working or not, will need to do the
         * invalidation before.
         */
-       invalidate_csb_entries(&buf[0], &buf[num_entries - 1]);
+       drm_clflush_virt_range(&buf[0], num_entries * sizeof(buf[0]));
 
        /*
         * We assume that any event reflects a change in context flow
         * and merits a fresh timeslice. We reinstall the timer after
         * inspecting the queue to see if we need to resubmit.
         */
-       if (*prev != *execlists->active) /* elide lite-restores */
+       if (*prev != *execlists->active) { /* elide lite-restores */
+               /*
+                * Note the inherent discrepancy between the HW runtime,
+                * recorded as part of the context switch, and the CPU
+                * adjustment for active contexts. We have to hope that
+                * the delay in processing the CS event is very small
+                * and consistent. It works to our advantage to have
+                * the CPU adjustment _undershoot_ (i.e. start later than)
+                * the CS timestamp so we never overreport the runtime
+                * and correct ourselves later when updating from HW.
+                */
+               if (*prev)
+                       lrc_runtime_stop((*prev)->context);
+               if (*execlists->active)
+                       lrc_runtime_start((*execlists->active)->context);
                new_timeslice(execlists);
+       }
 
        return inactive;
 }
@@ -2235,11 +2242,11 @@ static struct execlists_capture *capture_regs(struct intel_engine_cs *engine)
        if (!cap->error)
                goto err_cap;
 
-       cap->error->gt = intel_gt_coredump_alloc(engine->gt, gfp);
+       cap->error->gt = intel_gt_coredump_alloc(engine->gt, gfp, CORE_DUMP_FLAG_NONE);
        if (!cap->error->gt)
                goto err_gpu;
 
-       cap->error->gt->engine = intel_engine_coredump_alloc(engine, gfp);
+       cap->error->gt->engine = intel_engine_coredump_alloc(engine, gfp, CORE_DUMP_FLAG_NONE);
        if (!cap->error->gt->engine)
                goto err_gt;
 
@@ -2643,7 +2650,7 @@ unwind:
 }
 
 static const struct intel_context_ops execlists_context_ops = {
-       .flags = COPS_HAS_INFLIGHT,
+       .flags = COPS_HAS_INFLIGHT | COPS_RUNTIME_CYCLES,
 
        .alloc = execlists_context_alloc,
 
@@ -2787,8 +2794,9 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
 
        /* Check that the GPU does indeed update the CSB entries! */
        memset(execlists->csb_status, -1, (reset_value + 1) * sizeof(u64));
-       invalidate_csb_entries(&execlists->csb_status[0],
-                              &execlists->csb_status[reset_value]);
+       drm_clflush_virt_range(execlists->csb_status,
+                              execlists->csb_size *
+                              sizeof(execlists->csb_status));
 
        /* Once more for luck and our trusty paranoia */
        ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
@@ -2832,7 +2840,7 @@ static void execlists_sanitize(struct intel_engine_cs *engine)
        sanitize_hwsp(engine);
 
        /* And scrub the dirty cachelines for the HWSP */
-       clflush_cache_range(engine->status_page.addr, PAGE_SIZE);
+       drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE);
 
        intel_engine_reset_pinned_contexts(engine);
 }
@@ -2911,7 +2919,7 @@ static int execlists_resume(struct intel_engine_cs *engine)
 
        enable_execlists(engine);
 
-       if (engine->class == RENDER_CLASS)
+       if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE)
                xehp_enable_ccs_engines(engine);
 
        return 0;
@@ -2957,9 +2965,8 @@ reset_csb(struct intel_engine_cs *engine, struct i915_request **inactive)
 {
        struct intel_engine_execlists * const execlists = &engine->execlists;
 
-       mb(); /* paranoia: read the CSB pointers from after the reset */
-       clflush(execlists->csb_write);
-       mb();
+       drm_clflush_virt_range(execlists->csb_write,
+                              sizeof(execlists->csb_write[0]));
 
        inactive = process_csb(engine, inactive); /* drain preemption events */
 
@@ -3701,7 +3708,7 @@ virtual_get_sibling(struct intel_engine_cs *engine, unsigned int sibling)
 }
 
 static const struct intel_context_ops virtual_context_ops = {
-       .flags = COPS_HAS_INFLIGHT,
+       .flags = COPS_HAS_INFLIGHT | COPS_RUNTIME_CYCLES,
 
        .alloc = virtual_context_alloc,
 
index 8850d4e..7581777 100644 (file)
@@ -3,18 +3,16 @@
  * Copyright © 2020 Intel Corporation
  */
 
-#include <linux/agp_backend.h>
-#include <linux/stop_machine.h>
-
+#include <linux/types.h>
 #include <asm/set_memory.h>
 #include <asm/smp.h>
 
 #include <drm/i915_drm.h>
-#include <drm/intel-gtt.h>
 
 #include "gem/i915_gem_lmem.h"
 
 #include "intel_gt.h"
+#include "intel_gt_gmch.h"
 #include "intel_gt_regs.h"
 #include "i915_drv.h"
 #include "i915_scatterlist.h"
@@ -94,28 +92,6 @@ int i915_ggtt_init_hw(struct drm_i915_private *i915)
        return 0;
 }
 
-/*
- * Certain Gen5 chipsets require idling the GPU before
- * unmapping anything from the GTT when VT-d is enabled.
- */
-static bool needs_idle_maps(struct drm_i915_private *i915)
-{
-       /*
-        * Query intel_iommu to see if we need the workaround. Presumably that
-        * was loaded first.
-        */
-       if (!intel_vtd_active(i915))
-               return false;
-
-       if (GRAPHICS_VER(i915) == 5 && IS_MOBILE(i915))
-               return true;
-
-       if (GRAPHICS_VER(i915) == 12)
-               return true; /* XXX DMAR fault reason 7 */
-
-       return false;
-}
-
 /**
  * i915_ggtt_suspend_vm - Suspend the memory mappings for a GGTT or DPT VM
  * @vm: The VM to suspend the mappings for
@@ -126,7 +102,7 @@ static bool needs_idle_maps(struct drm_i915_private *i915)
 void i915_ggtt_suspend_vm(struct i915_address_space *vm)
 {
        struct i915_vma *vma, *vn;
-       int open;
+       int save_skip_rewrite;
 
        drm_WARN_ON(&vm->i915->drm, !vm->is_ggtt && !vm->is_dpt);
 
@@ -135,8 +111,12 @@ retry:
 
        mutex_lock(&vm->mutex);
 
-       /* Skip rewriting PTE on VMA unbind. */
-       open = atomic_xchg(&vm->open, 0);
+       /*
+        * Skip rewriting PTE on VMA unbind.
+        * FIXME: Use an argument to i915_vma_unbind() instead?
+        */
+       save_skip_rewrite = vm->skip_pte_rewrite;
+       vm->skip_pte_rewrite = true;
 
        list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
                struct drm_i915_gem_object *obj = vma->obj;
@@ -154,16 +134,14 @@ retry:
                         */
                        i915_gem_object_get(obj);
 
-                       atomic_set(&vm->open, open);
                        mutex_unlock(&vm->mutex);
 
                        i915_gem_object_lock(obj, NULL);
-                       open = i915_vma_unbind(vma);
+                       GEM_WARN_ON(i915_vma_unbind(vma));
                        i915_gem_object_unlock(obj);
-
-                       GEM_WARN_ON(open);
-
                        i915_gem_object_put(obj);
+
+                       vm->skip_pte_rewrite = save_skip_rewrite;
                        goto retry;
                }
 
@@ -179,7 +157,7 @@ retry:
 
        vm->clear_range(vm, 0, vm->total);
 
-       atomic_set(&vm->open, open);
+       vm->skip_pte_rewrite = save_skip_rewrite;
 
        mutex_unlock(&vm->mutex);
 }
@@ -202,7 +180,7 @@ void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
        spin_unlock_irq(&uncore->lock);
 }
 
-static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt)
+void gen8_ggtt_invalidate(struct i915_ggtt *ggtt)
 {
        struct intel_uncore *uncore = ggtt->vm.gt->uncore;
 
@@ -227,11 +205,6 @@ static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
                intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE);
 }
 
-static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt)
-{
-       intel_gtt_chipset_flush();
-}
-
 u64 gen8_ggtt_pte_encode(dma_addr_t addr,
                         enum i915_cache_level level,
                         u32 flags)
@@ -244,258 +217,7 @@ u64 gen8_ggtt_pte_encode(dma_addr_t addr,
        return pte;
 }
 
-static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
-{
-       writeq(pte, addr);
-}
-
-static void gen8_ggtt_insert_page(struct i915_address_space *vm,
-                                 dma_addr_t addr,
-                                 u64 offset,
-                                 enum i915_cache_level level,
-                                 u32 flags)
-{
-       struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
-       gen8_pte_t __iomem *pte =
-               (gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
-
-       gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, flags));
-
-       ggtt->invalidate(ggtt);
-}
-
-static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
-                                    struct i915_vma_resource *vma_res,
-                                    enum i915_cache_level level,
-                                    u32 flags)
-{
-       const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, flags);
-       struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
-       gen8_pte_t __iomem *gte;
-       gen8_pte_t __iomem *end;
-       struct sgt_iter iter;
-       dma_addr_t addr;
-
-       /*
-        * Note that we ignore PTE_READ_ONLY here. The caller must be careful
-        * not to allow the user to override access to a read only page.
-        */
-
-       gte = (gen8_pte_t __iomem *)ggtt->gsm;
-       gte += vma_res->start / I915_GTT_PAGE_SIZE;
-       end = gte + vma_res->node_size / I915_GTT_PAGE_SIZE;
-
-       for_each_sgt_daddr(addr, iter, vma_res->bi.pages)
-               gen8_set_pte(gte++, pte_encode | addr);
-       GEM_BUG_ON(gte > end);
-
-       /* Fill the allocated but "unused" space beyond the end of the buffer */
-       while (gte < end)
-               gen8_set_pte(gte++, vm->scratch[0]->encode);
-
-       /*
-        * We want to flush the TLBs only after we're certain all the PTE
-        * updates have finished.
-        */
-       ggtt->invalidate(ggtt);
-}
-
-static void gen6_ggtt_insert_page(struct i915_address_space *vm,
-                                 dma_addr_t addr,
-                                 u64 offset,
-                                 enum i915_cache_level level,
-                                 u32 flags)
-{
-       struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
-       gen6_pte_t __iomem *pte =
-               (gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
-
-       iowrite32(vm->pte_encode(addr, level, flags), pte);
-
-       ggtt->invalidate(ggtt);
-}
-
-/*
- * Binds an object into the global gtt with the specified cache level.
- * The object will be accessible to the GPU via commands whose operands
- * reference offsets within the global GTT as well as accessible by the GPU
- * through the GMADR mapped BAR (i915->mm.gtt->gtt).
- */
-static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
-                                    struct i915_vma_resource *vma_res,
-                                    enum i915_cache_level level,
-                                    u32 flags)
-{
-       struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
-       gen6_pte_t __iomem *gte;
-       gen6_pte_t __iomem *end;
-       struct sgt_iter iter;
-       dma_addr_t addr;
-
-       gte = (gen6_pte_t __iomem *)ggtt->gsm;
-       gte += vma_res->start / I915_GTT_PAGE_SIZE;
-       end = gte + vma_res->node_size / I915_GTT_PAGE_SIZE;
-
-       for_each_sgt_daddr(addr, iter, vma_res->bi.pages)
-               iowrite32(vm->pte_encode(addr, level, flags), gte++);
-       GEM_BUG_ON(gte > end);
-
-       /* Fill the allocated but "unused" space beyond the end of the buffer */
-       while (gte < end)
-               iowrite32(vm->scratch[0]->encode, gte++);
-
-       /*
-        * We want to flush the TLBs only after we're certain all the PTE
-        * updates have finished.
-        */
-       ggtt->invalidate(ggtt);
-}
-
-static void nop_clear_range(struct i915_address_space *vm,
-                           u64 start, u64 length)
-{
-}
-
-static void gen8_ggtt_clear_range(struct i915_address_space *vm,
-                                 u64 start, u64 length)
-{
-       struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
-       unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
-       unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
-       const gen8_pte_t scratch_pte = vm->scratch[0]->encode;
-       gen8_pte_t __iomem *gtt_base =
-               (gen8_pte_t __iomem *)ggtt->gsm + first_entry;
-       const int max_entries = ggtt_total_entries(ggtt) - first_entry;
-       int i;
-
-       if (WARN(num_entries > max_entries,
-                "First entry = %d; Num entries = %d (max=%d)\n",
-                first_entry, num_entries, max_entries))
-               num_entries = max_entries;
-
-       for (i = 0; i < num_entries; i++)
-               gen8_set_pte(&gtt_base[i], scratch_pte);
-}
-
-static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
-{
-       /*
-        * Make sure the internal GAM fifo has been cleared of all GTT
-        * writes before exiting stop_machine(). This guarantees that
-        * any aperture accesses waiting to start in another process
-        * cannot back up behind the GTT writes causing a hang.
-        * The register can be any arbitrary GAM register.
-        */
-       intel_uncore_posting_read_fw(vm->gt->uncore, GFX_FLSH_CNTL_GEN6);
-}
-
-struct insert_page {
-       struct i915_address_space *vm;
-       dma_addr_t addr;
-       u64 offset;
-       enum i915_cache_level level;
-};
-
-static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
-{
-       struct insert_page *arg = _arg;
-
-       gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0);
-       bxt_vtd_ggtt_wa(arg->vm);
-
-       return 0;
-}
-
-static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
-                                         dma_addr_t addr,
-                                         u64 offset,
-                                         enum i915_cache_level level,
-                                         u32 unused)
-{
-       struct insert_page arg = { vm, addr, offset, level };
-
-       stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
-}
-
-struct insert_entries {
-       struct i915_address_space *vm;
-       struct i915_vma_resource *vma_res;
-       enum i915_cache_level level;
-       u32 flags;
-};
-
-static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
-{
-       struct insert_entries *arg = _arg;
-
-       gen8_ggtt_insert_entries(arg->vm, arg->vma_res, arg->level, arg->flags);
-       bxt_vtd_ggtt_wa(arg->vm);
-
-       return 0;
-}
-
-static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
-                                            struct i915_vma_resource *vma_res,
-                                            enum i915_cache_level level,
-                                            u32 flags)
-{
-       struct insert_entries arg = { vm, vma_res, level, flags };
-
-       stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
-}
-
-static void gen6_ggtt_clear_range(struct i915_address_space *vm,
-                                 u64 start, u64 length)
-{
-       struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
-       unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
-       unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
-       gen6_pte_t scratch_pte, __iomem *gtt_base =
-               (gen6_pte_t __iomem *)ggtt->gsm + first_entry;
-       const int max_entries = ggtt_total_entries(ggtt) - first_entry;
-       int i;
-
-       if (WARN(num_entries > max_entries,
-                "First entry = %d; Num entries = %d (max=%d)\n",
-                first_entry, num_entries, max_entries))
-               num_entries = max_entries;
-
-       scratch_pte = vm->scratch[0]->encode;
-       for (i = 0; i < num_entries; i++)
-               iowrite32(scratch_pte, &gtt_base[i]);
-}
-
-static void i915_ggtt_insert_page(struct i915_address_space *vm,
-                                 dma_addr_t addr,
-                                 u64 offset,
-                                 enum i915_cache_level cache_level,
-                                 u32 unused)
-{
-       unsigned int flags = (cache_level == I915_CACHE_NONE) ?
-               AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
-
-       intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
-}
-
-static void i915_ggtt_insert_entries(struct i915_address_space *vm,
-                                    struct i915_vma_resource *vma_res,
-                                    enum i915_cache_level cache_level,
-                                    u32 unused)
-{
-       unsigned int flags = (cache_level == I915_CACHE_NONE) ?
-               AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
-
-       intel_gtt_insert_sg_entries(vma_res->bi.pages, vma_res->start >> PAGE_SHIFT,
-                                   flags);
-}
-
-static void i915_ggtt_clear_range(struct i915_address_space *vm,
-                                 u64 start, u64 length)
-{
-       intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT);
-}
-
-static void ggtt_bind_vma(struct i915_address_space *vm,
+void intel_ggtt_bind_vma(struct i915_address_space *vm,
                          struct i915_vm_pt_stash *stash,
                          struct i915_vma_resource *vma_res,
                          enum i915_cache_level cache_level,
@@ -519,7 +241,7 @@ static void ggtt_bind_vma(struct i915_address_space *vm,
        vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE;
 }
 
-static void ggtt_unbind_vma(struct i915_address_space *vm,
+void intel_ggtt_unbind_vma(struct i915_address_space *vm,
                            struct i915_vma_resource *vma_res)
 {
        vm->clear_range(vm, vma_res->start, vma_res->vma_size);
@@ -722,10 +444,10 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
        ggtt->alias = ppgtt;
        ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags;
 
-       GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma);
+       GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != intel_ggtt_bind_vma);
        ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;
 
-       GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != ggtt_unbind_vma);
+       GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != intel_ggtt_unbind_vma);
        ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma;
 
        i915_vm_free_pt_stash(&ppgtt->vm, &stash);
@@ -748,8 +470,8 @@ static void fini_aliasing_ppgtt(struct i915_ggtt *ggtt)
 
        i915_vm_put(&ppgtt->vm);
 
-       ggtt->vm.vma_ops.bind_vma   = ggtt_bind_vma;
-       ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
+       ggtt->vm.vma_ops.bind_vma   = intel_ggtt_bind_vma;
+       ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma;
 }
 
 int i915_init_ggtt(struct drm_i915_private *i915)
@@ -773,13 +495,13 @@ static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
 {
        struct i915_vma *vma, *vn;
 
-       atomic_set(&ggtt->vm.open, 0);
-
        flush_workqueue(ggtt->vm.i915->wq);
        i915_gem_drain_freed_objects(ggtt->vm.i915);
 
        mutex_lock(&ggtt->vm.mutex);
 
+       ggtt->vm.skip_pte_rewrite = true;
+
        list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) {
                struct drm_i915_gem_object *obj = vma->obj;
                bool trylock;
@@ -837,364 +559,12 @@ void i915_ggtt_driver_late_release(struct drm_i915_private *i915)
        dma_resv_fini(&ggtt->vm._resv);
 }
 
-static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
-{
-       snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
-       snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
-       return snb_gmch_ctl << 20;
-}
-
-static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
-{
-       bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
-       bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
-       if (bdw_gmch_ctl)
-               bdw_gmch_ctl = 1 << bdw_gmch_ctl;
-
-#ifdef CONFIG_X86_32
-       /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * I915_GTT_PAGE_SIZE */
-       if (bdw_gmch_ctl > 4)
-               bdw_gmch_ctl = 4;
-#endif
-
-       return bdw_gmch_ctl << 20;
-}
-
-static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
-{
-       gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
-       gmch_ctrl &= SNB_GMCH_GGMS_MASK;
-
-       if (gmch_ctrl)
-               return 1 << (20 + gmch_ctrl);
-
-       return 0;
-}
-
-static unsigned int gen6_gttmmadr_size(struct drm_i915_private *i915)
-{
-       /*
-        * GEN6: GTTMMADR size is 4MB and GTTADR starts at 2MB offset
-        * GEN8: GTTMMADR size is 16MB and GTTADR starts at 8MB offset
-        */
-       GEM_BUG_ON(GRAPHICS_VER(i915) < 6);
-       return (GRAPHICS_VER(i915) < 8) ? SZ_4M : SZ_16M;
-}
-
-static unsigned int gen6_gttadr_offset(struct drm_i915_private *i915)
-{
-       return gen6_gttmmadr_size(i915) / 2;
-}
-
-static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
-{
-       struct drm_i915_private *i915 = ggtt->vm.i915;
-       struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
-       phys_addr_t phys_addr;
-       u32 pte_flags;
-       int ret;
-
-       GEM_WARN_ON(pci_resource_len(pdev, 0) != gen6_gttmmadr_size(i915));
-       phys_addr = pci_resource_start(pdev, 0) + gen6_gttadr_offset(i915);
-
-       /*
-        * On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range
-        * will be dropped. For WC mappings in general we have 64 byte burst
-        * writes when the WC buffer is flushed, so we can't use it, but have to
-        * resort to an uncached mapping. The WC issue is easily caught by the
-        * readback check when writing GTT PTE entries.
-        */
-       if (IS_GEN9_LP(i915) || GRAPHICS_VER(i915) >= 11)
-               ggtt->gsm = ioremap(phys_addr, size);
-       else
-               ggtt->gsm = ioremap_wc(phys_addr, size);
-       if (!ggtt->gsm) {
-               drm_err(&i915->drm, "Failed to map the ggtt page table\n");
-               return -ENOMEM;
-       }
-
-       kref_init(&ggtt->vm.resv_ref);
-       ret = setup_scratch_page(&ggtt->vm);
-       if (ret) {
-               drm_err(&i915->drm, "Scratch setup failed\n");
-               /* iounmap will also get called at remove, but meh */
-               iounmap(ggtt->gsm);
-               return ret;
-       }
-
-       pte_flags = 0;
-       if (i915_gem_object_is_lmem(ggtt->vm.scratch[0]))
-               pte_flags |= PTE_LM;
-
-       ggtt->vm.scratch[0]->encode =
-               ggtt->vm.pte_encode(px_dma(ggtt->vm.scratch[0]),
-                                   I915_CACHE_NONE, pte_flags);
-
-       return 0;
-}
-
-static void gen6_gmch_remove(struct i915_address_space *vm)
-{
-       struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
-
-       iounmap(ggtt->gsm);
-       free_scratch(vm);
-}
-
-static struct resource pci_resource(struct pci_dev *pdev, int bar)
+struct resource intel_pci_resource(struct pci_dev *pdev, int bar)
 {
        return (struct resource)DEFINE_RES_MEM(pci_resource_start(pdev, bar),
                                               pci_resource_len(pdev, bar));
 }
 
-static int gen8_gmch_probe(struct i915_ggtt *ggtt)
-{
-       struct drm_i915_private *i915 = ggtt->vm.i915;
-       struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
-       unsigned int size;
-       u16 snb_gmch_ctl;
-
-       /* TODO: We're not aware of mappable constraints on gen8 yet */
-       if (!HAS_LMEM(i915)) {
-               ggtt->gmadr = pci_resource(pdev, 2);
-               ggtt->mappable_end = resource_size(&ggtt->gmadr);
-       }
-
-       pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
-       if (IS_CHERRYVIEW(i915))
-               size = chv_get_total_gtt_size(snb_gmch_ctl);
-       else
-               size = gen8_get_total_gtt_size(snb_gmch_ctl);
-
-       ggtt->vm.alloc_pt_dma = alloc_pt_dma;
-       ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
-       ggtt->vm.lmem_pt_obj_flags = I915_BO_ALLOC_PM_EARLY;
-
-       ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
-       ggtt->vm.cleanup = gen6_gmch_remove;
-       ggtt->vm.insert_page = gen8_ggtt_insert_page;
-       ggtt->vm.clear_range = nop_clear_range;
-       if (intel_scanout_needs_vtd_wa(i915))
-               ggtt->vm.clear_range = gen8_ggtt_clear_range;
-
-       ggtt->vm.insert_entries = gen8_ggtt_insert_entries;
-
-       /*
-        * Serialize GTT updates with aperture access on BXT if VT-d is on,
-        * and always on CHV.
-        */
-       if (intel_vm_no_concurrent_access_wa(i915)) {
-               ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
-               ggtt->vm.insert_page    = bxt_vtd_ggtt_insert_page__BKL;
-               ggtt->vm.bind_async_flags =
-                       I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
-       }
-
-       ggtt->invalidate = gen8_ggtt_invalidate;
-
-       ggtt->vm.vma_ops.bind_vma    = ggtt_bind_vma;
-       ggtt->vm.vma_ops.unbind_vma  = ggtt_unbind_vma;
-
-       ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
-
-       setup_private_pat(ggtt->vm.gt->uncore);
-
-       return ggtt_probe_common(ggtt, size);
-}
-
-static u64 snb_pte_encode(dma_addr_t addr,
-                         enum i915_cache_level level,
-                         u32 flags)
-{
-       gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
-
-       switch (level) {
-       case I915_CACHE_L3_LLC:
-       case I915_CACHE_LLC:
-               pte |= GEN6_PTE_CACHE_LLC;
-               break;
-       case I915_CACHE_NONE:
-               pte |= GEN6_PTE_UNCACHED;
-               break;
-       default:
-               MISSING_CASE(level);
-       }
-
-       return pte;
-}
-
-static u64 ivb_pte_encode(dma_addr_t addr,
-                         enum i915_cache_level level,
-                         u32 flags)
-{
-       gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
-
-       switch (level) {
-       case I915_CACHE_L3_LLC:
-               pte |= GEN7_PTE_CACHE_L3_LLC;
-               break;
-       case I915_CACHE_LLC:
-               pte |= GEN6_PTE_CACHE_LLC;
-               break;
-       case I915_CACHE_NONE:
-               pte |= GEN6_PTE_UNCACHED;
-               break;
-       default:
-               MISSING_CASE(level);
-       }
-
-       return pte;
-}
-
-static u64 byt_pte_encode(dma_addr_t addr,
-                         enum i915_cache_level level,
-                         u32 flags)
-{
-       gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
-
-       if (!(flags & PTE_READ_ONLY))
-               pte |= BYT_PTE_WRITEABLE;
-
-       if (level != I915_CACHE_NONE)
-               pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
-
-       return pte;
-}
-
-static u64 hsw_pte_encode(dma_addr_t addr,
-                         enum i915_cache_level level,
-                         u32 flags)
-{
-       gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
-
-       if (level != I915_CACHE_NONE)
-               pte |= HSW_WB_LLC_AGE3;
-
-       return pte;
-}
-
-static u64 iris_pte_encode(dma_addr_t addr,
-                          enum i915_cache_level level,
-                          u32 flags)
-{
-       gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
-
-       switch (level) {
-       case I915_CACHE_NONE:
-               break;
-       case I915_CACHE_WT:
-               pte |= HSW_WT_ELLC_LLC_AGE3;
-               break;
-       default:
-               pte |= HSW_WB_ELLC_LLC_AGE3;
-               break;
-       }
-
-       return pte;
-}
-
-static int gen6_gmch_probe(struct i915_ggtt *ggtt)
-{
-       struct drm_i915_private *i915 = ggtt->vm.i915;
-       struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
-       unsigned int size;
-       u16 snb_gmch_ctl;
-
-       ggtt->gmadr = pci_resource(pdev, 2);
-       ggtt->mappable_end = resource_size(&ggtt->gmadr);
-
-       /*
-        * 64/512MB is the current min/max we actually know of, but this is
-        * just a coarse sanity check.
-        */
-       if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) {
-               drm_err(&i915->drm, "Unknown GMADR size (%pa)\n",
-                       &ggtt->mappable_end);
-               return -ENXIO;
-       }
-
-       pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
-
-       size = gen6_get_total_gtt_size(snb_gmch_ctl);
-       ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE;
-
-       ggtt->vm.alloc_pt_dma = alloc_pt_dma;
-       ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
-
-       ggtt->vm.clear_range = nop_clear_range;
-       if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915))
-               ggtt->vm.clear_range = gen6_ggtt_clear_range;
-       ggtt->vm.insert_page = gen6_ggtt_insert_page;
-       ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
-       ggtt->vm.cleanup = gen6_gmch_remove;
-
-       ggtt->invalidate = gen6_ggtt_invalidate;
-
-       if (HAS_EDRAM(i915))
-               ggtt->vm.pte_encode = iris_pte_encode;
-       else if (IS_HASWELL(i915))
-               ggtt->vm.pte_encode = hsw_pte_encode;
-       else if (IS_VALLEYVIEW(i915))
-               ggtt->vm.pte_encode = byt_pte_encode;
-       else if (GRAPHICS_VER(i915) >= 7)
-               ggtt->vm.pte_encode = ivb_pte_encode;
-       else
-               ggtt->vm.pte_encode = snb_pte_encode;
-
-       ggtt->vm.vma_ops.bind_vma    = ggtt_bind_vma;
-       ggtt->vm.vma_ops.unbind_vma  = ggtt_unbind_vma;
-
-       return ggtt_probe_common(ggtt, size);
-}
-
-static void i915_gmch_remove(struct i915_address_space *vm)
-{
-       intel_gmch_remove();
-}
-
-static int i915_gmch_probe(struct i915_ggtt *ggtt)
-{
-       struct drm_i915_private *i915 = ggtt->vm.i915;
-       phys_addr_t gmadr_base;
-       int ret;
-
-       ret = intel_gmch_probe(i915->bridge_dev, to_pci_dev(i915->drm.dev), NULL);
-       if (!ret) {
-               drm_err(&i915->drm, "failed to set up gmch\n");
-               return -EIO;
-       }
-
-       intel_gtt_get(&ggtt->vm.total, &gmadr_base, &ggtt->mappable_end);
-
-       ggtt->gmadr =
-               (struct resource)DEFINE_RES_MEM(gmadr_base, ggtt->mappable_end);
-
-       ggtt->vm.alloc_pt_dma = alloc_pt_dma;
-       ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
-
-       if (needs_idle_maps(i915)) {
-               drm_notice(&i915->drm,
-                          "Flushing DMA requests before IOMMU unmaps; performance may be degraded\n");
-               ggtt->do_idle_maps = true;
-       }
-
-       ggtt->vm.insert_page = i915_ggtt_insert_page;
-       ggtt->vm.insert_entries = i915_ggtt_insert_entries;
-       ggtt->vm.clear_range = i915_ggtt_clear_range;
-       ggtt->vm.cleanup = i915_gmch_remove;
-
-       ggtt->invalidate = gmch_ggtt_invalidate;
-
-       ggtt->vm.vma_ops.bind_vma    = ggtt_bind_vma;
-       ggtt->vm.vma_ops.unbind_vma  = ggtt_unbind_vma;
-
-       if (unlikely(ggtt->do_idle_maps))
-               drm_notice(&i915->drm,
-                          "Applying Ironlake quirks for intel_iommu\n");
-
-       return 0;
-}
-
 static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt)
 {
        struct drm_i915_private *i915 = gt->i915;
@@ -1206,11 +576,11 @@ static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt)
        dma_resv_init(&ggtt->vm._resv);
 
        if (GRAPHICS_VER(i915) <= 5)
-               ret = i915_gmch_probe(ggtt);
+               ret = intel_gt_gmch_gen5_probe(ggtt);
        else if (GRAPHICS_VER(i915) < 8)
-               ret = gen6_gmch_probe(ggtt);
+               ret = intel_gt_gmch_gen6_probe(ggtt);
        else
-               ret = gen8_gmch_probe(ggtt);
+               ret = intel_gt_gmch_gen8_probe(ggtt);
        if (ret) {
                dma_resv_fini(&ggtt->vm._resv);
                return ret;
@@ -1264,10 +634,7 @@ int i915_ggtt_probe_hw(struct drm_i915_private *i915)
 
 int i915_ggtt_enable_hw(struct drm_i915_private *i915)
 {
-       if (GRAPHICS_VER(i915) < 6 && !intel_enable_gtt())
-               return -EIO;
-
-       return 0;
+       return intel_gt_gmch_gen5_enable_hw(i915);
 }
 
 void i915_ggtt_enable_guc(struct i915_ggtt *ggtt)
@@ -1307,16 +674,12 @@ bool i915_ggtt_resume_vm(struct i915_address_space *vm)
 {
        struct i915_vma *vma;
        bool write_domain_objs = false;
-       int open;
 
        drm_WARN_ON(&vm->i915->drm, !vm->is_ggtt && !vm->is_dpt);
 
        /* First fill our portion of the GTT with scratch pages */
        vm->clear_range(vm, 0, vm->total);
 
-       /* Skip rewriting PTE on VMA unbind. */
-       open = atomic_xchg(&vm->open, 0);
-
        /* clflush objects bound into the GGTT and rebind them. */
        list_for_each_entry(vma, &vm->bound_list, vm_link) {
                struct drm_i915_gem_object *obj = vma->obj;
@@ -1333,8 +696,6 @@ bool i915_ggtt_resume_vm(struct i915_address_space *vm)
                }
        }
 
-       atomic_set(&vm->open, open);
-
        return write_domain_objs;
 }
 
index d112ffd..4243be0 100644 (file)
 #define MI_LOAD_REGISTER_IMM(x)        MI_INSTR(0x22, 2*(x)-1)
 /* Gen11+. addr = base + (ctx_restore ? offset & GENMASK(12,2) : offset) */
 #define   MI_LRI_LRM_CS_MMIO           REG_BIT(19)
+#define   MI_LRI_MMIO_REMAP_EN         REG_BIT(17)
 #define   MI_LRI_FORCE_POSTED          (1<<12)
 #define MI_LOAD_REGISTER_IMM_MAX_REGS (126)
 #define MI_STORE_REGISTER_MEM        MI_INSTR(0x24, 1)
index 8a2483c..f0014c5 100644 (file)
@@ -4,7 +4,6 @@
  */
 
 #include <drm/drm_managed.h>
-#include <drm/intel-gtt.h>
 
 #include "gem/i915_gem_internal.h"
 #include "gem/i915_gem_lmem.h"
@@ -17,6 +16,7 @@
 #include "intel_gt_buffer_pool.h"
 #include "intel_gt_clock_utils.h"
 #include "intel_gt_debugfs.h"
+#include "intel_gt_gmch.h"
 #include "intel_gt_pm.h"
 #include "intel_gt_regs.h"
 #include "intel_gt_requests.h"
 #include "intel_rc6.h"
 #include "intel_renderstate.h"
 #include "intel_rps.h"
+#include "intel_gt_sysfs.h"
 #include "intel_uncore.h"
 #include "shmem_utils.h"
 
-void __intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
+static void __intel_gt_init_early(struct intel_gt *gt)
 {
        spin_lock_init(&gt->irq_lock);
 
@@ -51,17 +52,23 @@ void __intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
        intel_rps_init_early(&gt->rps);
 }
 
-void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
+/* Preliminary initialization of Tile 0 */
+void intel_root_gt_init_early(struct drm_i915_private *i915)
 {
+       struct intel_gt *gt = to_gt(i915);
+
        gt->i915 = i915;
        gt->uncore = &i915->uncore;
+
+       __intel_gt_init_early(gt);
 }
 
-int intel_gt_probe_lmem(struct intel_gt *gt)
+static int intel_gt_probe_lmem(struct intel_gt *gt)
 {
        struct drm_i915_private *i915 = gt->i915;
+       unsigned int instance = gt->info.id;
+       int id = INTEL_REGION_LMEM_0 + instance;
        struct intel_memory_region *mem;
-       int id;
        int err;
 
        mem = intel_gt_setup_lmem(gt);
@@ -76,9 +83,8 @@ int intel_gt_probe_lmem(struct intel_gt *gt)
                return err;
        }
 
-       id = INTEL_REGION_LMEM;
-
        mem->id = id;
+       mem->instance = instance;
 
        intel_memory_region_set_name(mem, "local%u", mem->instance);
 
@@ -96,6 +102,12 @@ int intel_gt_assign_ggtt(struct intel_gt *gt)
        return gt->ggtt ? 0 : -ENOMEM;
 }
 
+static const char * const intel_steering_types[] = {
+       "L3BANK",
+       "MSLICE",
+       "LNCF",
+};
+
 static const struct intel_mmio_range icl_l3bank_steering_table[] = {
        { 0x00B100, 0x00B3FF },
        {},
@@ -439,7 +451,7 @@ void intel_gt_chipset_flush(struct intel_gt *gt)
 {
        wmb();
        if (GRAPHICS_VER(gt->i915) < 6)
-               intel_gtt_chipset_flush();
+               intel_gt_gmch_gen5_chipset_flush(gt);
 }
 
 void intel_gt_driver_register(struct intel_gt *gt)
@@ -447,6 +459,7 @@ void intel_gt_driver_register(struct intel_gt *gt)
        intel_rps_driver_register(&gt->rps);
 
        intel_gt_debugfs_register(gt);
+       intel_gt_sysfs_register(gt);
 }
 
 static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size)
@@ -712,6 +725,11 @@ int intel_gt_init(struct intel_gt *gt)
        if (err)
                goto err_uc_init;
 
+       err = intel_gt_init_hwconfig(gt);
+       if (err)
+               drm_err(&gt->i915->drm, "Failed to retrieve hwconfig table: %pe\n",
+                       ERR_PTR(err));
+
        err = __engines_record_defaults(gt);
        if (err)
                goto err_gt;
@@ -793,18 +811,24 @@ void intel_gt_driver_release(struct intel_gt *gt)
        intel_gt_pm_fini(gt);
        intel_gt_fini_scratch(gt);
        intel_gt_fini_buffer_pool(gt);
+       intel_gt_fini_hwconfig(gt);
 }
 
-void intel_gt_driver_late_release(struct intel_gt *gt)
+void intel_gt_driver_late_release_all(struct drm_i915_private *i915)
 {
+       struct intel_gt *gt;
+       unsigned int id;
+
        /* We need to wait for inflight RCU frees to release their grip */
        rcu_barrier();
 
-       intel_uc_driver_late_release(&gt->uc);
-       intel_gt_fini_requests(gt);
-       intel_gt_fini_reset(gt);
-       intel_gt_fini_timelines(gt);
-       intel_engines_free(gt);
+       for_each_gt(gt, i915, id) {
+               intel_uc_driver_late_release(&gt->uc);
+               intel_gt_fini_requests(gt);
+               intel_gt_fini_reset(gt);
+               intel_gt_fini_timelines(gt);
+               intel_engines_free(gt);
+       }
 }
 
 /**
@@ -913,6 +937,35 @@ u32 intel_gt_read_register_fw(struct intel_gt *gt, i915_reg_t reg)
        return intel_uncore_read_fw(gt->uncore, reg);
 }
 
+/**
+ * intel_gt_get_valid_steering_for_reg - get a valid steering for a register
+ * @gt: GT structure
+ * @reg: register for which the steering is required
+ * @sliceid: return variable for slice steering
+ * @subsliceid: return variable for subslice steering
+ *
+ * This function returns a slice/subslice pair that is guaranteed to work for
+ * read steering of the given register. Note that a value will be returned even
+ * if the register is not replicated and therefore does not actually require
+ * steering.
+ */
+void intel_gt_get_valid_steering_for_reg(struct intel_gt *gt, i915_reg_t reg,
+                                        u8 *sliceid, u8 *subsliceid)
+{
+       int type;
+
+       for (type = 0; type < NUM_STEERING_TYPES; type++) {
+               if (intel_gt_reg_needs_read_steering(gt, reg, type)) {
+                       intel_gt_get_valid_steering(gt, type, sliceid,
+                                                   subsliceid);
+                       return;
+               }
+       }
+
+       *sliceid = gt->default_steering.groupid;
+       *subsliceid = gt->default_steering.instanceid;
+}
+
 u32 intel_gt_read_register(struct intel_gt *gt, i915_reg_t reg)
 {
        int type;
@@ -932,6 +985,145 @@ u32 intel_gt_read_register(struct intel_gt *gt, i915_reg_t reg)
        return intel_uncore_read(gt->uncore, reg);
 }
 
+static void report_steering_type(struct drm_printer *p,
+                                struct intel_gt *gt,
+                                enum intel_steering_type type,
+                                bool dump_table)
+{
+       const struct intel_mmio_range *entry;
+       u8 slice, subslice;
+
+       BUILD_BUG_ON(ARRAY_SIZE(intel_steering_types) != NUM_STEERING_TYPES);
+
+       if (!gt->steering_table[type]) {
+               drm_printf(p, "%s steering: uses default steering\n",
+                          intel_steering_types[type]);
+               return;
+       }
+
+       intel_gt_get_valid_steering(gt, type, &slice, &subslice);
+       drm_printf(p, "%s steering: sliceid=0x%x, subsliceid=0x%x\n",
+                  intel_steering_types[type], slice, subslice);
+
+       if (!dump_table)
+               return;
+
+       for (entry = gt->steering_table[type]; entry->end; entry++)
+               drm_printf(p, "\t0x%06x - 0x%06x\n", entry->start, entry->end);
+}
+
+void intel_gt_report_steering(struct drm_printer *p, struct intel_gt *gt,
+                             bool dump_table)
+{
+       drm_printf(p, "Default steering: sliceid=0x%x, subsliceid=0x%x\n",
+                  gt->default_steering.groupid,
+                  gt->default_steering.instanceid);
+
+       if (HAS_MSLICES(gt->i915)) {
+               report_steering_type(p, gt, MSLICE, dump_table);
+               report_steering_type(p, gt, LNCF, dump_table);
+       }
+}
+
+static int intel_gt_tile_setup(struct intel_gt *gt, phys_addr_t phys_addr)
+{
+       int ret;
+
+       if (!gt_is_root(gt)) {
+               struct intel_uncore_mmio_debug *mmio_debug;
+               struct intel_uncore *uncore;
+
+               uncore = kzalloc(sizeof(*uncore), GFP_KERNEL);
+               if (!uncore)
+                       return -ENOMEM;
+
+               mmio_debug = kzalloc(sizeof(*mmio_debug), GFP_KERNEL);
+               if (!mmio_debug) {
+                       kfree(uncore);
+                       return -ENOMEM;
+               }
+
+               gt->uncore = uncore;
+               gt->uncore->debug = mmio_debug;
+
+               __intel_gt_init_early(gt);
+       }
+
+       intel_uncore_init_early(gt->uncore, gt);
+
+       ret = intel_uncore_setup_mmio(gt->uncore, phys_addr);
+       if (ret)
+               return ret;
+
+       gt->phys_addr = phys_addr;
+
+       return 0;
+}
+
+static void
+intel_gt_tile_cleanup(struct intel_gt *gt)
+{
+       intel_uncore_cleanup_mmio(gt->uncore);
+
+       if (!gt_is_root(gt)) {
+               kfree(gt->uncore->debug);
+               kfree(gt->uncore);
+               kfree(gt);
+       }
+}
+
+int intel_gt_probe_all(struct drm_i915_private *i915)
+{
+       struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
+       struct intel_gt *gt = &i915->gt0;
+       phys_addr_t phys_addr;
+       unsigned int mmio_bar;
+       int ret;
+
+       mmio_bar = GRAPHICS_VER(i915) == 2 ? 1 : 0;
+       phys_addr = pci_resource_start(pdev, mmio_bar);
+
+       /*
+        * We always have at least one primary GT on any device
+        * and it has already been initialized early during probe
+        * in i915_driver_probe()
+        */
+       ret = intel_gt_tile_setup(gt, phys_addr);
+       if (ret)
+               return ret;
+
+       i915->gt[0] = gt;
+
+       /* TODO: add more tiles */
+       return 0;
+}
+
+int intel_gt_tiles_init(struct drm_i915_private *i915)
+{
+       struct intel_gt *gt;
+       unsigned int id;
+       int ret;
+
+       for_each_gt(gt, i915, id) {
+               ret = intel_gt_probe_lmem(gt);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+
+void intel_gt_release_all(struct drm_i915_private *i915)
+{
+       struct intel_gt *gt;
+       unsigned int id;
+
+       for_each_gt(gt, i915, id) {
+               intel_gt_tile_cleanup(gt);
+               i915->gt[id] = NULL;
+       }
+}
+
 void intel_gt_info_print(const struct intel_gt_info *info,
                         struct drm_printer *p)
 {
index 0f571c8..0163bba 100644 (file)
 struct drm_i915_private;
 struct drm_printer;
 
+struct insert_entries {
+       struct i915_address_space *vm;
+       struct i915_vma_resource *vma_res;
+       enum i915_cache_level level;
+       u32 flags;
+};
+
 #define GT_TRACE(gt, fmt, ...) do {                                    \
        const struct intel_gt *gt__ __maybe_unused = (gt);              \
        GEM_TRACE("%s " fmt, dev_name(gt__->i915->drm.dev),             \
                  ##__VA_ARGS__);                                       \
 } while (0)
 
+static inline bool gt_is_root(struct intel_gt *gt)
+{
+       return !gt->info.id;
+}
+
 static inline struct intel_gt *uc_to_gt(struct intel_uc *uc)
 {
        return container_of(uc, struct intel_gt, uc);
@@ -34,10 +46,8 @@ static inline struct intel_gt *huc_to_gt(struct intel_huc *huc)
        return container_of(huc, struct intel_gt, uc.huc);
 }
 
-void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915);
-void __intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915);
+void intel_root_gt_init_early(struct drm_i915_private *i915);
 int intel_gt_assign_ggtt(struct intel_gt *gt);
-int intel_gt_probe_lmem(struct intel_gt *gt);
 int intel_gt_init_mmio(struct intel_gt *gt);
 int __must_check intel_gt_init_hw(struct intel_gt *gt);
 int intel_gt_init(struct intel_gt *gt);
@@ -47,7 +57,7 @@ void intel_gt_driver_unregister(struct intel_gt *gt);
 void intel_gt_driver_remove(struct intel_gt *gt);
 void intel_gt_driver_release(struct intel_gt *gt);
 
-void intel_gt_driver_late_release(struct intel_gt *gt);
+void intel_gt_driver_late_release_all(struct drm_i915_private *i915);
 
 int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout);
 
@@ -84,9 +94,25 @@ static inline bool intel_gt_needs_read_steering(struct intel_gt *gt,
        return gt->steering_table[type];
 }
 
+void intel_gt_get_valid_steering_for_reg(struct intel_gt *gt, i915_reg_t reg,
+                                        u8 *sliceid, u8 *subsliceid);
+
 u32 intel_gt_read_register_fw(struct intel_gt *gt, i915_reg_t reg);
 u32 intel_gt_read_register(struct intel_gt *gt, i915_reg_t reg);
 
+void intel_gt_report_steering(struct drm_printer *p, struct intel_gt *gt,
+                             bool dump_table);
+
+int intel_gt_probe_all(struct drm_i915_private *i915);
+int intel_gt_tiles_init(struct drm_i915_private *i915);
+void intel_gt_release_all(struct drm_i915_private *i915);
+
+#define for_each_gt(gt__, i915__, id__) \
+       for ((id__) = 0; \
+            (id__) < I915_MAX_GT; \
+            (id__)++) \
+               for_each_if(((gt__) = (i915__)->gt[(id__)]))
+
 void intel_gt_info_print(const struct intel_gt_info *info,
                         struct drm_printer *p);
 
@@ -94,4 +120,6 @@ void intel_gt_watchdog_work(struct work_struct *work);
 
 void intel_gt_invalidate_tlbs(struct intel_gt *gt);
 
+struct resource intel_pci_resource(struct pci_dev *pdev, int bar);
+
 #endif /* __INTEL_GT_H__ */
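The for_each_gt() iterator declared above walks the populated i915->gt[] slots via for_each_if(), skipping NULL entries. A minimal, hypothetical consumer (the helper name is invented purely for illustration) could look like this:

static void example_print_gt_ids(struct drm_i915_private *i915,
				 struct drm_printer *p)
{
	struct intel_gt *gt;
	unsigned int id;

	for_each_gt(gt, i915, id)
		drm_printf(p, "gt%u%s\n", id,
			   gt_is_root(gt) ? " (root)" : "");
}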
index 0db822c..d5d1b04 100644 (file)
@@ -161,6 +161,10 @@ void intel_gt_init_clock_frequency(struct intel_gt *gt)
        if (gt->clock_frequency)
                gt->clock_period_ns = intel_gt_clock_interval_to_ns(gt, 1);
 
+       /* Icelake appears to use another fixed frequency for CTX_TIMESTAMP */
+       if (GRAPHICS_VER(gt->i915) == 11)
+               gt->clock_period_ns = NSEC_PER_SEC / 13750000;
+
        GT_TRACE(gt,
                 "Using clock frequency: %dkHz, period: %dns, wrap: %lldms\n",
                 gt->clock_frequency / 1000,
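For reference, NSEC_PER_SEC / 13750000 is integer division: 1,000,000,000 / 13,750,000 truncates to 72, so on Icelake the CTX_TIMESTAMP period is treated as 72 ns (the exact value would be roughly 72.73 ns).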
index f103664..d886fdc 100644 (file)
@@ -6,6 +6,7 @@
 #include <linux/debugfs.h>
 
 #include "i915_drv.h"
+#include "intel_gt.h"
 #include "intel_gt_debugfs.h"
 #include "intel_gt_engines_debugfs.h"
 #include "intel_gt_pm_debugfs.h"
@@ -29,7 +30,7 @@ int intel_gt_debugfs_reset_show(struct intel_gt *gt, u64 *val)
        }
 }
 
-int intel_gt_debugfs_reset_store(struct intel_gt *gt, u64 val)
+void intel_gt_debugfs_reset_store(struct intel_gt *gt, u64 val)
 {
        /* Flush any previous reset before applying for a new one */
        wait_event(gt->reset.queue,
@@ -37,7 +38,6 @@ int intel_gt_debugfs_reset_store(struct intel_gt *gt, u64 val)
 
        intel_gt_handle_error(gt, val, I915_ERROR_CAPTURE,
                              "Manually reset engine mask to %llx", val);
-       return 0;
 }
 
 /*
@@ -51,16 +51,30 @@ static int __intel_gt_debugfs_reset_show(void *data, u64 *val)
 
 static int __intel_gt_debugfs_reset_store(void *data, u64 val)
 {
-       return intel_gt_debugfs_reset_store(data, val);
+       intel_gt_debugfs_reset_store(data, val);
+
+       return 0;
 }
 
 DEFINE_SIMPLE_ATTRIBUTE(reset_fops, __intel_gt_debugfs_reset_show,
                        __intel_gt_debugfs_reset_store, "%llu\n");
 
+static int steering_show(struct seq_file *m, void *data)
+{
+       struct drm_printer p = drm_seq_file_printer(m);
+       struct intel_gt *gt = m->private;
+
+       intel_gt_report_steering(&p, gt, true);
+
+       return 0;
+}
+DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(steering);
+
 static void gt_debugfs_register(struct intel_gt *gt, struct dentry *root)
 {
        static const struct intel_gt_debugfs_file files[] = {
                { "reset", &reset_fops, NULL },
+               { "steering", &steering_fops },
        };
 
        intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), gt);
index 17e79b7..e4110ee 100644 (file)
@@ -48,6 +48,6 @@ void intel_gt_debugfs_register_files(struct dentry *root,
 
 /* functions that need to be accessed by the upper level non-gt interfaces */
 int intel_gt_debugfs_reset_show(struct intel_gt *gt, u64 *val);
-int intel_gt_debugfs_reset_store(struct intel_gt *gt, u64 val);
+void intel_gt_debugfs_reset_store(struct intel_gt *gt, u64 val);
 
 #endif /* INTEL_GT_DEBUGFS_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_gmch.c b/drivers/gpu/drm/i915/gt/intel_gt_gmch.c
new file mode 100644 (file)
index 0000000..64e9ad4
--- /dev/null
@@ -0,0 +1,654 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <drm/intel-gtt.h>
+#include <drm/i915_drm.h>
+
+#include <linux/agp_backend.h>
+#include <linux/stop_machine.h>
+
+#include "i915_drv.h"
+#include "intel_gt_gmch.h"
+#include "intel_gt_regs.h"
+#include "intel_gt.h"
+#include "i915_utils.h"
+
+#include "gen8_ppgtt.h"
+
+struct insert_page {
+       struct i915_address_space *vm;
+       dma_addr_t addr;
+       u64 offset;
+       enum i915_cache_level level;
+};
+
+static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
+{
+       writeq(pte, addr);
+}
+
+static void nop_clear_range(struct i915_address_space *vm,
+                           u64 start, u64 length)
+{
+}
+
+static u64 snb_pte_encode(dma_addr_t addr,
+                         enum i915_cache_level level,
+                         u32 flags)
+{
+       gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
+
+       switch (level) {
+       case I915_CACHE_L3_LLC:
+       case I915_CACHE_LLC:
+               pte |= GEN6_PTE_CACHE_LLC;
+               break;
+       case I915_CACHE_NONE:
+               pte |= GEN6_PTE_UNCACHED;
+               break;
+       default:
+               MISSING_CASE(level);
+       }
+
+       return pte;
+}
+
+static u64 ivb_pte_encode(dma_addr_t addr,
+                         enum i915_cache_level level,
+                         u32 flags)
+{
+       gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
+
+       switch (level) {
+       case I915_CACHE_L3_LLC:
+               pte |= GEN7_PTE_CACHE_L3_LLC;
+               break;
+       case I915_CACHE_LLC:
+               pte |= GEN6_PTE_CACHE_LLC;
+               break;
+       case I915_CACHE_NONE:
+               pte |= GEN6_PTE_UNCACHED;
+               break;
+       default:
+               MISSING_CASE(level);
+       }
+
+       return pte;
+}
+
+static u64 byt_pte_encode(dma_addr_t addr,
+                         enum i915_cache_level level,
+                         u32 flags)
+{
+       gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
+
+       if (!(flags & PTE_READ_ONLY))
+               pte |= BYT_PTE_WRITEABLE;
+
+       if (level != I915_CACHE_NONE)
+               pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
+
+       return pte;
+}
+
+static u64 hsw_pte_encode(dma_addr_t addr,
+                         enum i915_cache_level level,
+                         u32 flags)
+{
+       gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
+
+       if (level != I915_CACHE_NONE)
+               pte |= HSW_WB_LLC_AGE3;
+
+       return pte;
+}
+
+static u64 iris_pte_encode(dma_addr_t addr,
+                          enum i915_cache_level level,
+                          u32 flags)
+{
+       gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
+
+       switch (level) {
+       case I915_CACHE_NONE:
+               break;
+       case I915_CACHE_WT:
+               pte |= HSW_WT_ELLC_LLC_AGE3;
+               break;
+       default:
+               pte |= HSW_WB_ELLC_LLC_AGE3;
+               break;
+       }
+
+       return pte;
+}
+
+static void gen5_ggtt_insert_page(struct i915_address_space *vm,
+                                 dma_addr_t addr,
+                                 u64 offset,
+                                 enum i915_cache_level cache_level,
+                                 u32 unused)
+{
+       unsigned int flags = (cache_level == I915_CACHE_NONE) ?
+               AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
+
+       intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
+}
+
+static void gen6_ggtt_insert_page(struct i915_address_space *vm,
+                                 dma_addr_t addr,
+                                 u64 offset,
+                                 enum i915_cache_level level,
+                                 u32 flags)
+{
+       struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+       gen6_pte_t __iomem *pte =
+               (gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
+
+       iowrite32(vm->pte_encode(addr, level, flags), pte);
+
+       ggtt->invalidate(ggtt);
+}
+
+static void gen8_ggtt_insert_page(struct i915_address_space *vm,
+                                 dma_addr_t addr,
+                                 u64 offset,
+                                 enum i915_cache_level level,
+                                 u32 flags)
+{
+       struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+       gen8_pte_t __iomem *pte =
+               (gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
+
+       gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, flags));
+
+       ggtt->invalidate(ggtt);
+}
+
+static void gen5_ggtt_insert_entries(struct i915_address_space *vm,
+                                    struct i915_vma_resource *vma_res,
+                                    enum i915_cache_level cache_level,
+                                    u32 unused)
+{
+       unsigned int flags = (cache_level == I915_CACHE_NONE) ?
+               AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
+
+       intel_gtt_insert_sg_entries(vma_res->bi.pages, vma_res->start >> PAGE_SHIFT,
+                                   flags);
+}
+
+/*
+ * Binds an object into the global gtt with the specified cache level.
+ * The object will be accessible to the GPU via commands whose operands
+ * reference offsets within the global GTT as well as accessible by the GPU
+ * through the GMADR mapped BAR (i915->mm.gtt->gtt).
+ */
+static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
+                                    struct i915_vma_resource *vma_res,
+                                    enum i915_cache_level level,
+                                    u32 flags)
+{
+       struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+       gen6_pte_t __iomem *gte;
+       gen6_pte_t __iomem *end;
+       struct sgt_iter iter;
+       dma_addr_t addr;
+
+       gte = (gen6_pte_t __iomem *)ggtt->gsm;
+       gte += vma_res->start / I915_GTT_PAGE_SIZE;
+       end = gte + vma_res->node_size / I915_GTT_PAGE_SIZE;
+
+       for_each_sgt_daddr(addr, iter, vma_res->bi.pages)
+               iowrite32(vm->pte_encode(addr, level, flags), gte++);
+       GEM_BUG_ON(gte > end);
+
+       /* Fill the allocated but "unused" space beyond the end of the buffer */
+       while (gte < end)
+               iowrite32(vm->scratch[0]->encode, gte++);
+
+       /*
+        * We want to flush the TLBs only after we're certain all the PTE
+        * updates have finished.
+        */
+       ggtt->invalidate(ggtt);
+}
+
+static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
+                                    struct i915_vma_resource *vma_res,
+                                    enum i915_cache_level level,
+                                    u32 flags)
+{
+       const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, flags);
+       struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+       gen8_pte_t __iomem *gte;
+       gen8_pte_t __iomem *end;
+       struct sgt_iter iter;
+       dma_addr_t addr;
+
+       /*
+        * Note that we ignore PTE_READ_ONLY here. The caller must be careful
+        * not to allow the user to override access to a read only page.
+        */
+
+       gte = (gen8_pte_t __iomem *)ggtt->gsm;
+       gte += vma_res->start / I915_GTT_PAGE_SIZE;
+       end = gte + vma_res->node_size / I915_GTT_PAGE_SIZE;
+
+       for_each_sgt_daddr(addr, iter, vma_res->bi.pages)
+               gen8_set_pte(gte++, pte_encode | addr);
+       GEM_BUG_ON(gte > end);
+
+       /* Fill the allocated but "unused" space beyond the end of the buffer */
+       while (gte < end)
+               gen8_set_pte(gte++, vm->scratch[0]->encode);
+
+       /*
+        * We want to flush the TLBs only after we're certain all the PTE
+        * updates have finished.
+        */
+       ggtt->invalidate(ggtt);
+}
+
+static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
+{
+       /*
+        * Make sure the internal GAM fifo has been cleared of all GTT
+        * writes before exiting stop_machine(). This guarantees that
+        * any aperture accesses waiting to start in another process
+        * cannot back up behind the GTT writes causing a hang.
+        * The register can be any arbitrary GAM register.
+        */
+       intel_uncore_posting_read_fw(vm->gt->uncore, GFX_FLSH_CNTL_GEN6);
+}
+
+static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
+{
+       struct insert_page *arg = _arg;
+
+       gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0);
+       bxt_vtd_ggtt_wa(arg->vm);
+
+       return 0;
+}
+
+static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
+                                         dma_addr_t addr,
+                                         u64 offset,
+                                         enum i915_cache_level level,
+                                         u32 unused)
+{
+       struct insert_page arg = { vm, addr, offset, level };
+
+       stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
+}
+
+static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
+{
+       struct insert_entries *arg = _arg;
+
+       gen8_ggtt_insert_entries(arg->vm, arg->vma_res, arg->level, arg->flags);
+       bxt_vtd_ggtt_wa(arg->vm);
+
+       return 0;
+}
+
+static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
+                                            struct i915_vma_resource *vma_res,
+                                            enum i915_cache_level level,
+                                            u32 flags)
+{
+       struct insert_entries arg = { vm, vma_res, level, flags };
+
+       stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
+}
+
+void intel_gt_gmch_gen5_chipset_flush(struct intel_gt *gt)
+{
+       intel_gtt_chipset_flush();
+}
+
+static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt)
+{
+       intel_gtt_chipset_flush();
+}
+
+static void gen5_ggtt_clear_range(struct i915_address_space *vm,
+                                        u64 start, u64 length)
+{
+       intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT);
+}
+
+static void gen6_ggtt_clear_range(struct i915_address_space *vm,
+                                 u64 start, u64 length)
+{
+       struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+       unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
+       unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
+       gen6_pte_t scratch_pte, __iomem *gtt_base =
+               (gen6_pte_t __iomem *)ggtt->gsm + first_entry;
+       const int max_entries = ggtt_total_entries(ggtt) - first_entry;
+       int i;
+
+       if (WARN(num_entries > max_entries,
+                "First entry = %d; Num entries = %d (max=%d)\n",
+                first_entry, num_entries, max_entries))
+               num_entries = max_entries;
+
+       scratch_pte = vm->scratch[0]->encode;
+       for (i = 0; i < num_entries; i++)
+               iowrite32(scratch_pte, &gtt_base[i]);
+}
+
+static void gen8_ggtt_clear_range(struct i915_address_space *vm,
+                                 u64 start, u64 length)
+{
+       struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+       unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
+       unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
+       const gen8_pte_t scratch_pte = vm->scratch[0]->encode;
+       gen8_pte_t __iomem *gtt_base =
+               (gen8_pte_t __iomem *)ggtt->gsm + first_entry;
+       const int max_entries = ggtt_total_entries(ggtt) - first_entry;
+       int i;
+
+       if (WARN(num_entries > max_entries,
+                "First entry = %d; Num entries = %d (max=%d)\n",
+                first_entry, num_entries, max_entries))
+               num_entries = max_entries;
+
+       for (i = 0; i < num_entries; i++)
+               gen8_set_pte(&gtt_base[i], scratch_pte);
+}
+
+static void gen5_gmch_remove(struct i915_address_space *vm)
+{
+       intel_gmch_remove();
+}
+
+static void gen6_gmch_remove(struct i915_address_space *vm)
+{
+       struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+
+       iounmap(ggtt->gsm);
+       free_scratch(vm);
+}
+
+/*
+ * Certain Gen5 chipsets require idling the GPU before
+ * unmapping anything from the GTT when VT-d is enabled.
+ */
+static bool needs_idle_maps(struct drm_i915_private *i915)
+{
+       /*
+        * Query intel_iommu to see if we need the workaround. Presumably that
+        * was loaded first.
+        */
+       if (!intel_vtd_active(i915))
+               return false;
+
+       if (GRAPHICS_VER(i915) == 5 && IS_MOBILE(i915))
+               return true;
+
+       if (GRAPHICS_VER(i915) == 12)
+               return true; /* XXX DMAR fault reason 7 */
+
+       return false;
+}
+
+static unsigned int gen6_gttmmadr_size(struct drm_i915_private *i915)
+{
+       /*
+        * GEN6: GTTMMADR size is 4MB and GTTADR starts at 2MB offset
+        * GEN8: GTTMMADR size is 16MB and GTTADR starts at 8MB offset
+        */
+       GEM_BUG_ON(GRAPHICS_VER(i915) < 6);
+       return (GRAPHICS_VER(i915) < 8) ? SZ_4M : SZ_16M;
+}
+
+static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
+{
+       snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
+       snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
+       return snb_gmch_ctl << 20;
+}
+
+static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
+{
+       bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
+       bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
+       if (bdw_gmch_ctl)
+               bdw_gmch_ctl = 1 << bdw_gmch_ctl;
+
+#ifdef CONFIG_X86_32
+       /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * I915_GTT_PAGE_SIZE */
+       if (bdw_gmch_ctl > 4)
+               bdw_gmch_ctl = 4;
+#endif
+
+       return bdw_gmch_ctl << 20;
+}
+
+static unsigned int gen6_gttadr_offset(struct drm_i915_private *i915)
+{
+       return gen6_gttmmadr_size(i915) / 2;
+}
+
+static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
+{
+       struct drm_i915_private *i915 = ggtt->vm.i915;
+       struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
+       phys_addr_t phys_addr;
+       u32 pte_flags;
+       int ret;
+
+       GEM_WARN_ON(pci_resource_len(pdev, 0) != gen6_gttmmadr_size(i915));
+       phys_addr = pci_resource_start(pdev, 0) + gen6_gttadr_offset(i915);
+
+       /*
+        * On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range
+        * will be dropped. For WC mappings in general we have 64 byte burst
+        * writes when the WC buffer is flushed, so we can't use it, but have to
+        * resort to an uncached mapping. The WC issue is easily caught by the
+        * readback check when writing GTT PTE entries.
+        */
+       if (IS_GEN9_LP(i915) || GRAPHICS_VER(i915) >= 11)
+               ggtt->gsm = ioremap(phys_addr, size);
+       else
+               ggtt->gsm = ioremap_wc(phys_addr, size);
+       if (!ggtt->gsm) {
+               drm_err(&i915->drm, "Failed to map the ggtt page table\n");
+               return -ENOMEM;
+       }
+
+       kref_init(&ggtt->vm.resv_ref);
+       ret = setup_scratch_page(&ggtt->vm);
+       if (ret) {
+               drm_err(&i915->drm, "Scratch setup failed\n");
+               /* iounmap will also get called at remove, but meh */
+               iounmap(ggtt->gsm);
+               return ret;
+       }
+
+       pte_flags = 0;
+       if (i915_gem_object_is_lmem(ggtt->vm.scratch[0]))
+               pte_flags |= PTE_LM;
+
+       ggtt->vm.scratch[0]->encode =
+               ggtt->vm.pte_encode(px_dma(ggtt->vm.scratch[0]),
+                                   I915_CACHE_NONE, pte_flags);
+
+       return 0;
+}
+
+int intel_gt_gmch_gen5_probe(struct i915_ggtt *ggtt)
+{
+       struct drm_i915_private *i915 = ggtt->vm.i915;
+       phys_addr_t gmadr_base;
+       int ret;
+
+       ret = intel_gmch_probe(i915->bridge_dev, to_pci_dev(i915->drm.dev), NULL);
+       if (!ret) {
+               drm_err(&i915->drm, "failed to set up gmch\n");
+               return -EIO;
+       }
+
+       intel_gtt_get(&ggtt->vm.total, &gmadr_base, &ggtt->mappable_end);
+
+       ggtt->gmadr =
+               (struct resource)DEFINE_RES_MEM(gmadr_base, ggtt->mappable_end);
+
+       ggtt->vm.alloc_pt_dma = alloc_pt_dma;
+       ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
+
+       if (needs_idle_maps(i915)) {
+               drm_notice(&i915->drm,
+                          "Flushing DMA requests before IOMMU unmaps; performance may be degraded\n");
+               ggtt->do_idle_maps = true;
+       }
+
+       ggtt->vm.insert_page = gen5_ggtt_insert_page;
+       ggtt->vm.insert_entries = gen5_ggtt_insert_entries;
+       ggtt->vm.clear_range = gen5_ggtt_clear_range;
+       ggtt->vm.cleanup = gen5_gmch_remove;
+
+       ggtt->invalidate = gmch_ggtt_invalidate;
+
+       ggtt->vm.vma_ops.bind_vma    = intel_ggtt_bind_vma;
+       ggtt->vm.vma_ops.unbind_vma  = intel_ggtt_unbind_vma;
+
+       if (unlikely(ggtt->do_idle_maps))
+               drm_notice(&i915->drm,
+                          "Applying Ironlake quirks for intel_iommu\n");
+
+       return 0;
+}
+
+int intel_gt_gmch_gen6_probe(struct i915_ggtt *ggtt)
+{
+       struct drm_i915_private *i915 = ggtt->vm.i915;
+       struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
+       unsigned int size;
+       u16 snb_gmch_ctl;
+
+       ggtt->gmadr = intel_pci_resource(pdev, 2);
+       ggtt->mappable_end = resource_size(&ggtt->gmadr);
+
+       /*
+        * 64/512MB is the current min/max we actually know of, but this is
+        * just a coarse sanity check.
+        */
+       if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) {
+               drm_err(&i915->drm, "Unknown GMADR size (%pa)\n",
+                       &ggtt->mappable_end);
+               return -ENXIO;
+       }
+
+       pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
+
+       size = gen6_get_total_gtt_size(snb_gmch_ctl);
+       ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE;
+
+       ggtt->vm.alloc_pt_dma = alloc_pt_dma;
+       ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
+
+       ggtt->vm.clear_range = nop_clear_range;
+       if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915))
+               ggtt->vm.clear_range = gen6_ggtt_clear_range;
+       ggtt->vm.insert_page = gen6_ggtt_insert_page;
+       ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
+       ggtt->vm.cleanup = gen6_gmch_remove;
+
+       ggtt->invalidate = gen6_ggtt_invalidate;
+
+       if (HAS_EDRAM(i915))
+               ggtt->vm.pte_encode = iris_pte_encode;
+       else if (IS_HASWELL(i915))
+               ggtt->vm.pte_encode = hsw_pte_encode;
+       else if (IS_VALLEYVIEW(i915))
+               ggtt->vm.pte_encode = byt_pte_encode;
+       else if (GRAPHICS_VER(i915) >= 7)
+               ggtt->vm.pte_encode = ivb_pte_encode;
+       else
+               ggtt->vm.pte_encode = snb_pte_encode;
+
+       ggtt->vm.vma_ops.bind_vma    = intel_ggtt_bind_vma;
+       ggtt->vm.vma_ops.unbind_vma  = intel_ggtt_unbind_vma;
+
+       return ggtt_probe_common(ggtt, size);
+}
+
+static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
+{
+       gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
+       gmch_ctrl &= SNB_GMCH_GGMS_MASK;
+
+       if (gmch_ctrl)
+               return 1 << (20 + gmch_ctrl);
+
+       return 0;
+}
+
+int intel_gt_gmch_gen8_probe(struct i915_ggtt *ggtt)
+{
+       struct drm_i915_private *i915 = ggtt->vm.i915;
+       struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
+       unsigned int size;
+       u16 snb_gmch_ctl;
+
+       /* TODO: We're not aware of mappable constraints on gen8 yet */
+       if (!HAS_LMEM(i915)) {
+               ggtt->gmadr = intel_pci_resource(pdev, 2);
+               ggtt->mappable_end = resource_size(&ggtt->gmadr);
+       }
+
+       pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
+       if (IS_CHERRYVIEW(i915))
+               size = chv_get_total_gtt_size(snb_gmch_ctl);
+       else
+               size = gen8_get_total_gtt_size(snb_gmch_ctl);
+
+       ggtt->vm.alloc_pt_dma = alloc_pt_dma;
+       ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
+       ggtt->vm.lmem_pt_obj_flags = I915_BO_ALLOC_PM_EARLY;
+
+       ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
+       ggtt->vm.cleanup = gen6_gmch_remove;
+       ggtt->vm.insert_page = gen8_ggtt_insert_page;
+       ggtt->vm.clear_range = nop_clear_range;
+       if (intel_scanout_needs_vtd_wa(i915))
+               ggtt->vm.clear_range = gen8_ggtt_clear_range;
+
+       ggtt->vm.insert_entries = gen8_ggtt_insert_entries;
+
+       /*
+        * Serialize GTT updates with aperture access on BXT if VT-d is on,
+        * and always on CHV.
+        */
+       if (intel_vm_no_concurrent_access_wa(i915)) {
+               ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
+               ggtt->vm.insert_page    = bxt_vtd_ggtt_insert_page__BKL;
+               ggtt->vm.bind_async_flags =
+                       I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
+       }
+
+       ggtt->invalidate = gen8_ggtt_invalidate;
+
+       ggtt->vm.vma_ops.bind_vma    = intel_ggtt_bind_vma;
+       ggtt->vm.vma_ops.unbind_vma  = intel_ggtt_unbind_vma;
+
+       ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
+
+       setup_private_pat(ggtt->vm.gt->uncore);
+
+       return ggtt_probe_common(ggtt, size);
+}
+
+int intel_gt_gmch_gen5_enable_hw(struct drm_i915_private *i915)
+{
+       if (GRAPHICS_VER(i915) < 6 && !intel_enable_gtt())
+               return -EIO;
+
+       return 0;
+}
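To sanity-check the sizing math in gen8_get_total_gtt_size() and the ggtt->vm.total computation above, here is a small self-contained userspace sketch; the GGMS value is an example, not read from hardware. A BDW-style GGMS field of 3 decodes to 1 << 3 = 8 MiB of PTE space, and with 8-byte PTEs mapping 4 KiB pages that yields a 4 GiB GGTT.

#include <stdio.h>

int main(void)
{
	unsigned int ggms = 3;	/* example GGMS field value */
	unsigned long long pte_bytes = (1ull << ggms) << 20;	/* 8 MiB of PTEs */
	unsigned long long ggtt_total = pte_bytes / 8 * 4096;	/* 8-byte PTEs, 4 KiB pages */

	printf("PTE space: %llu MiB, GGTT: %llu GiB\n",
	       pte_bytes >> 20, ggtt_total >> 30);
	return 0;
}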
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_gmch.h b/drivers/gpu/drm/i915/gt/intel_gt_gmch.h
new file mode 100644 (file)
index 0000000..75ed55c
--- /dev/null
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef __INTEL_GT_GMCH_H__
+#define __INTEL_GT_GMCH_H__
+
+#include "intel_gtt.h"
+
+/* For x86 platforms */
+#if IS_ENABLED(CONFIG_X86)
+void intel_gt_gmch_gen5_chipset_flush(struct intel_gt *gt);
+int intel_gt_gmch_gen6_probe(struct i915_ggtt *ggtt);
+int intel_gt_gmch_gen8_probe(struct i915_ggtt *ggtt);
+int intel_gt_gmch_gen5_probe(struct i915_ggtt *ggtt);
+int intel_gt_gmch_gen5_enable_hw(struct drm_i915_private *i915);
+
+/* Stubs for non-x86 platforms */
+#else
+static inline void intel_gt_gmch_gen5_chipset_flush(struct intel_gt *gt)
+{
+}
+static inline int intel_gt_gmch_gen5_probe(struct i915_ggtt *ggtt)
+{
+       /* No HW should be probed for this case yet, return fail */
+       return -ENODEV;
+}
+static inline int intel_gt_gmch_gen6_probe(struct i915_ggtt *ggtt)
+{
+       /* No HW should be probed for this case yet, return fail */
+       return -ENODEV;
+}
+static inline int intel_gt_gmch_gen8_probe(struct i915_ggtt *ggtt)
+{
+       /* No HW should be probed for this case yet, return fail */
+       return -ENODEV;
+}
+static inline int intel_gt_gmch_gen5_enable_hw(struct drm_i915_private *i915)
+{
+       /* No HW should be enabled for this case yet, return fail */
+       return -ENODEV;
+}
+#endif
+
+#endif /* __INTEL_GT_GMCH_H__ */
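The probe entry points declared above are split by hardware generation; a hypothetical caller would route on GRAPHICS_VER() roughly as follows. This is illustrative only — the real selection happens in the GGTT probe code, which is not part of this file.

static int example_ggtt_probe(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;

	/* Illustrative routing only. */
	if (GRAPHICS_VER(i915) < 6)
		return intel_gt_gmch_gen5_probe(ggtt);
	else if (GRAPHICS_VER(i915) < 8)
		return intel_gt_gmch_gen6_probe(ggtt);
	else
		return intel_gt_gmch_gen8_probe(ggtt);
}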
index c0fa41e..e66479d 100644 (file)
@@ -128,7 +128,14 @@ static const struct intel_wakeref_ops wf_ops = {
 
 void intel_gt_pm_init_early(struct intel_gt *gt)
 {
-       intel_wakeref_init(&gt->wakeref, gt->uncore->rpm, &wf_ops);
+       /*
+        * We access the runtime_pm structure via gt->i915 here rather than
+        * gt->uncore as we do elsewhere in the file because gt->uncore is not
+        * yet initialized for all tiles at this point in the driver startup.
+        * runtime_pm is per-device rather than per-tile, so this is still the
+        * correct structure.
+        */
+       intel_wakeref_init(&gt->wakeref, &gt->i915->runtime_pm, &wf_ops);
        seqcount_mutex_init(&gt->stats.lock, &gt->wakeref.mutex);
 }
 
index 3776591..8592eb1 100644 (file)
 #include "intel_uncore.h"
 #include "vlv_sideband.h"
 
-int intel_gt_pm_debugfs_forcewake_user_open(struct intel_gt *gt)
+void intel_gt_pm_debugfs_forcewake_user_open(struct intel_gt *gt)
 {
        atomic_inc(&gt->user_wakeref);
        intel_gt_pm_get(gt);
        if (GRAPHICS_VER(gt->i915) >= 6)
                intel_uncore_forcewake_user_get(gt->uncore);
-
-       return 0;
 }
 
-int intel_gt_pm_debugfs_forcewake_user_release(struct intel_gt *gt)
+void intel_gt_pm_debugfs_forcewake_user_release(struct intel_gt *gt)
 {
        if (GRAPHICS_VER(gt->i915) >= 6)
                intel_uncore_forcewake_user_put(gt->uncore);
        intel_gt_pm_put(gt);
        atomic_dec(&gt->user_wakeref);
-
-       return 0;
 }
 
 static int forcewake_user_open(struct inode *inode, struct file *file)
 {
        struct intel_gt *gt = inode->i_private;
 
-       return intel_gt_pm_debugfs_forcewake_user_open(gt);
+       intel_gt_pm_debugfs_forcewake_user_open(gt);
+
+       return 0;
 }
 
 static int forcewake_user_release(struct inode *inode, struct file *file)
 {
        struct intel_gt *gt = inode->i_private;
 
-       return intel_gt_pm_debugfs_forcewake_user_release(gt);
+       intel_gt_pm_debugfs_forcewake_user_release(gt);
+
+       return 0;
 }
 
 static const struct file_operations forcewake_user_fops = {
@@ -341,17 +341,16 @@ void intel_gt_pm_frequency_dump(struct intel_gt *gt, struct drm_printer *p)
        } else if (GRAPHICS_VER(i915) >= 6) {
                u32 rp_state_limits;
                u32 gt_perf_status;
-               u32 rp_state_cap;
+               struct intel_rps_freq_caps caps;
                u32 rpmodectl, rpinclimit, rpdeclimit;
                u32 rpstat, cagf, reqf;
                u32 rpcurupei, rpcurup, rpprevup;
                u32 rpcurdownei, rpcurdown, rpprevdown;
                u32 rpupei, rpupt, rpdownei, rpdownt;
                u32 pm_ier, pm_imr, pm_isr, pm_iir, pm_mask;
-               int max_freq;
 
                rp_state_limits = intel_uncore_read(uncore, GEN6_RP_STATE_LIMITS);
-               rp_state_cap = intel_rps_read_state_cap(rps);
+               gen6_rps_get_freq_caps(rps, &caps);
                if (IS_GEN9_LP(i915))
                        gt_perf_status = intel_uncore_read(uncore, BXT_GT_PERF_STATUS);
                else
@@ -474,25 +473,12 @@ void intel_gt_pm_frequency_dump(struct intel_gt *gt, struct drm_printer *p)
                drm_printf(p, "RP DOWN THRESHOLD: %d (%lldns)\n",
                           rpdownt, intel_gt_pm_interval_to_ns(gt, rpdownt));
 
-               max_freq = (IS_GEN9_LP(i915) ? rp_state_cap >> 0 :
-                           rp_state_cap >> 16) & 0xff;
-               max_freq *= (IS_GEN9_BC(i915) ||
-                            GRAPHICS_VER(i915) >= 11 ? GEN9_FREQ_SCALER : 1);
                drm_printf(p, "Lowest (RPN) frequency: %dMHz\n",
-                          intel_gpu_freq(rps, max_freq));
-
-               max_freq = (rp_state_cap & 0xff00) >> 8;
-               max_freq *= (IS_GEN9_BC(i915) ||
-                            GRAPHICS_VER(i915) >= 11 ? GEN9_FREQ_SCALER : 1);
+                          intel_gpu_freq(rps, caps.min_freq));
                drm_printf(p, "Nominal (RP1) frequency: %dMHz\n",
-                          intel_gpu_freq(rps, max_freq));
-
-               max_freq = (IS_GEN9_LP(i915) ? rp_state_cap >> 16 :
-                           rp_state_cap >> 0) & 0xff;
-               max_freq *= (IS_GEN9_BC(i915) ||
-                            GRAPHICS_VER(i915) >= 11 ? GEN9_FREQ_SCALER : 1);
+                          intel_gpu_freq(rps, caps.rp1_freq));
                drm_printf(p, "Max non-overclocked (RP0) frequency: %dMHz\n",
-                          intel_gpu_freq(rps, max_freq));
+                          intel_gpu_freq(rps, caps.rp0_freq));
                drm_printf(p, "Max overclocked frequency: %dMHz\n",
                           intel_gpu_freq(rps, rps->max_freq));
 
index a845788..0ace8c2 100644 (file)
@@ -14,7 +14,7 @@ void intel_gt_pm_debugfs_register(struct intel_gt *gt, struct dentry *root);
 void intel_gt_pm_frequency_dump(struct intel_gt *gt, struct drm_printer *m);
 
 /* functions that need to be accessed by the upper level non-gt interfaces */
-int intel_gt_pm_debugfs_forcewake_user_open(struct intel_gt *gt);
-int intel_gt_pm_debugfs_forcewake_user_release(struct intel_gt *gt);
+void intel_gt_pm_debugfs_forcewake_user_open(struct intel_gt *gt);
+void intel_gt_pm_debugfs_forcewake_user_release(struct intel_gt *gt);
 
 #endif /* INTEL_GT_PM_DEBUGFS_H */
index 19cd34f..17432b0 100644 (file)
@@ -46,6 +46,7 @@
 #define   GEN8_MCR_SLICE_MASK                  GEN8_MCR_SLICE(3)
 #define   GEN8_MCR_SUBSLICE(subslice)          (((subslice) & 3) << 24)
 #define   GEN8_MCR_SUBSLICE_MASK               GEN8_MCR_SUBSLICE(3)
+#define   GEN11_MCR_MULTICAST                  REG_BIT(31)
 #define   GEN11_MCR_SLICE(slice)               (((slice) & 0xf) << 27)
 #define   GEN11_MCR_SLICE_MASK                 GEN11_MCR_SLICE(0xf)
 #define   GEN11_MCR_SUBSLICE(subslice)         (((subslice) & 0x7) << 24)
 #define EU_PERF_CNTL3                          _MMIO(0xe758)
 
 #define LSC_CHICKEN_BIT_0                      _MMIO(0xe7c8)
+#define   DISABLE_D8_D16_COASLESCE             REG_BIT(30)
 #define   FORCE_1_SUB_MESSAGE_PER_FRAGMENT     REG_BIT(15)
 #define LSC_CHICKEN_BIT_0_UDW                  _MMIO(0xe7c8 + 4)
 #define   DIS_CHAIN_2XSIMD8                    REG_BIT(55 - 32)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
new file mode 100644 (file)
index 0000000..8ec8bc6
--- /dev/null
@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <drm/drm_device.h>
+#include <linux/device.h>
+#include <linux/kobject.h>
+#include <linux/printk.h>
+#include <linux/sysfs.h>
+
+#include "i915_drv.h"
+#include "i915_sysfs.h"
+#include "intel_gt.h"
+#include "intel_gt_sysfs.h"
+#include "intel_gt_sysfs_pm.h"
+#include "intel_gt_types.h"
+#include "intel_rc6.h"
+
+bool is_object_gt(struct kobject *kobj)
+{
+       return !strncmp(kobj->name, "gt", 2);
+}
+
+static struct intel_gt *kobj_to_gt(struct kobject *kobj)
+{
+       return container_of(kobj, struct kobj_gt, base)->gt;
+}
+
+struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev,
+                                           const char *name)
+{
+       struct kobject *kobj = &dev->kobj;
+
+       /*
+        * We need to know whether this interface was called from the
+        * gt/ directory or from its parent, because the private data
+        * differs: when called from gt/ it is a "struct intel_gt *",
+        * otherwise it is a "struct drm_i915_private *".
+        */
+       if (!is_object_gt(kobj)) {
+               struct drm_i915_private *i915 = kdev_minor_to_i915(dev);
+
+               return to_gt(i915);
+       }
+
+       return kobj_to_gt(kobj);
+}
+
+static struct kobject *gt_get_parent_obj(struct intel_gt *gt)
+{
+       return &gt->i915->drm.primary->kdev->kobj;
+}
+
+static ssize_t id_show(struct device *dev,
+                      struct device_attribute *attr,
+                      char *buf)
+{
+       struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
+
+       return sysfs_emit(buf, "%u\n", gt->info.id);
+}
+static DEVICE_ATTR_RO(id);
+
+static struct attribute *id_attrs[] = {
+       &dev_attr_id.attr,
+       NULL,
+};
+ATTRIBUTE_GROUPS(id);
+
+static void kobj_gt_release(struct kobject *kobj)
+{
+       kfree(kobj);
+}
+
+static struct kobj_type kobj_gt_type = {
+       .release = kobj_gt_release,
+       .sysfs_ops = &kobj_sysfs_ops,
+       .default_groups = id_groups,
+};
+
+void intel_gt_sysfs_register(struct intel_gt *gt)
+{
+       struct kobj_gt *kg;
+
+       /*
+        * For ABI compatibility the legacy files must still be
+        * generated under the parent directory; create them only
+        * for gt0 to avoid duplicates.
+        */
+       if (gt_is_root(gt))
+               intel_gt_sysfs_pm_init(gt, gt_get_parent_obj(gt));
+
+       kg = kzalloc(sizeof(*kg), GFP_KERNEL);
+       if (!kg)
+               goto exit_fail;
+
+       kobject_init(&kg->base, &kobj_gt_type);
+       kg->gt = gt;
+
+       /* xfer ownership to sysfs tree */
+       if (kobject_add(&kg->base, gt->i915->sysfs_gt, "gt%d", gt->info.id))
+               goto exit_kobj_put;
+
+       intel_gt_sysfs_pm_init(gt, &kg->base);
+
+       return;
+
+exit_kobj_put:
+       kobject_put(&kg->base);
+
+exit_fail:
+       drm_warn(&gt->i915->drm,
+                "failed to initialize gt%d sysfs root\n", gt->info.id);
+}
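Reading the registration above, the interfaces appear to end up in two places: the legacy files stay directly under the card's device directory for ABI compatibility (created only for the root GT), while per-tile copies are added under the gt/gt&lt;N&gt; kobject created here. The exact attribute names come from intel_gt_sysfs_pm.c below; this path layout is an interpretation of the code, not quoted from documentation.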
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h
new file mode 100644 (file)
index 0000000..9471b26
--- /dev/null
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef __SYSFS_GT_H__
+#define __SYSFS_GT_H__
+
+#include <linux/ctype.h>
+#include <linux/kobject.h>
+
+#include "i915_gem.h" /* GEM_BUG_ON() */
+
+struct intel_gt;
+
+struct kobj_gt {
+       struct kobject base;
+       struct intel_gt *gt;
+};
+
+bool is_object_gt(struct kobject *kobj);
+
+struct drm_i915_private *kobj_to_i915(struct kobject *kobj);
+
+struct kobject *
+intel_gt_create_kobj(struct intel_gt *gt,
+                    struct kobject *dir,
+                    const char *name);
+
+void intel_gt_sysfs_register(struct intel_gt *gt);
+struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev,
+                                           const char *name);
+
+#endif /* __SYSFS_GT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
new file mode 100644 (file)
index 0000000..26cbfa6
--- /dev/null
@@ -0,0 +1,601 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <drm/drm_device.h>
+#include <linux/sysfs.h>
+#include <linux/printk.h>
+
+#include "i915_drv.h"
+#include "i915_reg.h"
+#include "i915_sysfs.h"
+#include "intel_gt.h"
+#include "intel_gt_regs.h"
+#include "intel_gt_sysfs.h"
+#include "intel_gt_sysfs_pm.h"
+#include "intel_rc6.h"
+#include "intel_rps.h"
+
+#ifdef CONFIG_PM
+enum intel_gt_sysfs_op {
+       INTEL_GT_SYSFS_MIN = 0,
+       INTEL_GT_SYSFS_MAX,
+};
+
+static int
+sysfs_gt_attribute_w_func(struct device *dev, struct device_attribute *attr,
+                         int (func)(struct intel_gt *gt, u32 val), u32 val)
+{
+       struct intel_gt *gt;
+       int ret;
+
+       if (!is_object_gt(&dev->kobj)) {
+               int i;
+               struct drm_i915_private *i915 = kdev_minor_to_i915(dev);
+
+               for_each_gt(gt, i915, i) {
+                       ret = func(gt, val);
+                       if (ret)
+                               break;
+               }
+       } else {
+               gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
+               ret = func(gt, val);
+       }
+
+       return ret;
+}
+
+static u32
+sysfs_gt_attribute_r_func(struct device *dev, struct device_attribute *attr,
+                         u32 (func)(struct intel_gt *gt),
+                         enum intel_gt_sysfs_op op)
+{
+       struct intel_gt *gt;
+       u32 ret;
+
+       ret = (op == INTEL_GT_SYSFS_MAX) ? 0 : (u32) -1;
+
+       if (!is_object_gt(&dev->kobj)) {
+               int i;
+               struct drm_i915_private *i915 = kdev_minor_to_i915(dev);
+
+               for_each_gt(gt, i915, i) {
+                       u32 val = func(gt);
+
+                       switch (op) {
+                       case INTEL_GT_SYSFS_MIN:
+                               if (val < ret)
+                                       ret = val;
+                               break;
+
+                       case INTEL_GT_SYSFS_MAX:
+                               if (val > ret)
+                                       ret = val;
+                               break;
+                       }
+               }
+       } else {
+               gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
+               ret = func(gt);
+       }
+
+       return ret;
+}
+
+/* RC6 interfaces will show the minimum RC6 residency value */
+#define sysfs_gt_attribute_r_min_func(d, a, f) \
+               sysfs_gt_attribute_r_func(d, a, f, INTEL_GT_SYSFS_MIN)
+
+/* Frequency interfaces will show the maximum frequency value */
+#define sysfs_gt_attribute_r_max_func(d, a, f) \
+               sysfs_gt_attribute_r_func(d, a, f, INTEL_GT_SYSFS_MAX)
+
+static u32 get_residency(struct intel_gt *gt, i915_reg_t reg)
+{
+       intel_wakeref_t wakeref;
+       u64 res = 0;
+
+       with_intel_runtime_pm(gt->uncore->rpm, wakeref)
+               res = intel_rc6_residency_us(&gt->rc6, reg);
+
+       return DIV_ROUND_CLOSEST_ULL(res, 1000);
+}
+
+static ssize_t rc6_enable_show(struct device *dev,
+                              struct device_attribute *attr,
+                              char *buff)
+{
+       struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
+       u8 mask = 0;
+
+       if (HAS_RC6(gt->i915))
+               mask |= BIT(0);
+       if (HAS_RC6p(gt->i915))
+               mask |= BIT(1);
+       if (HAS_RC6pp(gt->i915))
+               mask |= BIT(2);
+
+       return sysfs_emit(buff, "%x\n", mask);
+}
+
+static u32 __rc6_residency_ms_show(struct intel_gt *gt)
+{
+       return get_residency(gt, GEN6_GT_GFX_RC6);
+}
+
+static ssize_t rc6_residency_ms_show(struct device *dev,
+                                    struct device_attribute *attr,
+                                    char *buff)
+{
+       u32 rc6_residency = sysfs_gt_attribute_r_min_func(dev, attr,
+                                                     __rc6_residency_ms_show);
+
+       return sysfs_emit(buff, "%u\n", rc6_residency);
+}
+
+static u32 __rc6p_residency_ms_show(struct intel_gt *gt)
+{
+       return get_residency(gt, GEN6_GT_GFX_RC6p);
+}
+
+static ssize_t rc6p_residency_ms_show(struct device *dev,
+                                     struct device_attribute *attr,
+                                     char *buff)
+{
+       u32 rc6p_residency = sysfs_gt_attribute_r_min_func(dev, attr,
+                                               __rc6p_residency_ms_show);
+
+       return sysfs_emit(buff, "%u\n", rc6p_residency);
+}
+
+static u32 __rc6pp_residency_ms_show(struct intel_gt *gt)
+{
+       return get_residency(gt, GEN6_GT_GFX_RC6pp);
+}
+
+static ssize_t rc6pp_residency_ms_show(struct device *dev,
+                                      struct device_attribute *attr,
+                                      char *buff)
+{
+       u32 rc6pp_residency = sysfs_gt_attribute_r_min_func(dev, attr,
+                                               __rc6pp_residency_ms_show);
+
+       return sysfs_emit(buff, "%u\n", rc6pp_residency);
+}
+
+static u32 __media_rc6_residency_ms_show(struct intel_gt *gt)
+{
+       return get_residency(gt, VLV_GT_MEDIA_RC6);
+}
+
+static ssize_t media_rc6_residency_ms_show(struct device *dev,
+                                          struct device_attribute *attr,
+                                          char *buff)
+{
+       u32 rc6_residency = sysfs_gt_attribute_r_min_func(dev, attr,
+                                               __media_rc6_residency_ms_show);
+
+       return sysfs_emit(buff, "%u\n", rc6_residency);
+}
+
+static DEVICE_ATTR_RO(rc6_enable);
+static DEVICE_ATTR_RO(rc6_residency_ms);
+static DEVICE_ATTR_RO(rc6p_residency_ms);
+static DEVICE_ATTR_RO(rc6pp_residency_ms);
+static DEVICE_ATTR_RO(media_rc6_residency_ms);
+
+static struct attribute *rc6_attrs[] = {
+       &dev_attr_rc6_enable.attr,
+       &dev_attr_rc6_residency_ms.attr,
+       NULL
+};
+
+static struct attribute *rc6p_attrs[] = {
+       &dev_attr_rc6p_residency_ms.attr,
+       &dev_attr_rc6pp_residency_ms.attr,
+       NULL
+};
+
+static struct attribute *media_rc6_attrs[] = {
+       &dev_attr_media_rc6_residency_ms.attr,
+       NULL
+};
+
+static const struct attribute_group rc6_attr_group[] = {
+       { .attrs = rc6_attrs, },
+       { .name = power_group_name, .attrs = rc6_attrs, },
+};
+
+static const struct attribute_group rc6p_attr_group[] = {
+       { .attrs = rc6p_attrs, },
+       { .name = power_group_name, .attrs = rc6p_attrs, },
+};
+
+static const struct attribute_group media_rc6_attr_group[] = {
+       { .attrs = media_rc6_attrs, },
+       { .name = power_group_name, .attrs = media_rc6_attrs, },
+};
+
+static int __intel_gt_sysfs_create_group(struct kobject *kobj,
+                                        const struct attribute_group *grp)
+{
+       return is_object_gt(kobj) ?
+              sysfs_create_group(kobj, &grp[0]) :
+              sysfs_merge_group(kobj, &grp[1]);
+}
+
+static void intel_sysfs_rc6_init(struct intel_gt *gt, struct kobject *kobj)
+{
+       int ret;
+
+       if (!HAS_RC6(gt->i915))
+               return;
+
+       ret = __intel_gt_sysfs_create_group(kobj, rc6_attr_group);
+       if (ret)
+               drm_warn(&gt->i915->drm,
+                        "failed to create gt%u RC6 sysfs files (%pe)\n",
+                        gt->info.id, ERR_PTR(ret));
+
+       /*
+        * We cannot use an is_visible() callback here because the
+        * upper-level object inherits the attributes from the parent
+        * group.
+        */
+       if (HAS_RC6p(gt->i915)) {
+               ret = __intel_gt_sysfs_create_group(kobj, rc6p_attr_group);
+               if (ret)
+                       drm_warn(&gt->i915->drm,
+                                "failed to create gt%u RC6p sysfs files (%pe)\n",
+                                gt->info.id, ERR_PTR(ret));
+       }
+
+       if (IS_VALLEYVIEW(gt->i915) || IS_CHERRYVIEW(gt->i915)) {
+               ret = __intel_gt_sysfs_create_group(kobj, media_rc6_attr_group);
+               if (ret)
+                       drm_warn(&gt->i915->drm,
+                                "failed to create gt%u media RC6 sysfs files (%pe)\n",
+                                gt->info.id, ERR_PTR(ret));
+       }
+}
+#else
+static void intel_sysfs_rc6_init(struct intel_gt *gt, struct kobject *kobj)
+{
+}
+#endif /* CONFIG_PM */
+
+static u32 __act_freq_mhz_show(struct intel_gt *gt)
+{
+       return intel_rps_read_actual_frequency(&gt->rps);
+}
+
+static ssize_t act_freq_mhz_show(struct device *dev,
+                                struct device_attribute *attr, char *buff)
+{
+       u32 actual_freq = sysfs_gt_attribute_r_max_func(dev, attr,
+                                                   __act_freq_mhz_show);
+
+       return sysfs_emit(buff, "%u\n", actual_freq);
+}
+
+static u32 __cur_freq_mhz_show(struct intel_gt *gt)
+{
+       return intel_rps_get_requested_frequency(&gt->rps);
+}
+
+static ssize_t cur_freq_mhz_show(struct device *dev,
+                                struct device_attribute *attr, char *buff)
+{
+       u32 cur_freq = sysfs_gt_attribute_r_max_func(dev, attr,
+                                                __cur_freq_mhz_show);
+
+       return sysfs_emit(buff, "%u\n", cur_freq);
+}
+
+static u32 __boost_freq_mhz_show(struct intel_gt *gt)
+{
+       return intel_rps_get_boost_frequency(&gt->rps);
+}
+
+static ssize_t boost_freq_mhz_show(struct device *dev,
+                                  struct device_attribute *attr,
+                                  char *buff)
+{
+       u32 boost_freq = sysfs_gt_attribute_r_max_func(dev, attr,
+                                                  __boost_freq_mhz_show);
+
+       return sysfs_emit(buff, "%u\n", boost_freq);
+}
+
+static int __boost_freq_mhz_store(struct intel_gt *gt, u32 val)
+{
+       return intel_rps_set_boost_frequency(&gt->rps, val);
+}
+
+static ssize_t boost_freq_mhz_store(struct device *dev,
+                                   struct device_attribute *attr,
+                                   const char *buff, size_t count)
+{
+       ssize_t ret;
+       u32 val;
+
+       ret = kstrtou32(buff, 0, &val);
+       if (ret)
+               return ret;
+
+       return sysfs_gt_attribute_w_func(dev, attr,
+                                        __boost_freq_mhz_store, val) ?: count;
+}
+
+static u32 __rp0_freq_mhz_show(struct intel_gt *gt)
+{
+       return intel_rps_get_rp0_frequency(&gt->rps);
+}
+
+static ssize_t RP0_freq_mhz_show(struct device *dev,
+                                struct device_attribute *attr, char *buff)
+{
+       u32 rp0_freq = sysfs_gt_attribute_r_max_func(dev, attr,
+                                                    __rp0_freq_mhz_show);
+
+       return sysfs_emit(buff, "%u\n", rp0_freq);
+}
+
+static u32 __rp1_freq_mhz_show(struct intel_gt *gt)
+{
+       return intel_rps_get_rp1_frequency(&gt->rps);
+}
+
+static ssize_t RP1_freq_mhz_show(struct device *dev,
+                                struct device_attribute *attr, char *buff)
+{
+       u32 rp1_freq = sysfs_gt_attribute_r_max_func(dev, attr,
+                                                    __rp1_freq_mhz_show);
+
+       return sysfs_emit(buff, "%u\n", rp1_freq);
+}
+
+static u32 __rpn_freq_mhz_show(struct intel_gt *gt)
+{
+       return intel_rps_get_rpn_frequency(&gt->rps);
+}
+
+static ssize_t RPn_freq_mhz_show(struct device *dev,
+                                struct device_attribute *attr, char *buff)
+{
+       u32 rpn_freq = sysfs_gt_attribute_r_max_func(dev, attr,
+                                                    __rpn_freq_mhz_show);
+
+       return sysfs_emit(buff, "%u\n", rpn_freq);
+}
+
+static u32 __max_freq_mhz_show(struct intel_gt *gt)
+{
+       return intel_rps_get_max_frequency(&gt->rps);
+}
+
+static ssize_t max_freq_mhz_show(struct device *dev,
+                                struct device_attribute *attr, char *buff)
+{
+       u32 max_freq = sysfs_gt_attribute_r_max_func(dev, attr,
+                                                    __max_freq_mhz_show);
+
+       return sysfs_emit(buff, "%u\n", max_freq);
+}
+
+static int __set_max_freq(struct intel_gt *gt, u32 val)
+{
+       return intel_rps_set_max_frequency(&gt->rps, val);
+}
+
+static ssize_t max_freq_mhz_store(struct device *dev,
+                                 struct device_attribute *attr,
+                                 const char *buff, size_t count)
+{
+       int ret;
+       u32 val;
+
+       ret = kstrtou32(buff, 0, &val);
+       if (ret)
+               return ret;
+
+       ret = sysfs_gt_attribute_w_func(dev, attr, __set_max_freq, val);
+
+       return ret ?: count;
+}
+
+static u32 __min_freq_mhz_show(struct intel_gt *gt)
+{
+       return intel_rps_get_min_frequency(&gt->rps);
+}
+
+static ssize_t min_freq_mhz_show(struct device *dev,
+                                struct device_attribute *attr, char *buff)
+{
+       u32 min_freq = sysfs_gt_attribute_r_min_func(dev, attr,
+                                                    __min_freq_mhz_show);
+
+       return sysfs_emit(buff, "%u\n", min_freq);
+}
+
+static int __set_min_freq(struct intel_gt *gt, u32 val)
+{
+       return intel_rps_set_min_frequency(&gt->rps, val);
+}
+
+static ssize_t min_freq_mhz_store(struct device *dev,
+                                 struct device_attribute *attr,
+                                 const char *buff, size_t count)
+{
+       int ret;
+       u32 val;
+
+       ret = kstrtou32(buff, 0, &val);
+       if (ret)
+               return ret;
+
+       ret = sysfs_gt_attribute_w_func(dev, attr, __set_min_freq, val);
+
+       return ret ?: count;
+}
+
+static u32 __vlv_rpe_freq_mhz_show(struct intel_gt *gt)
+{
+       struct intel_rps *rps = &gt->rps;
+
+       return intel_gpu_freq(rps, rps->efficient_freq);
+}
+
+static ssize_t vlv_rpe_freq_mhz_show(struct device *dev,
+                                    struct device_attribute *attr, char *buff)
+{
+       u32 rpe_freq = sysfs_gt_attribute_r_max_func(dev, attr,
+                                                __vlv_rpe_freq_mhz_show);
+
+       return sysfs_emit(buff, "%u\n", rpe_freq);
+}
+
+#define INTEL_GT_RPS_SYSFS_ATTR(_name, _mode, _show, _store) \
+       struct device_attribute dev_attr_gt_##_name = __ATTR(gt_##_name, _mode, _show, _store); \
+       struct device_attribute dev_attr_rps_##_name = __ATTR(rps_##_name, _mode, _show, _store)
+
+#define INTEL_GT_RPS_SYSFS_ATTR_RO(_name)                              \
+               INTEL_GT_RPS_SYSFS_ATTR(_name, 0444, _name##_show, NULL)
+#define INTEL_GT_RPS_SYSFS_ATTR_RW(_name)                              \
+               INTEL_GT_RPS_SYSFS_ATTR(_name, 0644, _name##_show, _name##_store)
+
+static INTEL_GT_RPS_SYSFS_ATTR_RO(act_freq_mhz);
+static INTEL_GT_RPS_SYSFS_ATTR_RO(cur_freq_mhz);
+static INTEL_GT_RPS_SYSFS_ATTR_RW(boost_freq_mhz);
+static INTEL_GT_RPS_SYSFS_ATTR_RO(RP0_freq_mhz);
+static INTEL_GT_RPS_SYSFS_ATTR_RO(RP1_freq_mhz);
+static INTEL_GT_RPS_SYSFS_ATTR_RO(RPn_freq_mhz);
+static INTEL_GT_RPS_SYSFS_ATTR_RW(max_freq_mhz);
+static INTEL_GT_RPS_SYSFS_ATTR_RW(min_freq_mhz);
+
+static DEVICE_ATTR_RO(vlv_rpe_freq_mhz);
+
+#define GEN6_ATTR(s) { \
+               &dev_attr_##s##_act_freq_mhz.attr, \
+               &dev_attr_##s##_cur_freq_mhz.attr, \
+               &dev_attr_##s##_boost_freq_mhz.attr, \
+               &dev_attr_##s##_max_freq_mhz.attr, \
+               &dev_attr_##s##_min_freq_mhz.attr, \
+               &dev_attr_##s##_RP0_freq_mhz.attr, \
+               &dev_attr_##s##_RP1_freq_mhz.attr, \
+               &dev_attr_##s##_RPn_freq_mhz.attr, \
+               NULL, \
+       }
+
+#define GEN6_RPS_ATTR GEN6_ATTR(rps)
+#define GEN6_GT_ATTR  GEN6_ATTR(gt)
+
+static const struct attribute * const gen6_rps_attrs[] = GEN6_RPS_ATTR;
+static const struct attribute * const gen6_gt_attrs[]  = GEN6_GT_ATTR;
+
+static ssize_t punit_req_freq_mhz_show(struct device *dev,
+                                      struct device_attribute *attr,
+                                      char *buff)
+{
+       struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
+       u32 preq = intel_rps_read_punit_req_frequency(&gt->rps);
+
+       return sysfs_emit(buff, "%u\n", preq);
+}
+
+struct intel_gt_bool_throttle_attr {
+       struct attribute attr;
+       ssize_t (*show)(struct device *dev, struct device_attribute *attr,
+                       char *buf);
+       i915_reg_t reg32;
+       u32 mask;
+};
+
+static ssize_t throttle_reason_bool_show(struct device *dev,
+                                        struct device_attribute *attr,
+                                        char *buff)
+{
+       struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
+       struct intel_gt_bool_throttle_attr *t_attr =
+                               (struct intel_gt_bool_throttle_attr *) attr;
+       bool val = rps_read_mask_mmio(&gt->rps, t_attr->reg32, t_attr->mask);
+
+       return sysfs_emit(buff, "%u\n", val);
+}
+
+#define INTEL_GT_RPS_BOOL_ATTR_RO(sysfs_func__, mask__) \
+struct intel_gt_bool_throttle_attr attr_##sysfs_func__ = { \
+       .attr = { .name = __stringify(sysfs_func__), .mode = 0444 }, \
+       .show = throttle_reason_bool_show, \
+       .reg32 = GT0_PERF_LIMIT_REASONS, \
+       .mask = mask__, \
+}
+
+static DEVICE_ATTR_RO(punit_req_freq_mhz);
+static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_status, GT0_PERF_LIMIT_REASONS_MASK);
+static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_pl1, POWER_LIMIT_1_MASK);
+static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_pl2, POWER_LIMIT_2_MASK);
+static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_pl4, POWER_LIMIT_4_MASK);
+static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_thermal, THERMAL_LIMIT_MASK);
+static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_prochot, PROCHOT_MASK);
+static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_ratl, RATL_MASK);
+static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_vr_thermalert, VR_THERMALERT_MASK);
+static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_vr_tdc, VR_TDC_MASK);
+
+static const struct attribute *freq_attrs[] = {
+       &dev_attr_punit_req_freq_mhz.attr,
+       &attr_throttle_reason_status.attr,
+       &attr_throttle_reason_pl1.attr,
+       &attr_throttle_reason_pl2.attr,
+       &attr_throttle_reason_pl4.attr,
+       &attr_throttle_reason_thermal.attr,
+       &attr_throttle_reason_prochot.attr,
+       &attr_throttle_reason_ratl.attr,
+       &attr_throttle_reason_vr_thermalert.attr,
+       &attr_throttle_reason_vr_tdc.attr,
+       NULL
+};
+
+static int intel_sysfs_rps_init(struct intel_gt *gt, struct kobject *kobj,
+                               const struct attribute * const *attrs)
+{
+       int ret;
+
+       if (GRAPHICS_VER(gt->i915) < 6)
+               return 0;
+
+       ret = sysfs_create_files(kobj, attrs);
+       if (ret)
+               return ret;
+
+       if (IS_VALLEYVIEW(gt->i915) || IS_CHERRYVIEW(gt->i915))
+               ret = sysfs_create_file(kobj, &dev_attr_vlv_rpe_freq_mhz.attr);
+
+       return ret;
+}
+
+void intel_gt_sysfs_pm_init(struct intel_gt *gt, struct kobject *kobj)
+{
+       int ret;
+
+       intel_sysfs_rc6_init(gt, kobj);
+
+       ret = is_object_gt(kobj) ?
+             intel_sysfs_rps_init(gt, kobj, gen6_rps_attrs) :
+             intel_sysfs_rps_init(gt, kobj, gen6_gt_attrs);
+       if (ret)
+               drm_warn(&gt->i915->drm,
+                        "failed to create gt%u RPS sysfs files (%pe)",
+                        gt->info.id, ERR_PTR(ret));
+
+       /* end of the legacy interfaces */
+       if (!is_object_gt(kobj))
+               return;
+
+       ret = sysfs_create_files(kobj, freq_attrs);
+       if (ret)
+               drm_warn(&gt->i915->drm,
+                        "failed to create gt%u throttle sysfs files (%pe)",
+                        gt->info.id, ERR_PTR(ret));
+}
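
The attributes above are plain sysfs files, so the new per-gt power management state can be sampled from userspace without any driver-specific ioctls. A minimal sketch in C, assuming the per-gt directory is exposed under /sys/class/drm/card0/gt/gt0/ (the card/gt indices and the exact path are assumptions about the target system; only the attribute names come from the code above, and the helper function is hypothetical):

#include <stdio.h>

/* Hypothetical helper: read one unsigned integer from a sysfs file. */
static int read_sysfs_u32(const char *path, unsigned int *val)
{
	FILE *f = fopen(path, "r");
	int ret = -1;

	if (!f)
		return ret;
	if (fscanf(f, "%u", val) == 1)
		ret = 0;
	fclose(f);
	return ret;
}

int main(void)
{
	/* Assumed location of the per-gt interface added by this series. */
	const char *base = "/sys/class/drm/card0/gt/gt0";
	char path[256];
	unsigned int req, throttled;

	snprintf(path, sizeof(path), "%s/punit_req_freq_mhz", base);
	if (!read_sysfs_u32(path, &req))
		printf("punit requested frequency: %u MHz\n", req);

	snprintf(path, sizeof(path), "%s/throttle_reason_status", base);
	if (!read_sysfs_u32(path, &throttled))
		printf("currently throttled: %s\n", throttled ? "yes" : "no");

	return 0;
}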
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.h
new file mode 100644 (file)
index 0000000..f567105
--- /dev/null
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef __SYSFS_GT_PM_H__
+#define __SYSFS_GT_PM_H__
+
+#include <linux/kobject.h>
+
+#include "intel_gt_types.h"
+
+void intel_gt_sysfs_pm_init(struct intel_gt *gt, struct kobject *kobj);
+
+#endif /* __SYSFS_GT_PM_H__ */
index f206877..937b2e1 100644 (file)
@@ -20,6 +20,7 @@
 #include "i915_vma.h"
 #include "intel_engine_types.h"
 #include "intel_gt_buffer_pool_types.h"
+#include "intel_hwconfig.h"
 #include "intel_llc_types.h"
 #include "intel_reset_types.h"
 #include "intel_rc6_types.h"
@@ -182,7 +183,19 @@ struct intel_gt {
 
        const struct intel_mmio_range *steering_table[NUM_STEERING_TYPES];
 
+       struct {
+               u8 groupid;
+               u8 instanceid;
+       } default_steering;
+
+       /*
+        * Base of per-tile GTTMMADR where we can derive the MMIO and the GGTT.
+        */
+       phys_addr_t phys_addr;
+
        struct intel_gt_info {
+               unsigned int id;
+
                intel_engine_mask_t engine_mask;
 
                u32 l3bank_mask;
@@ -199,6 +212,9 @@ struct intel_gt {
                struct sseu_dev_info sseu;
 
                unsigned long mslice_mask;
+
+               /** @hwconfig: hardware configuration data */
+               struct intel_hwconfig hwconfig;
        } info;
 
        struct {
index a5f5b2d..719fd31 100644 (file)
@@ -97,32 +97,52 @@ int map_pt_dma_locked(struct i915_address_space *vm, struct drm_i915_gem_object
        return 0;
 }
 
-void __i915_vm_close(struct i915_address_space *vm)
+static void clear_vm_list(struct list_head *list)
 {
        struct i915_vma *vma, *vn;
 
-       if (!atomic_dec_and_mutex_lock(&vm->open, &vm->mutex))
-               return;
-
-       list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
+       list_for_each_entry_safe(vma, vn, list, vm_link) {
                struct drm_i915_gem_object *obj = vma->obj;
 
-               if (!kref_get_unless_zero(&obj->base.refcount)) {
+               if (!i915_gem_object_get_rcu(obj)) {
                        /*
-                        * Unbind the dying vma to ensure the bound_list
+                        * Object is dying, but has not yet cleared its
+                        * vma list.
+                        * Unbind the dying vma to ensure our list
                         * is completely drained. We leave the destruction to
-                        * the object destructor.
+                        * the object destructor to avoid the vma
+                        * disappearing under it.
                         */
                        atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
                        WARN_ON(__i915_vma_unbind(vma));
-                       continue;
+
+                       /* Remove from the unbound list */
+                       list_del_init(&vma->vm_link);
+
+                       /*
+                        * Delay the vm and vm mutex freeing until the
+                        * object is done with destruction.
+                        */
+                       i915_vm_resv_get(vma->vm);
+                       vma->vm_ddestroy = true;
+               } else {
+                       i915_vma_destroy_locked(vma);
+                       i915_gem_object_put(obj);
                }
 
-               /* Keep the obj (and hence the vma) alive as _we_ destroy it */
-               i915_vma_destroy_locked(vma);
-               i915_gem_object_put(obj);
        }
+}
+
+static void __i915_vm_close(struct i915_address_space *vm)
+{
+       mutex_lock(&vm->mutex);
+
+       clear_vm_list(&vm->bound_list);
+       clear_vm_list(&vm->unbound_list);
+
+       /* Check for must-fix unanticipated side-effects */
        GEM_BUG_ON(!list_empty(&vm->bound_list));
+       GEM_BUG_ON(!list_empty(&vm->unbound_list));
 
        mutex_unlock(&vm->mutex);
 }
@@ -144,7 +164,6 @@ int i915_vm_lock_objects(struct i915_address_space *vm,
 void i915_address_space_fini(struct i915_address_space *vm)
 {
        drm_mm_takedown(&vm->mm);
-       mutex_destroy(&vm->mutex);
 }
 
 /**
@@ -152,7 +171,8 @@ void i915_address_space_fini(struct i915_address_space *vm)
  * @kref: Pointer to the &i915_address_space.resv_ref member.
  *
  * This function is called when the last lock sharer no longer shares the
- * &i915_address_space._resv lock.
+ * &i915_address_space._resv lock, and also when a vma destruction that
+ * raced with vm teardown drops the last reference.
  */
 void i915_vm_resv_release(struct kref *kref)
 {
@@ -160,6 +180,8 @@ void i915_vm_resv_release(struct kref *kref)
                container_of(kref, typeof(*vm), resv_ref);
 
        dma_resv_fini(&vm->_resv);
+       mutex_destroy(&vm->mutex);
+
        kfree(vm);
 }
 
@@ -168,6 +190,8 @@ static void __i915_vm_release(struct work_struct *work)
        struct i915_address_space *vm =
                container_of(work, struct i915_address_space, release_work);
 
+       __i915_vm_close(vm);
+
        /* Synchronize async unbinds. */
        i915_vma_resource_bind_dep_sync_all(vm);
 
@@ -201,7 +225,6 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass)
 
        vm->pending_unbind = RB_ROOT_CACHED;
        INIT_WORK(&vm->release_work, __i915_vm_release);
-       atomic_set(&vm->open, 1);
 
        /*
         * The vm->mutex must be reclaim safe (for use in the shrinker).
@@ -246,6 +269,7 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass)
        vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;
 
        INIT_LIST_HEAD(&vm->bound_list);
+       INIT_LIST_HEAD(&vm->unbound_list);
 }
 
 void *__px_vaddr(struct drm_i915_gem_object *p)
@@ -274,7 +298,7 @@ fill_page_dma(struct drm_i915_gem_object *p, const u64 val, unsigned int count)
        void *vaddr = __px_vaddr(p);
 
        memset64(vaddr, val, count);
-       clflush_cache_range(vaddr, PAGE_SIZE);
+       drm_clflush_virt_range(vaddr, PAGE_SIZE);
 }
 
 static void poison_scratch_page(struct drm_i915_gem_object *scratch)
index 9d83c2d..b4b9cc5 100644 (file)
@@ -240,15 +240,6 @@ struct i915_address_space {
 
        unsigned int bind_async_flags;
 
-       /*
-        * Each active user context has its own address space (in full-ppgtt).
-        * Since the vm may be shared between multiple contexts, we count how
-        * many contexts keep us "open". Once open hits zero, we are closed
-        * and do not allow any new attachments, and proceed to shutdown our
-        * vma and page directories.
-        */
-       atomic_t open;
-
        struct mutex mutex; /* protects vma and our lists */
 
        struct kref resv_ref; /* kref to keep the reservation lock alive. */
@@ -263,6 +254,11 @@ struct i915_address_space {
         */
        struct list_head bound_list;
 
+       /**
+        * List of vmas not yet bound or evicted.
+        */
+       struct list_head unbound_list;
+
        /* Global GTT */
        bool is_ggtt:1;
 
@@ -272,6 +268,9 @@ struct i915_address_space {
        /* Some systems support read-only mappings for GGTT and/or PPGTT */
        bool has_read_only:1;
 
+       /* Skip pte rewrite on unbind for suspend. Protected by @mutex */
+       bool skip_pte_rewrite:1;
+
        u8 top;
        u8 pd_shift;
        u8 scratch_order;
@@ -446,6 +445,17 @@ i915_vm_get(struct i915_address_space *vm)
        return vm;
 }
 
+static inline struct i915_address_space *
+i915_vm_tryget(struct i915_address_space *vm)
+{
+       return kref_get_unless_zero(&vm->ref) ? vm : NULL;
+}
+
+static inline void assert_vm_alive(struct i915_address_space *vm)
+{
+       GEM_BUG_ON(!kref_read(&vm->ref));
+}
+
 /**
  * i915_vm_resv_get - Obtain a reference on the vm's reservation lock
  * @vm: The vm whose reservation lock we want to share.
@@ -476,34 +486,6 @@ static inline void i915_vm_resv_put(struct i915_address_space *vm)
        kref_put(&vm->resv_ref, i915_vm_resv_release);
 }
 
-static inline struct i915_address_space *
-i915_vm_open(struct i915_address_space *vm)
-{
-       GEM_BUG_ON(!atomic_read(&vm->open));
-       atomic_inc(&vm->open);
-       return i915_vm_get(vm);
-}
-
-static inline bool
-i915_vm_tryopen(struct i915_address_space *vm)
-{
-       if (atomic_add_unless(&vm->open, 1, 0))
-               return i915_vm_get(vm);
-
-       return false;
-}
-
-void __i915_vm_close(struct i915_address_space *vm);
-
-static inline void
-i915_vm_close(struct i915_address_space *vm)
-{
-       GEM_BUG_ON(!atomic_read(&vm->open));
-       __i915_vm_close(vm);
-
-       i915_vm_put(vm);
-}
-
 void i915_address_space_init(struct i915_address_space *vm, int subclass);
 void i915_address_space_fini(struct i915_address_space *vm);
 
@@ -565,6 +547,14 @@ i915_page_dir_dma_addr(const struct i915_ppgtt *ppgtt, const unsigned int n)
 void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt,
                unsigned long lmem_pt_obj_flags);
 
+void intel_ggtt_bind_vma(struct i915_address_space *vm,
+                         struct i915_vm_pt_stash *stash,
+                         struct i915_vma_resource *vma_res,
+                         enum i915_cache_level cache_level,
+                         u32 flags);
+void intel_ggtt_unbind_vma(struct i915_address_space *vm,
+                           struct i915_vma_resource *vma_res);
+
 int i915_ggtt_probe_hw(struct drm_i915_private *i915);
 int i915_ggtt_init_hw(struct drm_i915_private *i915);
 int i915_ggtt_enable_hw(struct drm_i915_private *i915);
@@ -635,6 +625,7 @@ release_pd_entry(struct i915_page_directory * const pd,
                 struct i915_page_table * const pt,
                 const struct drm_i915_gem_object * const scratch);
 void gen6_ggtt_invalidate(struct i915_ggtt *ggtt);
+void gen8_ggtt_invalidate(struct i915_ggtt *ggtt);
 
 void ppgtt_bind_vma(struct i915_address_space *vm,
                    struct i915_vm_pt_stash *stash,
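
With i915_vm_open()/i915_vm_close() removed, code that used to take an "open" count now just holds a regular vm reference, and paths that may race with teardown switch to i915_vm_tryget(). A minimal kernel-context sketch of that pattern (the wrapper function is illustrative; only i915_vm_tryget() and i915_vm_put() come from this header):

/* Illustrative only: operate on a vm iff it has not already been released. */
static int use_vm_if_alive(struct i915_address_space *vm)
{
	vm = i915_vm_tryget(vm);
	if (!vm)
		return -ENOENT; /* last reference gone, __i915_vm_release() pending */

	/* ... work that requires the vm (and its mutex) to stay alive ... */

	i915_vm_put(vm);
	return 0;
}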
diff --git a/drivers/gpu/drm/i915/gt/intel_hwconfig.h b/drivers/gpu/drm/i915/gt/intel_hwconfig.h
new file mode 100644 (file)
index 0000000..3222907
--- /dev/null
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _INTEL_HWCONFIG_H_
+#define _INTEL_HWCONFIG_H_
+
+#include <linux/types.h>
+
+struct intel_gt;
+
+struct intel_hwconfig {
+       u32 size;
+       void *ptr;
+};
+
+int intel_gt_init_hwconfig(struct intel_gt *gt);
+void intel_gt_fini_hwconfig(struct intel_gt *gt);
+
+#endif /* _INTEL_HWCONFIG_H_ */
index 07bef71..3f83a90 100644 (file)
@@ -778,7 +778,7 @@ static void init_common_regs(u32 * const regs,
                                           CTX_CTRL_RS_CTX_ENABLE);
        regs[CTX_CONTEXT_CONTROL] = ctl;
 
-       regs[CTX_TIMESTAMP] = ce->runtime.last;
+       regs[CTX_TIMESTAMP] = ce->stats.runtime.last;
 }
 
 static void init_wa_bb_regs(u32 * const regs,
@@ -1208,6 +1208,10 @@ gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
            IS_DG2_G11(ce->engine->i915))
                cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0);
 
+       /* hsdes: 1809175790 */
+       if (!HAS_FLAT_CCS(ce->engine->i915))
+               cs = gen12_emit_aux_table_inv(cs, GEN12_GFX_CCS_AUX_NV);
+
        return cs;
 }
 
@@ -1225,6 +1229,14 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
                                                    PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE,
                                                    0);
 
+       /* hsdes: 1809175790 */
+       if (!HAS_FLAT_CCS(ce->engine->i915)) {
+               if (ce->engine->class == VIDEO_DECODE_CLASS)
+                       cs = gen12_emit_aux_table_inv(cs, GEN12_VD0_AUX_NV);
+               else if (ce->engine->class == VIDEO_ENHANCEMENT_CLASS)
+                       cs = gen12_emit_aux_table_inv(cs, GEN12_VE0_AUX_NV);
+       }
+
        return cs;
 }
 
@@ -1722,11 +1734,12 @@ err:
        }
 }
 
-static void st_update_runtime_underflow(struct intel_context *ce, s32 dt)
+static void st_runtime_underflow(struct intel_context_stats *stats, s32 dt)
 {
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-       ce->runtime.num_underflow++;
-       ce->runtime.max_underflow = max_t(u32, ce->runtime.max_underflow, -dt);
+       stats->runtime.num_underflow++;
+       stats->runtime.max_underflow =
+               max_t(u32, stats->runtime.max_underflow, -dt);
 #endif
 }
 
@@ -1743,25 +1756,25 @@ static u32 lrc_get_runtime(const struct intel_context *ce)
 
 void lrc_update_runtime(struct intel_context *ce)
 {
+       struct intel_context_stats *stats = &ce->stats;
        u32 old;
        s32 dt;
 
-       if (intel_context_is_barrier(ce))
+       old = stats->runtime.last;
+       stats->runtime.last = lrc_get_runtime(ce);
+       dt = stats->runtime.last - old;
+       if (!dt)
                return;
 
-       old = ce->runtime.last;
-       ce->runtime.last = lrc_get_runtime(ce);
-       dt = ce->runtime.last - old;
-
        if (unlikely(dt < 0)) {
                CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n",
-                        old, ce->runtime.last, dt);
-               st_update_runtime_underflow(ce, dt);
+                        old, stats->runtime.last, dt);
+               st_runtime_underflow(stats, dt);
                return;
        }
 
-       ewma_runtime_add(&ce->runtime.avg, dt);
-       ce->runtime.total += dt;
+       ewma_runtime_add(&stats->runtime.avg, dt);
+       stats->runtime.total += dt;
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
index 6e4f9f5..7371bb5 100644 (file)
 #include <linux/bitfield.h>
 #include <linux/types.h>
 
+#include "intel_context.h"
+
 struct drm_i915_gem_object;
 struct i915_gem_ww_ctx;
-struct intel_context;
 struct intel_engine_cs;
 struct intel_ring;
 struct kref;
@@ -120,4 +121,28 @@ static inline u32 lrc_desc_priority(int prio)
                return GEN12_CTX_PRIORITY_NORMAL;
 }
 
+static inline void lrc_runtime_start(struct intel_context *ce)
+{
+       struct intel_context_stats *stats = &ce->stats;
+
+       if (intel_context_is_barrier(ce))
+               return;
+
+       if (stats->active)
+               return;
+
+       WRITE_ONCE(stats->active, intel_context_clock());
+}
+
+static inline void lrc_runtime_stop(struct intel_context *ce)
+{
+       struct intel_context_stats *stats = &ce->stats;
+
+       if (!stats->active)
+               return;
+
+       lrc_update_runtime(ce);
+       WRITE_ONCE(stats->active, 0);
+}
+
 #endif /* __INTEL_LRC_H__ */
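
The two helpers above are meant to be paired by the submission backend: lrc_runtime_start() at context schedule-in records a timestamp in stats->active, and lrc_runtime_stop() at schedule-out folds the GPU-reported delta into stats->runtime via lrc_update_runtime(). A hedged sketch of that pairing (the surrounding functions are illustrative, not the driver's actual schedule-in/out paths):

/* Illustrative sketch of the intended call sites. */
static void example_context_sched_in(struct intel_context *ce)
{
	/* ... existing schedule-in bookkeeping ... */
	lrc_runtime_start(ce);
}

static void example_context_sched_out(struct intel_context *ce)
{
	lrc_runtime_stop(ce);
	/* ... existing schedule-out bookkeeping ... */
}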
index 20444d6..950fd6d 100644 (file)
@@ -530,6 +530,7 @@ intel_context_migrate_copy(struct intel_context *ce,
        int err;
 
        GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm);
+       GEM_BUG_ON(IS_DGFX(ce->engine->i915) && (!src_is_lmem && !dst_is_lmem));
        *out = NULL;
 
        GEM_BUG_ON(ce->ring->size < SZ_64K);
@@ -566,8 +567,6 @@ intel_context_migrate_copy(struct intel_context *ce,
                src_offset = 0;
                dst_offset = CHUNK_SZ;
                if (HAS_64K_PAGES(ce->engine->i915)) {
-                       GEM_BUG_ON(!src_is_lmem && !dst_is_lmem);
-
                        src_offset = 0;
                        dst_offset = 0;
                        if (src_is_lmem)
index d91e2be..d8b94d6 100644 (file)
@@ -91,7 +91,7 @@ write_dma_entry(struct drm_i915_gem_object * const pdma,
        u64 * const vaddr = __px_vaddr(pdma);
 
        vaddr[idx] = encoded_entry;
-       clflush_cache_range(&vaddr[idx], sizeof(u64));
+       drm_clflush_virt_range(&vaddr[idx], sizeof(u64));
 }
 
 void
index 6df359c..4f11f28 100644 (file)
@@ -5,6 +5,7 @@
 
 #include <linux/pm_runtime.h>
 
+#include "gem/i915_gem_region.h"
 #include "i915_drv.h"
 #include "i915_reg.h"
 #include "i915_vgpu.h"
@@ -324,9 +325,10 @@ static int vlv_rc6_init(struct intel_rc6 *rc6)
                resource_size_t pcbr_offset;
 
                pcbr_offset = (pcbr & ~4095) - i915->dsm.start;
-               pctx = i915_gem_object_create_stolen_for_preallocated(i915,
-                                                                     pcbr_offset,
-                                                                     pctx_size);
+               pctx = i915_gem_object_create_region_at(i915->mm.stolen_region,
+                                                       pcbr_offset,
+                                                       pctx_size,
+                                                       0);
                if (IS_ERR(pctx))
                        return PTR_ERR(pctx);
 
index 6cecfda..f5111c0 100644 (file)
@@ -93,6 +93,7 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt)
        struct intel_memory_region *mem;
        resource_size_t min_page_size;
        resource_size_t io_start;
+       resource_size_t io_size;
        resource_size_t lmem_size;
        int err;
 
@@ -122,9 +123,14 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt)
                lmem_size = intel_uncore_read64(&i915->uncore, GEN12_GSMBASE);
        }
 
+       if (i915->params.lmem_size > 0) {
+               lmem_size = min_t(resource_size_t, lmem_size,
+                                 mul_u32_u32(i915->params.lmem_size, SZ_1M));
+       }
 
        io_start = pci_resource_start(pdev, 2);
-       if (GEM_WARN_ON(lmem_size > pci_resource_len(pdev, 2)))
+       io_size = min(pci_resource_len(pdev, 2), lmem_size);
+       if (!io_size)
                return ERR_PTR(-ENODEV);
 
        min_page_size = HAS_64K_PAGES(i915) ? I915_GTT_PAGE_SIZE_64K :
@@ -134,7 +140,7 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt)
                                         lmem_size,
                                         min_page_size,
                                         io_start,
-                                        lmem_size,
+                                        io_size,
                                         INTEL_MEMORY_LOCAL,
                                         0,
                                         &intel_region_lmem_ops);
index 8271326..2d120bd 100644 (file)
@@ -1318,7 +1318,7 @@ void intel_gt_handle_error(struct intel_gt *gt,
        engine_mask &= gt->info.engine_mask;
 
        if (flags & I915_ERROR_CAPTURE) {
-               i915_capture_error_state(gt, engine_mask);
+               i915_capture_error_state(gt, engine_mask, CORE_DUMP_FLAG_NONE);
                intel_gt_clear_error_registers(gt, engine_mask);
        }
 
index 6d7ec3b..5423bfd 100644 (file)
@@ -767,7 +767,7 @@ static int mi_set_context(struct i915_request *rq,
        if (GRAPHICS_VER(i915) == 7) {
                if (num_engines) {
                        struct intel_engine_cs *signaller;
-                       i915_reg_t last_reg = {}; /* keep gcc quiet */
+                       i915_reg_t last_reg = INVALID_MMIO_REG; /* keep gcc quiet */
 
                        *cs++ = MI_LOAD_REGISTER_IMM(num_engines);
                        for_each_engine(signaller, engine->gt, id) {
index c812410..bedaee9 100644 (file)
@@ -1067,24 +1067,67 @@ int intel_rps_set(struct intel_rps *rps, u8 val)
        return 0;
 }
 
-static void gen6_rps_init(struct intel_rps *rps)
+static u32 intel_rps_read_state_cap(struct intel_rps *rps)
 {
        struct drm_i915_private *i915 = rps_to_i915(rps);
-       u32 rp_state_cap = intel_rps_read_state_cap(rps);
+       struct intel_uncore *uncore = rps_to_uncore(rps);
 
-       /* All of these values are in units of 50MHz */
+       if (IS_XEHPSDV(i915))
+               return intel_uncore_read(uncore, XEHPSDV_RP_STATE_CAP);
+       else if (IS_GEN9_LP(i915))
+               return intel_uncore_read(uncore, BXT_RP_STATE_CAP);
+       else
+               return intel_uncore_read(uncore, GEN6_RP_STATE_CAP);
+}
+
+/**
+ * gen6_rps_get_freq_caps - Get freq caps exposed by HW
+ * @rps: the intel_rps structure
+ * @caps: returned freq caps
+ *
+ * Returned "caps" frequencies should be converted to MHz using
+ * intel_gpu_freq()
+ */
+void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps)
+{
+       struct drm_i915_private *i915 = rps_to_i915(rps);
+       u32 rp_state_cap;
+
+       rp_state_cap = intel_rps_read_state_cap(rps);
 
        /* static values from HW: RP0 > RP1 > RPn (min_freq) */
        if (IS_GEN9_LP(i915)) {
-               rps->rp0_freq = (rp_state_cap >> 16) & 0xff;
-               rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
-               rps->min_freq = (rp_state_cap >>  0) & 0xff;
+               caps->rp0_freq = (rp_state_cap >> 16) & 0xff;
+               caps->rp1_freq = (rp_state_cap >>  8) & 0xff;
+               caps->min_freq = (rp_state_cap >>  0) & 0xff;
        } else {
-               rps->rp0_freq = (rp_state_cap >>  0) & 0xff;
-               rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
-               rps->min_freq = (rp_state_cap >> 16) & 0xff;
+               caps->rp0_freq = (rp_state_cap >>  0) & 0xff;
+               caps->rp1_freq = (rp_state_cap >>  8) & 0xff;
+               caps->min_freq = (rp_state_cap >> 16) & 0xff;
        }
 
+       if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) {
+               /*
+                * In this case rp_state_cap register reports frequencies in
+                * units of 50 MHz. Convert these to the actual "hw unit", i.e.
+                * units of 16.67 MHz
+                */
+               caps->rp0_freq *= GEN9_FREQ_SCALER;
+               caps->rp1_freq *= GEN9_FREQ_SCALER;
+               caps->min_freq *= GEN9_FREQ_SCALER;
+       }
+}
+
+static void gen6_rps_init(struct intel_rps *rps)
+{
+       struct drm_i915_private *i915 = rps_to_i915(rps);
+       struct intel_rps_freq_caps caps;
+
+       gen6_rps_get_freq_caps(rps, &caps);
+       rps->rp0_freq = caps.rp0_freq;
+       rps->rp1_freq = caps.rp1_freq;
+       rps->min_freq = caps.min_freq;
+
        /* hw_max = RP0 until we check for overclocking */
        rps->max_freq = rps->rp0_freq;
 
@@ -1092,26 +1135,18 @@ static void gen6_rps_init(struct intel_rps *rps)
        if (IS_HASWELL(i915) || IS_BROADWELL(i915) ||
            IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) {
                u32 ddcc_status = 0;
+               u32 mult = 1;
 
+               if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11)
+                       mult = GEN9_FREQ_SCALER;
                if (snb_pcode_read(i915, HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
                                   &ddcc_status, NULL) == 0)
                        rps->efficient_freq =
-                               clamp_t(u8,
-                                       (ddcc_status >> 8) & 0xff,
+                               clamp_t(u32,
+                                       ((ddcc_status >> 8) & 0xff) * mult,
                                        rps->min_freq,
                                        rps->max_freq);
        }
-
-       if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) {
-               /* Store the frequency values in 16.66 MHZ units, which is
-                * the natural hardware unit for SKL
-                */
-               rps->rp0_freq *= GEN9_FREQ_SCALER;
-               rps->rp1_freq *= GEN9_FREQ_SCALER;
-               rps->min_freq *= GEN9_FREQ_SCALER;
-               rps->max_freq *= GEN9_FREQ_SCALER;
-               rps->efficient_freq *= GEN9_FREQ_SCALER;
-       }
 }
 
 static bool rps_reset(struct intel_rps *rps)
@@ -2214,19 +2249,6 @@ int intel_rps_set_min_frequency(struct intel_rps *rps, u32 val)
                return set_min_freq(rps, val);
 }
 
-u32 intel_rps_read_state_cap(struct intel_rps *rps)
-{
-       struct drm_i915_private *i915 = rps_to_i915(rps);
-       struct intel_uncore *uncore = rps_to_uncore(rps);
-
-       if (IS_XEHPSDV(i915))
-               return intel_uncore_read(uncore, XEHPSDV_RP_STATE_CAP);
-       else if (IS_GEN9_LP(i915))
-               return intel_uncore_read(uncore, BXT_RP_STATE_CAP);
-       else
-               return intel_uncore_read(uncore, GEN6_RP_STATE_CAP);
-}
-
 static void intel_rps_set_manual(struct intel_rps *rps, bool enable)
 {
        struct intel_uncore *uncore = rps_to_uncore(rps);
@@ -2239,18 +2261,18 @@ static void intel_rps_set_manual(struct intel_rps *rps, bool enable)
 void intel_rps_raise_unslice(struct intel_rps *rps)
 {
        struct intel_uncore *uncore = rps_to_uncore(rps);
-       u32 rp0_unslice_req;
 
        mutex_lock(&rps->lock);
 
        if (rps_uses_slpc(rps)) {
                /* RP limits have not been initialized yet for SLPC path */
-               rp0_unslice_req = ((intel_rps_read_state_cap(rps) >> 0)
-                                  & 0xff) * GEN9_FREQ_SCALER;
+               struct intel_rps_freq_caps caps;
+
+               gen6_rps_get_freq_caps(rps, &caps);
 
                intel_rps_set_manual(rps, true);
                intel_uncore_write(uncore, GEN6_RPNSWREQ,
-                                  ((rp0_unslice_req <<
+                                  ((caps.rp0_freq <<
                                   GEN9_SW_REQ_UNSLICE_RATIO_SHIFT) |
                                   GEN9_IGNORE_SLICE_RATIO));
                intel_rps_set_manual(rps, false);
@@ -2264,18 +2286,18 @@ void intel_rps_raise_unslice(struct intel_rps *rps)
 void intel_rps_lower_unslice(struct intel_rps *rps)
 {
        struct intel_uncore *uncore = rps_to_uncore(rps);
-       u32 rpn_unslice_req;
 
        mutex_lock(&rps->lock);
 
        if (rps_uses_slpc(rps)) {
                /* RP limits have not been initialized yet for SLPC path */
-               rpn_unslice_req = ((intel_rps_read_state_cap(rps) >> 16)
-                                  & 0xff) * GEN9_FREQ_SCALER;
+               struct intel_rps_freq_caps caps;
+
+               gen6_rps_get_freq_caps(rps, &caps);
 
                intel_rps_set_manual(rps, true);
                intel_uncore_write(uncore, GEN6_RPNSWREQ,
-                                  ((rpn_unslice_req <<
+                                  ((caps.min_freq <<
                                   GEN9_SW_REQ_UNSLICE_RATIO_SHIFT) |
                                   GEN9_IGNORE_SLICE_RATIO));
                intel_rps_set_manual(rps, false);
@@ -2286,6 +2308,24 @@ void intel_rps_lower_unslice(struct intel_rps *rps)
        mutex_unlock(&rps->lock);
 }
 
+static u32 rps_read_mmio(struct intel_rps *rps, i915_reg_t reg32)
+{
+       struct intel_gt *gt = rps_to_gt(rps);
+       intel_wakeref_t wakeref;
+       u32 val;
+
+       with_intel_runtime_pm(gt->uncore->rpm, wakeref)
+               val = intel_uncore_read(gt->uncore, reg32);
+
+       return val;
+}
+
+bool rps_read_mask_mmio(struct intel_rps *rps,
+                       i915_reg_t reg32, u32 mask)
+{
+       return rps_read_mmio(rps, reg32) & mask;
+}
+
 /* External interface for intel_ips.ko */
 
 static struct drm_i915_private __rcu *ips_mchdev;
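
To make the unit handling above concrete: on Gen9_BC and newer the RP_STATE_CAP register reports frequencies in 50 MHz units, while the rest of the RPS code works in 16.67 MHz "hw units", so gen6_rps_get_freq_caps() scales the raw fields by GEN9_FREQ_SCALER and intel_gpu_freq() later converts hw units to MHz. For example, a raw RP0 field of 22 becomes 22 * 3 = 66 hw units, which intel_gpu_freq() reports as roughly 66 * 50 / 3 = 1100 MHz (assuming GEN9_FREQ_SCALER is 3, as in the current driver). A hedged kernel-context sketch of a debug helper built on the new interface (only gen6_rps_get_freq_caps() and intel_gpu_freq() are from the driver; the helper itself is illustrative):

/* Illustrative only: log the HW frequency caps in MHz. */
static void example_log_freq_caps(struct intel_rps *rps)
{
	struct intel_rps_freq_caps caps;

	gen6_rps_get_freq_caps(rps, &caps);

	/* caps.* are in hw units; intel_gpu_freq() converts them to MHz. */
	pr_info("RP0 %d MHz, RP1 %d MHz, RPn %d MHz\n",
		intel_gpu_freq(rps, caps.rp0_freq),
		intel_gpu_freq(rps, caps.rp1_freq),
		intel_gpu_freq(rps, caps.min_freq));
}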
index c6d76a3..1e8d564 100644 (file)
@@ -7,6 +7,7 @@
 #define INTEL_RPS_H
 
 #include "intel_rps_types.h"
+#include "i915_reg_defs.h"
 
 struct i915_request;
 
@@ -44,10 +45,13 @@ u32 intel_rps_get_rp1_frequency(struct intel_rps *rps);
 u32 intel_rps_get_rpn_frequency(struct intel_rps *rps);
 u32 intel_rps_read_punit_req(struct intel_rps *rps);
 u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps);
-u32 intel_rps_read_state_cap(struct intel_rps *rps);
+void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps);
 void intel_rps_raise_unslice(struct intel_rps *rps);
 void intel_rps_lower_unslice(struct intel_rps *rps);
 
+u32 intel_rps_read_throttle_reason(struct intel_rps *rps);
+bool rps_read_mask_mmio(struct intel_rps *rps, i915_reg_t reg32, u32 mask);
+
 void gen5_rps_irq_handler(struct intel_rps *rps);
 void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir);
 void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir);
index 3941d85..9173ec7 100644 (file)
@@ -37,6 +37,21 @@ enum {
        INTEL_RPS_TIMER,
 };
 
+/**
+ * struct intel_rps_freq_caps - rps freq capabilities
+ * @rp0_freq: non-overclocked max frequency
+ * @rp1_freq: "less than" RP0 power/frequency
+ * @min_freq: aka RPn, minimum frequency
+ *
+ * Freq caps exposed by HW. Values are in "hw units"; use intel_gpu_freq()
+ * to convert them to MHz.
+ */
+struct intel_rps_freq_caps {
+       u8 rp0_freq;
+       u8 rp1_freq;
+       u8 min_freq;
+};
+
 struct intel_rps {
        struct mutex lock; /* protects enabling and the worker */
 
index 4ac0bba..726d48b 100644 (file)
@@ -8,6 +8,8 @@
 #include "intel_gt_regs.h"
 #include "intel_sseu.h"
 
+#include "linux/string_helpers.h"
+
 void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices,
                         u8 max_subslices, u8 max_eus_per_subslice)
 {
@@ -33,8 +35,8 @@ intel_sseu_subslice_total(const struct sseu_dev_info *sseu)
 }
 
 static u32
-_intel_sseu_get_subslices(const struct sseu_dev_info *sseu,
-                         const u8 *subslice_mask, u8 slice)
+sseu_get_subslices(const struct sseu_dev_info *sseu,
+                  const u8 *subslice_mask, u8 slice)
 {
        int i, offset = slice * sseu->ss_stride;
        u32 mask = 0;
@@ -49,12 +51,17 @@ _intel_sseu_get_subslices(const struct sseu_dev_info *sseu,
 
 u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice)
 {
-       return _intel_sseu_get_subslices(sseu, sseu->subslice_mask, slice);
+       return sseu_get_subslices(sseu, sseu->subslice_mask, slice);
+}
+
+static u32 sseu_get_geometry_subslices(const struct sseu_dev_info *sseu)
+{
+       return sseu_get_subslices(sseu, sseu->geometry_subslice_mask, 0);
 }
 
 u32 intel_sseu_get_compute_subslices(const struct sseu_dev_info *sseu)
 {
-       return _intel_sseu_get_subslices(sseu, sseu->compute_subslice_mask, 0);
+       return sseu_get_subslices(sseu, sseu->compute_subslice_mask, 0);
 }
 
 void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
@@ -717,16 +724,11 @@ void intel_sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p)
        drm_printf(p, "has EU power gating: %s\n", yesno(sseu->has_eu_pg));
 }
 
-void intel_sseu_print_topology(const struct sseu_dev_info *sseu,
-                              struct drm_printer *p)
+static void sseu_print_hsw_topology(const struct sseu_dev_info *sseu,
+                                   struct drm_printer *p)
 {
        int s, ss;
 
-       if (sseu->max_slices == 0) {
-               drm_printf(p, "Unavailable\n");
-               return;
-       }
-
        for (s = 0; s < sseu->max_slices; s++) {
                drm_printf(p, "slice%d: %u subslice(s) (0x%08x):\n",
                           s, intel_sseu_subslices_per_slice(sseu, s),
@@ -741,6 +743,36 @@ void intel_sseu_print_topology(const struct sseu_dev_info *sseu,
        }
 }
 
+static void sseu_print_xehp_topology(const struct sseu_dev_info *sseu,
+                                    struct drm_printer *p)
+{
+       u32 g_dss_mask = sseu_get_geometry_subslices(sseu);
+       u32 c_dss_mask = intel_sseu_get_compute_subslices(sseu);
+       int dss;
+
+       for (dss = 0; dss < sseu->max_subslices; dss++) {
+               u16 enabled_eus = sseu_get_eus(sseu, 0, dss);
+
+               drm_printf(p, "DSS_%02d: G:%3s C:%3s, %2u EUs (0x%04hx)\n", dss,
+                          str_yes_no(g_dss_mask & BIT(dss)),
+                          str_yes_no(c_dss_mask & BIT(dss)),
+                          hweight16(enabled_eus), enabled_eus);
+       }
+}
+
+void intel_sseu_print_topology(struct drm_i915_private *i915,
+                              const struct sseu_dev_info *sseu,
+                              struct drm_printer *p)
+{
+       if (sseu->max_slices == 0) {
+               drm_printf(p, "Unavailable\n");
+       } else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
+               sseu_print_xehp_topology(sseu, p);
+       } else {
+               sseu_print_hsw_topology(sseu, p);
+       }
+}
+
 u16 intel_slicemask_from_dssmask(u64 dss_mask, int dss_per_slice)
 {
        u16 slice_mask = 0;
index 8a79cd8..5c078df 100644 (file)
@@ -15,26 +15,49 @@ struct drm_i915_private;
 struct intel_gt;
 struct drm_printer;
 
-#define GEN_MAX_SLICES         (3) /* SKL upper bound */
-#define GEN_MAX_SUBSLICES      (32) /* XEHPSDV upper bound */
-#define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE)
-#define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES)
-#define GEN_MAX_EUS            (16) /* TGL upper bound */
-#define GEN_MAX_EU_STRIDE GEN_SSEU_STRIDE(GEN_MAX_EUS)
+/*
+ * Maximum number of slices on older platforms.  Slices no longer exist
+ * starting on Xe_HP ("gslices," "cslices," etc. are a different concept and
+ * are not expressed through fusing).
+ */
+#define GEN_MAX_HSW_SLICES             3
+
+/*
+ * Maximum number of subslices that can exist within a HSW-style slice.  This
+ * is only relevant to pre-Xe_HP platforms (Xe_HP and beyond use the
+ * GEN_MAX_DSS value below).
+ */
+#define GEN_MAX_SS_PER_HSW_SLICE       6
+
+/* Maximum number of DSS on newer platforms (Xe_HP and beyond). */
+#define GEN_MAX_DSS                    32
+
+/* Maximum number of EUs that can exist within a subslice or DSS. */
+#define GEN_MAX_EUS_PER_SS             16
+
+#define SSEU_MAX(a, b)                 ((a) > (b) ? (a) : (b))
+
+/* The maximum number of bits needed to express each subslice/DSS independently */
+#define GEN_SS_MASK_SIZE               SSEU_MAX(GEN_MAX_DSS, \
+                                                GEN_MAX_HSW_SLICES * GEN_MAX_SS_PER_HSW_SLICE)
+
+#define GEN_SSEU_STRIDE(max_entries)   DIV_ROUND_UP(max_entries, BITS_PER_BYTE)
+#define GEN_MAX_SUBSLICE_STRIDE                GEN_SSEU_STRIDE(GEN_SS_MASK_SIZE)
+#define GEN_MAX_EU_STRIDE              GEN_SSEU_STRIDE(GEN_MAX_EUS_PER_SS)
 
 #define GEN_DSS_PER_GSLICE     4
 #define GEN_DSS_PER_CSLICE     8
 #define GEN_DSS_PER_MSLICE     8
 
-#define GEN_MAX_GSLICES                (GEN_MAX_SUBSLICES / GEN_DSS_PER_GSLICE)
-#define GEN_MAX_CSLICES                (GEN_MAX_SUBSLICES / GEN_DSS_PER_CSLICE)
+#define GEN_MAX_GSLICES                (GEN_MAX_DSS / GEN_DSS_PER_GSLICE)
+#define GEN_MAX_CSLICES                (GEN_MAX_DSS / GEN_DSS_PER_CSLICE)
 
 struct sseu_dev_info {
        u8 slice_mask;
-       u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
-       u8 geometry_subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
-       u8 compute_subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
-       u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES * GEN_MAX_EU_STRIDE];
+       u8 subslice_mask[GEN_SS_MASK_SIZE];
+       u8 geometry_subslice_mask[GEN_SS_MASK_SIZE];
+       u8 compute_subslice_mask[GEN_SS_MASK_SIZE];
+       u8 eu_mask[GEN_SS_MASK_SIZE * GEN_MAX_EU_STRIDE];
        u16 eu_total;
        u8 eu_per_subslice;
        u8 min_eu_in_pool;
@@ -116,7 +139,8 @@ u32 intel_sseu_make_rpcs(struct intel_gt *gt,
                         const struct intel_sseu *req_sseu);
 
 void intel_sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p);
-void intel_sseu_print_topology(const struct sseu_dev_info *sseu,
+void intel_sseu_print_topology(struct drm_i915_private *i915,
+                              const struct sseu_dev_info *sseu,
                               struct drm_printer *p);
 
 u16 intel_slicemask_from_dssmask(u64 dss_mask, int dss_per_slice);
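
The new sizing works out as follows: GEN_SS_MASK_SIZE is the larger of GEN_MAX_DSS (32) and GEN_MAX_HSW_SLICES * GEN_MAX_SS_PER_HSW_SLICE (3 * 6 = 18), i.e. 32, so GEN_MAX_SUBSLICE_STRIDE becomes DIV_ROUND_UP(32, 8) = 4 bytes and eu_mask[] shrinks from 3 * 32 * 2 = 192 bytes to 32 * 2 = 64 bytes. A small standalone check of that arithmetic (purely illustrative; the macros are re-declared locally so it compiles outside the kernel tree):

/* Standalone sanity check of the new SSEU mask sizing (illustrative only). */
#define BITS_PER_BYTE			8
#define DIV_ROUND_UP(n, d)		(((n) + (d) - 1) / (d))

#define GEN_MAX_HSW_SLICES		3
#define GEN_MAX_SS_PER_HSW_SLICE	6
#define GEN_MAX_DSS			32
#define GEN_MAX_EUS_PER_SS		16

#define SSEU_MAX(a, b)			((a) > (b) ? (a) : (b))
#define GEN_SS_MASK_SIZE		SSEU_MAX(GEN_MAX_DSS, \
						 GEN_MAX_HSW_SLICES * GEN_MAX_SS_PER_HSW_SLICE)
#define GEN_SSEU_STRIDE(max_entries)	DIV_ROUND_UP(max_entries, BITS_PER_BYTE)
#define GEN_MAX_EU_STRIDE		GEN_SSEU_STRIDE(GEN_MAX_EUS_PER_SS)

_Static_assert(GEN_SS_MASK_SIZE == 32, "max(32, 3 * 6) == 32");
_Static_assert(GEN_SSEU_STRIDE(GEN_SS_MASK_SIZE) == 4, "32 bits need 4 bytes");
_Static_assert(GEN_SS_MASK_SIZE * GEN_MAX_EU_STRIDE == 64,
	       "eu_mask[] is now 64 bytes (previously 3 * 32 * 2 = 192)");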
index 903626f..074d293 100644 (file)
@@ -246,7 +246,7 @@ int intel_sseu_status(struct seq_file *m, struct intel_gt *gt)
 {
        struct drm_i915_private *i915 = gt->i915;
        const struct intel_gt_info *info = &gt->info;
-       struct sseu_dev_info sseu;
+       struct sseu_dev_info *sseu;
        intel_wakeref_t wakeref;
 
        if (GRAPHICS_VER(i915) < 8)
@@ -256,23 +256,29 @@ int intel_sseu_status(struct seq_file *m, struct intel_gt *gt)
        i915_print_sseu_info(m, true, HAS_POOLED_EU(i915), &info->sseu);
 
        seq_puts(m, "SSEU Device Status\n");
-       memset(&sseu, 0, sizeof(sseu));
-       intel_sseu_set_info(&sseu, info->sseu.max_slices,
+
+       sseu = kzalloc(sizeof(*sseu), GFP_KERNEL);
+       if (!sseu)
+               return -ENOMEM;
+
+       intel_sseu_set_info(sseu, info->sseu.max_slices,
                            info->sseu.max_subslices,
                            info->sseu.max_eus_per_subslice);
 
        with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
                if (IS_CHERRYVIEW(i915))
-                       cherryview_sseu_device_status(gt, &sseu);
+                       cherryview_sseu_device_status(gt, sseu);
                else if (IS_BROADWELL(i915))
-                       bdw_sseu_device_status(gt, &sseu);
+                       bdw_sseu_device_status(gt, sseu);
                else if (GRAPHICS_VER(i915) == 9)
-                       gen9_sseu_device_status(gt, &sseu);
+                       gen9_sseu_device_status(gt, sseu);
                else if (GRAPHICS_VER(i915) >= 11)
-                       gen11_sseu_device_status(gt, &sseu);
+                       gen11_sseu_device_status(gt, sseu);
        }
 
-       i915_print_sseu_info(m, false, HAS_POOLED_EU(i915), &sseu);
+       i915_print_sseu_info(m, false, HAS_POOLED_EU(i915), sseu);
+
+       kfree(sseu);
 
        return 0;
 }
@@ -285,22 +291,22 @@ static int sseu_status_show(struct seq_file *m, void *unused)
 }
 DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(sseu_status);
 
-static int rcs_topology_show(struct seq_file *m, void *unused)
+static int sseu_topology_show(struct seq_file *m, void *unused)
 {
        struct intel_gt *gt = m->private;
        struct drm_printer p = drm_seq_file_printer(m);
 
-       intel_sseu_print_topology(&gt->info.sseu, &p);
+       intel_sseu_print_topology(gt->i915, &gt->info.sseu, &p);
 
        return 0;
 }
-DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(rcs_topology);
+DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(sseu_topology);
 
 void intel_sseu_debugfs_register(struct intel_gt *gt, struct dentry *root)
 {
        static const struct intel_gt_debugfs_file files[] = {
                { "sseu_status", &sseu_status_fops, NULL },
-               { "rcs_topology", &rcs_topology_fops, NULL },
+               { "sseu_topology", &sseu_topology_fops, NULL },
        };
 
        intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), gt);
index c014b40..29c8cd0 100644 (file)
@@ -1072,9 +1072,15 @@ static void __set_mcr_steering(struct i915_wa_list *wal,
 static void __add_mcr_wa(struct intel_gt *gt, struct i915_wa_list *wal,
                         unsigned int slice, unsigned int subslice)
 {
-       drm_dbg(&gt->i915->drm, "MCR slice=0x%x, subslice=0x%x\n", slice, subslice);
+       struct drm_printer p = drm_debug_printer("MCR Steering:");
 
        __set_mcr_steering(wal, GEN8_MCR_SELECTOR, slice, subslice);
+
+       gt->default_steering.groupid = slice;
+       gt->default_steering.instanceid = subslice;
+
+       if (drm_debug_enabled(DRM_UT_DRIVER))
+               intel_gt_report_steering(&p, gt, false);
 }
 
 static void
@@ -2310,7 +2316,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
                             FF_DOP_CLOCK_GATE_DISABLE);
        }
 
-       if (IS_GRAPHICS_VER(i915, 9, 12)) {
+       if (HAS_PERCTX_PREEMPT_CTRL(i915)) {
                /* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl,tgl */
                wa_masked_en(wal,
                             GEN7_FF_SLICE_CS_CHICKEN1,
@@ -2618,6 +2624,11 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
                wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS |
                                GLOBAL_INVALIDATION_MODE);
        }
+
+       if (IS_DG2(i915)) {
+               /* Wa_22014226127:dg2 */
+               wa_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE);
+       }
 }
 
 static void
@@ -2633,7 +2644,7 @@ engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal
         * to a single RCS/CCS engine's workaround list since
         * they're reset as part of the general render domain reset.
         */
-       if (engine->class == RENDER_CLASS)
+       if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE)
                general_render_compute_wa_init(engine, wal);
 
        if (engine->class == RENDER_CLASS)
index 72d5faa..09f8cd2 100644 (file)
@@ -1736,15 +1736,9 @@ static int live_preempt(void *arg)
        enum intel_engine_id id;
        int err = -ENOMEM;
 
-       if (igt_spinner_init(&spin_hi, gt))
-               return -ENOMEM;
-
-       if (igt_spinner_init(&spin_lo, gt))
-               goto err_spin_hi;
-
        ctx_hi = kernel_context(gt->i915, NULL);
        if (!ctx_hi)
-               goto err_spin_lo;
+               return -ENOMEM;
        ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
 
        ctx_lo = kernel_context(gt->i915, NULL);
@@ -1752,6 +1746,12 @@ static int live_preempt(void *arg)
                goto err_ctx_hi;
        ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
 
+       if (igt_spinner_init(&spin_hi, gt))
+               goto err_ctx_lo;
+
+       if (igt_spinner_init(&spin_lo, gt))
+               goto err_spin_hi;
+
        for_each_engine(engine, gt, id) {
                struct igt_live_test t;
                struct i915_request *rq;
@@ -1761,14 +1761,14 @@ static int live_preempt(void *arg)
 
                if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
                        err = -EIO;
-                       goto err_ctx_lo;
+                       goto err_spin_lo;
                }
 
                rq = spinner_create_request(&spin_lo, ctx_lo, engine,
                                            MI_ARB_CHECK);
                if (IS_ERR(rq)) {
                        err = PTR_ERR(rq);
-                       goto err_ctx_lo;
+                       goto err_spin_lo;
                }
 
                i915_request_add(rq);
@@ -1777,7 +1777,7 @@ static int live_preempt(void *arg)
                        GEM_TRACE_DUMP();
                        intel_gt_set_wedged(gt);
                        err = -EIO;
-                       goto err_ctx_lo;
+                       goto err_spin_lo;
                }
 
                rq = spinner_create_request(&spin_hi, ctx_hi, engine,
@@ -1785,7 +1785,7 @@ static int live_preempt(void *arg)
                if (IS_ERR(rq)) {
                        igt_spinner_end(&spin_lo);
                        err = PTR_ERR(rq);
-                       goto err_ctx_lo;
+                       goto err_spin_lo;
                }
 
                i915_request_add(rq);
@@ -1794,7 +1794,7 @@ static int live_preempt(void *arg)
                        GEM_TRACE_DUMP();
                        intel_gt_set_wedged(gt);
                        err = -EIO;
-                       goto err_ctx_lo;
+                       goto err_spin_lo;
                }
 
                igt_spinner_end(&spin_hi);
@@ -1802,19 +1802,19 @@ static int live_preempt(void *arg)
 
                if (igt_live_test_end(&t)) {
                        err = -EIO;
-                       goto err_ctx_lo;
+                       goto err_spin_lo;
                }
        }
 
        err = 0;
-err_ctx_lo:
-       kernel_context_close(ctx_lo);
-err_ctx_hi:
-       kernel_context_close(ctx_hi);
 err_spin_lo:
        igt_spinner_fini(&spin_lo);
 err_spin_hi:
        igt_spinner_fini(&spin_hi);
+err_ctx_lo:
+       kernel_context_close(ctx_lo);
+err_ctx_hi:
+       kernel_context_close(ctx_hi);
        return err;
 }
 
@@ -1828,20 +1828,20 @@ static int live_late_preempt(void *arg)
        enum intel_engine_id id;
        int err = -ENOMEM;
 
-       if (igt_spinner_init(&spin_hi, gt))
-               return -ENOMEM;
-
-       if (igt_spinner_init(&spin_lo, gt))
-               goto err_spin_hi;
-
        ctx_hi = kernel_context(gt->i915, NULL);
        if (!ctx_hi)
-               goto err_spin_lo;
+               return -ENOMEM;
 
        ctx_lo = kernel_context(gt->i915, NULL);
        if (!ctx_lo)
                goto err_ctx_hi;
 
+       if (igt_spinner_init(&spin_hi, gt))
+               goto err_ctx_lo;
+
+       if (igt_spinner_init(&spin_lo, gt))
+               goto err_spin_hi;
+
        /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
        ctx_lo->sched.priority = 1;
 
@@ -1854,14 +1854,14 @@ static int live_late_preempt(void *arg)
 
                if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
                        err = -EIO;
-                       goto err_ctx_lo;
+                       goto err_spin_lo;
                }
 
                rq = spinner_create_request(&spin_lo, ctx_lo, engine,
                                            MI_ARB_CHECK);
                if (IS_ERR(rq)) {
                        err = PTR_ERR(rq);
-                       goto err_ctx_lo;
+                       goto err_spin_lo;
                }
 
                i915_request_add(rq);
@@ -1875,7 +1875,7 @@ static int live_late_preempt(void *arg)
                if (IS_ERR(rq)) {
                        igt_spinner_end(&spin_lo);
                        err = PTR_ERR(rq);
-                       goto err_ctx_lo;
+                       goto err_spin_lo;
                }
 
                i915_request_add(rq);
@@ -1898,19 +1898,19 @@ static int live_late_preempt(void *arg)
 
                if (igt_live_test_end(&t)) {
                        err = -EIO;
-                       goto err_ctx_lo;
+                       goto err_spin_lo;
                }
        }
 
        err = 0;
-err_ctx_lo:
-       kernel_context_close(ctx_lo);
-err_ctx_hi:
-       kernel_context_close(ctx_hi);
 err_spin_lo:
        igt_spinner_fini(&spin_lo);
 err_spin_hi:
        igt_spinner_fini(&spin_hi);
+err_ctx_lo:
+       kernel_context_close(ctx_lo);
+err_ctx_hi:
+       kernel_context_close(ctx_hi);
        return err;
 
 err_wedged:
@@ -1918,7 +1918,7 @@ err_wedged:
        igt_spinner_end(&spin_lo);
        intel_gt_set_wedged(gt);
        err = -EIO;
-       goto err_ctx_lo;
+       goto err_spin_lo;
 }
 
 struct preempt_client {
@@ -3382,12 +3382,9 @@ static int live_preempt_timeout(void *arg)
        if (!intel_has_reset_engine(gt))
                return 0;
 
-       if (igt_spinner_init(&spin_lo, gt))
-               return -ENOMEM;
-
        ctx_hi = kernel_context(gt->i915, NULL);
        if (!ctx_hi)
-               goto err_spin_lo;
+               return -ENOMEM;
        ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
 
        ctx_lo = kernel_context(gt->i915, NULL);
@@ -3395,6 +3392,9 @@ static int live_preempt_timeout(void *arg)
                goto err_ctx_hi;
        ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
 
+       if (igt_spinner_init(&spin_lo, gt))
+               goto err_ctx_lo;
+
        for_each_engine(engine, gt, id) {
                unsigned long saved_timeout;
                struct i915_request *rq;
@@ -3406,21 +3406,21 @@ static int live_preempt_timeout(void *arg)
                                            MI_NOOP); /* preemption disabled */
                if (IS_ERR(rq)) {
                        err = PTR_ERR(rq);
-                       goto err_ctx_lo;
+                       goto err_spin_lo;
                }
 
                i915_request_add(rq);
                if (!igt_wait_for_spinner(&spin_lo, rq)) {
                        intel_gt_set_wedged(gt);
                        err = -EIO;
-                       goto err_ctx_lo;
+                       goto err_spin_lo;
                }
 
                rq = igt_request_alloc(ctx_hi, engine);
                if (IS_ERR(rq)) {
                        igt_spinner_end(&spin_lo);
                        err = PTR_ERR(rq);
-                       goto err_ctx_lo;
+                       goto err_spin_lo;
                }
 
                /* Flush the previous CS ack before changing timeouts */
@@ -3440,7 +3440,7 @@ static int live_preempt_timeout(void *arg)
                        intel_gt_set_wedged(gt);
                        i915_request_put(rq);
                        err = -ETIME;
-                       goto err_ctx_lo;
+                       goto err_spin_lo;
                }
 
                igt_spinner_end(&spin_lo);
@@ -3448,12 +3448,12 @@ static int live_preempt_timeout(void *arg)
        }
 
        err = 0;
+err_spin_lo:
+       igt_spinner_fini(&spin_lo);
 err_ctx_lo:
        kernel_context_close(ctx_lo);
 err_ctx_hi:
        kernel_context_close(ctx_hi);
-err_spin_lo:
-       igt_spinner_fini(&spin_lo);
        return err;
 }
 
index 21c29d3..6ba52ef 100644 (file)
@@ -1753,8 +1753,8 @@ static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
        if (IS_ERR(ce))
                return PTR_ERR(ce);
 
-       ce->runtime.num_underflow = 0;
-       ce->runtime.max_underflow = 0;
+       ce->stats.runtime.num_underflow = 0;
+       ce->stats.runtime.max_underflow = 0;
 
        do {
                unsigned int loop = 1024;
@@ -1792,11 +1792,11 @@ static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
                intel_context_get_avg_runtime_ns(ce));
 
        err = 0;
-       if (ce->runtime.num_underflow) {
+       if (ce->stats.runtime.num_underflow) {
                pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n",
                       engine->name,
-                      ce->runtime.num_underflow,
-                      ce->runtime.max_underflow);
+                      ce->stats.runtime.num_underflow,
+                      ce->stats.runtime.max_underflow);
                GEM_TRACE_DUMP();
                err = -EOVERFLOW;
        }
index c9c4f39..cca3b0d 100644 (file)
@@ -621,13 +621,15 @@ static int perf_copy_blt(void *arg)
 
        for (i = 0; i < ARRAY_SIZE(sizes); i++) {
                struct drm_i915_gem_object *src, *dst;
+               size_t sz;
                int err;
 
                src = create_init_lmem_internal(gt, sizes[i], true);
                if (IS_ERR(src))
                        return PTR_ERR(src);
 
-               dst = create_init_lmem_internal(gt, sizes[i], false);
+               sz = src->base.size;
+               dst = create_init_lmem_internal(gt, sz, false);
                if (IS_ERR(dst)) {
                        err = PTR_ERR(dst);
                        goto err_src;
@@ -640,7 +642,7 @@ static int perf_copy_blt(void *arg)
                                      dst->mm.pages->sgl,
                                      I915_CACHE_NONE,
                                      i915_gem_object_is_lmem(dst),
-                                     sizes[i]);
+                                     sz);
 
                i915_gem_object_unlock(dst);
                i915_gem_object_put(dst);
index 7afdadc..9ad6df1 100644 (file)
@@ -129,10 +129,9 @@ enum intel_guc_action {
        INTEL_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION = 0x1009,
        INTEL_GUC_ACTION_SETUP_PC_GUCRC = 0x3004,
        INTEL_GUC_ACTION_AUTHENTICATE_HUC = 0x4000,
+       INTEL_GUC_ACTION_GET_HWCONFIG = 0x4100,
        INTEL_GUC_ACTION_REGISTER_CONTEXT = 0x4502,
        INTEL_GUC_ACTION_DEREGISTER_CONTEXT = 0x4503,
-       INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505,
-       INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506,
        INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600,
        INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601,
        INTEL_GUC_ACTION_CLIENT_SOFT_RESET = 0x5507,
@@ -173,4 +172,11 @@ enum intel_guc_sleep_state_status {
 #define GUC_LOG_CONTROL_VERBOSITY_MASK (0xF << GUC_LOG_CONTROL_VERBOSITY_SHIFT)
 #define GUC_LOG_CONTROL_DEFAULT_LOGGING        (1 << 8)
 
+enum intel_guc_state_capture_event_status {
+       INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_SUCCESS = 0x0,
+       INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE = 0x1,
+};
+
+#define INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_MASK      0x000000FF
+
 #endif /* _ABI_GUC_ACTIONS_ABI_H */
index c20658e..8085fb1 100644 (file)
@@ -8,6 +8,10 @@
 
 enum intel_guc_response_status {
        INTEL_GUC_RESPONSE_STATUS_SUCCESS = 0x0,
+       INTEL_GUC_RESPONSE_NOT_SUPPORTED = 0x20,
+       INTEL_GUC_RESPONSE_NO_ATTRIBUTE_TABLE = 0x201,
+       INTEL_GUC_RESPONSE_NO_DECRYPTION_KEY = 0x202,
+       INTEL_GUC_RESPONSE_DECRYPTION_FAILED = 0x204,
        INTEL_GUC_RESPONSE_STATUS_GENERIC_FAIL = 0xF000,
 };
 
diff --git a/drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h b/drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h
new file mode 100644 (file)
index 0000000..3624abf
--- /dev/null
@@ -0,0 +1,218 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021-2022 Intel Corporation
+ */
+
+#ifndef _INTEL_GUC_CAPTURE_FWIF_H
+#define _INTEL_GUC_CAPTURE_FWIF_H
+
+#include <linux/types.h>
+#include "intel_guc_fwif.h"
+
+struct intel_guc;
+struct file;
+
+/**
+ * struct __guc_capture_bufstate
+ *
+ * Book-keeping structure used to track read and write pointers
+ * as we extract error capture data from the GuC-log-buffer's
+ * error-capture region as a stream of dwords.
+ */
+struct __guc_capture_bufstate {
+       u32 size;
+       void *data;
+       u32 rd;
+       u32 wr;
+};
+
+/**
+ * struct __guc_capture_parsed_output - extracted error capture node
+ *
+ * A single unit of extracted error-capture output data grouped together
+ * at an engine-instance level. We keep these nodes in a linked list.
+ * See cachelist and outlist below.
+ */
+struct __guc_capture_parsed_output {
+       /*
+        * A single set of 3 capture lists: a global list,
+        * an engine-class list and an engine-instance list.
+        * outlist in __guc_capture_parsed_output will keep
+        * a linked list of these nodes that will eventually
+        * be detached from outlist and attached to
+        * i915_gpu_coredump in response to a context reset.
+        */
+       struct list_head link;
+       bool is_partial;
+       u32 eng_class;
+       u32 eng_inst;
+       u32 guc_id;
+       u32 lrca;
+       struct gcap_reg_list_info {
+               u32 vfid;
+               u32 num_regs;
+               struct guc_mmio_reg *regs;
+       } reginfo[GUC_CAPTURE_LIST_TYPE_MAX];
+#define GCAP_PARSED_REGLIST_INDEX_GLOBAL   BIT(GUC_CAPTURE_LIST_TYPE_GLOBAL)
+#define GCAP_PARSED_REGLIST_INDEX_ENGCLASS BIT(GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS)
+#define GCAP_PARSED_REGLIST_INDEX_ENGINST  BIT(GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE)
+};
+
+/**
+ * struct guc_debug_capture_list_header / struct guc_debug_capture_list
+ *
+ * As part of ADS registration, these header structures (followed by
+ * an array of 'struct guc_mmio_reg' entries) are used to register with
+ * the GuC microkernel the list of registers we want it to dump out prior
+ * to an engine reset.
+ */
+struct guc_debug_capture_list_header {
+       u32 info;
+#define GUC_CAPTURELISTHDR_NUMDESCR GENMASK(15, 0)
+} __packed;
+
+struct guc_debug_capture_list {
+       struct guc_debug_capture_list_header header;
+       struct guc_mmio_reg regs[0];
+} __packed;
+
+/**
+ * struct __guc_mmio_reg_descr / struct __guc_mmio_reg_descr_group
+ *
+ * intel_guc_capture module uses these structures to maintain static
+ * tables (per unique platform) that consists of lists of registers
+ * (offsets, names, flags,...) that are used at the ADS regisration
+ * time as well as during runtime processing and reporting of error-
+ * capture states generated by GuC just prior to engine reset events.
+ */
+struct __guc_mmio_reg_descr {
+       i915_reg_t reg;
+       u32 flags;
+       u32 mask;
+       const char *regname;
+};
+
+struct __guc_mmio_reg_descr_group {
+       const struct __guc_mmio_reg_descr *list;
+       u32 num_regs;
+       u32 owner; /* see enum guc_capture_owner */
+       u32 type; /* see enum guc_capture_type */
+       u32 engine; /* as per MAX_ENGINE_CLASS */
+       struct __guc_mmio_reg_descr *extlist; /* only used for steered registers */
+};
+
+/**
+ * struct guc_state_capture_header_t / struct guc_state_capture_t /
+ * guc_state_capture_group_header_t / guc_state_capture_group_t
+ *
+ * Prior to resetting engines that have hung or faulted, the GuC microkernel
+ * reports the engine error-state (the register values that were read) by
+ * logging them into the shared GuC log buffer using this hierarchy
+ * of structures.
+ */
+struct guc_state_capture_header_t {
+       u32 owner;
+#define CAP_HDR_CAPTURE_VFID GENMASK(7, 0)
+       u32 info;
+#define CAP_HDR_CAPTURE_TYPE GENMASK(3, 0) /* see enum guc_capture_type */
+#define CAP_HDR_ENGINE_CLASS GENMASK(7, 4) /* see GUC_MAX_ENGINE_CLASSES */
+#define CAP_HDR_ENGINE_INSTANCE GENMASK(11, 8)
+       u32 lrca; /* if type-instance, LRCA (address) that hung, else set to ~0 */
+       u32 guc_id; /* if type-instance, context index of hung context, else set to ~0 */
+       u32 num_mmios;
+#define CAP_HDR_NUM_MMIOS GENMASK(9, 0)
+} __packed;
+
+struct guc_state_capture_t {
+       struct guc_state_capture_header_t header;
+       struct guc_mmio_reg mmio_entries[0];
+} __packed;
+
+enum guc_capture_group_types {
+       GUC_STATE_CAPTURE_GROUP_TYPE_FULL,
+       GUC_STATE_CAPTURE_GROUP_TYPE_PARTIAL,
+       GUC_STATE_CAPTURE_GROUP_TYPE_MAX,
+};
+
+struct guc_state_capture_group_header_t {
+       u32 owner;
+#define CAP_GRP_HDR_CAPTURE_VFID GENMASK(7, 0)
+       u32 info;
+#define CAP_GRP_HDR_NUM_CAPTURES GENMASK(7, 0)
+#define CAP_GRP_HDR_CAPTURE_TYPE GENMASK(15, 8) /* guc_capture_group_types */
+} __packed;
+
+/* this is the top level structure where an error-capture dump starts */
+struct guc_state_capture_group_t {
+       struct guc_state_capture_group_header_t grp_header;
+       struct guc_state_capture_t capture_entries[0];
+} __packed;
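
As a rough sketch (not part of the patch), the stream footprint of a single such dump can be read off the hierarchy above: one group header followed by num_captures capture entries, each a capture header plus its registers. The helper name and the uniform-register-count assumption are hypothetical.

static inline size_t example_capture_group_stream_size(u32 num_captures,
                                                       u32 num_mmios)
{
        /* group header + N capture entries, each a header plus its registers */
        return sizeof(struct guc_state_capture_group_header_t) +
               num_captures * (sizeof(struct guc_state_capture_header_t) +
                               num_mmios * sizeof(struct guc_mmio_reg));
}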
+
+/**
+ * struct __guc_capture_ads_cache
+ *
+ * A structure to cache register lists that were populated and registered
+ * with GuC at startup during ADS registration. This allows much quicker
+ * GuC resets without re-parsing all the tables for the given gt.
+ */
+struct __guc_capture_ads_cache {
+       bool is_valid;
+       void *ptr;
+       size_t size;
+       int status;
+};
+
+/**
+ * struct intel_guc_state_capture
+ *
+ * Internal context of the intel_guc_capture module.
+ */
+struct intel_guc_state_capture {
+       /**
+        * @reglists: static table of register lists used for error-capture state.
+        */
+       const struct __guc_mmio_reg_descr_group *reglists;
+
+       /**
+        * @extlists: allocated table of steered register lists used for error-capture state.
+        *
+        * NOTE: steered registers have multiple instances depending on the HW configuration
+        * (slices or dual-sub-slices) and thus depends on HW fuses discovered at startup
+        */
+       struct __guc_mmio_reg_descr_group *extlists;
+
+       /**
+        * @ads_cache: cached register lists that are ADS format ready
+        */
+       struct __guc_capture_ads_cache ads_cache[GUC_CAPTURE_LIST_INDEX_MAX]
+                                               [GUC_CAPTURE_LIST_TYPE_MAX]
+                                               [GUC_MAX_ENGINE_CLASSES];
+       void *ads_null_cache;
+
+       /**
+        * @cachelist: Pool of pre-allocated nodes for error capture output
+        *
+        * We need this pool of pre-allocated nodes because we cannot
+        * dynamically allocate new nodes when receiving the G2H notification:
+        * the event handlers for all G2H event-processing are called
+        * from the CT processing work queue, and while that queue is being
+        * processed there is no absolute guarantee that we are not in the
+        * midst of a GT reset operation (which doesn't allow allocations).
+        */
+       struct list_head cachelist;
+#define PREALLOC_NODES_MAX_COUNT (3 * GUC_MAX_ENGINE_CLASSES * GUC_MAX_INSTANCES_PER_CLASS)
+#define PREALLOC_NODES_DEFAULT_NUMREGS 64
+       int max_mmio_per_node;
+
+       /**
+        * @outlist: Linked list of parsed error-capture output nodes
+        *
+        * A linked list of parsed GuC error-capture output data before
+        * it is reported and formatted via i915_gpu_coredump. Each node in
+        * this list shall contain a single engine-capture including global,
+        * engine-class and engine-instance register dumps as per
+        * struct __guc_capture_parsed_output.
+        */
+       struct list_head outlist;
+};
+
+#endif /* _INTEL_GUC_CAPTURE_FWIF_H */
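
A minimal sketch (not part of the patch) of how the cachelist/outlist pair documented above is meant to be recycled: once a parsed output node has been consumed by the error-capture report, it is detached from the outlist and parked back on the cachelist for reuse. The helper name is hypothetical; the real equivalent (intel_guc_capture_free_node, mentioned in the flow comment further down) lives in intel_guc_capture.c.

static void example_recycle_capture_node(struct intel_guc_state_capture *gc,
                                         struct __guc_capture_parsed_output *node)
{
        list_del(&node->link);                       /* detach from outlist */
        list_add_tail(&node->link, &gc->cachelist);  /* ready for reuse */
}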
index 447a976..cda7e4b 100644 (file)
@@ -9,8 +9,9 @@
 #include "gt/intel_gt_pm_irq.h"
 #include "gt/intel_gt_regs.h"
 #include "intel_guc.h"
-#include "intel_guc_slpc.h"
 #include "intel_guc_ads.h"
+#include "intel_guc_capture.h"
+#include "intel_guc_slpc.h"
 #include "intel_guc_submission.h"
 #include "i915_drv.h"
 #include "i915_irq.h"
@@ -362,9 +363,14 @@ int intel_guc_init(struct intel_guc *guc)
        if (ret)
                goto err_fw;
 
-       ret = intel_guc_ads_create(guc);
+       ret = intel_guc_capture_init(guc);
        if (ret)
                goto err_log;
+
+       ret = intel_guc_ads_create(guc);
+       if (ret)
+               goto err_capture;
+
        GEM_BUG_ON(!guc->ads_vma);
 
        ret = intel_guc_ct_init(&guc->ct);
@@ -403,6 +409,8 @@ err_ct:
        intel_guc_ct_fini(&guc->ct);
 err_ads:
        intel_guc_ads_destroy(guc);
+err_capture:
+       intel_guc_capture_destroy(guc);
 err_log:
        intel_guc_log_destroy(&guc->log);
 err_fw:
@@ -430,6 +438,7 @@ void intel_guc_fini(struct intel_guc *guc)
        intel_guc_ct_fini(&guc->ct);
 
        intel_guc_ads_destroy(guc);
+       intel_guc_capture_destroy(guc);
        intel_guc_log_destroy(&guc->log);
        intel_uc_fw_fini(&guc->fw);
 }
index bf70794..4e431c1 100644 (file)
 #include <linux/iosys-map.h>
 #include <linux/xarray.h>
 
-#include "intel_uncore.h"
+#include "intel_guc_ct.h"
 #include "intel_guc_fw.h"
 #include "intel_guc_fwif.h"
-#include "intel_guc_ct.h"
 #include "intel_guc_log.h"
 #include "intel_guc_reg.h"
 #include "intel_guc_slpc_types.h"
 #include "intel_uc_fw.h"
+#include "intel_uncore.h"
 #include "i915_utils.h"
 #include "i915_vma.h"
 
 struct __guc_ads_blob;
+struct intel_guc_state_capture;
 
 /**
  * struct intel_guc - Top level structure of GuC.
@@ -38,6 +39,8 @@ struct intel_guc {
        struct intel_guc_ct ct;
        /** @slpc: sub-structure containing SLPC related data and objects */
        struct intel_guc_slpc slpc;
+       /** @capture: the error-state-capture module's data and objects */
+       struct intel_guc_state_capture *capture;
 
        /** @sched_engine: Global engine used to submit requests to GuC */
        struct i915_sched_engine *sched_engine;
@@ -138,6 +141,8 @@ struct intel_guc {
        bool submission_supported;
        /** @submission_selected: tracks whether the user enabled GuC submission */
        bool submission_selected;
+       /** @submission_initialized: tracks whether GuC submission has been initialised */
+       bool submission_initialized;
        /**
         * @rc_supported: tracks whether we support GuC rc on the current platform
         */
@@ -160,6 +165,8 @@ struct intel_guc {
        struct guc_mmio_reg *ads_regset;
        /** @ads_golden_ctxt_size: size of the golden contexts in the ADS */
        u32 ads_golden_ctxt_size;
+       /** @ads_capture_size: size of register lists in the ADS used for error capture */
+       u32 ads_capture_size;
        /** @ads_engine_usage_size: size of engine usage in the ADS */
        u32 ads_engine_usage_size;
 
@@ -431,6 +438,9 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
 int intel_guc_error_capture_process_msg(struct intel_guc *guc,
                                        const u32 *msg, u32 len);
 
+struct intel_engine_cs *
+intel_guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance);
+
 void intel_guc_find_hung_context(struct intel_engine_cs *engine);
 
 int intel_guc_global_policies_update(struct intel_guc *guc);
index 92cb882..3eabf4c 100644 (file)
@@ -11,6 +11,7 @@
 #include "gt/intel_lrc.h"
 #include "gt/shmem_utils.h"
 #include "intel_guc_ads.h"
+#include "intel_guc_capture.h"
 #include "intel_guc_fwif.h"
 #include "intel_uc.h"
 #include "i915_drv.h"
@@ -86,8 +87,7 @@ static u32 guc_ads_golden_ctxt_size(struct intel_guc *guc)
 
 static u32 guc_ads_capture_size(struct intel_guc *guc)
 {
-       /* FIXME: Allocate a proper capture list */
-       return PAGE_ALIGN(PAGE_SIZE);
+       return PAGE_ALIGN(guc->ads_capture_size);
 }
 
 static u32 guc_ads_private_data_size(struct intel_guc *guc)
@@ -276,15 +276,24 @@ __mmio_reg_add(struct temp_regset *regset, struct guc_mmio_reg *reg)
        return slot;
 }
 
-static long __must_check guc_mmio_reg_add(struct temp_regset *regset,
-                                         u32 offset, u32 flags)
+#define GUC_REGSET_STEERING(group, instance) ( \
+       FIELD_PREP(GUC_REGSET_STEERING_GROUP, (group)) | \
+       FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, (instance)) | \
+       GUC_REGSET_NEEDS_STEERING \
+)
+
+static long __must_check guc_mmio_reg_add(struct intel_gt *gt,
+                                         struct temp_regset *regset,
+                                         i915_reg_t reg, u32 flags)
 {
        u32 count = regset->storage_used - (regset->registers - regset->storage);
-       struct guc_mmio_reg reg = {
+       u32 offset = i915_mmio_reg_offset(reg);
+       struct guc_mmio_reg entry = {
                .offset = offset,
                .flags = flags,
        };
        struct guc_mmio_reg *slot;
+       u8 group, inst;
 
        /*
         * The mmio list is built using separate lists within the driver.
@@ -292,11 +301,22 @@ static long __must_check guc_mmio_reg_add(struct temp_regset *regset,
         * register more than once. Do not consider this an error; silently
         * move on if the register is already in the list.
         */
-       if (bsearch(&reg, regset->registers, count,
-                   sizeof(reg), guc_mmio_reg_cmp))
+       if (bsearch(&entry, regset->registers, count,
+                   sizeof(entry), guc_mmio_reg_cmp))
                return 0;
 
-       slot = __mmio_reg_add(regset, &reg);
+       /*
+        * The GuC doesn't have a default steering, so we need to explicitly
+        * steer all registers that need steering. However, we do not keep track
+        * of all the steering ranges, only of those that have a chance of using
+        * a non-default steering from the i915 pov. Instead of adding such
+        * tracking, it is easier to just program the default steering for all
+        * regs that don't need a non-default one.
+        */
+       intel_gt_get_valid_steering_for_reg(gt, reg, &group, &inst);
+       entry.flags |= GUC_REGSET_STEERING(group, inst);
+
+       slot = __mmio_reg_add(regset, &entry);
        if (IS_ERR(slot))
                return PTR_ERR(slot);
 
@@ -311,14 +331,16 @@ static long __must_check guc_mmio_reg_add(struct temp_regset *regset,
        return 0;
 }
 
-#define GUC_MMIO_REG_ADD(regset, reg, masked) \
-       guc_mmio_reg_add(regset, \
-                        i915_mmio_reg_offset((reg)), \
+#define GUC_MMIO_REG_ADD(gt, regset, reg, masked) \
+       guc_mmio_reg_add(gt, \
+                        regset, \
+                        (reg), \
                         (masked) ? GUC_REGSET_MASKED : 0)
 
 static int guc_mmio_regset_init(struct temp_regset *regset,
                                struct intel_engine_cs *engine)
 {
+       struct intel_gt *gt = engine->gt;
        const u32 base = engine->mmio_base;
        struct i915_wa_list *wal = &engine->wa_list;
        struct i915_wa *wa;
@@ -331,26 +353,26 @@ static int guc_mmio_regset_init(struct temp_regset *regset,
         */
        regset->registers = regset->storage + regset->storage_used;
 
-       ret |= GUC_MMIO_REG_ADD(regset, RING_MODE_GEN7(base), true);
-       ret |= GUC_MMIO_REG_ADD(regset, RING_HWS_PGA(base), false);
-       ret |= GUC_MMIO_REG_ADD(regset, RING_IMR(base), false);
+       ret |= GUC_MMIO_REG_ADD(gt, regset, RING_MODE_GEN7(base), true);
+       ret |= GUC_MMIO_REG_ADD(gt, regset, RING_HWS_PGA(base), false);
+       ret |= GUC_MMIO_REG_ADD(gt, regset, RING_IMR(base), false);
 
-       if (engine->class == RENDER_CLASS &&
+       if ((engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) &&
            CCS_MASK(engine->gt))
-               ret |= GUC_MMIO_REG_ADD(regset, GEN12_RCU_MODE, true);
+               ret |= GUC_MMIO_REG_ADD(gt, regset, GEN12_RCU_MODE, true);
 
        for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
-               ret |= GUC_MMIO_REG_ADD(regset, wa->reg, wa->masked_reg);
+               ret |= GUC_MMIO_REG_ADD(gt, regset, wa->reg, wa->masked_reg);
 
        /* Be extra paranoid and include all whitelist registers. */
        for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++)
-               ret |= GUC_MMIO_REG_ADD(regset,
+               ret |= GUC_MMIO_REG_ADD(gt, regset,
                                        RING_FORCE_TO_NONPRIV(base, i),
                                        false);
 
        /* add in local MOCS registers */
        for (i = 0; i < GEN9_LNCFCMOCS_REG_COUNT; i++)
-               ret |= GUC_MMIO_REG_ADD(regset, GEN9_LNCFCMOCS(i), false);
+               ret |= GUC_MMIO_REG_ADD(gt, regset, GEN9_LNCFCMOCS(i), false);
 
        return ret ? -1 : 0;
 }
@@ -433,7 +455,7 @@ static void guc_mmio_reg_state_init(struct intel_guc *guc)
 static void fill_engine_enable_masks(struct intel_gt *gt,
                                     struct iosys_map *info_map)
 {
-       info_map_write(info_map, engine_enabled_masks[GUC_RENDER_CLASS], 1);
+       info_map_write(info_map, engine_enabled_masks[GUC_RENDER_CLASS], RCS_MASK(gt));
        info_map_write(info_map, engine_enabled_masks[GUC_COMPUTE_CLASS], CCS_MASK(gt));
        info_map_write(info_map, engine_enabled_masks[GUC_BLITTER_CLASS], 1);
        info_map_write(info_map, engine_enabled_masks[GUC_VIDEO_CLASS], VDBOX_MASK(gt));
@@ -589,24 +611,119 @@ static void guc_init_golden_context(struct intel_guc *guc)
        GEM_BUG_ON(guc->ads_golden_ctxt_size != total_size);
 }
 
-static void guc_capture_list_init(struct intel_guc *guc)
+static int
+guc_capture_prep_lists(struct intel_guc *guc)
 {
+       struct intel_gt *gt = guc_to_gt(guc);
+       struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+       u32 ads_ggtt, capture_offset, null_ggtt, total_size = 0;
+       struct guc_gt_system_info local_info;
+       struct iosys_map info_map;
+       bool ads_is_mapped;
+       size_t size = 0;
+       void *ptr;
        int i, j;
-       u32 addr_ggtt, offset;
 
-       offset = guc_ads_capture_offset(guc);
-       addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset;
+       ads_is_mapped = !iosys_map_is_null(&guc->ads_map);
+       if (ads_is_mapped) {
+               capture_offset = guc_ads_capture_offset(guc);
+               ads_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma);
+               info_map = IOSYS_MAP_INIT_OFFSET(&guc->ads_map,
+                                                offsetof(struct __guc_ads_blob, system_info));
+       } else {
+               memset(&local_info, 0, sizeof(local_info));
+               iosys_map_set_vaddr(&info_map, &local_info);
+               fill_engine_enable_masks(gt, &info_map);
+       }
 
-       /* FIXME: Populate a proper capture list */
+       /* first, set aside the first page for a capture_list with zero descriptors */
+       total_size = PAGE_SIZE;
+       if (ads_is_mapped) {
+               if (!intel_guc_capture_getnullheader(guc, &ptr, &size))
+                       iosys_map_memcpy_to(&guc->ads_map, capture_offset, ptr, size);
+               null_ggtt = ads_ggtt + capture_offset;
+               capture_offset += PAGE_SIZE;
+       }
 
        for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; i++) {
                for (j = 0; j < GUC_MAX_ENGINE_CLASSES; j++) {
-                       ads_blob_write(guc, ads.capture_instance[i][j], addr_ggtt);
-                       ads_blob_write(guc, ads.capture_class[i][j], addr_ggtt);
-               }
 
-               ads_blob_write(guc, ads.capture_global[i], addr_ggtt);
+                       /* null list if we don't have said engine or list */
+                       if (!info_map_read(&info_map, engine_enabled_masks[j])) {
+                               if (ads_is_mapped) {
+                                       ads_blob_write(guc, ads.capture_class[i][j], null_ggtt);
+                                       ads_blob_write(guc, ads.capture_instance[i][j], null_ggtt);
+                               }
+                               continue;
+                       }
+                       if (intel_guc_capture_getlistsize(guc, i,
+                                                         GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS,
+                                                         j, &size)) {
+                               if (ads_is_mapped)
+                                       ads_blob_write(guc, ads.capture_class[i][j], null_ggtt);
+                               goto engine_instance_list;
+                       }
+                       total_size += size;
+                       if (ads_is_mapped) {
+                               if (total_size > guc->ads_capture_size ||
+                                   intel_guc_capture_getlist(guc, i,
+                                                             GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS,
+                                                             j, &ptr)) {
+                                       ads_blob_write(guc, ads.capture_class[i][j], null_ggtt);
+                                       continue;
+                               }
+                               ads_blob_write(guc, ads.capture_class[i][j], ads_ggtt +
+                                              capture_offset);
+                               iosys_map_memcpy_to(&guc->ads_map, capture_offset, ptr, size);
+                               capture_offset += size;
+                       }
+engine_instance_list:
+                       if (intel_guc_capture_getlistsize(guc, i,
+                                                         GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE,
+                                                         j, &size)) {
+                               if (ads_is_mapped)
+                                       ads_blob_write(guc, ads.capture_instance[i][j], null_ggtt);
+                               continue;
+                       }
+                       total_size += size;
+                       if (ads_is_mapped) {
+                               if (total_size > guc->ads_capture_size ||
+                                   intel_guc_capture_getlist(guc, i,
+                                                             GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE,
+                                                             j, &ptr)) {
+                                       ads_blob_write(guc, ads.capture_instance[i][j], null_ggtt);
+                                       continue;
+                               }
+                               ads_blob_write(guc, ads.capture_instance[i][j], ads_ggtt +
+                                              capture_offset);
+                               iosys_map_memcpy_to(&guc->ads_map, capture_offset, ptr, size);
+                               capture_offset += size;
+                       }
+               }
+               if (intel_guc_capture_getlistsize(guc, i, GUC_CAPTURE_LIST_TYPE_GLOBAL, 0, &size)) {
+                       if (ads_is_mapped)
+                               ads_blob_write(guc, ads.capture_global[i], null_ggtt);
+                       continue;
+               }
+               total_size += size;
+               if (ads_is_mapped) {
+                       if (total_size > guc->ads_capture_size ||
+                           intel_guc_capture_getlist(guc, i, GUC_CAPTURE_LIST_TYPE_GLOBAL, 0,
+                                                     &ptr)) {
+                               ads_blob_write(guc, ads.capture_global[i], null_ggtt);
+                               continue;
+                       }
+                       ads_blob_write(guc, ads.capture_global[i], ads_ggtt + capture_offset);
+                       iosys_map_memcpy_to(&guc->ads_map, capture_offset, ptr, size);
+                       capture_offset += size;
+               }
        }
+
+       if (guc->ads_capture_size && guc->ads_capture_size != PAGE_ALIGN(total_size))
+               drm_warn(&i915->drm, "GuC->ADS->Capture alloc size changed from %d to %d\n",
+                        guc->ads_capture_size, PAGE_ALIGN(total_size));
+
+       return PAGE_ALIGN(total_size);
 }
 
 static void __guc_ads_init(struct intel_guc *guc)
@@ -644,8 +761,8 @@ static void __guc_ads_init(struct intel_guc *guc)
 
        base = intel_guc_ggtt_offset(guc, guc->ads_vma);
 
-       /* Capture list for hang debug */
-       guc_capture_list_init(guc);
+       /* Lists for error capture debug */
+       guc_capture_prep_lists(guc);
 
        /* ADS */
        ads_blob_write(guc, ads.scheduler_policies, base +
@@ -693,6 +810,12 @@ int intel_guc_ads_create(struct intel_guc *guc)
                return ret;
        guc->ads_golden_ctxt_size = ret;
 
+       /* Likewise the capture lists: */
+       ret = guc_capture_prep_lists(guc);
+       if (ret < 0)
+               return ret;
+       guc->ads_capture_size = ret;
+
        /* Now the total size can be determined: */
        size = guc_ads_blob_size(guc);
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
new file mode 100644 (file)
index 0000000..c4e2596
--- /dev/null
@@ -0,0 +1,1657 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021-2022 Intel Corporation
+ */
+
+#include <linux/types.h>
+
+#include <drm/drm_print.h>
+
+#include "gt/intel_engine_regs.h"
+#include "gt/intel_gt.h"
+#include "gt/intel_gt_regs.h"
+#include "gt/intel_lrc.h"
+#include "guc_capture_fwif.h"
+#include "intel_guc_capture.h"
+#include "intel_guc_fwif.h"
+#include "i915_drv.h"
+#include "i915_gpu_error.h"
+#include "i915_irq.h"
+#include "i915_memcpy.h"
+#include "i915_reg.h"
+
+/*
+ * Define all device tables of GuC error capture register lists
+ * NOTE: For engine-registers, GuC only needs the register offsets
+ *       from the engine-mmio-base
+ */
+#define COMMON_BASE_GLOBAL \
+       { FORCEWAKE_MT,             0,      0, "FORCEWAKE" }
+
+#define COMMON_GEN9BASE_GLOBAL \
+       { GEN8_FAULT_TLB_DATA0,     0,      0, "GEN8_FAULT_TLB_DATA0" }, \
+       { GEN8_FAULT_TLB_DATA1,     0,      0, "GEN8_FAULT_TLB_DATA1" }, \
+       { ERROR_GEN6,               0,      0, "ERROR_GEN6" }, \
+       { DONE_REG,                 0,      0, "DONE_REG" }, \
+       { HSW_GTT_CACHE_EN,         0,      0, "HSW_GTT_CACHE_EN" }
+
+#define COMMON_GEN12BASE_GLOBAL \
+       { GEN12_FAULT_TLB_DATA0,    0,      0, "GEN12_FAULT_TLB_DATA0" }, \
+       { GEN12_FAULT_TLB_DATA1,    0,      0, "GEN12_FAULT_TLB_DATA1" }, \
+       { GEN12_AUX_ERR_DBG,        0,      0, "AUX_ERR_DBG" }, \
+       { GEN12_GAM_DONE,           0,      0, "GAM_DONE" }, \
+       { GEN12_RING_FAULT_REG,     0,      0, "FAULT_REG" }
+
+#define COMMON_BASE_ENGINE_INSTANCE \
+       { RING_PSMI_CTL(0),         0,      0, "RC PSMI" }, \
+       { RING_ESR(0),              0,      0, "ESR" }, \
+       { RING_DMA_FADD(0),         0,      0, "RING_DMA_FADD_LDW" }, \
+       { RING_DMA_FADD_UDW(0),     0,      0, "RING_DMA_FADD_UDW" }, \
+       { RING_IPEIR(0),            0,      0, "IPEIR" }, \
+       { RING_IPEHR(0),            0,      0, "IPEHR" }, \
+       { RING_INSTPS(0),           0,      0, "INSTPS" }, \
+       { RING_BBADDR(0),           0,      0, "RING_BBADDR_LOW32" }, \
+       { RING_BBADDR_UDW(0),       0,      0, "RING_BBADDR_UP32" }, \
+       { RING_BBSTATE(0),          0,      0, "BB_STATE" }, \
+       { CCID(0),                  0,      0, "CCID" }, \
+       { RING_ACTHD(0),            0,      0, "ACTHD_LDW" }, \
+       { RING_ACTHD_UDW(0),        0,      0, "ACTHD_UDW" }, \
+       { RING_INSTPM(0),           0,      0, "INSTPM" }, \
+       { RING_INSTDONE(0),         0,      0, "INSTDONE" }, \
+       { RING_NOPID(0),            0,      0, "RING_NOPID" }, \
+       { RING_START(0),            0,      0, "START" }, \
+       { RING_HEAD(0),             0,      0, "HEAD" }, \
+       { RING_TAIL(0),             0,      0, "TAIL" }, \
+       { RING_CTL(0),              0,      0, "CTL" }, \
+       { RING_MI_MODE(0),          0,      0, "MODE" }, \
+       { RING_CONTEXT_CONTROL(0),  0,      0, "RING_CONTEXT_CONTROL" }, \
+       { RING_HWS_PGA(0),          0,      0, "HWS" }, \
+       { RING_MODE_GEN7(0),        0,      0, "GFX_MODE" }, \
+       { GEN8_RING_PDP_LDW(0, 0),  0,      0, "PDP0_LDW" }, \
+       { GEN8_RING_PDP_UDW(0, 0),  0,      0, "PDP0_UDW" }, \
+       { GEN8_RING_PDP_LDW(0, 1),  0,      0, "PDP1_LDW" }, \
+       { GEN8_RING_PDP_UDW(0, 1),  0,      0, "PDP1_UDW" }, \
+       { GEN8_RING_PDP_LDW(0, 2),  0,      0, "PDP2_LDW" }, \
+       { GEN8_RING_PDP_UDW(0, 2),  0,      0, "PDP2_UDW" }, \
+       { GEN8_RING_PDP_LDW(0, 3),  0,      0, "PDP3_LDW" }, \
+       { GEN8_RING_PDP_UDW(0, 3),  0,      0, "PDP3_UDW" }
+
+#define COMMON_BASE_HAS_EU \
+       { EIR,                      0,      0, "EIR" }
+
+#define COMMON_BASE_RENDER \
+       { GEN7_SC_INSTDONE,         0,      0, "GEN7_SC_INSTDONE" }
+
+#define COMMON_GEN12BASE_RENDER \
+       { GEN12_SC_INSTDONE_EXTRA,  0,      0, "GEN12_SC_INSTDONE_EXTRA" }, \
+       { GEN12_SC_INSTDONE_EXTRA2, 0,      0, "GEN12_SC_INSTDONE_EXTRA2" }
+
+#define COMMON_GEN12BASE_VEC \
+       { GEN12_SFC_DONE(0),        0,      0, "SFC_DONE[0]" }, \
+       { GEN12_SFC_DONE(1),        0,      0, "SFC_DONE[1]" }, \
+       { GEN12_SFC_DONE(2),        0,      0, "SFC_DONE[2]" }, \
+       { GEN12_SFC_DONE(3),        0,      0, "SFC_DONE[3]" }
+
+/* XE_LPD - Global */
+static const struct __guc_mmio_reg_descr xe_lpd_global_regs[] = {
+       COMMON_BASE_GLOBAL,
+       COMMON_GEN9BASE_GLOBAL,
+       COMMON_GEN12BASE_GLOBAL,
+};
+
+/* XE_LPD - Render / Compute Per-Class */
+static const struct __guc_mmio_reg_descr xe_lpd_rc_class_regs[] = {
+       COMMON_BASE_HAS_EU,
+       COMMON_BASE_RENDER,
+       COMMON_GEN12BASE_RENDER,
+};
+
+/* GEN9/XE_LPD - Render / Compute Per-Engine-Instance */
+static const struct __guc_mmio_reg_descr xe_lpd_rc_inst_regs[] = {
+       COMMON_BASE_ENGINE_INSTANCE,
+};
+
+/* GEN9/XE_LPD - Media Decode/Encode Per-Engine-Instance */
+static const struct __guc_mmio_reg_descr xe_lpd_vd_inst_regs[] = {
+       COMMON_BASE_ENGINE_INSTANCE,
+};
+
+/* XE_LPD - Video Enhancement Per-Class */
+static const struct __guc_mmio_reg_descr xe_lpd_vec_class_regs[] = {
+       COMMON_GEN12BASE_VEC,
+};
+
+/* GEN9/XE_LPD - Video Enhancement Per-Engine-Instance */
+static const struct __guc_mmio_reg_descr xe_lpd_vec_inst_regs[] = {
+       COMMON_BASE_ENGINE_INSTANCE,
+};
+
+/* GEN9/XE_LPD - Blitter Per-Engine-Instance */
+static const struct __guc_mmio_reg_descr xe_lpd_blt_inst_regs[] = {
+       COMMON_BASE_ENGINE_INSTANCE,
+};
+
+/* GEN9 - Global */
+static const struct __guc_mmio_reg_descr default_global_regs[] = {
+       COMMON_BASE_GLOBAL,
+       COMMON_GEN9BASE_GLOBAL,
+};
+
+static const struct __guc_mmio_reg_descr default_rc_class_regs[] = {
+       COMMON_BASE_HAS_EU,
+       COMMON_BASE_RENDER,
+};
+
+/*
+ * Empty lists:
+ * GEN9/XE_LPD - Blitter Per-Class
+ * GEN9/XE_LPD - Media Decode/Encode Per-Class
+ * GEN9 - VEC Class
+ */
+static const struct __guc_mmio_reg_descr empty_regs_list[] = {
+};
+
+#define TO_GCAP_DEF_OWNER(x) (GUC_CAPTURE_LIST_INDEX_##x)
+#define TO_GCAP_DEF_TYPE(x) (GUC_CAPTURE_LIST_TYPE_##x)
+#define MAKE_REGLIST(regslist, regsowner, regstype, class) \
+       { \
+               regslist, \
+               ARRAY_SIZE(regslist), \
+               TO_GCAP_DEF_OWNER(regsowner), \
+               TO_GCAP_DEF_TYPE(regstype), \
+               class, \
+               NULL, \
+       }
+
+/* List of lists */
+static struct __guc_mmio_reg_descr_group default_lists[] = {
+       MAKE_REGLIST(default_global_regs, PF, GLOBAL, 0),
+       MAKE_REGLIST(default_rc_class_regs, PF, ENGINE_CLASS, GUC_RENDER_CLASS),
+       MAKE_REGLIST(xe_lpd_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_RENDER_CLASS),
+       MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_VIDEO_CLASS),
+       MAKE_REGLIST(xe_lpd_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_VIDEO_CLASS),
+       MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_VIDEOENHANCE_CLASS),
+       MAKE_REGLIST(xe_lpd_vec_inst_regs, PF, ENGINE_INSTANCE, GUC_VIDEOENHANCE_CLASS),
+       MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_BLITTER_CLASS),
+       MAKE_REGLIST(xe_lpd_blt_inst_regs, PF, ENGINE_INSTANCE, GUC_BLITTER_CLASS),
+       {}
+};
+
+static const struct __guc_mmio_reg_descr_group xe_lpd_lists[] = {
+       MAKE_REGLIST(xe_lpd_global_regs, PF, GLOBAL, 0),
+       MAKE_REGLIST(xe_lpd_rc_class_regs, PF, ENGINE_CLASS, GUC_RENDER_CLASS),
+       MAKE_REGLIST(xe_lpd_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_RENDER_CLASS),
+       MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_VIDEO_CLASS),
+       MAKE_REGLIST(xe_lpd_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_VIDEO_CLASS),
+       MAKE_REGLIST(xe_lpd_vec_class_regs, PF, ENGINE_CLASS, GUC_VIDEOENHANCE_CLASS),
+       MAKE_REGLIST(xe_lpd_vec_inst_regs, PF, ENGINE_INSTANCE, GUC_VIDEOENHANCE_CLASS),
+       MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_BLITTER_CLASS),
+       MAKE_REGLIST(xe_lpd_blt_inst_regs, PF, ENGINE_INSTANCE, GUC_BLITTER_CLASS),
+       {}
+};
+
+static const struct __guc_mmio_reg_descr_group *
+guc_capture_get_one_list(const struct __guc_mmio_reg_descr_group *reglists,
+                        u32 owner, u32 type, u32 id)
+{
+       int i;
+
+       if (!reglists)
+               return NULL;
+
+       for (i = 0; reglists[i].list; ++i) {
+               if (reglists[i].owner == owner && reglists[i].type == type &&
+                   (reglists[i].engine == id || reglists[i].type == GUC_CAPTURE_LIST_TYPE_GLOBAL))
+                       return &reglists[i];
+       }
+
+       return NULL;
+}
+
+static struct __guc_mmio_reg_descr_group *
+guc_capture_get_one_ext_list(struct __guc_mmio_reg_descr_group *reglists,
+                            u32 owner, u32 type, u32 id)
+{
+       int i;
+
+       if (!reglists)
+               return NULL;
+
+       for (i = 0; reglists[i].extlist; ++i) {
+               if (reglists[i].owner == owner && reglists[i].type == type &&
+                   (reglists[i].engine == id || reglists[i].type == GUC_CAPTURE_LIST_TYPE_GLOBAL))
+                       return &reglists[i];
+       }
+
+       return NULL;
+}
+
+static void guc_capture_free_extlists(struct __guc_mmio_reg_descr_group *reglists)
+{
+       int i = 0;
+
+       if (!reglists)
+               return;
+
+       while (reglists[i].extlist)
+               kfree(reglists[i++].extlist);
+}
+
+struct __ext_steer_reg {
+       const char *name;
+       i915_reg_t reg;
+};
+
+static const struct __ext_steer_reg xe_extregs[] = {
+       {"GEN7_SAMPLER_INSTDONE", GEN7_SAMPLER_INSTDONE},
+       {"GEN7_ROW_INSTDONE", GEN7_ROW_INSTDONE}
+};
+
+static void __fill_ext_reg(struct __guc_mmio_reg_descr *ext,
+                          const struct __ext_steer_reg *extlist,
+                          int slice_id, int subslice_id)
+{
+       ext->reg = extlist->reg;
+       ext->flags = FIELD_PREP(GUC_REGSET_STEERING_GROUP, slice_id);
+       ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, subslice_id);
+       ext->regname = extlist->name;
+}
+
+static int
+__alloc_ext_regs(struct __guc_mmio_reg_descr_group *newlist,
+                const struct __guc_mmio_reg_descr_group *rootlist, int num_regs)
+{
+       struct __guc_mmio_reg_descr *list;
+
+       list = kcalloc(num_regs, sizeof(struct __guc_mmio_reg_descr), GFP_KERNEL);
+       if (!list)
+               return -ENOMEM;
+
+       newlist->extlist = list;
+       newlist->num_regs = num_regs;
+       newlist->owner = rootlist->owner;
+       newlist->engine = rootlist->engine;
+       newlist->type = rootlist->type;
+
+       return 0;
+}
+
+static void
+guc_capture_alloc_steered_lists_xe_lpd(struct intel_guc *guc,
+                                      const struct __guc_mmio_reg_descr_group *lists)
+{
+       struct intel_gt *gt = guc_to_gt(guc);
+       struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+       int slice, subslice, i, num_steer_regs, num_tot_regs = 0;
+       const struct __guc_mmio_reg_descr_group *list;
+       struct __guc_mmio_reg_descr_group *extlists;
+       struct __guc_mmio_reg_descr *extarray;
+       struct sseu_dev_info *sseu;
+
+       /* In XE_LPD we only have steered registers for the render-class */
+       list = guc_capture_get_one_list(lists, GUC_CAPTURE_LIST_INDEX_PF,
+                                       GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS, GUC_RENDER_CLASS);
+       /* skip if extlists was previously allocated */
+       if (!list || guc->capture->extlists)
+               return;
+
+       num_steer_regs = ARRAY_SIZE(xe_extregs);
+
+       sseu = &gt->info.sseu;
+       for_each_instdone_slice_subslice(i915, sseu, slice, subslice)
+               num_tot_regs += num_steer_regs;
+
+       if (!num_tot_regs)
+               return;
+
+       /* allocate an extra for an end marker */
+       extlists = kcalloc(2, sizeof(struct __guc_mmio_reg_descr_group), GFP_KERNEL);
+       if (!extlists)
+               return;
+
+       if (__alloc_ext_regs(&extlists[0], list, num_tot_regs)) {
+               kfree(extlists);
+               return;
+       }
+
+       extarray = extlists[0].extlist;
+       for_each_instdone_slice_subslice(i915, sseu, slice, subslice) {
+               for (i = 0; i < num_steer_regs; ++i) {
+                       __fill_ext_reg(extarray, &xe_extregs[i], slice, subslice);
+                       ++extarray;
+               }
+       }
+
+       guc->capture->extlists = extlists;
+}
+
+static const struct __ext_steer_reg xehpg_extregs[] = {
+       {"XEHPG_INSTDONE_GEOM_SVG", XEHPG_INSTDONE_GEOM_SVG}
+};
+
+static bool __has_xehpg_extregs(u32 ipver)
+{
+       return (ipver >= IP_VER(12, 55));
+}
+
+static void
+guc_capture_alloc_steered_lists_xe_hpg(struct intel_guc *guc,
+                                      const struct __guc_mmio_reg_descr_group *lists,
+                                      u32 ipver)
+{
+       struct intel_gt *gt = guc_to_gt(guc);
+       struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+       struct sseu_dev_info *sseu;
+       int slice, subslice, i, iter, num_steer_regs, num_tot_regs = 0;
+       const struct __guc_mmio_reg_descr_group *list;
+       struct __guc_mmio_reg_descr_group *extlists;
+       struct __guc_mmio_reg_descr *extarray;
+
+       /* In XE_LP / HPG we only have render-class steering registers during error-capture */
+       list = guc_capture_get_one_list(lists, GUC_CAPTURE_LIST_INDEX_PF,
+                                       GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS, GUC_RENDER_CLASS);
+       /* skip if extlists was previously allocated */
+       if (!list || guc->capture->extlists)
+               return;
+
+       num_steer_regs = ARRAY_SIZE(xe_extregs);
+       if (__has_xehpg_extregs(ipver))
+               num_steer_regs += ARRAY_SIZE(xehpg_extregs);
+
+       sseu = &gt->info.sseu;
+       for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice) {
+               num_tot_regs += num_steer_regs;
+       }
+
+       if (!num_tot_regs)
+               return;
+
+       /* allocate an extra for an end marker */
+       extlists = kcalloc(2, sizeof(struct __guc_mmio_reg_descr_group), GFP_KERNEL);
+       if (!extlists)
+               return;
+
+       if (__alloc_ext_regs(&extlists[0], list, num_tot_regs)) {
+               kfree(extlists);
+               return;
+       }
+
+       extarray = extlists[0].extlist;
+       for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice) {
+               for (i = 0; i < ARRAY_SIZE(xe_extregs); ++i) {
+                       __fill_ext_reg(extarray, &xe_extregs[i], slice, subslice);
+                       ++extarray;
+               }
+               if (__has_xehpg_extregs(ipver)) {
+                       for (i = 0; i < ARRAY_SIZE(xehpg_extregs); ++i) {
+                               __fill_ext_reg(extarray, &xehpg_extregs[i], slice, subslice);
+                               ++extarray;
+                       }
+               }
+       }
+
+       drm_dbg(&i915->drm, "GuC-capture found %d-ext-regs.\n", num_tot_regs);
+       guc->capture->extlists = extlists;
+}
+
+static const struct __guc_mmio_reg_descr_group *
+guc_capture_get_device_reglist(struct intel_guc *guc)
+{
+       struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+
+       if (GRAPHICS_VER(i915) > 11) {
+               /*
+                * For certain engine classes, there are slice and subslice
+                * level registers requiring steering. We allocate and populate
+                * these at init time based on hw config and add them as an
+                * extension list at the end of the pre-populated render list.
+                */
+               if (IS_DG2(i915))
+                       guc_capture_alloc_steered_lists_xe_hpg(guc, xe_lpd_lists, IP_VER(12, 55));
+               else if (IS_XEHPSDV(i915))
+                       guc_capture_alloc_steered_lists_xe_hpg(guc, xe_lpd_lists, IP_VER(12, 50));
+               else
+                       guc_capture_alloc_steered_lists_xe_lpd(guc, xe_lpd_lists);
+
+               return xe_lpd_lists;
+       }
+
+       /* if GuC submission is enabled on a non-POR platform, just use a common baseline */
+       return default_lists;
+}
+
+static const char *
+__stringify_owner(u32 owner)
+{
+       switch (owner) {
+       case GUC_CAPTURE_LIST_INDEX_PF:
+               return "PF";
+       case GUC_CAPTURE_LIST_INDEX_VF:
+               return "VF";
+       default:
+               return "unknown";
+       }
+
+       return "";
+}
+
+static const char *
+__stringify_type(u32 type)
+{
+       switch (type) {
+       case GUC_CAPTURE_LIST_TYPE_GLOBAL:
+               return "Global";
+       case GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS:
+               return "Class";
+       case GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE:
+               return "Instance";
+       default:
+               return "unknown";
+       }
+
+       return "";
+}
+
+static const char *
+__stringify_engclass(u32 class)
+{
+       switch (class) {
+       case GUC_RENDER_CLASS:
+               return "Render";
+       case GUC_VIDEO_CLASS:
+               return "Video";
+       case GUC_VIDEOENHANCE_CLASS:
+               return "VideoEnhance";
+       case GUC_BLITTER_CLASS:
+               return "Blitter";
+       case GUC_COMPUTE_CLASS:
+               return "Compute";
+       default:
+               return "unknown";
+       }
+
+       return "";
+}
+
+static void
+guc_capture_warn_with_list_info(struct drm_i915_private *i915, char *msg,
+                               u32 owner, u32 type, u32 classid)
+{
+       if (type == GUC_CAPTURE_LIST_TYPE_GLOBAL)
+               drm_dbg(&i915->drm, "GuC-capture: %s for %s %s-Registers.\n", msg,
+                       __stringify_owner(owner), __stringify_type(type));
+       else
+               drm_dbg(&i915->drm, "GuC-capture: %s for %s %s-Registers on %s-Engine\n", msg,
+                       __stringify_owner(owner), __stringify_type(type),
+                       __stringify_engclass(classid));
+}
+
+static int
+guc_capture_list_init(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
+                     struct guc_mmio_reg *ptr, u16 num_entries)
+{
+       u32 i = 0, j = 0;
+       struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+       const struct __guc_mmio_reg_descr_group *reglists = guc->capture->reglists;
+       struct __guc_mmio_reg_descr_group *extlists = guc->capture->extlists;
+       const struct __guc_mmio_reg_descr_group *match;
+       struct __guc_mmio_reg_descr_group *matchext;
+
+       if (!reglists)
+               return -ENODEV;
+
+       match = guc_capture_get_one_list(reglists, owner, type, classid);
+       if (!match) {
+               guc_capture_warn_with_list_info(i915, "Missing register list init", owner, type,
+                                               classid);
+               return -ENODATA;
+       }
+
+       for (i = 0; i < num_entries && i < match->num_regs; ++i) {
+               ptr[i].offset = match->list[i].reg.reg;
+               ptr[i].value = 0xDEADF00D;
+               ptr[i].flags = match->list[i].flags;
+               ptr[i].mask = match->list[i].mask;
+       }
+
+       matchext = guc_capture_get_one_ext_list(extlists, owner, type, classid);
+       if (matchext) {
+               for (i = match->num_regs, j = 0; i < num_entries &&
+                    i < (match->num_regs + matchext->num_regs) &&
+                       j < matchext->num_regs; ++i, ++j) {
+                       ptr[i].offset = matchext->extlist[j].reg.reg;
+                       ptr[i].value = 0xDEADF00D;
+                       ptr[i].flags = matchext->extlist[j].flags;
+                       ptr[i].mask = matchext->extlist[j].mask;
+               }
+       }
+       if (i < num_entries)
+               drm_dbg(&i915->drm, "GuC-capture: Init reglist short %d out %d.\n",
+                       (int)i, (int)num_entries);
+
+       return 0;
+}
+
+static int
+guc_cap_list_num_regs(struct intel_guc_state_capture *gc, u32 owner, u32 type, u32 classid)
+{
+       const struct __guc_mmio_reg_descr_group *match;
+       struct __guc_mmio_reg_descr_group *matchext;
+       int num_regs;
+
+       match = guc_capture_get_one_list(gc->reglists, owner, type, classid);
+       if (!match)
+               return 0;
+
+       num_regs = match->num_regs;
+
+       matchext = guc_capture_get_one_ext_list(gc->extlists, owner, type, classid);
+       if (matchext)
+               num_regs += matchext->num_regs;
+
+       return num_regs;
+}
+
+int
+intel_guc_capture_getlistsize(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
+                             size_t *size)
+{
+       struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+       struct intel_guc_state_capture *gc = guc->capture;
+       struct __guc_capture_ads_cache *cache = &gc->ads_cache[owner][type][classid];
+       int num_regs;
+
+       if (!gc->reglists)
+               return -ENODEV;
+
+       if (cache->is_valid) {
+               *size = cache->size;
+               return cache->status;
+       }
+
+       num_regs = guc_cap_list_num_regs(gc, owner, type, classid);
+       if (!num_regs) {
+               guc_capture_warn_with_list_info(i915, "Missing register list size",
+                                               owner, type, classid);
+               return -ENODATA;
+       }
+
+       *size = PAGE_ALIGN((sizeof(struct guc_debug_capture_list)) +
+                          (num_regs * sizeof(struct guc_mmio_reg)));
+
+       return 0;
+}
+
+static void guc_capture_create_prealloc_nodes(struct intel_guc *guc);
+
+int
+intel_guc_capture_getlist(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
+                         void **outptr)
+{
+       struct intel_guc_state_capture *gc = guc->capture;
+       struct __guc_capture_ads_cache *cache = &gc->ads_cache[owner][type][classid];
+       struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+       struct guc_debug_capture_list *listnode;
+       int ret, num_regs;
+       u8 *caplist, *tmp;
+       size_t size = 0;
+
+       if (!gc->reglists)
+               return -ENODEV;
+
+       if (cache->is_valid) {
+               *outptr = cache->ptr;
+               return cache->status;
+       }
+
+       /*
+        * ADS population of input registers is a good
+        * time to pre-allocate cachelist output nodes
+        */
+       guc_capture_create_prealloc_nodes(guc);
+
+       ret = intel_guc_capture_getlistsize(guc, owner, type, classid, &size);
+       if (ret) {
+               cache->is_valid = true;
+               cache->ptr = NULL;
+               cache->size = 0;
+               cache->status = ret;
+               return ret;
+       }
+
+       caplist = kzalloc(size, GFP_KERNEL);
+       if (!caplist) {
+               drm_dbg(&i915->drm, "GuC-capture: failed to alloc cached caplist");
+               return -ENOMEM;
+       }
+
+       /* populate capture list header */
+       tmp = caplist;
+       num_regs = guc_cap_list_num_regs(guc->capture, owner, type, classid);
+       listnode = (struct guc_debug_capture_list *)tmp;
+       listnode->header.info = FIELD_PREP(GUC_CAPTURELISTHDR_NUMDESCR, (u32)num_regs);
+
+       /* populate the list of register descriptors */
+       tmp += sizeof(struct guc_debug_capture_list);
+       guc_capture_list_init(guc, owner, type, classid, (struct guc_mmio_reg *)tmp, num_regs);
+
+       /* cache this list */
+       cache->is_valid = true;
+       cache->ptr = caplist;
+       cache->size = size;
+       cache->status = 0;
+
+       *outptr = caplist;
+
+       return 0;
+}
+
+int
+intel_guc_capture_getnullheader(struct intel_guc *guc,
+                               void **outptr, size_t *size)
+{
+       struct intel_guc_state_capture *gc = guc->capture;
+       struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+       int tmp = sizeof(u32) * 4;
+       void *null_header;
+
+       if (gc->ads_null_cache) {
+               *outptr = gc->ads_null_cache;
+               *size = tmp;
+               return 0;
+       }
+
+       null_header = kzalloc(tmp, GFP_KERNEL);
+       if (!null_header) {
+               drm_dbg(&i915->drm, "GuC-capture: failed to alloc cached nulllist");
+               return -ENOMEM;
+       }
+
+       gc->ads_null_cache = null_header;
+       *outptr = null_header;
+       *size = tmp;
+
+       return 0;
+}
+
+#define GUC_CAPTURE_OVERBUFFER_MULTIPLIER 3
+
+int
+intel_guc_capture_output_min_size_est(struct intel_guc *guc)
+{
+       struct intel_gt *gt = guc_to_gt(guc);
+       struct intel_engine_cs *engine;
+       enum intel_engine_id id;
+       int worst_min_size = 0, num_regs = 0;
+       size_t tmp = 0;
+
+       if (!guc->capture)
+               return -ENODEV;
+
+       /*
+        * If every single engine-instance suffered a failure in quick succession, but
+        * the failures were all unrelated, then a burst of multiple error-capture events
+        * would dump registers for every engine instance, one at a time. In this case, GuC
+        * would even dump the global-registers repeatedly.
+        *
+        * For each engine instance, there would be 1 x guc_state_capture_group_t output
+        * followed by 3 x guc_state_capture_t lists. The latter is how the register
+        * dumps are split across different register types (where the '3' are global vs class
+        * vs instance). Finally, let's multiply the whole thing by 3x (just so we are
+        * not limited to just 1 round of data in a worst case full register dump log)
+        *
+        * NOTE: intel_guc_log that allocates the log buffer would round this size up to
+        * a power of two.
+        */
+
+       for_each_engine(engine, gt, id) {
+               worst_min_size += sizeof(struct guc_state_capture_group_header_t) +
+                                        (3 * sizeof(struct guc_state_capture_header_t));
+
+               if (!intel_guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_GLOBAL, 0, &tmp))
+                       num_regs += tmp;
+
+               if (!intel_guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS,
+                                                  engine->class, &tmp)) {
+                       num_regs += tmp;
+               }
+               if (!intel_guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE,
+                                                  engine->class, &tmp)) {
+                       num_regs += tmp;
+               }
+       }
+
+       worst_min_size += (num_regs * sizeof(struct guc_mmio_reg));
+
+       return (worst_min_size * GUC_CAPTURE_OVERBUFFER_MULTIPLIER);
+}
+
+/*
+ * KMD Init time flows:
+ * --------------------
+ *     --> alloc A: GuC input capture regs lists (registered to GuC via ADS).
+ *                  intel_guc_ads acquires the register lists by calling
+ *                  intel_guc_capture_getlistsize and intel_guc_capture_getlist 'n' times,
+ *                  where n = 1 for global-reg-list +
+ *                            num_engine_classes for class-reg-list +
+ *                            num_engine_classes for instance-reg-list
+ *                               (since all instances of the same engine-class type
+ *                                have an identical engine-instance register-list).
+ *                  ADS module also calls separately for PF vs VF.
+ *
+ *     --> alloc B: GuC output capture buf (registered via guc_init_params(log_param))
+ *                  Size = #define CAPTURE_BUFFER_SIZE (warns if it is too small)
+ *                  Note: 'x 3' to hold multiple capture groups
+ *
+ * GUC Runtime notify capture:
+ * --------------------------
+ *     --> G2H STATE_CAPTURE_NOTIFICATION
+ *                   L--> intel_guc_capture_process
+ *                           L--> Loop through B (head..tail) and for each engine instance's
+ *                                err-state-captured register-list we find, we alloc 'C':
+ *      --> alloc C: A capture-output-node structure that includes misc capture info along
+ *                   with 3 register list dumps (global, engine-class and engine-instance)
+ *                   This node is created from a pre-allocated list of blank nodes in
+ *                   guc->capture->cachelist and populated with the error-capture
+ *                   data from GuC and then it's added into guc->capture->outlist linked
+ *                   list. This list is used for matchup and printout by i915_gpu_coredump
+ *                   and err_print_gt (when the user invokes the error capture sysfs).
+ *
+ * GUC --> notify context reset:
+ * -----------------------------
+ *     --> G2H CONTEXT RESET
+ *                   L--> guc_handle_context_reset --> i915_capture_error_state
+ *                          L--> i915_gpu_coredump(..IS_GUC_CAPTURE) --> gt_record_engines
+ *                               --> capture_engine(..IS_GUC_CAPTURE)
+ *                               L--> intel_guc_capture_get_matching_node is where we
+ *                                    detach C from the internal linked list and add it into
+ *                                    the intel_engine_coredump struct (if the context and
+ *                                    engine of the event notification match a node
+ *                                    in the linked list).
+ *
+ * User Sysfs / Debugfs
+ * --------------------
+ *      --> i915_gpu_coredump_copy_to_buffer->
+ *                   L--> err_print_to_sgl --> err_print_gt
+ *                        L--> error_print_guc_captures
+ *                             L--> intel_guc_capture_print_node prints the
+ *                                  register list values of the attached node
+ *                                  on the error-engine-dump being reported.
+ *                   L--> i915_reset_error_state ... -->__i915_gpu_coredump_free
+ *                        L--> ... cleanup_gt -->
+ *                             L--> intel_guc_capture_free_node returns the
+ *                                  capture-output-node back to the internal
+ *                                  cachelist for reuse.
+ *
+ */
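
Reduced to its essentials, the runtime notify flow described above could be sketched as follows. This is not part of the patch; the wrapper name and the elided header/register parsing are hypothetical, while the called functions are defined later in this file.

static void example_process_capture_buffer(struct intel_guc *guc,
                                           struct __guc_capture_bufstate *buf)
{
        while (guc_capture_buf_cnt(buf)) {
                struct __guc_capture_parsed_output *node;

                node = guc_capture_get_prealloc_node(guc);      /* alloc 'C' */
                if (!node)
                        break;
                /* ... extract group/data headers and registers into node ... */
                guc_capture_add_node_to_outlist(guc->capture, node);
        }
}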
+
+static int guc_capture_buf_cnt(struct __guc_capture_bufstate *buf)
+{
+       if (buf->wr >= buf->rd)
+               return (buf->wr - buf->rd);
+       return (buf->size - buf->rd) + buf->wr;
+}
+
+static int guc_capture_buf_cnt_to_end(struct __guc_capture_bufstate *buf)
+{
+       if (buf->rd > buf->wr)
+               return (buf->size - buf->rd);
+       return (buf->wr - buf->rd);
+}
+
+/*
+ * GuC's error-capture output is a ring buffer populated in a byte-stream fashion:
+ *
+ * The GuC Log buffer region for error-capture is managed like a ring buffer.
+ * The GuC firmware dumps error capture logs into this ring in a byte-stream flow.
+ * Additionally, for the current and foreseeable future, all packed error-
+ * capture output structures are dword aligned.
+ *
+ * That said, if the GuC firmware is in the midst of writing a structure that is larger
+ * than one dword but the tail end of the err-capture buffer-region has less space left,
+ * we would need to extract that structure one dword at a time, straddling across the end
+ * onto the start of the ring.
+ *
+ * The function below, guc_capture_log_remove_dw, is a helper for that. All callers of this
+ * function would typically do a straight-up memcpy from the ring contents and will only
+ * call this helper if their structure-extraction straddles across the end of the
+ * ring. The GuC firmware does not add any padding; the reason for not padding is to ease
+ * scalability for future expansion of output data types without requiring a redesign
+ * of the flow controls.
+ */
+static int
+guc_capture_log_remove_dw(struct intel_guc *guc, struct __guc_capture_bufstate *buf,
+                         u32 *dw)
+{
+       struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+       int tries = 2;
+       int avail = 0;
+       u32 *src_data;
+
+       if (!guc_capture_buf_cnt(buf))
+               return 0;
+
+       while (tries--) {
+               avail = guc_capture_buf_cnt_to_end(buf);
+               if (avail >= sizeof(u32)) {
+                       src_data = (u32 *)(buf->data + buf->rd);
+                       *dw = *src_data;
+                       buf->rd += 4;
+                       return 4;
+               }
+               if (avail)
+                       drm_dbg(&i915->drm, "GuC-Cap-Logs not dword aligned, skipping.\n");
+               buf->rd = 0;
+       }
+
+       return 0;
+}
+
+static bool
+guc_capture_data_extracted(struct __guc_capture_bufstate *b,
+                          int size, void *dest)
+{
+       if (guc_capture_buf_cnt_to_end(b) >= size) {
+               memcpy(dest, (b->data + b->rd), size);
+               b->rd += size;
+               return true;
+       }
+       return false;
+}
+
+static int
+guc_capture_log_get_group_hdr(struct intel_guc *guc, struct __guc_capture_bufstate *buf,
+                             struct guc_state_capture_group_header_t *ghdr)
+{
+       int read = 0;
+       int fullsize = sizeof(struct guc_state_capture_group_header_t);
+
+       if (fullsize > guc_capture_buf_cnt(buf))
+               return -1;
+
+       if (guc_capture_data_extracted(buf, fullsize, (void *)ghdr))
+               return 0;
+
+       read += guc_capture_log_remove_dw(guc, buf, &ghdr->owner);
+       read += guc_capture_log_remove_dw(guc, buf, &ghdr->info);
+       if (read != fullsize)
+               return -1;
+
+       return 0;
+}
+
+static int
+guc_capture_log_get_data_hdr(struct intel_guc *guc, struct __guc_capture_bufstate *buf,
+                            struct guc_state_capture_header_t *hdr)
+{
+       int read = 0;
+       int fullsize = sizeof(struct guc_state_capture_header_t);
+
+       if (fullsize > guc_capture_buf_cnt(buf))
+               return -1;
+
+       if (guc_capture_data_extracted(buf, fullsize, (void *)hdr))
+               return 0;
+
+       read += guc_capture_log_remove_dw(guc, buf, &hdr->owner);
+       read += guc_capture_log_remove_dw(guc, buf, &hdr->info);
+       read += guc_capture_log_remove_dw(guc, buf, &hdr->lrca);
+       read += guc_capture_log_remove_dw(guc, buf, &hdr->guc_id);
+       read += guc_capture_log_remove_dw(guc, buf, &hdr->num_mmios);
+       if (read != fullsize)
+               return -1;
+
+       return 0;
+}
+
+static int
+guc_capture_log_get_register(struct intel_guc *guc, struct __guc_capture_bufstate *buf,
+                            struct guc_mmio_reg *reg)
+{
+       int read = 0;
+       int fullsize = sizeof(struct guc_mmio_reg);
+
+       if (fullsize > guc_capture_buf_cnt(buf))
+               return -1;
+
+       if (guc_capture_data_extracted(buf, fullsize, (void *)reg))
+               return 0;
+
+       read += guc_capture_log_remove_dw(guc, buf, &reg->offset);
+       read += guc_capture_log_remove_dw(guc, buf, &reg->value);
+       read += guc_capture_log_remove_dw(guc, buf, &reg->flags);
+       read += guc_capture_log_remove_dw(guc, buf, &reg->mask);
+       if (read != fullsize)
+               return -1;
+
+       return 0;
+}
+
+static void
+guc_capture_delete_one_node(struct intel_guc *guc, struct __guc_capture_parsed_output *node)
+{
+       int i;
+
+       for (i = 0; i < GUC_CAPTURE_LIST_TYPE_MAX; ++i)
+               kfree(node->reginfo[i].regs);
+       list_del(&node->link);
+       kfree(node);
+}
+
+static void
+guc_capture_delete_prealloc_nodes(struct intel_guc *guc)
+{
+       struct __guc_capture_parsed_output *n, *ntmp;
+
+       /*
+        * NOTE: At the end of driver operation, we must assume that we
+        * have prealloc nodes in both the cachelist as well as outlist
+        * if unclaimed error capture events occurred prior to shutdown.
+        */
+       list_for_each_entry_safe(n, ntmp, &guc->capture->outlist, link)
+               guc_capture_delete_one_node(guc, n);
+
+       list_for_each_entry_safe(n, ntmp, &guc->capture->cachelist, link)
+               guc_capture_delete_one_node(guc, n);
+}
+
+static void
+guc_capture_add_node_to_list(struct __guc_capture_parsed_output *node,
+                            struct list_head *list)
+{
+       list_add_tail(&node->link, list);
+}
+
+static void
+guc_capture_add_node_to_outlist(struct intel_guc_state_capture *gc,
+                               struct __guc_capture_parsed_output *node)
+{
+       guc_capture_add_node_to_list(node, &gc->outlist);
+}
+
+static void
+guc_capture_add_node_to_cachelist(struct intel_guc_state_capture *gc,
+                                 struct __guc_capture_parsed_output *node)
+{
+       guc_capture_add_node_to_list(node, &gc->cachelist);
+}
+
+static void
+guc_capture_init_node(struct intel_guc *guc, struct __guc_capture_parsed_output *node)
+{
+       struct guc_mmio_reg *tmp[GUC_CAPTURE_LIST_TYPE_MAX];
+       int i;
+
+       for (i = 0; i < GUC_CAPTURE_LIST_TYPE_MAX; ++i) {
+               tmp[i] = node->reginfo[i].regs;
+               memset(tmp[i], 0, sizeof(struct guc_mmio_reg) *
+                      guc->capture->max_mmio_per_node);
+       }
+       memset(node, 0, sizeof(*node));
+       for (i = 0; i < GUC_CAPTURE_LIST_TYPE_MAX; ++i)
+               node->reginfo[i].regs = tmp[i];
+
+       INIT_LIST_HEAD(&node->link);
+}
+
+static struct __guc_capture_parsed_output *
+guc_capture_get_prealloc_node(struct intel_guc *guc)
+{
+       struct __guc_capture_parsed_output *found = NULL;
+
+       if (!list_empty(&guc->capture->cachelist)) {
+               struct __guc_capture_parsed_output *n, *ntmp;
+
+               /* get first avail node from the cache list */
+               list_for_each_entry_safe(n, ntmp, &guc->capture->cachelist, link) {
+                       found = n;
+                       list_del(&n->link);
+                       break;
+               }
+       } else {
+               struct __guc_capture_parsed_output *n, *ntmp;
+
+               /* traverse down and steal back the oldest node already allocated */
+               list_for_each_entry_safe(n, ntmp, &guc->capture->outlist, link) {
+                       found = n;
+               }
+               if (found)
+                       list_del(&found->link);
+       }
+       if (found)
+               guc_capture_init_node(guc, found);
+
+       return found;
+}
+
+static struct __guc_capture_parsed_output *
+guc_capture_alloc_one_node(struct intel_guc *guc)
+{
+       struct __guc_capture_parsed_output *new;
+       int i;
+
+       new = kzalloc(sizeof(*new), GFP_KERNEL);
+       if (!new)
+               return NULL;
+
+       for (i = 0; i < GUC_CAPTURE_LIST_TYPE_MAX; ++i) {
+               new->reginfo[i].regs = kcalloc(guc->capture->max_mmio_per_node,
+                                              sizeof(struct guc_mmio_reg), GFP_KERNEL);
+               if (!new->reginfo[i].regs) {
+                       while (i)
+                               kfree(new->reginfo[--i].regs);
+                       kfree(new);
+                       return NULL;
+               }
+       }
+       guc_capture_init_node(guc, new);
+
+       return new;
+}
+
+static struct __guc_capture_parsed_output *
+guc_capture_clone_node(struct intel_guc *guc, struct __guc_capture_parsed_output *original,
+                      u32 keep_reglist_mask)
+{
+       struct __guc_capture_parsed_output *new;
+       int i;
+
+       new = guc_capture_get_prealloc_node(guc);
+       if (!new)
+               return NULL;
+       if (!original)
+               return new;
+
+       new->is_partial = original->is_partial;
+
+       /* copy reg-lists that we want to clone */
+       for (i = 0; i < GUC_CAPTURE_LIST_TYPE_MAX; ++i) {
+               if (keep_reglist_mask & BIT(i)) {
+                       GEM_BUG_ON(original->reginfo[i].num_regs  >
+                                  guc->capture->max_mmio_per_node);
+
+                       memcpy(new->reginfo[i].regs, original->reginfo[i].regs,
+                              original->reginfo[i].num_regs * sizeof(struct guc_mmio_reg));
+
+                       new->reginfo[i].num_regs = original->reginfo[i].num_regs;
+                       new->reginfo[i].vfid  = original->reginfo[i].vfid;
+
+                       if (i == GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS) {
+                               new->eng_class = original->eng_class;
+                       } else if (i == GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE) {
+                               new->eng_inst = original->eng_inst;
+                               new->guc_id = original->guc_id;
+                               new->lrca = original->lrca;
+                       }
+               }
+       }
+
+       return new;
+}
+
+static void
+__guc_capture_create_prealloc_nodes(struct intel_guc *guc)
+{
+       struct __guc_capture_parsed_output *node = NULL;
+       struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+       int i;
+
+       for (i = 0; i < PREALLOC_NODES_MAX_COUNT; ++i) {
+               node = guc_capture_alloc_one_node(guc);
+               if (!node) {
+                       drm_warn(&i915->drm, "GuC Capture pre-alloc-cache failure\n");
+                       /* don't free the priors, use what we got and clean up at shutdown */
+                       return;
+               }
+               guc_capture_add_node_to_cachelist(guc->capture, node);
+       }
+}
+
+static int
+guc_get_max_reglist_count(struct intel_guc *guc)
+{
+       int i, j, k, tmp, maxregcount = 0;
+
+       for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; ++i) {
+               for (j = 0; j < GUC_CAPTURE_LIST_TYPE_MAX; ++j) {
+                       for (k = 0; k < GUC_MAX_ENGINE_CLASSES; ++k) {
+                               if (j == GUC_CAPTURE_LIST_TYPE_GLOBAL && k > 0)
+                                       continue;
+
+                               tmp = guc_cap_list_num_regs(guc->capture, i, j, k);
+                               if (tmp > maxregcount)
+                                       maxregcount = tmp;
+                       }
+               }
+       }
+       if (!maxregcount)
+               maxregcount = PREALLOC_NODES_DEFAULT_NUMREGS;
+
+       return maxregcount;
+}
+
+static void
+guc_capture_create_prealloc_nodes(struct intel_guc *guc)
+{
+       /* skip if we've already done the pre-alloc */
+       if (guc->capture->max_mmio_per_node)
+               return;
+
+       guc->capture->max_mmio_per_node = guc_get_max_reglist_count(guc);
+       __guc_capture_create_prealloc_nodes(guc);
+}
+
+static int
+guc_capture_extract_reglists(struct intel_guc *guc, struct __guc_capture_bufstate *buf)
+{
+       struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+       struct guc_state_capture_group_header_t ghdr = {0};
+       struct guc_state_capture_header_t hdr = {0};
+       struct __guc_capture_parsed_output *node = NULL;
+       struct guc_mmio_reg *regs = NULL;
+       int i, numlists, numregs, ret = 0;
+       enum guc_capture_type datatype;
+       struct guc_mmio_reg tmp;
+       bool is_partial = false;
+
+       i = guc_capture_buf_cnt(buf);
+       if (!i)
+               return -ENODATA;
+       if (i % sizeof(u32)) {
+               drm_warn(&i915->drm, "GuC Capture new entries unaligned\n");
+               ret = -EIO;
+               goto bailout;
+       }
+
+       /* first get the capture group header */
+       if (guc_capture_log_get_group_hdr(guc, buf, &ghdr)) {
+               ret = -EIO;
+               goto bailout;
+       }
+       /*
+        * We would typically expect a layout as below, containing at least three
+        * captures (global, engine-class, engine-instance); more than three show up
+        * when multiple dependent engine instances are reset together.
+        * ____________________________________________
+        * | Capture Group                            |
+        * | ________________________________________ |
+        * | | Capture Group Header:                | |
+        * | |  - num_captures = 5                  | |
+        * | |______________________________________| |
+        * | ________________________________________ |
+        * | | Capture1:                            | |
+        * | |  Hdr: GLOBAL, numregs=a              | |
+        * | | ____________________________________ | |
+        * | | | Reglist                          | | |
+        * | | | - reg1, reg2, ... rega           | | |
+        * | | |__________________________________| | |
+        * | |______________________________________| |
+        * | ________________________________________ |
+        * | | Capture2:                            | |
+        * | |  Hdr: CLASS=RENDER/COMPUTE, numregs=b| |
+        * | | ____________________________________ | |
+        * | | | Reglist                          | | |
+        * | | | - reg1, reg2, ... regb           | | |
+        * | | |__________________________________| | |
+        * | |______________________________________| |
+        * | ________________________________________ |
+        * | | Capture3:                            | |
+        * | |  Hdr: INSTANCE=RCS, numregs=c        | |
+        * | | ____________________________________ | |
+        * | | | Reglist                          | | |
+        * | | | - reg1, reg2, ... regc           | | |
+        * | | |__________________________________| | |
+        * | |______________________________________| |
+        * | ________________________________________ |
+        * | | Capture4:                            | |
+        * | |  Hdr: CLASS=RENDER/COMPUTE, numregs=d| |
+        * | | ____________________________________ | |
+        * | | | Reglist                          | | |
+        * | | | - reg1, reg2, ... regd           | | |
+        * | | |__________________________________| | |
+        * | |______________________________________| |
+        * | ________________________________________ |
+        * | | Capture5:                            | |
+        * | |  Hdr: INSTANCE=CCS0, numregs=e       | |
+        * | | ____________________________________ | |
+        * | | | Reglist                          | | |
+        * | | | - reg1, reg2, ... rege           | | |
+        * | | |__________________________________| | |
+        * | |______________________________________| |
+        * |__________________________________________|
+        */
+       is_partial = FIELD_GET(CAP_GRP_HDR_CAPTURE_TYPE, ghdr.info);
+       numlists = FIELD_GET(CAP_GRP_HDR_NUM_CAPTURES, ghdr.info);
+
+       while (numlists--) {
+               if (guc_capture_log_get_data_hdr(guc, buf, &hdr)) {
+                       ret = -EIO;
+                       break;
+               }
+
+               datatype = FIELD_GET(CAP_HDR_CAPTURE_TYPE, hdr.info);
+               if (datatype > GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE) {
+                       /* unknown capture type - skip over to next capture set */
+                       numregs = FIELD_GET(CAP_HDR_NUM_MMIOS, hdr.num_mmios);
+                       while (numregs--) {
+                               if (guc_capture_log_get_register(guc, buf, &tmp)) {
+                                       ret = -EIO;
+                                       break;
+                               }
+                       }
+                       continue;
+               } else if (node) {
+                       /*
+                        * Based on the current capture type and what we have so far,
+                        * decide if we should add the current node into the internal
+                        * linked list for match-up when i915_gpu_coredump calls later
+                        * (and alloc a blank node for the next set of reglists)
+                        * or continue with the same node or clone the current node
+                        * but only retain the global or class registers (such as the
+                        * case of dependent engine resets).
+                        */
+                       if (datatype == GUC_CAPTURE_LIST_TYPE_GLOBAL) {
+                               guc_capture_add_node_to_outlist(guc->capture, node);
+                               node = NULL;
+                       } else if (datatype == GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS &&
+                                  node->reginfo[GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS].num_regs) {
+                               /* Add to list, clone node and duplicate global list */
+                               guc_capture_add_node_to_outlist(guc->capture, node);
+                               node = guc_capture_clone_node(guc, node,
+                                                             GCAP_PARSED_REGLIST_INDEX_GLOBAL);
+                       } else if (datatype == GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE &&
+                                  node->reginfo[GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE].num_regs) {
+                               /* Add to list, clone node and duplicate global + class lists */
+                               guc_capture_add_node_to_outlist(guc->capture, node);
+                               node = guc_capture_clone_node(guc, node,
+                                                             (GCAP_PARSED_REGLIST_INDEX_GLOBAL |
+                                                             GCAP_PARSED_REGLIST_INDEX_ENGCLASS));
+                       }
+               }
+
+               if (!node) {
+                       node = guc_capture_get_prealloc_node(guc);
+                       if (!node) {
+                               ret = -ENOMEM;
+                               break;
+                       }
+                       if (datatype != GUC_CAPTURE_LIST_TYPE_GLOBAL)
+                               drm_dbg(&i915->drm, "GuC Capture missing global dump: %08x!\n",
+                                       datatype);
+               }
+               node->is_partial = is_partial;
+               node->reginfo[datatype].vfid = FIELD_GET(CAP_HDR_CAPTURE_VFID, hdr.owner);
+               switch (datatype) {
+               case GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE:
+                       node->eng_class = FIELD_GET(CAP_HDR_ENGINE_CLASS, hdr.info);
+                       node->eng_inst = FIELD_GET(CAP_HDR_ENGINE_INSTANCE, hdr.info);
+                       node->lrca = hdr.lrca;
+                       node->guc_id = hdr.guc_id;
+                       break;
+               case GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS:
+                       node->eng_class = FIELD_GET(CAP_HDR_ENGINE_CLASS, hdr.info);
+                       break;
+               default:
+                       break;
+               }
+
+               numregs = FIELD_GET(CAP_HDR_NUM_MMIOS, hdr.num_mmios);
+               if (numregs > guc->capture->max_mmio_per_node) {
+                       drm_dbg(&i915->drm, "GuC Capture list extraction clipped by prealloc!\n");
+                       numregs = guc->capture->max_mmio_per_node;
+               }
+               node->reginfo[datatype].num_regs = numregs;
+               regs = node->reginfo[datatype].regs;
+               i = 0;
+               while (numregs--) {
+                       if (guc_capture_log_get_register(guc, buf, &regs[i++])) {
+                               ret = -EIO;
+                               break;
+                       }
+               }
+       }
+
+bailout:
+       if (node) {
+               /* If we have data, add to linked list for match-up when i915_gpu_coredump calls */
+               for (i = GUC_CAPTURE_LIST_TYPE_GLOBAL; i < GUC_CAPTURE_LIST_TYPE_MAX; ++i) {
+                       if (node->reginfo[i].regs) {
+                               guc_capture_add_node_to_outlist(guc->capture, node);
+                               node = NULL;
+                               break;
+                       }
+               }
+               if (node) /* else return it back to cache list */
+                       guc_capture_add_node_to_cachelist(guc->capture, node);
+       }
+       return ret;
+}
+
+static int __guc_capture_flushlog_complete(struct intel_guc *guc)
+{
+       u32 action[] = {
+               INTEL_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE,
+               GUC_CAPTURE_LOG_BUFFER
+       };
+
+       return intel_guc_send(guc, action, ARRAY_SIZE(action));
+}
+
+static void __guc_capture_process_output(struct intel_guc *guc)
+{
+       unsigned int buffer_size, read_offset, write_offset, full_count;
+       struct intel_uc *uc = container_of(guc, typeof(*uc), guc);
+       struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+       struct guc_log_buffer_state log_buf_state_local;
+       struct guc_log_buffer_state *log_buf_state;
+       struct __guc_capture_bufstate buf;
+       void *src_data = NULL;
+       bool new_overflow;
+       int ret;
+
+       log_buf_state = guc->log.buf_addr +
+                       (sizeof(struct guc_log_buffer_state) * GUC_CAPTURE_LOG_BUFFER);
+       src_data = guc->log.buf_addr + intel_guc_get_log_buffer_offset(GUC_CAPTURE_LOG_BUFFER);
+
+       /*
+        * Make a copy of the state structure, inside GuC log buffer
+        * (which is uncached mapped), on the stack to avoid reading
+        * from it multiple times.
+        */
+       memcpy(&log_buf_state_local, log_buf_state, sizeof(struct guc_log_buffer_state));
+       buffer_size = intel_guc_get_log_buffer_size(GUC_CAPTURE_LOG_BUFFER);
+       read_offset = log_buf_state_local.read_ptr;
+       write_offset = log_buf_state_local.sampled_write_ptr;
+       full_count = log_buf_state_local.buffer_full_cnt;
+
+       /* Bookkeeping stuff */
+       guc->log.stats[GUC_CAPTURE_LOG_BUFFER].flush += log_buf_state_local.flush_to_file;
+       new_overflow = intel_guc_check_log_buf_overflow(&guc->log, GUC_CAPTURE_LOG_BUFFER,
+                                                       full_count);
+
+       /* Now copy the actual logs. */
+       if (unlikely(new_overflow)) {
+               /* copy the whole buffer in case of overflow */
+               read_offset = 0;
+               write_offset = buffer_size;
+       } else if (unlikely((read_offset > buffer_size) ||
+                       (write_offset > buffer_size))) {
+               drm_err(&i915->drm, "invalid GuC log capture buffer state!\n");
+               /* copy whole buffer as offsets are unreliable */
+               read_offset = 0;
+               write_offset = buffer_size;
+       }
+
+       buf.size = buffer_size;
+       buf.rd = read_offset;
+       buf.wr = write_offset;
+       buf.data = src_data;
+
+       if (!uc->reset_in_progress) {
+               do {
+                       ret = guc_capture_extract_reglists(guc, &buf);
+               } while (ret >= 0);
+       }
+
+       /* Update the state of log buffer err-cap state */
+       log_buf_state->read_ptr = write_offset;
+       log_buf_state->flush_to_file = 0;
+       __guc_capture_flushlog_complete(guc);
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
+
+static const char *
+guc_capture_reg_to_str(const struct intel_guc *guc, u32 owner, u32 type,
+                      u32 class, u32 id, u32 offset, u32 *is_ext)
+{
+       const struct __guc_mmio_reg_descr_group *reglists = guc->capture->reglists;
+       struct __guc_mmio_reg_descr_group *extlists = guc->capture->extlists;
+       const struct __guc_mmio_reg_descr_group *match;
+       struct __guc_mmio_reg_descr_group *matchext;
+       int j;
+
+       *is_ext = 0;
+       if (!reglists)
+               return NULL;
+
+       match = guc_capture_get_one_list(reglists, owner, type, id);
+       if (!match)
+               return NULL;
+
+       for (j = 0; j < match->num_regs; ++j) {
+               if (offset == match->list[j].reg.reg)
+                       return match->list[j].regname;
+       }
+       if (extlists) {
+               matchext = guc_capture_get_one_ext_list(extlists, owner, type, id);
+               if (!matchext)
+                       return NULL;
+               for (j = 0; j < matchext->num_regs; ++j) {
+                       if (offset == matchext->extlist[j].reg.reg) {
+                               *is_ext = 1;
+                               return matchext->extlist[j].regname;
+                       }
+               }
+       }
+
+       return NULL;
+}
+
+#ifdef CONFIG_DRM_I915_DEBUG_GUC
+#define __out(a, ...) \
+       do { \
+               drm_warn((&(a)->i915->drm), __VA_ARGS__); \
+               i915_error_printf((a), __VA_ARGS__); \
+       } while (0)
+#else
+#define __out(a, ...) \
+       i915_error_printf(a, __VA_ARGS__)
+#endif
+
+#define GCAP_PRINT_INTEL_ENG_INFO(ebuf, eng) \
+       do { \
+               __out(ebuf, "    i915-Eng-Name: %s command stream\n", \
+                     (eng)->name); \
+               __out(ebuf, "    i915-Eng-Inst-Class: 0x%02x\n", (eng)->class); \
+               __out(ebuf, "    i915-Eng-Inst-Id: 0x%02x\n", (eng)->instance); \
+               __out(ebuf, "    i915-Eng-LogicalMask: 0x%08x\n", \
+                     (eng)->logical_mask); \
+       } while (0)
+
+#define GCAP_PRINT_GUC_INST_INFO(ebuf, node) \
+       do { \
+               __out(ebuf, "    GuC-Engine-Inst-Id: 0x%08x\n", \
+                     (node)->eng_inst); \
+               __out(ebuf, "    GuC-Context-Id: 0x%08x\n", (node)->guc_id); \
+               __out(ebuf, "    LRCA: 0x%08x\n", (node)->lrca); \
+       } while (0)
+
+int intel_guc_capture_print_engine_node(struct drm_i915_error_state_buf *ebuf,
+                                       const struct intel_engine_coredump *ee)
+{
+       const char *grptype[GUC_STATE_CAPTURE_GROUP_TYPE_MAX] = {
+               "full-capture",
+               "partial-capture"
+       };
+       const char *datatype[GUC_CAPTURE_LIST_TYPE_MAX] = {
+               "Global",
+               "Engine-Class",
+               "Engine-Instance"
+       };
+       struct intel_guc_state_capture *cap;
+       struct __guc_capture_parsed_output *node;
+       struct intel_engine_cs *eng;
+       struct guc_mmio_reg *regs;
+       struct intel_guc *guc;
+       const char *str;
+       int numregs, i, j;
+       u32 is_ext;
+
+       if (!ebuf || !ee)
+               return -EINVAL;
+       cap = ee->capture;
+       if (!cap || !ee->engine)
+               return -ENODEV;
+
+       guc = &ee->engine->gt->uc.guc;
+
+       __out(ebuf, "global --- GuC Error Capture on %s command stream:\n",
+             ee->engine->name);
+
+       node = ee->guc_capture_node;
+       if (!node) {
+               __out(ebuf, "  No matching ee-node\n");
+               return 0;
+       }
+
+       __out(ebuf, "Coverage:  %s\n", grptype[node->is_partial]);
+
+       for (i = GUC_CAPTURE_LIST_TYPE_GLOBAL; i < GUC_CAPTURE_LIST_TYPE_MAX; ++i) {
+               __out(ebuf, "  RegListType: %s\n",
+                     datatype[i % GUC_CAPTURE_LIST_TYPE_MAX]);
+               __out(ebuf, "    Owner-Id: %d\n", node->reginfo[i].vfid);
+
+               switch (i) {
+               case GUC_CAPTURE_LIST_TYPE_GLOBAL:
+               default:
+                       break;
+               case GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS:
+                       __out(ebuf, "    GuC-Eng-Class: %d\n", node->eng_class);
+                       __out(ebuf, "    i915-Eng-Class: %d\n",
+                             guc_class_to_engine_class(node->eng_class));
+                       break;
+               case GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE:
+                       eng = intel_guc_lookup_engine(guc, node->eng_class, node->eng_inst);
+                       if (eng)
+                               GCAP_PRINT_INTEL_ENG_INFO(ebuf, eng);
+                       else
+                               __out(ebuf, "    i915-Eng-Lookup Fail!\n");
+                       GCAP_PRINT_GUC_INST_INFO(ebuf, node);
+                       break;
+               }
+
+               numregs = node->reginfo[i].num_regs;
+               __out(ebuf, "    NumRegs: %d\n", numregs);
+               j = 0;
+               while (numregs--) {
+                       regs = node->reginfo[i].regs;
+                       str = guc_capture_reg_to_str(guc, GUC_CAPTURE_LIST_INDEX_PF, i,
+                                                    node->eng_class, 0, regs[j].offset, &is_ext);
+                       if (!str)
+                               __out(ebuf, "      REG-0x%08x", regs[j].offset);
+                       else
+                               __out(ebuf, "      %s", str);
+                       if (is_ext)
+                               __out(ebuf, "[%ld][%ld]",
+                                     FIELD_GET(GUC_REGSET_STEERING_GROUP, regs[j].flags),
+                                     FIELD_GET(GUC_REGSET_STEERING_INSTANCE, regs[j].flags));
+                       __out(ebuf, ":  0x%08x\n", regs[j].value);
+                       ++j;
+               }
+       }
+       return 0;
+}
+
+#endif //CONFIG_DRM_I915_CAPTURE_ERROR
+
+void intel_guc_capture_free_node(struct intel_engine_coredump *ee)
+{
+       if (!ee || !ee->guc_capture_node)
+               return;
+
+       guc_capture_add_node_to_cachelist(ee->capture, ee->guc_capture_node);
+       ee->capture = NULL;
+       ee->guc_capture_node = NULL;
+}
+
+void intel_guc_capture_get_matching_node(struct intel_gt *gt,
+                                        struct intel_engine_coredump *ee,
+                                        struct intel_context *ce)
+{
+       struct __guc_capture_parsed_output *n, *ntmp;
+       struct drm_i915_private *i915;
+       struct intel_guc *guc;
+
+       if (!gt || !ee || !ce)
+               return;
+
+       i915 = gt->i915;
+       guc = &gt->uc.guc;
+       if (!guc->capture)
+               return;
+
+       GEM_BUG_ON(ee->guc_capture_node);
+       /*
+        * Look for a matching GuC reported error capture node from
+        * the internal output link-list based on lrca, guc-id and engine
+        * identification.
+        */
+       list_for_each_entry_safe(n, ntmp, &guc->capture->outlist, link) {
+               if (n->eng_inst == GUC_ID_TO_ENGINE_INSTANCE(ee->engine->guc_id) &&
+                   n->eng_class == GUC_ID_TO_ENGINE_CLASS(ee->engine->guc_id) &&
+                   n->guc_id && n->guc_id == ce->guc_id.id &&
+                   (n->lrca & CTX_GTT_ADDRESS_MASK) && (n->lrca & CTX_GTT_ADDRESS_MASK) ==
+                   (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK)) {
+                       list_del(&n->link);
+                       ee->guc_capture_node = n;
+                       ee->capture = guc->capture;
+                       return;
+               }
+       }
+       drm_dbg(&i915->drm, "GuC capture can't match ee to node\n");
+}
+
+void intel_guc_capture_process(struct intel_guc *guc)
+{
+       if (guc->capture)
+               __guc_capture_process_output(guc);
+}
+
+static void
+guc_capture_free_ads_cache(struct intel_guc_state_capture *gc)
+{
+       int i, j, k;
+       struct __guc_capture_ads_cache *cache;
+
+       for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; ++i) {
+               for (j = 0; j < GUC_CAPTURE_LIST_TYPE_MAX; ++j) {
+                       for (k = 0; k < GUC_MAX_ENGINE_CLASSES; ++k) {
+                               cache = &gc->ads_cache[i][j][k];
+                               if (cache->is_valid)
+                                       kfree(cache->ptr);
+                       }
+               }
+       }
+       kfree(gc->ads_null_cache);
+}
+
+void intel_guc_capture_destroy(struct intel_guc *guc)
+{
+       if (!guc->capture)
+               return;
+
+       guc_capture_free_ads_cache(guc->capture);
+
+       guc_capture_delete_prealloc_nodes(guc);
+
+       guc_capture_free_extlists(guc->capture->extlists);
+       kfree(guc->capture->extlists);
+
+       kfree(guc->capture);
+       guc->capture = NULL;
+}
+
+int intel_guc_capture_init(struct intel_guc *guc)
+{
+       guc->capture = kzalloc(sizeof(*guc->capture), GFP_KERNEL);
+       if (!guc->capture)
+               return -ENOMEM;
+
+       guc->capture->reglists = guc_capture_get_device_reglist(guc);
+
+       INIT_LIST_HEAD(&guc->capture->outlist);
+       INIT_LIST_HEAD(&guc->capture->cachelist);
+
+       return 0;
+}
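
guc_capture_log_remove_dw() above treats the GuC capture output as a circular buffer: it pulls one dword at a time and, if fewer than four contiguous bytes remain before the end of the buffer, wraps the read pointer back to zero and retries once. A minimal sketch of the wrap-aware count helpers it depends on is shown here; the real helpers are defined earlier in intel_guc_capture.c, so the exact bodies below are an assumption for illustration only.

    static u32 guc_capture_buf_cnt(struct __guc_capture_bufstate *buf)
    {
            /* total unread bytes, accounting for wrap-around */
            if (buf->wr >= buf->rd)
                    return buf->wr - buf->rd;
            return (buf->size - buf->rd) + buf->wr;
    }

    static u32 guc_capture_buf_cnt_to_end(struct __guc_capture_bufstate *buf)
    {
            /* contiguous unread bytes before the read pointer has to wrap */
            if (buf->rd > buf->wr)
                    return buf->size - buf->rd;
            return buf->wr - buf->rd;
    }
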
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h
new file mode 100644 (file)
index 0000000..d3d7bd0
--- /dev/null
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021-2021 Intel Corporation
+ */
+
+#ifndef _INTEL_GUC_CAPTURE_H
+#define _INTEL_GUC_CAPTURE_H
+
+#include <linux/types.h>
+
+struct drm_i915_error_state_buf;
+struct guc_gt_system_info;
+struct intel_engine_coredump;
+struct intel_context;
+struct intel_gt;
+struct intel_guc;
+
+void intel_guc_capture_free_node(struct intel_engine_coredump *ee);
+int intel_guc_capture_print_engine_node(struct drm_i915_error_state_buf *m,
+                                       const struct intel_engine_coredump *ee);
+void intel_guc_capture_get_matching_node(struct intel_gt *gt, struct intel_engine_coredump *ee,
+                                        struct intel_context *ce);
+void intel_guc_capture_process(struct intel_guc *guc);
+int intel_guc_capture_output_min_size_est(struct intel_guc *guc);
+int intel_guc_capture_getlist(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
+                             void **outptr);
+int intel_guc_capture_getlistsize(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
+                                 size_t *size);
+int intel_guc_capture_getnullheader(struct intel_guc *guc, void **outptr, size_t *size);
+void intel_guc_capture_destroy(struct intel_guc *guc);
+int intel_guc_capture_init(struct intel_guc *guc);
+
+#endif /* _INTEL_GUC_CAPTURE_H */
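
The header above is the consumer-facing half of the error-capture flow. As a hedged sketch of the intended call sequence (the surrounding i915_gpu_coredump plumbing is assumed here and not shown in this hunk):

    /* Sketch only: gt, guc, ee, ce and ebuf come from the coredump path. */
    intel_guc_capture_process(guc);                    /* parse GuC output into outlist nodes */
    intel_guc_capture_get_matching_node(gt, ee, ce);   /* claim the node for this engine/context */
    intel_guc_capture_print_engine_node(ebuf, ee);     /* print it into the error state buffer */
    intel_guc_capture_free_node(ee);                   /* return the node to the cachelist */
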
index 4b300b6..f21b6de 100644 (file)
@@ -32,8 +32,8 @@
 #define GUC_CLIENT_PRIORITY_NORMAL     3
 #define GUC_CLIENT_PRIORITY_NUM                4
 
-#define GUC_MAX_LRC_DESCRIPTORS                65535
-#define        GUC_INVALID_LRC_ID              GUC_MAX_LRC_DESCRIPTORS
+#define GUC_MAX_CONTEXT_ID             65535
+#define        GUC_INVALID_CONTEXT_ID          GUC_MAX_CONTEXT_ID
 
 #define GUC_RENDER_ENGINE              0
 #define GUC_VIDEO_ENGINE               1
@@ -285,10 +285,13 @@ struct guc_mmio_reg {
        u32 offset;
        u32 value;
        u32 flags;
-       u32 mask;
 #define GUC_REGSET_MASKED              BIT(0)
+#define GUC_REGSET_NEEDS_STEERING      BIT(1)
 #define GUC_REGSET_MASKED_WITH_VALUE   BIT(2)
 #define GUC_REGSET_RESTORE_ONLY                BIT(3)
+#define GUC_REGSET_STEERING_GROUP       GENMASK(15, 12)
+#define GUC_REGSET_STEERING_INSTANCE    GENMASK(23, 20)
+       u32 mask;
 } __packed;
 
 /* GuC register sets */
@@ -311,6 +314,14 @@ enum {
        GUC_CAPTURE_LIST_INDEX_MAX = 2,
 };
 
+/* Register-types of GuC capture register lists */
+enum guc_capture_type {
+       GUC_CAPTURE_LIST_TYPE_GLOBAL = 0,
+       GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS,
+       GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE,
+       GUC_CAPTURE_LIST_TYPE_MAX,
+};
+
 /* GuC Additional Data Struct */
 struct guc_ads {
        struct guc_mmio_reg_set reg_state_list[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS];
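
The guc_mmio_reg hunk above adds GUC_REGSET_NEEDS_STEERING and the GUC_REGSET_STEERING_GROUP/INSTANCE fields to the flags member, which the capture printer decodes with FIELD_GET(). A minimal sketch of that decode follows; the helper name and the pr_info reporting are illustrative, not part of this patch.

    #include <linux/bitfield.h>
    #include <linux/printk.h>

    static void report_steering(const struct guc_mmio_reg *reg)
    {
            /* only steered registers carry meaningful group/instance fields */
            if (reg->flags & GUC_REGSET_NEEDS_STEERING)
                    pr_info("reg 0x%08x steered: group %lu, instance %lu\n",
                            reg->offset,
                            FIELD_GET(GUC_REGSET_STEERING_GROUP, reg->flags),
                            FIELD_GET(GUC_REGSET_STEERING_INSTANCE, reg->flags));
    }
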
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
new file mode 100644 (file)
index 0000000..e0f65bd
--- /dev/null
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "gt/intel_gt.h"
+#include "gt/intel_hwconfig.h"
+#include "i915_drv.h"
+#include "i915_memcpy.h"
+
+/*
+ * GuC has a blob containing hardware configuration information (HWConfig).
+ * This is formatted as a simple and flexible KLV (Key/Length/Value) table.
+ *
+ * For example, a minimal version could be:
+ *   enum device_attr {
+ *     ATTR_SOME_VALUE = 0,
+ *     ATTR_SOME_MASK  = 1,
+ *   };
+ *
+ *   static const u32 hwconfig[] = {
+ *     ATTR_SOME_VALUE,
+ *     1,              // Value Length in DWords
+ *     8,              // Value
+ *
+ *     ATTR_SOME_MASK,
+ *     3,
+ *     0x00FFFFFFFF, 0xFFFFFFFF, 0xFF000000,
+ *   };
+ *
+ * The attribute ids are defined in a hardware spec.
+ */
+
+static int __guc_action_get_hwconfig(struct intel_guc *guc,
+                                    u32 ggtt_offset, u32 ggtt_size)
+{
+       u32 action[] = {
+               INTEL_GUC_ACTION_GET_HWCONFIG,
+               lower_32_bits(ggtt_offset),
+               upper_32_bits(ggtt_offset),
+               ggtt_size,
+       };
+       int ret;
+
+       ret = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0);
+       if (ret == -ENXIO)
+               return -ENOENT;
+
+       return ret;
+}
+
+static int guc_hwconfig_discover_size(struct intel_guc *guc, struct intel_hwconfig *hwconfig)
+{
+       int ret;
+
+       /*
+        * Sending a query with zero offset and size will return the
+        * size of the blob.
+        */
+       ret = __guc_action_get_hwconfig(guc, 0, 0);
+       if (ret < 0)
+               return ret;
+
+       if (ret == 0)
+               return -EINVAL;
+
+       hwconfig->size = ret;
+       return 0;
+}
+
+static int guc_hwconfig_fill_buffer(struct intel_guc *guc, struct intel_hwconfig *hwconfig)
+{
+       struct i915_vma *vma;
+       u32 ggtt_offset;
+       void *vaddr;
+       int ret;
+
+       GEM_BUG_ON(!hwconfig->size);
+
+       ret = intel_guc_allocate_and_map_vma(guc, hwconfig->size, &vma, &vaddr);
+       if (ret)
+               return ret;
+
+       ggtt_offset = intel_guc_ggtt_offset(guc, vma);
+
+       ret = __guc_action_get_hwconfig(guc, ggtt_offset, hwconfig->size);
+       if (ret >= 0)
+               memcpy(hwconfig->ptr, vaddr, hwconfig->size);
+
+       i915_vma_unpin_and_release(&vma, I915_VMA_RELEASE_MAP);
+
+       return ret;
+}
+
+static bool has_table(struct drm_i915_private *i915)
+{
+       if (IS_ALDERLAKE_P(i915))
+               return true;
+
+       return false;
+}
+
+/**
+ * guc_hwconfig_init - Initialize the HWConfig
+ *
+ * Retrieve the HWConfig table from the GuC and save it locally.
+ * It can then be queried on demand by other users later on.
+ */
+static int guc_hwconfig_init(struct intel_gt *gt)
+{
+       struct intel_hwconfig *hwconfig = &gt->info.hwconfig;
+       struct intel_guc *guc = &gt->uc.guc;
+       int ret;
+
+       if (!has_table(gt->i915))
+               return 0;
+
+       ret = guc_hwconfig_discover_size(guc, hwconfig);
+       if (ret)
+               return ret;
+
+       hwconfig->ptr = kmalloc(hwconfig->size, GFP_KERNEL);
+       if (!hwconfig->ptr) {
+               hwconfig->size = 0;
+               return -ENOMEM;
+       }
+
+       ret = guc_hwconfig_fill_buffer(guc, hwconfig);
+       if (ret < 0) {
+               intel_gt_fini_hwconfig(gt);
+               return ret;
+       }
+
+       return 0;
+}
+
+/**
+ * intel_gt_init_hwconfig - Initialize the HWConfig if available
+ *
+ * Retrieve the HWConfig table if available on the current platform.
+ */
+int intel_gt_init_hwconfig(struct intel_gt *gt)
+{
+       if (!intel_uc_uses_guc(&gt->uc))
+               return 0;
+
+       return guc_hwconfig_init(gt);
+}
+
+/**
+ * intel_gt_fini_hwconfig - Finalize the HWConfig
+ *
+ * Free up the memory allocation holding the table.
+ */
+void intel_gt_fini_hwconfig(struct intel_gt *gt)
+{
+       struct intel_hwconfig *hwconfig = &gt->info.hwconfig;
+
+       kfree(hwconfig->ptr);
+       hwconfig->size = 0;
+       hwconfig->ptr = NULL;
+}
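
Since the table is a flat dword array of KLV records, as described at the top of the file, a consumer can walk it linearly once guc_hwconfig_init() has copied it into gt->info.hwconfig. A minimal sketch of such a walk (the helper name and the pr_info reporting are illustrative, not part of this patch; num_dw would be hwconfig->size / sizeof(u32)):

    static void hwconfig_walk_klv(const u32 *blob, u32 num_dw)
    {
            u32 i = 0;

            while (i + 2 <= num_dw) {
                    u32 key = blob[i];
                    u32 len = blob[i + 1];      /* value length in dwords */

                    if (i + 2 + len > num_dw)
                            break;              /* truncated record */

                    pr_info("hwconfig attr %u: %u dword(s)\n", key, len);
                    i += 2 + len;
            }
    }
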
index b53f61f..74034eb 100644 (file)
@@ -9,9 +9,10 @@
 #include "i915_drv.h"
 #include "i915_irq.h"
 #include "i915_memcpy.h"
+#include "intel_guc_capture.h"
 #include "intel_guc_log.h"
 
-static void guc_log_capture_logs(struct intel_guc_log *log);
+static void guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log);
 
 /**
  * DOC: GuC firmware log
@@ -25,7 +26,8 @@ static void guc_log_capture_logs(struct intel_guc_log *log);
 static int guc_action_flush_log_complete(struct intel_guc *guc)
 {
        u32 action[] = {
-               INTEL_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE
+               INTEL_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE,
+               GUC_DEBUG_LOG_BUFFER
        };
 
        return intel_guc_send(guc, action, ARRAY_SIZE(action));
@@ -136,7 +138,7 @@ static void guc_move_to_next_buf(struct intel_guc_log *log)
        smp_wmb();
 
        /* All data has been written, so now move the offset of sub buffer. */
-       relay_reserve(log->relay.channel, log->vma->obj->base.size);
+       relay_reserve(log->relay.channel, log->vma->obj->base.size - CAPTURE_BUFFER_SIZE);
 
        /* Switch to the next sub buffer */
        relay_flush(log->relay.channel);
@@ -156,9 +158,9 @@ static void *guc_get_write_buffer(struct intel_guc_log *log)
        return relay_reserve(log->relay.channel, 0);
 }
 
-static bool guc_check_log_buf_overflow(struct intel_guc_log *log,
-                                      enum guc_log_buffer_type type,
-                                      unsigned int full_cnt)
+bool intel_guc_check_log_buf_overflow(struct intel_guc_log *log,
+                                     enum guc_log_buffer_type type,
+                                     unsigned int full_cnt)
 {
        unsigned int prev_full_cnt = log->stats[type].sampled_overflow;
        bool overflow = false;
@@ -181,7 +183,7 @@ static bool guc_check_log_buf_overflow(struct intel_guc_log *log,
        return overflow;
 }
 
-static unsigned int guc_get_log_buffer_size(enum guc_log_buffer_type type)
+unsigned int intel_guc_get_log_buffer_size(enum guc_log_buffer_type type)
 {
        switch (type) {
        case GUC_DEBUG_LOG_BUFFER:
@@ -197,7 +199,21 @@ static unsigned int guc_get_log_buffer_size(enum guc_log_buffer_type type)
        return 0;
 }
 
-static void guc_read_update_log_buffer(struct intel_guc_log *log)
+size_t intel_guc_get_log_buffer_offset(enum guc_log_buffer_type type)
+{
+       enum guc_log_buffer_type i;
+       size_t offset = PAGE_SIZE; /* for the log_buffer_states */
+
+       for (i = GUC_DEBUG_LOG_BUFFER; i < GUC_MAX_LOG_BUFFER; ++i) {
+               if (i == type)
+                       break;
+               offset += intel_guc_get_log_buffer_size(i);
+       }
+
+       return offset;
+}
+
+static void _guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log)
 {
        unsigned int buffer_size, read_offset, write_offset, bytes_to_copy, full_cnt;
        struct guc_log_buffer_state *log_buf_state, *log_buf_snapshot_state;
@@ -212,7 +228,8 @@ static void guc_read_update_log_buffer(struct intel_guc_log *log)
                goto out_unlock;
 
        /* Get the pointer to shared GuC log buffer */
-       log_buf_state = src_data = log->relay.buf_addr;
+       src_data = log->buf_addr;
+       log_buf_state = src_data;
 
        /* Get the pointer to local buffer to store the logs */
        log_buf_snapshot_state = dst_data = guc_get_write_buffer(log);
@@ -222,7 +239,7 @@ static void guc_read_update_log_buffer(struct intel_guc_log *log)
                 * Used rate limited to avoid deluge of messages, logs might be
                 * getting consumed by User at a slow rate.
                 */
-               DRM_ERROR_RATELIMITED("no sub-buffer to capture logs\n");
+               DRM_ERROR_RATELIMITED("no sub-buffer to copy general logs\n");
                log->relay.full_count++;
 
                goto out_unlock;
@@ -232,7 +249,8 @@ static void guc_read_update_log_buffer(struct intel_guc_log *log)
        src_data += PAGE_SIZE;
        dst_data += PAGE_SIZE;
 
-       for (type = GUC_DEBUG_LOG_BUFFER; type < GUC_MAX_LOG_BUFFER; type++) {
+       /* For relay logging, we exclude error state capture */
+       for (type = GUC_DEBUG_LOG_BUFFER; type <= GUC_CRASH_DUMP_LOG_BUFFER; type++) {
                /*
                 * Make a copy of the state structure, inside GuC log buffer
                 * (which is uncached mapped), on the stack to avoid reading
@@ -240,14 +258,14 @@ static void guc_read_update_log_buffer(struct intel_guc_log *log)
                 */
                memcpy(&log_buf_state_local, log_buf_state,
                       sizeof(struct guc_log_buffer_state));
-               buffer_size = guc_get_log_buffer_size(type);
+               buffer_size = intel_guc_get_log_buffer_size(type);
                read_offset = log_buf_state_local.read_ptr;
                write_offset = log_buf_state_local.sampled_write_ptr;
                full_cnt = log_buf_state_local.buffer_full_cnt;
 
                /* Bookkeeping stuff */
                log->stats[type].flush += log_buf_state_local.flush_to_file;
-               new_overflow = guc_check_log_buf_overflow(log, type, full_cnt);
+               new_overflow = intel_guc_check_log_buf_overflow(log, type, full_cnt);
 
                /* Update the state of shared log buffer */
                log_buf_state->read_ptr = write_offset;
@@ -300,49 +318,43 @@ out_unlock:
        mutex_unlock(&log->relay.lock);
 }
 
-static void capture_logs_work(struct work_struct *work)
+static void copy_debug_logs_work(struct work_struct *work)
 {
        struct intel_guc_log *log =
                container_of(work, struct intel_guc_log, relay.flush_work);
 
-       guc_log_capture_logs(log);
+       guc_log_copy_debuglogs_for_relay(log);
 }
 
-static int guc_log_map(struct intel_guc_log *log)
+static int guc_log_relay_map(struct intel_guc_log *log)
 {
-       void *vaddr;
-
        lockdep_assert_held(&log->relay.lock);
 
-       if (!log->vma)
+       if (!log->vma || !log->buf_addr)
                return -ENODEV;
 
        /*
-        * Create a WC (Uncached for read) vmalloc mapping of log
-        * buffer pages, so that we can directly get the data
-        * (up-to-date) from memory.
+        * WC vmalloc mapping of log buffer pages was done at
+        * GuC Log Init time, but let's keep a ref for book-keeping
         */
-       vaddr = i915_gem_object_pin_map_unlocked(log->vma->obj, I915_MAP_WC);
-       if (IS_ERR(vaddr))
-               return PTR_ERR(vaddr);
-
-       log->relay.buf_addr = vaddr;
+       i915_gem_object_get(log->vma->obj);
+       log->relay.buf_in_use = true;
 
        return 0;
 }
 
-static void guc_log_unmap(struct intel_guc_log *log)
+static void guc_log_relay_unmap(struct intel_guc_log *log)
 {
        lockdep_assert_held(&log->relay.lock);
 
-       i915_gem_object_unpin_map(log->vma->obj);
-       log->relay.buf_addr = NULL;
+       i915_gem_object_put(log->vma->obj);
+       log->relay.buf_in_use = false;
 }
 
 void intel_guc_log_init_early(struct intel_guc_log *log)
 {
        mutex_init(&log->relay.lock);
-       INIT_WORK(&log->relay.flush_work, capture_logs_work);
+       INIT_WORK(&log->relay.flush_work, copy_debug_logs_work);
        log->relay.started = false;
 }
 
@@ -357,8 +369,11 @@ static int guc_log_relay_create(struct intel_guc_log *log)
        lockdep_assert_held(&log->relay.lock);
        GEM_BUG_ON(!log->vma);
 
-        /* Keep the size of sub buffers same as shared log buffer */
-       subbuf_size = log->vma->size;
+        /*
+         * Keep the size of sub buffers the same as the shared log buffer,
+         * but exclude the error-state-capture region from the relayed logs
+         */
+       subbuf_size = log->vma->size - CAPTURE_BUFFER_SIZE;
 
        /*
         * Store up to 8 snapshots, which is large enough to buffer sufficient
@@ -393,13 +408,13 @@ static void guc_log_relay_destroy(struct intel_guc_log *log)
        log->relay.channel = NULL;
 }
 
-static void guc_log_capture_logs(struct intel_guc_log *log)
+static void guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log)
 {
        struct intel_guc *guc = log_to_guc(log);
        struct drm_i915_private *dev_priv = guc_to_gt(guc)->i915;
        intel_wakeref_t wakeref;
 
-       guc_read_update_log_buffer(log);
+       _guc_log_copy_debuglogs_for_relay(log);
 
        /*
         * Generally device is expected to be active only at this
@@ -439,6 +454,7 @@ int intel_guc_log_create(struct intel_guc_log *log)
 {
        struct intel_guc *guc = log_to_guc(log);
        struct i915_vma *vma;
+       void *vaddr;
        u32 guc_log_size;
        int ret;
 
@@ -446,23 +462,28 @@ int intel_guc_log_create(struct intel_guc_log *log)
 
        /*
         *  GuC Log buffer Layout
+        * (this ordering must follow "enum guc_log_buffer_type" definition)
         *
         *  +===============================+ 00B
-        *  |    Crash dump state header    |
-        *  +-------------------------------+ 32B
         *  |      Debug state header       |
+        *  +-------------------------------+ 32B
+        *  |    Crash dump state header    |
         *  +-------------------------------+ 64B
         *  |     Capture state header      |
         *  +-------------------------------+ 96B
         *  |                               |
         *  +===============================+ PAGE_SIZE (4KB)
-        *  |        Crash Dump logs        |
-        *  +===============================+ + CRASH_SIZE
         *  |          Debug logs           |
         *  +===============================+ + DEBUG_SIZE
+        *  |        Crash Dump logs        |
+        *  +===============================+ + CRASH_SIZE
         *  |         Capture logs          |
         *  +===============================+ + CAPTURE_SIZE
         */
+       if (intel_guc_capture_output_min_size_est(guc) > CAPTURE_BUFFER_SIZE)
+               DRM_WARN("GuC log buffer for state_capture may be too small. %d < %d\n",
+                        CAPTURE_BUFFER_SIZE, intel_guc_capture_output_min_size_est(guc));
+
        guc_log_size = PAGE_SIZE + CRASH_BUFFER_SIZE + DEBUG_BUFFER_SIZE +
                       CAPTURE_BUFFER_SIZE;
 
@@ -473,6 +494,17 @@ int intel_guc_log_create(struct intel_guc_log *log)
        }
 
        log->vma = vma;
+       /*
+        * Create a WC (Uncached for read) vmalloc mapping up front for immediate
+        * access to data from memory during critical events such as error capture
+        */
+       vaddr = i915_gem_object_pin_map_unlocked(log->vma->obj, I915_MAP_WC);
+       if (IS_ERR(vaddr)) {
+               ret = PTR_ERR(vaddr);
+               i915_vma_unpin_and_release(&log->vma, 0);
+               goto err;
+       }
+       log->buf_addr = vaddr;
 
        log->level = __get_default_log_level(log);
        DRM_DEBUG_DRIVER("guc_log_level=%d (%s, verbose:%s, verbosity:%d)\n",
@@ -483,13 +515,14 @@ int intel_guc_log_create(struct intel_guc_log *log)
        return 0;
 
 err:
-       DRM_ERROR("Failed to allocate GuC log buffer. %d\n", ret);
+       DRM_ERROR("Failed to allocate or map GuC log buffer. %d\n", ret);
        return ret;
 }
 
 void intel_guc_log_destroy(struct intel_guc_log *log)
 {
-       i915_vma_unpin_and_release(&log->vma, 0);
+       log->buf_addr = NULL;
+       i915_vma_unpin_and_release(&log->vma, I915_VMA_RELEASE_MAP);
 }
 
 int intel_guc_log_set_level(struct intel_guc_log *log, u32 level)
@@ -534,7 +567,7 @@ out_unlock:
 
 bool intel_guc_log_relay_created(const struct intel_guc_log *log)
 {
-       return log->relay.buf_addr;
+       return log->buf_addr;
 }
 
 int intel_guc_log_relay_open(struct intel_guc_log *log)
@@ -565,7 +598,7 @@ int intel_guc_log_relay_open(struct intel_guc_log *log)
        if (ret)
                goto out_unlock;
 
-       ret = guc_log_map(log);
+       ret = guc_log_relay_map(log);
        if (ret)
                goto out_relay;
 
@@ -615,8 +648,8 @@ void intel_guc_log_relay_flush(struct intel_guc_log *log)
        with_intel_runtime_pm(guc_to_gt(guc)->uncore->rpm, wakeref)
                guc_action_flush_log(guc);
 
-       /* GuC would have updated log buffer by now, so capture it */
-       guc_log_capture_logs(log);
+       /* GuC would have updated log buffer by now, so copy it */
+       guc_log_copy_debuglogs_for_relay(log);
 }
 
 /*
@@ -645,7 +678,7 @@ void intel_guc_log_relay_close(struct intel_guc_log *log)
 
        mutex_lock(&log->relay.lock);
        GEM_BUG_ON(!intel_guc_log_relay_created(log));
-       guc_log_unmap(log);
+       guc_log_relay_unmap(log);
        guc_log_relay_destroy(log);
        mutex_unlock(&log->relay.lock);
 }
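
With the reordered layout documented in intel_guc_log_create(), the new intel_guc_get_log_buffer_offset() helper resolves a region's start by summing the sizes of the regions ahead of it. Assuming intel_guc_get_log_buffer_size() returns DEBUG_BUFFER_SIZE and CRASH_BUFFER_SIZE for the respective types, the offsets work out as sketched below.

    /* Per the layout: the states page comes first, then debug, crash and capture. */
    size_t debug_off   = intel_guc_get_log_buffer_offset(GUC_DEBUG_LOG_BUFFER);
    /* debug_off   == PAGE_SIZE */
    size_t crash_off   = intel_guc_get_log_buffer_offset(GUC_CRASH_DUMP_LOG_BUFFER);
    /* crash_off   == PAGE_SIZE + DEBUG_BUFFER_SIZE */
    size_t capture_off = intel_guc_get_log_buffer_offset(GUC_CAPTURE_LOG_BUFFER);
    /* capture_off == PAGE_SIZE + DEBUG_BUFFER_SIZE + CRASH_BUFFER_SIZE */

This is the offset __guc_capture_process_output() uses to point src_data at the capture region, which the relay path now deliberately skips.
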
index d7e1b64..18007e6 100644 (file)
@@ -49,8 +49,9 @@ struct intel_guc;
 struct intel_guc_log {
        u32 level;
        struct i915_vma *vma;
+       void *buf_addr;
        struct {
-               void *buf_addr;
+               bool buf_in_use;
                bool started;
                struct work_struct flush_work;
                struct rchan *channel;
@@ -66,6 +67,10 @@ struct intel_guc_log {
 };
 
 void intel_guc_log_init_early(struct intel_guc_log *log);
+bool intel_guc_check_log_buf_overflow(struct intel_guc_log *log, enum guc_log_buffer_type type,
+                                     unsigned int full_cnt);
+unsigned int intel_guc_get_log_buffer_size(enum guc_log_buffer_type type);
+size_t intel_guc_get_log_buffer_offset(enum guc_log_buffer_type type);
 int intel_guc_log_create(struct intel_guc_log *log);
 void intel_guc_log_destroy(struct intel_guc_log *log);
 
index ac749ab..b170238 100644 (file)
@@ -581,16 +581,12 @@ static int slpc_use_fused_rp0(struct intel_guc_slpc *slpc)
 static void slpc_get_rp_values(struct intel_guc_slpc *slpc)
 {
        struct intel_rps *rps = &slpc_to_gt(slpc)->rps;
-       u32 rp_state_cap;
+       struct intel_rps_freq_caps caps;
 
-       rp_state_cap = intel_rps_read_state_cap(rps);
-
-       slpc->rp0_freq = REG_FIELD_GET(RP0_CAP_MASK, rp_state_cap) *
-                                       GT_FREQUENCY_MULTIPLIER;
-       slpc->rp1_freq = REG_FIELD_GET(RP1_CAP_MASK, rp_state_cap) *
-                                       GT_FREQUENCY_MULTIPLIER;
-       slpc->min_freq = REG_FIELD_GET(RPN_CAP_MASK, rp_state_cap) *
-                                       GT_FREQUENCY_MULTIPLIER;
+       gen6_rps_get_freq_caps(rps, &caps);
+       slpc->rp0_freq = intel_gpu_freq(rps, caps.rp0_freq);
+       slpc->rp1_freq = intel_gpu_freq(rps, caps.rp1_freq);
+       slpc->min_freq = intel_gpu_freq(rps, caps.min_freq);
 
        if (!slpc->boost_freq)
                slpc->boost_freq = slpc->rp0_freq;
index 1ce7e04..e1612c3 100644 (file)
@@ -25,6 +25,7 @@
 #include "gt/intel_ring.h"
 
 #include "intel_guc_ads.h"
+#include "intel_guc_capture.h"
 #include "intel_guc_submission.h"
 
 #include "i915_drv.h"
@@ -351,12 +352,12 @@ request_to_scheduling_context(struct i915_request *rq)
 
 static inline bool context_guc_id_invalid(struct intel_context *ce)
 {
-       return ce->guc_id.id == GUC_INVALID_LRC_ID;
+       return ce->guc_id.id == GUC_INVALID_CONTEXT_ID;
 }
 
 static inline void set_context_guc_id_invalid(struct intel_context *ce)
 {
-       ce->guc_id.id = GUC_INVALID_LRC_ID;
+       ce->guc_id.id = GUC_INVALID_CONTEXT_ID;
 }
 
 static inline struct intel_guc *ce_to_guc(struct intel_context *ce)
@@ -471,7 +472,7 @@ static struct guc_lrc_desc *__get_lrc_desc(struct intel_guc *guc, u32 index)
 {
        struct guc_lrc_desc *base = guc->lrc_desc_pool_vaddr;
 
-       GEM_BUG_ON(index >= GUC_MAX_LRC_DESCRIPTORS);
+       GEM_BUG_ON(index >= GUC_MAX_CONTEXT_ID);
 
        return &base[index];
 }
@@ -480,7 +481,7 @@ static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id)
 {
        struct intel_context *ce = xa_load(&guc->context_lookup, id);
 
-       GEM_BUG_ON(id >= GUC_MAX_LRC_DESCRIPTORS);
+       GEM_BUG_ON(id >= GUC_MAX_CONTEXT_ID);
 
        return ce;
 }
@@ -491,7 +492,7 @@ static int guc_lrc_desc_pool_create(struct intel_guc *guc)
        int ret;
 
        size = PAGE_ALIGN(sizeof(struct guc_lrc_desc) *
-                         GUC_MAX_LRC_DESCRIPTORS);
+                         GUC_MAX_CONTEXT_ID);
        ret = intel_guc_allocate_and_map_vma(guc, size, &guc->lrc_desc_pool,
                                             (void **)&guc->lrc_desc_pool_vaddr);
        if (ret)
@@ -508,34 +509,23 @@ static void guc_lrc_desc_pool_destroy(struct intel_guc *guc)
 
 static inline bool guc_submission_initialized(struct intel_guc *guc)
 {
-       return !!guc->lrc_desc_pool_vaddr;
+       return guc->submission_initialized;
 }
 
-static inline void reset_lrc_desc(struct intel_guc *guc, u32 id)
+static inline void _reset_lrc_desc(struct intel_guc *guc, u32 id)
 {
-       if (likely(guc_submission_initialized(guc))) {
-               struct guc_lrc_desc *desc = __get_lrc_desc(guc, id);
-               unsigned long flags;
-
-               memset(desc, 0, sizeof(*desc));
+       struct guc_lrc_desc *desc = __get_lrc_desc(guc, id);
 
-               /*
-                * xarray API doesn't have xa_erase_irqsave wrapper, so calling
-                * the lower level functions directly.
-                */
-               xa_lock_irqsave(&guc->context_lookup, flags);
-               __xa_erase(&guc->context_lookup, id);
-               xa_unlock_irqrestore(&guc->context_lookup, flags);
-       }
+       memset(desc, 0, sizeof(*desc));
 }
 
-static inline bool lrc_desc_registered(struct intel_guc *guc, u32 id)
+static inline bool ctx_id_mapped(struct intel_guc *guc, u32 id)
 {
        return __get_context(guc, id);
 }
 
-static inline void set_lrc_desc_registered(struct intel_guc *guc, u32 id,
-                                          struct intel_context *ce)
+static inline void set_ctx_id_mapping(struct intel_guc *guc, u32 id,
+                                     struct intel_context *ce)
 {
        unsigned long flags;
 
@@ -548,6 +538,24 @@ static inline void set_lrc_desc_registered(struct intel_guc *guc, u32 id,
        xa_unlock_irqrestore(&guc->context_lookup, flags);
 }
 
+static inline void clr_ctx_id_mapping(struct intel_guc *guc, u32 id)
+{
+       unsigned long flags;
+
+       if (unlikely(!guc_submission_initialized(guc)))
+               return;
+
+       _reset_lrc_desc(guc, id);
+
+       /*
+        * xarray API doesn't have xa_erase_irqsave wrapper, so calling
+        * the lower level functions directly.
+        */
+       xa_lock_irqsave(&guc->context_lookup, flags);
+       __xa_erase(&guc->context_lookup, id);
+       xa_unlock_irqrestore(&guc->context_lookup, flags);
+}
+
 static void decr_outstanding_submission_g2h(struct intel_guc *guc)
 {
        if (atomic_dec_and_test(&guc->outstanding_submission_g2h))
@@ -624,7 +632,7 @@ int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout)
                                              true, timeout);
 }
 
-static int guc_lrc_desc_pin(struct intel_context *ce, bool loop);
+static int try_context_registration(struct intel_context *ce, bool loop);
 
 static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq)
 {
@@ -792,7 +800,7 @@ static int __guc_wq_item_append(struct i915_request *rq)
        GEM_BUG_ON(!atomic_read(&ce->guc_id.ref));
        GEM_BUG_ON(context_guc_id_invalid(ce));
        GEM_BUG_ON(context_wait_for_deregister_to_register(ce));
-       GEM_BUG_ON(!lrc_desc_registered(ce_to_guc(ce), ce->guc_id.id));
+       GEM_BUG_ON(!ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id));
 
        /* Insert NOOP if this work queue item will wrap the tail pointer. */
        if (wqi_size > wq_space_until_wrap(ce)) {
@@ -920,9 +928,9 @@ register_context:
        if (submit) {
                struct intel_context *ce = request_to_scheduling_context(last);
 
-               if (unlikely(!lrc_desc_registered(guc, ce->guc_id.id) &&
+               if (unlikely(!ctx_id_mapped(guc, ce->guc_id.id) &&
                             !intel_context_is_banned(ce))) {
-                       ret = guc_lrc_desc_pin(ce, false);
+                       ret = try_context_registration(ce, false);
                        if (unlikely(ret == -EPIPE)) {
                                goto deadlk;
                        } else if (ret == -EBUSY) {
@@ -1806,7 +1814,7 @@ int intel_guc_submission_init(struct intel_guc *guc)
        struct intel_gt *gt = guc_to_gt(guc);
        int ret;
 
-       if (guc->lrc_desc_pool)
+       if (guc->submission_initialized)
                return 0;
 
        ret = guc_lrc_desc_pool_create(guc);
@@ -1825,19 +1833,21 @@ int intel_guc_submission_init(struct intel_guc *guc)
 
        guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ;
        guc->timestamp.shift = gpm_timestamp_shift(gt);
+       guc->submission_initialized = true;
 
        return 0;
 }
 
 void intel_guc_submission_fini(struct intel_guc *guc)
 {
-       if (!guc->lrc_desc_pool)
+       if (!guc->submission_initialized)
                return;
 
        guc_flush_destroyed_contexts(guc);
        guc_lrc_desc_pool_destroy(guc);
        i915_sched_engine_put(guc->sched_engine);
        bitmap_free(guc->submission_state.guc_ids_bitmap);
+       guc->submission_initialized = false;
 }
 
 static inline void queue_request(struct i915_sched_engine *sched_engine,
@@ -1884,7 +1894,7 @@ static bool need_tasklet(struct intel_guc *guc, struct i915_request *rq)
 
        return submission_disabled(guc) || guc->stalled_request ||
                !i915_sched_engine_is_empty(sched_engine) ||
-               !lrc_desc_registered(guc, ce->guc_id.id);
+               !ctx_id_mapped(guc, ce->guc_id.id);
 }
 
 static void guc_submit_request(struct i915_request *rq)
@@ -1941,7 +1951,7 @@ static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce)
                else
                        ida_simple_remove(&guc->submission_state.guc_ids,
                                          ce->guc_id.id);
-               reset_lrc_desc(guc, ce->guc_id.id);
+               clr_ctx_id_mapping(guc, ce->guc_id.id);
                set_context_guc_id_invalid(ce);
        }
        if (!list_empty(&ce->guc_id.link))
@@ -2131,6 +2141,8 @@ static int __guc_action_register_context(struct intel_guc *guc,
                                             0, loop);
 }
 
+static void prepare_context_registration_info(struct intel_context *ce);
+
 static int register_context(struct intel_context *ce, bool loop)
 {
        struct intel_guc *guc = ce_to_guc(ce);
@@ -2141,6 +2153,8 @@ static int register_context(struct intel_context *ce, bool loop)
        GEM_BUG_ON(intel_context_is_child(ce));
        trace_intel_context_register(ce);
 
+       prepare_context_registration_info(ce);
+
        if (intel_context_is_parent(ce))
                ret = __guc_action_register_multi_lrc(guc, ce, ce->guc_id.id,
                                                      offset, loop);
@@ -2215,20 +2229,15 @@ static void guc_context_policy_init(struct intel_engine_cs *engine,
        desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000;
 }
 
-static int guc_lrc_desc_pin(struct intel_context *ce, bool loop)
+static void prepare_context_registration_info(struct intel_context *ce)
 {
        struct intel_engine_cs *engine = ce->engine;
-       struct intel_runtime_pm *runtime_pm = engine->uncore->rpm;
        struct intel_guc *guc = &engine->gt->uc.guc;
-       u32 desc_idx = ce->guc_id.id;
+       u32 ctx_id = ce->guc_id.id;
        struct guc_lrc_desc *desc;
-       bool context_registered;
-       intel_wakeref_t wakeref;
        struct intel_context *child;
-       int ret = 0;
 
        GEM_BUG_ON(!engine->mask);
-       GEM_BUG_ON(!sched_state_is_init(ce));
 
        /*
         * Ensure LRC + CT vmas are in the same region as write barrier is done
@@ -2237,12 +2246,7 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop)
        GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) !=
                   i915_gem_object_is_lmem(ce->ring->vma->obj));
 
-       context_registered = lrc_desc_registered(guc, desc_idx);
-
-       reset_lrc_desc(guc, desc_idx);
-       set_lrc_desc_registered(guc, desc_idx, ce);
-
-       desc = __get_lrc_desc(guc, desc_idx);
+       desc = __get_lrc_desc(guc, ctx_id);
        desc->engine_class = engine_class_to_guc_class(engine->class);
        desc->engine_submit_mask = engine->logical_mask;
        desc->hw_context_desc = ce->lrc.lrca;
@@ -2286,6 +2290,24 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop)
 
                clear_children_join_go_memory(ce);
        }
+}
+
+static int try_context_registration(struct intel_context *ce, bool loop)
+{
+       struct intel_engine_cs *engine = ce->engine;
+       struct intel_runtime_pm *runtime_pm = engine->uncore->rpm;
+       struct intel_guc *guc = &engine->gt->uc.guc;
+       intel_wakeref_t wakeref;
+       u32 ctx_id = ce->guc_id.id;
+       bool context_registered;
+       int ret = 0;
+
+       GEM_BUG_ON(!sched_state_is_init(ce));
+
+       context_registered = ctx_id_mapped(guc, ctx_id);
+
+       clr_ctx_id_mapping(guc, ctx_id);
+       set_ctx_id_mapping(guc, ctx_id, ce);
 
        /*
         * The context_lookup xarray is used to determine if the hardware
@@ -2311,7 +2333,7 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop)
                }
                spin_unlock_irqrestore(&ce->guc_state.lock, flags);
                if (unlikely(disabled)) {
-                       reset_lrc_desc(guc, desc_idx);
+                       clr_ctx_id_mapping(guc, ctx_id);
                        return 0;       /* Will get registered later */
                }
 
@@ -2327,9 +2349,9 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop)
                with_intel_runtime_pm(runtime_pm, wakeref)
                        ret = register_context(ce, loop);
                if (unlikely(ret == -EBUSY)) {
-                       reset_lrc_desc(guc, desc_idx);
+                       clr_ctx_id_mapping(guc, ctx_id);
                } else if (unlikely(ret == -ENODEV)) {
-                       reset_lrc_desc(guc, desc_idx);
+                       clr_ctx_id_mapping(guc, ctx_id);
                        ret = 0;        /* Will get registered later */
                }
        }
@@ -2419,7 +2441,7 @@ static void __guc_context_sched_disable(struct intel_guc *guc,
                GUC_CONTEXT_DISABLE
        };
 
-       GEM_BUG_ON(guc_id == GUC_INVALID_LRC_ID);
+       GEM_BUG_ON(guc_id == GUC_INVALID_CONTEXT_ID);
 
        GEM_BUG_ON(intel_context_is_child(ce));
        trace_intel_context_sched_disable(ce);
@@ -2516,7 +2538,7 @@ static bool context_cant_unblock(struct intel_context *ce)
 
        return (ce->guc_state.sched_state & SCHED_STATE_NO_UNBLOCK) ||
                context_guc_id_invalid(ce) ||
-               !lrc_desc_registered(ce_to_guc(ce), ce->guc_id.id) ||
+               !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id) ||
                !intel_context_is_pinned(ce);
 }
 
@@ -2686,7 +2708,7 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce)
        bool disabled;
 
        GEM_BUG_ON(!intel_gt_pm_is_awake(gt));
-       GEM_BUG_ON(!lrc_desc_registered(guc, ce->guc_id.id));
+       GEM_BUG_ON(!ctx_id_mapped(guc, ce->guc_id.id));
        GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id));
        GEM_BUG_ON(context_enabled(ce));
 
@@ -2803,7 +2825,7 @@ static void guc_context_destroy(struct kref *kref)
         */
        spin_lock_irqsave(&guc->submission_state.lock, flags);
        destroy = submission_disabled(guc) || context_guc_id_invalid(ce) ||
-               !lrc_desc_registered(guc, ce->guc_id.id);
+               !ctx_id_mapped(guc, ce->guc_id.id);
        if (likely(!destroy)) {
                if (!list_empty(&ce->guc_id.link))
                        list_del_init(&ce->guc_id.link);
@@ -3046,7 +3068,7 @@ static void guc_signal_context_fence(struct intel_context *ce)
 static bool context_needs_register(struct intel_context *ce, bool new_guc_id)
 {
        return (new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) ||
-               !lrc_desc_registered(ce_to_guc(ce), ce->guc_id.id)) &&
+               !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id)) &&
                !submission_disabled(ce_to_guc(ce));
 }
 
@@ -3123,7 +3145,7 @@ static int guc_request_alloc(struct i915_request *rq)
        if (unlikely(ret < 0))
                return ret;
        if (context_needs_register(ce, !!ret)) {
-               ret = guc_lrc_desc_pin(ce, true);
+               ret = try_context_registration(ce, true);
                if (unlikely(ret)) {    /* unwind */
                        if (ret == -EPIPE) {
                                disable_submission(guc);
@@ -3560,7 +3582,7 @@ static void guc_sanitize(struct intel_engine_cs *engine)
        sanitize_hwsp(engine);
 
        /* And scrub the dirty cachelines for the HWSP */
-       clflush_cache_range(engine->status_page.addr, PAGE_SIZE);
+       drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE);
 
        intel_engine_reset_pinned_contexts(engine);
 }
@@ -3595,7 +3617,7 @@ static int guc_resume(struct intel_engine_cs *engine)
        setup_hwsp(engine);
        start_engine(engine);
 
-       if (engine->class == RENDER_CLASS)
+       if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE)
                xehp_enable_ccs_engines(engine);
 
        return 0;
@@ -3614,9 +3636,17 @@ static void guc_set_default_submission(struct intel_engine_cs *engine)
 static inline void guc_kernel_context_pin(struct intel_guc *guc,
                                          struct intel_context *ce)
 {
+       /*
+        * Note: we purposefully do not check the returns below because
+        * the registration can only fail if a reset is just starting.
+        * This is called at the end of reset so presumably another reset
+        * isn't happening and even if it did this code would be run again.
+        */
+
        if (context_guc_id_invalid(ce))
                pin_guc_id(guc, ce);
-       guc_lrc_desc_pin(ce, true);
+
+       try_context_registration(ce, true);
 }
 
 static inline void guc_init_lrc_mapping(struct intel_guc *guc)
@@ -3634,13 +3664,7 @@ static inline void guc_init_lrc_mapping(struct intel_guc *guc)
         * Also, after a reset of the GuC we want to make sure that the
         * information shared with GuC is properly reset. The kernel LRCs are
         * not attached to the gem_context, so they need to be added separately.
-        *
-        * Note: we purposefully do not check the return of guc_lrc_desc_pin,
-        * because that function can only fail if a reset is just starting. This
-        * is at the end of reset so presumably another reset isn't happening
-        * and even it did this code would be run again.
         */
-
        for_each_engine(engine, gt, id) {
                struct intel_context *ce;
 
@@ -3835,32 +3859,32 @@ void intel_guc_submission_init_early(struct intel_guc *guc)
        spin_lock_init(&guc->timestamp.lock);
        INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping);
 
-       guc->submission_state.num_guc_ids = GUC_MAX_LRC_DESCRIPTORS;
+       guc->submission_state.num_guc_ids = GUC_MAX_CONTEXT_ID;
        guc->submission_supported = __guc_submission_supported(guc);
        guc->submission_selected = __guc_submission_selected(guc);
 }
 
 static inline struct intel_context *
-g2h_context_lookup(struct intel_guc *guc, u32 desc_idx)
+g2h_context_lookup(struct intel_guc *guc, u32 ctx_id)
 {
        struct intel_context *ce;
 
-       if (unlikely(desc_idx >= GUC_MAX_LRC_DESCRIPTORS)) {
+       if (unlikely(ctx_id >= GUC_MAX_CONTEXT_ID)) {
                drm_err(&guc_to_gt(guc)->i915->drm,
-                       "Invalid desc_idx %u", desc_idx);
+                       "Invalid ctx_id %u\n", ctx_id);
                return NULL;
        }
 
-       ce = __get_context(guc, desc_idx);
+       ce = __get_context(guc, ctx_id);
        if (unlikely(!ce)) {
                drm_err(&guc_to_gt(guc)->i915->drm,
-                       "Context is NULL, desc_idx %u", desc_idx);
+                       "Context is NULL, ctx_id %u\n", ctx_id);
                return NULL;
        }
 
        if (unlikely(intel_context_is_child(ce))) {
                drm_err(&guc_to_gt(guc)->i915->drm,
-                       "Context is child, desc_idx %u", desc_idx);
+                       "Context is child, ctx_id %u\n", ctx_id);
                return NULL;
        }
 
@@ -3872,14 +3896,15 @@ int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
                                          u32 len)
 {
        struct intel_context *ce;
-       u32 desc_idx = msg[0];
+       u32 ctx_id;
 
        if (unlikely(len < 1)) {
-               drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
+               drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u\n", len);
                return -EPROTO;
        }
+       ctx_id = msg[0];
 
-       ce = g2h_context_lookup(guc, desc_idx);
+       ce = g2h_context_lookup(guc, ctx_id);
        if (unlikely(!ce))
                return -EPROTO;
 
@@ -3923,14 +3948,15 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc,
 {
        struct intel_context *ce;
        unsigned long flags;
-       u32 desc_idx = msg[0];
+       u32 ctx_id;
 
        if (unlikely(len < 2)) {
-               drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
+               drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u\n", len);
                return -EPROTO;
        }
+       ctx_id = msg[0];
 
-       ce = g2h_context_lookup(guc, desc_idx);
+       ce = g2h_context_lookup(guc, ctx_id);
        if (unlikely(!ce))
                return -EPROTO;
 
@@ -3938,8 +3964,8 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc,
                     (!context_pending_enable(ce) &&
                     !context_pending_disable(ce)))) {
                drm_err(&guc_to_gt(guc)->i915->drm,
-                       "Bad context sched_state 0x%x, desc_idx %u",
-                       ce->guc_state.sched_state, desc_idx);
+                       "Bad context sched_state 0x%x, ctx_id %u\n",
+                       ce->guc_state.sched_state, ctx_id);
                return -EPROTO;
        }
 
@@ -4005,7 +4031,7 @@ static void capture_error_state(struct intel_guc *guc,
 
        intel_engine_set_hung_context(engine, ce);
        with_intel_runtime_pm(&i915->runtime_pm, wakeref)
-               i915_capture_error_state(gt, engine->mask);
+               i915_capture_error_state(gt, engine->mask, CORE_DUMP_FLAG_IS_GUC_CAPTURE);
        atomic_inc(&i915->gpu_error.reset_engine_count[engine->uabi_class]);
 }
 
@@ -4037,14 +4063,14 @@ int intel_guc_context_reset_process_msg(struct intel_guc *guc,
 {
        struct intel_context *ce;
        unsigned long flags;
-       int desc_idx;
+       int ctx_id;
 
        if (unlikely(len != 1)) {
                drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
                return -EPROTO;
        }
 
-       desc_idx = msg[0];
+       ctx_id = msg[0];
 
        /*
         * The context lookup uses the xarray but lookups only require an RCU lock
@@ -4053,7 +4079,7 @@ int intel_guc_context_reset_process_msg(struct intel_guc *guc,
         * asynchronously until the reset is done.
         */
        xa_lock_irqsave(&guc->context_lookup, flags);
-       ce = g2h_context_lookup(guc, desc_idx);
+       ce = g2h_context_lookup(guc, ctx_id);
        if (ce)
                intel_context_get(ce);
        xa_unlock_irqrestore(&guc->context_lookup, flags);
@@ -4070,23 +4096,24 @@ int intel_guc_context_reset_process_msg(struct intel_guc *guc,
 int intel_guc_error_capture_process_msg(struct intel_guc *guc,
                                        const u32 *msg, u32 len)
 {
-       int status;
+       u32 status;
 
        if (unlikely(len != 1)) {
                drm_dbg(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
                return -EPROTO;
        }
 
-       status = msg[0];
-       drm_info(&guc_to_gt(guc)->i915->drm, "Got error capture: status = %d", status);
+       status = msg[0] & INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_MASK;
+       if (status == INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE)
+               drm_warn(&guc_to_gt(guc)->i915->drm, "G2H-Error capture no space");
 
-       /* FIXME: Do something with the capture */
+       intel_guc_capture_process(guc);
 
        return 0;
 }
 
-static struct intel_engine_cs *
-guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance)
+struct intel_engine_cs *
+intel_guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance)
 {
        struct intel_gt *gt = guc_to_gt(guc);
        u8 engine_class = guc_class_to_engine_class(guc_class);
@@ -4135,7 +4162,7 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
        instance = msg[1];
        reason = msg[2];
 
-       engine = guc_lookup_engine(guc, guc_class, instance);
+       engine = intel_guc_lookup_engine(guc, guc_class, instance);
        if (unlikely(!engine)) {
                drm_err(&gt->i915->drm,
                        "Invalid engine %d:%d", guc_class, instance);
index a115894..1df71d0 100644 (file)
@@ -148,7 +148,7 @@ static int intel_guc_steal_guc_ids(void *arg)
        struct i915_request *spin_rq = NULL, *rq, *last = NULL;
        int number_guc_id_stolen = guc->number_guc_id_stolen;
 
-       ce = kzalloc(sizeof(*ce) * GUC_MAX_LRC_DESCRIPTORS, GFP_KERNEL);
+       ce = kcalloc(GUC_MAX_CONTEXT_ID, sizeof(*ce), GFP_KERNEL);
        if (!ce) {
                pr_err("Context array allocation failed\n");
                return -ENOMEM;
index 946bbe5..e6fcfde 100644 (file)
@@ -307,7 +307,8 @@ static int i915_gpu_info_open(struct inode *inode, struct file *file)
 
        gpu = NULL;
        with_intel_runtime_pm(&i915->runtime_pm, wakeref)
-               gpu = i915_gpu_coredump(to_gt(i915), ALL_ENGINES);
+               gpu = i915_gpu_coredump(to_gt(i915), ALL_ENGINES, CORE_DUMP_FLAG_NONE);
+
        if (IS_ERR(gpu))
                return PTR_ERR(gpu);
 
@@ -578,8 +579,9 @@ static int i915_wedged_get(void *data, u64 *val)
 static int i915_wedged_set(void *data, u64 val)
 {
        struct drm_i915_private *i915 = data;
+       intel_gt_debugfs_reset_store(to_gt(i915), val);
 
-       return intel_gt_debugfs_reset_store(to_gt(i915), val);
+       return 0;
 }
 
 DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops,
@@ -727,15 +729,17 @@ static int i915_sseu_status(struct seq_file *m, void *unused)
 static int i915_forcewake_open(struct inode *inode, struct file *file)
 {
        struct drm_i915_private *i915 = inode->i_private;
+       intel_gt_pm_debugfs_forcewake_user_open(to_gt(i915));
 
-       return intel_gt_pm_debugfs_forcewake_user_open(to_gt(i915));
+       return 0;
 }
 
 static int i915_forcewake_release(struct inode *inode, struct file *file)
 {
        struct drm_i915_private *i915 = inode->i_private;
+       intel_gt_pm_debugfs_forcewake_user_release(to_gt(i915));
 
-       return intel_gt_pm_debugfs_forcewake_user_release(to_gt(i915));
+       return 0;
 }
 
 static const struct file_operations i915_forcewake_fops = {
index 62b3f33..d6e0106 100644 (file)
@@ -76,6 +76,7 @@
 #include "i915_file_private.h"
 #include "i915_debugfs.h"
 #include "i915_driver.h"
+#include "i915_drm_client.h"
 #include "i915_drv.h"
 #include "i915_getparam.h"
 #include "i915_ioc32.h"
@@ -320,9 +321,7 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv)
        intel_device_info_subplatform_init(dev_priv);
        intel_step_init(dev_priv);
 
-       intel_gt_init_early(to_gt(dev_priv), dev_priv);
        intel_uncore_mmio_debug_init_early(&dev_priv->mmio_debug);
-       intel_uncore_init_early(&dev_priv->uncore, to_gt(dev_priv));
 
        spin_lock_init(&dev_priv->irq_lock);
        spin_lock_init(&dev_priv->gpu_error.lock);
@@ -353,7 +352,9 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv)
 
        intel_wopcm_init_early(&dev_priv->wopcm);
 
-       __intel_gt_init_early(to_gt(dev_priv), dev_priv);
+       intel_root_gt_init_early(dev_priv);
+
+       i915_drm_clients_init(&dev_priv->clients, dev_priv);
 
        i915_gem_init_early(dev_priv);
 
@@ -374,7 +375,8 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv)
 
 err_gem:
        i915_gem_cleanup_early(dev_priv);
-       intel_gt_driver_late_release(to_gt(dev_priv));
+       intel_gt_driver_late_release_all(dev_priv);
+       i915_drm_clients_fini(&dev_priv->clients);
        intel_region_ttm_device_fini(dev_priv);
 err_ttm:
        vlv_suspend_cleanup(dev_priv);
@@ -393,7 +395,8 @@ static void i915_driver_late_release(struct drm_i915_private *dev_priv)
        intel_irq_fini(dev_priv);
        intel_power_domains_cleanup(dev_priv);
        i915_gem_cleanup_early(dev_priv);
-       intel_gt_driver_late_release(to_gt(dev_priv));
+       intel_gt_driver_late_release_all(dev_priv);
+       i915_drm_clients_fini(&dev_priv->clients);
        intel_region_ttm_device_fini(dev_priv);
        vlv_suspend_cleanup(dev_priv);
        i915_workqueues_cleanup(dev_priv);
@@ -424,13 +427,9 @@ static int i915_driver_mmio_probe(struct drm_i915_private *dev_priv)
        if (ret < 0)
                return ret;
 
-       ret = intel_uncore_setup_mmio(&dev_priv->uncore);
-       if (ret < 0)
-               goto err_bridge;
-
        ret = intel_uncore_init_mmio(&dev_priv->uncore);
        if (ret)
-               goto err_mmio;
+               return ret;
 
        /* Try to make sure MCHBAR is enabled before poking at it */
        intel_setup_mchbar(dev_priv);
@@ -448,9 +447,6 @@ static int i915_driver_mmio_probe(struct drm_i915_private *dev_priv)
 err_uncore:
        intel_teardown_mchbar(dev_priv);
        intel_uncore_fini_mmio(&dev_priv->uncore);
-err_mmio:
-       intel_uncore_cleanup_mmio(&dev_priv->uncore);
-err_bridge:
        pci_dev_put(dev_priv->bridge_dev);
 
        return ret;
@@ -464,7 +460,6 @@ static void i915_driver_mmio_release(struct drm_i915_private *dev_priv)
 {
        intel_teardown_mchbar(dev_priv);
        intel_uncore_fini_mmio(&dev_priv->uncore);
-       intel_uncore_cleanup_mmio(&dev_priv->uncore);
        pci_dev_put(dev_priv->bridge_dev);
 }
 
@@ -597,7 +592,7 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv)
        if (ret)
                goto err_ggtt;
 
-       ret = intel_gt_probe_lmem(to_gt(dev_priv));
+       ret = intel_gt_tiles_init(dev_priv);
        if (ret)
                goto err_mem_regions;
 
@@ -847,10 +842,14 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
        intel_vgpu_detect(i915);
 
-       ret = i915_driver_mmio_probe(i915);
+       ret = intel_gt_probe_all(i915);
        if (ret < 0)
                goto out_runtime_pm_put;
 
+       ret = i915_driver_mmio_probe(i915);
+       if (ret < 0)
+               goto out_tiles_cleanup;
+
        ret = i915_driver_hw_probe(i915);
        if (ret < 0)
                goto out_cleanup_mmio;
@@ -907,6 +906,8 @@ out_cleanup_hw:
        i915_ggtt_driver_late_release(i915);
 out_cleanup_mmio:
        i915_driver_mmio_release(i915);
+out_tiles_cleanup:
+       intel_gt_release_all(i915);
 out_runtime_pm_put:
        enable_rpm_wakeref_asserts(&i915->runtime_pm);
        i915_driver_late_release(i915);
@@ -1010,6 +1011,7 @@ static void i915_driver_postclose(struct drm_device *dev, struct drm_file *file)
        struct drm_i915_file_private *file_priv = file->driver_priv;
 
        i915_gem_context_close(file);
+       i915_drm_client_put(file_priv->client);
 
        kfree_rcu(file_priv, rcu);
 
@@ -1740,6 +1742,9 @@ static const struct file_operations i915_driver_fops = {
        .read = drm_read,
        .compat_ioctl = i915_ioc32_compat_ioctl,
        .llseek = noop_llseek,
+#ifdef CONFIG_PROC_FS
+       .show_fdinfo = i915_drm_client_fdinfo,
+#endif
 };
 
 static int
diff --git a/drivers/gpu/drm/i915/i915_drm_client.c b/drivers/gpu/drm/i915/i915_drm_client.c
new file mode 100644 (file)
index 0000000..e539f6b
--- /dev/null
@@ -0,0 +1,154 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#include <uapi/drm/i915_drm.h>
+
+#include <drm/drm_print.h>
+
+#include "gem/i915_gem_context.h"
+#include "i915_drm_client.h"
+#include "i915_file_private.h"
+#include "i915_gem.h"
+#include "i915_utils.h"
+
+void i915_drm_clients_init(struct i915_drm_clients *clients,
+                          struct drm_i915_private *i915)
+{
+       clients->i915 = i915;
+       clients->next_id = 0;
+
+       xa_init_flags(&clients->xarray, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
+}
+
+struct i915_drm_client *i915_drm_client_add(struct i915_drm_clients *clients)
+{
+       struct i915_drm_client *client;
+       struct xarray *xa = &clients->xarray;
+       int ret;
+
+       client = kzalloc(sizeof(*client), GFP_KERNEL);
+       if (!client)
+               return ERR_PTR(-ENOMEM);
+
+       xa_lock_irq(xa);
+       ret = __xa_alloc_cyclic(xa, &client->id, client, xa_limit_32b,
+                               &clients->next_id, GFP_KERNEL);
+       xa_unlock_irq(xa);
+       if (ret < 0)
+               goto err;
+
+       kref_init(&client->kref);
+       spin_lock_init(&client->ctx_lock);
+       INIT_LIST_HEAD(&client->ctx_list);
+       client->clients = clients;
+
+       return client;
+
+err:
+       kfree(client);
+
+       return ERR_PTR(ret);
+}
+
+void __i915_drm_client_free(struct kref *kref)
+{
+       struct i915_drm_client *client =
+               container_of(kref, typeof(*client), kref);
+       struct xarray *xa = &client->clients->xarray;
+       unsigned long flags;
+
+       xa_lock_irqsave(xa, flags);
+       __xa_erase(xa, client->id);
+       xa_unlock_irqrestore(xa, flags);
+       kfree(client);
+}
+
+void i915_drm_clients_fini(struct i915_drm_clients *clients)
+{
+       GEM_BUG_ON(!xa_empty(&clients->xarray));
+       xa_destroy(&clients->xarray);
+}
+
+#ifdef CONFIG_PROC_FS
+static const char * const uabi_class_names[] = {
+       [I915_ENGINE_CLASS_RENDER] = "render",
+       [I915_ENGINE_CLASS_COPY] = "copy",
+       [I915_ENGINE_CLASS_VIDEO] = "video",
+       [I915_ENGINE_CLASS_VIDEO_ENHANCE] = "video-enhance",
+};
+
+static u64 busy_add(struct i915_gem_context *ctx, unsigned int class)
+{
+       struct i915_gem_engines_iter it;
+       struct intel_context *ce;
+       u64 total = 0;
+
+       for_each_gem_engine(ce, rcu_dereference(ctx->engines), it) {
+               if (ce->engine->uabi_class != class)
+                       continue;
+
+               total += intel_context_get_total_runtime_ns(ce);
+       }
+
+       return total;
+}
+
+static void
+show_client_class(struct seq_file *m,
+                 struct i915_drm_client *client,
+                 unsigned int class)
+{
+       const struct list_head *list = &client->ctx_list;
+       u64 total = atomic64_read(&client->past_runtime[class]);
+       const unsigned int capacity =
+               client->clients->i915->engine_uabi_class_count[class];
+       struct i915_gem_context *ctx;
+
+       rcu_read_lock();
+       list_for_each_entry_rcu(ctx, list, client_link)
+               total += busy_add(ctx, class);
+       rcu_read_unlock();
+
+       seq_printf(m, "drm-engine-%s:\t%llu ns\n",
+                  uabi_class_names[class], total);
+
+       if (capacity > 1)
+               seq_printf(m, "drm-engine-capacity-%s:\t%u\n",
+                          uabi_class_names[class],
+                          capacity);
+}
+
+void i915_drm_client_fdinfo(struct seq_file *m, struct file *f)
+{
+       struct drm_file *file = f->private_data;
+       struct drm_i915_file_private *file_priv = file->driver_priv;
+       struct drm_i915_private *i915 = file_priv->dev_priv;
+       struct i915_drm_client *client = file_priv->client;
+       struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
+       unsigned int i;
+
+       /*
+        * ******************************************************************
+        * For text output format description please see drm-usage-stats.rst!
+        * ******************************************************************
+        */
+
+       seq_printf(m, "drm-driver:\t%s\n", i915->drm.driver->name);
+       seq_printf(m, "drm-pdev:\t%04x:%02x:%02x.%d\n",
+                  pci_domain_nr(pdev->bus), pdev->bus->number,
+                  PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+       seq_printf(m, "drm-client-id:\t%u\n", client->id);
+
+       if (GRAPHICS_VER(i915) < 8)
+               return;
+
+       for (i = 0; i < ARRAY_SIZE(uabi_class_names); i++)
+               show_client_class(m, client, i);
+}
+#endif
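For reference, the seq_printf() calls above produce fdinfo text of roughly the following shape when read from /proc/<pid>/fdinfo/<fd>; the numeric values below are illustrative only, and drm-engine-capacity-* is emitted only for classes with more than one engine instance:

	drm-driver:	i915
	drm-pdev:	0000:00:02.0
	drm-client-id:	42
	drm-engine-render:	123456789 ns
	drm-engine-copy:	0 ns
	drm-engine-video:	987654 ns
	drm-engine-capacity-video:	2
	drm-engine-video-enhance:	0 ns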
diff --git a/drivers/gpu/drm/i915/i915_drm_client.h b/drivers/gpu/drm/i915/i915_drm_client.h
new file mode 100644 (file)
index 0000000..5f5b02b
--- /dev/null
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#ifndef __I915_DRM_CLIENT_H__
+#define __I915_DRM_CLIENT_H__
+
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/xarray.h>
+
+#include "gt/intel_engine_types.h"
+
+#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_VIDEO_ENHANCE
+
+struct drm_i915_private;
+
+struct i915_drm_clients {
+       struct drm_i915_private *i915;
+
+       struct xarray xarray;
+       u32 next_id;
+};
+
+struct i915_drm_client {
+       struct kref kref;
+
+       unsigned int id;
+
+       spinlock_t ctx_lock; /* For add/remove from ctx_list. */
+       struct list_head ctx_list; /* List of contexts belonging to client. */
+
+       struct i915_drm_clients *clients;
+
+       /**
+        * @past_runtime: Accumulation of pphwsp runtimes from closed contexts.
+        */
+       atomic64_t past_runtime[I915_LAST_UABI_ENGINE_CLASS + 1];
+};
+
+void i915_drm_clients_init(struct i915_drm_clients *clients,
+                          struct drm_i915_private *i915);
+
+static inline struct i915_drm_client *
+i915_drm_client_get(struct i915_drm_client *client)
+{
+       kref_get(&client->kref);
+       return client;
+}
+
+void __i915_drm_client_free(struct kref *kref);
+
+static inline void i915_drm_client_put(struct i915_drm_client *client)
+{
+       kref_put(&client->kref, __i915_drm_client_free);
+}
+
+struct i915_drm_client *i915_drm_client_add(struct i915_drm_clients *clients);
+
+#ifdef CONFIG_PROC_FS
+void i915_drm_client_fdinfo(struct seq_file *m, struct file *f);
+#endif
+
+void i915_drm_clients_fini(struct i915_drm_clients *clients);
+
+#endif /* !__I915_DRM_CLIENT_H__ */
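A short sketch of the lifecycle this header implies, matching how the rest of the series wires it up (one client per open DRM file, released on postclose); the functions below are illustrative only and not part of the patch:

	/* Illustration only: typical per-open-file use of the client API. */
	static int example_open(struct drm_i915_private *i915,
				struct drm_i915_file_private *fpriv)
	{
		struct i915_drm_client *client;

		client = i915_drm_client_add(&i915->clients);
		if (IS_ERR(client))
			return PTR_ERR(client);

		fpriv->client = client;	/* holds one reference */
		return 0;
	}

	static void example_close(struct drm_i915_file_private *fpriv)
	{
		/* Last put ends up in __i915_drm_client_free(), erasing the id. */
		i915_drm_client_put(fpriv->client);
	}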
index fa14da8..fd8a423 100644 (file)
@@ -66,6 +66,7 @@
 #include "gt/intel_workarounds.h"
 #include "gt/uc/intel_uc.h"
 
+#include "i915_drm_client.h"
 #include "i915_gem.h"
 #include "i915_gpu_error.h"
 #include "i915_params.h"
@@ -532,6 +533,7 @@ struct drm_i915_private {
        struct pci_dev *bridge_dev;
 
        struct rb_root uabi_engines;
+       unsigned int engine_uabi_class_count[I915_LAST_UABI_ENGINE_CLASS + 1];
 
        struct resource mch_res;
 
@@ -808,6 +810,14 @@ struct drm_i915_private {
        /* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
        struct intel_gt gt0;
 
+       /*
+        * i915->gt[0] == &i915->gt0
+        */
+#define I915_MAX_GT 4
+       struct intel_gt *gt[I915_MAX_GT];
+
+       struct kobject *sysfs_gt;
+
        struct {
                struct i915_gem_contexts {
                        spinlock_t lock; /* locks list */
@@ -846,6 +856,8 @@ struct drm_i915_private {
 
        struct i915_pmu pmu;
 
+       struct i915_drm_clients clients;
+
        struct i915_hdcp_comp_master *hdcp_master;
        bool hdcp_comp_added;
 
@@ -1237,6 +1249,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
        ((gt)->info.engine_mask &                                               \
         GENMASK(first__ + count__ - 1, first__)) >> first__;           \
 })
+#define RCS_MASK(gt) \
+       ENGINE_INSTANCES_MASK(gt, RCS0, I915_MAX_RCS)
 #define VDBOX_MASK(gt) \
        ENGINE_INSTANCES_MASK(gt, VCS0, I915_MAX_VCS)
 #define VEBOX_MASK(gt) \
@@ -1398,6 +1412,9 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
 #define HAS_GUC_DEPRIVILEGE(dev_priv) \
        (INTEL_INFO(dev_priv)->has_guc_deprivilege)
 
+#define HAS_PERCTX_PREEMPT_CTRL(i915) \
+       ((GRAPHICS_VER(i915) >= 9) &&  GRAPHICS_VER_FULL(i915) < IP_VER(12, 55))
+
 static inline bool run_as_guest(void)
 {
        return !hypervisor_is_type(X86_HYPER_NATIVE);
index fb16cc4..f428778 100644 (file)
@@ -12,6 +12,7 @@
 
 struct drm_i915_private;
 struct drm_file;
+struct i915_drm_client;
 
 struct drm_i915_file_private {
        struct drm_i915_private *dev_priv;
@@ -103,6 +104,8 @@ struct drm_i915_file_private {
        /** ban_score: Accumulated score of all ctx bans and fast hangs. */
        atomic_t ban_score;
        unsigned long hang_timestamp;
+
+       struct i915_drm_client *client;
 };
 
 #endif /* __I915_FILE_PRIVATE_H__ */
index 2e10187..702e5b8 100644 (file)
@@ -118,6 +118,7 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
                           unsigned long flags)
 {
        struct intel_runtime_pm *rpm = &to_i915(obj->base.dev)->runtime_pm;
+       bool vm_trylock = !!(flags & I915_GEM_OBJECT_UNBIND_VM_TRYLOCK);
        LIST_HEAD(still_in_list);
        intel_wakeref_t wakeref;
        struct i915_vma *vma;
@@ -142,8 +143,6 @@ try_again:
        while (!ret && (vma = list_first_entry_or_null(&obj->vma.list,
                                                       struct i915_vma,
                                                       obj_link))) {
-               struct i915_address_space *vm = vma->vm;
-
                list_move_tail(&vma->obj_link, &still_in_list);
                if (!i915_vma_is_bound(vma, I915_VMA_BIND_MASK))
                        continue;
@@ -153,40 +152,44 @@ try_again:
                        break;
                }
 
+               /*
+                * Requiring the vm destructor to take the object lock
+                * before destroying a vma would help us eliminate the
+                * i915_vm_tryget() here, AND thus also the barrier stuff
+                * at the end. That's an easy fix, but sleeping locks in
+                * a kthread should generally be avoided.
+                */
                ret = -EAGAIN;
-               if (!i915_vm_tryopen(vm))
+               if (!i915_vm_tryget(vma->vm))
                        break;
 
-               /* Prevent vma being freed by i915_vma_parked as we unbind */
-               vma = __i915_vma_get(vma);
                spin_unlock(&obj->vma.lock);
 
-               if (vma) {
-                       bool vm_trylock = !!(flags & I915_GEM_OBJECT_UNBIND_VM_TRYLOCK);
-                       ret = -EBUSY;
-                       if (flags & I915_GEM_OBJECT_UNBIND_ASYNC) {
-                               assert_object_held(vma->obj);
-                               ret = i915_vma_unbind_async(vma, vm_trylock);
-                       }
+               /*
+                * Since i915_vma_parked() takes the object lock
+                * before vma destruction, it won't race with us here
+                * and destroy the vma from under us.
+                */
 
-                       if (ret == -EBUSY && (flags & I915_GEM_OBJECT_UNBIND_ACTIVE ||
-                                             !i915_vma_is_active(vma))) {
-                               if (vm_trylock) {
-                                       if (mutex_trylock(&vma->vm->mutex)) {
-                                               ret = __i915_vma_unbind(vma);
-                                               mutex_unlock(&vma->vm->mutex);
-                                       } else {
-                                               ret = -EBUSY;
-                                       }
-                               } else {
-                                       ret = i915_vma_unbind(vma);
+               ret = -EBUSY;
+               if (flags & I915_GEM_OBJECT_UNBIND_ASYNC) {
+                       assert_object_held(vma->obj);
+                       ret = i915_vma_unbind_async(vma, vm_trylock);
+               }
+
+               if (ret == -EBUSY && (flags & I915_GEM_OBJECT_UNBIND_ACTIVE ||
+                                     !i915_vma_is_active(vma))) {
+                       if (vm_trylock) {
+                               if (mutex_trylock(&vma->vm->mutex)) {
+                                       ret = __i915_vma_unbind(vma);
+                                       mutex_unlock(&vma->vm->mutex);
                                }
+                       } else {
+                               ret = i915_vma_unbind(vma);
                        }
-
-                       __i915_vma_put(vma);
                }
 
-               i915_vm_close(vm);
+               i915_vm_put(vma->vm);
                spin_lock(&obj->vma.lock);
        }
        list_splice_init(&still_in_list, &obj->vma.list);
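For context on the vm_trylock handling above: callers that must not sleep on the vm mutex pass I915_GEM_OBJECT_UNBIND_VM_TRYLOCK so the unbind falls back to mutex_trylock() instead of blocking. A hedged sketch of such a caller (the flag combination is illustrative, not code from this patch):

	/* Illustration only: unbind without sleeping on the vm mutex. */
	static int example_try_unbind(struct drm_i915_gem_object *obj)
	{
		return i915_gem_object_unbind(obj,
					      I915_GEM_OBJECT_UNBIND_ACTIVE |
					      I915_GEM_OBJECT_UNBIND_VM_TRYLOCK);
	}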
@@ -936,8 +939,19 @@ new_vma:
                        if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))
                                return ERR_PTR(-ENOSPC);
 
+                       /*
+                        * If this misplaced vma is too big (i.e., at least
+                        * half the size of the aperture) or hasn't been pinned
+                        * mappable before, we ignore the misplacement when
+                        * PIN_NONBLOCK is set in order to avoid the ping-pong
+                        * issue described above. In other words, we try to
+                        * avoid the costly operation of unbinding this vma
+                        * from the GGTT and rebinding it back because there
+                        * may not be enough space for this vma in the aperture.
+                        */
                        if (flags & PIN_MAPPABLE &&
-                           vma->fence_size > ggtt->mappable_end / 2)
+                           (vma->fence_size > ggtt->mappable_end / 2 ||
+                           !i915_vma_is_map_and_fenceable(vma)))
                                return ERR_PTR(-ENOSPC);
                }
 
@@ -1213,25 +1227,40 @@ void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
 int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
 {
        struct drm_i915_file_private *file_priv;
-       int ret;
+       struct i915_drm_client *client;
+       int ret = -ENOMEM;
 
        DRM_DEBUG("\n");
 
        file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
        if (!file_priv)
-               return -ENOMEM;
+               goto err_alloc;
+
+       client = i915_drm_client_add(&i915->clients);
+       if (IS_ERR(client)) {
+               ret = PTR_ERR(client);
+               goto err_client;
+       }
 
        file->driver_priv = file_priv;
        file_priv->dev_priv = i915;
        file_priv->file = file;
+       file_priv->client = client;
 
        file_priv->bsd_engine = -1;
        file_priv->hang_timestamp = jiffies;
 
        ret = i915_gem_context_open(i915, file);
        if (ret)
-               kfree(file_priv);
+               goto err_context;
+
+       return 0;
 
+err_context:
+       i915_drm_client_put(client);
+err_client:
+       kfree(file_priv);
+err_alloc:
        return ret;
 }
 
index 1d04255..7d09f92 100644 (file)
@@ -46,6 +46,7 @@
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
 #include "gt/intel_gt_regs.h"
+#include "gt/uc/intel_guc_capture.h"
 
 #include "i915_driver.h"
 #include "i915_drv.h"
@@ -508,13 +509,10 @@ static void error_print_context(struct drm_i915_error_state_buf *m,
                                const char *header,
                                const struct i915_gem_context_coredump *ctx)
 {
-       const u32 period = to_gt(m->i915)->clock_period_ns;
-
        err_printf(m, "%s%s[%d] prio %d, guilty %d active %d, runtime total %lluns, avg %lluns\n",
                   header, ctx->comm, ctx->pid, ctx->sched_attr.priority,
                   ctx->guilty, ctx->active,
-                  ctx->total_runtime * period,
-                  mul_u32_u32(ctx->avg_runtime, period));
+                  ctx->total_runtime, ctx->avg_runtime);
 }
 
 static struct i915_vma_coredump *
@@ -529,8 +527,8 @@ __find_vma(struct i915_vma_coredump *vma, const char *name)
        return NULL;
 }
 
-static struct i915_vma_coredump *
-find_batch(const struct intel_engine_coredump *ee)
+struct i915_vma_coredump *
+intel_gpu_error_find_batch(const struct intel_engine_coredump *ee)
 {
        return __find_vma(ee->vma, "batch");
 }
@@ -558,7 +556,7 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
 
        error_print_instdone(m, ee);
 
-       batch = find_batch(ee);
+       batch = intel_gpu_error_find_batch(ee);
        if (batch) {
                u64 start = batch->gtt_offset;
                u64 end = start + batch->gtt_size;
@@ -593,15 +591,11 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
                                   ee->vm_info.pp_dir_base);
                }
        }
-       err_printf(m, "  hung: %u\n", ee->hung);
-       err_printf(m, "  engine reset count: %u\n", ee->reset_count);
 
        for (n = 0; n < ee->num_ports; n++) {
                err_printf(m, "  ELSP[%d]:", n);
                error_print_request(m, " ", &ee->execlist[n]);
        }
-
-       error_print_context(m, "  Active context: ", &ee->context);
 }
 
 void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
@@ -613,9 +607,9 @@ void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
        va_end(args);
 }
 
-static void print_error_vma(struct drm_i915_error_state_buf *m,
-                           const struct intel_engine_cs *engine,
-                           const struct i915_vma_coredump *vma)
+void intel_gpu_error_print_vma(struct drm_i915_error_state_buf *m,
+                              const struct intel_engine_cs *engine,
+                              const struct i915_vma_coredump *vma)
 {
        char out[ASCII85_BUFSZ];
        struct page *page;
@@ -684,7 +678,7 @@ static void err_print_uc(struct drm_i915_error_state_buf *m,
 
        intel_uc_fw_dump(&error_uc->guc_fw, &p);
        intel_uc_fw_dump(&error_uc->huc_fw, &p);
-       print_error_vma(m, NULL, error_uc->guc_log);
+       intel_gpu_error_print_vma(m, NULL, error_uc->guc_log);
 }
 
 static void err_free_sgl(struct scatterlist *sgl)
@@ -710,26 +704,33 @@ static void err_print_gt_info(struct drm_i915_error_state_buf *m,
        struct drm_printer p = i915_error_printer(m);
 
        intel_gt_info_print(&gt->info, &p);
-       intel_sseu_print_topology(&gt->info.sseu, &p);
+       intel_sseu_print_topology(gt->_gt->i915, &gt->info.sseu, &p);
 }
 
-static void err_print_gt(struct drm_i915_error_state_buf *m,
-                        struct intel_gt_coredump *gt)
+static void err_print_gt_display(struct drm_i915_error_state_buf *m,
+                                struct intel_gt_coredump *gt)
+{
+       err_printf(m, "IER: 0x%08x\n", gt->ier);
+       err_printf(m, "DERRMR: 0x%08x\n", gt->derrmr);
+}
+
+static void err_print_gt_global_nonguc(struct drm_i915_error_state_buf *m,
+                                      struct intel_gt_coredump *gt)
 {
-       const struct intel_engine_coredump *ee;
        int i;
 
        err_printf(m, "GT awake: %s\n", yesno(gt->awake));
        err_printf(m, "EIR: 0x%08x\n", gt->eir);
-       err_printf(m, "IER: 0x%08x\n", gt->ier);
+       err_printf(m, "PGTBL_ER: 0x%08x\n", gt->pgtbl_er);
+
        for (i = 0; i < gt->ngtier; i++)
                err_printf(m, "GTIER[%d]: 0x%08x\n", i, gt->gtier[i]);
-       err_printf(m, "PGTBL_ER: 0x%08x\n", gt->pgtbl_er);
-       err_printf(m, "FORCEWAKE: 0x%08x\n", gt->forcewake);
-       err_printf(m, "DERRMR: 0x%08x\n", gt->derrmr);
+}
 
-       for (i = 0; i < gt->nfence; i++)
-               err_printf(m, "  fence[%d] = %08llx\n", i, gt->fence[i]);
+static void err_print_gt_global(struct drm_i915_error_state_buf *m,
+                               struct intel_gt_coredump *gt)
+{
+       err_printf(m, "FORCEWAKE: 0x%08x\n", gt->forcewake);
 
        if (IS_GRAPHICS_VER(m->i915, 6, 11)) {
                err_printf(m, "ERROR: 0x%08x\n", gt->error);
@@ -752,7 +753,7 @@ static void err_print_gt(struct drm_i915_error_state_buf *m,
        if (GRAPHICS_VER(m->i915) >= 12) {
                int i;
 
-               for (i = 0; i < GEN12_SFC_DONE_MAX; i++) {
+               for (i = 0; i < I915_MAX_SFC; i++) {
                        /*
                         * SFC_DONE resides in the VD forcewake domain, so it
                         * only exists if the corresponding VCS engine is
@@ -768,19 +769,38 @@ static void err_print_gt(struct drm_i915_error_state_buf *m,
 
                err_printf(m, "  GAM_DONE: 0x%08x\n", gt->gam_done);
        }
+}
+
+static void err_print_gt_fences(struct drm_i915_error_state_buf *m,
+                               struct intel_gt_coredump *gt)
+{
+       int i;
+
+       for (i = 0; i < gt->nfence; i++)
+               err_printf(m, "  fence[%d] = %08llx\n", i, gt->fence[i]);
+}
+
+static void err_print_gt_engines(struct drm_i915_error_state_buf *m,
+                                struct intel_gt_coredump *gt)
+{
+       const struct intel_engine_coredump *ee;
 
        for (ee = gt->engine; ee; ee = ee->next) {
                const struct i915_vma_coredump *vma;
 
-               error_print_engine(m, ee);
+               if (ee->guc_capture_node)
+                       intel_guc_capture_print_engine_node(m, ee);
+               else
+                       error_print_engine(m, ee);
+
+               err_printf(m, "  hung: %u\n", ee->hung);
+               err_printf(m, "  engine reset count: %u\n", ee->reset_count);
+               error_print_context(m, "  Active context: ", &ee->context);
+
                for (vma = ee->vma; vma; vma = vma->next)
-                       print_error_vma(m, ee->engine, vma);
+                       intel_gpu_error_print_vma(m, ee->engine, vma);
        }
 
-       if (gt->uc)
-               err_print_uc(m, gt->uc);
-
-       err_print_gt_info(m, gt);
 }
 
 static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
@@ -836,8 +856,30 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
        err_printf(m, "RPM wakelock: %s\n", yesno(error->wakelock));
        err_printf(m, "PM suspended: %s\n", yesno(error->suspended));
 
-       if (error->gt)
-               err_print_gt(m, error->gt);
+       if (error->gt) {
+               bool print_guc_capture = false;
+
+               if (error->gt->uc && error->gt->uc->is_guc_capture)
+                       print_guc_capture = true;
+
+               err_print_gt_display(m, error->gt);
+               err_print_gt_global_nonguc(m, error->gt);
+               err_print_gt_fences(m, error->gt);
+
+               /*
+                * GuC dumped global, eng-class and eng-instance registers together
+                * as part of engine state dump so we print in err_print_gt_engines
+                */
+               if (!print_guc_capture)
+                       err_print_gt_global(m, error->gt);
+
+               err_print_gt_engines(m, error->gt);
+
+               if (error->gt->uc)
+                       err_print_uc(m, error->gt->uc);
+
+               err_print_gt_info(m, error->gt);
+       }
 
        if (error->overlay)
                intel_overlay_print_error_state(m, error->overlay);
@@ -985,6 +1027,7 @@ static void cleanup_gt(struct intel_gt_coredump *gt)
                gt->engine = ee->next;
 
                i915_vma_coredump_free(ee->vma);
+               intel_guc_capture_free_node(ee);
                kfree(ee);
        }
 
@@ -1318,8 +1361,8 @@ static bool record_context(struct i915_gem_context_coredump *e,
        e->guilty = atomic_read(&ctx->guilty_count);
        e->active = atomic_read(&ctx->active_count);
 
-       e->total_runtime = rq->context->runtime.total;
-       e->avg_runtime = ewma_runtime_read(&rq->context->runtime.avg);
+       e->total_runtime = intel_context_get_total_runtime_ns(rq->context);
+       e->avg_runtime = intel_context_get_avg_runtime_ns(rq->context);
 
        simulated = i915_gem_context_no_error_capture(ctx);
 
@@ -1436,7 +1479,7 @@ static void add_vma_coredump(struct intel_engine_coredump *ee,
 }
 
 struct intel_engine_coredump *
-intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp)
+intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp, u32 dump_flags)
 {
        struct intel_engine_coredump *ee;
 
@@ -1446,8 +1489,10 @@ intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp)
 
        ee->engine = engine;
 
-       engine_record_registers(ee);
-       engine_record_execlists(ee);
+       if (!(dump_flags & CORE_DUMP_FLAG_IS_GUC_CAPTURE)) {
+               engine_record_registers(ee);
+               engine_record_execlists(ee);
+       }
 
        return ee;
 }
@@ -1511,7 +1556,8 @@ intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
 
 static struct intel_engine_coredump *
 capture_engine(struct intel_engine_cs *engine,
-              struct i915_vma_compress *compress)
+              struct i915_vma_compress *compress,
+              u32 dump_flags)
 {
        struct intel_engine_capture_vma *capture = NULL;
        struct intel_engine_coredump *ee;
@@ -1519,7 +1565,7 @@ capture_engine(struct intel_engine_cs *engine,
        struct i915_request *rq = NULL;
        unsigned long flags;
 
-       ee = intel_engine_coredump_alloc(engine, ALLOW_FAIL);
+       ee = intel_engine_coredump_alloc(engine, ALLOW_FAIL, dump_flags);
        if (!ee)
                return NULL;
 
@@ -1552,6 +1598,8 @@ capture_engine(struct intel_engine_cs *engine,
                i915_request_put(rq);
                goto no_request_capture;
        }
+       if (dump_flags & CORE_DUMP_FLAG_IS_GUC_CAPTURE)
+               intel_guc_capture_get_matching_node(engine->gt, ee, ce);
 
        intel_engine_coredump_add_vma(ee, capture, compress);
        i915_request_put(rq);
@@ -1566,7 +1614,8 @@ no_request_capture:
 static void
 gt_record_engines(struct intel_gt_coredump *gt,
                  intel_engine_mask_t engine_mask,
-                 struct i915_vma_compress *compress)
+                 struct i915_vma_compress *compress,
+                 u32 dump_flags)
 {
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
@@ -1577,7 +1626,7 @@ gt_record_engines(struct intel_gt_coredump *gt,
                /* Refill our page pool before entering atomic section */
                pool_refill(&compress->pool, ALLOW_FAIL);
 
-               ee = capture_engine(engine, compress);
+               ee = capture_engine(engine, compress, dump_flags);
                if (!ee)
                        continue;
 
@@ -1585,6 +1634,8 @@ gt_record_engines(struct intel_gt_coredump *gt,
 
                gt->simulated |= ee->simulated;
                if (ee->simulated) {
+                       if (dump_flags & CORE_DUMP_FLAG_IS_GUC_CAPTURE)
+                               intel_guc_capture_free_node(ee);
                        kfree(ee);
                        continue;
                }
@@ -1620,8 +1671,74 @@ gt_record_uc(struct intel_gt_coredump *gt,
        return error_uc;
 }
 
-/* Capture all registers which don't fit into another category. */
-static void gt_record_regs(struct intel_gt_coredump *gt)
+/* Capture display registers. */
+static void gt_record_display_regs(struct intel_gt_coredump *gt)
+{
+       struct intel_uncore *uncore = gt->_gt->uncore;
+       struct drm_i915_private *i915 = uncore->i915;
+
+       if (GRAPHICS_VER(i915) >= 6)
+               gt->derrmr = intel_uncore_read(uncore, DERRMR);
+
+       if (GRAPHICS_VER(i915) >= 8)
+               gt->ier = intel_uncore_read(uncore, GEN8_DE_MISC_IER);
+       else if (IS_VALLEYVIEW(i915))
+               gt->ier = intel_uncore_read(uncore, VLV_IER);
+       else if (HAS_PCH_SPLIT(i915))
+               gt->ier = intel_uncore_read(uncore, DEIER);
+       else if (GRAPHICS_VER(i915) == 2)
+               gt->ier = intel_uncore_read16(uncore, GEN2_IER);
+       else
+               gt->ier = intel_uncore_read(uncore, GEN2_IER);
+}
+
+/* Capture all other registers that GuC doesn't capture. */
+static void gt_record_global_nonguc_regs(struct intel_gt_coredump *gt)
+{
+       struct intel_uncore *uncore = gt->_gt->uncore;
+       struct drm_i915_private *i915 = uncore->i915;
+       int i;
+
+       if (IS_VALLEYVIEW(i915)) {
+               gt->gtier[0] = intel_uncore_read(uncore, GTIER);
+               gt->ngtier = 1;
+       } else if (GRAPHICS_VER(i915) >= 11) {
+               gt->gtier[0] =
+                       intel_uncore_read(uncore,
+                                         GEN11_RENDER_COPY_INTR_ENABLE);
+               gt->gtier[1] =
+                       intel_uncore_read(uncore, GEN11_VCS_VECS_INTR_ENABLE);
+               gt->gtier[2] =
+                       intel_uncore_read(uncore, GEN11_GUC_SG_INTR_ENABLE);
+               gt->gtier[3] =
+                       intel_uncore_read(uncore,
+                                         GEN11_GPM_WGBOXPERF_INTR_ENABLE);
+               gt->gtier[4] =
+                       intel_uncore_read(uncore,
+                                         GEN11_CRYPTO_RSVD_INTR_ENABLE);
+               gt->gtier[5] =
+                       intel_uncore_read(uncore,
+                                         GEN11_GUNIT_CSME_INTR_ENABLE);
+               gt->ngtier = 6;
+       } else if (GRAPHICS_VER(i915) >= 8) {
+               for (i = 0; i < 4; i++)
+                       gt->gtier[i] =
+                               intel_uncore_read(uncore, GEN8_GT_IER(i));
+               gt->ngtier = 4;
+       } else if (HAS_PCH_SPLIT(i915)) {
+               gt->gtier[0] = intel_uncore_read(uncore, GTIER);
+               gt->ngtier = 1;
+       }
+
+       gt->eir = intel_uncore_read(uncore, EIR);
+       gt->pgtbl_er = intel_uncore_read(uncore, PGTBL_ER);
+}
+
+/*
+ * Capture all registers that relate to workload submission.
+ * NOTE: In GuC submission, when GuC resets an engine, it can dump these for us
+ */
+static void gt_record_global_regs(struct intel_gt_coredump *gt)
 {
        struct intel_uncore *uncore = gt->_gt->uncore;
        struct drm_i915_private *i915 = uncore->i915;
@@ -1637,11 +1754,8 @@ static void gt_record_regs(struct intel_gt_coredump *gt)
         */
 
        /* 1: Registers specific to a single generation */
-       if (IS_VALLEYVIEW(i915)) {
-               gt->gtier[0] = intel_uncore_read(uncore, GTIER);
-               gt->ier = intel_uncore_read(uncore, VLV_IER);
+       if (IS_VALLEYVIEW(i915))
                gt->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE_VLV);
-       }
 
        if (GRAPHICS_VER(i915) == 7)
                gt->err_int = intel_uncore_read(uncore, GEN7_ERR_INT);
@@ -1669,7 +1783,6 @@ static void gt_record_regs(struct intel_gt_coredump *gt)
                gt->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE_MT);
 
        if (GRAPHICS_VER(i915) >= 6) {
-               gt->derrmr = intel_uncore_read(uncore, DERRMR);
                if (GRAPHICS_VER(i915) < 12) {
                        gt->error = intel_uncore_read(uncore, ERROR_GEN6);
                        gt->done_reg = intel_uncore_read(uncore, DONE_REG);
@@ -1689,7 +1802,7 @@ static void gt_record_regs(struct intel_gt_coredump *gt)
                gt->aux_err = intel_uncore_read(uncore, GEN12_AUX_ERR_DBG);
 
        if (GRAPHICS_VER(i915) >= 12) {
-               for (i = 0; i < GEN12_SFC_DONE_MAX; i++) {
+               for (i = 0; i < I915_MAX_SFC; i++) {
                        /*
                         * SFC_DONE resides in the VD forcewake domain, so it
                         * only exists if the corresponding VCS engine is
@@ -1705,44 +1818,6 @@ static void gt_record_regs(struct intel_gt_coredump *gt)
 
                gt->gam_done = intel_uncore_read(uncore, GEN12_GAM_DONE);
        }
-
-       /* 4: Everything else */
-       if (GRAPHICS_VER(i915) >= 11) {
-               gt->ier = intel_uncore_read(uncore, GEN8_DE_MISC_IER);
-               gt->gtier[0] =
-                       intel_uncore_read(uncore,
-                                         GEN11_RENDER_COPY_INTR_ENABLE);
-               gt->gtier[1] =
-                       intel_uncore_read(uncore, GEN11_VCS_VECS_INTR_ENABLE);
-               gt->gtier[2] =
-                       intel_uncore_read(uncore, GEN11_GUC_SG_INTR_ENABLE);
-               gt->gtier[3] =
-                       intel_uncore_read(uncore,
-                                         GEN11_GPM_WGBOXPERF_INTR_ENABLE);
-               gt->gtier[4] =
-                       intel_uncore_read(uncore,
-                                         GEN11_CRYPTO_RSVD_INTR_ENABLE);
-               gt->gtier[5] =
-                       intel_uncore_read(uncore,
-                                         GEN11_GUNIT_CSME_INTR_ENABLE);
-               gt->ngtier = 6;
-       } else if (GRAPHICS_VER(i915) >= 8) {
-               gt->ier = intel_uncore_read(uncore, GEN8_DE_MISC_IER);
-               for (i = 0; i < 4; i++)
-                       gt->gtier[i] =
-                               intel_uncore_read(uncore, GEN8_GT_IER(i));
-               gt->ngtier = 4;
-       } else if (HAS_PCH_SPLIT(i915)) {
-               gt->ier = intel_uncore_read(uncore, DEIER);
-               gt->gtier[0] = intel_uncore_read(uncore, GTIER);
-               gt->ngtier = 1;
-       } else if (GRAPHICS_VER(i915) == 2) {
-               gt->ier = intel_uncore_read16(uncore, GEN2_IER);
-       } else if (!IS_VALLEYVIEW(i915)) {
-               gt->ier = intel_uncore_read(uncore, GEN2_IER);
-       }
-       gt->eir = intel_uncore_read(uncore, EIR);
-       gt->pgtbl_er = intel_uncore_read(uncore, PGTBL_ER);
 }
 
 static void gt_record_info(struct intel_gt_coredump *gt)
@@ -1854,7 +1929,7 @@ i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp)
 #define DAY_AS_SECONDS(x) (24 * 60 * 60 * (x))
 
 struct intel_gt_coredump *
-intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp)
+intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp, u32 dump_flags)
 {
        struct intel_gt_coredump *gc;
 
@@ -1865,7 +1940,21 @@ intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp)
        gc->_gt = gt;
        gc->awake = intel_gt_pm_is_awake(gt);
 
-       gt_record_regs(gc);
+       gt_record_display_regs(gc);
+       gt_record_global_nonguc_regs(gc);
+
+       /*
+        * GuC dumps global, eng-class and eng-instance registers
+        * (that can change as part of engine state during execution)
+        * before an engine is reset due to a hung context.
+        * GuC captures and reports all three groups of registers
+        * together as a single set before the engine is reset.
+        * Thus, if GuC triggered the context reset we retrieve
+        * the register values as part of gt_record_engines.
+        */
+       if (!(dump_flags & CORE_DUMP_FLAG_IS_GUC_CAPTURE))
+               gt_record_global_regs(gc);
+
        gt_record_fences(gc);
 
        return gc;
@@ -1899,7 +1988,7 @@ void i915_vma_capture_finish(struct intel_gt_coredump *gt,
 }
 
 static struct i915_gpu_coredump *
-__i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask)
+__i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask, u32 dump_flags)
 {
        struct drm_i915_private *i915 = gt->i915;
        struct i915_gpu_coredump *error;
@@ -1913,7 +2002,7 @@ __i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask)
        if (!error)
                return ERR_PTR(-ENOMEM);
 
-       error->gt = intel_gt_coredump_alloc(gt, ALLOW_FAIL);
+       error->gt = intel_gt_coredump_alloc(gt, ALLOW_FAIL, dump_flags);
        if (error->gt) {
                struct i915_vma_compress *compress;
 
@@ -1924,11 +2013,19 @@ __i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask)
                        return ERR_PTR(-ENOMEM);
                }
 
+               if (INTEL_INFO(i915)->has_gt_uc) {
+                       error->gt->uc = gt_record_uc(error->gt, compress);
+                       if (error->gt->uc) {
+                               if (dump_flags & CORE_DUMP_FLAG_IS_GUC_CAPTURE)
+                                       error->gt->uc->is_guc_capture = true;
+                               else
+                                       GEM_BUG_ON(error->gt->uc->is_guc_capture);
+                       }
+               }
+
                gt_record_info(error->gt);
-               gt_record_engines(error->gt, engine_mask, compress);
+               gt_record_engines(error->gt, engine_mask, compress, dump_flags);
 
-               if (INTEL_INFO(i915)->has_gt_uc)
-                       error->gt->uc = gt_record_uc(error->gt, compress);
 
                i915_vma_capture_finish(error->gt, compress);
 
@@ -1941,7 +2038,7 @@ __i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask)
 }
 
 struct i915_gpu_coredump *
-i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask)
+i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask, u32 dump_flags)
 {
        static DEFINE_MUTEX(capture_mutex);
        int ret = mutex_lock_interruptible(&capture_mutex);
@@ -1950,7 +2047,7 @@ i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask)
        if (ret)
                return ERR_PTR(ret);
 
-       dump = __i915_gpu_coredump(gt, engine_mask);
+       dump = __i915_gpu_coredump(gt, engine_mask, dump_flags);
        mutex_unlock(&capture_mutex);
 
        return dump;
@@ -1997,11 +2094,11 @@ void i915_error_state_store(struct i915_gpu_coredump *error)
  * to pick up.
  */
 void i915_capture_error_state(struct intel_gt *gt,
-                             intel_engine_mask_t engine_mask)
+                             intel_engine_mask_t engine_mask, u32 dump_flags)
 {
        struct i915_gpu_coredump *error;
 
-       error = i915_gpu_coredump(gt, engine_mask);
+       error = i915_gpu_coredump(gt, engine_mask, dump_flags);
        if (IS_ERR(error)) {
                cmpxchg(&gt->i915->gpu_error.first_error, NULL, error);
                return;
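
A hedged sketch, not part of the patch above: how a reset path might choose the new dump_flags argument. The wrapper name and the guc_triggered flag are illustrative; only i915_capture_error_state() and the CORE_DUMP_FLAG_* values come from this series.

static void example_capture_engine_hang(struct intel_gt *gt,
                                        struct intel_engine_cs *engine,
                                        bool guc_triggered)
{
        /*
         * If GuC performed the reset it has already captured the engine
         * registers, so tell the coredump code to pull them from the GuC
         * capture buffers instead of re-reading them over MMIO.
         */
        u32 flags = guc_triggered ? CORE_DUMP_FLAG_IS_GUC_CAPTURE :
                                    CORE_DUMP_FLAG_NONE;

        i915_capture_error_state(gt, engine->mask, flags);
}
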
index 903d838..72d8607 100644 (file)
@@ -53,6 +53,8 @@ struct i915_request_coredump {
        struct i915_sched_attr sched_attr;
 };
 
+struct __guc_capture_parsed_output;
+
 struct intel_engine_coredump {
        const struct intel_engine_cs *engine;
 
@@ -84,11 +86,15 @@ struct intel_engine_coredump {
        u32 rc_psmi; /* sleep state */
        struct intel_instdone instdone;
 
+       /* GuC matched capture-lists info */
+       struct intel_guc_state_capture *capture;
+       struct __guc_capture_parsed_output *guc_capture_node;
+
        struct i915_gem_context_coredump {
                char comm[TASK_COMM_LEN];
 
                u64 total_runtime;
-               u32 avg_runtime;
+               u64 avg_runtime;
 
                pid_t pid;
                int active;
@@ -124,7 +130,6 @@ struct intel_gt_coredump {
        u32 pgtbl_er;
        u32 ier;
        u32 gtier[6], ngtier;
-       u32 derrmr;
        u32 forcewake;
        u32 error; /* gen6+ */
        u32 err_int; /* gen7 */
@@ -137,9 +142,12 @@ struct intel_gt_coredump {
        u32 gfx_mode;
        u32 gtt_cache;
        u32 aux_err; /* gen12 */
-       u32 sfc_done[GEN12_SFC_DONE_MAX]; /* gen12 */
        u32 gam_done; /* gen12 */
 
+       /* Display related */
+       u32 derrmr;
+       u32 sfc_done[I915_MAX_SFC]; /* gen12 */
+
        u32 nfence;
        u64 fence[I915_MAX_NUM_FENCES];
 
@@ -149,6 +157,7 @@ struct intel_gt_coredump {
                struct intel_uc_fw guc_fw;
                struct intel_uc_fw huc_fw;
                struct i915_vma_coredump *guc_log;
+               bool is_guc_capture;
        } *uc;
 
        struct intel_gt_coredump *next;
@@ -221,24 +230,32 @@ static inline u32 i915_reset_engine_count(struct i915_gpu_error *error,
        return atomic_read(&error->reset_engine_count[engine->uabi_class]);
 }
 
+#define CORE_DUMP_FLAG_NONE           0x0
+#define CORE_DUMP_FLAG_IS_GUC_CAPTURE BIT(0)
+
 #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
 
 __printf(2, 3)
 void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
+void intel_gpu_error_print_vma(struct drm_i915_error_state_buf *m,
+                              const struct intel_engine_cs *engine,
+                              const struct i915_vma_coredump *vma);
+struct i915_vma_coredump *
+intel_gpu_error_find_batch(const struct intel_engine_coredump *ee);
 
 struct i915_gpu_coredump *i915_gpu_coredump(struct intel_gt *gt,
-                                           intel_engine_mask_t engine_mask);
+                                           intel_engine_mask_t engine_mask, u32 dump_flags);
 void i915_capture_error_state(struct intel_gt *gt,
-                             intel_engine_mask_t engine_mask);
+                             intel_engine_mask_t engine_mask, u32 dump_flags);
 
 struct i915_gpu_coredump *
 i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp);
 
 struct intel_gt_coredump *
-intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp);
+intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp, u32 dump_flags);
 
 struct intel_engine_coredump *
-intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp);
+intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp, u32 dump_flags);
 
 struct intel_engine_capture_vma *
 intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
@@ -282,7 +299,7 @@ void i915_disable_error_state(struct drm_i915_private *i915, int err);
 #else
 
 static inline void
-i915_capture_error_state(struct intel_gt *gt, intel_engine_mask_t engine_mask)
+i915_capture_error_state(struct intel_gt *gt, intel_engine_mask_t engine_mask, u32 dump_flags)
 {
 }
 
@@ -293,13 +310,13 @@ i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp)
 }
 
 static inline struct intel_gt_coredump *
-intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp)
+intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp, u32 dump_flags)
 {
        return NULL;
 }
 
 static inline struct intel_engine_coredump *
-intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp)
+intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp, u32 dump_flags)
 {
        return NULL;
 }
index eea355c..d898141 100644 (file)
@@ -200,6 +200,9 @@ i915_param_named_unsafe(request_timeout_ms, uint, 0600,
                        "Default request/fence/batch buffer expiration timeout.");
 #endif
 
+i915_param_named_unsafe(lmem_size, uint, 0400,
+                       "Set the lmem size (in MiB) for each region. (default: 0, all memory)");
+
 static __always_inline void _print_param(struct drm_printer *p,
                                         const char *name,
                                         const char *type,
index c779a6f..b5e7ea4 100644 (file)
@@ -73,6 +73,7 @@ struct drm_printer;
        param(int, enable_dpcd_backlight, -1, 0600) \
        param(char *, force_probe, CONFIG_DRM_I915_FORCE_PROBE, 0400) \
        param(unsigned int, request_timeout_ms, CONFIG_DRM_I915_REQUEST_TIMEOUT, CONFIG_DRM_I915_REQUEST_TIMEOUT ? 0600 : 0) \
+       param(unsigned int, lmem_size, 0, 0400) \
        /* leave bools at the end to not create holes */ \
        param(bool, enable_hangcheck, true, 0600) \
        param(bool, load_detect_test, false, 0600) \
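
A hedged usage note: the new lmem_size parameter is registered with mode 0400, so it can only be set at load time rather than through sysfs at runtime, e.g. on the kernel command line (the value is illustrative):

    i915.lmem_size=4096    # cap each local-memory region at 4096 MiB
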
index 2dfbc22..b5ca00c 100644 (file)
@@ -479,12 +479,35 @@ static int query_memregion_info(struct drm_i915_private *i915,
        return total_length;
 }
 
+static int query_hwconfig_blob(struct drm_i915_private *i915,
+                              struct drm_i915_query_item *query_item)
+{
+       struct intel_gt *gt = to_gt(i915);
+       struct intel_hwconfig *hwconfig = &gt->info.hwconfig;
+
+       if (!hwconfig->size || !hwconfig->ptr)
+               return -ENODEV;
+
+       if (query_item->length == 0)
+               return hwconfig->size;
+
+       if (query_item->length < hwconfig->size)
+               return -EINVAL;
+
+       if (copy_to_user(u64_to_user_ptr(query_item->data_ptr),
+                        hwconfig->ptr, hwconfig->size))
+               return -EFAULT;
+
+       return hwconfig->size;
+}
+
 static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv,
                                        struct drm_i915_query_item *query_item) = {
        query_topology_info,
        query_engine_info,
        query_perf_config,
        query_memregion_info,
+       query_hwconfig_blob,
 };
 
 int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
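
A hedged userspace sketch of the two-phase protocol that query_hwconfig_blob() implements: a first pass with length 0 returns the blob size, a second pass copies the blob into the caller's buffer. Error handling is abbreviated and the helper is illustrative, not part of the patch.

#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static void *read_hwconfig_blob(int drm_fd, int32_t *size_out)
{
        struct drm_i915_query_item item = {
                .query_id = DRM_I915_QUERY_HWCONFIG_BLOB,
        };
        struct drm_i915_query query = {
                .num_items = 1,
                .items_ptr = (uintptr_t)&item,
        };
        void *blob;

        /* First pass: item.length == 0 asks the kernel for the blob size. */
        if (ioctl(drm_fd, DRM_IOCTL_I915_QUERY, &query) || item.length <= 0)
                return NULL;

        blob = malloc(item.length);
        if (!blob)
                return NULL;
        item.data_ptr = (uintptr_t)blob;

        /* Second pass: the kernel copies hwconfig->ptr into our buffer. */
        if (ioctl(drm_fd, DRM_IOCTL_I915_QUERY, &query) || item.length <= 0) {
                free(blob);
                return NULL;
        }

        *size_out = item.length;
        return blob;
}
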
index 3c87d77..6104f7a 100644 (file)
 #define GEN9_RP_STATE_LIMITS   _MMIO(0x138148)
 #define XEHPSDV_RP_STATE_CAP   _MMIO(0x250014)
 
+#define GT0_PERF_LIMIT_REASONS         _MMIO(0x1381a8)
+#define   GT0_PERF_LIMIT_REASONS_MASK  0xde3
+#define   PROCHOT_MASK                 REG_BIT(1)
+#define   THERMAL_LIMIT_MASK           REG_BIT(2)
+#define   RATL_MASK                    REG_BIT(6)
+#define   VR_THERMALERT_MASK           REG_BIT(7)
+#define   VR_TDC_MASK                  REG_BIT(8)
+#define   POWER_LIMIT_4_MASK           REG_BIT(9)
+#define   POWER_LIMIT_1_MASK           REG_BIT(11)
+#define   POWER_LIMIT_2_MASK           REG_BIT(12)
+
 #define CHV_CLK_CTL1                   _MMIO(0x101100)
 #define VLV_CLK_CTL2                   _MMIO(0x101104)
 #define   CLK_CTL2_CZCOUNT_30NS_SHIFT  28
@@ -8465,6 +8476,9 @@ enum skl_power_gate {
 #define   SGGI_DIS                     REG_BIT(15)
 #define   SGR_DIS                      REG_BIT(13)
 
+#define XEHPSDV_TILE0_ADDR_RANGE       _MMIO(0x4900)
+#define   XEHPSDV_TILE_LMEM_RANGE_SHIFT  8
+
 #define XEHPSDV_FLAT_CCS_BASE_ADDR     _MMIO(0x4910)
 #define   XEHPSDV_CCS_BASE_SHIFT       8
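
A hedged sketch of how the new GT0_PERF_LIMIT_REASONS bits might be consumed, for example in a throttle-reason readout; the helper name is illustrative and not part of the patch.

static bool example_thermally_throttled(struct intel_uncore *uncore)
{
        u32 reasons = intel_uncore_read(uncore, GT0_PERF_LIMIT_REASONS);

        /* Any of the thermal/voltage-regulator limit bits means the GT
         * frequency is currently being capped. */
        return reasons & (PROCHOT_MASK | THERMAL_LIMIT_MASK |
                          RATL_MASK | VR_THERMALERT_MASK);
}
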
 
index d78d78f..8f486f7 100644 (file)
@@ -123,6 +123,4 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 
 #define VLV_DISPLAY_BASE               0x180000
 
-#define GEN12_SFC_DONE_MAX             4
-
 #endif /* __I915_REG_DEFS__ */
index a4d1759..8521dab 100644 (file)
 #include "i915_sysfs.h"
 #include "intel_pm.h"
 
-static inline struct drm_i915_private *kdev_minor_to_i915(struct device *kdev)
+struct drm_i915_private *kdev_minor_to_i915(struct device *kdev)
 {
        struct drm_minor *minor = dev_get_drvdata(kdev);
        return to_i915(minor->dev);
 }
 
-#ifdef CONFIG_PM
-static u32 calc_residency(struct drm_i915_private *dev_priv,
-                         i915_reg_t reg)
-{
-       intel_wakeref_t wakeref;
-       u64 res = 0;
-
-       with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref)
-               res = intel_rc6_residency_us(&to_gt(dev_priv)->rc6, reg);
-
-       return DIV_ROUND_CLOSEST_ULL(res, 1000);
-}
-
-static ssize_t rc6_enable_show(struct device *kdev,
-                              struct device_attribute *attr, char *buf)
-{
-       struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
-       unsigned int mask;
-
-       mask = 0;
-       if (HAS_RC6(dev_priv))
-               mask |= BIT(0);
-       if (HAS_RC6p(dev_priv))
-               mask |= BIT(1);
-       if (HAS_RC6pp(dev_priv))
-               mask |= BIT(2);
-
-       return sysfs_emit(buf, "%x\n", mask);
-}
-
-static ssize_t rc6_residency_ms_show(struct device *kdev,
-                                    struct device_attribute *attr, char *buf)
-{
-       struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
-       u32 rc6_residency = calc_residency(dev_priv, GEN6_GT_GFX_RC6);
-       return sysfs_emit(buf, "%u\n", rc6_residency);
-}
-
-static ssize_t rc6p_residency_ms_show(struct device *kdev,
-                                     struct device_attribute *attr, char *buf)
-{
-       struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
-       u32 rc6p_residency = calc_residency(dev_priv, GEN6_GT_GFX_RC6p);
-       return sysfs_emit(buf, "%u\n", rc6p_residency);
-}
-
-static ssize_t rc6pp_residency_ms_show(struct device *kdev,
-                                      struct device_attribute *attr, char *buf)
-{
-       struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
-       u32 rc6pp_residency = calc_residency(dev_priv, GEN6_GT_GFX_RC6pp);
-       return sysfs_emit(buf, "%u\n", rc6pp_residency);
-}
-
-static ssize_t media_rc6_residency_ms_show(struct device *kdev,
-                                          struct device_attribute *attr, char *buf)
-{
-       struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
-       u32 rc6_residency = calc_residency(dev_priv, VLV_GT_MEDIA_RC6);
-       return sysfs_emit(buf, "%u\n", rc6_residency);
-}
-
-static DEVICE_ATTR_RO(rc6_enable);
-static DEVICE_ATTR_RO(rc6_residency_ms);
-static DEVICE_ATTR_RO(rc6p_residency_ms);
-static DEVICE_ATTR_RO(rc6pp_residency_ms);
-static DEVICE_ATTR_RO(media_rc6_residency_ms);
-
-static struct attribute *rc6_attrs[] = {
-       &dev_attr_rc6_enable.attr,
-       &dev_attr_rc6_residency_ms.attr,
-       NULL
-};
-
-static const struct attribute_group rc6_attr_group = {
-       .name = power_group_name,
-       .attrs =  rc6_attrs
-};
-
-static struct attribute *rc6p_attrs[] = {
-       &dev_attr_rc6p_residency_ms.attr,
-       &dev_attr_rc6pp_residency_ms.attr,
-       NULL
-};
-
-static const struct attribute_group rc6p_attr_group = {
-       .name = power_group_name,
-       .attrs =  rc6p_attrs
-};
-
-static struct attribute *media_rc6_attrs[] = {
-       &dev_attr_media_rc6_residency_ms.attr,
-       NULL
-};
-
-static const struct attribute_group media_rc6_attr_group = {
-       .name = power_group_name,
-       .attrs =  media_rc6_attrs
-};
-#endif
-
 static int l3_access_valid(struct drm_i915_private *i915, loff_t offset)
 {
        if (!HAS_L3_DPF(i915))
@@ -257,171 +156,6 @@ static const struct bin_attribute dpf_attrs_1 = {
        .private = (void *)1
 };
 
-static ssize_t gt_act_freq_mhz_show(struct device *kdev,
-                                   struct device_attribute *attr, char *buf)
-{
-       struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
-       struct intel_rps *rps = &to_gt(i915)->rps;
-
-       return sysfs_emit(buf, "%d\n", intel_rps_read_actual_frequency(rps));
-}
-
-static ssize_t gt_cur_freq_mhz_show(struct device *kdev,
-                                   struct device_attribute *attr, char *buf)
-{
-       struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
-       struct intel_rps *rps = &to_gt(i915)->rps;
-
-       return sysfs_emit(buf, "%d\n", intel_rps_get_requested_frequency(rps));
-}
-
-static ssize_t gt_boost_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf)
-{
-       struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
-       struct intel_rps *rps = &to_gt(i915)->rps;
-
-       return sysfs_emit(buf, "%d\n", intel_rps_get_boost_frequency(rps));
-}
-
-static ssize_t gt_boost_freq_mhz_store(struct device *kdev,
-                                      struct device_attribute *attr,
-                                      const char *buf, size_t count)
-{
-       struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
-       struct intel_rps *rps = &to_gt(dev_priv)->rps;
-       ssize_t ret;
-       u32 val;
-
-       ret = kstrtou32(buf, 0, &val);
-       if (ret)
-               return ret;
-
-       ret = intel_rps_set_boost_frequency(rps, val);
-
-       return ret ?: count;
-}
-
-static ssize_t vlv_rpe_freq_mhz_show(struct device *kdev,
-                                    struct device_attribute *attr, char *buf)
-{
-       struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
-       struct intel_rps *rps = &to_gt(dev_priv)->rps;
-
-       return sysfs_emit(buf, "%d\n", intel_gpu_freq(rps, rps->efficient_freq));
-}
-
-static ssize_t gt_max_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf)
-{
-       struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
-       struct intel_gt *gt = to_gt(dev_priv);
-       struct intel_rps *rps = &gt->rps;
-
-       return sysfs_emit(buf, "%d\n", intel_rps_get_max_frequency(rps));
-}
-
-static ssize_t gt_max_freq_mhz_store(struct device *kdev,
-                                    struct device_attribute *attr,
-                                    const char *buf, size_t count)
-{
-       struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
-       struct intel_gt *gt = to_gt(dev_priv);
-       struct intel_rps *rps = &gt->rps;
-       ssize_t ret;
-       u32 val;
-
-       ret = kstrtou32(buf, 0, &val);
-       if (ret)
-               return ret;
-
-       ret = intel_rps_set_max_frequency(rps, val);
-
-       return ret ?: count;
-}
-
-static ssize_t gt_min_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf)
-{
-       struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
-       struct intel_gt *gt = to_gt(i915);
-       struct intel_rps *rps = &gt->rps;
-
-       return sysfs_emit(buf, "%d\n", intel_rps_get_min_frequency(rps));
-}
-
-static ssize_t gt_min_freq_mhz_store(struct device *kdev,
-                                    struct device_attribute *attr,
-                                    const char *buf, size_t count)
-{
-       struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
-       struct intel_rps *rps = &to_gt(i915)->rps;
-       ssize_t ret;
-       u32 val;
-
-       ret = kstrtou32(buf, 0, &val);
-       if (ret)
-               return ret;
-
-       ret = intel_rps_set_min_frequency(rps, val);
-
-       return ret ?: count;
-}
-
-static DEVICE_ATTR_RO(gt_act_freq_mhz);
-static DEVICE_ATTR_RO(gt_cur_freq_mhz);
-static DEVICE_ATTR_RW(gt_boost_freq_mhz);
-static DEVICE_ATTR_RW(gt_max_freq_mhz);
-static DEVICE_ATTR_RW(gt_min_freq_mhz);
-
-static DEVICE_ATTR_RO(vlv_rpe_freq_mhz);
-
-static ssize_t gt_rp_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf);
-static DEVICE_ATTR(gt_RP0_freq_mhz, S_IRUGO, gt_rp_mhz_show, NULL);
-static DEVICE_ATTR(gt_RP1_freq_mhz, S_IRUGO, gt_rp_mhz_show, NULL);
-static DEVICE_ATTR(gt_RPn_freq_mhz, S_IRUGO, gt_rp_mhz_show, NULL);
-
-/* For now we have a static number of RP states */
-static ssize_t gt_rp_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf)
-{
-       struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
-       struct intel_rps *rps = &to_gt(dev_priv)->rps;
-       u32 val;
-
-       if (attr == &dev_attr_gt_RP0_freq_mhz)
-               val = intel_rps_get_rp0_frequency(rps);
-       else if (attr == &dev_attr_gt_RP1_freq_mhz)
-               val = intel_rps_get_rp1_frequency(rps);
-       else if (attr == &dev_attr_gt_RPn_freq_mhz)
-               val = intel_rps_get_rpn_frequency(rps);
-       else
-               BUG();
-
-       return sysfs_emit(buf, "%d\n", val);
-}
-
-static const struct attribute * const gen6_attrs[] = {
-       &dev_attr_gt_act_freq_mhz.attr,
-       &dev_attr_gt_cur_freq_mhz.attr,
-       &dev_attr_gt_boost_freq_mhz.attr,
-       &dev_attr_gt_max_freq_mhz.attr,
-       &dev_attr_gt_min_freq_mhz.attr,
-       &dev_attr_gt_RP0_freq_mhz.attr,
-       &dev_attr_gt_RP1_freq_mhz.attr,
-       &dev_attr_gt_RPn_freq_mhz.attr,
-       NULL,
-};
-
-static const struct attribute * const vlv_attrs[] = {
-       &dev_attr_gt_act_freq_mhz.attr,
-       &dev_attr_gt_cur_freq_mhz.attr,
-       &dev_attr_gt_boost_freq_mhz.attr,
-       &dev_attr_gt_max_freq_mhz.attr,
-       &dev_attr_gt_min_freq_mhz.attr,
-       &dev_attr_gt_RP0_freq_mhz.attr,
-       &dev_attr_gt_RP1_freq_mhz.attr,
-       &dev_attr_gt_RPn_freq_mhz.attr,
-       &dev_attr_vlv_rpe_freq_mhz.attr,
-       NULL,
-};
-
 #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
 
 static ssize_t error_state_read(struct file *filp, struct kobject *kobj,
@@ -492,29 +226,6 @@ void i915_setup_sysfs(struct drm_i915_private *dev_priv)
        struct device *kdev = dev_priv->drm.primary->kdev;
        int ret;
 
-#ifdef CONFIG_PM
-       if (HAS_RC6(dev_priv)) {
-               ret = sysfs_merge_group(&kdev->kobj,
-                                       &rc6_attr_group);
-               if (ret)
-                       drm_err(&dev_priv->drm,
-                               "RC6 residency sysfs setup failed\n");
-       }
-       if (HAS_RC6p(dev_priv)) {
-               ret = sysfs_merge_group(&kdev->kobj,
-                                       &rc6p_attr_group);
-               if (ret)
-                       drm_err(&dev_priv->drm,
-                               "RC6p residency sysfs setup failed\n");
-       }
-       if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
-               ret = sysfs_merge_group(&kdev->kobj,
-                                       &media_rc6_attr_group);
-               if (ret)
-                       drm_err(&dev_priv->drm,
-                               "Media RC6 residency sysfs setup failed\n");
-       }
-#endif
        if (HAS_L3_DPF(dev_priv)) {
                ret = device_create_bin_file(kdev, &dpf_attrs);
                if (ret)
@@ -530,13 +241,10 @@ void i915_setup_sysfs(struct drm_i915_private *dev_priv)
                }
        }
 
-       ret = 0;
-       if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
-               ret = sysfs_create_files(&kdev->kobj, vlv_attrs);
-       else if (GRAPHICS_VER(dev_priv) >= 6)
-               ret = sysfs_create_files(&kdev->kobj, gen6_attrs);
-       if (ret)
-               drm_err(&dev_priv->drm, "RPS sysfs setup failed\n");
+       dev_priv->sysfs_gt = kobject_create_and_add("gt", &kdev->kobj);
+       if (!dev_priv->sysfs_gt)
+               drm_warn(&dev_priv->drm,
+                        "failed to register GT sysfs directory\n");
 
        i915_setup_error_capture(kdev);
 
@@ -549,14 +257,6 @@ void i915_teardown_sysfs(struct drm_i915_private *dev_priv)
 
        i915_teardown_error_capture(kdev);
 
-       if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
-               sysfs_remove_files(&kdev->kobj, vlv_attrs);
-       else
-               sysfs_remove_files(&kdev->kobj, gen6_attrs);
        device_remove_bin_file(kdev,  &dpf_attrs_1);
        device_remove_bin_file(kdev,  &dpf_attrs);
-#ifdef CONFIG_PM
-       sysfs_unmerge_group(&kdev->kobj, &rc6_attr_group);
-       sysfs_unmerge_group(&kdev->kobj, &rc6p_attr_group);
-#endif
 }
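
A hedged, illustrative sketch: with the device-level RPS and RC6 attributes removed above, per-GT attributes are expected to hang off the new "gt" kobject instead (see the intel_gt_sysfs* files in this series). The helper name, directory name and attribute array are hypothetical.

static void example_add_gt0_attrs(struct drm_i915_private *i915,
                                  const struct attribute * const *attrs)
{
        struct kobject *gt_dir;

        /* Hypothetical: create a "gt0" directory under the new "gt" kobject
         * and populate it with per-GT attributes. */
        gt_dir = kobject_create_and_add("gt0", i915->sysfs_gt);
        if (!gt_dir || sysfs_create_files(gt_dir, attrs))
                drm_warn(&i915->drm, "failed to populate gt0 sysfs\n");
}
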
index 41afd43..243a177 100644 (file)
@@ -6,8 +6,11 @@
 #ifndef __I915_SYSFS_H__
 #define __I915_SYSFS_H__
 
+struct device;
 struct drm_i915_private;
 
+struct drm_i915_private *kdev_minor_to_i915(struct device *kdev);
+
 void i915_setup_sysfs(struct drm_i915_private *i915);
 void i915_teardown_sysfs(struct drm_i915_private *i915);
 
index 129f668..8e4e3f7 100644 (file)
@@ -71,7 +71,8 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
 
        GEM_BUG_ON(min_page_size < mm->chunk_size);
 
-       if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
+       if (place->fpfn + bman_res->base.num_pages != place->lpfn &&
+           place->flags & TTM_PL_FLAG_CONTIGUOUS) {
                unsigned long pages;
 
                size = roundup_pow_of_two(size);
index bae3423..033ef69 100644 (file)
@@ -47,7 +47,7 @@ static inline void assert_vma_held_evict(const struct i915_vma *vma)
         * This is the only exception to the requirement of the object lock
         * being held.
         */
-       if (atomic_read(&vma->vm->open))
+       if (kref_read(&vma->vm->ref))
                assert_object_held_shared(vma->obj);
 }
 
@@ -113,6 +113,7 @@ vma_create(struct drm_i915_gem_object *obj,
        struct i915_vma *pos = ERR_PTR(-E2BIG);
        struct i915_vma *vma;
        struct rb_node *rb, **p;
+       int err;
 
        /* The aliasing_ppgtt should never be used directly! */
        GEM_BUG_ON(vm == &vm->gt->ggtt->alias->vm);
@@ -121,8 +122,6 @@ vma_create(struct drm_i915_gem_object *obj,
        if (vma == NULL)
                return ERR_PTR(-ENOMEM);
 
-       kref_init(&vma->ref);
-       vma->vm = i915_vm_get(vm);
        vma->ops = &vm->vma_ops;
        vma->obj = obj;
        vma->size = obj->base.size;
@@ -138,6 +137,8 @@ vma_create(struct drm_i915_gem_object *obj,
        }
 
        INIT_LIST_HEAD(&vma->closed_link);
+       INIT_LIST_HEAD(&vma->obj_link);
+       RB_CLEAR_NODE(&vma->obj_node);
 
        if (view && view->type != I915_GGTT_VIEW_NORMAL) {
                vma->ggtt_view = *view;
@@ -163,8 +164,16 @@ vma_create(struct drm_i915_gem_object *obj,
 
        GEM_BUG_ON(!IS_ALIGNED(vma->size, I915_GTT_PAGE_SIZE));
 
-       spin_lock(&obj->vma.lock);
+       err = mutex_lock_interruptible(&vm->mutex);
+       if (err) {
+               pos = ERR_PTR(err);
+               goto err_vma;
+       }
 
+       vma->vm = vm;
+       list_add_tail(&vma->vm_link, &vm->unbound_list);
+
+       spin_lock(&obj->vma.lock);
        if (i915_is_ggtt(vm)) {
                if (unlikely(overflows_type(vma->size, u32)))
                        goto err_unlock;
@@ -222,13 +231,15 @@ vma_create(struct drm_i915_gem_object *obj,
                list_add_tail(&vma->obj_link, &obj->vma.list);
 
        spin_unlock(&obj->vma.lock);
+       mutex_unlock(&vm->mutex);
 
        return vma;
 
 err_unlock:
        spin_unlock(&obj->vma.lock);
+       list_del_init(&vma->vm_link);
+       mutex_unlock(&vm->mutex);
 err_vma:
-       i915_vm_put(vm);
        i915_vma_free(vma);
        return pos;
 }
@@ -279,7 +290,7 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
        struct i915_vma *vma;
 
        GEM_BUG_ON(view && !i915_is_ggtt_or_dpt(vm));
-       GEM_BUG_ON(!atomic_read(&vm->open));
+       GEM_BUG_ON(!kref_read(&vm->ref));
 
        spin_lock(&obj->vma.lock);
        vma = i915_vma_lookup(obj, vm, view);
@@ -322,7 +333,6 @@ static void __vma_release(struct dma_fence_work *work)
                i915_gem_object_put(vw->pinned);
 
        i915_vm_free_pt_stash(vw->vm, &vw->stash);
-       i915_vm_put(vw->vm);
        if (vw->vma_res)
                i915_vma_resource_put(vw->vma_res);
 }
@@ -515,21 +525,18 @@ int i915_vma_bind(struct i915_vma *vma,
                if (!work->vma_res->bi.pages_rsgt)
                        work->pinned = i915_gem_object_get(vma->obj);
        } else {
-               if (vma->obj) {
-                       ret = i915_gem_object_wait_moving_fence(vma->obj, true);
-                       if (ret) {
-                               i915_vma_resource_free(vma->resource);
-                               vma->resource = NULL;
+               ret = i915_gem_object_wait_moving_fence(vma->obj, true);
+               if (ret) {
+                       i915_vma_resource_free(vma->resource);
+                       vma->resource = NULL;
 
-                               return ret;
-                       }
+                       return ret;
                }
                vma->ops->bind_vma(vma->vm, NULL, vma->resource, cache_level,
                                   bind_flags);
        }
 
-       if (vma->obj)
-               set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags);
+       set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags);
 
        atomic_or(bind_flags, &vma->flags);
        return 0;
@@ -841,7 +848,7 @@ i915_vma_insert(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
        GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
        GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, color));
 
-       list_add_tail(&vma->vm_link, &vma->vm->bound_list);
+       list_move_tail(&vma->vm_link, &vma->vm->bound_list);
 
        return 0;
 }
@@ -857,7 +864,7 @@ i915_vma_detach(struct i915_vma *vma)
         * vma, we can drop its hold on the backing storage and allow
         * it to be reaped by the shrinker.
         */
-       list_del(&vma->vm_link);
+       list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
 }
 
 static bool try_qad_pin(struct i915_vma *vma, unsigned int flags)
@@ -1360,8 +1367,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
        if (flags & PIN_GLOBAL)
                wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm);
 
-       moving = vma->obj ? i915_gem_object_get_moving_fence(vma->obj) : NULL;
-       if (flags & vma->vm->bind_async_flags || moving) {
+       if (flags & vma->vm->bind_async_flags) {
                /* lock VM */
                err = i915_vm_lock_objects(vma->vm, ww);
                if (err)
@@ -1373,8 +1379,9 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
                        goto err_rpm;
                }
 
-               work->vm = i915_vm_get(vma->vm);
+               work->vm = vma->vm;
 
+               moving = i915_gem_object_get_moving_fence(vma->obj);
                dma_fence_work_chain(&work->base, moving);
 
                /* Allocate enough page directories to used PTE */
@@ -1618,16 +1625,6 @@ void i915_vma_reopen(struct i915_vma *vma)
                __i915_vma_remove_closed(vma);
 }
 
-void i915_vma_release(struct kref *ref)
-{
-       struct i915_vma *vma = container_of(ref, typeof(*vma), ref);
-
-       i915_vm_put(vma->vm);
-       i915_active_fini(&vma->active);
-       GEM_WARN_ON(vma->resource);
-       i915_vma_free(vma);
-}
-
 static void force_unbind(struct i915_vma *vma)
 {
        if (!drm_mm_node_allocated(&vma->node))
@@ -1638,7 +1635,7 @@ static void force_unbind(struct i915_vma *vma)
        GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
 }
 
-static void release_references(struct i915_vma *vma)
+static void release_references(struct i915_vma *vma, bool vm_ddestroy)
 {
        struct drm_i915_gem_object *obj = vma->obj;
 
@@ -1648,11 +1645,17 @@ static void release_references(struct i915_vma *vma)
        list_del(&vma->obj_link);
        if (!RB_EMPTY_NODE(&vma->obj_node))
                rb_erase(&vma->obj_node, &obj->vma.tree);
+
        spin_unlock(&obj->vma.lock);
 
        __i915_vma_remove_closed(vma);
 
-       __i915_vma_put(vma);
+       if (vm_ddestroy)
+               i915_vm_resv_put(vma->vm);
+
+       i915_active_fini(&vma->active);
+       GEM_WARN_ON(vma->resource);
+       i915_vma_free(vma);
 }
 
 /**
@@ -1667,8 +1670,12 @@ static void release_references(struct i915_vma *vma)
  * - __i915_gem_object_pages_fini()
  * - __i915_vm_close() - Blocks the above function by taking a reference on
  * the object.
- * - __i915_vma_parked() - Blocks the above functions by taking an open-count on
- * the vm and a reference on the object.
+ * - __i915_vma_parked() - Blocks the above functions by taking a reference
+ * on the vm and a reference on the object. Also takes the object lock so
+ * destruction from __i915_vma_parked() can be blocked by holding the
+ * object lock. Since the object lock is only allowed from within i915 with
+ * an object refcount, holding the object lock also implicitly blocks the
+ * vma freeing from __i915_gem_object_pages_fini().
  *
  * Because of locks taken during destruction, a vma is also guaranteed to
  * stay alive while the following locks are held if it was looked up while
@@ -1676,24 +1683,27 @@ static void release_references(struct i915_vma *vma)
  * - vm->mutex
  * - obj->vma.lock
  * - gt->closed_lock
- *
- * A vma user can also temporarily keep the vma alive while holding a vma
- * reference.
  */
 void i915_vma_destroy_locked(struct i915_vma *vma)
 {
        lockdep_assert_held(&vma->vm->mutex);
 
        force_unbind(vma);
-       release_references(vma);
+       list_del_init(&vma->vm_link);
+       release_references(vma, false);
 }
 
 void i915_vma_destroy(struct i915_vma *vma)
 {
+       bool vm_ddestroy;
+
        mutex_lock(&vma->vm->mutex);
        force_unbind(vma);
+       list_del_init(&vma->vm_link);
+       vm_ddestroy = vma->vm_ddestroy;
+       vma->vm_ddestroy = false;
        mutex_unlock(&vma->vm->mutex);
-       release_references(vma);
+       release_references(vma, vm_ddestroy);
 }
 
 void i915_vma_parked(struct intel_gt *gt)
@@ -1711,7 +1721,7 @@ void i915_vma_parked(struct intel_gt *gt)
                if (!kref_get_unless_zero(&obj->base.refcount))
                        continue;
 
-               if (!i915_vm_tryopen(vm)) {
+               if (!i915_vm_tryget(vm)) {
                        i915_gem_object_put(obj);
                        continue;
                }
@@ -1737,7 +1747,7 @@ void i915_vma_parked(struct intel_gt *gt)
                }
 
                i915_gem_object_put(obj);
-               i915_vm_close(vm);
+               i915_vm_put(vm);
        }
 }
 
@@ -1894,7 +1904,9 @@ struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async)
 
        /* If vm is not open, unbind is a nop. */
        vma_res->needs_wakeref = i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND) &&
-               atomic_read(&vma->vm->open);
+               kref_read(&vma->vm->ref);
+       vma_res->skip_pte_rewrite = !kref_read(&vma->vm->ref) ||
+               vma->vm->skip_pte_rewrite;
        trace_i915_vma_unbind(vma);
 
        unbind_fence = i915_vma_resource_unbind(vma_res);
index 67ae734..6034991 100644 (file)
@@ -222,20 +222,6 @@ void i915_vma_unlink_ctx(struct i915_vma *vma);
 void i915_vma_close(struct i915_vma *vma);
 void i915_vma_reopen(struct i915_vma *vma);
 
-static inline struct i915_vma *__i915_vma_get(struct i915_vma *vma)
-{
-       if (kref_get_unless_zero(&vma->ref))
-               return vma;
-
-       return NULL;
-}
-
-void i915_vma_release(struct kref *ref);
-static inline void __i915_vma_put(struct i915_vma *vma)
-{
-       kref_put(&vma->ref, i915_vma_release);
-}
-
 void i915_vma_destroy_locked(struct i915_vma *vma);
 void i915_vma_destroy(struct i915_vma *vma);
 
index 57ae92b..27c5502 100644 (file)
@@ -178,7 +178,7 @@ static void i915_vma_resource_unbind_work(struct work_struct *work)
        bool lockdep_cookie;
 
        lockdep_cookie = dma_fence_begin_signalling();
-       if (likely(atomic_read(&vm->open)))
+       if (likely(!vma_res->skip_pte_rewrite))
                vma_res->ops->unbind_vma(vm, vma_res);
 
        dma_fence_end_signalling(lockdep_cookie);
index 2591391..5d8427c 100644 (file)
@@ -62,6 +62,11 @@ struct i915_page_sizes {
  * deferred to a work item awaiting unsignaled fences. This is a hack.
  * (dma_fence_work uses a fence flag for this, but this seems slightly
  * cleaner).
+ * @needs_wakeref: Whether a wakeref is needed during unbind. Since we can't
+ * take a wakeref in the dma-fence signalling critical path, it needs to be
+ * taken when the unbind is scheduled.
+ * @skip_pte_rewrite: During ggtt suspend and vm takedown pte rewriting
+ * needs to be skipped for unbind.
  *
  * The lifetime of a struct i915_vma_resource is from a binding request to
  * the actual possible asynchronous unbind has completed.
@@ -113,6 +118,7 @@ struct i915_vma_resource {
        bool allocated:1;
        bool immediate_unbind:1;
        bool needs_wakeref:1;
+       bool skip_pte_rewrite:1;
 };
 
 bool i915_vma_resource_hold(struct i915_vma_resource *vma_res,
index 88370da..be6e028 100644 (file)
@@ -211,7 +211,6 @@ struct i915_vma {
         * handles (but same file) for execbuf, i.e. the number of aliases
         * that exist in the ctx->handle_vmas LUT for this vma.
         */
-       struct kref ref;
        atomic_t open_count;
        atomic_t flags;
        /**
@@ -272,6 +271,13 @@ struct i915_vma {
        atomic_t pages_count; /* number of active binds to the pages */
 
        /**
+        * Whether we hold a reference on the vm dma_resv lock to temporarily
+        * block vm freeing until the vma is destroyed.
+        * Protected by the vm mutex.
+        */
+       bool vm_ddestroy;
+
+       /**
         * Support different GGTT views into the same object.
         * This means there can be multiple VMA mappings per object and per VM.
         * i915_ggtt_view_type is used to distinguish between those entries.
index 1c841f6..6a6ee2f 100644 (file)
@@ -17,7 +17,7 @@ static const struct {
                .class = INTEL_MEMORY_SYSTEM,
                .instance = 0,
        },
-       [INTEL_REGION_LMEM] = {
+       [INTEL_REGION_LMEM_0] = {
                .class = INTEL_MEMORY_LOCAL,
                .instance = 0,
        },
index 21dcbd6..2d84aff 100644 (file)
@@ -29,14 +29,17 @@ enum intel_memory_type {
 
 enum intel_region_id {
        INTEL_REGION_SMEM = 0,
-       INTEL_REGION_LMEM,
+       INTEL_REGION_LMEM_0,
+       INTEL_REGION_LMEM_1,
+       INTEL_REGION_LMEM_2,
+       INTEL_REGION_LMEM_3,
        INTEL_REGION_STOLEN_SMEM,
        INTEL_REGION_STOLEN_LMEM,
        INTEL_REGION_UNKNOWN, /* Should be last */
 };
 
 #define REGION_SMEM     BIT(INTEL_REGION_SMEM)
-#define REGION_LMEM     BIT(INTEL_REGION_LMEM)
+#define REGION_LMEM     BIT(INTEL_REGION_LMEM_0)
 #define REGION_STOLEN_SMEM   BIT(INTEL_REGION_STOLEN_SMEM)
 #define REGION_STOLEN_LMEM   BIT(INTEL_REGION_STOLEN_LMEM)
 
@@ -54,6 +57,7 @@ struct intel_memory_region_ops {
 
        int (*init_object)(struct intel_memory_region *mem,
                           struct drm_i915_gem_object *obj,
+                          resource_size_t offset,
                           resource_size_t size,
                           resource_size_t page_size,
                           unsigned int flags);
index 737ef3f..62ff774 100644 (file)
@@ -12,6 +12,7 @@
 
 #include "intel_region_ttm.h"
 
+#include "gem/i915_gem_region.h"
 #include "gem/i915_gem_ttm.h" /* For the funcs/ops export only */
 /**
  * DOC: TTM support structure
@@ -191,6 +192,7 @@ intel_region_ttm_resource_to_rsgt(struct intel_memory_region *mem,
  */
 struct ttm_resource *
 intel_region_ttm_resource_alloc(struct intel_memory_region *mem,
+                               resource_size_t offset,
                                resource_size_t size,
                                unsigned int flags)
 {
@@ -202,7 +204,10 @@ intel_region_ttm_resource_alloc(struct intel_memory_region *mem,
 
        if (flags & I915_BO_ALLOC_CONTIGUOUS)
                place.flags |= TTM_PL_FLAG_CONTIGUOUS;
-       if (mem->io_size && mem->io_size < mem->total) {
+       if (offset != I915_BO_INVALID_OFFSET) {
+               place.fpfn = offset >> PAGE_SHIFT;
+               place.lpfn = place.fpfn + (size >> PAGE_SHIFT);
+       } else if (mem->io_size && mem->io_size < mem->total) {
                if (flags & I915_BO_ALLOC_GPU_ONLY) {
                        place.flags |= TTM_PL_FLAG_TOPDOWN;
                } else {
index fdee5e7..cf9d86d 100644 (file)
@@ -36,6 +36,7 @@ struct ttm_device_funcs *i915_ttm_driver(void);
 #ifdef CONFIG_DRM_I915_SELFTEST
 struct ttm_resource *
 intel_region_ttm_resource_alloc(struct intel_memory_region *mem,
+                               resource_size_t offset,
                                resource_size_t size,
                                unsigned int flags);
 #endif
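
A hedged sketch of the new @offset parameter: passing a real offset constrains the TTM placement to exactly [offset, offset + size), while I915_BO_INVALID_OFFSET keeps the previous behaviour of letting the allocator choose. The wrapper and its arguments are illustrative only.

static struct ttm_resource *
example_alloc_pinned_range(struct intel_memory_region *mem,
                           resource_size_t offset, resource_size_t size,
                           unsigned int flags)
{
        /* e.g. taking over a firmware-programmed framebuffer at @offset;
         * place.fpfn/lpfn are derived from @offset and @size above. */
        return intel_region_ttm_resource_alloc(mem, offset, size, flags);
}
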
index dd8fdd5..8b9caaa 100644 (file)
@@ -2039,14 +2039,11 @@ static int i915_pmic_bus_access_notifier(struct notifier_block *nb,
        return NOTIFY_OK;
 }
 
-int intel_uncore_setup_mmio(struct intel_uncore *uncore)
+int intel_uncore_setup_mmio(struct intel_uncore *uncore, phys_addr_t phys_addr)
 {
        struct drm_i915_private *i915 = uncore->i915;
-       struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
-       int mmio_bar;
        int mmio_size;
 
-       mmio_bar = GRAPHICS_VER(i915) == 2 ? 1 : 0;
        /*
         * Before gen4, the registers and the GTT are behind different BARs.
         * However, from gen4 onwards, the registers and the GTT are shared
@@ -2063,7 +2060,7 @@ int intel_uncore_setup_mmio(struct intel_uncore *uncore)
        else
                mmio_size = 2 * 1024 * 1024;
 
-       uncore->regs = pci_iomap(pdev, mmio_bar, mmio_size);
+       uncore->regs = ioremap(phys_addr, mmio_size);
        if (uncore->regs == NULL) {
                drm_err(&i915->drm, "failed to map registers\n");
                return -EIO;
@@ -2074,9 +2071,7 @@ int intel_uncore_setup_mmio(struct intel_uncore *uncore)
 
 void intel_uncore_cleanup_mmio(struct intel_uncore *uncore)
 {
-       struct pci_dev *pdev = to_pci_dev(uncore->i915->drm.dev);
-
-       pci_iounmap(pdev, uncore->regs);
+       iounmap(uncore->regs);
 }
 
 void intel_uncore_init_early(struct intel_uncore *uncore,
@@ -2464,17 +2459,46 @@ intel_uncore_forcewake_for_reg(struct intel_uncore *uncore,
        return fw_domains;
 }
 
-u32 intel_uncore_read_with_mcr_steering_fw(struct intel_uncore *uncore,
-                                          i915_reg_t reg,
-                                          int slice, int subslice)
+/**
+ * uncore_rw_with_mcr_steering_fw - Access a register after programming
+ *                                 the MCR selector register.
+ * @uncore: pointer to struct intel_uncore
+ * @reg: register being accessed
+ * @rw_flag: FW_REG_READ for read access or FW_REG_WRITE for write access
+ * @slice: slice number (ignored for multi-cast write)
+ * @subslice: sub-slice number (ignored for multi-cast write)
+ * @value: register value to be written (ignored for read)
+ *
+ * Return: 0 for write access, register value for read access.
+ *
+ * Caller needs to make sure the relevant forcewake wells are up.
+ */
+static u32 uncore_rw_with_mcr_steering_fw(struct intel_uncore *uncore,
+                                         i915_reg_t reg, u8 rw_flag,
+                                         int slice, int subslice, u32 value)
 {
-       u32 mcr_mask, mcr_ss, mcr, old_mcr, val;
+       u32 mcr_mask, mcr_ss, mcr, old_mcr, val = 0;
 
        lockdep_assert_held(&uncore->lock);
 
        if (GRAPHICS_VER(uncore->i915) >= 11) {
                mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;
                mcr_ss = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice);
+
+               /*
+                * Wa_22013088509
+                *
+                * The setting of the multicast/unicast bit usually wouldn't
+                * matter for read operations (which always return the value
+                * from a single register instance regardless of how that bit
+                * is set), but some platforms have a workaround requiring us
+                * to remain in multicast mode for reads.  There's no real
+                * downside to this, so we'll just go ahead and do so on all
+                * platforms; we'll only clear the multicast bit from the mask
+                * when explicitly doing a write operation.
+                */
+               if (rw_flag == FW_REG_WRITE)
+                       mcr_mask |= GEN11_MCR_MULTICAST;
        } else {
                mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
                mcr_ss = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
@@ -2486,7 +2510,10 @@ u32 intel_uncore_read_with_mcr_steering_fw(struct intel_uncore *uncore,
        mcr |= mcr_ss;
        intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);
 
-       val = intel_uncore_read_fw(uncore, reg);
+       if (rw_flag == FW_REG_READ)
+               val = intel_uncore_read_fw(uncore, reg);
+       else
+               intel_uncore_write_fw(uncore, reg, value);
 
        mcr &= ~mcr_mask;
        mcr |= old_mcr & mcr_mask;
@@ -2496,14 +2523,16 @@ u32 intel_uncore_read_with_mcr_steering_fw(struct intel_uncore *uncore,
        return val;
 }
 
-u32 intel_uncore_read_with_mcr_steering(struct intel_uncore *uncore,
-                                       i915_reg_t reg, int slice, int subslice)
+static u32 uncore_rw_with_mcr_steering(struct intel_uncore *uncore,
+                                      i915_reg_t reg, u8 rw_flag,
+                                      int slice, int subslice,
+                                      u32 value)
 {
        enum forcewake_domains fw_domains;
        u32 val;
 
        fw_domains = intel_uncore_forcewake_for_reg(uncore, reg,
-                                                   FW_REG_READ);
+                                                   rw_flag);
        fw_domains |= intel_uncore_forcewake_for_reg(uncore,
                                                     GEN8_MCR_SELECTOR,
                                                     FW_REG_READ | FW_REG_WRITE);
@@ -2511,7 +2540,8 @@ u32 intel_uncore_read_with_mcr_steering(struct intel_uncore *uncore,
        spin_lock_irq(&uncore->lock);
        intel_uncore_forcewake_get__locked(uncore, fw_domains);
 
-       val = intel_uncore_read_with_mcr_steering_fw(uncore, reg, slice, subslice);
+       val = uncore_rw_with_mcr_steering_fw(uncore, reg, rw_flag,
+                                            slice, subslice, value);
 
        intel_uncore_forcewake_put__locked(uncore, fw_domains);
        spin_unlock_irq(&uncore->lock);
@@ -2519,6 +2549,28 @@ u32 intel_uncore_read_with_mcr_steering(struct intel_uncore *uncore,
        return val;
 }
 
+u32 intel_uncore_read_with_mcr_steering_fw(struct intel_uncore *uncore,
+                                          i915_reg_t reg, int slice, int subslice)
+{
+       return uncore_rw_with_mcr_steering_fw(uncore, reg, FW_REG_READ,
+                                             slice, subslice, 0);
+}
+
+u32 intel_uncore_read_with_mcr_steering(struct intel_uncore *uncore,
+                                       i915_reg_t reg, int slice, int subslice)
+{
+       return uncore_rw_with_mcr_steering(uncore, reg, FW_REG_READ,
+                                          slice, subslice, 0);
+}
+
+void intel_uncore_write_with_mcr_steering(struct intel_uncore *uncore,
+                                         i915_reg_t reg, u32 value,
+                                         int slice, int subslice)
+{
+       uncore_rw_with_mcr_steering(uncore, reg, FW_REG_WRITE,
+                                   slice, subslice, value);
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/mock_uncore.c"
 #include "selftests/intel_uncore.c"
index 6ff56d6..52fe3d8 100644 (file)
@@ -29,6 +29,7 @@
 #include <linux/notifier.h>
 #include <linux/hrtimer.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/types.h>
 
 #include "i915_reg_defs.h"
 
@@ -214,12 +215,14 @@ u32 intel_uncore_read_with_mcr_steering_fw(struct intel_uncore *uncore,
                                           int slice, int subslice);
 u32 intel_uncore_read_with_mcr_steering(struct intel_uncore *uncore,
                                        i915_reg_t reg, int slice, int subslice);
-
+void intel_uncore_write_with_mcr_steering(struct intel_uncore *uncore,
+                                         i915_reg_t reg, u32 value,
+                                         int slice, int subslice);
 void
 intel_uncore_mmio_debug_init_early(struct intel_uncore_mmio_debug *mmio_debug);
 void intel_uncore_init_early(struct intel_uncore *uncore,
                             struct intel_gt *gt);
-int intel_uncore_setup_mmio(struct intel_uncore *uncore);
+int intel_uncore_setup_mmio(struct intel_uncore *uncore, phys_addr_t phys_addr);
 int intel_uncore_init_mmio(struct intel_uncore *uncore);
 void intel_uncore_prune_engine_fw_domains(struct intel_uncore *uncore,
                                          struct intel_gt *gt);
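
A hedged sketch of a caller under the new intel_uncore_setup_mmio() prototype: the BAR lookup that used to live inside the function (removed above) now happens in the probe path, which passes the resolved physical address down. The wrapper name is illustrative.

static int example_setup_mmio(struct drm_i915_private *i915)
{
        struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
        int mmio_bar = GRAPHICS_VER(i915) == 2 ? 1 : 0;
        phys_addr_t phys_addr = pci_resource_start(pdev, mmio_bar);

        return intel_uncore_setup_mmio(&i915->uncore, phys_addr);
}
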
index ab75119..5c9bfa4 100644 (file)
@@ -1205,7 +1205,7 @@ static int exercise_ppgtt(struct drm_i915_private *dev_priv,
                goto out_free;
        }
        GEM_BUG_ON(offset_in_page(ppgtt->vm.total));
-       GEM_BUG_ON(!atomic_read(&ppgtt->vm.open));
+       assert_vm_alive(&ppgtt->vm);
 
        err = func(&ppgtt->vm, 0, ppgtt->vm.total, end_time);
 
@@ -1438,7 +1438,7 @@ static void track_vma_bind(struct i915_vma *vma)
        vma->resource->bi.pages = vma->pages;
 
        mutex_lock(&vma->vm->mutex);
-       list_add_tail(&vma->vm_link, &vma->vm->bound_list);
+       list_move_tail(&vma->vm_link, &vma->vm->bound_list);
        mutex_unlock(&vma->vm->mutex);
 }
 
index 573d9b2..9c31a16 100644 (file)
@@ -73,7 +73,7 @@ static void mock_device_release(struct drm_device *dev)
        destroy_workqueue(i915->wq);
 
        intel_region_ttm_device_fini(i915);
-       intel_gt_driver_late_release(to_gt(i915));
+       intel_gt_driver_late_release_all(i915);
        intel_memory_regions_driver_release(i915);
 
        drm_mode_config_cleanup(&i915->drm);
@@ -112,6 +112,11 @@ static struct dev_pm_domain pm_domain = {
        },
 };
 
+static void mock_gt_probe(struct drm_i915_private *i915)
+{
+       i915->gt[0] = &i915->gt0;
+}
+
 struct drm_i915_private *mock_gem_device(void)
 {
 #if IS_ENABLED(CONFIG_IOMMU_API) && defined(CONFIG_INTEL_IOMMU)
@@ -180,11 +185,11 @@ struct drm_i915_private *mock_gem_device(void)
        spin_lock_init(&i915->gpu_error.lock);
 
        i915_gem_init__mm(i915);
-       intel_gt_init_early(to_gt(i915), i915);
-       __intel_gt_init_early(to_gt(i915), i915);
+       intel_root_gt_init_early(i915);
        mock_uncore_init(&i915->uncore, i915);
        atomic_inc(&to_gt(i915)->wakeref.count); /* disable; no hw support */
        to_gt(i915)->awake = -ENODEV;
+       mock_gt_probe(i915);
 
        ret = intel_region_ttm_device_init(i915);
        if (ret)
@@ -229,7 +234,7 @@ err_unlock:
 err_drv:
        intel_region_ttm_device_fini(i915);
 err_ttm:
-       intel_gt_driver_late_release(to_gt(i915));
+       intel_gt_driver_late_release_all(i915);
        intel_memory_regions_driver_release(i915);
        drm_mode_config_cleanup(&i915->drm);
        mock_destroy_device(i915);
index f643254..670557c 100644 (file)
@@ -26,6 +26,7 @@ static int mock_region_get_pages(struct drm_i915_gem_object *obj)
        int err;
 
        obj->mm.res = intel_region_ttm_resource_alloc(obj->mm.region,
+                                                     obj->bo_offset,
                                                      obj->base.size,
                                                      obj->flags);
        if (IS_ERR(obj->mm.res))
@@ -57,6 +58,7 @@ static const struct drm_i915_gem_object_ops mock_region_obj_ops = {
 
 static int mock_object_init(struct intel_memory_region *mem,
                            struct drm_i915_gem_object *obj,
+                           resource_size_t offset,
                            resource_size_t size,
                            resource_size_t page_size,
                            unsigned int flags)
@@ -70,6 +72,8 @@ static int mock_object_init(struct intel_memory_region *mem,
        drm_gem_private_object_init(&i915->drm, &obj->base, size);
        i915_gem_object_init(obj, &mock_region_obj_ops, &lock_class, flags);
 
+       obj->bo_offset = offset;
+
        obj->read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT;
 
        i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE);
index 05c3642..9ab021c 100644 (file)
@@ -2683,6 +2683,9 @@ struct drm_i915_perf_oa_config {
  *
  * The behaviour is determined by the @query_id. Note that exactly what
  * @data_ptr is also depends on the specific @query_id.
+ *
+ * For specific queries see:
+ *  * `GuC HWCONFIG blob uAPI`_
  */
 struct drm_i915_query_item {
        /** @query_id: The id for this query */
@@ -2691,6 +2694,7 @@ struct drm_i915_query_item {
 #define DRM_I915_QUERY_ENGINE_INFO     2
 #define DRM_I915_QUERY_PERF_CONFIG      3
 #define DRM_I915_QUERY_MEMORY_REGIONS   4
+#define DRM_I915_QUERY_HWCONFIG_BLOB   5
 /* Must be kept compact -- no holes and well documented */
 
        /**
@@ -3135,6 +3139,16 @@ struct drm_i915_query_memory_regions {
 };
 
 /**
+ * DOC: GuC HWCONFIG blob uAPI
+ *
+ * The GuC produces a blob with information about the current device.
+ * i915 reads this blob from GuC and makes it available via this uAPI.
+ *
+ * The format and meaning of the blob content are documented in the
+ * Programmer's Reference Manual.
+ */
+
+/**
  * struct drm_i915_gem_create_ext - Existing gem_create behaviour, with added
  * extension support using struct i915_user_extension.
  *