drm/amdgpu: add option to stop on VM fault
authorChristian König <christian.koenig@amd.com>
Mon, 28 Sep 2015 10:31:26 +0000 (12:31 +0200)
committerAlex Deucher <alexander.deucher@amd.com>
Wed, 14 Oct 2015 20:16:40 +0000 (16:16 -0400)
Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c

index afc9848..a3dbbd1 100644 (file)
@@ -79,6 +79,7 @@ extern int amdgpu_bapm;
 extern int amdgpu_deep_color;
 extern int amdgpu_vm_size;
 extern int amdgpu_vm_block_size;
+extern int amdgpu_vm_fault_stop;
 extern int amdgpu_enable_scheduler;
 extern int amdgpu_sched_jobs;
 extern int amdgpu_sched_hw_submission;
@@ -960,6 +961,11 @@ struct amdgpu_ring {
 #define AMDGPU_PTE_FRAG_64KB   (4 << 7)
 #define AMDGPU_LOG2_PAGES_PER_FRAG 4
 
+/* How to programm VM fault handling */
+#define AMDGPU_VM_FAULT_STOP_NEVER     0
+#define AMDGPU_VM_FAULT_STOP_FIRST     1
+#define AMDGPU_VM_FAULT_STOP_ALWAYS    2
+
 struct amdgpu_vm_pt {
        struct amdgpu_bo                *bo;
        uint64_t                        addr;
index c183772..bec0916 100644 (file)
@@ -75,6 +75,7 @@ int amdgpu_bapm = -1;
 int amdgpu_deep_color = 0;
 int amdgpu_vm_size = 8;
 int amdgpu_vm_block_size = -1;
+int amdgpu_vm_fault_stop = 0;
 int amdgpu_exp_hw_support = 0;
 int amdgpu_enable_scheduler = 1;
 int amdgpu_sched_jobs = 16;
@@ -141,6 +142,9 @@ module_param_named(vm_size, amdgpu_vm_size, int, 0444);
 MODULE_PARM_DESC(vm_block_size, "VM page table size in bits (default depending on vm_size)");
 module_param_named(vm_block_size, amdgpu_vm_block_size, int, 0444);
 
+MODULE_PARM_DESC(vm_fault_stop, "Stop on VM fault (0 = never (default), 1 = print first, 2 = always)");
+module_param_named(vm_fault_stop, amdgpu_vm_fault_stop, int, 0444);
+
 MODULE_PARM_DESC(exp_hw_support, "experimental hw support (1 = enable, 0 = disable (default))");
 module_param_named(exp_hw_support, amdgpu_exp_hw_support, int, 0444);
 
index fab5471..4883482 100644 (file)
@@ -436,6 +436,33 @@ static int gmc_v7_0_gart_set_pte_pde(struct amdgpu_device *adev,
 }
 
 /**
+ * gmc_v8_0_set_fault_enable_default - update VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ */
+static void gmc_v7_0_set_fault_enable_default(struct amdgpu_device *adev,
+                                             bool value)
+{
+       u32 tmp;
+
+       tmp = RREG32(mmVM_CONTEXT1_CNTL);
+       tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+                           RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+       tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+                           DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+       tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+                           PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+       tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+                           VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+       tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+                           READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+       tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+                           WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+       WREG32(mmVM_CONTEXT1_CNTL, tmp);
+}
+
+/**
  * gmc_v7_0_gart_enable - gart enable
  *
  * @adev: amdgpu_device pointer
@@ -523,15 +550,13 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
        tmp = RREG32(mmVM_CONTEXT1_CNTL);
        tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
        tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, 1);
-       tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
-       tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
-       tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
-       tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
-       tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
-       tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
        tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_BLOCK_SIZE,
                            amdgpu_vm_block_size - 9);
        WREG32(mmVM_CONTEXT1_CNTL, tmp);
+       if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
+               gmc_v7_0_set_fault_enable_default(adev, false);
+       else
+               gmc_v7_0_set_fault_enable_default(adev, true);
 
        if (adev->asic_type == CHIP_KAVERI) {
                tmp = RREG32(mmCHUB_CONTROL);
@@ -1268,6 +1293,9 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
        if (!addr && !status)
                return 0;
 
+       if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_FIRST)
+               gmc_v7_0_set_fault_enable_default(adev, false);
+
        dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
                entry->src_id, entry->src_data);
        dev_err(adev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
index 7bc9e9f..42b5ff8 100644 (file)
@@ -550,6 +550,35 @@ static int gmc_v8_0_gart_set_pte_pde(struct amdgpu_device *adev,
 }
 
 /**
+ * gmc_v8_0_set_fault_enable_default - update VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ */
+static void gmc_v8_0_set_fault_enable_default(struct amdgpu_device *adev,
+                                             bool value)
+{
+       u32 tmp;
+
+       tmp = RREG32(mmVM_CONTEXT1_CNTL);
+       tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+                           RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+       tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+                           DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+       tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+                           PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+       tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+                           VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+       tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+                           READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+       tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+                           WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+       tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+                           EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+       WREG32(mmVM_CONTEXT1_CNTL, tmp);
+}
+
+/**
  * gmc_v8_0_gart_enable - gart enable
  *
  * @adev: amdgpu_device pointer
@@ -663,6 +692,10 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
        tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_BLOCK_SIZE,
                            amdgpu_vm_block_size - 9);
        WREG32(mmVM_CONTEXT1_CNTL, tmp);
+       if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
+               gmc_v8_0_set_fault_enable_default(adev, false);
+       else
+               gmc_v8_0_set_fault_enable_default(adev, true);
 
        gmc_v8_0_gart_flush_gpu_tlb(adev, 0);
        DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
@@ -1268,6 +1301,9 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
        if (!addr && !status)
                return 0;
 
+       if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_FIRST)
+               gmc_v8_0_set_fault_enable_default(adev, false);
+
        dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
                entry->src_id, entry->src_data);
        dev_err(adev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",