drm/radeon: add fault decode function for cayman/TN (v2)
authorAlex Deucher <alexander.deucher@amd.com>
Thu, 13 Jun 2013 22:26:25 +0000 (18:26 -0400)
committerAlex Deucher <alexander.deucher@amd.com>
Sun, 14 Jul 2013 14:11:28 +0000 (10:11 -0400)
Helpful for debugging GPUVM errors as we can see what
hw block and page generated the fault in the log.

v2: simplify fault decoding

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
drivers/gpu/drm/radeon/evergreen.c
drivers/gpu/drm/radeon/ni.c
drivers/gpu/drm/radeon/nid.h

index e49059d..526e428 100644 (file)
@@ -139,6 +139,8 @@ void evergreen_pcie_gen2_enable(struct radeon_device *rdev);
 void evergreen_program_aspm(struct radeon_device *rdev);
 extern void cayman_cp_int_cntl_setup(struct radeon_device *rdev,
                                     int ring, u32 cp_int_cntl);
+extern void cayman_vm_decode_fault(struct radeon_device *rdev,
+                                  u32 status, u32 addr);
 
 static const u32 evergreen_golden_registers[] =
 {
@@ -4586,6 +4588,7 @@ int evergreen_irq_process(struct radeon_device *rdev)
        bool queue_hotplug = false;
        bool queue_hdmi = false;
        bool queue_thermal = false;
+       u32 status, addr;
 
        if (!rdev->ih.enabled || rdev->shutdown)
                return IRQ_NONE;
@@ -4872,11 +4875,14 @@ restart_ih:
                        break;
                case 146:
                case 147:
+                       addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
+                       status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
                        dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
                        dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
-                               RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
+                               addr);
                        dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
-                               RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
+                               status);
+                       cayman_vm_decode_fault(rdev, status, addr);
                        /* reset addr and status */
                        WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
                        break;
index 465b17e..56bd4f3 100644 (file)
@@ -2450,6 +2450,167 @@ void cayman_vm_fini(struct radeon_device *rdev)
 {
 }
 
+/**
+ * cayman_vm_decode_fault - print human readable fault info
+ *
+ * @rdev: radeon_device pointer (the message is logged against rdev->dev)
+ * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
+ * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value (printed as the page)
+ *
+ * Decode client id, vmid, protections and r/w from @status and log them (cayman/TN).
+ */
+void cayman_vm_decode_fault(struct radeon_device *rdev,
+                           u32 status, u32 addr)
+{
+       u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
+       u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
+       u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
+       const char *block; /* only ever points at string literals */
+
+       switch (mc_id) { /* map the memory client id to a hw block name */
+       case 32:
+       case 16:
+       case 96:
+       case 80:
+       case 160:
+       case 144:
+       case 224:
+       case 208:
+               block = "CB";
+               break;
+       case 33:
+       case 17:
+       case 97:
+       case 81:
+       case 161:
+       case 145:
+       case 225:
+       case 209:
+               block = "CB_FMASK";
+               break;
+       case 34:
+       case 18:
+       case 98:
+       case 82:
+       case 162:
+       case 146:
+       case 226:
+       case 210:
+               block = "CB_CMASK";
+               break;
+       case 35:
+       case 19:
+       case 99:
+       case 83:
+       case 163:
+       case 147:
+       case 227:
+       case 211:
+               block = "CB_IMMED";
+               break;
+       case 36:
+       case 20:
+       case 100:
+       case 84:
+       case 164:
+       case 148:
+       case 228:
+       case 212:
+               block = "DB";
+               break;
+       case 37:
+       case 21:
+       case 101:
+       case 85:
+       case 165:
+       case 149:
+       case 229:
+       case 213:
+               block = "DB_HTILE";
+               break;
+       case 38:
+       case 22:
+       case 102:
+       case 86:
+       case 166:
+       case 150:
+       case 230:
+       case 214:
+               block = "SX";
+               break;
+       case 39:
+       case 23:
+       case 103:
+       case 87:
+       case 167:
+       case 151:
+       case 231:
+       case 215:
+               block = "DB_STEN";
+               break;
+       case 40:
+       case 24:
+       case 104:
+       case 88:
+       case 232:
+       case 216:
+       case 168:
+       case 152:
+               block = "TC_TFETCH";
+               break;
+       case 41:
+       case 25:
+       case 105:
+       case 89:
+       case 233:
+       case 217:
+       case 169:
+       case 153:
+               block = "TC_VFETCH";
+               break;
+       case 42:
+       case 26:
+       case 106:
+       case 90:
+       case 234:
+       case 218:
+       case 170:
+       case 154:
+               block = "VC";
+               break;
+       case 112:
+               block = "CP";
+               break;
+       case 113:
+       case 114:
+               block = "SH";
+               break;
+       case 115:
+               block = "VGT";
+               break;
+       case 178:
+               block = "IH";
+               break;
+       case 51:
+               block = "RLC";
+               break;
+       case 55:
+               block = "DMA";
+               break;
+       case 56:
+               block = "HDP";
+               break;
+       default: /* id not in the table above; the raw id is still printed */
+               block = "unknown";
+               break;
+       }
+
+       dev_err(rdev->dev, "VM fault (0x%02x, vmid %u) at page %u, %s from %s (%u)\n",
+               protections, vmid, addr,
+               (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
+               block, mc_id);
+}
+
 #define R600_ENTRY_VALID   (1 << 0)
 #define R600_PTE_SYSTEM    (1 << 1)
 #define R600_PTE_SNOOPED   (1 << 2)
index fe24a93..22421bc 100644 (file)
 #define VM_CONTEXT1_CNTL2                              0x1434
 #define VM_INVALIDATE_REQUEST                          0x1478
 #define VM_INVALIDATE_RESPONSE                         0x147c
+/* Registers read on a VM context 1 protection fault; the fields below are
+ * extracted from the STATUS value by cayman_vm_decode_fault() in ni.c.
+ */
+#define        VM_CONTEXT1_PROTECTION_FAULT_ADDR               0x14FC
+#define        VM_CONTEXT1_PROTECTION_FAULT_STATUS             0x14DC
+#define                PROTECTIONS_MASK                        (0xf << 0)
+#define                PROTECTIONS_SHIFT                       0
+               /* bit 0: range
+                * bit 2: pde0
+                * bit 3: valid
+                * bit 4: read
+                * bit 5: write
+                *
+                * NOTE(review): PROTECTIONS_MASK is 0xf, i.e. bits 0-3 only,
+                * so bits 4 and 5 listed above are not captured by the mask.
+                * Confirm the field width against the register spec.
+                */
+#define                MEMORY_CLIENT_ID_MASK                   (0xff << 12)
+#define                MEMORY_CLIENT_ID_SHIFT                  12
+               /* RW bit: reported as "write" when set, "read" when clear */
+#define                MEMORY_CLIENT_RW_MASK                   (1 << 24)
+#define                MEMORY_CLIENT_RW_SHIFT                  24
+#define                FAULT_VMID_MASK                         (0x7 << 25)
+#define                FAULT_VMID_SHIFT                        25
 #define VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR      0x1518
 #define VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR      0x151c
 #define        VM_CONTEXT0_PAGE_TABLE_BASE_ADDR                0x153C