drm/amdgpu: check if vram is lost v2
author Chunming Zhou <David1.Zhou@amd.com>
Mon, 15 May 2017 06:20:00 +0000 (14:20 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Wed, 24 May 2017 22:10:06 +0000 (18:10 -0400)
Back up the first 64 bytes of the GART table as a reset magic, and check whether
the magic is unchanged after a GPU hw reset.
v2: use memcmp instead of a manual implementation.

Signed-off-by: Chunming Zhou <David1.Zhou@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

index 8274d8e..ec9774c 100644 (file)
@@ -1427,6 +1427,7 @@ typedef void (*amdgpu_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t);
 typedef uint32_t (*amdgpu_block_rreg_t)(struct amdgpu_device*, uint32_t, uint32_t);
 typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t, uint32_t);
 
+#define AMDGPU_RESET_MAGIC_NUM 64
 struct amdgpu_device {
        struct device                   *dev;
        struct drm_device               *ddev;
@@ -1619,6 +1620,7 @@ struct amdgpu_device {
 
        /* record hw reset is performed */
        bool has_hw_reset;
+       u8                              reset_magic[AMDGPU_RESET_MAGIC_NUM];
 
 };
 
index 8eb1625..5a17007 100644 (file)
@@ -1658,6 +1658,17 @@ static int amdgpu_init(struct amdgpu_device *adev)
        return 0;
 }
 
+static void amdgpu_fill_reset_magic(struct amdgpu_device *adev)
+{
+       memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
+}
+
+static bool amdgpu_check_vram_lost(struct amdgpu_device *adev)
+{
+       return !!memcmp(adev->gart.ptr, adev->reset_magic,
+                       AMDGPU_RESET_MAGIC_NUM);
+}
+
 static int amdgpu_late_init(struct amdgpu_device *adev)
 {
        int i = 0, r;
@@ -1688,6 +1699,8 @@ static int amdgpu_late_init(struct amdgpu_device *adev)
                }
        }
 
+       amdgpu_fill_reset_magic(adev);
+
        return 0;
 }
 
@@ -2762,7 +2775,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
 {
        int i, r;
        int resched;
-       bool need_full_reset;
+       bool need_full_reset, vram_lost = false;
 
        if (!amdgpu_check_soft_reset(adev)) {
                DRM_INFO("No hardware hang detected. Did some blocks stall?\n");
@@ -2825,12 +2838,17 @@ retry:
                        r = amdgpu_resume_phase1(adev);
                        if (r)
                                goto out;
+                       vram_lost = amdgpu_check_vram_lost(adev);
+                       if (vram_lost)
+                               DRM_ERROR("VRAM is lost!\n");
                        r = amdgpu_ttm_recover_gart(adev);
                        if (r)
                                goto out;
                        r = amdgpu_resume_phase2(adev);
                        if (r)
                                goto out;
+                       if (vram_lost)
+                               amdgpu_fill_reset_magic(adev);
                }
        }
 out: