From: John Clements Date: Wed, 25 Mar 2020 08:01:14 +0000 (+0800) Subject: drm/amdgpu: disable ras query and iject during gpu reset X-Git-Tag: v5.10.7~2352^2~20^2~241 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=61380faa4b4cc577df8a7ff5db5859bac6b351f7;p=platform%2Fkernel%2Flinux-rpi.git drm/amdgpu: disable ras query and iject during gpu reset added flag to ras context to indicate if ras query functionality is ready Reviewed-by: Hawking Zhang Signed-off-by: John Clements Signed-off-by: Alex Deucher --- diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 4497200..f88fe7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4168,6 +4168,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, need_full_reset = job_signaled = false; INIT_LIST_HEAD(&device_list); + amdgpu_ras_set_error_query_ready(adev, false); + dev_info(adev->dev, "GPU %s begin!\n", (in_ras_intr && !use_baco) ? "jobs stop":"reset"); @@ -4224,6 +4226,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, /* block all schedulers and reset given job's ring */ list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { if (tmp_adev != adev) { + amdgpu_ras_set_error_query_ready(tmp_adev, false); amdgpu_device_lock_adev(tmp_adev, false); if (!amdgpu_sriov_vf(tmp_adev)) amdgpu_amdkfd_pre_reset(tmp_adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 3c32a94..9e9e0f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -80,6 +80,20 @@ atomic_t amdgpu_ras_in_intr = ATOMIC_INIT(0); static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev, uint64_t addr); +void amdgpu_ras_set_error_query_ready(struct amdgpu_device *adev, bool ready) +{ + if (adev) + amdgpu_ras_get_context(adev)->error_query_ready = ready; +} + +bool amdgpu_ras_get_error_query_ready(struct amdgpu_device *adev) +{ + if (adev) + return amdgpu_ras_get_context(adev)->error_query_ready; + + return false; +} + static ssize_t amdgpu_ras_debugfs_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { @@ -281,7 +295,7 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user * struct ras_debug_if data; int ret = 0; - if (amdgpu_ras_intr_triggered()) { + if (!amdgpu_ras_get_error_query_ready(adev)) { DRM_WARN("RAS WARN: error injection currently inaccessible\n"); return size; } @@ -399,7 +413,7 @@ static ssize_t amdgpu_ras_sysfs_read(struct device *dev, .head = obj->head, }; - if (amdgpu_ras_intr_triggered()) + if (!amdgpu_ras_get_error_query_ready(obj->adev)) return snprintf(buf, PAGE_SIZE, "Query currently inaccessible\n"); @@ -1886,8 +1900,10 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev, } /* in resume phase, no need to create ras fs node */ - if (adev->in_suspend || adev->in_gpu_reset) + if (adev->in_suspend || adev->in_gpu_reset) { + amdgpu_ras_set_error_query_ready(adev, true); return 0; + } if (ih_info->cb) { r = amdgpu_ras_interrupt_add_handler(adev, ih_info); @@ -1899,6 +1915,8 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev, if (r) goto sysfs; + amdgpu_ras_set_error_query_ready(adev, true); + return 0; cleanup: amdgpu_ras_sysfs_remove(adev, ras_block); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 55c3ece..e7df5d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -334,6 +334,8 @@ struct amdgpu_ras { uint32_t flags; bool reboot; struct amdgpu_ras_eeprom_control eeprom_control; + + bool error_query_ready; }; struct ras_fs_data { @@ -629,4 +631,6 @@ static inline void amdgpu_ras_intr_cleared(void) void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev); +void amdgpu_ras_set_error_query_ready(struct amdgpu_device *adev, bool ready); + #endif