r = block->version->funcs->hw_init(adev);
DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"successed");
+ if (r)
+ return r;
}
}
r = block->version->funcs->hw_init(adev);
DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"successed");
+ if (r)
+ return r;
}
}
return r;
}
+static int amdgpu_device_handle_vram_lost(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+ struct amdgpu_bo *bo, *tmp;
+ struct dma_fence *fence = NULL, *next = NULL;
+ long r = 1;
+ int i = 0;
+ long tmo;
+
+ if (amdgpu_sriov_runtime(adev))
+ tmo = msecs_to_jiffies(amdgpu_lockup_timeout);
+ else
+ tmo = msecs_to_jiffies(100);
+
+ DRM_INFO("recover vram bo from shadow start\n");
+ mutex_lock(&adev->shadow_list_lock);
+ list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) {
+ next = NULL;
+ amdgpu_device_recover_vram_from_shadow(adev, ring, bo, &next);
+ if (fence) {
+ r = dma_fence_wait_timeout(fence, false, tmo);
+ if (r == 0)
+ pr_err("wait fence %p[%d] timeout\n", fence, i);
+ else if (r < 0)
+ pr_err("wait fence %p[%d] interrupted\n", fence, i);
+ if (r < 1) {
+ dma_fence_put(fence);
+ fence = next;
+ break;
+ }
+ i++;
+ }
+
+ dma_fence_put(fence);
+ fence = next;
+ }
+ mutex_unlock(&adev->shadow_list_lock);
+
+ if (fence) {
+ r = dma_fence_wait_timeout(fence, false, tmo);
+ if (r == 0)
+ pr_err("wait fence %p[%d] timeout\n", fence, i);
+ else if (r < 0)
+ pr_err("wait fence %p[%d] interrupted\n", fence, i);
+
+ }
+ dma_fence_put(fence);
+
+ if (r > 0)
+ DRM_INFO("recover vram bo from shadow done\n");
+ else
+ DRM_ERROR("recover vram bo from shadow failed\n");
+
+ return (r > 0?0:1);
+}
+
/*
* amdgpu_device_reset - reset ASIC/GPU for bare-metal or passthrough
*
* @adev: amdgpu device pointer
- * @reset_flags: output param tells caller the reset result
*
* attempt to do soft-reset or full-reset and reinitialize Asic
* return 0 means successed otherwise failed
*/
-static int amdgpu_device_reset(struct amdgpu_device *adev,
- uint64_t* reset_flags)
+static int amdgpu_device_reset(struct amdgpu_device *adev)
{
bool need_full_reset, vram_lost = 0;
int r;
DRM_INFO("soft reset failed, will fallback to full reset!\n");
need_full_reset = true;
}
-
}
if (need_full_reset) {
}
}
- if (reset_flags) {
- if (vram_lost)
- (*reset_flags) |= AMDGPU_RESET_INFO_VRAM_LOST;
-
- if (need_full_reset)
- (*reset_flags) |= AMDGPU_RESET_INFO_FULLRESET;
- }
+ if (!r && ((need_full_reset && !(adev->flags & AMD_IS_APU)) || vram_lost))
+ r = amdgpu_device_handle_vram_lost(adev);
return r;
}
* amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
*
* @adev: amdgpu device pointer
- * @reset_flags: output param tells caller the reset result
*
* do VF FLR and reinitialize Asic
* return 0 means successed otherwise failed
*/
-static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
- uint64_t *reset_flags,
- bool from_hypervisor)
+static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, bool from_hypervisor)
{
int r;
/* now we are okay to resume SMC/CP/SDMA */
r = amdgpu_device_ip_reinit_late_sriov(adev);
+ amdgpu_virt_release_full_gpu(adev, true);
if (r)
goto error;
amdgpu_irq_gpu_reset_resume_helper(adev);
r = amdgpu_ib_ring_tests(adev);
- if (r)
- dev_err(adev->dev, "[GPU_RESET] ib ring test failed (%d).\n", r);
-error:
- /* release full control of GPU after ib test */
- amdgpu_virt_release_full_gpu(adev, true);
-
- if (reset_flags) {
- if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
- (*reset_flags) |= AMDGPU_RESET_INFO_VRAM_LOST;
- atomic_inc(&adev->vram_lost_counter);
- }
-
- /* VF FLR or hotlink reset is always full-reset */
- (*reset_flags) |= AMDGPU_RESET_INFO_FULLRESET;
+ if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
+ atomic_inc(&adev->vram_lost_counter);
+ r = amdgpu_device_handle_vram_lost(adev);
}
+error:
+
return r;
}
struct amdgpu_job *job, bool force)
{
struct drm_atomic_state *state = NULL;
- uint64_t reset_flags = 0;
int i, r, resched;
if (!force && !amdgpu_device_ip_check_soft_reset(adev)) {
}
if (amdgpu_sriov_vf(adev))
- r = amdgpu_device_reset_sriov(adev, &reset_flags, job ? false : true);
+ r = amdgpu_device_reset_sriov(adev, job ? false : true);
else
- r = amdgpu_device_reset(adev, &reset_flags);
-
- if (!r) {
- if (((reset_flags & AMDGPU_RESET_INFO_FULLRESET) && !(adev->flags & AMD_IS_APU)) ||
- (reset_flags & AMDGPU_RESET_INFO_VRAM_LOST)) {
- struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
- struct amdgpu_bo *bo, *tmp;
- struct dma_fence *fence = NULL, *next = NULL;
-
- DRM_INFO("recover vram bo from shadow\n");
- mutex_lock(&adev->shadow_list_lock);
- list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) {
- next = NULL;
- amdgpu_device_recover_vram_from_shadow(adev, ring, bo, &next);
- if (fence) {
- r = dma_fence_wait(fence, false);
- if (r) {
- WARN(r, "recovery from shadow isn't completed\n");
- break;
- }
- }
-
- dma_fence_put(fence);
- fence = next;
- }
- mutex_unlock(&adev->shadow_list_lock);
- if (fence) {
- r = dma_fence_wait(fence, false);
- if (r)
- WARN(r, "recovery from shadow isn't completed\n");
- }
- dma_fence_put(fence);
- }
- }
+ r = amdgpu_device_reset(adev);
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];