drm/amdgpu: adjust timeout for ib_ring_tests(v2)
authorMonk Liu <Monk.Liu@amd.com>
Tue, 23 Jan 2018 10:26:20 +0000 (18:26 +0800)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 30 May 2018 05:52:37 +0000 (07:52 +0200)
[ Upstream commit dbf797655a43c6318ebb90b899e6583fcadc6472 ]

issue:
sometimes the GFX/MM IB test hits a timeout under the SRIOV env; the
root cause is that the engine doesn't come back soon enough, so the
current IB test is considered to have timed out.

fix:
for the SRIOV GFX IB test, the wait time needs to be expanded a lot
during SRIOV runtime mode since the test couldn't really begin before
the GFX engine comes back.

for the SRIOV MM IB test, more time is always needed since MM scheduling
does not go together with the GFX engine; it is controlled by the h/w MM
scheduler, so no matter runtime or exclusive mode, the MM IB test
always needs more time.

v2:
use ring type instead of idx to judge

Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c

index 659997bfff303b789f9f5fa6ae8ec17b0a02ae5c..cd84bd0b1eafdc571d51fa3e604416ac7088f375 100644 (file)
@@ -322,14 +322,45 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
 {
        unsigned i;
        int r, ret = 0;
+       long tmo_gfx, tmo_mm;
+
+       tmo_mm = tmo_gfx = AMDGPU_IB_TEST_TIMEOUT;
+       if (amdgpu_sriov_vf(adev)) {
+               /* for MM engines in hypervisor side they are not scheduled together
+                * with CP and SDMA engines, so even in exclusive mode MM engine could
+                * still running on other VF thus the IB TEST TIMEOUT for MM engines
+                * under SR-IOV should be set to a long time. 8 sec should be enough
+                * for the MM comes back to this VF.
+                */
+               tmo_mm = 8 * AMDGPU_IB_TEST_TIMEOUT;
+       }
+
+       if (amdgpu_sriov_runtime(adev)) {
+               /* for CP & SDMA engines since they are scheduled together so
+                * need to make the timeout width enough to cover the time
+                * cost waiting for it coming back under RUNTIME only
+               */
+               tmo_gfx = 8 * AMDGPU_IB_TEST_TIMEOUT;
+       }
 
        for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
                struct amdgpu_ring *ring = adev->rings[i];
+               long tmo;
 
                if (!ring || !ring->ready)
                        continue;
 
-               r = amdgpu_ring_test_ib(ring, AMDGPU_IB_TEST_TIMEOUT);
+               /* MM engine need more time */
+               if (ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
+                       ring->funcs->type == AMDGPU_RING_TYPE_VCE ||
+                       ring->funcs->type == AMDGPU_RING_TYPE_UVD_ENC ||
+                       ring->funcs->type == AMDGPU_RING_TYPE_VCN_DEC ||
+                       ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
+                       tmo = tmo_mm;
+               else
+                       tmo = tmo_gfx;
+
+               r = amdgpu_ring_test_ib(ring, tmo);
                if (r) {
                        ring->ready = false;