winsys/amdgpu: add a kernel GDS management workaround retrying on -ENOMEM
author: Marek Olšák <marek.olsak@amd.com>
Mon, 6 Jun 2022 11:26:34 +0000 (07:26 -0400)
committer: Marge Bot <emma+marge@anholt.net>
Sat, 11 Jun 2022 11:14:16 +0000 (11:14 +0000)
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16885>

src/gallium/winsys/amdgpu/drm/amdgpu_cs.c

index f507847..2b03a62 100644 (file)
@@ -1628,14 +1628,26 @@ static void amdgpu_cs_submit_ib(void *job, void *gdata, int thread_index)
 
       assert(num_chunks <= ARRAY_SIZE(chunks));
 
-      r = noop ? 0 : amdgpu_cs_submit_raw2(ws->dev, acs->ctx->ctx, bo_list,
-                                           num_chunks, chunks, &seq_no);
+      r = 0;
+
+      if (!noop) {
+         /* The kernel quite often returns -ENOMEM when many parallel processes (such as test
+          * suites) use GDS, but the submission eventually succeeds after enough attempts. This
+          * happens frequently with dEQP using NGG streamout.
+          */
+         do {
+            /* Wait 1 ms and try again. */
+            if (r == -ENOMEM)
+               os_time_sleep(1000);
+
+            r = amdgpu_cs_submit_raw2(ws->dev, acs->ctx->ctx, bo_list,
+                                      num_chunks, chunks, &seq_no);
+         } while (r == -ENOMEM);
+      }
    }
 
    if (r) {
-      if (r == -ENOMEM)
-         fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
-      else if (r == -ECANCELED)
+      if (r == -ECANCELED)
          fprintf(stderr, "amdgpu: The CS has been cancelled because the context is lost.\n");
       else
          fprintf(stderr, "amdgpu: The CS has been rejected, "