anv: Use the new common device lost tracking
author Jason Ekstrand <jason@jlekstrand.net>
Tue, 19 Oct 2021 23:44:01 +0000 (18:44 -0500)
committer Jason Ekstrand <jason@jlekstrand.net>
Tue, 16 Nov 2021 16:02:08 +0000 (10:02 -0600)
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13427>

src/intel/vulkan/anv_batch_chain.c
src/intel/vulkan/anv_device.c
src/intel/vulkan/anv_perf.c
src/intel/vulkan/anv_private.h
src/intel/vulkan/anv_queue.c
src/intel/vulkan/genX_query.c

index ca54807..70e1cbf 100644 (file)
@@ -2018,9 +2018,9 @@ anv_queue_execbuf_locked(struct anv_queue *queue,
          int ret = intel_ioctl(device->perf_fd, I915_PERF_IOCTL_CONFIG,
                                (void *)(uintptr_t) query_info->oa_metrics_set_id);
          if (ret < 0) {
-            result = anv_device_set_lost(device,
-                                         "i915-perf config failed: %s",
-                                         strerror(errno));
+            result = vk_device_set_lost(&device->vk,
+                                        "i915-perf config failed: %s",
+                                        strerror(errno));
          }
       }
 
@@ -2043,13 +2043,13 @@ anv_queue_execbuf_locked(struct anv_queue *queue,
       int ret = queue->device->info.no_hw ? 0 :
          anv_gem_execbuffer(queue->device, &query_pass_execbuf);
       if (ret)
-         result = anv_queue_set_lost(queue, "execbuf2 failed: %m");
+         result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m");
    }
 
    int ret = queue->device->info.no_hw ? 0 :
       anv_gem_execbuffer(queue->device, &execbuf.execbuf);
    if (ret)
-      result = anv_queue_set_lost(queue, "execbuf2 failed: %m");
+      result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m");
 
    struct drm_i915_gem_exec_object2 *objects = execbuf.objects;
    for (uint32_t k = 0; k < execbuf.bo_count; k++) {
index aedcacc..a77152b 100644 (file)
@@ -3010,7 +3010,6 @@ VkResult anv_CreateDevice(
    }
 
    device->physical = physical_device;
-   device->_lost = false;
 
    /* XXX(chadv): Can we dup() physicalDevice->fd here? */
    device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC);
@@ -3439,74 +3438,6 @@ VkResult anv_EnumerateInstanceLayerProperties(
    return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
 }
 
-void
-_anv_device_report_lost(struct anv_device *device)
-{
-   assert(p_atomic_read(&device->_lost) > 0);
-
-   device->lost_reported = true;
-
-   for (uint32_t i = 0; i < device->queue_count; i++) {
-      struct anv_queue *queue = &device->queues[i];
-      if (queue->lost) {
-         __vk_errorf(queue, VK_ERROR_DEVICE_LOST,
-                     queue->error_file, queue->error_line,
-                     "%s", queue->error_msg);
-      }
-   }
-}
-
-VkResult
-_anv_device_set_lost(struct anv_device *device,
-                     const char *file, int line,
-                     const char *msg, ...)
-{
-   VkResult err;
-   va_list ap;
-
-   if (p_atomic_read(&device->_lost) > 0)
-      return VK_ERROR_DEVICE_LOST;
-
-   p_atomic_inc(&device->_lost);
-   device->lost_reported = true;
-
-   va_start(ap, msg);
-   err = __vk_errorv(device, VK_ERROR_DEVICE_LOST, file, line, msg, ap);
-   va_end(ap);
-
-   if (env_var_as_boolean("ANV_ABORT_ON_DEVICE_LOSS", false))
-      abort();
-
-   return err;
-}
-
-VkResult
-_anv_queue_set_lost(struct anv_queue *queue,
-                     const char *file, int line,
-                     const char *msg, ...)
-{
-   va_list ap;
-
-   if (queue->lost)
-      return VK_ERROR_DEVICE_LOST;
-
-   queue->lost = true;
-
-   queue->error_file = file;
-   queue->error_line = line;
-   va_start(ap, msg);
-   vsnprintf(queue->error_msg, sizeof(queue->error_msg),
-             msg, ap);
-   va_end(ap);
-
-   p_atomic_inc(&queue->device->_lost);
-
-   if (env_var_as_boolean("ANV_ABORT_ON_DEVICE_LOSS", false))
-      abort();
-
-   return VK_ERROR_DEVICE_LOST;
-}
-
 VkResult
 anv_device_query_status(struct anv_device *device)
 {
@@ -3514,7 +3445,7 @@ anv_device_query_status(struct anv_device *device)
     * for it.  However, it doesn't hurt to check and it potentially lets us
     * avoid an ioctl.
     */
-   if (anv_device_is_lost(device))
+   if (vk_device_is_lost(&device->vk))
       return VK_ERROR_DEVICE_LOST;
 
    uint32_t active, pending;
@@ -3522,13 +3453,13 @@ anv_device_query_status(struct anv_device *device)
                                              &active, &pending);
    if (ret == -1) {
       /* We don't know the real error. */
-      return anv_device_set_lost(device, "get_reset_stats failed: %m");
+      return vk_device_set_lost(&device->vk, "get_reset_stats failed: %m");
    }
 
    if (active) {
-      return anv_device_set_lost(device, "GPU hung on one of our command buffers");
+      return vk_device_set_lost(&device->vk, "GPU hung on one of our command buffers");
    } else if (pending) {
-      return anv_device_set_lost(device, "GPU hung with commands in-flight");
+      return vk_device_set_lost(&device->vk, "GPU hung with commands in-flight");
    }
 
    return VK_SUCCESS;
@@ -3546,7 +3477,7 @@ anv_device_bo_busy(struct anv_device *device, struct anv_bo *bo)
       return VK_NOT_READY;
    } else if (ret == -1) {
       /* We don't know the real error. */
-      return anv_device_set_lost(device, "gem wait failed: %m");
+      return vk_device_set_lost(&device->vk, "gem wait failed: %m");
    }
 
    /* Query for device status after the busy call.  If the BO we're checking
@@ -3567,7 +3498,7 @@ anv_device_wait(struct anv_device *device, struct anv_bo *bo,
       return VK_TIMEOUT;
    } else if (ret == -1) {
       /* We don't know the real error. */
-      return anv_device_set_lost(device, "gem wait failed: %m");
+      return vk_device_set_lost(&device->vk, "gem wait failed: %m");
    }
 
    /* Query for device status after the wait.  If the BO we're waiting on got
@@ -4198,7 +4129,7 @@ VkResult anv_QueueBindSparse(
     VkFence                                     fence)
 {
    ANV_FROM_HANDLE(anv_queue, queue, _queue);
-   if (anv_device_is_lost(queue->device))
+   if (vk_device_is_lost(&queue->device->vk))
       return VK_ERROR_DEVICE_LOST;
 
    return vk_error(queue, VK_ERROR_FEATURE_NOT_PRESENT);
@@ -4254,7 +4185,7 @@ VkResult anv_GetEventStatus(
    ANV_FROM_HANDLE(anv_device, device, _device);
    ANV_FROM_HANDLE(anv_event, event, _event);
 
-   if (anv_device_is_lost(device))
+   if (vk_device_is_lost(&device->vk))
       return VK_ERROR_DEVICE_LOST;
 
    return *(uint64_t *)event->state.map;
@@ -4609,8 +4540,8 @@ VkResult anv_GetCalibratedTimestampsEXT(
                                 &pTimestamps[d]);
 
          if (ret != 0) {
-            return anv_device_set_lost(device, "Failed to read the TIMESTAMP "
-                                               "register: %m");
+            return vk_device_set_lost(&device->vk, "Failed to read the "
+                                      "TIMESTAMP register: %m");
          }
          uint64_t device_period = DIV_ROUND_UP(1000000000, timestamp_frequency);
          max_clock_period = MAX2(max_clock_period, device_period);
index 7e8a3c5..e7bf30b 100644 (file)
@@ -285,7 +285,7 @@ VkResult anv_QueueSetPerformanceConfigurationINTEL(
          int ret = intel_ioctl(device->perf_fd, I915_PERF_IOCTL_CONFIG,
                                (void *)(uintptr_t) config->config_id);
          if (ret < 0)
-            return anv_device_set_lost(device, "i915-perf config failed: %m");
+            return vk_device_set_lost(&device->vk, "i915-perf config failed: %m");
       }
    }
 
index c7a4b4d..b6a3a0f 100644 (file)
@@ -1074,15 +1074,6 @@ struct anv_queue {
 
    uint32_t                                  exec_flags;
 
-   /* Set once from the device api calls. */
-   bool                                      lost_signaled;
-
-   /* Only set once atomically by the queue */
-   int                                       lost;
-   int                                       error_line;
-   const char *                              error_file;
-   char                                      error_msg[80];
-
    /*
     * This mutext protects the variables below.
     */
@@ -1241,8 +1232,6 @@ struct anv_device {
 
     pthread_mutex_t                             mutex;
     pthread_cond_t                              queue_submit;
-    int                                         _lost;
-    int                                         lost_reported;
 
     struct intel_batch_decode_ctx               decoder_ctx;
     /*
@@ -1322,31 +1311,6 @@ anv_mocs(const struct anv_device *device,
 void anv_device_init_blorp(struct anv_device *device);
 void anv_device_finish_blorp(struct anv_device *device);
 
-void _anv_device_report_lost(struct anv_device *device);
-VkResult _anv_device_set_lost(struct anv_device *device,
-                              const char *file, int line,
-                              const char *msg, ...)
-   anv_printflike(4, 5);
-VkResult _anv_queue_set_lost(struct anv_queue *queue,
-                              const char *file, int line,
-                              const char *msg, ...)
-   anv_printflike(4, 5);
-#define anv_device_set_lost(dev, ...) \
-   _anv_device_set_lost(dev, __FILE__, __LINE__, __VA_ARGS__)
-#define anv_queue_set_lost(queue, ...) \
-   (queue)->device->has_thread_submit ? \
-   _anv_queue_set_lost(queue, __FILE__, __LINE__, __VA_ARGS__) : \
-   _anv_device_set_lost(queue->device, __FILE__, __LINE__, __VA_ARGS__)
-
-static inline bool
-anv_device_is_lost(struct anv_device *device)
-{
-   int lost = p_atomic_read(&device->_lost);
-   if (unlikely(lost && !device->lost_reported))
-      _anv_device_report_lost(device);
-   return lost;
-}
-
 VkResult anv_device_query_status(struct anv_device *device);
 
 
index d1cbbac..c8a67a4 100644 (file)
@@ -389,7 +389,7 @@ anv_queue_task(void *_queue)
           * wakeup the second queue thread first, this would make that execbuf
           * fail because the dma-fence it depends on hasn't materialized yet.
           */
-         if (!queue->lost && submit->wait_timeline_count > 0) {
+         if (!vk_queue_is_lost(&queue->vk) && submit->wait_timeline_count > 0) {
             int ret = queue->device->info.no_hw ? 0 :
                anv_gem_syncobj_timeline_wait(
                   queue->device, submit->wait_timeline_syncobjs,
@@ -397,13 +397,13 @@ anv_queue_task(void *_queue)
                   anv_get_absolute_timeout(UINT64_MAX) /* wait forever */,
                   true /* wait for all */, true /* wait for materialize */);
             if (ret) {
-               result = anv_queue_set_lost(queue, "timeline timeout: %s",
-                                           strerror(errno));
+               result = vk_queue_set_lost(&queue->vk, "timeline timeout: %s",
+                                          strerror(errno));
             }
          }
 
          /* Now submit */
-         if (!queue->lost) {
+         if (!vk_queue_is_lost(&queue->vk)) {
             pthread_mutex_lock(&queue->device->mutex);
             result = anv_queue_execbuf_locked(queue, submit);
             pthread_mutex_unlock(&queue->device->mutex);
@@ -459,7 +459,7 @@ anv_queue_submit_post(struct anv_queue *queue,
             int ret = pthread_cond_wait(&queue->device->queue_submit,
                                         &queue->device->mutex);
             if (ret != 0) {
-               result = anv_device_set_lost(queue->device, "wait timeout");
+               result = vk_device_set_lost(&queue->device->vk, "wait timeout");
                break;
             }
 
@@ -491,7 +491,6 @@ anv_queue_init(struct anv_device *device, struct anv_queue *queue,
    queue->family = &pdevice->queue.families[queue->vk.queue_family_index];
 
    queue->exec_flags = exec_flags;
-   queue->lost = false;
    queue->quit = false;
 
    list_inithead(&queue->queued_submits);
@@ -800,7 +799,7 @@ anv_queue_submit_simple_batch(struct anv_queue *queue,
       if (has_syncobj_wait) {
          if (anv_gem_syncobj_wait(device, &syncobj, 1,
                                   anv_get_absolute_timeout(INT64_MAX), true))
-            result = anv_device_set_lost(device, "anv_gem_syncobj_wait failed: %m");
+            result = vk_device_set_lost(&device->vk, "anv_gem_syncobj_wait failed: %m");
          anv_gem_syncobj_destroy(device, syncobj);
       } else {
          result = anv_device_wait(device, sync_bo,
@@ -1004,8 +1003,8 @@ anv_queue_submit_add_in_semaphore(struct anv_queue *queue,
                                        true /* wait_all */,
                                        true /* wait_materialize */);
       if (ret != 0) {
-         return anv_queue_set_lost(queue,
-                                   "unable to wait on syncobj to materialize");
+         return vk_queue_set_lost(&queue->vk,
+                                  "unable to wait on syncobj to materialize");
       }
    }
 
@@ -1459,7 +1458,7 @@ out:
        * anv_device_set_lost() would have been called already by a callee of
        * anv_queue_submit().
        */
-      result = anv_device_set_lost(device, "vkQueueSubmit2KHR() failed");
+      result = vk_device_set_lost(&device->vk, "vkQueueSubmit2KHR() failed");
    }
 
    return result;
@@ -1470,7 +1469,7 @@ VkResult anv_QueueWaitIdle(
 {
    ANV_FROM_HANDLE(anv_queue, queue, _queue);
 
-   if (anv_device_is_lost(queue->device))
+   if (vk_device_is_lost(&queue->device->vk))
       return VK_ERROR_DEVICE_LOST;
 
    return anv_queue_submit_simple_batch(queue, NULL);
@@ -1626,7 +1625,7 @@ VkResult anv_GetFenceStatus(
    ANV_FROM_HANDLE(anv_device, device, _device);
    ANV_FROM_HANDLE(anv_fence, fence, _fence);
 
-   if (anv_device_is_lost(device))
+   if (vk_device_is_lost(&device->vk))
       return VK_ERROR_DEVICE_LOST;
 
    struct anv_fence_impl *impl =
@@ -1670,7 +1669,7 @@ VkResult anv_GetFenceStatus(
                return VK_NOT_READY;
             } else {
                /* We don't know the real error. */
-               return anv_device_set_lost(device, "drm_syncobj_wait failed: %m");
+               return vk_device_set_lost(&device->vk, "drm_syncobj_wait failed: %m");
             }
          } else {
             return VK_SUCCESS;
@@ -1682,7 +1681,7 @@ VkResult anv_GetFenceStatus(
                return VK_NOT_READY;
             } else {
                /* We don't know the real error. */
-               return anv_device_set_lost(device, "drm_syncobj_wait failed: %m");
+               return vk_device_set_lost(&device->vk, "drm_syncobj_wait failed: %m");
             }
          } else {
             return VK_SUCCESS;
@@ -1737,7 +1736,7 @@ anv_wait_for_syncobj_fences(struct anv_device *device,
          return VK_TIMEOUT;
       } else {
          /* We don't know the real error. */
-         return anv_device_set_lost(device, "drm_syncobj_wait failed: %m");
+         return vk_device_set_lost(&device->vk, "drm_syncobj_wait failed: %m");
       }
    } else {
       return VK_SUCCESS;
@@ -1850,7 +1849,7 @@ anv_wait_for_bo_fences(struct anv_device *device,
    }
 
 done:
-   if (anv_device_is_lost(device))
+   if (vk_device_is_lost(&device->vk))
       return VK_ERROR_DEVICE_LOST;
 
    return result;
@@ -1953,7 +1952,7 @@ VkResult anv_WaitForFences(
    if (device->info.no_hw)
       return VK_SUCCESS;
 
-   if (anv_device_is_lost(device))
+   if (vk_device_is_lost(&device->vk))
       return VK_ERROR_DEVICE_LOST;
 
    uint64_t abs_timeout = anv_get_absolute_timeout(timeout);
@@ -2104,7 +2103,7 @@ wait_syncobj_materialize(struct anv_device *device,
                                      anv_get_absolute_timeout(5ull * NSEC_PER_SEC),
                                      true /* wait_all */,
                                      true /* wait_materialize */))
-      return anv_device_set_lost(device, "anv_gem_syncobj_timeline_wait failed: %m");
+      return vk_device_set_lost(&device->vk, "anv_gem_syncobj_timeline_wait failed: %m");
 
    return VK_SUCCESS;
 }
@@ -2555,7 +2554,7 @@ VkResult anv_GetSemaphoreCounterValue(
       int ret = anv_gem_syncobj_timeline_query(device, &impl->syncobj, pValue, 1);
 
       if (ret != 0)
-         return anv_device_set_lost(device, "unable to query timeline syncobj");
+         return vk_device_set_lost(&device->vk, "unable to query timeline syncobj");
 
       return VK_SUCCESS;
    }
@@ -2728,7 +2727,7 @@ VkResult anv_WaitSemaphores(
                                           false);
          if (ret != 0)
             result = errno == ETIME ? VK_TIMEOUT :
-               anv_device_set_lost(device, "unable to wait on timeline syncobj");
+               vk_device_set_lost(&device->vk, "unable to wait on timeline syncobj");
       } else {
          result =
             anv_timelines_wait(device, timelines, values, handle_count,
@@ -2782,7 +2781,7 @@ VkResult anv_SignalSemaphore(
                                                 &pSignalInfo->value, 1);
 
       return ret == 0 ? VK_SUCCESS :
-         anv_device_set_lost(device, "unable to signal timeline syncobj");
+         vk_device_set_lost(&device->vk, "unable to signal timeline syncobj");
    }
 
    default:
index 075cccf..f66e7d5 100644 (file)
@@ -425,7 +425,7 @@ wait_for_available(struct anv_device *device,
          return status;
    }
 
-   return anv_device_set_lost(device, "query timeout");
+   return vk_device_set_lost(&device->vk, "query timeout");
 }
 
 VkResult genX(GetQueryPoolResults)(
@@ -448,7 +448,7 @@ VkResult genX(GetQueryPoolResults)(
           pool->type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR ||
           pool->type == VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL);
 
-   if (anv_device_is_lost(device))
+   if (vk_device_is_lost(&device->vk))
       return VK_ERROR_DEVICE_LOST;
 
    if (pData == NULL)