drm/amdgpu: add RAS fatal error interrupt handler
author: Tao Zhou <tao.zhou1@amd.com>
Tue, 19 Apr 2022 06:45:09 +0000 (14:45 +0800)
committer: Alex Deucher <alexander.deucher@amd.com>
Fri, 22 Apr 2022 18:50:18 +0000 (14:50 -0400)
The fatal error handler is independent of the general RAS interrupt
handler since there is no related IH ring.

Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h

index ea3e8c6..b4cf871 100644 (file)
@@ -193,20 +193,7 @@ static irqreturn_t amdgpu_irq_handler(int irq, void *arg)
        if (ret == IRQ_HANDLED)
                pm_runtime_mark_last_busy(dev->dev);
 
-       /* For the hardware that cannot enable bif ring for both ras_controller_irq
-         * and ras_err_evnet_athub_irq ih cookies, the driver has to poll status
-        * register to check whether the interrupt is triggered or not, and properly
-        * ack the interrupt if it is there
-        */
-       if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF)) {
-               if (adev->nbio.ras &&
-                   adev->nbio.ras->handle_ras_controller_intr_no_bifring)
-                       adev->nbio.ras->handle_ras_controller_intr_no_bifring(adev);
-
-               if (adev->nbio.ras &&
-                   adev->nbio.ras->handle_ras_err_event_athub_intr_no_bifring)
-                       adev->nbio.ras->handle_ras_err_event_athub_intr_no_bifring(adev);
-       }
+       amdgpu_ras_interrupt_fatal_error_handler(adev);
 
        return ret;
 }
index 1c86ec9..03ce3ce 100644 (file)
@@ -1515,6 +1515,26 @@ static int amdgpu_ras_fs_fini(struct amdgpu_device *adev)
 /* ras fs end */
 
 /* ih begin */
+
+/* For hardware that cannot enable the bif ring for both ras_controller_irq
+ * and ras_err_event_athub_irq ih cookies, the driver has to poll the status
+ * register to check whether the interrupt was triggered, and properly
+ * ack the interrupt if it is pending
+ */
+void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev)
+{
+       if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF))
+               return;
+
+       if (adev->nbio.ras &&
+           adev->nbio.ras->handle_ras_controller_intr_no_bifring)
+               adev->nbio.ras->handle_ras_controller_intr_no_bifring(adev);
+
+       if (adev->nbio.ras &&
+           adev->nbio.ras->handle_ras_err_event_athub_intr_no_bifring)
+               adev->nbio.ras->handle_ras_err_event_athub_intr_no_bifring(adev);
+}
+
 static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager *obj,
                                struct amdgpu_iv_entry *entry)
 {
index c4b6178..b9a6fac 100644 (file)
@@ -683,4 +683,5 @@ int amdgpu_ras_set_context(struct amdgpu_device *adev, struct amdgpu_ras *ras_co
 
 int amdgpu_ras_register_ras_block(struct amdgpu_device *adev,
                                struct amdgpu_ras_block_object *ras_block_obj);
+void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev);
 #endif