drm/amdgpu: Modify xgmi block to fit for the unified ras block data and ops
author yipechai <YiPeng.Chai@amd.com>
Tue, 4 Jan 2022 10:56:20 +0000 (18:56 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Fri, 14 Jan 2022 22:51:59 +0000 (17:51 -0500)
1. Modify the xgmi block to fit the unified ras block data and ops.
2. Rename amdgpu_xgmi_ras_funcs to amdgpu_xgmi_ras, and drop the _funcs suffix from the corresponding variable name.
3. Remove the const qualifier from the xgmi ras variable so that the xgmi ras block can be inserted into the amdgpu device ras block linked list.
4. Invoke amdgpu_ras_register_ras_block to register the xgmi ras block in the amdgpu device ras block linked list.
5. Remove the now-redundant xgmi code in amdgpu_ras.c, since the unified ras block is used instead.

Signed-off-by: yipechai <YiPeng.Chai@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: John Clements <john.clements@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h

index 2430d62..d86ee53 100644 (file)
@@ -454,12 +454,13 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
                        return r;
        }
 
-       if (!adev->gmc.xgmi.connected_to_cpu)
-               adev->gmc.xgmi.ras_funcs = &xgmi_ras_funcs;
+       if (!adev->gmc.xgmi.connected_to_cpu) {
+               adev->gmc.xgmi.ras = &xgmi_ras;
+               amdgpu_ras_register_ras_block(adev, &adev->gmc.xgmi.ras->ras_block);
+       }
 
-       if (adev->gmc.xgmi.ras_funcs &&
-           adev->gmc.xgmi.ras_funcs->ras_late_init) {
-               r = adev->gmc.xgmi.ras_funcs->ras_late_init(adev);
+       if (adev->gmc.xgmi.ras && adev->gmc.xgmi.ras->ras_block.ras_late_init) {
+               r = adev->gmc.xgmi.ras->ras_block.ras_late_init(adev, NULL);
                if (r)
                        return r;
        }
@@ -505,9 +506,8 @@ void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
            adev->mmhub.ras_funcs->ras_fini)
                adev->mmhub.ras_funcs->ras_fini(adev);
 
-       if (adev->gmc.xgmi.ras_funcs &&
-           adev->gmc.xgmi.ras_funcs->ras_fini)
-               adev->gmc.xgmi.ras_funcs->ras_fini(adev);
+       if (adev->gmc.xgmi.ras && adev->gmc.xgmi.ras->ras_block.ras_fini)
+               adev->gmc.xgmi.ras->ras_block.ras_fini(adev);
 
        if (adev->hdp.ras_funcs &&
            adev->hdp.ras_funcs->ras_fini)
index 8458ceb..0001631 100644 (file)
@@ -29,6 +29,7 @@
 #include <linux/types.h>
 
 #include "amdgpu_irq.h"
+#include "amdgpu_ras.h"
 
 /* VA hole for 48bit addresses on Vega10 */
 #define AMDGPU_GMC_HOLE_START  0x0000800000000000ULL
@@ -135,12 +136,8 @@ struct amdgpu_gmc_funcs {
        unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev);
 };
 
-struct amdgpu_xgmi_ras_funcs {
-       int (*ras_late_init)(struct amdgpu_device *adev);
-       void (*ras_fini)(struct amdgpu_device *adev);
-       int (*query_ras_error_count)(struct amdgpu_device *adev,
-                                    void *ras_error_status);
-       void (*reset_ras_error_count)(struct amdgpu_device *adev);
+struct amdgpu_xgmi_ras {
+       struct amdgpu_ras_block_object ras_block;
 };
 
 struct amdgpu_xgmi {
@@ -159,7 +156,7 @@ struct amdgpu_xgmi {
        struct ras_common_if *ras_if;
        bool connected_to_cpu;
        bool pending_reset;
-       const struct amdgpu_xgmi_ras_funcs *ras_funcs;
+       struct amdgpu_xgmi_ras *ras;
 };
 
 struct amdgpu_gmc {
index a5812c2..28997b7 100644 (file)
@@ -1012,9 +1012,13 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
                        adev->nbio.ras_funcs->query_ras_error_count(adev, &err_data);
                break;
        case AMDGPU_RAS_BLOCK__XGMI_WAFL:
-               if (adev->gmc.xgmi.ras_funcs &&
-                   adev->gmc.xgmi.ras_funcs->query_ras_error_count)
-                       adev->gmc.xgmi.ras_funcs->query_ras_error_count(adev, &err_data);
+               if (!block_obj || !block_obj->hw_ops)   {
+                       dev_info(adev->dev, "%s doesn't config ras function \n",
+                               get_ras_block_str(&info->head));
+                       return -EINVAL;
+               }
+               if (block_obj->hw_ops->query_ras_error_count)
+                       block_obj->hw_ops->query_ras_error_count(adev, &err_data);
                break;
        case AMDGPU_RAS_BLOCK__HDP:
                if (adev->hdp.ras_funcs &&
index e8b8f28..d29acd3 100644 (file)
@@ -732,7 +732,7 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
        return psp_xgmi_terminate(&adev->psp);
 }
 
-static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev)
+static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev, void *ras_info)
 {
        int r;
        struct ras_ih_if ih_info = {
@@ -746,7 +746,7 @@ static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev)
            adev->gmc.xgmi.num_physical_nodes == 0)
                return 0;
 
-       adev->gmc.xgmi.ras_funcs->reset_ras_error_count(adev);
+       adev->gmc.xgmi.ras->ras_block.hw_ops->reset_ras_error_count(adev);
 
        if (!adev->gmc.xgmi.ras_if) {
                adev->gmc.xgmi.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
@@ -865,7 +865,7 @@ static int amdgpu_xgmi_query_pcs_error_status(struct amdgpu_device *adev,
        return 0;
 }
 
-static int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
+static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
                                             void *ras_error_status)
 {
        struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
@@ -874,7 +874,7 @@ static int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
        uint32_t ue_cnt = 0, ce_cnt = 0;
 
        if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL))
-               return -EINVAL;
+               return ;
 
        err_data->ue_count = 0;
        err_data->ce_count = 0;
@@ -940,17 +940,23 @@ static int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
                break;
        }
 
-       adev->gmc.xgmi.ras_funcs->reset_ras_error_count(adev);
+       adev->gmc.xgmi.ras->ras_block.hw_ops->reset_ras_error_count(adev);
 
        err_data->ue_count += ue_cnt;
        err_data->ce_count += ce_cnt;
-
-       return 0;
 }
 
-const struct amdgpu_xgmi_ras_funcs xgmi_ras_funcs = {
-       .ras_late_init = amdgpu_xgmi_ras_late_init,
-       .ras_fini = amdgpu_xgmi_ras_fini,
+struct amdgpu_ras_block_hw_ops  xgmi_ras_hw_ops = {
        .query_ras_error_count = amdgpu_xgmi_query_ras_error_count,
        .reset_ras_error_count = amdgpu_xgmi_reset_ras_error_count,
 };
+
+struct amdgpu_xgmi_ras xgmi_ras = {
+       .ras_block = {
+               .name = "xgmi",
+               .block = AMDGPU_RAS_BLOCK__XGMI_WAFL,
+               .hw_ops = &xgmi_ras_hw_ops,
+               .ras_late_init = amdgpu_xgmi_ras_late_init,
+               .ras_fini = amdgpu_xgmi_ras_fini,
+       },
+};
index d2189bf..0afca51 100644 (file)
@@ -24,7 +24,7 @@
 
 #include <drm/task_barrier.h>
 #include "amdgpu_psp.h"
-
+#include "amdgpu_ras.h"
 
 struct amdgpu_hive_info {
        struct kobject kobj;
@@ -50,7 +50,7 @@ struct amdgpu_pcs_ras_field {
        uint32_t pcs_err_shift;
 };
 
-extern const struct amdgpu_xgmi_ras_funcs xgmi_ras_funcs;
+extern struct amdgpu_xgmi_ras  xgmi_ras;
 struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev);
 void amdgpu_put_xgmi_hive(struct amdgpu_hive_info *hive);
 int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev);