drm/amdgpu: split umc callbacks to ras and non-ras ones
author	Hawking Zhang <Hawking.Zhang@amd.com>
Wed, 17 Mar 2021 11:17:52 +0000 (19:17 +0800)
committer	Alex Deucher <alexander.deucher@amd.com>
Fri, 9 Apr 2021 20:51:11 +0000 (16:51 -0400)
umc ras is not managed by the gpu driver when the gpu
is connected to the cpu through xgmi. split the umc
callbacks into ras and non-ras ones so the gpu driver
only initializes the umc ras callbacks when it manages
umc ras.
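
as an illustration only (not part of this patch), ras entry points are
now reached through the new adev->umc.ras_funcs table, and callers are
expected to tolerate it being left unset, e.g.:

	/* sketch of the guarded dispatch used by ras consumers; the table
	 * simply stays NULL when the host, not the gpu driver, owns umc ras
	 */
	if (adev->umc.ras_funcs &&
	    adev->umc.ras_funcs->query_ras_error_count)
		adev->umc.ras_funcs->query_ras_error_count(adev, &err_data);

non-ras callbacks such as init_registers stay on adev->umc.funcs.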

Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Dennis Li <Dennis.Li@amd.com>
Reviewed-by: John Clements <John.Clements@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
12 files changed:
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
drivers/gpu/drm/amd/amdgpu/umc_v6_1.h
drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
drivers/gpu/drm/amd/amdgpu/umc_v6_7.h
drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
drivers/gpu/drm/amd/amdgpu/umc_v8_7.h

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 082f9d0..3411c65 100644
@@ -391,8 +391,9 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
 {
        int r;
 
-       if (adev->umc.funcs && adev->umc.funcs->ras_late_init) {
-               r = adev->umc.funcs->ras_late_init(adev);
+       if (adev->umc.ras_funcs &&
+           adev->umc.ras_funcs->ras_late_init) {
+               r = adev->umc.ras_funcs->ras_late_init(adev);
                if (r)
                        return r;
        }
@@ -418,8 +419,12 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
 
 void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
 {
-       amdgpu_umc_ras_fini(adev);
+       if (adev->umc.ras_funcs &&
+           adev->umc.ras_funcs->ras_fini)
+               adev->umc.ras_funcs->ras_fini(adev);
+
        amdgpu_mmhub_ras_fini(adev);
+
        if (adev->gmc.xgmi.ras_funcs &&
            adev->gmc.xgmi.ras_funcs->ras_fini)
                adev->gmc.xgmi.ras_funcs->ras_fini(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 172738c..459a470 100644
@@ -774,13 +774,15 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
 
        switch (info->head.block) {
        case AMDGPU_RAS_BLOCK__UMC:
-               if (adev->umc.funcs->query_ras_error_count)
-                       adev->umc.funcs->query_ras_error_count(adev, &err_data);
+               if (adev->umc.ras_funcs &&
+                   adev->umc.ras_funcs->query_ras_error_count)
+                       adev->umc.ras_funcs->query_ras_error_count(adev, &err_data);
                /* umc query_ras_error_address is also responsible for clearing
                 * error status
                 */
-               if (adev->umc.funcs->query_ras_error_address)
-                       adev->umc.funcs->query_ras_error_address(adev, &err_data);
+               if (adev->umc.ras_funcs &&
+                   adev->umc.ras_funcs->query_ras_error_address)
+                       adev->umc.ras_funcs->query_ras_error_address(adev, &err_data);
                break;
        case AMDGPU_RAS_BLOCK__SDMA:
                if (adev->sdma.funcs->query_ras_error_count) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index a2975c8..ea6f99b 100644
@@ -60,8 +60,9 @@ int amdgpu_umc_ras_late_init(struct amdgpu_device *adev)
        }
 
        /* ras init of specific umc version */
-       if (adev->umc.funcs && adev->umc.funcs->err_cnt_init)
-               adev->umc.funcs->err_cnt_init(adev);
+       if (adev->umc.ras_funcs &&
+           adev->umc.ras_funcs->err_cnt_init)
+               adev->umc.ras_funcs->err_cnt_init(adev);
 
        return 0;
 
@@ -95,12 +96,12 @@ int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
        struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
 
        kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
-       if (adev->umc.funcs &&
-           adev->umc.funcs->query_ras_error_count)
-           adev->umc.funcs->query_ras_error_count(adev, ras_error_status);
+       if (adev->umc.ras_funcs &&
+           adev->umc.ras_funcs->query_ras_error_count)
+           adev->umc.ras_funcs->query_ras_error_count(adev, ras_error_status);
 
-       if (adev->umc.funcs &&
-           adev->umc.funcs->query_ras_error_address &&
+       if (adev->umc.ras_funcs &&
+           adev->umc.ras_funcs->query_ras_error_address &&
            adev->umc.max_ras_err_cnt_per_query) {
                err_data->err_addr =
                        kcalloc(adev->umc.max_ras_err_cnt_per_query,
@@ -116,7 +117,7 @@ int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
                /* umc query_ras_error_address is also responsible for clearing
                 * error status
                 */
-               adev->umc.funcs->query_ras_error_address(adev, ras_error_status);
+               adev->umc.ras_funcs->query_ras_error_address(adev, ras_error_status);
        }
 
        /* only uncorrectable error needs gpu reset */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
index 1838144..bbcccf5 100644
 #define LOOP_UMC_CH_INST(ch_inst) for ((ch_inst) = 0; (ch_inst) < adev->umc.channel_inst_num; (ch_inst)++)
 #define LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) LOOP_UMC_INST((umc_inst)) LOOP_UMC_CH_INST((ch_inst))
 
-struct amdgpu_umc_funcs {
+struct amdgpu_umc_ras_funcs {
        void (*err_cnt_init)(struct amdgpu_device *adev);
        int (*ras_late_init)(struct amdgpu_device *adev);
+       void (*ras_fini)(struct amdgpu_device *adev);
        void (*query_ras_error_count)(struct amdgpu_device *adev,
-                                       void *ras_error_status);
+                                     void *ras_error_status);
        void (*query_ras_error_address)(struct amdgpu_device *adev,
                                        void *ras_error_status);
+};
+
+struct amdgpu_umc_funcs {
        void (*init_registers)(struct amdgpu_device *adev);
 };
 
@@ -59,6 +63,7 @@ struct amdgpu_umc {
        struct ras_common_if *ras_if;
 
        const struct amdgpu_umc_funcs *funcs;
+       const struct amdgpu_umc_ras_funcs *ras_funcs;
 };
 
 int amdgpu_umc_ras_late_init(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 33e54ee..2bfd620 100644
@@ -655,7 +655,7 @@ static void gmc_v10_0_set_umc_funcs(struct amdgpu_device *adev)
                adev->umc.umc_inst_num = UMC_V8_7_UMC_INSTANCE_NUM;
                adev->umc.channel_offs = UMC_V8_7_PER_CHANNEL_OFFSET_SIENNA;
                adev->umc.channel_idx_tbl = &umc_v8_7_channel_idx_tbl[0][0];
-               adev->umc.funcs = &umc_v8_7_funcs;
+               adev->umc.ras_funcs = &umc_v8_7_ras_funcs;
                break;
        default:
                break;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 6d784c6..1970ae9 100644
@@ -1155,7 +1155,7 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
                adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
                adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_VG20;
                adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
-               adev->umc.funcs = &umc_v6_1_funcs;
+               adev->umc.ras_funcs = &umc_v6_1_ras_funcs;
                break;
        case CHIP_ARCTURUS:
                adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
@@ -1163,7 +1163,7 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
                adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
                adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_ARCT;
                adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
-               adev->umc.funcs = &umc_v6_1_funcs;
+               adev->umc.ras_funcs = &umc_v6_1_ras_funcs;
                break;
        default:
                break;
@@ -1194,12 +1194,6 @@ static int gmc_v9_0_early_init(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-       gmc_v9_0_set_gmc_funcs(adev);
-       gmc_v9_0_set_irq_funcs(adev);
-       gmc_v9_0_set_umc_funcs(adev);
-       gmc_v9_0_set_mmhub_funcs(adev);
-       gmc_v9_0_set_gfxhub_funcs(adev);
-
        if (adev->asic_type == CHIP_VEGA20 ||
            adev->asic_type == CHIP_ARCTURUS)
                adev->gmc.xgmi.supported = true;
@@ -1210,6 +1204,12 @@ static int gmc_v9_0_early_init(void *handle)
                        adev->smuio.funcs->is_host_gpu_xgmi_supported(adev);
        }
 
+       gmc_v9_0_set_gmc_funcs(adev);
+       gmc_v9_0_set_irq_funcs(adev);
+       gmc_v9_0_set_umc_funcs(adev);
+       gmc_v9_0_set_mmhub_funcs(adev);
+       gmc_v9_0_set_gfxhub_funcs(adev);
+
        adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
        adev->gmc.shared_aperture_end =
                adev->gmc.shared_aperture_start + (4ULL << 30) - 1;
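
note: moving the gmc_v9_0_set_*_funcs() calls below the xgmi probing is
presumably what lets gmc_v9_0_set_umc_funcs() see whether umc ras is
host-managed before choosing callbacks. a hypothetical guard (field name
and placement assumed here, not part of this patch) could look like:

	/* hypothetical: leave ras_funcs NULL when umc ras is host-managed */
	if (!adev->gmc.xgmi.connected_to_cpu)
		adev->umc.ras_funcs = &umc_v6_1_ras_funcs;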
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
index 96d7769..20b4498 100644
@@ -22,6 +22,7 @@
  */
 #include "umc_v6_1.h"
 #include "amdgpu_ras.h"
+#include "amdgpu_umc.h"
 #include "amdgpu.h"
 
 #include "rsmu/rsmu_0_0_2_offset.h"
@@ -464,9 +465,10 @@ static void umc_v6_1_err_cnt_init(struct amdgpu_device *adev)
                umc_v6_1_enable_umc_index_mode(adev);
 }
 
-const struct amdgpu_umc_funcs umc_v6_1_funcs = {
+const struct amdgpu_umc_ras_funcs umc_v6_1_ras_funcs = {
        .err_cnt_init = umc_v6_1_err_cnt_init,
        .ras_late_init = amdgpu_umc_ras_late_init,
+       .ras_fini = amdgpu_umc_ras_fini,
        .query_ras_error_count = umc_v6_1_query_ras_error_count,
        .query_ras_error_address = umc_v6_1_query_ras_error_address,
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h
index 0ce1d32..5dc36c7 100644
@@ -45,7 +45,7 @@
 /* umc ce count initial value */
 #define UMC_V6_1_CE_CNT_INIT   (UMC_V6_1_CE_CNT_MAX - UMC_V6_1_CE_INT_THRESHOLD)
 
-extern const struct amdgpu_umc_funcs umc_v6_1_funcs;
+extern const struct amdgpu_umc_ras_funcs umc_v6_1_ras_funcs;
 extern const uint32_t
        umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM];
 
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
index 3c3fb01..3a8f787 100644
@@ -22,6 +22,7 @@
  */
 #include "umc_v6_7.h"
 #include "amdgpu_ras.h"
+#include "amdgpu_umc.h"
 #include "amdgpu.h"
 
 #include "umc/umc_6_7_0_offset.h"
@@ -272,8 +273,9 @@ static void umc_v6_7_query_ras_error_address(struct amdgpu_device *adev,
        }
 }
 
-const struct amdgpu_umc_funcs umc_v6_7_funcs = {
+const struct amdgpu_umc_ras_funcs umc_v6_7_ras_funcs = {
        .ras_late_init = amdgpu_umc_ras_late_init,
+       .ras_fini = amdgpu_umc_ras_fini,
        .query_ras_error_count = umc_v6_7_query_ras_error_count,
        .query_ras_error_address = umc_v6_7_query_ras_error_address,
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h
index e59dbdb..4eb85f2 100644
@@ -32,6 +32,6 @@
 
 #define UMC_V6_7_INST_DIST     0x40000
 
-extern const struct amdgpu_umc_funcs umc_v6_7_funcs;
+extern const struct amdgpu_umc_ras_funcs umc_v6_7_ras_funcs;
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
index a064c09..89d20ad 100644
@@ -22,6 +22,7 @@
  */
 #include "umc_v8_7.h"
 #include "amdgpu_ras.h"
+#include "amdgpu_umc.h"
 #include "amdgpu.h"
 
 #include "rsmu/rsmu_0_0_2_offset.h"
@@ -323,9 +324,10 @@ static void umc_v8_7_err_cnt_init(struct amdgpu_device *adev)
        }
 }
 
-const struct amdgpu_umc_funcs umc_v8_7_funcs = {
+const struct amdgpu_umc_ras_funcs umc_v8_7_ras_funcs = {
        .err_cnt_init = umc_v8_7_err_cnt_init,
        .ras_late_init = amdgpu_umc_ras_late_init,
+       .ras_fini = amdgpu_umc_ras_fini,
        .query_ras_error_count = umc_v8_7_query_ras_error_count,
        .query_ras_error_address = umc_v8_7_query_ras_error_address,
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.h b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.h
index d4d0468..37e6dc7 100644
@@ -44,7 +44,7 @@
 /* umc ce count initial value */
 #define UMC_V8_7_CE_CNT_INIT   (UMC_V8_7_CE_CNT_MAX - UMC_V8_7_CE_INT_THRESHOLD)
 
-extern const struct amdgpu_umc_funcs umc_v8_7_funcs;
+extern const struct amdgpu_umc_ras_funcs umc_v8_7_ras_funcs;
 extern const uint32_t
        umc_v8_7_channel_idx_tbl[UMC_V8_7_UMC_INSTANCE_NUM][UMC_V8_7_CHANNEL_INSTANCE_NUM];