drm/amdgpu: add umc_fill_error_record to make code more simple
author Tao Zhou <tao.zhou1@amd.com>
Wed, 19 Jan 2022 07:42:55 +0000 (15:42 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Thu, 27 Jan 2022 20:48:56 +0000 (15:48 -0500)
Create a common amdgpu_umc_fill_error_record function for all versions
of UMC and clean up the related code.

Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
drivers/gpu/drm/amd/amdgpu/umc_v8_7.c

index d4e07d0..e632499 100644 (file)
@@ -158,14 +158,9 @@ static int amdgpu_reserve_page_direct(struct amdgpu_device *adev, uint64_t addre
        }
 
        memset(&err_rec, 0x0, sizeof(struct eeprom_table_record));
-
-       err_rec.address = address;
-       err_rec.retired_page = address >> AMDGPU_GPU_PAGE_SHIFT;
-       err_rec.ts = (uint64_t)ktime_get_real_seconds();
-       err_rec.err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
-
        err_data.err_addr = &err_rec;
-       err_data.err_addr_cnt = 1;
+       /* helper shifts retired_page itself; pass the unshifted address */
+       amdgpu_umc_fill_error_record(&err_data, address, address, 0, 0);
 
        if (amdgpu_bad_page_threshold != 0) {
                amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
@@ -2660,8 +2655,6 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb,
        dev_info(adev->dev, "Uncorrectable error detected in UMC inst: %d, chan_idx: %d",
                             umc_inst, ch_inst);
 
-       memset(&err_rec, 0x0, sizeof(struct eeprom_table_record));
-
        /*
         * Translate UMC channel address to Physical address
         */
@@ -2673,16 +2666,10 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb,
                        ADDR_OF_256B_BLOCK(channel_index) |
                        OFFSET_IN_256B_BLOCK(m->addr);
 
-       err_rec.address = m->addr;
-       err_rec.retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
-       err_rec.ts = (uint64_t)ktime_get_real_seconds();
-       err_rec.err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
-       err_rec.cu = 0;
-       err_rec.mem_channel = channel_index;
-       err_rec.mcumc_id = umc_inst;
-
+       memset(&err_rec, 0x0, sizeof(struct eeprom_table_record));
        err_data.err_addr = &err_rec;
-       err_data.err_addr_cnt = 1;
+       amdgpu_umc_fill_error_record(&err_data, m->addr,
+                       retired_page, channel_index, umc_inst);
 
        if (amdgpu_bad_page_threshold != 0) {
                amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
index b4c68c0..ff7805b 100644 (file)
@@ -218,3 +218,24 @@ int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
        amdgpu_ras_interrupt_dispatch(adev, &ih_data);
        return 0;
 }
+
+void amdgpu_umc_fill_error_record(struct ras_err_data *err_data,
+               uint64_t err_addr,
+               uint64_t retired_page,
+               uint32_t channel_index,
+               uint32_t umc_inst)
+{
+       struct eeprom_table_record *err_rec =
+               &err_data->err_addr[err_data->err_addr_cnt];
+
+       err_rec->address = err_addr;
+       /* retired_page arrives unshifted; only its page frame number is saved */
+       err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
+       err_rec->ts = (uint64_t)ktime_get_real_seconds();
+       err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
+       err_rec->cu = 0;
+       err_rec->mem_channel = channel_index;
+       err_rec->mcumc_id = umc_inst;
+
+       err_data->err_addr_cnt++; /* the next call fills the following slot */
+}
index 195740a..4db0526 100644 (file)
@@ -80,4 +80,9 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
 int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
                struct amdgpu_irq_src *source,
                struct amdgpu_iv_entry *entry);
+void amdgpu_umc_fill_error_record(struct ras_err_data *err_data,
+               uint64_t err_addr,
+               uint64_t retired_page,
+               uint32_t channel_index,
+               uint32_t umc_inst);
 #endif
index 4776301..939cb20 100644 (file)
@@ -300,7 +300,6 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
 {
        uint32_t lsb, mc_umc_status_addr;
        uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0;
-       struct eeprom_table_record *err_rec;
        uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
 
        if (adev->asic_type == CHIP_ARCTURUS) {
@@ -328,8 +327,6 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
                return;
        }
 
-       err_rec = &err_data->err_addr[err_data->err_addr_cnt];
-
        /* calculate error address if ue/ce error is detected */
        if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
            (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
@@ -348,18 +345,9 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
 
                /* we only save ue error information currently, ce is skipped */
                if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
-                               == 1) {
-                       err_rec->address = err_addr;
-                       /* page frame address is saved */
-                       err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
-                       err_rec->ts = (uint64_t)ktime_get_real_seconds();
-                       err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
-                       err_rec->cu = 0;
-                       err_rec->mem_channel = channel_index;
-                       err_rec->mcumc_id = umc_inst;
-
-                       err_data->err_addr_cnt++;
-               }
+                               == 1)
+                       amdgpu_umc_fill_error_record(err_data, err_addr,
+                                       retired_page, channel_index, umc_inst);
        }
 
        /* clear umc status */
index f5a1ba7..300dee9 100644 (file)
@@ -120,7 +120,6 @@ static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev,
                                         uint32_t umc_inst)
 {
        uint64_t mc_umc_status, err_addr, retired_page;
-       struct eeprom_table_record *err_rec;
        uint32_t channel_index;
        uint32_t eccinfo_table_idx;
        struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
@@ -137,8 +136,6 @@ static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev,
        if (!err_data->err_addr)
                return;
 
-       err_rec = &err_data->err_addr[err_data->err_addr_cnt];
-
        /* calculate error address if ue/ce error is detected */
        if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
            (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
@@ -154,18 +151,9 @@ static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev,
 
                /* we only save ue error information currently, ce is skipped */
                if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
-                               == 1) {
-                       err_rec->address = err_addr;
-                       /* page frame address is saved */
-                       err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
-                       err_rec->ts = (uint64_t)ktime_get_real_seconds();
-                       err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
-                       err_rec->cu = 0;
-                       err_rec->mem_channel = channel_index;
-                       err_rec->mcumc_id = umc_inst;
-
-                       err_data->err_addr_cnt++;
-               }
+                               == 1)
+                       amdgpu_umc_fill_error_record(err_data, err_addr,
+                                       retired_page, channel_index, umc_inst);
        }
 }
 
@@ -345,7 +333,6 @@ static void umc_v6_7_query_error_address(struct amdgpu_device *adev,
 {
        uint32_t mc_umc_status_addr;
        uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0;
-       struct eeprom_table_record *err_rec;
        uint32_t channel_index;
 
        mc_umc_status_addr =
@@ -364,8 +351,6 @@ static void umc_v6_7_query_error_address(struct amdgpu_device *adev,
                return;
        }
 
-       err_rec = &err_data->err_addr[err_data->err_addr_cnt];
-
        channel_index =
                adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
 
@@ -384,18 +369,9 @@ static void umc_v6_7_query_error_address(struct amdgpu_device *adev,
 
                /* we only save ue error information currently, ce is skipped */
                if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
-                               == 1) {
-                       err_rec->address = err_addr;
-                       /* page frame address is saved */
-                       err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
-                       err_rec->ts = (uint64_t)ktime_get_real_seconds();
-                       err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
-                       err_rec->cu = 0;
-                       err_rec->mem_channel = channel_index;
-                       err_rec->mcumc_id = umc_inst;
-
-                       err_data->err_addr_cnt++;
-               }
+                               == 1)
+                       amdgpu_umc_fill_error_record(err_data, err_addr,
+                                       retired_page, channel_index, umc_inst);
        }
 
        /* clear umc status */
index d704171..de85a99 100644 (file)
@@ -121,7 +121,6 @@ static void umc_v8_7_ecc_info_query_error_address(struct amdgpu_device *adev,
                                        uint32_t umc_inst)
 {
        uint64_t mc_umc_status, err_addr, retired_page;
-       struct eeprom_table_record *err_rec;
        uint32_t channel_index;
        uint32_t eccinfo_table_idx;
        struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
@@ -138,8 +137,6 @@ static void umc_v8_7_ecc_info_query_error_address(struct amdgpu_device *adev,
        if (!err_data->err_addr)
                return;
 
-       err_rec = &err_data->err_addr[err_data->err_addr_cnt];
-
        /* calculate error address if ue/ce error is detected */
        if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
            (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
@@ -155,18 +152,9 @@ static void umc_v8_7_ecc_info_query_error_address(struct amdgpu_device *adev,
 
                /* we only save ue error information currently, ce is skipped */
                if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
-                               == 1) {
-                       err_rec->address = err_addr;
-                       /* page frame address is saved */
-                       err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
-                       err_rec->ts = (uint64_t)ktime_get_real_seconds();
-                       err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
-                       err_rec->cu = 0;
-                       err_rec->mem_channel = channel_index;
-                       err_rec->mcumc_id = umc_inst;
-
-                       err_data->err_addr_cnt++;
-               }
+                               == 1)
+                       amdgpu_umc_fill_error_record(err_data, err_addr,
+                                       retired_page, channel_index, umc_inst);
        }
 }
 
@@ -344,7 +332,6 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev,
 {
        uint32_t lsb, mc_umc_status_addr;
        uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0;
-       struct eeprom_table_record *err_rec;
        uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
 
        mc_umc_status_addr =
@@ -363,8 +350,6 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev,
                return;
        }
 
-       err_rec = &err_data->err_addr[err_data->err_addr_cnt];
-
        /* calculate error address if ue/ce error is detected */
        if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
            (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
@@ -383,18 +368,9 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev,
 
                /* we only save ue error information currently, ce is skipped */
                if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
-                               == 1) {
-                       err_rec->address = err_addr;
-                       /* page frame address is saved */
-                       err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
-                       err_rec->ts = (uint64_t)ktime_get_real_seconds();
-                       err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
-                       err_rec->cu = 0;
-                       err_rec->mem_channel = channel_index;
-                       err_rec->mcumc_id = umc_inst;
-
-                       err_data->err_addr_cnt++;
-               }
+                               == 1)
+                       amdgpu_umc_fill_error_record(err_data, err_addr,
+                                       retired_page, channel_index, umc_inst);
        }
 
        /* clear umc status */