drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
/*
 * Copyright 2021 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include "umc_v6_7.h"
#include "amdgpu_ras.h"
#include "amdgpu_umc.h"
#include "amdgpu.h"

#include "umc/umc_6_7_0_offset.h"
#include "umc/umc_6_7_0_sh_mask.h"

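/* Channel index remapping tables. The hardware reports errors per
 * (umc instance, channel instance) pair; these tables translate that pair
 * into the normalized channel index used by the SoC address interleave.
 * Which of the two tables applies depends on the board configuration and
 * is chosen at init time when the GMC code points
 * adev->umc.channel_idx_tbl at one of them.
 */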
const uint32_t
	umc_v6_7_channel_idx_tbl_second[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM] = {
		{28, 20, 24, 16, 12, 4, 8, 0},
		{6, 30, 2, 26, 22, 14, 18, 10},
		{19, 11, 15, 7, 3, 27, 31, 23},
		{9, 1, 5, 29, 25, 17, 21, 13}
};
const uint32_t
	umc_v6_7_channel_idx_tbl_first[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM] = {
		{19, 11, 15, 7, 3, 27, 31, 23},
		{9, 1, 5, 29, 25, 17, 21, 13},
		{28, 20, 24, 16, 12, 4, 8, 0},
		{6, 30, 2, 26, 22, 14, 18, 10},
};

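/* The register apertures are laid out four channels per UMC instance block,
 * so flatten the (umc_inst, ch_inst) pair and re-split it by four before
 * computing the MMIO offset.
 */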
static inline uint32_t get_umc_v6_7_reg_offset(struct amdgpu_device *adev,
					       uint32_t umc_inst,
					       uint32_t ch_inst)
{
	uint32_t index = umc_inst * adev->umc.channel_inst_num + ch_inst;

	/* adjust umc and channel index offset,
	 * the register address is not linear on each umc instance */
	umc_inst = index / 4;
	ch_inst = index % 4;

	return adev->umc.channel_offs * ch_inst + UMC_V6_7_INST_DIST * umc_inst;
}

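/* Dump the raw MCA STATUS/IPID/SYND/MISC0 bank registers for one channel so
 * a reported error can be decoded offline; only non-zero registers are
 * printed to keep the log quiet.
 */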
static void umc_v6_7_query_error_status_helper(struct amdgpu_device *adev,
					       uint64_t mc_umc_status, uint32_t umc_reg_offset)
{
	uint32_t mc_umc_addr;
	uint64_t reg_value;

	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1)
		dev_info(adev->dev, "Deferred error, no user action is needed.\n");

	if (mc_umc_status)
		dev_info(adev->dev, "MCA STATUS 0x%llx, umc_reg_offset 0x%x\n", mc_umc_status, umc_reg_offset);

	/* print IPID register value */
	mc_umc_addr =
		SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_IPIDT0);
	reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
	if (reg_value)
		dev_info(adev->dev, "MCA IPID 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);

	/* print SYND register value */
	mc_umc_addr =
		SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_SYNDT0);
	reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
	if (reg_value)
		dev_info(adev->dev, "MCA SYND 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);

	/* print MISC0 register value */
	mc_umc_addr =
		SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_MISC0T0);
	reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
	if (reg_value)
		dev_info(adev->dev, "MCA MISC0 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);
}

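/* The ecc_info_* variants below read error state from the ras->umc_ecc table
 * (populated by firmware/host) instead of touching the UMC registers
 * directly; this path is used when direct register access is not available.
 */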
static void umc_v6_7_ecc_info_query_correctable_error_count(struct amdgpu_device *adev,
						   uint32_t umc_inst, uint32_t ch_inst,
						   unsigned long *error_count)
{
	uint64_t mc_umc_status;
	uint32_t eccinfo_table_idx;
	uint32_t umc_reg_offset;
	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);

	umc_reg_offset = get_umc_v6_7_reg_offset(adev,
						umc_inst, ch_inst);

	eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
	/* check for SRAM correctable error,
	 * MCUMC_STATUS is a 64 bit register */
	mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) {
		*error_count += 1;

		umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);

		if (ras->umc_ecc.record_ce_addr_supported) {
			uint64_t err_addr, soc_pa;
			uint32_t channel_index =
				adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];

			err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_ceumc_addr;
			err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
			/* translate umc channel address to soc pa, 3 parts are included */
			soc_pa = ADDR_OF_8KB_BLOCK(err_addr) |
					ADDR_OF_256B_BLOCK(channel_index) |
					OFFSET_IN_256B_BLOCK(err_addr);

			/* The umc channel bits are not original values, they are hashed */
			SET_CHANNEL_HASH(channel_index, soc_pa);

			dev_info(adev->dev, "Error Address(PA): 0x%llx\n", soc_pa);
		}
	}
}

static void umc_v6_7_ecc_info_query_uncorrectable_error_count(struct amdgpu_device *adev,
							  uint32_t umc_inst, uint32_t ch_inst,
							  unsigned long *error_count)
{
	uint64_t mc_umc_status;
	uint32_t eccinfo_table_idx;
	uint32_t umc_reg_offset;
	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);

	umc_reg_offset = get_umc_v6_7_reg_offset(adev,
						umc_inst, ch_inst);

	eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
	/* check the MCUMC_STATUS */
	mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
	if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) {
		*error_count += 1;

		umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);
	}
}

static void umc_v6_7_ecc_info_query_ras_error_count(struct amdgpu_device *adev,
					   void *ras_error_status)
{
	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;

	uint32_t umc_inst        = 0;
	uint32_t ch_inst         = 0;

	/* TODO: driver needs to toggle DF Cstate to ensure
	 * safe access of UMC registers. Will add the protection */
	LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
		umc_v6_7_ecc_info_query_correctable_error_count(adev,
						      umc_inst, ch_inst,
						      &(err_data->ce_count));
		umc_v6_7_ecc_info_query_uncorrectable_error_count(adev,
						      umc_inst, ch_inst,
						      &(err_data->ue_count));
	}
}

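/* Expand one reported UMC normalized address into the set of SoC physical
 * pages to retire. The column bits [C4 C3 C2] and row bit R14 of the failing
 * location are not fixed by the reported address, so every combination of
 * them is translated and recorded.
 */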
void umc_v6_7_convert_error_address(struct amdgpu_device *adev,
				    struct ras_err_data *err_data, uint64_t err_addr,
				    uint32_t ch_inst, uint32_t umc_inst)
{
	uint32_t channel_index;
	uint64_t soc_pa, retired_page, column;

	channel_index =
		adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
	/* translate umc channel address to soc pa, 3 parts are included */
	soc_pa = ADDR_OF_8KB_BLOCK(err_addr) |
			ADDR_OF_256B_BLOCK(channel_index) |
			OFFSET_IN_256B_BLOCK(err_addr);

	/* The umc channel bits are not original values, they are hashed */
	SET_CHANNEL_HASH(channel_index, soc_pa);

	/* clear [C4 C3 C2] in soc physical address */
	soc_pa &= ~(0x7ULL << UMC_V6_7_PA_C2_BIT);

	/* loop for all possibilities of [C4 C3 C2] */
	for (column = 0; column < UMC_V6_7_NA_MAP_PA_NUM; column++) {
		retired_page = soc_pa | (column << UMC_V6_7_PA_C2_BIT);
		dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page);
		amdgpu_umc_fill_error_record(err_data, err_addr,
			retired_page, channel_index, umc_inst);

		/* flip R14 bit */
		retired_page ^= (0x1ULL << UMC_V6_7_PA_R14_BIT);
		dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page);
		amdgpu_umc_fill_error_record(err_data, err_addr,
			retired_page, channel_index, umc_inst);
	}
}

static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev,
					 struct ras_err_data *err_data,
					 uint32_t ch_inst,
					 uint32_t umc_inst)
{
	uint64_t mc_umc_status, err_addr;
	uint32_t eccinfo_table_idx;
	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);

	eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
	mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;

	if (mc_umc_status == 0)
		return;

	if (!err_data->err_addr)
		return;

	/* calculate error address if ue error is detected */
	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) {

		err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_addr;
		err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);

		umc_v6_7_convert_error_address(adev, err_data, err_addr,
					ch_inst, umc_inst);
	}
}

static void umc_v6_7_ecc_info_query_ras_error_address(struct amdgpu_device *adev,
					     void *ras_error_status)
{
	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;

	uint32_t umc_inst        = 0;
	uint32_t ch_inst         = 0;

	/* TODO: driver needs to toggle DF Cstate to ensure
	 * safe access of UMC registers. Will add the protection
	 * when firmware interface is ready */
	LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
		umc_v6_7_ecc_info_query_error_address(adev,
					     err_data,
					     ch_inst,
					     umc_inst);
	}
}

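/* Per-channel CE accounting: each channel has one EccErrCnt counter shared
 * between the two chip selects via EccErrCntCsSel, and the counter is
 * preloaded with UMC_V6_7_CE_CNT_INIT so that the delta from that baseline
 * is the number of new correctable errors.
 */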
static void umc_v6_7_query_correctable_error_count(struct amdgpu_device *adev,
						   uint32_t umc_reg_offset,
						   unsigned long *error_count,
						   uint32_t ch_inst,
						   uint32_t umc_inst)
{
	uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
	uint32_t ecc_err_cnt, ecc_err_cnt_addr;
	uint64_t mc_umc_status;
	uint32_t mc_umc_status_addr;

	/* UMC 6_7_0 registers */
	ecc_err_cnt_sel_addr =
		SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_EccErrCntSel);
	ecc_err_cnt_addr =
		SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_EccErrCnt);
	mc_umc_status_addr =
		SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);

	/* select the lower chip and check the error count */
	ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4);
	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
					EccErrCntCsSel, 0);
	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);

	ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
	*error_count +=
		(REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) -
		 UMC_V6_7_CE_CNT_INIT);

	/* select the higher chip and check the error count */
	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
					EccErrCntCsSel, 1);
	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);

	ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
	*error_count +=
		(REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) -
		 UMC_V6_7_CE_CNT_INIT);

	/* check for SRAM correctable error,
	 * MCUMC_STATUS is a 64 bit register */
	mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) {
		*error_count += 1;

		umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);

		{
			uint64_t err_addr, soc_pa;
			uint32_t mc_umc_addrt0;
			uint32_t channel_index;

			mc_umc_addrt0 =
				SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_ADDRT0);

			channel_index =
				adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];

			err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
			err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);

			/* translate umc channel address to soc pa, 3 parts are included */
			soc_pa = ADDR_OF_8KB_BLOCK(err_addr) |
					ADDR_OF_256B_BLOCK(channel_index) |
					OFFSET_IN_256B_BLOCK(err_addr);

			/* The umc channel bits are not original values, they are hashed */
			SET_CHANNEL_HASH(channel_index, soc_pa);

			dev_info(adev->dev, "Error Address(PA): 0x%llx\n", soc_pa);
		}
	}
}

static void umc_v6_7_query_uncorrectable_error_count(struct amdgpu_device *adev,
						      uint32_t umc_reg_offset,
						      unsigned long *error_count)
{
	uint64_t mc_umc_status;
	uint32_t mc_umc_status_addr;

	mc_umc_status_addr =
		SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);

	/* check the MCUMC_STATUS */
	mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
	if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) {
		*error_count += 1;

		umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);
	}
}

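/* Re-arm the correctable error counters for one channel by writing the
 * UMC_V6_7_CE_CNT_INIT baseline back to EccErrCnt for both chip selects.
 */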
static void umc_v6_7_reset_error_count_per_channel(struct amdgpu_device *adev,
						   uint32_t umc_reg_offset)
{
	uint32_t ecc_err_cnt_addr;
	uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;

	ecc_err_cnt_sel_addr =
		SOC15_REG_OFFSET(UMC, 0,
				regUMCCH0_0_EccErrCntSel);
	ecc_err_cnt_addr =
		SOC15_REG_OFFSET(UMC, 0,
				regUMCCH0_0_EccErrCnt);

	/* select the lower chip */
	ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr +
				       umc_reg_offset) * 4);
	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel,
					UMCCH0_0_EccErrCntSel,
					EccErrCntCsSel, 0);
	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4,
			ecc_err_cnt_sel);

	/* clear lower chip error count */
	WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
			UMC_V6_7_CE_CNT_INIT);

	/* select the higher chip */
	ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr +
					umc_reg_offset) * 4);
	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel,
					UMCCH0_0_EccErrCntSel,
					EccErrCntCsSel, 1);
	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4,
			ecc_err_cnt_sel);

	/* clear higher chip error count */
	WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
			UMC_V6_7_CE_CNT_INIT);
}

static void umc_v6_7_reset_error_count(struct amdgpu_device *adev)
{
	uint32_t umc_inst        = 0;
	uint32_t ch_inst         = 0;
	uint32_t umc_reg_offset  = 0;

	LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
		umc_reg_offset = get_umc_v6_7_reg_offset(adev,
							 umc_inst,
							 ch_inst);

		umc_v6_7_reset_error_count_per_channel(adev,
						       umc_reg_offset);
	}
}

static void umc_v6_7_query_ras_error_count(struct amdgpu_device *adev,
					   void *ras_error_status)
{
	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;

	uint32_t umc_inst        = 0;
	uint32_t ch_inst         = 0;
	uint32_t umc_reg_offset  = 0;

	/* TODO: driver needs to toggle DF Cstate to ensure
	 * safe access of UMC registers. Will add the protection */
	LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
		umc_reg_offset = get_umc_v6_7_reg_offset(adev,
							 umc_inst,
							 ch_inst);
		umc_v6_7_query_correctable_error_count(adev,
						       umc_reg_offset,
						       &(err_data->ce_count),
						       ch_inst, umc_inst);
		umc_v6_7_query_uncorrectable_error_count(adev,
							 umc_reg_offset,
							 &(err_data->ue_count));
	}

	umc_v6_7_reset_error_count(adev);
}

static void umc_v6_7_query_error_address(struct amdgpu_device *adev,
					 struct ras_err_data *err_data,
					 uint32_t umc_reg_offset, uint32_t ch_inst,
					 uint32_t umc_inst)
{
	uint32_t mc_umc_status_addr;
	uint64_t mc_umc_status = 0, mc_umc_addrt0, err_addr;

	mc_umc_status_addr =
		SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
	mc_umc_addrt0 =
		SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_ADDRT0);

	mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);

	if (mc_umc_status == 0)
		return;

	if (!err_data->err_addr) {
		/* clear umc status */
		WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
		return;
	}

	/* calculate error address if ue error is detected */
	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) {
		err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
		err_addr =
			REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);

		umc_v6_7_convert_error_address(adev, err_data, err_addr,
					ch_inst, umc_inst);
	}

	/* clear umc status */
	WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
}

static void umc_v6_7_query_ras_error_address(struct amdgpu_device *adev,
					     void *ras_error_status)
{
	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;

	uint32_t umc_inst        = 0;
	uint32_t ch_inst         = 0;
	uint32_t umc_reg_offset  = 0;

	/* TODO: driver needs to toggle DF Cstate to ensure
	 * safe access of UMC registers. Will add the protection
	 * when firmware interface is ready */
	LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
		umc_reg_offset = get_umc_v6_7_reg_offset(adev,
							 umc_inst,
							 ch_inst);
		umc_v6_7_query_error_address(adev,
					     err_data,
					     umc_reg_offset, ch_inst,
					     umc_inst);
	}
}

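/* Poison mode and fatal-error mode are mutually exclusive: when UCFatalEn is
 * set, the UMC raises a fatal error on uncorrectable errors instead of
 * poisoning the data, so poison mode is reported as the inverse of that bit.
 */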
static uint32_t umc_v6_7_query_ras_poison_mode_per_channel(
						struct amdgpu_device *adev,
						uint32_t umc_reg_offset)
{
	uint32_t ecc_ctrl_addr, ecc_ctrl;

	ecc_ctrl_addr =
		SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_EccCtrl);
	ecc_ctrl = RREG32_PCIE((ecc_ctrl_addr +
					umc_reg_offset) * 4);

	return REG_GET_FIELD(ecc_ctrl, UMCCH0_0_EccCtrl, UCFatalEn);
}

static bool umc_v6_7_query_ras_poison_mode(struct amdgpu_device *adev)
{
	uint32_t umc_reg_offset  = 0;

	/* Fatal error being enabled in umc instance0 channel0 is
	 * considered as fatal error mode
	 */
	umc_reg_offset = get_umc_v6_7_reg_offset(adev, 0, 0);
	return !umc_v6_7_query_ras_poison_mode_per_channel(adev, umc_reg_offset);
}

const struct amdgpu_ras_block_hw_ops umc_v6_7_ras_hw_ops = {
	.query_ras_error_count = umc_v6_7_query_ras_error_count,
	.query_ras_error_address = umc_v6_7_query_ras_error_address,
};

struct amdgpu_umc_ras umc_v6_7_ras = {
	.ras_block = {
		.hw_ops = &umc_v6_7_ras_hw_ops,
	},
	.query_ras_poison_mode = umc_v6_7_query_ras_poison_mode,
	.ecc_info_query_ras_error_count = umc_v6_7_ecc_info_query_ras_error_count,
	.ecc_info_query_ras_error_address = umc_v6_7_ecc_info_query_ras_error_address,
};