/*
 * Copyright 2019 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include "umc_v6_1.h"
#include "amdgpu_ras.h"
#include "amdgpu_umc.h"
#include "amdgpu.h"

#include "rsmu/rsmu_0_0_2_offset.h"
#include "rsmu/rsmu_0_0_2_sh_mask.h"
#include "umc/umc_6_1_1_offset.h"
#include "umc/umc_6_1_1_sh_mask.h"
#include "umc/umc_6_1_2_offset.h"

/* register address stride between two consecutive UMC instances */
#define UMC_6_INST_DIST                 0x40000

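/*
 * Mapping from (UMC instance, channel instance) to the physical channel
 * index used for address translation and bad-page records.
 */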
const uint32_t
        umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM] = {
                {2, 18, 11, 27},        {4, 20, 13, 29},
                {1, 17, 8, 24},         {7, 23, 14, 30},
                {10, 26, 3, 19},        {12, 28, 5, 21},
                {9, 25, 0, 16},         {15, 31, 6, 22}
};

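/*
 * The RSMU UMC index mode routes UMC register accesses through an index
 * register; the helpers below toggle it off around the RAS register walks
 * in this file, presumably so each channel's registers can be addressed
 * directly via per-instance offsets. The PCIE accessors take byte
 * addresses, hence the "* 4" applied to the dword-based register offsets
 * throughout this file.
 */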
static void umc_v6_1_enable_umc_index_mode(struct amdgpu_device *adev)
{
        uint32_t rsmu_umc_addr, rsmu_umc_val;

        rsmu_umc_addr = SOC15_REG_OFFSET(RSMU, 0,
                        mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU);
        rsmu_umc_val = RREG32_PCIE(rsmu_umc_addr * 4);

        rsmu_umc_val = REG_SET_FIELD(rsmu_umc_val,
                        RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
                        RSMU_UMC_INDEX_MODE_EN, 1);

        WREG32_PCIE(rsmu_umc_addr * 4, rsmu_umc_val);
}

static void umc_v6_1_disable_umc_index_mode(struct amdgpu_device *adev)
{
        uint32_t rsmu_umc_addr, rsmu_umc_val;

        rsmu_umc_addr = SOC15_REG_OFFSET(RSMU, 0,
                        mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU);
        rsmu_umc_val = RREG32_PCIE(rsmu_umc_addr * 4);

        rsmu_umc_val = REG_SET_FIELD(rsmu_umc_val,
                        RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
                        RSMU_UMC_INDEX_MODE_EN, 0);

        WREG32_PCIE(rsmu_umc_addr * 4, rsmu_umc_val);
}

static uint32_t umc_v6_1_get_umc_index_mode_state(struct amdgpu_device *adev)
{
        uint32_t rsmu_umc_addr, rsmu_umc_val;

        rsmu_umc_addr = SOC15_REG_OFFSET(RSMU, 0,
                        mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU);
        rsmu_umc_val = RREG32_PCIE(rsmu_umc_addr * 4);

        return REG_GET_FIELD(rsmu_umc_val,
                        RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
                        RSMU_UMC_INDEX_MODE_EN);
}

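/*
 * Compute the register offset of one channel: channels within a UMC
 * instance are channel_offs apart, and UMC instances are UMC_6_INST_DIST
 * apart.
 */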
static inline uint32_t get_umc_6_reg_offset(struct amdgpu_device *adev,
                                            uint32_t umc_inst,
                                            uint32_t ch_inst)
{
        return adev->umc.channel_offs * ch_inst + UMC_6_INST_DIST * umc_inst;
}

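/*
 * Reset both per-chip-select ECC error counters of one channel to their
 * initial value. EccErrCntCsSel switches the visible counter between the
 * lower and the higher chip select.
 */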
static void umc_v6_1_clear_error_count_per_channel(struct amdgpu_device *adev,
                                        uint32_t umc_reg_offset)
{
        uint32_t ecc_err_cnt_addr;
        uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;

        if (adev->asic_type == CHIP_ARCTURUS) {
                /* UMC 6_1_2 registers */
                ecc_err_cnt_sel_addr =
                        SOC15_REG_OFFSET(UMC, 0,
                                        mmUMCCH0_0_EccErrCntSel_ARCT);
                ecc_err_cnt_addr =
                        SOC15_REG_OFFSET(UMC, 0,
                                        mmUMCCH0_0_EccErrCnt_ARCT);
        } else {
                /* UMC 6_1_1 registers */
                ecc_err_cnt_sel_addr =
                        SOC15_REG_OFFSET(UMC, 0,
                                        mmUMCCH0_0_EccErrCntSel);
                ecc_err_cnt_addr =
                        SOC15_REG_OFFSET(UMC, 0,
                                        mmUMCCH0_0_EccErrCnt);
        }

        /* select the lower chip */
        ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr +
                                        umc_reg_offset) * 4);
        ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel,
                                        UMCCH0_0_EccErrCntSel,
                                        EccErrCntCsSel, 0);
        WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4,
                        ecc_err_cnt_sel);

        /* clear lower chip error count */
        WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
                        UMC_V6_1_CE_CNT_INIT);

        /* select the higher chip */
        ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr +
                                        umc_reg_offset) * 4);
        ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel,
                                        UMCCH0_0_EccErrCntSel,
                                        EccErrCntCsSel, 1);
        WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4,
                        ecc_err_cnt_sel);

        /* clear higher chip error count */
        WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
                        UMC_V6_1_CE_CNT_INIT);
}

static void umc_v6_1_clear_error_count(struct amdgpu_device *adev)
{
        uint32_t umc_inst        = 0;
        uint32_t ch_inst         = 0;
        uint32_t umc_reg_offset  = 0;
        uint32_t rsmu_umc_index_state =
                                umc_v6_1_get_umc_index_mode_state(adev);

        if (rsmu_umc_index_state)
                umc_v6_1_disable_umc_index_mode(adev);

        LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
                umc_reg_offset = get_umc_6_reg_offset(adev,
                                                umc_inst,
                                                ch_inst);

                umc_v6_1_clear_error_count_per_channel(adev,
                                                umc_reg_offset);
        }

        if (rsmu_umc_index_state)
                umc_v6_1_enable_umc_index_mode(adev);
}

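/*
 * Accumulate the correctable error count of one channel: read both
 * chip-select counters, subtracting the nonzero value they are initialized
 * with, and also count an SRAM correctable error reported through the MCA
 * status register.
 */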
static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev,
                                                   uint32_t umc_reg_offset,
                                                   unsigned long *error_count)
{
        uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
        uint32_t ecc_err_cnt, ecc_err_cnt_addr;
        uint64_t mc_umc_status;
        uint32_t mc_umc_status_addr;

        if (adev->asic_type == CHIP_ARCTURUS) {
                /* UMC 6_1_2 registers */
                ecc_err_cnt_sel_addr =
                        SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel_ARCT);
                ecc_err_cnt_addr =
                        SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt_ARCT);
                mc_umc_status_addr =
                        SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT);
        } else {
                /* UMC 6_1_1 registers */
                ecc_err_cnt_sel_addr =
                        SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel);
                ecc_err_cnt_addr =
                        SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt);
                mc_umc_status_addr =
                        SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
        }

        /* select the lower chip and check the error count */
        ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4);
        ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
                                        EccErrCntCsSel, 0);
        WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);

        ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
        *error_count +=
                (REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) -
                 UMC_V6_1_CE_CNT_INIT);

        /* select the higher chip and check the error count */
        ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
                                        EccErrCntCsSel, 1);
        WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);

        ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
        *error_count +=
                (REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) -
                 UMC_V6_1_CE_CNT_INIT);

        /* check for SRAM correctable error;
         * MCUMC_STATUS is a 64 bit register
         */
        mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
        if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 6 &&
            REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
            REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)
                *error_count += 1;
}

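/*
 * Count one uncorrectable error for this channel if the MCA status
 * register reports a valid error that is deferred, an uncorrected ECC
 * error, or flags processor/transaction context corruption.
 */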
static void umc_v6_1_query_uncorrectable_error_count(struct amdgpu_device *adev,
                                                     uint32_t umc_reg_offset,
                                                     unsigned long *error_count)
{
        uint64_t mc_umc_status;
        uint32_t mc_umc_status_addr;

        if (adev->asic_type == CHIP_ARCTURUS) {
                /* UMC 6_1_2 registers */
                mc_umc_status_addr =
                        SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT);
        } else {
                /* UMC 6_1_1 registers */
                mc_umc_status_addr =
                        SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
        }

        /* check the MCUMC_STATUS */
        mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
        if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
            (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
            REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
            REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
            REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
            REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1))
                *error_count += 1;
}

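/*
 * hw_ops callback: walk every UMC instance and channel and accumulate the
 * correctable and uncorrectable error counts into the RAS error data. On
 * Arcturus, DF C-state entry is disallowed for the duration of the walk,
 * presumably because C-state transitions can interfere with UMC register
 * access.
 */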
static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev,
                                           void *ras_error_status)
{
        struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;

        uint32_t umc_inst        = 0;
        uint32_t ch_inst         = 0;
        uint32_t umc_reg_offset  = 0;

        uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev);

        if (rsmu_umc_index_state)
                umc_v6_1_disable_umc_index_mode(adev);

        if ((adev->asic_type == CHIP_ARCTURUS) &&
                amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
                DRM_WARN("Failed to disable DF-Cstate.\n");

        LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
                umc_reg_offset = get_umc_6_reg_offset(adev,
                                                      umc_inst,
                                                      ch_inst);

                umc_v6_1_query_correctable_error_count(adev,
                                                       umc_reg_offset,
                                                       &(err_data->ce_count));
                umc_v6_1_query_uncorrectable_error_count(adev,
                                                         umc_reg_offset,
                                                         &(err_data->ue_count));
        }

        if ((adev->asic_type == CHIP_ARCTURUS) &&
                amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW))
                DRM_WARN("Failed to enable DF-Cstate.\n");

        if (rsmu_umc_index_state)
                umc_v6_1_enable_umc_index_mode(adev);

        umc_v6_1_clear_error_count(adev);
}

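/*
 * For a channel that reported an uncorrectable ECC error, read the raw
 * error address, mask off the bits below LSB, translate it to a SoC
 * physical address and record the retired page; the status register is
 * cleared afterwards in every case.
 */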
static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
                                         struct ras_err_data *err_data,
                                         uint32_t umc_reg_offset,
                                         uint32_t ch_inst,
                                         uint32_t umc_inst)
{
        uint32_t lsb, mc_umc_status_addr;
        uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0;
        uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];

        if (adev->asic_type == CHIP_ARCTURUS) {
                /* UMC 6_1_2 registers */
                mc_umc_status_addr =
                        SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT);
                mc_umc_addrt0 =
                        SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_ADDRT0_ARCT);
        } else {
                /* UMC 6_1_1 registers */
                mc_umc_status_addr =
                        SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
                mc_umc_addrt0 =
                        SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_ADDRT0);
        }

        mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);

        if (mc_umc_status == 0)
                return;

        if (!err_data->err_addr) {
                /* clear umc status */
                WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
                return;
        }

        /* calculate error address if ue error is detected */
        if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
            REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) {

                err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
                /* the lowest lsb bits should be ignored */
                lsb = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, LSB);
                err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
                err_addr &= ~((0x1ULL << lsb) - 1);

                /* translate umc channel address to soc pa, 3 parts are included */
                retired_page = ADDR_OF_8KB_BLOCK(err_addr) |
                                ADDR_OF_256B_BLOCK(channel_index) |
                                OFFSET_IN_256B_BLOCK(err_addr);

                amdgpu_umc_fill_error_record(err_data, err_addr,
                                        retired_page, channel_index, umc_inst);
        }

        /* clear umc status */
        WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
}

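/*
 * hw_ops callback: walk every UMC instance and channel and collect the
 * addresses of uncorrectable errors, with the same index-mode and
 * DF C-state handling as the error-count query above.
 */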
static void umc_v6_1_query_ras_error_address(struct amdgpu_device *adev,
                                             void *ras_error_status)
{
        struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;

        uint32_t umc_inst        = 0;
        uint32_t ch_inst         = 0;
        uint32_t umc_reg_offset  = 0;

        uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev);

        if (rsmu_umc_index_state)
                umc_v6_1_disable_umc_index_mode(adev);

        if ((adev->asic_type == CHIP_ARCTURUS) &&
                amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
                DRM_WARN("Failed to disable DF-Cstate.\n");

        LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
                umc_reg_offset = get_umc_6_reg_offset(adev,
                                                      umc_inst,
                                                      ch_inst);

                umc_v6_1_query_error_address(adev,
                                             err_data,
                                             umc_reg_offset,
                                             ch_inst,
                                             umc_inst);
        }

        if ((adev->asic_type == CHIP_ARCTURUS) &&
                amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW))
                DRM_WARN("Failed to enable DF-Cstate.\n");

        if (rsmu_umc_index_state)
                umc_v6_1_enable_umc_index_mode(adev);
}

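/*
 * Arm one channel for error counting: set the correctable error interrupt
 * to be APIC based and load both chip-select counters with
 * UMC_V6_1_CE_CNT_INIT; later reads subtract this initial value to recover
 * the real count.
 */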
static void umc_v6_1_err_cnt_init_per_channel(struct amdgpu_device *adev,
                                              uint32_t umc_reg_offset)
{
        uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
        uint32_t ecc_err_cnt_addr;

        if (adev->asic_type == CHIP_ARCTURUS) {
                /* UMC 6_1_2 registers */
                ecc_err_cnt_sel_addr =
                        SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel_ARCT);
                ecc_err_cnt_addr =
                        SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt_ARCT);
        } else {
                /* UMC 6_1_1 registers */
                ecc_err_cnt_sel_addr =
                        SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel);
                ecc_err_cnt_addr =
                        SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt);
        }

        /* select the lower chip */
        ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4);
        ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
                                        EccErrCntCsSel, 0);
        /* set ce error interrupt type to APIC based interrupt */
        ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
                                        EccErrInt, 0x1);
        WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
        /* set error count to initial value */
        WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT);

        /* select the higher chip and set its error count to the same value */
        ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
                                        EccErrCntCsSel, 1);
        WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
        WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT);
}

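/*
 * err_cnt_init callback: initialize the error counters of every channel on
 * every UMC instance, temporarily dropping out of RSMU index mode as in
 * the query paths above.
 */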
static void umc_v6_1_err_cnt_init(struct amdgpu_device *adev)
{
        uint32_t umc_inst        = 0;
        uint32_t ch_inst         = 0;
        uint32_t umc_reg_offset  = 0;

        uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev);

        if (rsmu_umc_index_state)
                umc_v6_1_disable_umc_index_mode(adev);

        LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
                umc_reg_offset = get_umc_6_reg_offset(adev,
                                                      umc_inst,
                                                      ch_inst);

                umc_v6_1_err_cnt_init_per_channel(adev, umc_reg_offset);
        }

        if (rsmu_umc_index_state)
                umc_v6_1_enable_umc_index_mode(adev);
}

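/* RAS entry points exposed to the amdgpu RAS core for UMC 6.1 */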
const struct amdgpu_ras_block_hw_ops umc_v6_1_ras_hw_ops = {
        .query_ras_error_count = umc_v6_1_query_ras_error_count,
        .query_ras_error_address = umc_v6_1_query_ras_error_address,
};

struct amdgpu_umc_ras umc_v6_1_ras = {
        .ras_block = {
                .hw_ops = &umc_v6_1_ras_hw_ops,
        },
        .err_cnt_init = umc_v6_1_err_cnt_init,
};