// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2020-2022 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "gaudi2P.h"
#include "gaudi2_masks.h"
#include "../include/gaudi2/gaudi2_special_blocks.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v2_0.h"
#include "../include/gaudi2/gaudi2_packets.h"
#include "../include/gaudi2/gaudi2_reg_map.h"
#include "../include/gaudi2/gaudi2_async_ids_map_extended.h"
#include "../include/gaudi2/arc/gaudi2_arc_common_packets.h"

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/hwmon.h>
#include <linux/iommu.h>
#define GAUDI2_DMA_POOL_BLK_SIZE		SZ_256		/* 256 bytes */

#define GAUDI2_RESET_TIMEOUT_MSEC		2000		/* 2000ms */

#define GAUDI2_RESET_POLL_TIMEOUT_USEC		500000		/* 500ms */
#define GAUDI2_PLDM_HRESET_TIMEOUT_MSEC		25000		/* 25s */
#define GAUDI2_PLDM_SRESET_TIMEOUT_MSEC		25000		/* 25s */
#define GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC	3000000		/* 3s */
#define GAUDI2_RESET_POLL_CNT			3
#define GAUDI2_RESET_WAIT_MSEC			1		/* 1ms */
#define GAUDI2_CPU_RESET_WAIT_MSEC		100		/* 100ms */
#define GAUDI2_PLDM_RESET_WAIT_MSEC		1000		/* 1s */
#define GAUDI2_CB_POOL_CB_CNT			512
#define GAUDI2_CB_POOL_CB_SIZE			SZ_128K		/* 128KB */
#define GAUDI2_MSG_TO_CPU_TIMEOUT_USEC		4000000		/* 4s */
#define GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC		25000000	/* 25s */
#define GAUDI2_TEST_QUEUE_WAIT_USEC		100000		/* 100ms */
#define GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */

#define GAUDI2_ALLOC_CPU_MEM_RETRY_CNT		3
/*
 * Since the code already has built-in support for binning of up to
 * MAX_FAULTY_TPCS TPCs, and relies on that value (for array sizes etc.),
 * we define another value for the maximum number of faulty TPCs which
 * reflects the cluster binning requirements.
 */
#define MAX_CLUSTER_BINNING_FAULTY_TPCS		1
#define MAX_FAULTY_XBARS			1
#define MAX_FAULTY_EDMAS			1
#define MAX_FAULTY_DECODERS			1

#define GAUDI2_TPC_FULL_MASK			0x1FFFFFF
#define GAUDI2_HIF_HMMU_FULL_MASK		0xFFFF
#define GAUDI2_DECODER_FULL_MASK		0x3FF

#define GAUDI2_NA_EVENT_CAUSE			0xFF
#define GAUDI2_NUM_OF_QM_ERR_CAUSE		18
#define GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE		25
#define GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE		3
#define GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE		14
#define GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE		3
#define GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE		2
#define GAUDI2_NUM_OF_ROT_ERR_CAUSE		22
#define GAUDI2_NUM_OF_TPC_INTR_CAUSE		30
#define GAUDI2_NUM_OF_DEC_ERR_CAUSE		25
#define GAUDI2_NUM_OF_MME_ERR_CAUSE		16
#define GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE	5
#define GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE		7
#define GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE	8
#define GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE		19
#define GAUDI2_NUM_OF_HBM_SEI_CAUSE		9
#define GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE		3
#define GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE	3
#define GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE	2
#define GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE	2
#define GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE	2
#define GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE		5

#define GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 10)
#define GAUDI2_PLDM_MMU_TIMEOUT_USEC		(MMU_CONFIG_TIMEOUT_USEC * 200)
#define GAUDI2_ARB_WDT_TIMEOUT			(0x1000000)

#define GAUDI2_VDEC_TIMEOUT_USEC		10000		/* 10ms */
#define GAUDI2_PLDM_VDEC_TIMEOUT_USEC		(GAUDI2_VDEC_TIMEOUT_USEC * 100)

#define KDMA_TIMEOUT_USEC			USEC_PER_SEC

#define IS_DMA_IDLE(dma_core_sts0)	\
	(!((dma_core_sts0) & (DCORE0_EDMA0_CORE_STS0_BUSY_MASK)))

#define IS_DMA_HALTED(dma_core_sts1)	\
	((dma_core_sts1) & (DCORE0_EDMA0_CORE_STS1_IS_HALT_MASK))

#define IS_MME_IDLE(mme_arch_sts) (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)

#define IS_TPC_IDLE(tpc_cfg_sts) (((tpc_cfg_sts) & (TPC_IDLE_MASK)) == (TPC_IDLE_MASK))

#define IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts)	\
	((((qm_glbl_sts0) & (QM_IDLE_MASK)) == (QM_IDLE_MASK)) && \
	(((qm_glbl_sts1) & (QM_ARC_IDLE_MASK)) == (QM_ARC_IDLE_MASK)) && \
	(((qm_cgm_sts) & (CGM_IDLE_MASK)) == (CGM_IDLE_MASK)))

#define PCIE_DEC_EN_MASK			0x300
#define DEC_WORK_STATE_IDLE			0
#define DEC_WORK_STATE_PEND			3
#define IS_DEC_IDLE(dec_swreg15) \
	(((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) == DEC_WORK_STATE_IDLE || \
	((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) == DEC_WORK_STATE_PEND)
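
/*
 * Illustrative sketch only (not part of the original driver): how the
 * idle-check macros above typically compose for a single EDMA engine.
 * The helper name and the idea of passing raw status-register values in
 * are assumptions made for this example.
 */
static inline bool gaudi2_example_edma_engine_idle(u32 dma_core_sts0,
					u32 qm_glbl_sts0, u32 qm_glbl_sts1, u32 qm_cgm_sts)
{
	/* An engine counts as idle only if both its DMA core and its QMAN are idle */
	return IS_DMA_IDLE(dma_core_sts0) &&
		IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
}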

/* HBM MMU address scrambling parameters */
#define GAUDI2_HBM_MMU_SCRM_MEM_SIZE		SZ_8M
#define GAUDI2_HBM_MMU_SCRM_DIV_SHIFT		26
#define GAUDI2_HBM_MMU_SCRM_MOD_SHIFT		0
#define GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK	DRAM_VA_HINT_MASK
#define GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR	16
#define MMU_RANGE_INV_VA_LSB_SHIFT		12
#define MMU_RANGE_INV_VA_MSB_SHIFT		44
#define MMU_RANGE_INV_EN_SHIFT			0
#define MMU_RANGE_INV_ASID_EN_SHIFT		1
#define MMU_RANGE_INV_ASID_SHIFT		2
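
/*
 * A minimal sketch (an assumption, not the driver's actual invalidation
 * code) of how the MMU_RANGE_INV_* shifts above pack into one range
 * invalidation control word; the helper name and the u32 layout are
 * illustrative only.
 */
static inline u32 gaudi2_example_mmu_range_inv_ctrl(u32 asid, bool asid_en)
{
	u32 inv_ctrl = 1 << MMU_RANGE_INV_EN_SHIFT;

	if (asid_en)
		inv_ctrl |= (1 << MMU_RANGE_INV_ASID_EN_SHIFT) |
				(asid << MMU_RANGE_INV_ASID_SHIFT);

	return inv_ctrl;
}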

/* The last SPI_SEI cause bit, "burst_fifo_full", is expected to be triggered in PMMU because it has
 * a 2-entry FIFO, and hence it is not enabled for it.
 */
#define GAUDI2_PMMU_SPI_SEI_ENABLE_MASK		GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 2, 0)
#define GAUDI2_HMMU_SPI_SEI_ENABLE_MASK		GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 1, 0)
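
/*
 * Worked values for the two masks above, following directly from
 * GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE being 19: the PMMU mask is
 * GENMASK(17, 0) == 0x3FFFF (top cause bit left disabled) and the HMMU
 * mask is GENMASK(18, 0) == 0x7FFFF (all 19 cause bits). The checks below
 * are an illustrative sketch, not part of the original file.
 */
static_assert(GAUDI2_PMMU_SPI_SEI_ENABLE_MASK == 0x3FFFF);
static_assert(GAUDI2_HMMU_SPI_SEI_ENABLE_MASK == 0x7FFFF);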

#define GAUDI2_MAX_STRING_LEN			64

#define GAUDI2_VDEC_MSIX_ENTRIES		(GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM - \
							GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 1)

#define ENGINE_ID_DCORE_OFFSET (GAUDI2_DCORE1_ENGINE_ID_EDMA_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0)

/* RAZWI initiator coordinates */
#define RAZWI_GET_AXUSER_XY(x) \
	((x & 0xF8001FF0) >> 4)

#define RAZWI_GET_AXUSER_LOW_XY(x) \
	((x & 0x00001FF0) >> 4)

#define RAZWI_INITIATOR_AXUER_L_X_SHIFT		0
#define RAZWI_INITIATOR_AXUER_L_X_MASK		0x1F
#define RAZWI_INITIATOR_AXUER_L_Y_SHIFT		5
#define RAZWI_INITIATOR_AXUER_L_Y_MASK		0xF

#define RAZWI_INITIATOR_AXUER_H_X_SHIFT		23
#define RAZWI_INITIATOR_AXUER_H_X_MASK		0x1F

#define RAZWI_INITIATOR_ID_X_Y_LOW(x, y) \
	((((y) & RAZWI_INITIATOR_AXUER_L_Y_MASK) << RAZWI_INITIATOR_AXUER_L_Y_SHIFT) | \
	(((x) & RAZWI_INITIATOR_AXUER_L_X_MASK) << RAZWI_INITIATOR_AXUER_L_X_SHIFT))

#define RAZWI_INITIATOR_ID_X_HIGH(x) \
	(((x) & RAZWI_INITIATOR_AXUER_H_X_MASK) << RAZWI_INITIATOR_AXUER_H_X_SHIFT)

#define RAZWI_INITIATOR_ID_X_Y(xl, yl, xh) \
	(RAZWI_INITIATOR_ID_X_Y_LOW(xl, yl) | RAZWI_INITIATOR_ID_X_HIGH(xh))
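
/*
 * Worked example of the encoding above: for DEC0 in common_razwi_info
 * below, the coordinates are (xl = 2, yl = 4, xh = 0), so
 * RAZWI_INITIATOR_ID_X_Y(2, 4, 0) == ((4 & 0xF) << 5) | (2 & 0x1F) | 0 == 0x82.
 */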

#define PSOC_RAZWI_ENG_STR_SIZE			128
#define PSOC_RAZWI_MAX_ENG_PER_RTR		5

struct gaudi2_razwi_info {
	u32 axuser_xy;
	u32 rtr_ctrl;
	u16 eng_id;
	char *eng_name;
};

static struct gaudi2_razwi_info common_razwi_info[] = {
	{RAZWI_INITIATOR_ID_X_Y(2, 4, 0), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_DEC_0, "DEC0"},
	{RAZWI_INITIATOR_ID_X_Y(2, 4, 4), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_DEC_1, "DEC1"},
	{RAZWI_INITIATOR_ID_X_Y(17, 4, 18), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_DEC_0, "DEC2"},
	{RAZWI_INITIATOR_ID_X_Y(17, 4, 14), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_DEC_1, "DEC3"},
	{RAZWI_INITIATOR_ID_X_Y(2, 11, 0), mmDCORE2_RTR0_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_DEC_0, "DEC4"},
	{RAZWI_INITIATOR_ID_X_Y(2, 11, 4), mmDCORE2_RTR0_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_DEC_1, "DEC5"},
	{RAZWI_INITIATOR_ID_X_Y(17, 11, 18), mmDCORE3_RTR7_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_DEC_0, "DEC6"},
	{RAZWI_INITIATOR_ID_X_Y(17, 11, 14), mmDCORE3_RTR7_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_DEC_1, "DEC7"},
	{RAZWI_INITIATOR_ID_X_Y(2, 4, 6), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_PCIE_ENGINE_ID_DEC_0, "DEC8"},
	{RAZWI_INITIATOR_ID_X_Y(2, 4, 7), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_PCIE_ENGINE_ID_DEC_1, "DEC9"},
	{RAZWI_INITIATOR_ID_X_Y(3, 4, 2), mmDCORE0_RTR1_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_TPC_0, "TPC0"},
	{RAZWI_INITIATOR_ID_X_Y(3, 4, 4), mmDCORE0_RTR1_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_TPC_1, "TPC1"},
	{RAZWI_INITIATOR_ID_X_Y(4, 4, 2), mmDCORE0_RTR2_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_TPC_2, "TPC2"},
	{RAZWI_INITIATOR_ID_X_Y(4, 4, 4), mmDCORE0_RTR2_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_TPC_3, "TPC3"},
	{RAZWI_INITIATOR_ID_X_Y(5, 4, 2), mmDCORE0_RTR3_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_TPC_4, "TPC4"},
	{RAZWI_INITIATOR_ID_X_Y(5, 4, 4), mmDCORE0_RTR3_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_TPC_5, "TPC5"},
	{RAZWI_INITIATOR_ID_X_Y(16, 4, 14), mmDCORE1_RTR6_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_TPC_0, "TPC6"},
	{RAZWI_INITIATOR_ID_X_Y(16, 4, 16), mmDCORE1_RTR6_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_TPC_1, "TPC7"},
	{RAZWI_INITIATOR_ID_X_Y(15, 4, 14), mmDCORE1_RTR5_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_TPC_2, "TPC8"},
	{RAZWI_INITIATOR_ID_X_Y(15, 4, 16), mmDCORE1_RTR5_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_TPC_3, "TPC9"},
	{RAZWI_INITIATOR_ID_X_Y(14, 4, 14), mmDCORE1_RTR4_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_TPC_4, "TPC10"},
	{RAZWI_INITIATOR_ID_X_Y(14, 4, 16), mmDCORE1_RTR4_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_TPC_5, "TPC11"},
	{RAZWI_INITIATOR_ID_X_Y(5, 11, 2), mmDCORE2_RTR3_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_TPC_0, "TPC12"},
	{RAZWI_INITIATOR_ID_X_Y(5, 11, 4), mmDCORE2_RTR3_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_TPC_1, "TPC13"},
	{RAZWI_INITIATOR_ID_X_Y(4, 11, 2), mmDCORE2_RTR2_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_TPC_2, "TPC14"},
	{RAZWI_INITIATOR_ID_X_Y(4, 11, 4), mmDCORE2_RTR2_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_TPC_3, "TPC15"},
	{RAZWI_INITIATOR_ID_X_Y(3, 11, 2), mmDCORE2_RTR1_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_TPC_4, "TPC16"},
	{RAZWI_INITIATOR_ID_X_Y(3, 11, 4), mmDCORE2_RTR1_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_TPC_5, "TPC17"},
	{RAZWI_INITIATOR_ID_X_Y(14, 11, 14), mmDCORE3_RTR4_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_TPC_0, "TPC18"},
	{RAZWI_INITIATOR_ID_X_Y(14, 11, 16), mmDCORE3_RTR4_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_TPC_1, "TPC19"},
	{RAZWI_INITIATOR_ID_X_Y(15, 11, 14), mmDCORE3_RTR5_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_TPC_2, "TPC20"},
	{RAZWI_INITIATOR_ID_X_Y(15, 11, 16), mmDCORE3_RTR5_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_TPC_3, "TPC21"},
	{RAZWI_INITIATOR_ID_X_Y(16, 11, 14), mmDCORE3_RTR6_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_TPC_4, "TPC22"},
	{RAZWI_INITIATOR_ID_X_Y(16, 11, 16), mmDCORE3_RTR6_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_TPC_5, "TPC23"},
	{RAZWI_INITIATOR_ID_X_Y(2, 4, 2), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_TPC_6, "TPC24"},
	{RAZWI_INITIATOR_ID_X_Y(17, 4, 8), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC0_0, "NIC0"},
	{RAZWI_INITIATOR_ID_X_Y(17, 4, 10), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC0_1, "NIC1"},
	{RAZWI_INITIATOR_ID_X_Y(17, 4, 12), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC1_0, "NIC2"},
	{RAZWI_INITIATOR_ID_X_Y(17, 4, 14), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC1_1, "NIC3"},
	{RAZWI_INITIATOR_ID_X_Y(17, 4, 15), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC2_0, "NIC4"},
	{RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC2_1, "NIC5"},
	{RAZWI_INITIATOR_ID_X_Y(2, 11, 4), mmDCORE2_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC3_0, "NIC6"},
	{RAZWI_INITIATOR_ID_X_Y(2, 11, 6), mmDCORE2_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC3_1, "NIC7"},
	{RAZWI_INITIATOR_ID_X_Y(2, 11, 8), mmDCORE2_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC4_0, "NIC8"},
	{RAZWI_INITIATOR_ID_X_Y(17, 11, 12), mmDCORE3_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC4_1, "NIC9"},
	{RAZWI_INITIATOR_ID_X_Y(17, 11, 14), mmDCORE3_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC5_0, "NIC10"},
	{RAZWI_INITIATOR_ID_X_Y(17, 11, 16), mmDCORE3_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC5_1, "NIC11"},
	{RAZWI_INITIATOR_ID_X_Y(2, 4, 2), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_PDMA_0, "PDMA0"},
	{RAZWI_INITIATOR_ID_X_Y(2, 4, 3), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_PDMA_1, "PDMA1"},
	{RAZWI_INITIATOR_ID_X_Y(2, 4, 4), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "PMMU"},
	{RAZWI_INITIATOR_ID_X_Y(2, 4, 5), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "PCIE"},
	{RAZWI_INITIATOR_ID_X_Y(17, 4, 16), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_ARC_FARM, "ARC_FARM"},
	{RAZWI_INITIATOR_ID_X_Y(17, 4, 17), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_KDMA, "KDMA"},
	{RAZWI_INITIATOR_ID_X_Y(1, 5, 1), mmSFT0_HBW_RTR_IF1_RTR_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_EDMA_0, "EDMA0"},
	{RAZWI_INITIATOR_ID_X_Y(1, 5, 1), mmSFT0_HBW_RTR_IF0_RTR_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_EDMA_1, "EDMA1"},
	{RAZWI_INITIATOR_ID_X_Y(18, 5, 18), mmSFT1_HBW_RTR_IF1_RTR_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_EDMA_0, "EDMA2"},
	{RAZWI_INITIATOR_ID_X_Y(18, 5, 18), mmSFT1_HBW_RTR_IF0_RTR_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_EDMA_1, "EDMA3"},
	{RAZWI_INITIATOR_ID_X_Y(1, 10, 1), mmSFT2_HBW_RTR_IF0_RTR_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_EDMA_0, "EDMA4"},
	{RAZWI_INITIATOR_ID_X_Y(1, 10, 1), mmSFT2_HBW_RTR_IF1_RTR_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_EDMA_1, "EDMA5"},
	{RAZWI_INITIATOR_ID_X_Y(18, 10, 18), mmSFT3_HBW_RTR_IF0_RTR_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_EDMA_0, "EDMA6"},
	{RAZWI_INITIATOR_ID_X_Y(18, 10, 18), mmSFT3_HBW_RTR_IF1_RTR_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_EDMA_1, "EDMA7"},
	{RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU0"},
	{RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU1"},
	{RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU2"},
	{RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU3"},
	{RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU4"},
	{RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU5"},
	{RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU6"},
	{RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU7"},
	{RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU8"},
	{RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU9"},
	{RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU10"},
	{RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU11"},
	{RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU12"},
	{RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU13"},
	{RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU14"},
	{RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU15"},
	{RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_ROT_0, "ROT0"},
	{RAZWI_INITIATOR_ID_X_Y(17, 11, 16), mmDCORE3_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_ROT_1, "ROT1"},
	{RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_PSOC, "CPU"},
	{RAZWI_INITIATOR_ID_X_Y(17, 11, 11), mmDCORE3_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_PSOC, "PSOC"}
};
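
/*
 * Illustrative lookup sketch (an assumption, not the driver's actual RAZWI
 * handler): the caller is assumed to have already extracted the initiator
 * coordinates, e.g. via RAZWI_GET_AXUSER_XY(), in the same form the table
 * above stores them.
 */
static inline const char *gaudi2_example_razwi_initiator_name(u32 axuser_xy)
{
	u32 i;

	for (i = 0 ; i < ARRAY_SIZE(common_razwi_info) ; i++)
		if (common_razwi_info[i].axuser_xy == axuser_xy)
			return common_razwi_info[i].eng_name;

	return NULL;
}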

static struct gaudi2_razwi_info mme_razwi_info[] = {
	/* MME X high coordinate is N/A, hence using only low coordinates */
	{RAZWI_INITIATOR_ID_X_Y_LOW(7, 4), mmDCORE0_RTR5_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_WAP0"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_WAP1"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(8, 4), mmDCORE0_RTR6_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_CTRL_WR"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_CTRL_RD"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(6, 4), mmDCORE0_RTR4_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE0"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(6, 4), mmDCORE0_RTR4_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE1"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(7, 4), mmDCORE0_RTR5_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE2"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(8, 4), mmDCORE0_RTR6_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE3"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE4"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(12, 4), mmDCORE1_RTR2_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_WAP0"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_WAP1"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(11, 4), mmDCORE1_RTR1_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_CTRL_WR"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_CTRL_RD"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(13, 4), mmDCORE1_RTR3_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE0"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(13, 4), mmDCORE1_RTR3_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE1"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(12, 4), mmDCORE1_RTR2_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE2"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(11, 4), mmDCORE1_RTR1_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE3"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE4"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(7, 11), mmDCORE2_RTR5_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_WAP0"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_WAP1"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(8, 11), mmDCORE2_RTR6_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_CTRL_WR"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_CTRL_RD"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(6, 11), mmDCORE2_RTR4_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE0"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(6, 11), mmDCORE2_RTR4_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE1"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(7, 11), mmDCORE2_RTR5_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE2"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(8, 11), mmDCORE2_RTR6_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE3"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE4"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(12, 11), mmDCORE3_RTR2_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_WAP0"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_WAP1"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(11, 11), mmDCORE3_RTR1_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_CTRL_WR"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_CTRL_RD"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(13, 11), mmDCORE3_RTR3_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE0"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(13, 11), mmDCORE3_RTR3_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE1"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(12, 11), mmDCORE3_RTR2_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE2"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(11, 11), mmDCORE3_RTR1_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE3"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE4"}
};

enum hl_pmmu_fatal_cause {
	LATENCY_RD_OUT_FIFO_OVERRUN,
	LATENCY_WR_OUT_FIFO_OVERRUN,
};

enum hl_pcie_drain_ind_cause {
	LBW_AXI_DRAIN_IND,
	HBW_AXI_DRAIN_IND
};

static const u32 cluster_hmmu_hif_enabled_mask[GAUDI2_HBM_NUM] = {
	[HBM_ID0] = 0xFFFC,
	[HBM_ID1] = 0xFFCF,
	[HBM_ID2] = 0xF7F7,
	[HBM_ID3] = 0x7F7F,
	[HBM_ID4] = 0xFCFF,
	[HBM_ID5] = 0xCFFF,
};

static const u8 xbar_edge_to_hbm_cluster[EDMA_ID_SIZE] = {
	[EDMA_ID_DCORE0_INSTANCE0] = HBM_ID0,
	[EDMA_ID_DCORE1_INSTANCE0] = HBM_ID1,
	[EDMA_ID_DCORE2_INSTANCE0] = HBM_ID4,
	[EDMA_ID_DCORE3_INSTANCE0] = HBM_ID5,
};

static const u8 edma_to_hbm_cluster[EDMA_ID_SIZE] = {
	[EDMA_ID_DCORE0_INSTANCE0] = HBM_ID0,
	[EDMA_ID_DCORE0_INSTANCE1] = HBM_ID2,
	[EDMA_ID_DCORE1_INSTANCE0] = HBM_ID1,
	[EDMA_ID_DCORE1_INSTANCE1] = HBM_ID3,
	[EDMA_ID_DCORE2_INSTANCE0] = HBM_ID2,
	[EDMA_ID_DCORE2_INSTANCE1] = HBM_ID4,
	[EDMA_ID_DCORE3_INSTANCE0] = HBM_ID3,
	[EDMA_ID_DCORE3_INSTANCE1] = HBM_ID5,
};

static const int gaudi2_qman_async_event_id[] = {
	[GAUDI2_QUEUE_ID_PDMA_0_0] = GAUDI2_EVENT_PDMA0_QM,
	[GAUDI2_QUEUE_ID_PDMA_0_1] = GAUDI2_EVENT_PDMA0_QM,
	[GAUDI2_QUEUE_ID_PDMA_0_2] = GAUDI2_EVENT_PDMA0_QM,
	[GAUDI2_QUEUE_ID_PDMA_0_3] = GAUDI2_EVENT_PDMA0_QM,
	[GAUDI2_QUEUE_ID_PDMA_1_0] = GAUDI2_EVENT_PDMA1_QM,
	[GAUDI2_QUEUE_ID_PDMA_1_1] = GAUDI2_EVENT_PDMA1_QM,
	[GAUDI2_QUEUE_ID_PDMA_1_2] = GAUDI2_EVENT_PDMA1_QM,
	[GAUDI2_QUEUE_ID_PDMA_1_3] = GAUDI2_EVENT_PDMA1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = GAUDI2_EVENT_HDMA0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = GAUDI2_EVENT_HDMA0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = GAUDI2_EVENT_HDMA0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = GAUDI2_EVENT_HDMA0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = GAUDI2_EVENT_HDMA1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = GAUDI2_EVENT_HDMA1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = GAUDI2_EVENT_HDMA1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = GAUDI2_EVENT_HDMA1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = GAUDI2_EVENT_MME0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = GAUDI2_EVENT_MME0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = GAUDI2_EVENT_MME0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = GAUDI2_EVENT_MME0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = GAUDI2_EVENT_TPC0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = GAUDI2_EVENT_TPC0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = GAUDI2_EVENT_TPC0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = GAUDI2_EVENT_TPC0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = GAUDI2_EVENT_TPC1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = GAUDI2_EVENT_TPC1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = GAUDI2_EVENT_TPC1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = GAUDI2_EVENT_TPC1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = GAUDI2_EVENT_TPC2_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = GAUDI2_EVENT_TPC2_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = GAUDI2_EVENT_TPC2_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = GAUDI2_EVENT_TPC2_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = GAUDI2_EVENT_TPC3_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = GAUDI2_EVENT_TPC3_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = GAUDI2_EVENT_TPC3_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = GAUDI2_EVENT_TPC3_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = GAUDI2_EVENT_TPC4_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = GAUDI2_EVENT_TPC4_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = GAUDI2_EVENT_TPC4_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = GAUDI2_EVENT_TPC4_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = GAUDI2_EVENT_TPC5_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = GAUDI2_EVENT_TPC5_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = GAUDI2_EVENT_TPC5_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = GAUDI2_EVENT_TPC5_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = GAUDI2_EVENT_TPC24_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = GAUDI2_EVENT_TPC24_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = GAUDI2_EVENT_TPC24_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = GAUDI2_EVENT_TPC24_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = GAUDI2_EVENT_HDMA2_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = GAUDI2_EVENT_HDMA2_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = GAUDI2_EVENT_HDMA2_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = GAUDI2_EVENT_HDMA2_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = GAUDI2_EVENT_HDMA3_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = GAUDI2_EVENT_HDMA3_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = GAUDI2_EVENT_HDMA3_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = GAUDI2_EVENT_HDMA3_QM,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = GAUDI2_EVENT_MME1_QM,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = GAUDI2_EVENT_MME1_QM,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = GAUDI2_EVENT_MME1_QM,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = GAUDI2_EVENT_MME1_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = GAUDI2_EVENT_TPC6_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = GAUDI2_EVENT_TPC6_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = GAUDI2_EVENT_TPC6_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = GAUDI2_EVENT_TPC6_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = GAUDI2_EVENT_TPC7_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = GAUDI2_EVENT_TPC7_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = GAUDI2_EVENT_TPC7_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = GAUDI2_EVENT_TPC7_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = GAUDI2_EVENT_TPC8_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = GAUDI2_EVENT_TPC8_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = GAUDI2_EVENT_TPC8_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = GAUDI2_EVENT_TPC8_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = GAUDI2_EVENT_TPC9_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = GAUDI2_EVENT_TPC9_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = GAUDI2_EVENT_TPC9_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = GAUDI2_EVENT_TPC9_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = GAUDI2_EVENT_TPC10_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = GAUDI2_EVENT_TPC10_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = GAUDI2_EVENT_TPC10_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = GAUDI2_EVENT_TPC10_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = GAUDI2_EVENT_TPC11_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = GAUDI2_EVENT_TPC11_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = GAUDI2_EVENT_TPC11_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = GAUDI2_EVENT_TPC11_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = GAUDI2_EVENT_HDMA4_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = GAUDI2_EVENT_HDMA4_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = GAUDI2_EVENT_HDMA4_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = GAUDI2_EVENT_HDMA4_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = GAUDI2_EVENT_HDMA5_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = GAUDI2_EVENT_HDMA5_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = GAUDI2_EVENT_HDMA5_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = GAUDI2_EVENT_HDMA5_QM,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = GAUDI2_EVENT_MME2_QM,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = GAUDI2_EVENT_MME2_QM,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = GAUDI2_EVENT_MME2_QM,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = GAUDI2_EVENT_MME2_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = GAUDI2_EVENT_TPC12_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = GAUDI2_EVENT_TPC12_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = GAUDI2_EVENT_TPC12_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = GAUDI2_EVENT_TPC12_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = GAUDI2_EVENT_TPC13_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = GAUDI2_EVENT_TPC13_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = GAUDI2_EVENT_TPC13_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = GAUDI2_EVENT_TPC13_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = GAUDI2_EVENT_TPC14_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = GAUDI2_EVENT_TPC14_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = GAUDI2_EVENT_TPC14_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = GAUDI2_EVENT_TPC14_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = GAUDI2_EVENT_TPC15_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = GAUDI2_EVENT_TPC15_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = GAUDI2_EVENT_TPC15_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = GAUDI2_EVENT_TPC15_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = GAUDI2_EVENT_TPC16_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = GAUDI2_EVENT_TPC16_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = GAUDI2_EVENT_TPC16_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = GAUDI2_EVENT_TPC16_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = GAUDI2_EVENT_TPC17_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = GAUDI2_EVENT_TPC17_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = GAUDI2_EVENT_TPC17_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = GAUDI2_EVENT_TPC17_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = GAUDI2_EVENT_HDMA6_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = GAUDI2_EVENT_HDMA6_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = GAUDI2_EVENT_HDMA6_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = GAUDI2_EVENT_HDMA6_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = GAUDI2_EVENT_HDMA7_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = GAUDI2_EVENT_HDMA7_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = GAUDI2_EVENT_HDMA7_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = GAUDI2_EVENT_HDMA7_QM,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = GAUDI2_EVENT_MME3_QM,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = GAUDI2_EVENT_MME3_QM,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = GAUDI2_EVENT_MME3_QM,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = GAUDI2_EVENT_MME3_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = GAUDI2_EVENT_TPC18_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = GAUDI2_EVENT_TPC18_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = GAUDI2_EVENT_TPC18_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = GAUDI2_EVENT_TPC18_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = GAUDI2_EVENT_TPC19_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = GAUDI2_EVENT_TPC19_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = GAUDI2_EVENT_TPC19_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = GAUDI2_EVENT_TPC19_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = GAUDI2_EVENT_TPC20_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = GAUDI2_EVENT_TPC20_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = GAUDI2_EVENT_TPC20_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = GAUDI2_EVENT_TPC20_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = GAUDI2_EVENT_TPC21_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = GAUDI2_EVENT_TPC21_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = GAUDI2_EVENT_TPC21_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = GAUDI2_EVENT_TPC21_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = GAUDI2_EVENT_TPC22_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = GAUDI2_EVENT_TPC22_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = GAUDI2_EVENT_TPC22_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = GAUDI2_EVENT_TPC22_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = GAUDI2_EVENT_TPC23_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = GAUDI2_EVENT_TPC23_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = GAUDI2_EVENT_TPC23_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = GAUDI2_EVENT_TPC23_QM,
	[GAUDI2_QUEUE_ID_NIC_0_0] = GAUDI2_EVENT_NIC0_QM0,
	[GAUDI2_QUEUE_ID_NIC_0_1] = GAUDI2_EVENT_NIC0_QM0,
	[GAUDI2_QUEUE_ID_NIC_0_2] = GAUDI2_EVENT_NIC0_QM0,
	[GAUDI2_QUEUE_ID_NIC_0_3] = GAUDI2_EVENT_NIC0_QM0,
	[GAUDI2_QUEUE_ID_NIC_1_0] = GAUDI2_EVENT_NIC0_QM1,
	[GAUDI2_QUEUE_ID_NIC_1_1] = GAUDI2_EVENT_NIC0_QM1,
	[GAUDI2_QUEUE_ID_NIC_1_2] = GAUDI2_EVENT_NIC0_QM1,
	[GAUDI2_QUEUE_ID_NIC_1_3] = GAUDI2_EVENT_NIC0_QM1,
	[GAUDI2_QUEUE_ID_NIC_2_0] = GAUDI2_EVENT_NIC1_QM0,
	[GAUDI2_QUEUE_ID_NIC_2_1] = GAUDI2_EVENT_NIC1_QM0,
	[GAUDI2_QUEUE_ID_NIC_2_2] = GAUDI2_EVENT_NIC1_QM0,
	[GAUDI2_QUEUE_ID_NIC_2_3] = GAUDI2_EVENT_NIC1_QM0,
	[GAUDI2_QUEUE_ID_NIC_3_0] = GAUDI2_EVENT_NIC1_QM1,
	[GAUDI2_QUEUE_ID_NIC_3_1] = GAUDI2_EVENT_NIC1_QM1,
	[GAUDI2_QUEUE_ID_NIC_3_2] = GAUDI2_EVENT_NIC1_QM1,
	[GAUDI2_QUEUE_ID_NIC_3_3] = GAUDI2_EVENT_NIC1_QM1,
	[GAUDI2_QUEUE_ID_NIC_4_0] = GAUDI2_EVENT_NIC2_QM0,
	[GAUDI2_QUEUE_ID_NIC_4_1] = GAUDI2_EVENT_NIC2_QM0,
	[GAUDI2_QUEUE_ID_NIC_4_2] = GAUDI2_EVENT_NIC2_QM0,
	[GAUDI2_QUEUE_ID_NIC_4_3] = GAUDI2_EVENT_NIC2_QM0,
	[GAUDI2_QUEUE_ID_NIC_5_0] = GAUDI2_EVENT_NIC2_QM1,
	[GAUDI2_QUEUE_ID_NIC_5_1] = GAUDI2_EVENT_NIC2_QM1,
	[GAUDI2_QUEUE_ID_NIC_5_2] = GAUDI2_EVENT_NIC2_QM1,
	[GAUDI2_QUEUE_ID_NIC_5_3] = GAUDI2_EVENT_NIC2_QM1,
	[GAUDI2_QUEUE_ID_NIC_6_0] = GAUDI2_EVENT_NIC3_QM0,
	[GAUDI2_QUEUE_ID_NIC_6_1] = GAUDI2_EVENT_NIC3_QM0,
	[GAUDI2_QUEUE_ID_NIC_6_2] = GAUDI2_EVENT_NIC3_QM0,
	[GAUDI2_QUEUE_ID_NIC_6_3] = GAUDI2_EVENT_NIC3_QM0,
	[GAUDI2_QUEUE_ID_NIC_7_0] = GAUDI2_EVENT_NIC3_QM1,
	[GAUDI2_QUEUE_ID_NIC_7_1] = GAUDI2_EVENT_NIC3_QM1,
	[GAUDI2_QUEUE_ID_NIC_7_2] = GAUDI2_EVENT_NIC3_QM1,
	[GAUDI2_QUEUE_ID_NIC_7_3] = GAUDI2_EVENT_NIC3_QM1,
	[GAUDI2_QUEUE_ID_NIC_8_0] = GAUDI2_EVENT_NIC4_QM0,
	[GAUDI2_QUEUE_ID_NIC_8_1] = GAUDI2_EVENT_NIC4_QM0,
	[GAUDI2_QUEUE_ID_NIC_8_2] = GAUDI2_EVENT_NIC4_QM0,
	[GAUDI2_QUEUE_ID_NIC_8_3] = GAUDI2_EVENT_NIC4_QM0,
	[GAUDI2_QUEUE_ID_NIC_9_0] = GAUDI2_EVENT_NIC4_QM1,
	[GAUDI2_QUEUE_ID_NIC_9_1] = GAUDI2_EVENT_NIC4_QM1,
	[GAUDI2_QUEUE_ID_NIC_9_2] = GAUDI2_EVENT_NIC4_QM1,
	[GAUDI2_QUEUE_ID_NIC_9_3] = GAUDI2_EVENT_NIC4_QM1,
	[GAUDI2_QUEUE_ID_NIC_10_0] = GAUDI2_EVENT_NIC5_QM0,
	[GAUDI2_QUEUE_ID_NIC_10_1] = GAUDI2_EVENT_NIC5_QM0,
	[GAUDI2_QUEUE_ID_NIC_10_2] = GAUDI2_EVENT_NIC5_QM0,
	[GAUDI2_QUEUE_ID_NIC_10_3] = GAUDI2_EVENT_NIC5_QM0,
	[GAUDI2_QUEUE_ID_NIC_11_0] = GAUDI2_EVENT_NIC5_QM1,
	[GAUDI2_QUEUE_ID_NIC_11_1] = GAUDI2_EVENT_NIC5_QM1,
	[GAUDI2_QUEUE_ID_NIC_11_2] = GAUDI2_EVENT_NIC5_QM1,
	[GAUDI2_QUEUE_ID_NIC_11_3] = GAUDI2_EVENT_NIC5_QM1,
	[GAUDI2_QUEUE_ID_NIC_12_0] = GAUDI2_EVENT_NIC6_QM0,
	[GAUDI2_QUEUE_ID_NIC_12_1] = GAUDI2_EVENT_NIC6_QM0,
	[GAUDI2_QUEUE_ID_NIC_12_2] = GAUDI2_EVENT_NIC6_QM0,
	[GAUDI2_QUEUE_ID_NIC_12_3] = GAUDI2_EVENT_NIC6_QM0,
	[GAUDI2_QUEUE_ID_NIC_13_0] = GAUDI2_EVENT_NIC6_QM1,
	[GAUDI2_QUEUE_ID_NIC_13_1] = GAUDI2_EVENT_NIC6_QM1,
	[GAUDI2_QUEUE_ID_NIC_13_2] = GAUDI2_EVENT_NIC6_QM1,
	[GAUDI2_QUEUE_ID_NIC_13_3] = GAUDI2_EVENT_NIC6_QM1,
	[GAUDI2_QUEUE_ID_NIC_14_0] = GAUDI2_EVENT_NIC7_QM0,
	[GAUDI2_QUEUE_ID_NIC_14_1] = GAUDI2_EVENT_NIC7_QM0,
	[GAUDI2_QUEUE_ID_NIC_14_2] = GAUDI2_EVENT_NIC7_QM0,
	[GAUDI2_QUEUE_ID_NIC_14_3] = GAUDI2_EVENT_NIC7_QM0,
	[GAUDI2_QUEUE_ID_NIC_15_0] = GAUDI2_EVENT_NIC7_QM1,
	[GAUDI2_QUEUE_ID_NIC_15_1] = GAUDI2_EVENT_NIC7_QM1,
	[GAUDI2_QUEUE_ID_NIC_15_2] = GAUDI2_EVENT_NIC7_QM1,
	[GAUDI2_QUEUE_ID_NIC_15_3] = GAUDI2_EVENT_NIC7_QM1,
	[GAUDI2_QUEUE_ID_NIC_16_0] = GAUDI2_EVENT_NIC8_QM0,
	[GAUDI2_QUEUE_ID_NIC_16_1] = GAUDI2_EVENT_NIC8_QM0,
	[GAUDI2_QUEUE_ID_NIC_16_2] = GAUDI2_EVENT_NIC8_QM0,
	[GAUDI2_QUEUE_ID_NIC_16_3] = GAUDI2_EVENT_NIC8_QM0,
	[GAUDI2_QUEUE_ID_NIC_17_0] = GAUDI2_EVENT_NIC8_QM1,
	[GAUDI2_QUEUE_ID_NIC_17_1] = GAUDI2_EVENT_NIC8_QM1,
	[GAUDI2_QUEUE_ID_NIC_17_2] = GAUDI2_EVENT_NIC8_QM1,
	[GAUDI2_QUEUE_ID_NIC_17_3] = GAUDI2_EVENT_NIC8_QM1,
	[GAUDI2_QUEUE_ID_NIC_18_0] = GAUDI2_EVENT_NIC9_QM0,
	[GAUDI2_QUEUE_ID_NIC_18_1] = GAUDI2_EVENT_NIC9_QM0,
	[GAUDI2_QUEUE_ID_NIC_18_2] = GAUDI2_EVENT_NIC9_QM0,
	[GAUDI2_QUEUE_ID_NIC_18_3] = GAUDI2_EVENT_NIC9_QM0,
	[GAUDI2_QUEUE_ID_NIC_19_0] = GAUDI2_EVENT_NIC9_QM1,
	[GAUDI2_QUEUE_ID_NIC_19_1] = GAUDI2_EVENT_NIC9_QM1,
	[GAUDI2_QUEUE_ID_NIC_19_2] = GAUDI2_EVENT_NIC9_QM1,
	[GAUDI2_QUEUE_ID_NIC_19_3] = GAUDI2_EVENT_NIC9_QM1,
	[GAUDI2_QUEUE_ID_NIC_20_0] = GAUDI2_EVENT_NIC10_QM0,
	[GAUDI2_QUEUE_ID_NIC_20_1] = GAUDI2_EVENT_NIC10_QM0,
	[GAUDI2_QUEUE_ID_NIC_20_2] = GAUDI2_EVENT_NIC10_QM0,
	[GAUDI2_QUEUE_ID_NIC_20_3] = GAUDI2_EVENT_NIC10_QM0,
	[GAUDI2_QUEUE_ID_NIC_21_0] = GAUDI2_EVENT_NIC10_QM1,
	[GAUDI2_QUEUE_ID_NIC_21_1] = GAUDI2_EVENT_NIC10_QM1,
	[GAUDI2_QUEUE_ID_NIC_21_2] = GAUDI2_EVENT_NIC10_QM1,
	[GAUDI2_QUEUE_ID_NIC_21_3] = GAUDI2_EVENT_NIC10_QM1,
	[GAUDI2_QUEUE_ID_NIC_22_0] = GAUDI2_EVENT_NIC11_QM0,
	[GAUDI2_QUEUE_ID_NIC_22_1] = GAUDI2_EVENT_NIC11_QM0,
	[GAUDI2_QUEUE_ID_NIC_22_2] = GAUDI2_EVENT_NIC11_QM0,
	[GAUDI2_QUEUE_ID_NIC_22_3] = GAUDI2_EVENT_NIC11_QM0,
	[GAUDI2_QUEUE_ID_NIC_23_0] = GAUDI2_EVENT_NIC11_QM1,
	[GAUDI2_QUEUE_ID_NIC_23_1] = GAUDI2_EVENT_NIC11_QM1,
	[GAUDI2_QUEUE_ID_NIC_23_2] = GAUDI2_EVENT_NIC11_QM1,
	[GAUDI2_QUEUE_ID_NIC_23_3] = GAUDI2_EVENT_NIC11_QM1,
	[GAUDI2_QUEUE_ID_ROT_0_0] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
	[GAUDI2_QUEUE_ID_ROT_0_1] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
	[GAUDI2_QUEUE_ID_ROT_0_2] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
	[GAUDI2_QUEUE_ID_ROT_0_3] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
	[GAUDI2_QUEUE_ID_ROT_1_0] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
	[GAUDI2_QUEUE_ID_ROT_1_1] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
	[GAUDI2_QUEUE_ID_ROT_1_2] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
	[GAUDI2_QUEUE_ID_ROT_1_3] = GAUDI2_EVENT_ROTATOR1_ROT1_QM
};
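
/*
 * Illustrative use of the table above (a sketch, not the driver's actual
 * event path): all four queues of a QMAN share one async event ID, so a
 * queue index translates directly. The helper name and the -EINVAL bounds
 * check are assumptions made for this example.
 */
static inline int gaudi2_example_queue_event_id(u32 queue_id)
{
	if (queue_id >= ARRAY_SIZE(gaudi2_qman_async_event_id))
		return -EINVAL;

	return gaudi2_qman_async_event_id[queue_id];
}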

static const int gaudi2_dma_core_async_event_id[] = {
	[DMA_CORE_ID_EDMA0] = GAUDI2_EVENT_HDMA0_CORE,
	[DMA_CORE_ID_EDMA1] = GAUDI2_EVENT_HDMA1_CORE,
	[DMA_CORE_ID_EDMA2] = GAUDI2_EVENT_HDMA2_CORE,
	[DMA_CORE_ID_EDMA3] = GAUDI2_EVENT_HDMA3_CORE,
	[DMA_CORE_ID_EDMA4] = GAUDI2_EVENT_HDMA4_CORE,
	[DMA_CORE_ID_EDMA5] = GAUDI2_EVENT_HDMA5_CORE,
	[DMA_CORE_ID_EDMA6] = GAUDI2_EVENT_HDMA6_CORE,
	[DMA_CORE_ID_EDMA7] = GAUDI2_EVENT_HDMA7_CORE,
	[DMA_CORE_ID_PDMA0] = GAUDI2_EVENT_PDMA0_CORE,
	[DMA_CORE_ID_PDMA1] = GAUDI2_EVENT_PDMA1_CORE,
	[DMA_CORE_ID_KDMA] = GAUDI2_EVENT_KDMA0_CORE,
};

static const char * const gaudi2_qm_sei_error_cause[GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE] = {
	"qm_axi_err",
	"qm_arb_err"
};

static const char * const gaudi2_cpu_sei_error_cause[GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE] = {
	"AXI_TERMINATOR WR",
	"AXI_TERMINATOR RD",
	"AXI SPLIT SEI Status"
};

static const char * const gaudi2_arc_sei_error_cause[GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE] = {
	"cbu_bresp_sei_intr_cause",
	"cbu_rresp_sei_intr_cause",
	"lbu_bresp_sei_intr_cause",
	"lbu_rresp_sei_intr_cause",
	"cbu_axi_split_intr_cause",
	"lbu_axi_split_intr_cause",
	"arc_ip_excptn_sei_intr_cause",
	"dmi_bresp_sei_intr_cause",
	"aux2apb_err_sei_intr_cause",
	"cfg_lbw_wr_terminated_intr_cause",
	"cfg_lbw_rd_terminated_intr_cause",
	"cfg_dccm_wr_terminated_intr_cause",
	"cfg_dccm_rd_terminated_intr_cause",
	"cfg_hbw_rd_terminated_intr_cause"
};

static const char * const gaudi2_dec_error_cause[GAUDI2_NUM_OF_DEC_ERR_CAUSE] = {
	"msix_abnrm_hbw_sei",
	"msix_abnrm_lbw_sei",
	"axi_split_bresp_err_sei",
	"hbw_axi_wr_viol_sei",
	"hbw_axi_rd_viol_sei",
	"lbw_axi_wr_viol_sei",
	"lbw_axi_rd_viol_sei",
};

static const char * const gaudi2_qman_error_cause[GAUDI2_NUM_OF_QM_ERR_CAUSE] = {
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP WRREG32 or WRBULK returned error",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
	"FENCE 3 inc over max value and clipped",
	"FENCE 0 dec under min value and clipped",
	"FENCE 1 dec under min value and clipped",
	"FENCE 2 dec under min value and clipped",
	"FENCE 3 dec under min value and clipped",
};

static const char * const gaudi2_qman_lower_cp_error_cause[GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE] = {
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP WRREG32 or WRBULK returned error",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
	"FENCE 3 inc over max value and clipped",
	"FENCE 0 dec under min value and clipped",
	"FENCE 1 dec under min value and clipped",
	"FENCE 2 dec under min value and clipped",
	"FENCE 3 dec under min value and clipped",
	"CQ_WR_IFIFO_CI_ERR",
	"ARC_CQ_WR_IFIFO_CI_ERR",
	"ARC_CQ_WR_CTL_CI_ERR",
};

static const char * const gaudi2_qman_arb_error_cause[GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE] = {
	"Choice push while full error",
	"Choice Q watchdog error",
	"MSG AXI LBW returned with error"
};

static const char * const guadi2_rot_error_cause[GAUDI2_NUM_OF_ROT_ERR_CAUSE] = {
	"qm_trace_fence_events",
	"lbw_mstr_rresp_err",
	"lbw_mstr_bresp_err",
	"hbw_mstr_rresp_err",
	"hbw_mstr_bresp_err",
	"async_arc2cpu_sei_intr",
};

static const char * const gaudi2_tpc_interrupts_cause[GAUDI2_NUM_OF_TPC_INTR_CAUSE] = {
	"tpc_address_exceed_slm",
	"tpc_spu_mac_overflow",
	"tpc_spu_addsub_overflow",
	"tpc_spu_abs_overflow",
	"tpc_spu_fma_fp_dst_nan",
	"tpc_spu_fma_fp_dst_inf",
	"tpc_spu_convert_fp_dst_nan",
	"tpc_spu_convert_fp_dst_inf",
	"tpc_spu_fp_dst_denorm",
	"tpc_vpu_mac_overflow",
	"tpc_vpu_addsub_overflow",
	"tpc_vpu_abs_overflow",
	"tpc_vpu_convert_fp_dst_nan",
	"tpc_vpu_convert_fp_dst_inf",
	"tpc_vpu_fma_fp_dst_nan",
	"tpc_vpu_fma_fp_dst_inf",
	"tpc_vpu_fp_dst_denorm",
	"tpc_illegal_instruction",
	"tpc_pc_wrap_around",
	"st_unlock_already_locked",
	"invalid_lock_access",
	"LD_L protection violation",
	"ST_L protection violation",
};

static const char * const guadi2_mme_error_cause[GAUDI2_NUM_OF_MME_ERR_CAUSE] = {
	"wap sei (wbc axi err)",
};

static const char * const guadi2_mme_sbte_error_cause[GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE] = {
	"i0",
	"i1",
	"i2",
	"i3",
	"i4",
};

static const char * const guadi2_mme_wap_error_cause[GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE] = {
	"WBC ERR RESP_0",
	"WBC ERR RESP_1",
	"AP SOURCE POS INF",
	"AP SOURCE NEG INF",
	"AP SOURCE NAN",
	"AP RESULT POS INF",
	"AP RESULT NEG INF",
};

static const char * const gaudi2_dma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
	"HBW Read returned with error RRESP",
	"HBW write returned with error BRESP",
	"LBW write returned with error BRESP",
	"descriptor_fifo_overflow",
	"KDMA SB LBW Read returned with error",
	"KDMA WBC LBW Write returned with error",
	"TRANSPOSE ENGINE DESC FIFO OVERFLOW",
	"WRONG CFG FOR COMMIT IN LIN DMA"
};

static const char * const gaudi2_kdma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
	"HBW/LBW Read returned with error RRESP",
	"HBW/LBW write returned with error BRESP",
	"LBW write returned with error BRESP",
	"descriptor_fifo_overflow",
	"KDMA SB LBW Read returned with error",
	"KDMA WBC LBW Write returned with error",
	"TRANSPOSE ENGINE DESC FIFO OVERFLOW",
	"WRONG CFG FOR COMMIT IN LIN DMA"
};

struct gaudi2_sm_sei_cause_data {
	const char *cause_name;
	const char *log_name;
};

static const struct gaudi2_sm_sei_cause_data
gaudi2_sm_sei_cause[GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE] = {
	{"calculated SO value overflow/underflow", "SOB ID"},
	{"payload address of monitor is not aligned to 4B", "monitor addr"},
	{"armed monitor write got BRESP (SLVERR or DECERR)", "AXI id"},
};

static const char * const
gaudi2_pmmu_fatal_interrupts_cause[GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE] = {
	"LATENCY_RD_OUT_FIFO_OVERRUN",
	"LATENCY_WR_OUT_FIFO_OVERRUN",
};

static const char * const
gaudi2_hif_fatal_interrupts_cause[GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE] = {
	"LATENCY_RD_OUT_FIFO_OVERRUN",
	"LATENCY_WR_OUT_FIFO_OVERRUN",
};

static const char * const
gaudi2_psoc_axi_drain_interrupts_cause[GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE] = {
	"AXI drain HBW",
	"AXI drain LBW",
};

static const char * const
gaudi2_pcie_addr_dec_error_cause[GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE] = {
	"HBW error response",
	"LBW error response",
	"TLP is blocked by RR"
};

const u32 gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_SIZE] = {
	[GAUDI2_QUEUE_ID_PDMA_0_0] = mmPDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_0_1] = mmPDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_0_2] = mmPDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_0_3] = mmPDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_1_0] = mmPDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_1_1] = mmPDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_1_2] = mmPDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_1_3] = mmPDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = mmDCORE0_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = mmDCORE0_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = mmDCORE0_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = mmDCORE0_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = mmDCORE0_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = mmDCORE0_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = mmDCORE0_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = mmDCORE0_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = mmDCORE0_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = mmDCORE0_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = mmDCORE0_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = mmDCORE0_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = mmDCORE0_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = mmDCORE0_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = mmDCORE0_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = mmDCORE0_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = mmDCORE0_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = mmDCORE0_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = mmDCORE0_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = mmDCORE0_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = mmDCORE0_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = mmDCORE0_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = mmDCORE0_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = mmDCORE0_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = mmDCORE0_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = mmDCORE0_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = mmDCORE0_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = mmDCORE0_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = mmDCORE0_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = mmDCORE0_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = mmDCORE0_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = mmDCORE0_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = mmDCORE0_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = mmDCORE0_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = mmDCORE0_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = mmDCORE0_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = mmDCORE0_TPC6_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = mmDCORE0_TPC6_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = mmDCORE0_TPC6_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = mmDCORE0_TPC6_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = mmDCORE1_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = mmDCORE1_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = mmDCORE1_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = mmDCORE1_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = mmDCORE1_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = mmDCORE1_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = mmDCORE1_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = mmDCORE1_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = mmDCORE1_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = mmDCORE1_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = mmDCORE1_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = mmDCORE1_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = mmDCORE1_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = mmDCORE1_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = mmDCORE1_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = mmDCORE1_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = mmDCORE1_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = mmDCORE1_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = mmDCORE1_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = mmDCORE1_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = mmDCORE1_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = mmDCORE1_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = mmDCORE1_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = mmDCORE1_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = mmDCORE1_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = mmDCORE1_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = mmDCORE1_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = mmDCORE1_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = mmDCORE1_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = mmDCORE1_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = mmDCORE1_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = mmDCORE1_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = mmDCORE1_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = mmDCORE1_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = mmDCORE1_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = mmDCORE1_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = mmDCORE2_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = mmDCORE2_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = mmDCORE2_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = mmDCORE2_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = mmDCORE2_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = mmDCORE2_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = mmDCORE2_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = mmDCORE2_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = mmDCORE2_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = mmDCORE2_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = mmDCORE2_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = mmDCORE2_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = mmDCORE2_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = mmDCORE2_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = mmDCORE2_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = mmDCORE2_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = mmDCORE2_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = mmDCORE2_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = mmDCORE2_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = mmDCORE2_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = mmDCORE2_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = mmDCORE2_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = mmDCORE2_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = mmDCORE2_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = mmDCORE2_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = mmDCORE2_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = mmDCORE2_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = mmDCORE2_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = mmDCORE2_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = mmDCORE2_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = mmDCORE2_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = mmDCORE2_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = mmDCORE2_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = mmDCORE2_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = mmDCORE2_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = mmDCORE2_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = mmDCORE3_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = mmDCORE3_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = mmDCORE3_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = mmDCORE3_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = mmDCORE3_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = mmDCORE3_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = mmDCORE3_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = mmDCORE3_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = mmDCORE3_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = mmDCORE3_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = mmDCORE3_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = mmDCORE3_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = mmDCORE3_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = mmDCORE3_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = mmDCORE3_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = mmDCORE3_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = mmDCORE3_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = mmDCORE3_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = mmDCORE3_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = mmDCORE3_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = mmDCORE3_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = mmDCORE3_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = mmDCORE3_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = mmDCORE3_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = mmDCORE3_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = mmDCORE3_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = mmDCORE3_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = mmDCORE3_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = mmDCORE3_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = mmDCORE3_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = mmDCORE3_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = mmDCORE3_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = mmDCORE3_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = mmDCORE3_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = mmDCORE3_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = mmDCORE3_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_NIC_0_0] = mmNIC0_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_0_1] = mmNIC0_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_0_2] = mmNIC0_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_0_3] = mmNIC0_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_1_0] = mmNIC0_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_1_1] = mmNIC0_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_1_2] = mmNIC0_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_1_3] = mmNIC0_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_2_0] = mmNIC1_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_2_1] = mmNIC1_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_2_2] = mmNIC1_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_2_3] = mmNIC1_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_3_0] = mmNIC1_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_3_1] = mmNIC1_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_3_2] = mmNIC1_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_3_3] = mmNIC1_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_4_0] = mmNIC2_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_4_1] = mmNIC2_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_4_2] = mmNIC2_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_4_3] = mmNIC2_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_5_0] = mmNIC2_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_5_1] = mmNIC2_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_5_2] = mmNIC2_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_5_3] = mmNIC2_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_6_0] = mmNIC3_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_6_1] = mmNIC3_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_6_2] = mmNIC3_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_6_3] = mmNIC3_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_7_0] = mmNIC3_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_7_1] = mmNIC3_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_7_2] = mmNIC3_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_7_3] = mmNIC3_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_8_0] = mmNIC4_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_8_1] = mmNIC4_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_8_2] = mmNIC4_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_8_3] = mmNIC4_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_9_0] = mmNIC4_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_9_1] = mmNIC4_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_9_2] = mmNIC4_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_9_3] = mmNIC4_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_10_0] = mmNIC5_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_10_1] = mmNIC5_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_10_2] = mmNIC5_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_10_3] = mmNIC5_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_11_0] = mmNIC5_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_11_1] = mmNIC5_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_11_2] = mmNIC5_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_11_3] = mmNIC5_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_12_0] = mmNIC6_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_12_1] = mmNIC6_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_12_2] = mmNIC6_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_12_3] = mmNIC6_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_13_0] = mmNIC6_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_13_1] = mmNIC6_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_13_2] = mmNIC6_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_13_3] = mmNIC6_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_14_0] = mmNIC7_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_14_1] = mmNIC7_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_14_2] = mmNIC7_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_14_3] = mmNIC7_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_15_0] = mmNIC7_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_15_1] = mmNIC7_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_15_2] = mmNIC7_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_15_3] = mmNIC7_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_16_0] = mmNIC8_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_16_1] = mmNIC8_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_16_2] = mmNIC8_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_16_3] = mmNIC8_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_17_0] = mmNIC8_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_17_1] = mmNIC8_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_17_2] = mmNIC8_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_17_3] = mmNIC8_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_18_0] = mmNIC9_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_18_1] = mmNIC9_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_18_2] = mmNIC9_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_18_3] = mmNIC9_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_19_0] = mmNIC9_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_19_1] = mmNIC9_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_19_2] = mmNIC9_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_19_3] = mmNIC9_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_20_0] = mmNIC10_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_20_1] = mmNIC10_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_20_2] = mmNIC10_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_20_3] = mmNIC10_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_21_0] = mmNIC10_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_21_1] = mmNIC10_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_21_2] = mmNIC10_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_21_3] = mmNIC10_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_22_0] = mmNIC11_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_22_1] = mmNIC11_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_22_2] = mmNIC11_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_22_3] = mmNIC11_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_23_0] = mmNIC11_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_23_1] = mmNIC11_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_23_2] = mmNIC11_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_23_3] = mmNIC11_QM1_BASE,
	[GAUDI2_QUEUE_ID_ROT_0_0] = mmROT0_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_0_1] = mmROT0_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_0_2] = mmROT0_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_0_3] = mmROT0_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_1_0] = mmROT1_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_1_1] = mmROT1_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_1_2] = mmROT1_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_1_3] = mmROT1_QM_BASE
};
static const u32 gaudi2_arc_blocks_bases[NUM_ARC_CPUS] = {
	[CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_AUX_BASE,
	[CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_AUX_BASE,
	[CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_AUX_BASE,
	[CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_AUX_BASE,
	[CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_AUX_BASE,
	[CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_ARC_AUX_BASE,
	[CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_AUX_BASE,
	[CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_ARC_AUX_BASE,
	[CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_AUX_BASE,
	[CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_AUX_BASE,
	[CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_AUX_BASE,
	[CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_AUX_BASE,
	[CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_ARC_AUX1_BASE,
};
static const u32 gaudi2_arc_dccm_bases[NUM_ARC_CPUS] = {
	[CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_DCCM0_BASE,
	[CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_DCCM0_BASE,
	[CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_DCCM0_BASE,
	[CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_DCCM0_BASE,
	[CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_DCCM_BASE,
	[CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_DCCM_BASE,
	[CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_DCCM_BASE,
	[CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_DCCM_BASE,
	[CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_DCCM_BASE,
	[CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_DCCM_BASE,
	[CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_DCCM_BASE,
	[CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_DCCM_BASE,
	[CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_DCCM1_BASE,
};
const u32 gaudi2_mme_ctrl_lo_blocks_bases[MME_ID_SIZE] = {
	[MME_ID_DCORE0] = mmDCORE0_MME_CTRL_LO_BASE,
	[MME_ID_DCORE1] = mmDCORE1_MME_CTRL_LO_BASE,
	[MME_ID_DCORE2] = mmDCORE2_MME_CTRL_LO_BASE,
	[MME_ID_DCORE3] = mmDCORE3_MME_CTRL_LO_BASE,
};
static const u32 gaudi2_queue_id_to_arc_id[GAUDI2_QUEUE_ID_SIZE] = {
	[GAUDI2_QUEUE_ID_PDMA_0_0] = CPU_ID_PDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_PDMA_0_1] = CPU_ID_PDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_PDMA_0_2] = CPU_ID_PDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_PDMA_0_3] = CPU_ID_PDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_PDMA_1_0] = CPU_ID_PDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_PDMA_1_1] = CPU_ID_PDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_PDMA_1_2] = CPU_ID_PDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_PDMA_1_3] = CPU_ID_PDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = CPU_ID_MME_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = CPU_ID_MME_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = CPU_ID_MME_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = CPU_ID_MME_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = CPU_ID_TPC_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = CPU_ID_TPC_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = CPU_ID_TPC_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = CPU_ID_TPC_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = CPU_ID_TPC_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = CPU_ID_TPC_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = CPU_ID_TPC_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = CPU_ID_TPC_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = CPU_ID_TPC_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = CPU_ID_TPC_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = CPU_ID_TPC_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = CPU_ID_TPC_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = CPU_ID_TPC_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = CPU_ID_TPC_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = CPU_ID_TPC_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = CPU_ID_TPC_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = CPU_ID_TPC_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = CPU_ID_TPC_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = CPU_ID_TPC_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = CPU_ID_TPC_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = CPU_ID_TPC_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = CPU_ID_TPC_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = CPU_ID_TPC_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = CPU_ID_TPC_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = CPU_ID_TPC_QMAN_ARC24,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = CPU_ID_TPC_QMAN_ARC24,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = CPU_ID_TPC_QMAN_ARC24,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = CPU_ID_TPC_QMAN_ARC24,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = CPU_ID_SCHED_ARC4,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = CPU_ID_SCHED_ARC4,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = CPU_ID_SCHED_ARC4,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = CPU_ID_SCHED_ARC4,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = CPU_ID_TPC_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = CPU_ID_TPC_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = CPU_ID_TPC_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = CPU_ID_TPC_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = CPU_ID_TPC_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = CPU_ID_TPC_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = CPU_ID_TPC_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = CPU_ID_TPC_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = CPU_ID_TPC_QMAN_ARC8,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = CPU_ID_TPC_QMAN_ARC8,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = CPU_ID_TPC_QMAN_ARC8,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = CPU_ID_TPC_QMAN_ARC8,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = CPU_ID_TPC_QMAN_ARC9,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = CPU_ID_TPC_QMAN_ARC9,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = CPU_ID_TPC_QMAN_ARC9,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = CPU_ID_TPC_QMAN_ARC9,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = CPU_ID_TPC_QMAN_ARC10,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = CPU_ID_TPC_QMAN_ARC10,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = CPU_ID_TPC_QMAN_ARC10,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = CPU_ID_TPC_QMAN_ARC10,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = CPU_ID_TPC_QMAN_ARC11,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = CPU_ID_TPC_QMAN_ARC11,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = CPU_ID_TPC_QMAN_ARC11,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = CPU_ID_TPC_QMAN_ARC11,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = CPU_ID_MME_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = CPU_ID_MME_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = CPU_ID_MME_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = CPU_ID_MME_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = CPU_ID_TPC_QMAN_ARC12,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = CPU_ID_TPC_QMAN_ARC12,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = CPU_ID_TPC_QMAN_ARC12,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = CPU_ID_TPC_QMAN_ARC12,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = CPU_ID_TPC_QMAN_ARC13,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = CPU_ID_TPC_QMAN_ARC13,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = CPU_ID_TPC_QMAN_ARC13,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = CPU_ID_TPC_QMAN_ARC13,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = CPU_ID_TPC_QMAN_ARC14,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = CPU_ID_TPC_QMAN_ARC14,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = CPU_ID_TPC_QMAN_ARC14,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = CPU_ID_TPC_QMAN_ARC14,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = CPU_ID_TPC_QMAN_ARC15,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = CPU_ID_TPC_QMAN_ARC15,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = CPU_ID_TPC_QMAN_ARC15,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = CPU_ID_TPC_QMAN_ARC15,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = CPU_ID_TPC_QMAN_ARC16,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = CPU_ID_TPC_QMAN_ARC16,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = CPU_ID_TPC_QMAN_ARC16,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = CPU_ID_TPC_QMAN_ARC16,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = CPU_ID_TPC_QMAN_ARC17,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = CPU_ID_TPC_QMAN_ARC17,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = CPU_ID_TPC_QMAN_ARC17,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = CPU_ID_TPC_QMAN_ARC17,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = CPU_ID_SCHED_ARC5,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = CPU_ID_SCHED_ARC5,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = CPU_ID_SCHED_ARC5,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = CPU_ID_SCHED_ARC5,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = CPU_ID_TPC_QMAN_ARC18,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = CPU_ID_TPC_QMAN_ARC18,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = CPU_ID_TPC_QMAN_ARC18,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = CPU_ID_TPC_QMAN_ARC18,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = CPU_ID_TPC_QMAN_ARC19,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = CPU_ID_TPC_QMAN_ARC19,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = CPU_ID_TPC_QMAN_ARC19,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = CPU_ID_TPC_QMAN_ARC19,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = CPU_ID_TPC_QMAN_ARC20,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = CPU_ID_TPC_QMAN_ARC20,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = CPU_ID_TPC_QMAN_ARC20,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = CPU_ID_TPC_QMAN_ARC20,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = CPU_ID_TPC_QMAN_ARC21,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = CPU_ID_TPC_QMAN_ARC21,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = CPU_ID_TPC_QMAN_ARC21,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = CPU_ID_TPC_QMAN_ARC21,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = CPU_ID_TPC_QMAN_ARC22,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = CPU_ID_TPC_QMAN_ARC22,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = CPU_ID_TPC_QMAN_ARC22,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = CPU_ID_TPC_QMAN_ARC22,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = CPU_ID_TPC_QMAN_ARC23,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = CPU_ID_TPC_QMAN_ARC23,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = CPU_ID_TPC_QMAN_ARC23,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = CPU_ID_TPC_QMAN_ARC23,
	[GAUDI2_QUEUE_ID_NIC_0_0] = CPU_ID_NIC_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_NIC_0_1] = CPU_ID_NIC_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_NIC_0_2] = CPU_ID_NIC_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_NIC_0_3] = CPU_ID_NIC_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_NIC_1_0] = CPU_ID_NIC_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_NIC_1_1] = CPU_ID_NIC_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_NIC_1_2] = CPU_ID_NIC_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_NIC_1_3] = CPU_ID_NIC_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_NIC_2_0] = CPU_ID_NIC_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_NIC_2_1] = CPU_ID_NIC_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_NIC_2_2] = CPU_ID_NIC_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_NIC_2_3] = CPU_ID_NIC_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_NIC_3_0] = CPU_ID_NIC_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_NIC_3_1] = CPU_ID_NIC_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_NIC_3_2] = CPU_ID_NIC_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_NIC_3_3] = CPU_ID_NIC_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_NIC_4_0] = CPU_ID_NIC_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_NIC_4_1] = CPU_ID_NIC_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_NIC_4_2] = CPU_ID_NIC_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_NIC_4_3] = CPU_ID_NIC_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_NIC_5_0] = CPU_ID_NIC_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_NIC_5_1] = CPU_ID_NIC_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_NIC_5_2] = CPU_ID_NIC_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_NIC_5_3] = CPU_ID_NIC_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_NIC_6_0] = CPU_ID_NIC_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_NIC_6_1] = CPU_ID_NIC_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_NIC_6_2] = CPU_ID_NIC_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_NIC_6_3] = CPU_ID_NIC_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_NIC_7_0] = CPU_ID_NIC_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_NIC_7_1] = CPU_ID_NIC_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_NIC_7_2] = CPU_ID_NIC_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_NIC_7_3] = CPU_ID_NIC_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_NIC_8_0] = CPU_ID_NIC_QMAN_ARC8,
	[GAUDI2_QUEUE_ID_NIC_8_1] = CPU_ID_NIC_QMAN_ARC8,
	[GAUDI2_QUEUE_ID_NIC_8_2] = CPU_ID_NIC_QMAN_ARC8,
	[GAUDI2_QUEUE_ID_NIC_8_3] = CPU_ID_NIC_QMAN_ARC8,
	[GAUDI2_QUEUE_ID_NIC_9_0] = CPU_ID_NIC_QMAN_ARC9,
	[GAUDI2_QUEUE_ID_NIC_9_1] = CPU_ID_NIC_QMAN_ARC9,
	[GAUDI2_QUEUE_ID_NIC_9_2] = CPU_ID_NIC_QMAN_ARC9,
	[GAUDI2_QUEUE_ID_NIC_9_3] = CPU_ID_NIC_QMAN_ARC9,
	[GAUDI2_QUEUE_ID_NIC_10_0] = CPU_ID_NIC_QMAN_ARC10,
	[GAUDI2_QUEUE_ID_NIC_10_1] = CPU_ID_NIC_QMAN_ARC10,
	[GAUDI2_QUEUE_ID_NIC_10_2] = CPU_ID_NIC_QMAN_ARC10,
	[GAUDI2_QUEUE_ID_NIC_10_3] = CPU_ID_NIC_QMAN_ARC10,
	[GAUDI2_QUEUE_ID_NIC_11_0] = CPU_ID_NIC_QMAN_ARC11,
	[GAUDI2_QUEUE_ID_NIC_11_1] = CPU_ID_NIC_QMAN_ARC11,
	[GAUDI2_QUEUE_ID_NIC_11_2] = CPU_ID_NIC_QMAN_ARC11,
	[GAUDI2_QUEUE_ID_NIC_11_3] = CPU_ID_NIC_QMAN_ARC11,
	[GAUDI2_QUEUE_ID_NIC_12_0] = CPU_ID_NIC_QMAN_ARC12,
	[GAUDI2_QUEUE_ID_NIC_12_1] = CPU_ID_NIC_QMAN_ARC12,
	[GAUDI2_QUEUE_ID_NIC_12_2] = CPU_ID_NIC_QMAN_ARC12,
	[GAUDI2_QUEUE_ID_NIC_12_3] = CPU_ID_NIC_QMAN_ARC12,
	[GAUDI2_QUEUE_ID_NIC_13_0] = CPU_ID_NIC_QMAN_ARC13,
	[GAUDI2_QUEUE_ID_NIC_13_1] = CPU_ID_NIC_QMAN_ARC13,
	[GAUDI2_QUEUE_ID_NIC_13_2] = CPU_ID_NIC_QMAN_ARC13,
	[GAUDI2_QUEUE_ID_NIC_13_3] = CPU_ID_NIC_QMAN_ARC13,
	[GAUDI2_QUEUE_ID_NIC_14_0] = CPU_ID_NIC_QMAN_ARC14,
	[GAUDI2_QUEUE_ID_NIC_14_1] = CPU_ID_NIC_QMAN_ARC14,
	[GAUDI2_QUEUE_ID_NIC_14_2] = CPU_ID_NIC_QMAN_ARC14,
	[GAUDI2_QUEUE_ID_NIC_14_3] = CPU_ID_NIC_QMAN_ARC14,
	[GAUDI2_QUEUE_ID_NIC_15_0] = CPU_ID_NIC_QMAN_ARC15,
	[GAUDI2_QUEUE_ID_NIC_15_1] = CPU_ID_NIC_QMAN_ARC15,
	[GAUDI2_QUEUE_ID_NIC_15_2] = CPU_ID_NIC_QMAN_ARC15,
	[GAUDI2_QUEUE_ID_NIC_15_3] = CPU_ID_NIC_QMAN_ARC15,
	[GAUDI2_QUEUE_ID_NIC_16_0] = CPU_ID_NIC_QMAN_ARC16,
	[GAUDI2_QUEUE_ID_NIC_16_1] = CPU_ID_NIC_QMAN_ARC16,
	[GAUDI2_QUEUE_ID_NIC_16_2] = CPU_ID_NIC_QMAN_ARC16,
	[GAUDI2_QUEUE_ID_NIC_16_3] = CPU_ID_NIC_QMAN_ARC16,
	[GAUDI2_QUEUE_ID_NIC_17_0] = CPU_ID_NIC_QMAN_ARC17,
	[GAUDI2_QUEUE_ID_NIC_17_1] = CPU_ID_NIC_QMAN_ARC17,
	[GAUDI2_QUEUE_ID_NIC_17_2] = CPU_ID_NIC_QMAN_ARC17,
	[GAUDI2_QUEUE_ID_NIC_17_3] = CPU_ID_NIC_QMAN_ARC17,
	[GAUDI2_QUEUE_ID_NIC_18_0] = CPU_ID_NIC_QMAN_ARC18,
	[GAUDI2_QUEUE_ID_NIC_18_1] = CPU_ID_NIC_QMAN_ARC18,
	[GAUDI2_QUEUE_ID_NIC_18_2] = CPU_ID_NIC_QMAN_ARC18,
	[GAUDI2_QUEUE_ID_NIC_18_3] = CPU_ID_NIC_QMAN_ARC18,
	[GAUDI2_QUEUE_ID_NIC_19_0] = CPU_ID_NIC_QMAN_ARC19,
	[GAUDI2_QUEUE_ID_NIC_19_1] = CPU_ID_NIC_QMAN_ARC19,
	[GAUDI2_QUEUE_ID_NIC_19_2] = CPU_ID_NIC_QMAN_ARC19,
	[GAUDI2_QUEUE_ID_NIC_19_3] = CPU_ID_NIC_QMAN_ARC19,
	[GAUDI2_QUEUE_ID_NIC_20_0] = CPU_ID_NIC_QMAN_ARC20,
	[GAUDI2_QUEUE_ID_NIC_20_1] = CPU_ID_NIC_QMAN_ARC20,
	[GAUDI2_QUEUE_ID_NIC_20_2] = CPU_ID_NIC_QMAN_ARC20,
	[GAUDI2_QUEUE_ID_NIC_20_3] = CPU_ID_NIC_QMAN_ARC20,
	[GAUDI2_QUEUE_ID_NIC_21_0] = CPU_ID_NIC_QMAN_ARC21,
	[GAUDI2_QUEUE_ID_NIC_21_1] = CPU_ID_NIC_QMAN_ARC21,
	[GAUDI2_QUEUE_ID_NIC_21_2] = CPU_ID_NIC_QMAN_ARC21,
	[GAUDI2_QUEUE_ID_NIC_21_3] = CPU_ID_NIC_QMAN_ARC21,
	[GAUDI2_QUEUE_ID_NIC_22_0] = CPU_ID_NIC_QMAN_ARC22,
	[GAUDI2_QUEUE_ID_NIC_22_1] = CPU_ID_NIC_QMAN_ARC22,
	[GAUDI2_QUEUE_ID_NIC_22_2] = CPU_ID_NIC_QMAN_ARC22,
	[GAUDI2_QUEUE_ID_NIC_22_3] = CPU_ID_NIC_QMAN_ARC22,
	[GAUDI2_QUEUE_ID_NIC_23_0] = CPU_ID_NIC_QMAN_ARC23,
	[GAUDI2_QUEUE_ID_NIC_23_1] = CPU_ID_NIC_QMAN_ARC23,
	[GAUDI2_QUEUE_ID_NIC_23_2] = CPU_ID_NIC_QMAN_ARC23,
	[GAUDI2_QUEUE_ID_NIC_23_3] = CPU_ID_NIC_QMAN_ARC23,
	[GAUDI2_QUEUE_ID_ROT_0_0] = CPU_ID_ROT_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_ROT_0_1] = CPU_ID_ROT_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_ROT_0_2] = CPU_ID_ROT_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_ROT_0_3] = CPU_ID_ROT_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_ROT_1_0] = CPU_ID_ROT_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_ROT_1_1] = CPU_ID_ROT_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_ROT_1_2] = CPU_ID_ROT_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_ROT_1_3] = CPU_ID_ROT_QMAN_ARC1
};
const u32 gaudi2_dma_core_blocks_bases[DMA_CORE_ID_SIZE] = {
	[DMA_CORE_ID_PDMA0] = mmPDMA0_CORE_BASE,
	[DMA_CORE_ID_PDMA1] = mmPDMA1_CORE_BASE,
	[DMA_CORE_ID_EDMA0] = mmDCORE0_EDMA0_CORE_BASE,
	[DMA_CORE_ID_EDMA1] = mmDCORE0_EDMA1_CORE_BASE,
	[DMA_CORE_ID_EDMA2] = mmDCORE1_EDMA0_CORE_BASE,
	[DMA_CORE_ID_EDMA3] = mmDCORE1_EDMA1_CORE_BASE,
	[DMA_CORE_ID_EDMA4] = mmDCORE2_EDMA0_CORE_BASE,
	[DMA_CORE_ID_EDMA5] = mmDCORE2_EDMA1_CORE_BASE,
	[DMA_CORE_ID_EDMA6] = mmDCORE3_EDMA0_CORE_BASE,
	[DMA_CORE_ID_EDMA7] = mmDCORE3_EDMA1_CORE_BASE,
	[DMA_CORE_ID_KDMA] = mmARC_FARM_KDMA_BASE
};

const u32 gaudi2_mme_acc_blocks_bases[MME_ID_SIZE] = {
	[MME_ID_DCORE0] = mmDCORE0_MME_ACC_BASE,
	[MME_ID_DCORE1] = mmDCORE1_MME_ACC_BASE,
	[MME_ID_DCORE2] = mmDCORE2_MME_ACC_BASE,
	[MME_ID_DCORE3] = mmDCORE3_MME_ACC_BASE
};

static const u32 gaudi2_tpc_cfg_blocks_bases[TPC_ID_SIZE] = {
	[TPC_ID_DCORE0_TPC0] = mmDCORE0_TPC0_CFG_BASE,
	[TPC_ID_DCORE0_TPC1] = mmDCORE0_TPC1_CFG_BASE,
	[TPC_ID_DCORE0_TPC2] = mmDCORE0_TPC2_CFG_BASE,
	[TPC_ID_DCORE0_TPC3] = mmDCORE0_TPC3_CFG_BASE,
	[TPC_ID_DCORE0_TPC4] = mmDCORE0_TPC4_CFG_BASE,
	[TPC_ID_DCORE0_TPC5] = mmDCORE0_TPC5_CFG_BASE,
	[TPC_ID_DCORE1_TPC0] = mmDCORE1_TPC0_CFG_BASE,
	[TPC_ID_DCORE1_TPC1] = mmDCORE1_TPC1_CFG_BASE,
	[TPC_ID_DCORE1_TPC2] = mmDCORE1_TPC2_CFG_BASE,
	[TPC_ID_DCORE1_TPC3] = mmDCORE1_TPC3_CFG_BASE,
	[TPC_ID_DCORE1_TPC4] = mmDCORE1_TPC4_CFG_BASE,
	[TPC_ID_DCORE1_TPC5] = mmDCORE1_TPC5_CFG_BASE,
	[TPC_ID_DCORE2_TPC0] = mmDCORE2_TPC0_CFG_BASE,
	[TPC_ID_DCORE2_TPC1] = mmDCORE2_TPC1_CFG_BASE,
	[TPC_ID_DCORE2_TPC2] = mmDCORE2_TPC2_CFG_BASE,
	[TPC_ID_DCORE2_TPC3] = mmDCORE2_TPC3_CFG_BASE,
	[TPC_ID_DCORE2_TPC4] = mmDCORE2_TPC4_CFG_BASE,
	[TPC_ID_DCORE2_TPC5] = mmDCORE2_TPC5_CFG_BASE,
	[TPC_ID_DCORE3_TPC0] = mmDCORE3_TPC0_CFG_BASE,
	[TPC_ID_DCORE3_TPC1] = mmDCORE3_TPC1_CFG_BASE,
	[TPC_ID_DCORE3_TPC2] = mmDCORE3_TPC2_CFG_BASE,
	[TPC_ID_DCORE3_TPC3] = mmDCORE3_TPC3_CFG_BASE,
	[TPC_ID_DCORE3_TPC4] = mmDCORE3_TPC4_CFG_BASE,
	[TPC_ID_DCORE3_TPC5] = mmDCORE3_TPC5_CFG_BASE,
	[TPC_ID_DCORE0_TPC6] = mmDCORE0_TPC6_CFG_BASE,
};

static const u32 gaudi2_tpc_eml_cfg_blocks_bases[TPC_ID_SIZE] = {
	[TPC_ID_DCORE0_TPC0] = mmDCORE0_TPC0_EML_CFG_BASE,
	[TPC_ID_DCORE0_TPC1] = mmDCORE0_TPC1_EML_CFG_BASE,
	[TPC_ID_DCORE0_TPC2] = mmDCORE0_TPC2_EML_CFG_BASE,
	[TPC_ID_DCORE0_TPC3] = mmDCORE0_TPC3_EML_CFG_BASE,
	[TPC_ID_DCORE0_TPC4] = mmDCORE0_TPC4_EML_CFG_BASE,
	[TPC_ID_DCORE0_TPC5] = mmDCORE0_TPC5_EML_CFG_BASE,
	[TPC_ID_DCORE1_TPC0] = mmDCORE1_TPC0_EML_CFG_BASE,
	[TPC_ID_DCORE1_TPC1] = mmDCORE1_TPC1_EML_CFG_BASE,
	[TPC_ID_DCORE1_TPC2] = mmDCORE1_TPC2_EML_CFG_BASE,
	[TPC_ID_DCORE1_TPC3] = mmDCORE1_TPC3_EML_CFG_BASE,
	[TPC_ID_DCORE1_TPC4] = mmDCORE1_TPC4_EML_CFG_BASE,
	[TPC_ID_DCORE1_TPC5] = mmDCORE1_TPC5_EML_CFG_BASE,
	[TPC_ID_DCORE2_TPC0] = mmDCORE2_TPC0_EML_CFG_BASE,
	[TPC_ID_DCORE2_TPC1] = mmDCORE2_TPC1_EML_CFG_BASE,
	[TPC_ID_DCORE2_TPC2] = mmDCORE2_TPC2_EML_CFG_BASE,
	[TPC_ID_DCORE2_TPC3] = mmDCORE2_TPC3_EML_CFG_BASE,
	[TPC_ID_DCORE2_TPC4] = mmDCORE2_TPC4_EML_CFG_BASE,
	[TPC_ID_DCORE2_TPC5] = mmDCORE2_TPC5_EML_CFG_BASE,
	[TPC_ID_DCORE3_TPC0] = mmDCORE3_TPC0_EML_CFG_BASE,
	[TPC_ID_DCORE3_TPC1] = mmDCORE3_TPC1_EML_CFG_BASE,
	[TPC_ID_DCORE3_TPC2] = mmDCORE3_TPC2_EML_CFG_BASE,
	[TPC_ID_DCORE3_TPC3] = mmDCORE3_TPC3_EML_CFG_BASE,
	[TPC_ID_DCORE3_TPC4] = mmDCORE3_TPC4_EML_CFG_BASE,
	[TPC_ID_DCORE3_TPC5] = mmDCORE3_TPC5_EML_CFG_BASE,
	[TPC_ID_DCORE0_TPC6] = mmDCORE0_TPC6_EML_CFG_BASE,
};

const u32 gaudi2_rot_blocks_bases[ROTATOR_ID_SIZE] = {
	[ROTATOR_ID_0] = mmROT0_BASE,
	[ROTATOR_ID_1] = mmROT1_BASE
};

static const u32 gaudi2_tpc_id_to_queue_id[TPC_ID_SIZE] = {
	[TPC_ID_DCORE0_TPC0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0,
	[TPC_ID_DCORE0_TPC1] = GAUDI2_QUEUE_ID_DCORE0_TPC_1_0,
	[TPC_ID_DCORE0_TPC2] = GAUDI2_QUEUE_ID_DCORE0_TPC_2_0,
	[TPC_ID_DCORE0_TPC3] = GAUDI2_QUEUE_ID_DCORE0_TPC_3_0,
	[TPC_ID_DCORE0_TPC4] = GAUDI2_QUEUE_ID_DCORE0_TPC_4_0,
	[TPC_ID_DCORE0_TPC5] = GAUDI2_QUEUE_ID_DCORE0_TPC_5_0,
	[TPC_ID_DCORE1_TPC0] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0,
	[TPC_ID_DCORE1_TPC1] = GAUDI2_QUEUE_ID_DCORE1_TPC_1_0,
	[TPC_ID_DCORE1_TPC2] = GAUDI2_QUEUE_ID_DCORE1_TPC_2_0,
	[TPC_ID_DCORE1_TPC3] = GAUDI2_QUEUE_ID_DCORE1_TPC_3_0,
	[TPC_ID_DCORE1_TPC4] = GAUDI2_QUEUE_ID_DCORE1_TPC_4_0,
	[TPC_ID_DCORE1_TPC5] = GAUDI2_QUEUE_ID_DCORE1_TPC_5_0,
	[TPC_ID_DCORE2_TPC0] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0,
	[TPC_ID_DCORE2_TPC1] = GAUDI2_QUEUE_ID_DCORE2_TPC_1_0,
	[TPC_ID_DCORE2_TPC2] = GAUDI2_QUEUE_ID_DCORE2_TPC_2_0,
	[TPC_ID_DCORE2_TPC3] = GAUDI2_QUEUE_ID_DCORE2_TPC_3_0,
	[TPC_ID_DCORE2_TPC4] = GAUDI2_QUEUE_ID_DCORE2_TPC_4_0,
	[TPC_ID_DCORE2_TPC5] = GAUDI2_QUEUE_ID_DCORE2_TPC_5_0,
	[TPC_ID_DCORE3_TPC0] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0,
	[TPC_ID_DCORE3_TPC1] = GAUDI2_QUEUE_ID_DCORE3_TPC_1_0,
	[TPC_ID_DCORE3_TPC2] = GAUDI2_QUEUE_ID_DCORE3_TPC_2_0,
	[TPC_ID_DCORE3_TPC3] = GAUDI2_QUEUE_ID_DCORE3_TPC_3_0,
	[TPC_ID_DCORE3_TPC4] = GAUDI2_QUEUE_ID_DCORE3_TPC_4_0,
	[TPC_ID_DCORE3_TPC5] = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0,
	[TPC_ID_DCORE0_TPC6] = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0,
};

static const u32 gaudi2_rot_id_to_queue_id[ROTATOR_ID_SIZE] = {
	[ROTATOR_ID_0] = GAUDI2_QUEUE_ID_ROT_0_0,
	[ROTATOR_ID_1] = GAUDI2_QUEUE_ID_ROT_1_0,
};
static const u32 gaudi2_tpc_engine_id_to_tpc_id[] = {
	[GAUDI2_DCORE0_ENGINE_ID_TPC_0] = TPC_ID_DCORE0_TPC0,
	[GAUDI2_DCORE0_ENGINE_ID_TPC_1] = TPC_ID_DCORE0_TPC1,
	[GAUDI2_DCORE0_ENGINE_ID_TPC_2] = TPC_ID_DCORE0_TPC2,
	[GAUDI2_DCORE0_ENGINE_ID_TPC_3] = TPC_ID_DCORE0_TPC3,
	[GAUDI2_DCORE0_ENGINE_ID_TPC_4] = TPC_ID_DCORE0_TPC4,
	[GAUDI2_DCORE0_ENGINE_ID_TPC_5] = TPC_ID_DCORE0_TPC5,
	[GAUDI2_DCORE1_ENGINE_ID_TPC_0] = TPC_ID_DCORE1_TPC0,
	[GAUDI2_DCORE1_ENGINE_ID_TPC_1] = TPC_ID_DCORE1_TPC1,
	[GAUDI2_DCORE1_ENGINE_ID_TPC_2] = TPC_ID_DCORE1_TPC2,
	[GAUDI2_DCORE1_ENGINE_ID_TPC_3] = TPC_ID_DCORE1_TPC3,
	[GAUDI2_DCORE1_ENGINE_ID_TPC_4] = TPC_ID_DCORE1_TPC4,
	[GAUDI2_DCORE1_ENGINE_ID_TPC_5] = TPC_ID_DCORE1_TPC5,
	[GAUDI2_DCORE2_ENGINE_ID_TPC_0] = TPC_ID_DCORE2_TPC0,
	[GAUDI2_DCORE2_ENGINE_ID_TPC_1] = TPC_ID_DCORE2_TPC1,
	[GAUDI2_DCORE2_ENGINE_ID_TPC_2] = TPC_ID_DCORE2_TPC2,
	[GAUDI2_DCORE2_ENGINE_ID_TPC_3] = TPC_ID_DCORE2_TPC3,
	[GAUDI2_DCORE2_ENGINE_ID_TPC_4] = TPC_ID_DCORE2_TPC4,
	[GAUDI2_DCORE2_ENGINE_ID_TPC_5] = TPC_ID_DCORE2_TPC5,
	[GAUDI2_DCORE3_ENGINE_ID_TPC_0] = TPC_ID_DCORE3_TPC0,
	[GAUDI2_DCORE3_ENGINE_ID_TPC_1] = TPC_ID_DCORE3_TPC1,
	[GAUDI2_DCORE3_ENGINE_ID_TPC_2] = TPC_ID_DCORE3_TPC2,
	[GAUDI2_DCORE3_ENGINE_ID_TPC_3] = TPC_ID_DCORE3_TPC3,
	[GAUDI2_DCORE3_ENGINE_ID_TPC_4] = TPC_ID_DCORE3_TPC4,
	[GAUDI2_DCORE3_ENGINE_ID_TPC_5] = TPC_ID_DCORE3_TPC5,
	/* the PCI TPC is placed last (mapped like HW) */
	[GAUDI2_DCORE0_ENGINE_ID_TPC_6] = TPC_ID_DCORE0_TPC6,
};
static const u32 gaudi2_mme_engine_id_to_mme_id[] = {
	[GAUDI2_DCORE0_ENGINE_ID_MME] = MME_ID_DCORE0,
	[GAUDI2_DCORE1_ENGINE_ID_MME] = MME_ID_DCORE1,
	[GAUDI2_DCORE2_ENGINE_ID_MME] = MME_ID_DCORE2,
	[GAUDI2_DCORE3_ENGINE_ID_MME] = MME_ID_DCORE3,
};

static const u32 gaudi2_edma_engine_id_to_edma_id[] = {
	[GAUDI2_ENGINE_ID_PDMA_0] = DMA_CORE_ID_PDMA0,
	[GAUDI2_ENGINE_ID_PDMA_1] = DMA_CORE_ID_PDMA1,
	[GAUDI2_DCORE0_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA0,
	[GAUDI2_DCORE0_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA1,
	[GAUDI2_DCORE1_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA2,
	[GAUDI2_DCORE1_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA3,
	[GAUDI2_DCORE2_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA4,
	[GAUDI2_DCORE2_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA5,
	[GAUDI2_DCORE3_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA6,
	[GAUDI2_DCORE3_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA7,
	[GAUDI2_ENGINE_ID_KDMA] = DMA_CORE_ID_KDMA,
};

const u32 edma_stream_base[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = {
	GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0,
	GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0,
	GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0,
	GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0,
	GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0,
	GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0,
	GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0,
	GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0,
};

static const char gaudi2_vdec_irq_name[GAUDI2_VDEC_MSIX_ENTRIES][GAUDI2_MAX_STRING_LEN] = {
	"gaudi2 vdec 0_0", "gaudi2 vdec 0_0 abnormal",
	"gaudi2 vdec 0_1", "gaudi2 vdec 0_1 abnormal",
	"gaudi2 vdec 1_0", "gaudi2 vdec 1_0 abnormal",
	"gaudi2 vdec 1_1", "gaudi2 vdec 1_1 abnormal",
	"gaudi2 vdec 2_0", "gaudi2 vdec 2_0 abnormal",
	"gaudi2 vdec 2_1", "gaudi2 vdec 2_1 abnormal",
	"gaudi2 vdec 3_0", "gaudi2 vdec 3_0 abnormal",
	"gaudi2 vdec 3_1", "gaudi2 vdec 3_1 abnormal",
	"gaudi2 vdec s_0", "gaudi2 vdec s_0 abnormal",
	"gaudi2 vdec s_1", "gaudi2 vdec s_1 abnormal"
};
static const u32 gaudi2_tpc_initiator_hbw_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = {
	DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2, DCORE0_RTR3, DCORE0_RTR3,
	DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5, DCORE1_RTR4, DCORE1_RTR4,
	DCORE2_RTR3, DCORE2_RTR3, DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1,
	DCORE3_RTR4, DCORE3_RTR4, DCORE3_RTR5, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR6,
	DCORE0_RTR0
};
static const u32 gaudi2_tpc_initiator_lbw_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = {
	DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2,
	DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5,
	DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1, DCORE2_RTR0, DCORE2_RTR0,
	DCORE3_RTR5, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR6, DCORE3_RTR7, DCORE3_RTR7,
	DCORE0_RTR0
};
static const u32 gaudi2_dec_initiator_hbw_rtr_id[NUMBER_OF_DEC] = {
	DCORE0_RTR0, DCORE0_RTR0, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0, DCORE2_RTR0,
	DCORE3_RTR7, DCORE3_RTR7, DCORE0_RTR0, DCORE0_RTR0
};

static const u32 gaudi2_dec_initiator_lbw_rtr_id[NUMBER_OF_DEC] = {
	DCORE0_RTR1, DCORE0_RTR1, DCORE1_RTR6, DCORE1_RTR6, DCORE2_RTR1, DCORE2_RTR1,
	DCORE3_RTR6, DCORE3_RTR6, DCORE0_RTR0, DCORE0_RTR0
};

static const u32 gaudi2_nic_initiator_hbw_rtr_id[NIC_NUMBER_OF_MACROS] = {
	DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0,
	DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, DCORE3_RTR7
};

static const u32 gaudi2_nic_initiator_lbw_rtr_id[NIC_NUMBER_OF_MACROS] = {
	DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0,
	DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, DCORE3_RTR7
};

static const u32 gaudi2_edma_initiator_hbw_sft[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = {
	mmSFT0_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE,
	mmSFT0_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
	mmSFT1_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE,
	mmSFT1_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
	mmSFT2_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
	mmSFT2_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE,
	mmSFT3_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
	mmSFT3_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE
};

static const u32 gaudi2_pdma_initiator_hbw_rtr_id[NUM_OF_PDMA] = {
	DCORE0_RTR0, DCORE0_RTR0
};

static const u32 gaudi2_pdma_initiator_lbw_rtr_id[NUM_OF_PDMA] = {
	DCORE0_RTR2, DCORE0_RTR2
};

static const u32 gaudi2_rot_initiator_hbw_rtr_id[NUM_OF_ROT] = {
	DCORE2_RTR0, DCORE3_RTR7
};

static const u32 gaudi2_rot_initiator_lbw_rtr_id[NUM_OF_ROT] = {
	DCORE2_RTR2, DCORE3_RTR5
};
struct mme_initiators_rtr_id {
	u32 wap0;
	u32 wap1;
	u32 write;
	u32 read;
	u32 sbte0;
	u32 sbte1;
	u32 sbte2;
	u32 sbte3;
	u32 sbte4;
};
enum mme_initiators {
	MME_WAP0 = 0,
	MME_WAP1,
	MME_WRITE,
	MME_READ,
	MME_SBTE0,
	MME_SBTE1,
	MME_SBTE2,
	MME_SBTE3,
	MME_SBTE4,
	MME_INITIATORS_MAX
};
static const struct mme_initiators_rtr_id
gaudi2_mme_initiator_rtr_id[NUM_OF_MME_PER_DCORE * NUM_OF_DCORES] = {
	{ .wap0 = 5, .wap1 = 7, .write = 6, .read = 7,
	.sbte0 = 7, .sbte1 = 4, .sbte2 = 4, .sbte3 = 5, .sbte4 = 6},
	{ .wap0 = 10, .wap1 = 8, .write = 9, .read = 8,
	.sbte0 = 11, .sbte1 = 11, .sbte2 = 10, .sbte3 = 9, .sbte4 = 8},
	{ .wap0 = 21, .wap1 = 23, .write = 22, .read = 23,
	.sbte0 = 20, .sbte1 = 20, .sbte2 = 21, .sbte3 = 22, .sbte4 = 23},
	{ .wap0 = 30, .wap1 = 28, .write = 29, .read = 30,
	.sbte0 = 31, .sbte1 = 31, .sbte2 = 30, .sbte3 = 29, .sbte4 = 28},
};
enum razwi_event_sources {
	RAZWI_TPC,
	RAZWI_MME,
	RAZWI_EDMA,
	RAZWI_PDMA,
	RAZWI_NIC,
	RAZWI_DEC,
	RAZWI_ROT
};
struct hbm_mc_error_causes {
	u32 mask;
	char cause[50];
};
static struct hl_special_block_info gaudi2_special_blocks[] = GAUDI2_SPECIAL_BLOCKS;

/* Special blocks iterator is currently used to configure security protection bits,
 * and read global errors. Most HW blocks are addressable and those that aren't (N/A)
 * must be skipped. The following configurations are commonly used for both PB config
 * and global error reading, since currently they both share the same settings.
 * Once it changes, we must remember to use separate configurations for either one.
 */
static int gaudi2_iterator_skip_block_types[] = {
	GAUDI2_BLOCK_TYPE_PLL,
	GAUDI2_BLOCK_TYPE_EU_BIST,
	GAUDI2_BLOCK_TYPE_HBM,
	GAUDI2_BLOCK_TYPE_XFT
};

static struct range gaudi2_iterator_skip_block_ranges[] = {
	/* Skip all PSOC blocks except for PSOC_GLOBAL_CONF */
	{mmPSOC_I2C_M0_BASE, mmPSOC_EFUSE_BASE},
	{mmPSOC_BTL_BASE, mmPSOC_MSTR_IF_RR_SHRD_HBW_BASE},
	/* Skip all CPU blocks except for CPU_IF */
	{mmCPU_CA53_CFG_BASE, mmCPU_CA53_CFG_BASE},
	{mmCPU_TIMESTAMP_BASE, mmCPU_MSTR_IF_RR_SHRD_HBW_BASE}
};
static struct hbm_mc_error_causes hbm_mc_spi[GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE] = {
	{HBM_MC_SPI_TEMP_PIN_CHG_MASK, "temperature pins changed"},
	{HBM_MC_SPI_THR_ENG_MASK, "temperature-based throttling engaged"},
	{HBM_MC_SPI_THR_DIS_ENG_MASK, "temperature-based throttling disengaged"},
	{HBM_MC_SPI_IEEE1500_COMP_MASK, "IEEE1500 op comp"},
	{HBM_MC_SPI_IEEE1500_PAUSED_MASK, "IEEE1500 op paused"},
};

static const char * const hbm_mc_sei_cause[GAUDI2_NUM_OF_HBM_SEI_CAUSE] = {
	[HBM_SEI_CMD_PARITY_EVEN] = "SEI C/A parity even",
	[HBM_SEI_CMD_PARITY_ODD] = "SEI C/A parity odd",
	[HBM_SEI_READ_ERR] = "SEI read data error",
	[HBM_SEI_WRITE_DATA_PARITY_ERR] = "SEI write data parity error",
	[HBM_SEI_CATTRIP] = "SEI CATTRIP asserted",
	[HBM_SEI_MEM_BIST_FAIL] = "SEI memory BIST fail",
	[HBM_SEI_DFI] = "SEI DFI error",
	[HBM_SEI_INV_TEMP_READ_OUT] = "SEI invalid temp read",
	[HBM_SEI_BIST_FAIL] = "SEI BIST fail"
};
struct mmu_spi_sei_cause {
	char cause[50];
	int clear_bit; /* INTERRUPT_CLR bit index, -1 when there is no clear bit */
};
static const struct mmu_spi_sei_cause gaudi2_mmu_spi_sei[GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE] = {
	{"page fault", 1},		/* INTERRUPT_CLR[1] */
	{"page access", 1},		/* INTERRUPT_CLR[1] */
	{"bypass ddr", 2},		/* INTERRUPT_CLR[2] */
	{"multi hit", 2},		/* INTERRUPT_CLR[2] */
	{"mmu rei0", -1},		/* no clear register bit */
	{"mmu rei1", -1},		/* no clear register bit */
	{"stlb rei0", -1},		/* no clear register bit */
	{"stlb rei1", -1},		/* no clear register bit */
	{"rr privileged write hit", 2},	/* INTERRUPT_CLR[2] */
	{"rr privileged read hit", 2},	/* INTERRUPT_CLR[2] */
	{"rr secure write hit", 2},	/* INTERRUPT_CLR[2] */
	{"rr secure read hit", 2},	/* INTERRUPT_CLR[2] */
	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
	{"slave error", 16},		/* INTERRUPT_CLR[16] */
	{"dec error", 17},		/* INTERRUPT_CLR[17] */
	{"burst fifo full", 2}		/* INTERRUPT_CLR[2] */
};
struct gaudi2_cache_invld_params {
	u32 start_va;
	u32 end_va;
	u32 inv_start_val;
	u32 flags;
	bool range_invalidation;
};

struct gaudi2_tpc_idle_data {
	struct engines_data *e;
	unsigned long *mask;
	bool *is_idle;
	const char *tpc_fmt;
};

struct gaudi2_tpc_mmu_data {
	u32 rw_asid;
};

static s64 gaudi2_state_dump_specs_props[SP_MAX] = {0};
static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val);
static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id);
static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id);
static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id);
static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id);
static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val);
static int gaudi2_send_job_to_kdma(struct hl_device *hdev, u64 src_addr, u64 dst_addr, u32 size,
					bool is_memset);
static bool gaudi2_get_tpc_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
					struct engines_data *e);
static bool gaudi2_get_mme_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
					struct engines_data *e);
static bool gaudi2_get_edma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
					struct engines_data *e);
static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr);
static u64 gaudi2_mmu_descramble_addr(struct hl_device *hdev, u64 scrambled_addr);
static void gaudi2_init_scrambler_hbm(struct hl_device *hdev)
{
}

static u32 gaudi2_get_signal_cb_size(struct hl_device *hdev)
{
	return sizeof(struct packet_msg_short);
}

static u32 gaudi2_get_wait_cb_size(struct hl_device *hdev)
{
	return sizeof(struct packet_msg_short) * 4 + sizeof(struct packet_fence);
}
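/*
 * Sizing note for the two helpers above: a signal CB holds a single
 * MSG_SHORT packet, while a wait CB is sized for four MSG_SHORT packets
 * plus one FENCE packet. The intended packet sequence is an assumption
 * based only on these sizes: presumably the MSG_SHORT packets configure
 * and arm a sync-object monitor, and the FENCE stalls the stream until
 * the monitor fires.
 */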
void gaudi2_iterate_tpcs(struct hl_device *hdev, struct iterate_module_ctx *ctx)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int dcore, inst, tpc_seq;
	u32 offset;

	/* init the return code */
	ctx->rc = 0;

	for (dcore = 0; dcore < NUM_OF_DCORES; dcore++) {
		for (inst = 0; inst < NUM_OF_TPC_PER_DCORE; inst++) {
			tpc_seq = dcore * NUM_OF_TPC_PER_DCORE + inst;

			if (!(prop->tpc_enabled_mask & BIT(tpc_seq)))
				continue;

			offset = (DCORE_OFFSET * dcore) + (DCORE_TPC_OFFSET * inst);

			ctx->fn(hdev, dcore, inst, offset, ctx);
			if (ctx->rc) {
				dev_err(hdev->dev, "TPC iterator failed for DCORE%d TPC%d\n",
							dcore, inst);
				return;
			}
		}
	}

	if (!(prop->tpc_enabled_mask & BIT(TPC_ID_DCORE0_TPC6)))
		return;

	/* special check for PCI TPC (DCORE0_TPC6) */
	offset = DCORE_TPC_OFFSET * (NUM_DCORE0_TPC - 1);
	ctx->fn(hdev, 0, NUM_DCORE0_TPC - 1, offset, ctx);
	if (ctx->rc)
		dev_err(hdev->dev, "TPC iterator failed for DCORE0 TPC6\n");
}
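/*
 * Illustrative usage sketch for gaudi2_iterate_tpcs() (the callback below
 * is hypothetical and not part of the driver): the caller embeds a callback
 * in an iterate_module_ctx, and the iterator invokes it once per enabled
 * TPC, including the PCI TPC (DCORE0_TPC6), reporting failures via ctx->rc:
 *
 *	static void init_one_tpc(struct hl_device *hdev, int dcore, int inst,
 *					u32 offset, struct iterate_module_ctx *ctx)
 *	{
 *		// 'offset' is relative to the DCORE0 TPC0 block, so any
 *		// per-TPC register can be addressed as <block base> + offset
 *		ctx->rc = 0;
 *	}
 *
 *	struct iterate_module_ctx ctx = { .fn = init_one_tpc };
 *
 *	gaudi2_iterate_tpcs(hdev, &ctx);
 *	if (ctx.rc)
 *		dev_err(hdev->dev, "TPC init failed\n");
 */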
static bool gaudi2_host_phys_addr_valid(u64 addr)
{
	if ((addr < HOST_PHYS_BASE_0 + HOST_PHYS_SIZE_0) || (addr >= HOST_PHYS_BASE_1))
		return true;

	return false;
}
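/*
 * Note: host physical addresses accessible to the device fall in two
 * windows, [HOST_PHYS_BASE_0, HOST_PHYS_BASE_0 + HOST_PHYS_SIZE_0) and
 * everything from HOST_PHYS_BASE_1 upward; the helper above only rejects
 * addresses in the hole between the two windows.
 */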
static int set_number_of_functional_hbms(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u8 faulty_hbms = hweight64(hdev->dram_binning);

	/* check if all HBMs should be used */
	if (!faulty_hbms) {
		dev_dbg(hdev->dev, "All HBM are in use (no binning)\n");
		prop->num_functional_hbms = GAUDI2_HBM_NUM;
		return 0;
	}

	/*
	 * check for error condition in which number of binning
	 * candidates is higher than the maximum supported by the
	 * driver (in which case binning mask shall be ignored and driver will
	 * set the default)
	 */
	if (faulty_hbms > MAX_FAULTY_HBMS) {
		dev_err(hdev->dev,
			"HBM binning supports max of %d faulty HBMs, supplied mask 0x%llx.\n",
			MAX_FAULTY_HBMS, hdev->dram_binning);
		return -EINVAL;
	}

	/*
	 * by default, number of functional HBMs in Gaudi2 is always
	 * GAUDI2_HBM_NUM - 1.
	 */
	prop->num_functional_hbms = GAUDI2_HBM_NUM - faulty_hbms;

	return 0;
}
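/*
 * Worked example (illustrative): a dram_binning mask of 0x1 marks one
 * faulty HBM, so hweight64() returns 1 and num_functional_hbms becomes
 * GAUDI2_HBM_NUM - 1. Since each HBM contributes SZ_16G to the DRAM size
 * computed in gaudi2_set_dram_properties() below, binning one HBM shrinks
 * the reported DRAM size by 16GB.
 */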
static int gaudi2_set_dram_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 basic_hbm_page_size;
	int rc;

	rc = set_number_of_functional_hbms(hdev);
	if (rc)
		return rc;

	/*
	 * Due to a HW bug in which the TLB size is x16 smaller than expected,
	 * we use a workaround in which we use an x16 bigger page size to be
	 * able to populate the entire HBM mappings in the TLB
	 */
	basic_hbm_page_size = prop->num_functional_hbms * SZ_8M;
	prop->dram_page_size = GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR * basic_hbm_page_size;
	prop->device_mem_alloc_default_page_size = prop->dram_page_size;
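	/*
	 * Worked example (illustrative, assuming GAUDI2_HBM_NUM == 6 and a
	 * compensation factor of x16 per the comment above): with no binning,
	 * basic_hbm_page_size = 6 * 8MB = 48MB and
	 * dram_page_size = 16 * 48MB = 768MB.
	 */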
	prop->dram_size = prop->num_functional_hbms * SZ_16G;
	prop->dram_base_address = DRAM_PHYS_BASE;
	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
	prop->dram_supports_virtual_memory = true;

	prop->dram_user_base_address = DRAM_PHYS_BASE + prop->dram_page_size;
	prop->dram_hints_align_mask = ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK;
	prop->hints_dram_reserved_va_range.start_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_START;
	prop->hints_dram_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_END;
	/* since DRAM page size differs from DMMU page size we need to allocate
	 * DRAM memory in units of dram_page size and map this memory in
	 * units of DMMU page size. we overcome this size mismatch using a
	 * scrambling routine which takes a DRAM page and converts it to a DMMU
	 * page. to do that:
	 * 1. partition the virtual address space to DRAM-page (whole) pages.
	 *    (suppose we get n such pages)
	 * 2. limit the amount of virtual address space we got from 1 above to
	 *    a multiple of 64M as we don't want the scrambled address to cross
	 *    the DRAM virtual address space.
	 *    ( m = (n * DRAM_page_size) / DMMU_page_size).
	 * 3. determine the end address accordingly:
	 *    end_addr = start_addr + m * 48M
	 *
	 * the DRAM address MSBs (63:48) are not part of the roundup calculation
	 */
	prop->dmmu.start_addr = prop->dram_base_address +
			(prop->dram_page_size *
				DIV_ROUND_UP_SECTOR_T(prop->dram_size, prop->dram_page_size));

	prop->dmmu.end_addr = prop->dmmu.start_addr + prop->dram_page_size *
			div_u64((VA_HBM_SPACE_END - prop->dmmu.start_addr), prop->dmmu.page_size);

	return 0;
}
static int gaudi2_set_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hw_queue_properties *q_props;
	u32 num_sync_stream_queues = 0;
	int i;

	prop->max_queues = GAUDI2_QUEUE_ID_SIZE;
	prop->hw_queues_props = kcalloc(prop->max_queues, sizeof(struct hw_queue_properties),
					GFP_KERNEL);
	if (!prop->hw_queues_props)
		return -ENOMEM;

	q_props = prop->hw_queues_props;

	for (i = 0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i++) {
		q_props[i].type = QUEUE_TYPE_HW;
		q_props[i].driver_only = 0;

		if (i >= GAUDI2_QUEUE_ID_NIC_0_0 && i <= GAUDI2_QUEUE_ID_NIC_23_3) {
			q_props[i].supports_sync_stream = 0;
		} else {
			q_props[i].supports_sync_stream = 1;
			num_sync_stream_queues++;
		}

		q_props[i].cb_alloc_flags = CB_ALLOC_USER;
	}
	q_props[GAUDI2_QUEUE_ID_CPU_PQ].type = QUEUE_TYPE_CPU;
	q_props[GAUDI2_QUEUE_ID_CPU_PQ].driver_only = 1;
	q_props[GAUDI2_QUEUE_ID_CPU_PQ].cb_alloc_flags = CB_ALLOC_KERNEL;

	prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
	prop->cfg_base_address = CFG_BASE;
	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE_0;
	prop->host_base_address = HOST_PHYS_BASE_0;
	prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE_0;
	prop->max_pending_cs = GAUDI2_MAX_PENDING_CS;
	prop->completion_queues_count = GAUDI2_RESERVED_CQ_NUMBER;
	prop->user_dec_intr_count = NUMBER_OF_DEC;
	prop->user_interrupt_count = GAUDI2_IRQ_NUM_USER_LAST - GAUDI2_IRQ_NUM_USER_FIRST + 1;
	prop->completion_mode = HL_COMPLETION_MODE_CS;
	prop->sync_stream_first_sob = GAUDI2_RESERVED_SOB_NUMBER;
	prop->sync_stream_first_mon = GAUDI2_RESERVED_MON_NUMBER;

	prop->sram_base_address = SRAM_BASE_ADDR;
	prop->sram_size = SRAM_SIZE;
	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
	prop->sram_user_base_address = prop->sram_base_address + SRAM_USER_BASE_OFFSET;

	prop->hints_range_reservation = true;

	prop->rotator_enabled_mask = BIT(NUM_OF_ROT) - 1;

	if (hdev->pldm)
		prop->mmu_pgt_size = 0x800000; /* 8MB */
	else
		prop->mmu_pgt_size = MMU_PAGE_TABLES_INITIAL_SIZE;
	prop->mmu_pte_size = HL_PTE_SIZE;
	prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
	prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;

	prop->dmmu.hop_shifts[MMU_HOP0] = DHOP0_SHIFT;
	prop->dmmu.hop_shifts[MMU_HOP1] = DHOP1_SHIFT;
	prop->dmmu.hop_shifts[MMU_HOP2] = DHOP2_SHIFT;
	prop->dmmu.hop_shifts[MMU_HOP3] = DHOP3_SHIFT;
	prop->dmmu.hop_shifts[MMU_HOP4] = DHOP4_SHIFT;
	prop->dmmu.hop_masks[MMU_HOP0] = DHOP0_MASK;
	prop->dmmu.hop_masks[MMU_HOP1] = DHOP1_MASK;
	prop->dmmu.hop_masks[MMU_HOP2] = DHOP2_MASK;
	prop->dmmu.hop_masks[MMU_HOP3] = DHOP3_MASK;
	prop->dmmu.hop_masks[MMU_HOP4] = DHOP4_MASK;
	prop->dmmu.page_size = PAGE_SIZE_1GB;
	prop->dmmu.num_hops = MMU_ARCH_6_HOPS;
	prop->dmmu.last_mask = LAST_MASK;
	prop->dmmu.host_resident = 1;
	prop->dmmu.hop_table_size = prop->mmu_hop_table_size;
	prop->dmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;

	/*
	 * this is done in order to be able to validate FW descriptor (i.e. validating that
	 * the addresses and allocated space for the FW image do not cross memory bounds).
	 * for this reason we set the DRAM size to the minimum possible and later it will
	 * be modified according to what is reported in the cpucp info packet
	 */
	prop->dram_size = (GAUDI2_HBM_NUM - 1) * SZ_16G;
	hdev->pmmu_huge_range = true;
	prop->pmmu.host_resident = 1;
	prop->pmmu.num_hops = MMU_ARCH_6_HOPS;
	prop->pmmu.last_mask = LAST_MASK;
	prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
	prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;

	prop->hints_host_reserved_va_range.start_addr = RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START;
	prop->hints_host_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HOST_END;
	prop->hints_host_hpage_reserved_va_range.start_addr =
			RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_START;
	prop->hints_host_hpage_reserved_va_range.end_addr =
			RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_END;

	if (PAGE_SIZE == SZ_64K) {
		prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_64K;
		prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_64K;
		prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_64K;
		prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_64K;
		prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_64K;
		prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_64K;
		prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_64K;
		prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_64K;
		prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_64K;
		prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_64K;
		prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_64K;
		prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_64K;
		prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START;
		prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END;
		prop->pmmu.page_size = PAGE_SIZE_64KB;

		/* shifts and masks are the same in PMMU and HPMMU */
		memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
		prop->pmmu_huge.page_size = PAGE_SIZE_16MB;
		prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START;
		prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END;
	} else {
		prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_4K;
		prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_4K;
		prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_4K;
		prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_4K;
		prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_4K;
		prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_4K;
		prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_4K;
		prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_4K;
		prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_4K;
		prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_4K;
		prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_4K;
		prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_4K;
		prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START;
		prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END;
		prop->pmmu.page_size = PAGE_SIZE_4KB;

		/* shifts and masks are the same in PMMU and HPMMU */
		memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
		prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
		prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START;
		prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END;
	}
	prop->max_num_of_engines = GAUDI2_ENGINE_ID_SIZE;
	prop->num_engine_cores = CPU_ID_MAX;
	prop->cfg_size = CFG_SIZE;
	prop->max_asid = MAX_ASID;
	prop->num_of_events = GAUDI2_EVENT_SIZE;

	prop->supports_engine_modes = true;

	prop->dc_power_default = DC_POWER_DEFAULT;

	prop->cb_pool_cb_cnt = GAUDI2_CB_POOL_CB_CNT;
	prop->cb_pool_cb_size = GAUDI2_CB_POOL_CB_SIZE;
	prop->pcie_dbi_base_address = CFG_BASE + mmPCIE_DBI_BASE;
	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;

	strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);

	prop->mme_master_slave_mode = 1;

	prop->first_available_user_sob[0] = GAUDI2_RESERVED_SOB_NUMBER +
					(num_sync_stream_queues * HL_RSVD_SOBS);

	prop->first_available_user_mon[0] = GAUDI2_RESERVED_MON_NUMBER +
					(num_sync_stream_queues * HL_RSVD_MONS);

	prop->first_available_user_interrupt = GAUDI2_IRQ_NUM_USER_FIRST;
	prop->tpc_interrupt_id = GAUDI2_IRQ_NUM_TPC_ASSERT;
	prop->eq_interrupt_id = GAUDI2_IRQ_NUM_EVENT_QUEUE;

	prop->first_available_cq[0] = GAUDI2_RESERVED_CQ_NUMBER;

	prop->fw_cpu_boot_dev_sts0_valid = false;
	prop->fw_cpu_boot_dev_sts1_valid = false;
	prop->hard_reset_done_by_fw = false;
	prop->gic_interrupts_enable = true;

	prop->server_type = HL_SERVER_TYPE_UNKNOWN;

	prop->max_dec = NUMBER_OF_DEC;

	prop->clk_pll_index = HL_GAUDI2_MME_PLL;

	prop->dma_mask = 64;

	prop->hbw_flush_reg = mmPCIE_WRAP_SPECIAL_GLBL_SPARE_0;

	return 0;
}
static int gaudi2_pci_bars_map(struct hl_device *hdev)
{
	static const char * const name[] = {"CFG_SRAM", "MSIX", "DRAM"};
	bool is_wc[3] = {false, false, true};
	int rc;

	rc = hl_pci_bars_map(hdev, name, is_wc);
	if (rc)
		return rc;

	hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] + (CFG_BASE - STM_FLASH_BASE_ADDR);

	return 0;
}
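/*
 * Note: hdev->rmmio ends up pointing at the device CFG space inside BAR0.
 * Since the iATU setup below maps BAR0 starting at the STM flash base, the
 * CFG registers live at offset (CFG_BASE - STM_FLASH_BASE_ADDR) within the
 * BAR.
 */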
static u64 gaudi2_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	struct hl_inbound_pci_region pci_region;
	u64 old_addr = addr;
	int rc;

	if ((gaudi2) && (gaudi2->dram_bar_cur_addr == addr))
		return old_addr;

	if (hdev->asic_prop.iatu_done_by_fw)
		return U64_MAX;

	/* Inbound Region 2 - Bar 4 - Point to DRAM */
	pci_region.mode = PCI_BAR_MATCH_MODE;
	pci_region.bar = DRAM_BAR_ID;
	pci_region.addr = addr;
	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
	if (rc)
		return U64_MAX;

	if (gaudi2) {
		old_addr = gaudi2->dram_bar_cur_addr;
		gaudi2->dram_bar_cur_addr = addr;
	}

	return old_addr;
}
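/*
 * Usage note: the helper above returns the previous BAR base so a caller
 * can restore it when done; U64_MAX indicates a re-map failure or an iATU
 * that is owned by firmware.
 */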
static int gaudi2_init_iatu(struct hl_device *hdev)
{
	struct hl_inbound_pci_region inbound_region;
	struct hl_outbound_pci_region outbound_region;
	u32 bar_addr_low, bar_addr_high;
	int rc;

	if (hdev->asic_prop.iatu_done_by_fw)
		return 0;

	/* Temporary inbound Region 0 - Bar 0 - Point to CFG
	 * We must map this region in BAR match mode in order to
	 * fetch BAR physical base address
	 */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = SRAM_CFG_BAR_ID;
	/* Base address must be aligned to Bar size which is 256 MB */
	inbound_region.addr = STM_FLASH_BASE_ADDR - STM_FLASH_ALIGNED_OFF;
	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
	if (rc)
		return rc;

	/* Fetch physical BAR address */
	bar_addr_high = RREG32(mmPCIE_DBI_BAR1_REG + STM_FLASH_ALIGNED_OFF);
	bar_addr_low = RREG32(mmPCIE_DBI_BAR0_REG + STM_FLASH_ALIGNED_OFF) & ~0xF;

	hdev->pcie_bar_phys[SRAM_CFG_BAR_ID] = (u64)bar_addr_high << 32 | bar_addr_low;

	/* Inbound Region 0 - Bar 0 - Point to CFG */
	inbound_region.mode = PCI_ADDRESS_MATCH_MODE;
	inbound_region.bar = SRAM_CFG_BAR_ID;
	inbound_region.offset_in_bar = 0;
	inbound_region.addr = STM_FLASH_BASE_ADDR;
	inbound_region.size = CFG_REGION_SIZE;
	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
	if (rc)
		return rc;

	/* Inbound Region 1 - Bar 0 - Point to BAR0_RESERVED + SRAM */
	inbound_region.mode = PCI_ADDRESS_MATCH_MODE;
	inbound_region.bar = SRAM_CFG_BAR_ID;
	inbound_region.offset_in_bar = CFG_REGION_SIZE;
	inbound_region.addr = BAR0_RSRVD_BASE_ADDR;
	inbound_region.size = BAR0_RSRVD_SIZE + SRAM_SIZE;
	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
	if (rc)
		return rc;

	/* Inbound Region 2 - Bar 4 - Point to DRAM */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = DRAM_BAR_ID;
	inbound_region.addr = DRAM_PHYS_BASE;
	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
	if (rc)
		return rc;

	/* Outbound Region 0 - Point to Host */
	outbound_region.addr = HOST_PHYS_BASE_0;
	outbound_region.size = HOST_PHYS_SIZE_0;
	rc = hl_pci_set_outbound_region(hdev, &outbound_region);

	return rc;
}
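/*
 * Resulting iATU layout (summary of the setup above):
 *	inbound 0:  BAR0, address match - device CFG region
 *	inbound 1:  BAR0, address match - BAR0 reserved area + SRAM
 *	inbound 2:  BAR4, BAR match     - DRAM
 *	outbound 0: host physical memory window 0
 */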
static enum hl_device_hw_state gaudi2_get_hw_state(struct hl_device *hdev)
{
	return RREG32(mmHW_STATE);
}
2577 static int gaudi2_tpc_binning_init_prop(struct hl_device *hdev)
2579 struct asic_fixed_properties *prop = &hdev->asic_prop;
2582 * check for the error condition in which the number of binning candidates
2583 * is higher than the maximum supported by the driver
2585 if (hweight64(hdev->tpc_binning) > MAX_CLUSTER_BINNING_FAULTY_TPCS) {
2586 dev_err(hdev->dev, "TPC binning is supported for max of %d faulty TPCs, provided mask 0x%llx\n",
2587 MAX_CLUSTER_BINNING_FAULTY_TPCS,
2592 prop->tpc_binning_mask = hdev->tpc_binning;
2593 prop->tpc_enabled_mask = GAUDI2_TPC_FULL_MASK;
2598 static int gaudi2_set_tpc_binning_masks(struct hl_device *hdev)
2600 struct asic_fixed_properties *prop = &hdev->asic_prop;
2601 struct hw_queue_properties *q_props = prop->hw_queues_props;
2602 u64 tpc_binning_mask;
2606 rc = gaudi2_tpc_binning_init_prop(hdev);
2610 tpc_binning_mask = prop->tpc_binning_mask;
2612 for (i = 0 ; i < MAX_FAULTY_TPCS ; i++) {
2613 u8 subst_seq, binned, qid_base;
2615 if (tpc_binning_mask == 0)
2618 if (subst_idx == 0) {
2619 subst_seq = TPC_ID_DCORE0_TPC6;
2620 qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0;
2622 subst_seq = TPC_ID_DCORE3_TPC5;
2623 qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0;
2627 /* clear bit from mask */
2628 binned = __ffs(tpc_binning_mask);
2630 * Coverity complains about possible out-of-bound access in clear_bit()
2633 if (binned >= TPC_ID_SIZE) {
2635 "Invalid binned TPC (binning mask: %llx)\n",
2639 clear_bit(binned, (unsigned long *)&tpc_binning_mask);
2641 /* also clear replacing TPC bit from enabled mask */
2642 clear_bit(subst_seq, (unsigned long *)&prop->tpc_enabled_mask);
2644 /* bin the substitute TPC's queues */
2645 q_props[qid_base].binned = 1;
2646 q_props[qid_base + 1].binned = 1;
2647 q_props[qid_base + 2].binned = 1;
2648 q_props[qid_base + 3].binned = 1;
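/*
 * Worked example of the substitution scheme above (illustrative mask): with
 * tpc_binning == 0x10, __ffs() yields 4, so TPC 4 is the faulty engine; the
 * first substitute (TPC_ID_DCORE0_TPC6) is dropped from the enabled mask and
 * its four queues are binned so work is routed to the surviving TPCs.
 */
#if 0
	u64 mask = 0x10;
	u8 binned = __ffs(mask);			/* -> 4 */
	clear_bit(binned, (unsigned long *)&mask);	/* mask -> 0, loop ends */
	/* then: clear_bit(TPC_ID_DCORE0_TPC6, &tpc_enabled_mask) and mark
	 * GAUDI2_QUEUE_ID_DCORE0_TPC_6_0..3 as binned
	 */
#endif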
2656 static int gaudi2_set_dec_binning_masks(struct hl_device *hdev)
2658 struct asic_fixed_properties *prop = &hdev->asic_prop;
2661 num_faulty = hweight32(hdev->decoder_binning);
2664 * check for the error condition in which the number of binning candidates
2665 * is higher than the maximum supported by the driver
2667 if (num_faulty > MAX_FAULTY_DECODERS) {
2668 dev_err(hdev->dev, "decoder binning is supported for max of single faulty decoder, provided mask 0x%x\n",
2669 hdev->decoder_binning);
2673 prop->decoder_binning_mask = (hdev->decoder_binning & GAUDI2_DECODER_FULL_MASK);
2675 if (prop->decoder_binning_mask)
2676 prop->decoder_enabled_mask = (GAUDI2_DECODER_FULL_MASK & ~BIT(DEC_ID_PCIE_VDEC1));
2678 prop->decoder_enabled_mask = GAUDI2_DECODER_FULL_MASK;
2683 static void gaudi2_set_dram_binning_masks(struct hl_device *hdev)
2685 struct asic_fixed_properties *prop = &hdev->asic_prop;
2687 /* check if we should override default binning */
2688 if (!hdev->dram_binning) {
2689 prop->dram_binning_mask = 0;
2690 prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK;
2694 /* set DRAM binning constraints */
2695 prop->faulty_dram_cluster_map |= hdev->dram_binning;
2696 prop->dram_binning_mask = hdev->dram_binning;
2697 prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK & ~BIT(HBM_ID5);
2700 static int gaudi2_set_edma_binning_masks(struct hl_device *hdev)
2702 struct asic_fixed_properties *prop = &hdev->asic_prop;
2703 struct hw_queue_properties *q_props;
2706 num_faulty = hweight32(hdev->edma_binning);
2709 * check for the error condition in which the number of binning candidates
2710 * is higher than the maximum supported by the driver
2712 if (num_faulty > MAX_FAULTY_EDMAS) {
2714 "EDMA binning is supported for max of single faulty EDMA, provided mask 0x%x\n",
2715 hdev->edma_binning);
2719 if (!hdev->edma_binning) {
2720 prop->edma_binning_mask = 0;
2721 prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK;
2725 seq = __ffs((unsigned long)hdev->edma_binning);
2727 /* set binning constraints */
2728 prop->faulty_dram_cluster_map |= BIT(edma_to_hbm_cluster[seq]);
2729 prop->edma_binning_mask = hdev->edma_binning;
2730 prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK & ~BIT(EDMA_ID_DCORE3_INSTANCE1);
2732 /* bin substitute EDMA's queue */
2733 q_props = prop->hw_queues_props;
2734 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0].binned = 1;
2735 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1].binned = 1;
2736 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2].binned = 1;
2737 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3].binned = 1;
2742 static int gaudi2_set_xbar_edge_enable_mask(struct hl_device *hdev, u32 xbar_edge_iso_mask)
2744 struct asic_fixed_properties *prop = &hdev->asic_prop;
2747 /* check if we should override default binning */
2748 if (!xbar_edge_iso_mask) {
2749 prop->xbar_edge_enabled_mask = GAUDI2_XBAR_EDGE_FULL_MASK;
2754 * note that it can be set to a value other than 0 only after a cpucp packet (i.e.
2755 * only the FW can set a redundancy value). for the user it will always be 0.
2757 num_faulty = hweight32(xbar_edge_iso_mask);
2760 * check for the error condition in which the number of binning candidates
2761 * is higher than the maximum supported by the driver
2763 if (num_faulty > MAX_FAULTY_XBARS) {
2764 dev_err(hdev->dev, "we cannot have more than %d faulty XBAR EDGE\n",
2769 seq = __ffs((unsigned long)xbar_edge_iso_mask);
2771 /* set binning constraints */
2772 prop->faulty_dram_cluster_map |= BIT(xbar_edge_to_hbm_cluster[seq]);
2773 prop->xbar_edge_enabled_mask = (~xbar_edge_iso_mask) & GAUDI2_XBAR_EDGE_FULL_MASK;
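/* Worked example (hypothetical mask width): with a full mask of 0xF and
 * xbar_edge_iso_mask == 0x4, the enabled mask becomes (~0x4) & 0xF == 0xB,
 * i.e. exactly the isolated edge is dropped.
 */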
2778 static int gaudi2_set_cluster_binning_masks_common(struct hl_device *hdev, u8 xbar_edge_iso_mask)
2783 * mark all clusters as good, each component will "fail" its cluster
2784 * based on eFuse/user values.
2785 * If more than a single cluster is faulty, the chip is unusable
2787 hdev->asic_prop.faulty_dram_cluster_map = 0;
2789 gaudi2_set_dram_binning_masks(hdev);
2791 rc = gaudi2_set_edma_binning_masks(hdev);
2795 rc = gaudi2_set_xbar_edge_enable_mask(hdev, xbar_edge_iso_mask);
2800 /* always initially set to full mask */
2801 hdev->asic_prop.hmmu_hif_enabled_mask = GAUDI2_HIF_HMMU_FULL_MASK;
2806 static int gaudi2_set_cluster_binning_masks(struct hl_device *hdev)
2808 struct asic_fixed_properties *prop = &hdev->asic_prop;
2811 rc = gaudi2_set_cluster_binning_masks_common(hdev, prop->cpucp_info.xbar_binning_mask);
2815 /* if we have DRAM binning reported by FW we should perform cluster config */
2816 if (prop->faulty_dram_cluster_map) {
2817 u8 cluster_seq = __ffs((unsigned long)prop->faulty_dram_cluster_map);
2819 prop->hmmu_hif_enabled_mask = cluster_hmmu_hif_enabled_mask[cluster_seq];
2825 static int gaudi2_set_binning_masks(struct hl_device *hdev)
2829 rc = gaudi2_set_cluster_binning_masks(hdev);
2833 rc = gaudi2_set_tpc_binning_masks(hdev);
2837 rc = gaudi2_set_dec_binning_masks(hdev);
2844 static int gaudi2_cpucp_info_get(struct hl_device *hdev)
2846 struct gaudi2_device *gaudi2 = hdev->asic_specific;
2847 struct asic_fixed_properties *prop = &hdev->asic_prop;
2852 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
2855 /* No point in asking for this information again when not doing a hard reset, as the
2856 * device CPU hasn't been reset
2858 if (hdev->reset_info.in_compute_reset)
2861 rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
2866 dram_size = le64_to_cpu(prop->cpucp_info.dram_size);
2868 /* we can have either 5 or 6 HBMs. other values are invalid */
2870 if ((dram_size != ((GAUDI2_HBM_NUM - 1) * SZ_16G)) &&
2871 (dram_size != (GAUDI2_HBM_NUM * SZ_16G))) {
2873 "F/W reported invalid DRAM size %llu. Trying to use default size %llu\n",
2874 dram_size, prop->dram_size);
2875 dram_size = prop->dram_size;
2878 prop->dram_size = dram_size;
2879 prop->dram_end_address = prop->dram_base_address + dram_size;
2882 if (!strlen(prop->cpucp_info.card_name))
2883 strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
2885 /* Overwrite binning masks with the actual binning values from F/W */
2886 hdev->dram_binning = prop->cpucp_info.dram_binning_mask;
2887 hdev->edma_binning = prop->cpucp_info.edma_binning_mask;
2888 hdev->tpc_binning = le64_to_cpu(prop->cpucp_info.tpc_binning_mask);
2889 hdev->decoder_binning = lower_32_bits(le64_to_cpu(prop->cpucp_info.decoder_binning_mask));
2891 dev_dbg(hdev->dev, "Read binning masks: tpc: 0x%llx, dram: 0x%llx, edma: 0x%x, dec: 0x%x\n",
2892 hdev->tpc_binning, hdev->dram_binning, hdev->edma_binning,
2893 hdev->decoder_binning);
2896 * at this point the DRAM parameters need to be updated according to data obtained from the FW
2899 rc = hdev->asic_funcs->set_dram_properties(hdev);
2903 rc = hdev->asic_funcs->set_binning_masks(hdev);
2907 max_power = hl_fw_get_max_power(hdev);
2911 prop->max_power_default = (u64) max_power;
2916 static int gaudi2_fetch_psoc_frequency(struct hl_device *hdev)
2918 struct gaudi2_device *gaudi2 = hdev->asic_specific;
2919 u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS];
2922 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
2925 rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI2_CPU_PLL, pll_freq_arr);
2929 hdev->asic_prop.psoc_timestamp_frequency = pll_freq_arr[3];
2934 static int gaudi2_early_init(struct hl_device *hdev)
2936 struct asic_fixed_properties *prop = &hdev->asic_prop;
2937 struct pci_dev *pdev = hdev->pdev;
2938 resource_size_t pci_bar_size;
2941 rc = gaudi2_set_fixed_properties(hdev);
2945 /* Check BAR sizes */
2946 pci_bar_size = pci_resource_len(pdev, SRAM_CFG_BAR_ID);
2948 if (pci_bar_size != CFG_BAR_SIZE) {
2949 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
2950 SRAM_CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
2952 goto free_queue_props;
2955 pci_bar_size = pci_resource_len(pdev, MSIX_BAR_ID);
2956 if (pci_bar_size != MSIX_BAR_SIZE) {
2957 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
2958 MSIX_BAR_ID, &pci_bar_size, MSIX_BAR_SIZE);
2960 goto free_queue_props;
2963 prop->dram_pci_bar_size = pci_resource_len(pdev, DRAM_BAR_ID);
2964 hdev->dram_pci_bar_start = pci_resource_start(pdev, DRAM_BAR_ID);
2967 * Only in pldm does the driver configure the iATU
2970 hdev->asic_prop.iatu_done_by_fw = false;
2972 hdev->asic_prop.iatu_done_by_fw = true;
2974 rc = hl_pci_init(hdev);
2976 goto free_queue_props;
2978 /* Before continuing with the initialization, we need to read the preboot
2979 * version to determine whether we are running with security-enabled firmware
2981 rc = hl_fw_read_preboot_status(hdev);
2983 if (hdev->reset_on_preboot_fail)
2984 /* we are already on failure flow, so don't check if hw_fini fails. */
2985 hdev->asic_funcs->hw_fini(hdev, true, false);
2989 if (gaudi2_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
2990 dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
2991 rc = hdev->asic_funcs->hw_fini(hdev, true, false);
2993 dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
3003 kfree(hdev->asic_prop.hw_queues_props);
3007 static int gaudi2_early_fini(struct hl_device *hdev)
3009 kfree(hdev->asic_prop.hw_queues_props);
3015 static bool gaudi2_is_arc_nic_owned(u64 arc_id)
3018 case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
3025 static bool gaudi2_is_arc_tpc_owned(u64 arc_id)
3028 case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
3035 static void gaudi2_init_arcs(struct hl_device *hdev)
3037 struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3038 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3042 for (i = CPU_ID_SCHED_ARC0 ; i <= CPU_ID_SCHED_ARC3 ; i++) {
3043 if (gaudi2_is_arc_enabled(hdev, i))
3046 gaudi2_set_arc_id_cap(hdev, i);
3049 for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) {
3050 if (!gaudi2_is_queue_enabled(hdev, i))
3053 arc_id = gaudi2_queue_id_to_arc_id[i];
3054 if (gaudi2_is_arc_enabled(hdev, arc_id))
3057 if (gaudi2_is_arc_nic_owned(arc_id) &&
3058 !(hdev->nic_ports_mask & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0)))
3061 if (gaudi2_is_arc_tpc_owned(arc_id) && !(gaudi2->tpc_hw_cap_initialized &
3062 BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0)))
3065 gaudi2_set_arc_id_cap(hdev, arc_id);
3068 /* Fetch ARC scratchpad address */
3069 hdev->asic_prop.engine_core_interrupt_reg_addr =
3070 CFG_BASE + le32_to_cpu(dyn_regs->eng_arc_irq_ctrl);
3073 static int gaudi2_scrub_arc_dccm(struct hl_device *hdev, u32 cpu_id)
3075 u32 reg_base, reg_val;
3079 case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC3:
3080 /* Each ARC scheduler has 2 consecutive DCCM blocks */
3081 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
3082 ARC_DCCM_BLOCK_SIZE * 2, true);
3086 case CPU_ID_SCHED_ARC4:
3087 case CPU_ID_SCHED_ARC5:
3088 case CPU_ID_MME_QMAN_ARC0:
3089 case CPU_ID_MME_QMAN_ARC1:
3090 reg_base = gaudi2_arc_blocks_bases[cpu_id];
3092 /* Scrub lower DCCM block */
3093 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
3094 ARC_DCCM_BLOCK_SIZE, true);
3098 /* Switch to upper DCCM block */
3099 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 1);
3100 WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val);
3102 /* Scrub upper DCCM block */
3103 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
3104 ARC_DCCM_BLOCK_SIZE, true);
3108 /* Switch to lower DCCM block */
3109 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 0);
3110 WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val);
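/* Note (derived from the sequence above): these ARCs expose their DCCM
 * through a single window at gaudi2_arc_dccm_bases[cpu_id]; the UPPER_DCCM_EN
 * bit selects which half is visible, which is why the same base address is
 * scrubbed twice with the bit toggled in between.
 */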
3113 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
3114 ARC_DCCM_BLOCK_SIZE, true);
3122 static int gaudi2_scrub_arcs_dccm(struct hl_device *hdev)
3127 for (arc_id = CPU_ID_SCHED_ARC0 ; arc_id < CPU_ID_MAX ; arc_id++) {
3128 if (!gaudi2_is_arc_enabled(hdev, arc_id))
3131 rc = gaudi2_scrub_arc_dccm(hdev, arc_id);
3139 static int gaudi2_late_init(struct hl_device *hdev)
3141 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3144 hdev->asic_prop.supports_advanced_cpucp_rc = true;
3146 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS,
3147 gaudi2->virt_msix_db_dma_addr);
3149 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
3153 rc = gaudi2_fetch_psoc_frequency(hdev);
3155 dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
3156 goto disable_pci_access;
3159 gaudi2_init_arcs(hdev);
3161 rc = gaudi2_scrub_arcs_dccm(hdev);
3163 dev_err(hdev->dev, "Failed to scrub arcs DCCM\n");
3164 goto disable_pci_access;
3167 gaudi2_init_security(hdev);
3172 hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
3177 static void gaudi2_late_fini(struct hl_device *hdev)
3179 hl_hwmon_release_resources(hdev);
3182 static void gaudi2_user_mapped_dec_init(struct gaudi2_device *gaudi2, u32 start_idx)
3184 struct user_mapped_block *blocks = gaudi2->mapped_blocks;
3186 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3187 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3188 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3189 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3190 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3191 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3192 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3193 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3194 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmPCIE_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3195 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx], mmPCIE_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3198 static void gaudi2_user_mapped_blocks_init(struct hl_device *hdev)
3200 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3201 struct user_mapped_block *blocks = gaudi2->mapped_blocks;
3202 u32 block_size, umr_start_idx, num_umr_blocks;
3205 for (i = 0 ; i < NUM_ARC_CPUS ; i++) {
3206 if (i >= CPU_ID_SCHED_ARC0 && i <= CPU_ID_SCHED_ARC3)
3207 block_size = ARC_DCCM_BLOCK_SIZE * 2;
3209 block_size = ARC_DCCM_BLOCK_SIZE;
3211 blocks[i].address = gaudi2_arc_dccm_bases[i];
3212 blocks[i].size = block_size;
3215 blocks[NUM_ARC_CPUS].address = mmARC_FARM_ARC0_ACP_ENG_BASE;
3216 blocks[NUM_ARC_CPUS].size = HL_BLOCK_SIZE;
3218 blocks[NUM_ARC_CPUS + 1].address = mmARC_FARM_ARC1_ACP_ENG_BASE;
3219 blocks[NUM_ARC_CPUS + 1].size = HL_BLOCK_SIZE;
3221 blocks[NUM_ARC_CPUS + 2].address = mmARC_FARM_ARC2_ACP_ENG_BASE;
3222 blocks[NUM_ARC_CPUS + 2].size = HL_BLOCK_SIZE;
3224 blocks[NUM_ARC_CPUS + 3].address = mmARC_FARM_ARC3_ACP_ENG_BASE;
3225 blocks[NUM_ARC_CPUS + 3].size = HL_BLOCK_SIZE;
3227 blocks[NUM_ARC_CPUS + 4].address = mmDCORE0_MME_QM_ARC_ACP_ENG_BASE;
3228 blocks[NUM_ARC_CPUS + 4].size = HL_BLOCK_SIZE;
3230 blocks[NUM_ARC_CPUS + 5].address = mmDCORE1_MME_QM_ARC_ACP_ENG_BASE;
3231 blocks[NUM_ARC_CPUS + 5].size = HL_BLOCK_SIZE;
3233 blocks[NUM_ARC_CPUS + 6].address = mmDCORE2_MME_QM_ARC_ACP_ENG_BASE;
3234 blocks[NUM_ARC_CPUS + 6].size = HL_BLOCK_SIZE;
3236 blocks[NUM_ARC_CPUS + 7].address = mmDCORE3_MME_QM_ARC_ACP_ENG_BASE;
3237 blocks[NUM_ARC_CPUS + 7].size = HL_BLOCK_SIZE;
3239 umr_start_idx = NUM_ARC_CPUS + NUM_OF_USER_ACP_BLOCKS;
3240 num_umr_blocks = NIC_NUMBER_OF_ENGINES * NUM_OF_USER_NIC_UMR_BLOCKS;
3241 for (i = 0 ; i < num_umr_blocks ; i++) {
3242 u8 nic_id, umr_block_id;
3244 nic_id = i / NUM_OF_USER_NIC_UMR_BLOCKS;
3245 umr_block_id = i % NUM_OF_USER_NIC_UMR_BLOCKS;
3247 blocks[umr_start_idx + i].address =
3248 mmNIC0_UMR0_0_UNSECURE_DOORBELL0_BASE +
3249 (nic_id / NIC_NUMBER_OF_QM_PER_MACRO) * NIC_OFFSET +
3250 (nic_id % NIC_NUMBER_OF_QM_PER_MACRO) * NIC_QM_OFFSET +
3251 umr_block_id * NIC_UMR_OFFSET;
3252 blocks[umr_start_idx + i].size = HL_BLOCK_SIZE;
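/*
 * Sketch of the index decomposition above (illustrative constant values, the
 * real ones come from the NIC_* defines): for i == 11 with two UMR blocks per
 * NIC and two QMANs per NIC macro, the block lands in macro 2, QMAN 1, UMR 1.
 */
#if 0
	u32 i = 11, blocks_per_nic = 2, qm_per_macro = 2;
	u8 nic_id = i / blocks_per_nic;		/* -> 5 */
	u8 umr_block_id = i % blocks_per_nic;	/* -> 1 */
	u8 macro = nic_id / qm_per_macro;	/* -> 2 */
	u8 qman = nic_id % qm_per_macro;	/* -> 1 */
	/* address = mmNIC0_UMR0_0_UNSECURE_DOORBELL0_BASE + macro * NIC_OFFSET +
	 *	     qman * NIC_QM_OFFSET + umr_block_id * NIC_UMR_OFFSET
	 */
#endif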
3255 /* Expose decoder HW configuration block to user */
3256 gaudi2_user_mapped_dec_init(gaudi2, USR_MAPPED_BLK_DEC_START_IDX);
3258 for (i = 1; i < NUM_OF_DCORES; ++i) {
3259 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].size = SM_OBJS_BLOCK_SIZE;
3260 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].size = HL_BLOCK_SIZE;
3262 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].address =
3263 mmDCORE0_SYNC_MNGR_OBJS_BASE + i * DCORE_OFFSET;
3265 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].address =
3266 mmDCORE0_SYNC_MNGR_GLBL_BASE + i * DCORE_OFFSET;
3270 static int gaudi2_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
3272 dma_addr_t dma_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
3273 void *virt_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {};
3276 /* The device ARC works with 32-bit addresses, and because there is a single HW register
3277 * that holds the extension bits (49..28), these bits must be identical in all the allocated range
3281 for (i = 0 ; i < GAUDI2_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
3282 virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
3283 &dma_addr_arr[i], GFP_KERNEL | __GFP_ZERO);
3284 if (!virt_addr_arr[i]) {
3286 goto free_dma_mem_arr;
3289 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
3290 if (GAUDI2_ARC_PCI_MSB_ADDR(dma_addr_arr[i]) == GAUDI2_ARC_PCI_MSB_ADDR(end_addr))
3294 if (i == GAUDI2_ALLOC_CPU_MEM_RETRY_CNT) {
3296 "MSB of ARC accessible DMA memory are not identical in all range\n");
3298 goto free_dma_mem_arr;
3301 hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
3302 hdev->cpu_accessible_dma_address = dma_addr_arr[i];
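/*
 * Sketch of the extension-bit check (hypothetical addresses, assuming
 * GAUDI2_ARC_PCI_MSB_ADDR() extracts bits 49..28 as the comment above
 * states): an allocation that crosses a bit-28 (256 MB) boundary fails the
 * comparison and triggers another retry iteration.
 */
#if 0
	u64 start = 0x0ffe0000ULL, size = 0x400000ULL;	/* 4 MB buffer */
	u64 end = start + size - 1;			/* 0x103dffff */
	/* start >> 28 == 0x0 but end >> 28 == 0x1 -> MSBs differ, retry */
#endif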
3305 for (j = 0 ; j < i ; j++)
3306 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
3312 static void gaudi2_set_pci_memory_regions(struct hl_device *hdev)
3314 struct asic_fixed_properties *prop = &hdev->asic_prop;
3315 struct pci_mem_region *region;
3318 region = &hdev->pci_mem_region[PCI_REGION_CFG];
3319 region->region_base = CFG_BASE;
3320 region->region_size = CFG_SIZE;
3321 region->offset_in_bar = CFG_BASE - STM_FLASH_BASE_ADDR;
3322 region->bar_size = CFG_BAR_SIZE;
3323 region->bar_id = SRAM_CFG_BAR_ID;
3327 region = &hdev->pci_mem_region[PCI_REGION_SRAM];
3328 region->region_base = SRAM_BASE_ADDR;
3329 region->region_size = SRAM_SIZE;
3330 region->offset_in_bar = CFG_REGION_SIZE + BAR0_RSRVD_SIZE;
3331 region->bar_size = CFG_BAR_SIZE;
3332 region->bar_id = SRAM_CFG_BAR_ID;
3336 region = &hdev->pci_mem_region[PCI_REGION_DRAM];
3337 region->region_base = DRAM_PHYS_BASE;
3338 region->region_size = hdev->asic_prop.dram_size;
3339 region->offset_in_bar = 0;
3340 region->bar_size = prop->dram_pci_bar_size;
3341 region->bar_id = DRAM_BAR_ID;
3345 static void gaudi2_user_interrupt_setup(struct hl_device *hdev)
3347 struct asic_fixed_properties *prop = &hdev->asic_prop;
3350 /* Initialize TPC interrupt */
3351 HL_USR_INTR_STRUCT_INIT(hdev->tpc_interrupt, hdev, 0, HL_USR_INTERRUPT_TPC);
3353 /* Initialize unexpected error interrupt */
3354 HL_USR_INTR_STRUCT_INIT(hdev->unexpected_error_interrupt, hdev, 0,
3355 HL_USR_INTERRUPT_UNEXPECTED);
3357 /* Initialize common user CQ interrupt */
3358 HL_USR_INTR_STRUCT_INIT(hdev->common_user_cq_interrupt, hdev,
3359 HL_COMMON_USER_CQ_INTERRUPT_ID, HL_USR_INTERRUPT_CQ);
3361 /* Initialize common decoder interrupt */
3362 HL_USR_INTR_STRUCT_INIT(hdev->common_decoder_interrupt, hdev,
3363 HL_COMMON_DEC_INTERRUPT_ID, HL_USR_INTERRUPT_DECODER);
3365 /* User interrupts structure holds both decoder and user interrupts from various engines.
3366 * We first initialize the decoder interrupts and then we add the user interrupts.
3367 * The only limitation is that the last decoder interrupt id must be smaller
3368 * than GAUDI2_IRQ_NUM_USER_FIRST. This is checked at compilation time.
3371 /* Initialize decoder interrupts; expose only the normal interrupts,
3372 * error interrupts are handled by the driver
3374 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, j = 0 ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_NRM;
3376 HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i,
3377 HL_USR_INTERRUPT_DECODER);
3379 for (i = GAUDI2_IRQ_NUM_USER_FIRST, k = 0 ; k < prop->user_interrupt_count; i++, j++, k++)
3380 HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i, HL_USR_INTERRUPT_CQ);
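/* Resulting layout of hdev->user_interrupt[] (j carries over between the two
 * loops above, so the CQ entries start right after the last decoder entry):
 *	[0 .. user_dec_intr_count - 1]				decoder NRM interrupts
 *	[user_dec_intr_count .. +user_interrupt_count - 1]	user CQ interrupts
 */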
3383 static inline int gaudi2_get_non_zero_random_int(void)
3385 int rand = get_random_u32();
3387 return rand ? rand : 1;
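/* Mapping 0 to 1 matters because these values seed the MME LFSRs (see
 * gaudi2_sw_init()): an LFSR seeded with an all-zero state never advances.
 */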
3390 static void gaudi2_special_blocks_free(struct hl_device *hdev)
3392 struct asic_fixed_properties *prop = &hdev->asic_prop;
3393 struct hl_skip_blocks_cfg *skip_special_blocks_cfg =
3394 &prop->skip_special_blocks_cfg;
3396 kfree(prop->special_blocks);
3397 kfree(skip_special_blocks_cfg->block_types);
3398 kfree(skip_special_blocks_cfg->block_ranges);
3401 static void gaudi2_special_blocks_iterator_free(struct hl_device *hdev)
3403 gaudi2_special_blocks_free(hdev);
3406 static bool gaudi2_special_block_skip(struct hl_device *hdev,
3407 struct hl_special_blocks_cfg *special_blocks_cfg,
3408 u32 blk_idx, u32 major, u32 minor, u32 sub_minor)
3413 static int gaudi2_special_blocks_config(struct hl_device *hdev)
3415 struct asic_fixed_properties *prop = &hdev->asic_prop;
3418 /* Configure Special blocks */
3419 prop->glbl_err_cause_num = GAUDI2_NUM_OF_GLBL_ERR_CAUSE;
3420 prop->num_of_special_blocks = ARRAY_SIZE(gaudi2_special_blocks);
3421 prop->special_blocks = kmalloc_array(prop->num_of_special_blocks,
3422 sizeof(*prop->special_blocks), GFP_KERNEL);
3423 if (!prop->special_blocks)
3426 for (i = 0 ; i < prop->num_of_special_blocks ; i++)
3427 memcpy(&prop->special_blocks[i], &gaudi2_special_blocks[i],
3428 sizeof(*prop->special_blocks));
3430 /* Configure when to skip Special blocks */
3431 memset(&prop->skip_special_blocks_cfg, 0, sizeof(prop->skip_special_blocks_cfg));
3432 prop->skip_special_blocks_cfg.skip_block_hook = gaudi2_special_block_skip;
3434 if (ARRAY_SIZE(gaudi2_iterator_skip_block_types)) {
3435 prop->skip_special_blocks_cfg.block_types =
3436 kmalloc_array(ARRAY_SIZE(gaudi2_iterator_skip_block_types),
3437 sizeof(gaudi2_iterator_skip_block_types[0]), GFP_KERNEL);
3438 if (!prop->skip_special_blocks_cfg.block_types) {
3440 goto free_special_blocks;
3443 memcpy(prop->skip_special_blocks_cfg.block_types, gaudi2_iterator_skip_block_types,
3444 sizeof(gaudi2_iterator_skip_block_types));
3446 prop->skip_special_blocks_cfg.block_types_len =
3447 ARRAY_SIZE(gaudi2_iterator_skip_block_types);
3450 if (ARRAY_SIZE(gaudi2_iterator_skip_block_ranges)) {
3451 prop->skip_special_blocks_cfg.block_ranges =
3452 kmalloc_array(ARRAY_SIZE(gaudi2_iterator_skip_block_ranges),
3453 sizeof(gaudi2_iterator_skip_block_ranges[0]), GFP_KERNEL);
3454 if (!prop->skip_special_blocks_cfg.block_ranges) {
3456 goto free_skip_special_blocks_types;
3459 for (i = 0 ; i < ARRAY_SIZE(gaudi2_iterator_skip_block_ranges) ; i++)
3460 memcpy(&prop->skip_special_blocks_cfg.block_ranges[i],
3461 &gaudi2_iterator_skip_block_ranges[i],
3462 sizeof(struct range));
3464 prop->skip_special_blocks_cfg.block_ranges_len =
3465 ARRAY_SIZE(gaudi2_iterator_skip_block_ranges);
3470 free_skip_special_blocks_types:
3471 kfree(prop->skip_special_blocks_cfg.block_types);
3472 free_special_blocks:
3473 kfree(prop->special_blocks);
3478 static int gaudi2_special_blocks_iterator_config(struct hl_device *hdev)
3480 return gaudi2_special_blocks_config(hdev);
3483 static void gaudi2_test_queues_msgs_free(struct hl_device *hdev)
3485 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3486 struct gaudi2_queues_test_info *msg_info = gaudi2->queues_test_info;
3489 for (i = 0 ; i < GAUDI2_NUM_TESTED_QS ; i++) {
3490 /* bail-out if this is an allocation failure point */
3491 if (!msg_info[i].kern_addr)
3494 hl_asic_dma_pool_free(hdev, msg_info[i].kern_addr, msg_info[i].dma_addr);
3495 msg_info[i].kern_addr = NULL;
3499 static int gaudi2_test_queues_msgs_alloc(struct hl_device *hdev)
3501 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3502 struct gaudi2_queues_test_info *msg_info = gaudi2->queues_test_info;
3505 /* allocate a message-short buf for each Q we intend to test */
3506 for (i = 0 ; i < GAUDI2_NUM_TESTED_QS ; i++) {
3507 msg_info[i].kern_addr =
3508 (void *)hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_short),
3509 GFP_KERNEL, &msg_info[i].dma_addr);
3510 if (!msg_info[i].kern_addr) {
3512 "Failed to allocate dma memory for H/W queue %d testing\n", i);
3521 gaudi2_test_queues_msgs_free(hdev);
3525 static int gaudi2_sw_init(struct hl_device *hdev)
3527 struct asic_fixed_properties *prop = &hdev->asic_prop;
3528 struct gaudi2_device *gaudi2;
3531 /* Allocate device structure */
3532 gaudi2 = kzalloc(sizeof(*gaudi2), GFP_KERNEL);
3536 for (i = 0 ; i < ARRAY_SIZE(gaudi2_irq_map_table) ; i++) {
3537 if (gaudi2_irq_map_table[i].msg || !gaudi2_irq_map_table[i].valid)
3540 if (gaudi2->num_of_valid_hw_events == GAUDI2_EVENT_SIZE) {
3541 dev_err(hdev->dev, "H/W events array exceeds the limit of %u events\n",
3544 goto free_gaudi2_device;
3547 gaudi2->hw_events[gaudi2->num_of_valid_hw_events++] = gaudi2_irq_map_table[i].fc_id;
3550 for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++)
3551 gaudi2->lfsr_rand_seeds[i] = gaudi2_get_non_zero_random_int();
3553 gaudi2->cpucp_info_get = gaudi2_cpucp_info_get;
3555 hdev->asic_specific = gaudi2;
3557 /* Create DMA pool for small allocations.
3558 * Use DEVICE_CACHE_LINE_SIZE for alignment since the NIC memory-mapped
3559 * PI/CI registers allocated from this pool have this restriction
3561 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), &hdev->pdev->dev,
3562 GAUDI2_DMA_POOL_BLK_SIZE, DEVICE_CACHE_LINE_SIZE, 0);
3563 if (!hdev->dma_pool) {
3564 dev_err(hdev->dev, "failed to create DMA pool\n");
3566 goto free_gaudi2_device;
3569 rc = gaudi2_alloc_cpu_accessible_dma_mem(hdev);
3573 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
3574 if (!hdev->cpu_accessible_dma_pool) {
3575 dev_err(hdev->dev, "Failed to create CPU accessible DMA pool\n");
3577 goto free_cpu_dma_mem;
3580 rc = gen_pool_add(hdev->cpu_accessible_dma_pool, (uintptr_t) hdev->cpu_accessible_dma_mem,
3581 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
3583 dev_err(hdev->dev, "Failed to add memory to CPU accessible DMA pool\n");
3585 goto free_cpu_accessible_dma_pool;
3588 gaudi2->virt_msix_db_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev, prop->pmmu.page_size,
3589 &gaudi2->virt_msix_db_dma_addr);
3590 if (!gaudi2->virt_msix_db_cpu_addr) {
3591 dev_err(hdev->dev, "Failed to allocate DMA memory for virtual MSI-X doorbell\n");
3593 goto free_cpu_accessible_dma_pool;
3596 spin_lock_init(&gaudi2->hw_queues_lock);
3598 gaudi2->scratchpad_kernel_address = hl_asic_dma_alloc_coherent(hdev, PAGE_SIZE,
3599 &gaudi2->scratchpad_bus_address,
3600 GFP_KERNEL | __GFP_ZERO);
3601 if (!gaudi2->scratchpad_kernel_address) {
3603 goto free_virt_msix_db_mem;
3606 gaudi2_user_mapped_blocks_init(hdev);
3608 /* Initialize user interrupts */
3609 gaudi2_user_interrupt_setup(hdev);
3611 hdev->supports_coresight = true;
3612 hdev->supports_sync_stream = true;
3613 hdev->supports_cb_mapping = true;
3614 hdev->supports_wait_for_multi_cs = false;
3616 prop->supports_compute_reset = true;
3618 hdev->asic_funcs->set_pci_memory_regions(hdev);
3620 rc = gaudi2_special_blocks_iterator_config(hdev);
3622 goto free_scratchpad_mem;
3624 rc = gaudi2_test_queues_msgs_alloc(hdev);
3626 goto special_blocks_free;
3630 special_blocks_free:
3631 gaudi2_special_blocks_iterator_free(hdev);
3632 free_scratchpad_mem:
3633 hl_asic_dma_free_coherent(hdev, PAGE_SIZE, gaudi2->scratchpad_kernel_address,
3634 gaudi2->scratchpad_bus_address);
3635 free_virt_msix_db_mem:
3636 hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);
3637 free_cpu_accessible_dma_pool:
3638 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
3640 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
3641 hdev->cpu_accessible_dma_address);
3643 dma_pool_destroy(hdev->dma_pool);
3649 static int gaudi2_sw_fini(struct hl_device *hdev)
3651 struct asic_fixed_properties *prop = &hdev->asic_prop;
3652 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3654 gaudi2_test_queues_msgs_free(hdev);
3656 gaudi2_special_blocks_iterator_free(hdev);
3658 hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);
3660 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
3662 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
3663 hdev->cpu_accessible_dma_address);
3665 hl_asic_dma_free_coherent(hdev, PAGE_SIZE, gaudi2->scratchpad_kernel_address,
3666 gaudi2->scratchpad_bus_address);
3668 dma_pool_destroy(hdev->dma_pool);
3675 static void gaudi2_stop_qman_common(struct hl_device *hdev, u32 reg_base)
3677 WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_STOP |
3678 QM_GLBL_CFG1_CQF_STOP |
3679 QM_GLBL_CFG1_CP_STOP);
3681 /* also stop the ARC */
3682 WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_STOP);
3685 static void gaudi2_flush_qman_common(struct hl_device *hdev, u32 reg_base)
3687 WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_FLUSH |
3688 QM_GLBL_CFG1_CQF_FLUSH |
3689 QM_GLBL_CFG1_CP_FLUSH);
3692 static void gaudi2_flush_qman_arc_common(struct hl_device *hdev, u32 reg_base)
3694 WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_FLUSH);
3698 * gaudi2_clear_qm_fence_counters_common - clear QM's fence counters
3700 * @hdev: pointer to the habanalabs device structure
3701 * @queue_id: queue whose fence counters should be cleared
3702 * @skip_fence: if true, set the maximum fence value in all fence counters to avoid
3703 * getting stuck on any fence value. otherwise set all fence
3704 * counters to 0 (standard clear of fence counters)
3706 static void gaudi2_clear_qm_fence_counters_common(struct hl_device *hdev, u32 queue_id,
3712 reg_base = gaudi2_qm_blocks_bases[queue_id];
3714 addr = reg_base + QM_CP_FENCE0_CNT_0_OFFSET;
3715 size = mmPDMA0_QM_CP_BARRIER_CFG - mmPDMA0_QM_CP_FENCE0_CNT_0;
3718 * in case we want to make sure that a QM that is stuck on a fence will
3719 * be released, we should set the fence counter to a value higher than
3720 * the one the QM is waiting for. to comply with any awaited fence
3721 * value we set the maximum fence value in all counters
3723 val = skip_fence ? U32_MAX : 0;
3724 gaudi2_memset_device_lbw(hdev, addr, size, val);
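/* Illustrative release scenario: if a CP is waiting for fence counter 0 to
 * reach (at least) 3 while the counter holds 1, writing U32_MAX satisfies any
 * such comparison and releases it, whereas writing 0 would keep it stuck.
 * This is why the manual flush path passes skip_fence == true.
 */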
3727 static void gaudi2_qman_manual_flush_common(struct hl_device *hdev, u32 queue_id)
3729 u32 reg_base = gaudi2_qm_blocks_bases[queue_id];
3731 gaudi2_clear_qm_fence_counters_common(hdev, queue_id, true);
3732 gaudi2_flush_qman_common(hdev, reg_base);
3733 gaudi2_flush_qman_arc_common(hdev, reg_base);
3736 static void gaudi2_stop_dma_qmans(struct hl_device *hdev)
3738 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3741 if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3742 goto stop_edma_qmans;
3744 /* Stop CPs of PDMA QMANs */
3745 gaudi2_stop_qman_common(hdev, mmPDMA0_QM_BASE);
3746 gaudi2_stop_qman_common(hdev, mmPDMA1_QM_BASE);
3749 if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3752 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3753 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3754 u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3757 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3760 qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET +
3761 inst * DCORE_EDMA_OFFSET;
3763 /* Stop CPs of EDMA QMANs */
3764 gaudi2_stop_qman_common(hdev, qm_base);
3769 static void gaudi2_stop_mme_qmans(struct hl_device *hdev)
3771 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3774 offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE;
3776 for (i = 0 ; i < NUM_OF_DCORES ; i++) {
3777 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i)))
3780 gaudi2_stop_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset));
3784 static void gaudi2_stop_tpc_qmans(struct hl_device *hdev)
3786 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3790 if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
3793 for (i = 0 ; i < TPC_ID_SIZE ; i++) {
3794 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
3797 reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]];
3798 gaudi2_stop_qman_common(hdev, reg_base);
3802 static void gaudi2_stop_rot_qmans(struct hl_device *hdev)
3804 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3808 if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
3811 for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
3812 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
3815 reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]];
3816 gaudi2_stop_qman_common(hdev, reg_base);
3820 static void gaudi2_stop_nic_qmans(struct hl_device *hdev)
3822 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3823 u32 reg_base, queue_id;
3826 if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
3829 queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
3831 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
3832 if (!(hdev->nic_ports_mask & BIT(i)))
3835 reg_base = gaudi2_qm_blocks_bases[queue_id];
3836 gaudi2_stop_qman_common(hdev, reg_base);
3840 static void gaudi2_stall_dma_common(struct hl_device *hdev, u32 reg_base)
3844 reg_val = FIELD_PREP(PDMA0_CORE_CFG_1_HALT_MASK, 0x1);
3845 WREG32(reg_base + DMA_CORE_CFG_1_OFFSET, reg_val);
3848 static void gaudi2_dma_stall(struct hl_device *hdev)
3850 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3853 if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3856 gaudi2_stall_dma_common(hdev, mmPDMA0_CORE_BASE);
3857 gaudi2_stall_dma_common(hdev, mmPDMA1_CORE_BASE);
3860 if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3863 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3864 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3865 u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3868 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3871 core_base = mmDCORE0_EDMA0_CORE_BASE + dcore * DCORE_OFFSET +
3872 inst * DCORE_EDMA_OFFSET;
3874 /* Stall CPs of EDMA QMANs */
3875 gaudi2_stall_dma_common(hdev, core_base);
3880 static void gaudi2_mme_stall(struct hl_device *hdev)
3882 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3885 offset = mmDCORE1_MME_CTRL_LO_QM_STALL - mmDCORE0_MME_CTRL_LO_QM_STALL;
3887 for (i = 0 ; i < NUM_OF_DCORES ; i++)
3888 if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i))
3889 WREG32(mmDCORE0_MME_CTRL_LO_QM_STALL + (i * offset), 1);
3892 static void gaudi2_tpc_stall(struct hl_device *hdev)
3894 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3898 if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
3901 for (i = 0 ; i < TPC_ID_SIZE ; i++) {
3902 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
3905 reg_base = gaudi2_tpc_cfg_blocks_bases[i];
3906 WREG32(reg_base + TPC_CFG_STALL_OFFSET, 1);
3910 static void gaudi2_rotator_stall(struct hl_device *hdev)
3912 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3916 if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
3919 reg_val = FIELD_PREP(ROT_MSS_HALT_WBC_MASK, 0x1) |
3920 FIELD_PREP(ROT_MSS_HALT_RSB_MASK, 0x1) |
3921 FIELD_PREP(ROT_MSS_HALT_MRSB_MASK, 0x1);
3923 for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
3924 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
3927 WREG32(mmROT0_MSS_HALT + i * ROT_OFFSET, reg_val);
3931 static void gaudi2_disable_qman_common(struct hl_device *hdev, u32 reg_base)
3933 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, 0);
3936 static void gaudi2_disable_dma_qmans(struct hl_device *hdev)
3938 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3941 if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3942 goto stop_edma_qmans;
3944 gaudi2_disable_qman_common(hdev, mmPDMA0_QM_BASE);
3945 gaudi2_disable_qman_common(hdev, mmPDMA1_QM_BASE);
3948 if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3951 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3952 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3953 u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3956 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3959 qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET +
3960 inst * DCORE_EDMA_OFFSET;
3962 /* Disable CPs of EDMA QMANs */
3963 gaudi2_disable_qman_common(hdev, qm_base);
3968 static void gaudi2_disable_mme_qmans(struct hl_device *hdev)
3970 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3973 offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE;
3975 for (i = 0 ; i < NUM_OF_DCORES ; i++)
3976 if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i))
3977 gaudi2_disable_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset));
3980 static void gaudi2_disable_tpc_qmans(struct hl_device *hdev)
3982 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3986 if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
3989 for (i = 0 ; i < TPC_ID_SIZE ; i++) {
3990 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
3993 reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]];
3994 gaudi2_disable_qman_common(hdev, reg_base);
3998 static void gaudi2_disable_rot_qmans(struct hl_device *hdev)
4000 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4004 if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
4007 for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
4008 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
4011 reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]];
4012 gaudi2_disable_qman_common(hdev, reg_base);
4016 static void gaudi2_disable_nic_qmans(struct hl_device *hdev)
4018 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4019 u32 reg_base, queue_id;
4022 if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
4025 queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
4027 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
4028 if (!(hdev->nic_ports_mask & BIT(i)))
4031 reg_base = gaudi2_qm_blocks_bases[queue_id];
4032 gaudi2_disable_qman_common(hdev, reg_base);
4036 static void gaudi2_enable_timestamp(struct hl_device *hdev)
4038 /* Disable the timestamp counter */
4039 WREG32(mmPSOC_TIMESTAMP_BASE, 0);
4041 /* Zero the lower/upper parts of the 64-bit counter */
4042 WREG32(mmPSOC_TIMESTAMP_BASE + 0xC, 0);
4043 WREG32(mmPSOC_TIMESTAMP_BASE + 0x8, 0);
4045 /* Enable the counter */
4046 WREG32(mmPSOC_TIMESTAMP_BASE, 1);
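/* Note on the sequence above: the 64-bit counter is free running, so the
 * disable -> clear -> enable order presumably avoids a carry rippling between
 * the two 32-bit halves while they are being zeroed.
 */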
4049 static void gaudi2_disable_timestamp(struct hl_device *hdev)
4051 /* Disable the timestamp counter */
4052 WREG32(mmPSOC_TIMESTAMP_BASE, 0);
4055 static const char *gaudi2_irq_name(u16 irq_number)
4057 switch (irq_number) {
4058 case GAUDI2_IRQ_NUM_EVENT_QUEUE:
4059 return "gaudi2 cpu eq";
4060 case GAUDI2_IRQ_NUM_COMPLETION:
4061 return "gaudi2 completion";
4062 case GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ... GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM:
4063 return gaudi2_vdec_irq_name[irq_number - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM];
4064 case GAUDI2_IRQ_NUM_TPC_ASSERT:
4065 return "gaudi2 tpc assert";
4066 case GAUDI2_IRQ_NUM_UNEXPECTED_ERROR:
4067 return "gaudi2 unexpected error";
4068 case GAUDI2_IRQ_NUM_USER_FIRST ... GAUDI2_IRQ_NUM_USER_LAST:
4069 return "gaudi2 user completion";
4075 static void gaudi2_dec_disable_msix(struct hl_device *hdev, u32 max_irq_num)
4077 int i, irq, relative_idx;
4080 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i < max_irq_num ; i++) {
4081 irq = pci_irq_vector(hdev->pdev, i);
4082 relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM;
4084 dec = hdev->dec + relative_idx / 2;
4086 /* We pass different structures depending on the irq handler. For the abnormal
4087 * interrupt we pass hl_dec and for the regular interrupt we pass the relevant
4088 * user_interrupt entry
4090 free_irq(irq, ((relative_idx % 2) ?
4092 (void *) &hdev->user_interrupt[dec->core_id]));
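/* Sketch of the vector pairing assumed above (relative to
 * GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM): decoder vectors come in NRM/ABNRM pairs,
 * so relative_idx / 2 selects the hl_dec instance and relative_idx % 2 picks
 * the handler argument:
 *	0 -> dec0 NRM (user_interrupt entry),	1 -> dec0 ABNRM (hl_dec),
 *	2 -> dec1 NRM,				3 -> dec1 ABNRM, ...
 */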
4096 static int gaudi2_dec_enable_msix(struct hl_device *hdev)
4098 int rc, i, irq_init_cnt, irq, relative_idx;
4101 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, irq_init_cnt = 0;
4102 i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM;
4103 i++, irq_init_cnt++) {
4105 irq = pci_irq_vector(hdev->pdev, i);
4106 relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM;
4108 /* We pass different structures depending on the irq handler. For the abnormal
4109 * interrupt we pass hl_dec and for the regular interrupt we pass the relevant
4110 * user_interrupt entry
4112 * TODO: change the dec abnrm to threaded irq
4115 dec = hdev->dec + relative_idx / 2;
4116 if (relative_idx % 2) {
4117 rc = request_irq(irq, hl_irq_handler_dec_abnrm, 0,
4118 gaudi2_irq_name(i), (void *) dec);
4120 rc = request_threaded_irq(irq, hl_irq_handler_user_interrupt,
4121 hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
4123 (void *) &hdev->user_interrupt[dec->core_id]);
4127 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4135 gaudi2_dec_disable_msix(hdev, (GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + irq_init_cnt));
4139 static int gaudi2_enable_msix(struct hl_device *hdev)
4141 struct asic_fixed_properties *prop = &hdev->asic_prop;
4142 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4143 int rc, irq, i, j, user_irq_init_cnt;
4146 if (gaudi2->hw_cap_initialized & HW_CAP_MSIX)
4149 rc = pci_alloc_irq_vectors(hdev->pdev, GAUDI2_MSIX_ENTRIES, GAUDI2_MSIX_ENTRIES,
4152 dev_err(hdev->dev, "MSI-X: Failed to enable support -- %d/%d\n",
4153 GAUDI2_MSIX_ENTRIES, rc);
4157 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
4158 cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
4159 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_COMPLETION), cq);
4161 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4162 goto free_irq_vectors;
4165 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
4166 rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_EVENT_QUEUE),
4167 &hdev->event_queue);
4169 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4170 goto free_completion_irq;
4173 rc = gaudi2_dec_enable_msix(hdev);
4175 dev_err(hdev->dev, "Failed to enable decoder IRQ");
4176 goto free_event_irq;
4179 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
4180 rc = request_threaded_irq(irq, hl_irq_handler_user_interrupt,
4181 hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
4182 gaudi2_irq_name(GAUDI2_IRQ_NUM_TPC_ASSERT), &hdev->tpc_interrupt);
4184 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4188 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
4189 rc = request_irq(irq, hl_irq_handler_user_interrupt, 0,
4190 gaudi2_irq_name(GAUDI2_IRQ_NUM_UNEXPECTED_ERROR),
4191 &hdev->unexpected_error_interrupt);
4193 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4197 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, user_irq_init_cnt = 0;
4198 user_irq_init_cnt < prop->user_interrupt_count;
4199 i++, j++, user_irq_init_cnt++) {
4201 irq = pci_irq_vector(hdev->pdev, i);
4202 rc = request_threaded_irq(irq, hl_irq_handler_user_interrupt,
4203 hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
4204 gaudi2_irq_name(i), &hdev->user_interrupt[j]);
4207 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4212 gaudi2->hw_cap_initialized |= HW_CAP_MSIX;
4217 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count;
4218 i < GAUDI2_IRQ_NUM_USER_FIRST + user_irq_init_cnt ; i++, j++) {
4220 irq = pci_irq_vector(hdev->pdev, i);
4221 free_irq(irq, &hdev->user_interrupt[j]);
4223 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
4224 free_irq(irq, &hdev->unexpected_error_interrupt);
4226 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
4227 free_irq(irq, &hdev->tpc_interrupt);
4229 gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_DEC_LAST + 1);
4231 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
4234 free_completion_irq:
4235 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
4239 pci_free_irq_vectors(hdev->pdev);
4244 static void gaudi2_sync_irqs(struct hl_device *hdev)
4246 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4250 if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX))
4253 /* Wait for all pending IRQs to be finished */
4254 synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION));
4256 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM ; i++) {
4257 irq = pci_irq_vector(hdev->pdev, i);
4258 synchronize_irq(irq);
4261 synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT));
4262 synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR));
4264 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = 0 ; j < hdev->asic_prop.user_interrupt_count;
4266 irq = pci_irq_vector(hdev->pdev, i);
4267 synchronize_irq(irq);
4270 synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE));
4273 static void gaudi2_disable_msix(struct hl_device *hdev)
4275 struct asic_fixed_properties *prop = &hdev->asic_prop;
4276 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4280 if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX))
4283 gaudi2_sync_irqs(hdev);
4285 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
4286 free_irq(irq, &hdev->event_queue);
4288 gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1);
4290 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
4291 free_irq(irq, &hdev->tpc_interrupt);
4293 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
4294 free_irq(irq, &hdev->unexpected_error_interrupt);
4296 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, k = 0;
4297 k < hdev->asic_prop.user_interrupt_count ; i++, j++, k++) {
4299 irq = pci_irq_vector(hdev->pdev, i);
4300 free_irq(irq, &hdev->user_interrupt[j]);
4303 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
4304 cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
4307 pci_free_irq_vectors(hdev->pdev);
4309 gaudi2->hw_cap_initialized &= ~HW_CAP_MSIX;
4312 static void gaudi2_stop_dcore_dec(struct hl_device *hdev, int dcore_id)
4314 u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1);
4315 u32 graceful_pend_mask = DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK;
4316 u32 timeout_usec, dec_id, dec_bit, offset, graceful;
4320 timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC;
4322 timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC;
4324 for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
4325 dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id;
4326 if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
4329 offset = dcore_id * DCORE_OFFSET + dec_id * DCORE_VDEC_OFFSET;
4331 WREG32(mmDCORE0_DEC0_CMD_SWREG16 + offset, 0);
4333 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val);
4335 /* Wait until all traffic from the decoder stops
4336 * before applying core reset.
4338 rc = hl_poll_timeout(
4340 mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset,
4342 (graceful & graceful_pend_mask),
4347 "Failed to stop traffic from DCORE%d Decoder %d\n",
4352 static void gaudi2_stop_pcie_dec(struct hl_device *hdev)
4354 u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1);
4355 u32 graceful_pend_mask = PCIE_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK;
4356 u32 timeout_usec, dec_id, dec_bit, offset, graceful;
4360 timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC;
4362 timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC;
4364 for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
4365 dec_bit = PCIE_DEC_SHIFT + dec_id;
4366 if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
4369 offset = dec_id * PCIE_VDEC_OFFSET;
4371 WREG32(mmPCIE_DEC0_CMD_SWREG16 + offset, 0);
4373 WREG32(mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val);
4375 /* Wait until all traffic from the decoder stops
4376 * before applying core reset.
4378 rc = hl_poll_timeout(
4380 mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset,
4382 (graceful & graceful_pend_mask),
4387 "Failed to stop traffic from PCIe Decoder %d\n",
4392 static void gaudi2_stop_dec(struct hl_device *hdev)
4394 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4397 if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == 0)
4400 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
4401 gaudi2_stop_dcore_dec(hdev, dcore_id);
4403 gaudi2_stop_pcie_dec(hdev);
4406 static void gaudi2_set_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
4408 u32 reg_base, reg_val;
4410 reg_base = gaudi2_arc_blocks_bases[cpu_id];
4411 if (run_mode == HL_ENGINE_CORE_RUN)
4412 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 1);
4414 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_HALT_REQ_MASK, 1);
4416 WREG32(reg_base + ARC_HALT_REQ_OFFSET, reg_val);
4419 static void gaudi2_halt_arcs(struct hl_device *hdev)
4423 for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++) {
4424 if (gaudi2_is_arc_enabled(hdev, arc_id))
4425 gaudi2_set_arc_running_mode(hdev, arc_id, HL_ENGINE_CORE_HALT);
4429 static int gaudi2_verify_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
4432 u32 reg_base, val, ack_mask, timeout_usec = 100000;
4435 timeout_usec *= 100;
4437 reg_base = gaudi2_arc_blocks_bases[cpu_id];
4438 if (run_mode == HL_ENGINE_CORE_RUN)
4439 ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_RUN_ACK_MASK;
4441 ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_HALT_ACK_MASK;
4443 rc = hl_poll_timeout(hdev, reg_base + ARC_HALT_ACK_OFFSET,
4444 val, ((val & ack_mask) == ack_mask),
4445 1000, timeout_usec);
4449 val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 0);
4450 WREG32(reg_base + ARC_HALT_REQ_OFFSET, val);
4456 static void gaudi2_reset_arcs(struct hl_device *hdev)
4458 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4464 for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++)
4465 if (gaudi2_is_arc_enabled(hdev, arc_id))
4466 gaudi2_clr_arc_id_cap(hdev, arc_id);
4469 static void gaudi2_nic_qmans_manual_flush(struct hl_device *hdev)
4471 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4475 if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
4478 queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
4480 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
4481 if (!(hdev->nic_ports_mask & BIT(i)))
4484 gaudi2_qman_manual_flush_common(hdev, queue_id);
4488 static int gaudi2_set_engine_cores(struct hl_device *hdev, u32 *core_ids,
4489 u32 num_cores, u32 core_command)
4493 for (i = 0 ; i < num_cores ; i++) {
4494 if (gaudi2_is_arc_enabled(hdev, core_ids[i]))
4495 gaudi2_set_arc_running_mode(hdev, core_ids[i], core_command);
4498 for (i = 0 ; i < num_cores ; i++) {
4499 if (gaudi2_is_arc_enabled(hdev, core_ids[i])) {
4500 rc = gaudi2_verify_arc_running_mode(hdev, core_ids[i], core_command);
4503 dev_err(hdev->dev, "failed to %s arc: %d\n",
4504 (core_command == HL_ENGINE_CORE_HALT) ?
4505 "HALT" : "RUN", core_ids[i]);
4514 static int gaudi2_set_tpc_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command)
4516 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4517 u32 reg_base, reg_addr, reg_val, tpc_id;
4519 if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
4522 tpc_id = gaudi2_tpc_engine_id_to_tpc_id[engine_id];
4523 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + tpc_id)))
4526 reg_base = gaudi2_tpc_cfg_blocks_bases[tpc_id];
4527 reg_addr = reg_base + TPC_CFG_STALL_OFFSET;
4528 reg_val = FIELD_PREP(DCORE0_TPC0_CFG_TPC_STALL_V_MASK,
4529 !!(engine_command == HL_ENGINE_STALL));
4530 WREG32(reg_addr, reg_val);
4532 if (engine_command == HL_ENGINE_RESUME) {
4533 reg_base = gaudi2_tpc_eml_cfg_blocks_bases[tpc_id];
4534 reg_addr = reg_base + TPC_EML_CFG_DBG_CNT_OFFSET;
4535 RMWREG32(reg_addr, 0x1, DCORE0_TPC0_EML_CFG_DBG_CNT_DBG_EXIT_MASK);
4541 static int gaudi2_set_mme_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command)
4543 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4544 u32 reg_base, reg_addr, reg_val, mme_id;
4546 mme_id = gaudi2_mme_engine_id_to_mme_id[engine_id];
4547 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + mme_id)))
4550 reg_base = gaudi2_mme_ctrl_lo_blocks_bases[mme_id];
4551 reg_addr = reg_base + MME_CTRL_LO_QM_STALL_OFFSET;
4552 reg_val = FIELD_PREP(DCORE0_MME_CTRL_LO_QM_STALL_V_MASK,
4553 !!(engine_command == HL_ENGINE_STALL));
4554 WREG32(reg_addr, reg_val);
4559 static int gaudi2_set_edma_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command)
4561 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4562 u32 reg_base, reg_addr, reg_val, edma_id;
4564 if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
4567 edma_id = gaudi2_edma_engine_id_to_edma_id[engine_id];
4568 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + edma_id)))
4571 reg_base = gaudi2_dma_core_blocks_bases[edma_id];
4572 reg_addr = reg_base + EDMA_CORE_CFG_STALL_OFFSET;
4573 reg_val = FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_HALT_MASK,
4574 !!(engine_command == HL_ENGINE_STALL));
4575 WREG32(reg_addr, reg_val);
4577 if (engine_command == HL_ENGINE_STALL) {
4578 reg_val = FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_HALT_MASK, 0x1) |
4579 FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_FLUSH_MASK, 0x1);
4580 WREG32(reg_addr, reg_val);
4586 static int gaudi2_set_engine_modes(struct hl_device *hdev,
4587 u32 *engine_ids, u32 num_engines, u32 engine_command)
4591 for (i = 0 ; i < num_engines ; ++i) {
4592 switch (engine_ids[i]) {
4593 case GAUDI2_DCORE0_ENGINE_ID_TPC_0 ... GAUDI2_DCORE0_ENGINE_ID_TPC_5:
4594 case GAUDI2_DCORE1_ENGINE_ID_TPC_0 ... GAUDI2_DCORE1_ENGINE_ID_TPC_5:
4595 case GAUDI2_DCORE2_ENGINE_ID_TPC_0 ... GAUDI2_DCORE2_ENGINE_ID_TPC_5:
4596 case GAUDI2_DCORE3_ENGINE_ID_TPC_0 ... GAUDI2_DCORE3_ENGINE_ID_TPC_5:
4597 rc = gaudi2_set_tpc_engine_mode(hdev, engine_ids[i], engine_command);
4602 case GAUDI2_DCORE0_ENGINE_ID_MME:
4603 case GAUDI2_DCORE1_ENGINE_ID_MME:
4604 case GAUDI2_DCORE2_ENGINE_ID_MME:
4605 case GAUDI2_DCORE3_ENGINE_ID_MME:
4606 rc = gaudi2_set_mme_engine_mode(hdev, engine_ids[i], engine_command);
4611 case GAUDI2_DCORE0_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE0_ENGINE_ID_EDMA_1:
4612 case GAUDI2_DCORE1_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE1_ENGINE_ID_EDMA_1:
4613 case GAUDI2_DCORE2_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE2_ENGINE_ID_EDMA_1:
4614 case GAUDI2_DCORE3_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE3_ENGINE_ID_EDMA_1:
4615 rc = gaudi2_set_edma_engine_mode(hdev, engine_ids[i], engine_command);
4621 dev_err(hdev->dev, "Invalid engine ID %u\n", engine_ids[i]);
4629 static int gaudi2_set_engines(struct hl_device *hdev, u32 *engine_ids,
4630 u32 num_engines, u32 engine_command)
4632 switch (engine_command) {
4633 case HL_ENGINE_CORE_HALT:
4634 case HL_ENGINE_CORE_RUN:
4635 return gaudi2_set_engine_cores(hdev, engine_ids, num_engines, engine_command);
4637 case HL_ENGINE_STALL:
4638 case HL_ENGINE_RESUME:
4639 return gaudi2_set_engine_modes(hdev, engine_ids, num_engines, engine_command);
4642 dev_err(hdev->dev, "failed to execute command id %u\n", engine_command);
static void gaudi2_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
{
	u32 wait_timeout_ms;

	if (hdev->pldm)
		wait_timeout_ms = GAUDI2_PLDM_RESET_WAIT_MSEC;
	else
		wait_timeout_ms = GAUDI2_RESET_WAIT_MSEC;

	if (fw_reset)
		goto skip_engines;

	gaudi2_stop_dma_qmans(hdev);
	gaudi2_stop_mme_qmans(hdev);
	gaudi2_stop_tpc_qmans(hdev);
	gaudi2_stop_rot_qmans(hdev);
	gaudi2_stop_nic_qmans(hdev);
	msleep(wait_timeout_ms);

	gaudi2_halt_arcs(hdev);
	gaudi2_dma_stall(hdev);
	gaudi2_mme_stall(hdev);
	gaudi2_tpc_stall(hdev);
	gaudi2_rotator_stall(hdev);

	msleep(wait_timeout_ms);

	gaudi2_stop_dec(hdev);

	/*
	 * in case of soft reset do a manual flush for QMANs (currently called
	 * only for NIC QMANs)
	 */
	if (!hard_reset)
		gaudi2_nic_qmans_manual_flush(hdev);

	gaudi2_disable_dma_qmans(hdev);
	gaudi2_disable_mme_qmans(hdev);
	gaudi2_disable_tpc_qmans(hdev);
	gaudi2_disable_rot_qmans(hdev);
	gaudi2_disable_nic_qmans(hdev);
	gaudi2_disable_timestamp(hdev);

skip_engines:
	if (hard_reset) {
		gaudi2_disable_msix(hdev);
		return;
	}

	gaudi2_sync_irqs(hdev);
}

static void gaudi2_init_firmware_preload_params(struct hl_device *hdev)
{
	struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;

	pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
	pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
	pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
	pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
	pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
	pre_fw_load->wait_for_preboot_timeout = GAUDI2_PREBOOT_REQ_TIMEOUT_USEC;
}

static void gaudi2_init_firmware_loader(struct hl_device *hdev)
{
	struct fw_load_mgr *fw_loader = &hdev->fw_loader;
	struct dynamic_fw_load_mgr *dynamic_loader;
	struct cpu_dyn_regs *dyn_regs;

	/* fill common fields */
	fw_loader->fw_comp_loaded = FW_TYPE_NONE;
	fw_loader->boot_fit_img.image_name = GAUDI2_BOOT_FIT_FILE;
	fw_loader->linux_img.image_name = GAUDI2_LINUX_FW_FILE;
	fw_loader->boot_fit_timeout = GAUDI2_BOOT_FIT_REQ_TIMEOUT_USEC;
	fw_loader->skip_bmc = false;
	fw_loader->sram_bar_id = SRAM_CFG_BAR_ID;
	fw_loader->dram_bar_id = DRAM_BAR_ID;
	fw_loader->cpu_timeout = GAUDI2_CPU_TIMEOUT_USEC;

	/* here we update initial values for few specific dynamic regs (as
	 * before reading the first descriptor from FW those values have to be
	 * hard-coded). In later stages of the protocol those values will be
	 * updated automatically by reading the FW descriptor so data there
	 * will always be up-to-date
	 */
	dynamic_loader = &hdev->fw_loader.dynamic_loader;
	dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
	dyn_regs->kmd_msg_to_cpu = cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
	dyn_regs->cpu_cmd_status_to_host = cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
	dynamic_loader->wait_for_bl_timeout = GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC;
}

static int gaudi2_init_cpu(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	int rc;

	if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
		return 0;

	if (gaudi2->hw_cap_initialized & HW_CAP_CPU)
		return 0;

	rc = hl_fw_init_cpu(hdev);
	if (rc)
		return rc;

	gaudi2->hw_cap_initialized |= HW_CAP_CPU;

	return 0;
}

static int gaudi2_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
{
	struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	struct cpu_dyn_regs *dyn_regs;
	struct hl_eq *eq;
	u32 status;
	int err;

	if (!hdev->cpu_queues_enable)
		return 0;

	if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)
		return 0;

	eq = &hdev->event_queue;

	dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;

	WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
	WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));

	WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
	WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));

	WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW, lower_32_bits(hdev->cpu_accessible_dma_address));
	WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH, upper_32_bits(hdev->cpu_accessible_dma_address));

	WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
	WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
	WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);

	/* Used for EQ CI */
	WREG32(mmCPU_IF_EQ_RD_OFFS, 0);

	WREG32(mmCPU_IF_PF_PQ_PI, 0);

	WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);

	/* Let the ARC know we are ready as it is now handling those queues */

	WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq),
		gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id);

	err = hl_poll_timeout(
		hdev,
		mmCPU_IF_QUEUE_INIT,
		status,
		(status == PQ_INIT_STATUS_READY_FOR_HOST),
		1000,
		cpu_timeout);

	if (err) {
		dev_err(hdev->dev, "Failed to communicate with device CPU (timeout)\n");
		return -EIO;
	}

	/* update FW application security bits */
	if (prop->fw_cpu_boot_dev_sts0_valid)
		prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);

	if (prop->fw_cpu_boot_dev_sts1_valid)
		prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);

	gaudi2->hw_cap_initialized |= HW_CAP_CPU_Q;

	return 0;
}

static void gaudi2_init_qman_pq(struct hl_device *hdev, u32 reg_base,
		u32 queue_id_base)
{
	struct hl_hw_queue *q;
	u32 pq_id, pq_offset;

	for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) {
		q = &hdev->kernel_queues[queue_id_base + pq_id];
		pq_offset = pq_id * 4;

		WREG32(reg_base + QM_PQ_BASE_LO_0_OFFSET + pq_offset,
				lower_32_bits(q->bus_address));
		WREG32(reg_base + QM_PQ_BASE_HI_0_OFFSET + pq_offset,
				upper_32_bits(q->bus_address));
		WREG32(reg_base + QM_PQ_SIZE_0_OFFSET + pq_offset, ilog2(HL_QUEUE_LENGTH));
		WREG32(reg_base + QM_PQ_PI_0_OFFSET + pq_offset, 0);
		WREG32(reg_base + QM_PQ_CI_0_OFFSET + pq_offset, 0);
	}
}

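/*
 * Note on the PQ size programming above: the hardware register takes the log2
 * of the number of queue entries, not a byte count. For example, assuming
 * HL_QUEUE_LENGTH is 4096 (it must be a power of two for ilog2() to be
 * exact), the register is programmed with 12.
 */
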
static void gaudi2_init_qman_cp(struct hl_device *hdev, u32 reg_base)
{
	u32 cp_id, cp_offset, mtr_base_lo, mtr_base_hi, so_base_lo, so_base_hi;

	mtr_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);

	for (cp_id = 0 ; cp_id < NUM_OF_CP_PER_QMAN ; cp_id++) {
		cp_offset = cp_id * 4;

		WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_LO_0_OFFSET + cp_offset, mtr_base_lo);
		WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_HI_0_OFFSET + cp_offset, mtr_base_hi);
		WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_LO_0_OFFSET + cp_offset, so_base_lo);
		WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_HI_0_OFFSET + cp_offset, so_base_hi);
	}

	/* allow QMANs to accept work from ARC CQF */
	WREG32(reg_base + QM_CP_CFG_OFFSET, FIELD_PREP(PDMA0_QM_CP_CFG_SWITCH_EN_MASK, 0x1));
}

static void gaudi2_init_qman_pqc(struct hl_device *hdev, u32 reg_base,
		u32 queue_id_base)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 pq_id, pq_offset, so_base_lo, so_base_hi;

	so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);

	for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) {
		pq_offset = pq_id * 4;

		/* Configure QMAN HBW to scratchpad as it is not needed */
		WREG32(reg_base + QM_PQC_HBW_BASE_LO_0_OFFSET + pq_offset,
				lower_32_bits(gaudi2->scratchpad_bus_address));
		WREG32(reg_base + QM_PQC_HBW_BASE_HI_0_OFFSET + pq_offset,
				upper_32_bits(gaudi2->scratchpad_bus_address));
		WREG32(reg_base + QM_PQC_SIZE_0_OFFSET + pq_offset,
				ilog2(PAGE_SIZE / sizeof(struct hl_cq_entry)));

		WREG32(reg_base + QM_PQC_PI_0_OFFSET + pq_offset, 0);
		WREG32(reg_base + QM_PQC_LBW_WDATA_0_OFFSET + pq_offset, QM_PQC_LBW_WDATA);
		WREG32(reg_base + QM_PQC_LBW_BASE_LO_0_OFFSET + pq_offset, so_base_lo);
		WREG32(reg_base + QM_PQC_LBW_BASE_HI_0_OFFSET + pq_offset, so_base_hi);
	}

	/* Enable QMAN H/W completion */
	WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT);
}

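/*
 * Note on the PQC setup above: hardware completion generates both an HBW
 * write and an LBW write. Only the LBW write (QM_PQC_LBW_WDATA targeting a
 * sync object) is consumed here, so the HBW completion address is steered to
 * a per-device scratchpad buffer where the write is effectively discarded.
 */
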
static u32 gaudi2_get_dyn_sp_reg(struct hl_device *hdev, u32 queue_id_base)
{
	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 sp_reg_addr;

	switch (queue_id_base) {
	case GAUDI2_QUEUE_ID_PDMA_0_0...GAUDI2_QUEUE_ID_PDMA_1_3:
		fallthrough;
	case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
		fallthrough;
	case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
		fallthrough;
	case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
		fallthrough;
	case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
		sp_reg_addr = le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
		break;
	case GAUDI2_QUEUE_ID_DCORE0_MME_0_0...GAUDI2_QUEUE_ID_DCORE0_MME_0_3:
		fallthrough;
	case GAUDI2_QUEUE_ID_DCORE1_MME_0_0...GAUDI2_QUEUE_ID_DCORE1_MME_0_3:
		fallthrough;
	case GAUDI2_QUEUE_ID_DCORE2_MME_0_0...GAUDI2_QUEUE_ID_DCORE2_MME_0_3:
		fallthrough;
	case GAUDI2_QUEUE_ID_DCORE3_MME_0_0...GAUDI2_QUEUE_ID_DCORE3_MME_0_3:
		sp_reg_addr = le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
		break;
	case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3:
		fallthrough;
	case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3:
		fallthrough;
	case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3:
		fallthrough;
	case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3:
		sp_reg_addr = le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
		break;
	case GAUDI2_QUEUE_ID_ROT_0_0...GAUDI2_QUEUE_ID_ROT_1_3:
		sp_reg_addr = le32_to_cpu(dyn_regs->gic_rot_qm_irq_ctrl);
		break;
	case GAUDI2_QUEUE_ID_NIC_0_0...GAUDI2_QUEUE_ID_NIC_23_3:
		sp_reg_addr = le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
		break;
	default:
		dev_err(hdev->dev, "Unexpected h/w queue %d\n", queue_id_base);
		return 0;
	}

	return sp_reg_addr;
}

static void gaudi2_init_qman_common(struct hl_device *hdev, u32 reg_base,
		u32 queue_id_base)
{
	u32 glbl_prot = QMAN_MAKE_TRUSTED, irq_handler_offset;
	int map_table_entry;

	WREG32(reg_base + QM_GLBL_PROT_OFFSET, glbl_prot);

	irq_handler_offset = gaudi2_get_dyn_sp_reg(hdev, queue_id_base);
	WREG32(reg_base + QM_GLBL_ERR_ADDR_LO_OFFSET, lower_32_bits(CFG_BASE + irq_handler_offset));
	WREG32(reg_base + QM_GLBL_ERR_ADDR_HI_OFFSET, upper_32_bits(CFG_BASE + irq_handler_offset));

	map_table_entry = gaudi2_qman_async_event_id[queue_id_base];
	WREG32(reg_base + QM_GLBL_ERR_WDATA_OFFSET,
		gaudi2_irq_map_table[map_table_entry].cpu_id);

	WREG32(reg_base + QM_ARB_ERR_MSG_EN_OFFSET, QM_ARB_ERR_MSG_EN_MASK);

	WREG32(reg_base + QM_ARB_SLV_CHOISE_WDT_OFFSET, GAUDI2_ARB_WDT_TIMEOUT);
	WREG32(reg_base + QM_GLBL_CFG1_OFFSET, 0);
	WREG32(reg_base + QM_GLBL_CFG2_OFFSET, 0);

	/* Enable the QMAN channel.
	 * PDMA QMAN configuration is different, as we do not allow user to
	 * access some of the CPs.
	 * PDMA0: CP2/3 are reserved for the ARC usage.
	 * PDMA1: CP1/2/3 are reserved for the ARC usage.
	 */
	if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0])
		WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA1_QMAN_ENABLE);
	else if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0])
		WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA0_QMAN_ENABLE);
	else
		WREG32(reg_base + QM_GLBL_CFG0_OFFSET, QMAN_ENABLE);
}

static void gaudi2_init_qman(struct hl_device *hdev, u32 reg_base,
		u32 queue_id_base)
{
	u32 pq_id;

	for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++)
		hdev->kernel_queues[queue_id_base + pq_id].cq_id = GAUDI2_RESERVED_CQ_CS_COMPLETION;

	gaudi2_init_qman_pq(hdev, reg_base, queue_id_base);
	gaudi2_init_qman_cp(hdev, reg_base);
	gaudi2_init_qman_pqc(hdev, reg_base, queue_id_base);
	gaudi2_init_qman_common(hdev, reg_base, queue_id_base);
}

static void gaudi2_init_dma_core(struct hl_device *hdev, u32 reg_base,
		u32 dma_core_id, bool is_secure)
{
	u32 prot, irq_handler_offset;
	struct cpu_dyn_regs *dyn_regs;
	int map_table_entry;

	prot = 1 << ARC_FARM_KDMA_PROT_ERR_VAL_SHIFT;
	if (is_secure)
		prot |= 1 << ARC_FARM_KDMA_PROT_VAL_SHIFT;

	WREG32(reg_base + DMA_CORE_PROT_OFFSET, prot);

	dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	irq_handler_offset = le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);

	WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_LO_OFFSET,
		lower_32_bits(CFG_BASE + irq_handler_offset));

	WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_HI_OFFSET,
		upper_32_bits(CFG_BASE + irq_handler_offset));

	map_table_entry = gaudi2_dma_core_async_event_id[dma_core_id];
	WREG32(reg_base + DMA_CORE_ERRMSG_WDATA_OFFSET,
		gaudi2_irq_map_table[map_table_entry].cpu_id);

	/* Enable the DMA channel */
	WREG32(reg_base + DMA_CORE_CFG_0_OFFSET, 1 << ARC_FARM_KDMA_CFG_0_EN_SHIFT);
}

static void gaudi2_init_kdma(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 reg_base;

	if ((gaudi2->hw_cap_initialized & HW_CAP_KDMA) == HW_CAP_KDMA)
		return;

	reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_KDMA];

	gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_KDMA, true);

	gaudi2->hw_cap_initialized |= HW_CAP_KDMA;
}

static void gaudi2_init_pdma(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 reg_base;

	if ((gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK) == HW_CAP_PDMA_MASK)
		return;

	reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA0];
	gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA0, false);

	reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0];
	gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_0_0);

	reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA1];
	gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA1, false);

	reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0];
	gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_1_0);

	gaudi2->hw_cap_initialized |= HW_CAP_PDMA_MASK;
}

static void gaudi2_init_edma_instance(struct hl_device *hdev, u8 seq)
{
	u32 reg_base, base_edma_core_id, base_edma_qman_id;

	base_edma_core_id = DMA_CORE_ID_EDMA0 + seq;
	base_edma_qman_id = edma_stream_base[seq];

	reg_base = gaudi2_dma_core_blocks_bases[base_edma_core_id];
	gaudi2_init_dma_core(hdev, reg_base, base_edma_core_id, false);

	reg_base = gaudi2_qm_blocks_bases[base_edma_qman_id];
	gaudi2_init_qman(hdev, reg_base, base_edma_qman_id);
}

static void gaudi2_init_edma(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	int dcore, inst;

	if ((gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK) == HW_CAP_EDMA_MASK)
		return;

	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;

			if (!(prop->edma_enabled_mask & BIT(seq)))
				continue;

			gaudi2_init_edma_instance(hdev, seq);

			gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_EDMA_SHIFT + seq);
		}
	}
}

/**
 * gaudi2_arm_monitors_for_virt_msix_db() - Arm monitors for writing to the virtual MSI-X doorbell.
 * @hdev: pointer to habanalabs device structure.
 * @sob_id: sync object ID.
 * @first_mon_id: ID of first monitor out of 3 consecutive monitors.
 * @interrupt_id: interrupt ID.
 *
 * Some initiators cannot have HBW address in their completion address registers, and thus cannot
 * write directly to the HBW host memory of the virtual MSI-X doorbell.
 * Instead, they are configured to LBW write to a sync object, and a monitor will do the HBW write.
 *
 * The mechanism in the sync manager block is composed of a master monitor with 3 messages.
 * In addition to the HBW write, the other 2 messages are for preparing the monitor to next
 * completion, by decrementing the sync object value and re-arming the monitor.
 */
static void gaudi2_arm_monitors_for_virt_msix_db(struct hl_device *hdev, u32 sob_id,
		u32 first_mon_id, u32 interrupt_id)
{
	u32 sob_offset, first_mon_offset, mon_offset, payload, sob_group, mode, arm, config;
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u64 addr;
	u8 mask;

	/* Reset the SOB value */
	sob_offset = sob_id * sizeof(u32);
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0);

	/* Configure 3 monitors:
	 * 1. Write interrupt ID to the virtual MSI-X doorbell (master monitor)
	 * 2. Decrement SOB value by 1.
	 * 3. Re-arm the master monitor.
	 */

	first_mon_offset = first_mon_id * sizeof(u32);

	/* 2nd monitor: Decrement SOB value by 1 */
	mon_offset = first_mon_offset + sizeof(u32);

	addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));

	payload = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 0x7FFF) | /* "-1" */
			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_MASK, 1) |
			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1);
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);

	/* 3rd monitor: Re-arm the master monitor */
	mon_offset = first_mon_offset + 2 * sizeof(u32);

	addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + first_mon_offset;
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));

	sob_group = sob_id / 8;
	mask = ~BIT(sob_id & 0x7);
	mode = 0; /* comparison mode is "greater than or equal to" */
	arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sob_group) |
			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask) |
			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode) |
			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, 1);

	payload = arm;
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);

	/* 1st monitor (master): Write interrupt ID to the virtual MSI-X doorbell */
	mon_offset = first_mon_offset;

	config = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_WR_NUM_MASK, 2); /* "2": 3 writes */
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + mon_offset, config);

	addr = gaudi2->virt_msix_db_dma_addr;
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));

	payload = interrupt_id;
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);

	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, arm);
}

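/*
 * Worked example for the "-1" payload above: the SOB value field is 15 bits
 * wide, and with the SIGN and INC bits set the write is treated as a signed
 * increment, so a value field of 0x7FFF acts as an increment by -1. If the
 * doorbell SOB holds 1 after a completion, the 2nd monitor's write brings it
 * back to 0 before the 3rd monitor re-arms the master monitor.
 */
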
static void gaudi2_prepare_sm_for_virt_msix_db(struct hl_device *hdev)
{
	u32 decoder_id, sob_id, first_mon_id, interrupt_id;
	struct asic_fixed_properties *prop = &hdev->asic_prop;

	/* Decoder normal/abnormal interrupts */
	for (decoder_id = 0 ; decoder_id < NUMBER_OF_DEC ; ++decoder_id) {
		if (!(prop->decoder_enabled_mask & BIT(decoder_id)))
			continue;

		sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id;
		first_mon_id = GAUDI2_RESERVED_MON_DEC_NRM_FIRST + 3 * decoder_id;
		interrupt_id = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 2 * decoder_id;
		gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id);

		sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id;
		first_mon_id = GAUDI2_RESERVED_MON_DEC_ABNRM_FIRST + 3 * decoder_id;
		interrupt_id += 1;
		gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id);
	}
}

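/*
 * Note: each decoder owns a consecutive pair of interrupt IDs, the normal one
 * first and the abnormal one right after it, which is why the abnormal arming
 * above reuses the computed interrupt_id plus one instead of deriving it from
 * a separate base.
 */
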
static void gaudi2_init_sm(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u64 cq_address;
	u32 reg_val;
	int i;

	/* Enable HBW/LBW CQ for completion monitors */
	reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1);
	reg_val |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_LBW_EN_MASK, 1);

	for (i = 0 ; i < GAUDI2_MAX_PENDING_CS ; i++)
		WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val);

	/* Enable only HBW CQ for KDMA completion monitor */
	reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1);
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val);

	/* Init CQ0 DB - configure the monitor to trigger MSI-X interrupt */
	WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0, lower_32_bits(gaudi2->virt_msix_db_dma_addr));
	WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0, upper_32_bits(gaudi2->virt_msix_db_dma_addr));
	WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0, GAUDI2_IRQ_NUM_COMPLETION);

	for (i = 0 ; i < GAUDI2_RESERVED_CQ_NUMBER ; i++) {
		cq_address =
			hdev->completion_queue[i].bus_address;

		WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + (4 * i),
			lower_32_bits(cq_address));
		WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + (4 * i),
			upper_32_bits(cq_address));
		WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + (4 * i),
			ilog2(HL_CQ_SIZE_IN_BYTES));
	}

	/* Configure kernel ASID and MMU BP */
	WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_SEC, 0x10000);
	WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV, 0);

	/* Initialize sync objects and monitors which are used for the virtual MSI-X doorbell */
	gaudi2_prepare_sm_for_virt_msix_db(hdev);
}

static void gaudi2_init_mme_acc(struct hl_device *hdev, u32 reg_base)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 reg_val;
	int i;

	reg_val = FIELD_PREP(MME_ACC_INTR_MASK_WBC_ERR_RESP_MASK, 0);
	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_POS_INF_MASK, 1);
	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NEG_INF_MASK, 1);
	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NAN_MASK, 1);
	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_POS_INF_MASK, 1);
	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_NEG_INF_MASK, 1);

	WREG32(reg_base + MME_ACC_INTR_MASK_OFFSET, reg_val);
	WREG32(reg_base + MME_ACC_AP_LFSR_POLY_OFFSET, 0x80DEADAF);

	for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++) {
		WREG32(reg_base + MME_ACC_AP_LFSR_SEED_SEL_OFFSET, i);
		WREG32(reg_base + MME_ACC_AP_LFSR_SEED_WDATA_OFFSET, gaudi2->lfsr_rand_seeds[i]);
	}
}

static void gaudi2_init_dcore_mme(struct hl_device *hdev, int dcore_id,
		bool config_qman_only)
{
	u32 queue_id_base, reg_base;

	switch (dcore_id) {
	case 0:
		queue_id_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0;
		break;
	case 1:
		queue_id_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0;
		break;
	case 2:
		queue_id_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0;
		break;
	case 3:
		queue_id_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0;
		break;
	default:
		dev_err(hdev->dev, "Invalid dcore id %u\n", dcore_id);
		return;
	}

	if (!config_qman_only) {
		reg_base = gaudi2_mme_acc_blocks_bases[dcore_id];
		gaudi2_init_mme_acc(hdev, reg_base);
	}

	reg_base = gaudi2_qm_blocks_bases[queue_id_base];
	gaudi2_init_qman(hdev, reg_base, queue_id_base);
}

static void gaudi2_init_mme(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	int i;

	if ((gaudi2->hw_cap_initialized & HW_CAP_MME_MASK) == HW_CAP_MME_MASK)
		return;

	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
		gaudi2_init_dcore_mme(hdev, i, false);

		gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_MME_SHIFT + i);
	}
}

static void gaudi2_init_tpc_cfg(struct hl_device *hdev, u32 reg_base)
{
	/* Mask arithmetic and QM interrupts in TPC */
	WREG32(reg_base + TPC_CFG_TPC_INTR_MASK_OFFSET, 0x23FFFE);

	/* Set 16 cache lines */
	WREG32(reg_base + TPC_CFG_MSS_CONFIG_OFFSET,
			2 << DCORE0_TPC0_CFG_MSS_CONFIG_ICACHE_FETCH_LINE_NUM_SHIFT);
}

struct gaudi2_tpc_init_cfg_data {
	enum gaudi2_queue_id dcore_tpc_qid_base[NUM_OF_DCORES];
};

static void gaudi2_init_tpc_config(struct hl_device *hdev, int dcore, int inst,
		u32 offset, struct iterate_module_ctx *ctx)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	struct gaudi2_tpc_init_cfg_data *cfg_data = ctx->data;
	u32 queue_id_base;
	u8 seq;

	queue_id_base = cfg_data->dcore_tpc_qid_base[dcore] + (inst * NUM_OF_PQ_PER_QMAN);

	if (dcore == 0 && inst == (NUM_DCORE0_TPC - 1))
		/* gets last sequence number */
		seq = NUM_OF_DCORES * NUM_OF_TPC_PER_DCORE;
	else
		seq = dcore * NUM_OF_TPC_PER_DCORE + inst;

	gaudi2_init_tpc_cfg(hdev, mmDCORE0_TPC0_CFG_BASE + offset);
	gaudi2_init_qman(hdev, mmDCORE0_TPC0_QM_BASE + offset, queue_id_base);

	gaudi2->tpc_hw_cap_initialized |= BIT_ULL(HW_CAP_TPC_SHIFT + seq);
}

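/*
 * Example of the sequence numbering above: with NUM_OF_TPC_PER_DCORE TPCs per
 * dcore, instance 5 of dcore 2 gets seq = 2 * NUM_OF_TPC_PER_DCORE + 5, while
 * the extra dcore-0-only TPC (instance NUM_DCORE0_TPC - 1) is pushed past all
 * the regular ones and takes the last seq, NUM_OF_DCORES * NUM_OF_TPC_PER_DCORE.
 */
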
static void gaudi2_init_tpc(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	struct gaudi2_tpc_init_cfg_data init_cfg_data;
	struct iterate_module_ctx tpc_iter;

	if (!hdev->asic_prop.tpc_enabled_mask)
		return;

	if ((gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK) == HW_CAP_TPC_MASK)
		return;

	init_cfg_data.dcore_tpc_qid_base[0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0;
	init_cfg_data.dcore_tpc_qid_base[1] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0;
	init_cfg_data.dcore_tpc_qid_base[2] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0;
	init_cfg_data.dcore_tpc_qid_base[3] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0;
	tpc_iter.fn = &gaudi2_init_tpc_config;
	tpc_iter.data = &init_cfg_data;
	gaudi2_iterate_tpcs(hdev, &tpc_iter);
}

static void gaudi2_init_rotator(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 i, reg_base, queue_id;

	queue_id = GAUDI2_QUEUE_ID_ROT_0_0;

	for (i = 0 ; i < NUM_OF_ROT ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
		reg_base = gaudi2_qm_blocks_bases[queue_id];
		gaudi2_init_qman(hdev, reg_base, queue_id);

		gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_ROT_SHIFT + i);
	}
}

static void gaudi2_init_vdec_brdg_ctrl(struct hl_device *hdev, u64 base_addr, u32 decoder_id)
{
	u32 sob_id;

	/* VCMD normal interrupt */
	sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id;
	WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_AWADDR,
		mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32));
	WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE);

	/* VCMD abnormal interrupt */
	sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id;
	WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_AWADDR,
		mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32));
	WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE);
}

static void gaudi2_init_dec(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 dcore_id, dec_id, dec_bit;
	u64 base_addr;

	if (!hdev->asic_prop.decoder_enabled_mask)
		return;

	if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == HW_CAP_DEC_MASK)
		return;

	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
		for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
			dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id;

			if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
				continue;

			base_addr = mmDCORE0_DEC0_CMD_BASE +
					BRDG_CTRL_BLOCK_OFFSET +
					dcore_id * DCORE_OFFSET +
					dec_id * DCORE_VDEC_OFFSET;

			gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit);

			gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit);
		}

	for (dec_id = 0 ; dec_id < NUM_OF_PCIE_VDEC ; dec_id++) {
		dec_bit = PCIE_DEC_SHIFT + dec_id;
		if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
			continue;

		base_addr = mmPCIE_DEC0_CMD_BASE + BRDG_CTRL_BLOCK_OFFSET +
				dec_id * DCORE_VDEC_OFFSET;

		gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit);

		gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit);
	}
}

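/*
 * Decoder capability bit layout used above: the dcore decoders occupy the low
 * bits in dcore-major order (NUM_OF_DEC_PER_DCORE per dcore), and the PCIE
 * decoders follow starting at PCIE_DEC_SHIFT. For example, decoder 1 of
 * dcore 2 maps to dec_bit = 2 * NUM_OF_DEC_PER_DCORE + 1.
 */
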
static int gaudi2_mmu_update_asid_hop0_addr(struct hl_device *hdev,
		u32 stlb_base, u32 asid, u64 phys_addr)
{
	u32 status, timeout_usec;
	int rc;

	if (hdev->pldm || !hdev->pdev)
		timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	WREG32(stlb_base + STLB_ASID_OFFSET, asid);
	WREG32(stlb_base + STLB_HOP0_PA43_12_OFFSET, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
	WREG32(stlb_base + STLB_HOP0_PA63_44_OFFSET, phys_addr >> MMU_HOP0_PA63_44_SHIFT);
	WREG32(stlb_base + STLB_BUSY_OFFSET, 0x80000000);

	rc = hl_poll_timeout(
		hdev,
		stlb_base + STLB_BUSY_OFFSET,
		status,
		!(status & 0x80000000),
		1000,
		timeout_usec);

	if (rc) {
		dev_err(hdev->dev, "Timeout during MMU hop0 config of asid %d\n", asid);
		return rc;
	}

	return 0;
}

static void gaudi2_mmu_send_invalidate_cache_cmd(struct hl_device *hdev, u32 stlb_base,
		u32 start_offset, u32 inv_start_val,
		u32 flags)
{
	/* clear PMMU mem line cache (only needed in mmu range invalidation) */
	if (flags & MMU_OP_CLEAR_MEMCACHE)
		WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INVALIDATION, 0x1);

	if (flags & MMU_OP_SKIP_LOW_CACHE_INV)
		return;

	WREG32(stlb_base + start_offset, inv_start_val);
}

static int gaudi2_mmu_invalidate_cache_status_poll(struct hl_device *hdev, u32 stlb_base,
		struct gaudi2_cache_invld_params *inv_params)
{
	u32 status, timeout_usec, start_offset;
	int rc;

	timeout_usec = (hdev->pldm) ? GAUDI2_PLDM_MMU_TIMEOUT_USEC :
					GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC;

	/* poll PMMU mem line cache (only needed in mmu range invalidation) */
	if (inv_params->flags & MMU_OP_CLEAR_MEMCACHE) {
		rc = hl_poll_timeout(
			hdev,
			mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS,
			status,
			status & 0x1,
			1000,
			timeout_usec);

		if (rc)
			return rc;

		/* Need to manually reset the status to 0 */
		WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS, 0x0);
	}

	/* Lower cache does not work with cache lines, hence we can skip its
	 * invalidation upon map and invalidate only upon unmap
	 */
	if (inv_params->flags & MMU_OP_SKIP_LOW_CACHE_INV)
		return 0;

	start_offset = inv_params->range_invalidation ?
			STLB_RANGE_CACHE_INVALIDATION_OFFSET : STLB_INV_ALL_START_OFFSET;

	rc = hl_poll_timeout(
		hdev,
		stlb_base + start_offset,
		status,
		!(status & 0x1),
		1000,
		timeout_usec);

	return rc;
}

bool gaudi2_is_hmmu_enabled(struct hl_device *hdev, int dcore_id, int hmmu_id)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 hw_cap;

	hw_cap = HW_CAP_DCORE0_DMMU0 << (NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id);

	if (gaudi2->hw_cap_initialized & hw_cap)
		return true;

	return false;
}

/* this function shall be called only for HMMUs for which capability bit is set */
static inline u32 get_hmmu_stlb_base(int dcore_id, int hmmu_id)
{
	u32 offset;

	offset = (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET);
	return (u32)(mmDCORE0_HMMU0_STLB_BASE + offset);
}

static void gaudi2_mmu_invalidate_cache_trigger(struct hl_device *hdev, u32 stlb_base,
		struct gaudi2_cache_invld_params *inv_params)
{
	u32 start_offset;

	if (inv_params->range_invalidation) {
		/* Set the addresses range
		 * Note that the start address we write to the register is not included
		 * in the invalidation range, by design.
		 * That's why we need to set a lower address than the one we actually
		 * want to be included in the range invalidation.
		 */
		u64 start = inv_params->start_va - 1;

		start_offset = STLB_RANGE_CACHE_INVALIDATION_OFFSET;

		WREG32(stlb_base + STLB_RANGE_INV_START_LSB_OFFSET,
				start >> MMU_RANGE_INV_VA_LSB_SHIFT);

		WREG32(stlb_base + STLB_RANGE_INV_START_MSB_OFFSET,
				start >> MMU_RANGE_INV_VA_MSB_SHIFT);

		WREG32(stlb_base + STLB_RANGE_INV_END_LSB_OFFSET,
				inv_params->end_va >> MMU_RANGE_INV_VA_LSB_SHIFT);

		WREG32(stlb_base + STLB_RANGE_INV_END_MSB_OFFSET,
				inv_params->end_va >> MMU_RANGE_INV_VA_MSB_SHIFT);
	} else {
		start_offset = STLB_INV_ALL_START_OFFSET;
	}

	gaudi2_mmu_send_invalidate_cache_cmd(hdev, stlb_base, start_offset,
				inv_params->inv_start_val, inv_params->flags);
}

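/*
 * Worked example for the exclusive start address: to invalidate the range
 * [0x1000, 0x3000), start_va is programmed as 0x1000 - 1 = 0xFFF, so the
 * hardware, which excludes the programmed start address by design, still
 * covers 0x1000 itself.
 */
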
static inline void gaudi2_hmmu_invalidate_cache_trigger(struct hl_device *hdev,
		int dcore_id, int hmmu_id,
		struct gaudi2_cache_invld_params *inv_params)
{
	u32 stlb_base = get_hmmu_stlb_base(dcore_id, hmmu_id);

	gaudi2_mmu_invalidate_cache_trigger(hdev, stlb_base, inv_params);
}

static inline int gaudi2_hmmu_invalidate_cache_status_poll(struct hl_device *hdev,
		int dcore_id, int hmmu_id,
		struct gaudi2_cache_invld_params *inv_params)
{
	u32 stlb_base = get_hmmu_stlb_base(dcore_id, hmmu_id);

	return gaudi2_mmu_invalidate_cache_status_poll(hdev, stlb_base, inv_params);
}

static int gaudi2_hmmus_invalidate_cache(struct hl_device *hdev,
		struct gaudi2_cache_invld_params *inv_params)
{
	int dcore_id, hmmu_id;

	/* first send all invalidation commands */
	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
		for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) {
			if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id))
				continue;

			gaudi2_hmmu_invalidate_cache_trigger(hdev, dcore_id, hmmu_id, inv_params);
		}
	}

	/* next, poll all invalidations status */
	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
		for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) {
			int rc;

			if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id))
				continue;

			rc = gaudi2_hmmu_invalidate_cache_status_poll(hdev, dcore_id, hmmu_id,
										inv_params);
			if (rc)
				return rc;
		}
	}

	return 0;
}

static int gaudi2_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	struct gaudi2_cache_invld_params invld_params;
	int rc = 0;

	if (hdev->reset_info.hard_reset_pending)
		return rc;

	invld_params.range_invalidation = false;
	invld_params.inv_start_val = 1;

	if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
		invld_params.flags = flags;
		gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params);
		rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE,
										&invld_params);
	} else if (flags & MMU_OP_PHYS_PACK) {
		invld_params.flags = 0;
		rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params);
	}

	return rc;
}

static int gaudi2_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard,
		u32 flags, u32 asid, u64 va, u64 size)
{
	struct gaudi2_cache_invld_params invld_params = {0};
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u64 start_va, end_va;
	u32 inv_start_val;
	int rc = 0;

	if (hdev->reset_info.hard_reset_pending)
		return rc;

	inv_start_val = (1 << MMU_RANGE_INV_EN_SHIFT |
			1 << MMU_RANGE_INV_ASID_EN_SHIFT |
			asid << MMU_RANGE_INV_ASID_SHIFT);
	start_va = va;
	end_va = start_va + size;

	if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
		/* As range invalidation does not support zero address we will
		 * do full invalidation in this case
		 */
		if (start_va) {
			invld_params.range_invalidation = true;
			invld_params.start_va = start_va;
			invld_params.end_va = end_va;
			invld_params.inv_start_val = inv_start_val;
			invld_params.flags = flags | MMU_OP_CLEAR_MEMCACHE;
		} else {
			invld_params.range_invalidation = false;
			invld_params.inv_start_val = 1;
			invld_params.flags = flags;
		}

		gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params);
		rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE,
										&invld_params);
		if (rc)
			return rc;

	} else if (flags & MMU_OP_PHYS_PACK) {
		invld_params.start_va = gaudi2_mmu_scramble_addr(hdev, start_va);
		invld_params.end_va = gaudi2_mmu_scramble_addr(hdev, end_va);
		invld_params.inv_start_val = inv_start_val;
		invld_params.flags = flags;
		rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params);
	}

	return rc;
}

static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 hop0_addr;
	u32 asid, max_asid = prop->max_asid;
	int rc;

	/* it takes too much time to init all of the ASIDs on palladium */
	if (hdev->pldm)
		max_asid = min((u32) 8, max_asid);

	for (asid = 0 ; asid < max_asid ; asid++) {
		hop0_addr = hdev->mmu_priv.hr.mmu_asid_hop0[asid].phys_addr;
		rc = gaudi2_mmu_update_asid_hop0_addr(hdev, stlb_base, asid, hop0_addr);
		if (rc) {
			dev_err(hdev->dev, "failed to set hop0 addr for asid %d\n", asid);
			return rc;
		}
	}

	return 0;
}

static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb_base)
{
	u32 status, timeout_usec;
	int rc;

	if (hdev->pldm || !hdev->pdev)
		timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC;

	WREG32(stlb_base + STLB_INV_ALL_START_OFFSET, 1);

	rc = hl_poll_timeout(
		hdev,
		stlb_base + STLB_SRAM_INIT_OFFSET,
		status,
		!status,
		1000,
		timeout_usec);

	if (rc)
		dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU SRAM init\n");

	rc = gaudi2_mmu_update_hop0_addr(hdev, stlb_base);
	if (rc)
		return rc;

	WREG32(mmu_base + MMU_BYPASS_OFFSET, 0);

	rc = hl_poll_timeout(
		hdev,
		stlb_base + STLB_INV_ALL_START_OFFSET,
		status,
		!status,
		1000,
		timeout_usec);

	if (rc)
		dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU invalidate all\n");

	WREG32(mmu_base + MMU_ENABLE_OFFSET, 1);

	return rc;
}

static int gaudi2_pci_mmu_init(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 mmu_base, stlb_base;
	int rc;

	if (gaudi2->hw_cap_initialized & HW_CAP_PMMU)
		return 0;

	mmu_base = mmPMMU_HBW_MMU_BASE;
	stlb_base = mmPMMU_HBW_STLB_BASE;

	RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET,
		(0 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_SHIFT) |
		(5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_SHIFT) |
		(4 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_SHIFT) |
		(5 << PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_SHIFT) |
		(5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_SHIFT),
		PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK |
		PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK |
		PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK |
		PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_MASK |
		PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK);

	WREG32(stlb_base + STLB_LL_LOOKUP_MASK_63_32_OFFSET, 0);

	if (PAGE_SIZE == SZ_64K) {
		/* Set page sizes to 64K on hop5 and 16M on hop4 + enable 8 bit hops */
		RMWREG32_SHIFTED(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET,
			FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK, 4) |
			FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK, 3) |
			FIELD_PREP(
				DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK,
				1),
			DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK |
			DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK |
			DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK);
	}

	WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_PMMU_SPI_SEI_ENABLE_MASK);

	rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base);
	if (rc)
		return rc;

	gaudi2->hw_cap_initialized |= HW_CAP_PMMU;

	return 0;
}

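/*
 * Note on the SZ_64K branch above: when the host kernel runs with 64KB pages,
 * the PMMU hop page sizes are reprogrammed (64KB leaf pages on hop5, 16MB on
 * hop4) so that device-side mappings line up with the host allocation
 * granularity, instead of the defaults used with 4KB host pages.
 */
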
static int gaudi2_dcore_hmmu_init(struct hl_device *hdev, int dcore_id,
		int hmmu_id)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 offset, mmu_base, stlb_base, hw_cap;
	u8 dmmu_seq;
	int rc;

	dmmu_seq = NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id;
	hw_cap = HW_CAP_DCORE0_DMMU0 << dmmu_seq;

	/*
	 * return if DMMU is already initialized or if it's not out of
	 * isolation (due to cluster binning)
	 */
	if ((gaudi2->hw_cap_initialized & hw_cap) || !(prop->hmmu_hif_enabled_mask & BIT(dmmu_seq)))
		return 0;

	offset = (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET);
	mmu_base = mmDCORE0_HMMU0_MMU_BASE + offset;
	stlb_base = mmDCORE0_HMMU0_STLB_BASE + offset;

	RMWREG32(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET, 5 /* 64MB */,
			MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK);

	RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET,
		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK, 0) |
		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK, 3) |
		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK, 3) |
		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK, 3) |
		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK, 3),
		DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK |
		DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK |
		DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK |
		DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK |
		DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK);

	RMWREG32(stlb_base + STLB_HOP_CONFIGURATION_OFFSET, 1,
			STLB_HOP_CONFIGURATION_ONLY_LARGE_PAGE_MASK);

	WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_HMMU_SPI_SEI_ENABLE_MASK);

	rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base);
	if (rc)
		return rc;

	gaudi2->hw_cap_initialized |= hw_cap;

	return 0;
}

static int gaudi2_hbm_mmu_init(struct hl_device *hdev)
{
	int rc, dcore_id, hmmu_id;

	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
		for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) {
			rc = gaudi2_dcore_hmmu_init(hdev, dcore_id, hmmu_id);
			if (rc)
				return rc;
		}

	return 0;
}

static int gaudi2_mmu_init(struct hl_device *hdev)
{
	int rc;

	rc = gaudi2_pci_mmu_init(hdev);
	if (rc)
		return rc;

	rc = gaudi2_hbm_mmu_init(hdev);
	if (rc)
		return rc;

	return 0;
}

static int gaudi2_hw_init(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	int rc;

	/* Let's mark in the H/W that we have reached this point. We check
	 * this value in the reset_before_init function to understand whether
	 * we need to reset the chip before doing H/W init. This register is
	 * cleared by the H/W upon H/W reset
	 */
	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);

	/* Perform read from the device to make sure device is up */
	RREG32(mmHW_STATE);

	/* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
	 * So we set it here and if anyone tries to move it later to
	 * a different address, there will be an error
	 */
	if (hdev->asic_prop.iatu_done_by_fw)
		gaudi2->dram_bar_cur_addr = DRAM_PHYS_BASE;

	/*
	 * Before pushing u-boot/linux to device, need to set the hbm bar to
	 * base address of dram
	 */
	if (gaudi2_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
		dev_err(hdev->dev, "failed to map HBM bar to DRAM base address\n");
		return -EIO;
	}

	rc = gaudi2_init_cpu(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize CPU\n");
		return rc;
	}

	gaudi2_init_scrambler_hbm(hdev);
	gaudi2_init_kdma(hdev);

	rc = gaudi2_init_cpu_queues(hdev, GAUDI2_CPU_TIMEOUT_USEC);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n", rc);
		return rc;
	}

	rc = gaudi2->cpucp_info_get(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get cpucp info\n");
		return rc;
	}

	rc = gaudi2_mmu_init(hdev);
	if (rc)
		return rc;

	gaudi2_init_pdma(hdev);
	gaudi2_init_edma(hdev);
	gaudi2_init_sm(hdev);
	gaudi2_init_tpc(hdev);
	gaudi2_init_mme(hdev);
	gaudi2_init_rotator(hdev);
	gaudi2_init_dec(hdev);
	gaudi2_enable_timestamp(hdev);

	rc = gaudi2_coresight_init(hdev);
	if (rc)
		goto disable_queues;

	rc = gaudi2_enable_msix(hdev);
	if (rc)
		goto disable_queues;

	/* Perform read from the device to flush all configuration */
	RREG32(mmHW_STATE);

	return 0;

disable_queues:
	gaudi2_disable_dma_qmans(hdev);
	gaudi2_disable_mme_qmans(hdev);
	gaudi2_disable_tpc_qmans(hdev);
	gaudi2_disable_rot_qmans(hdev);
	gaudi2_disable_nic_qmans(hdev);

	gaudi2_disable_timestamp(hdev);

	return rc;
}

/**
 * gaudi2_send_hard_reset_cmd - common function to handle reset
 *
 * @hdev: pointer to the habanalabs device structure
 *
 * This function handles the various possible scenarios for reset.
 * It considers if reset is handled by driver/FW and what FW components are loaded
 */
static void gaudi2_send_hard_reset_cmd(struct hl_device *hdev)
{
	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	bool heartbeat_reset, preboot_only, cpu_initialized = false;
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 cpu_boot_status;

	preboot_only = (hdev->fw_loader.fw_comp_loaded == FW_TYPE_PREBOOT_CPU);
	heartbeat_reset = (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT);

	/*
	 * Handle corner case where failure was at cpu management app load,
	 * and driver didn't detect any failure while loading the FW.
	 * In such scenario the driver will send only HALT_MACHINE,
	 * and no one will respond to this request since FW is already back in preboot
	 * and it cannot handle such a cmd.
	 * In this case next time the management app loads it'll check on events register
	 * which will still have the halt indication, and will reboot the device.
	 * The solution is to let preboot clear all relevant registers before next boot
	 * once the driver sends COMMS_RST_DEV.
	 */
	cpu_boot_status = RREG32(mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS);

	if (gaudi2 && (gaudi2->hw_cap_initialized & HW_CAP_CPU) &&
			(cpu_boot_status == CPU_BOOT_STATUS_SRAM_AVAIL))
		cpu_initialized = true;

	/*
	 * when Linux/Bootfit exist this write to the SP can be interpreted in 2 ways:
	 * 1. FW reset: FW initiates the reset sequence
	 * 2. driver reset: FW will start HALT sequence (the preparations for the
	 *                  reset but not the reset itself as it is not implemented
	 *                  on their part) and LKD will wait to let FW complete the
	 *                  sequence before issuing the reset
	 */
	if (!preboot_only && cpu_initialized) {
		WREG32(le32_to_cpu(dyn_regs->gic_host_halt_irq),
			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_HALT_MACHINE].cpu_id);

		msleep(GAUDI2_CPU_RESET_WAIT_MSEC);
	}

	/*
	 * When working with preboot (without Linux/Boot fit) we can
	 * communicate only using the COMMS commands to issue halt/reset.
	 *
	 * For the case in which we are working with Linux/Bootfit this is a hail-mary
	 * attempt to revive the card in the small chance that the f/w has
	 * experienced a watchdog event, which caused it to return back to preboot.
	 * In that case, triggering reset through GIC won't help. We need to
	 * trigger the reset as if Linux wasn't loaded.
	 *
	 * We do it only if the reset cause was HB, because that would be the
	 * indication of such an event.
	 *
	 * In case watchdog hasn't expired but we still got HB, then this won't
	 * do any damage.
	 */
	if (heartbeat_reset || preboot_only || !cpu_initialized) {
		if (hdev->asic_prop.hard_reset_done_by_fw)
			hl_fw_ask_hard_reset_without_linux(hdev);
		else
			hl_fw_ask_halt_machine_without_linux(hdev);
	}
}

/**
 * gaudi2_execute_hard_reset - execute hard reset by driver/FW
 *
 * @hdev: pointer to the habanalabs device structure
 *
 * This function executes hard reset based on if driver/FW should do the reset
 */
static void gaudi2_execute_hard_reset(struct hl_device *hdev)
{
	if (hdev->asic_prop.hard_reset_done_by_fw) {
		gaudi2_send_hard_reset_cmd(hdev);
		return;
	}

	/* Set device to handle FLR by H/W as we will put the device
	 * CPU to halt mode
	 */
	WREG32(mmPCIE_AUX_FLR_CTRL,
		(PCIE_AUX_FLR_CTRL_HW_CTRL_MASK | PCIE_AUX_FLR_CTRL_INT_MASK_MASK));

	gaudi2_send_hard_reset_cmd(hdev);

	WREG32(mmPSOC_RESET_CONF_SW_ALL_RST, 1);
}

static int gaudi2_get_soft_rst_done_indication(struct hl_device *hdev, u32 poll_timeout_us)
{
	int i, rc = 0;
	u32 reg_val;

	for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++)
		rc = hl_poll_timeout(
			hdev,
			mmCPU_RST_STATUS_TO_HOST,
			reg_val,
			reg_val == CPU_RST_STATUS_SOFT_RST_DONE,
			1000,
			poll_timeout_us);

	if (rc)
		dev_err(hdev->dev, "Timeout while waiting for FW to complete soft reset (0x%x)\n",
				reg_val);

	return rc;
}

/**
 * gaudi2_execute_soft_reset - execute soft reset by driver/FW
 *
 * @hdev: pointer to the habanalabs device structure
 * @driver_performs_reset: true if driver should perform reset instead of f/w.
 * @poll_timeout_us: time to wait for response from f/w.
 *
 * This function executes soft reset based on if driver/FW should do the reset
 */
static int gaudi2_execute_soft_reset(struct hl_device *hdev, bool driver_performs_reset,
		u32 poll_timeout_us)
{
	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	int rc = 0;

	if (!driver_performs_reset) {
		if (hl_is_fw_sw_ver_below(hdev, 1, 10)) {
			/* set SP to indicate reset request sent to FW */
			if (dyn_regs->cpu_rst_status)
				WREG32(le32_to_cpu(dyn_regs->cpu_rst_status), CPU_RST_STATUS_NA);
			else
				WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA);
			WREG32(le32_to_cpu(dyn_regs->gic_host_soft_rst_irq),
				gaudi2_irq_map_table[GAUDI2_EVENT_CPU_SOFT_RESET].cpu_id);

			/* wait for f/w response */
			rc = gaudi2_get_soft_rst_done_indication(hdev, poll_timeout_us);
		} else {
			rc = hl_fw_send_soft_reset(hdev);
		}
		return rc;
	}

	/* Block access to engines, QMANs and SM during reset, these
	 * RRs will be reconfigured after soft reset.
	 * PCIE_MSIX is left unsecured to allow NIC packets processing during the reset.
	 */
	gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 1,
					mmDCORE0_TPC0_QM_DCCM_BASE, mmPCIE_MSIX_BASE);

	gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 2,
					mmPCIE_MSIX_BASE + HL_BLOCK_SIZE,
					mmPCIE_VDEC1_MSTR_IF_RR_SHRD_HBW_BASE + HL_BLOCK_SIZE);

	WREG32(mmPSOC_RESET_CONF_SOFT_RST, 1);
	return 0;
}

static void gaudi2_poll_btm_indication(struct hl_device *hdev, u32 poll_timeout_us)
{
	u32 i, rc = 0, reg_val;

	/* We poll the BTM done indication multiple times after reset due to
	 * a HW errata 'GAUDI2_0300'
	 */
	for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++)
		rc = hl_poll_timeout(
			hdev,
			mmPSOC_GLOBAL_CONF_BTM_FSM,
			reg_val,
			reg_val == 0,
			1000,
			poll_timeout_us);

	if (rc)
		dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", reg_val);
}

static int gaudi2_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 poll_timeout_us, reset_sleep_ms;
	bool driver_performs_reset = false;
	int rc;

	if (hdev->pldm) {
		reset_sleep_ms = hard_reset ? GAUDI2_PLDM_HRESET_TIMEOUT_MSEC :
						GAUDI2_PLDM_SRESET_TIMEOUT_MSEC;
		poll_timeout_us = GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC;
	} else {
		reset_sleep_ms = GAUDI2_RESET_TIMEOUT_MSEC;
		poll_timeout_us = GAUDI2_RESET_POLL_TIMEOUT_USEC;
	}

	if (fw_reset)
		goto skip_reset;

	gaudi2_reset_arcs(hdev);

	if (hard_reset) {
		driver_performs_reset = !hdev->asic_prop.hard_reset_done_by_fw;
		gaudi2_execute_hard_reset(hdev);
	} else {
		/*
		 * As we have to support also work with preboot only (which does not support
		 * soft reset) we have to make sure that security is disabled before letting
		 * the driver do the reset. User shall control the BFE flags to avoid asking for
		 * soft reset on a secured device with preboot only.
		 */
		driver_performs_reset = (hdev->fw_components == FW_TYPE_PREBOOT_CPU &&
							!hdev->asic_prop.fw_security_enabled);
		rc = gaudi2_execute_soft_reset(hdev, driver_performs_reset, poll_timeout_us);
		if (rc)
			return rc;
	}

skip_reset:
	if (driver_performs_reset || hard_reset) {
		/*
		 * Instead of waiting for BTM indication we should wait for preboot ready:
		 * Consider the below scenario:
		 * 1. FW update is being triggered
		 *        - setting the dirty bit
		 * 2. hard reset will be triggered due to the dirty bit
		 * 3. FW initiates the reset:
		 *        - dirty bit cleared
		 *        - BTM indication cleared
		 *        - preboot ready indication cleared
		 * 4. during hard reset:
		 *        - BTM indication will be set
		 *        - BIST test performed and another reset triggered
		 * 5. only after this reset the preboot will set the preboot ready
		 *
		 * when polling on BTM indication alone we can lose sync with FW while trying to
		 * communicate with FW that is during reset.
		 * to overcome this we will always wait to preboot ready indication
		 */

		/* without this sleep reset will not work */
		msleep(reset_sleep_ms);

		if (hdev->fw_components & FW_TYPE_PREBOOT_CPU)
			hl_fw_wait_preboot_ready(hdev);
		else
			gaudi2_poll_btm_indication(hdev, poll_timeout_us);
	}

	if (!gaudi2)
		return 0;

	gaudi2->dec_hw_cap_initialized &= ~(HW_CAP_DEC_MASK);
	gaudi2->tpc_hw_cap_initialized &= ~(HW_CAP_TPC_MASK);

	/*
	 * Clear NIC capability mask in order for driver to re-configure
	 * NIC QMANs. NIC ports will not be re-configured during soft
	 * reset as we call gaudi2_nic_init only during hard reset
	 */
	gaudi2->nic_hw_cap_initialized &= ~(HW_CAP_NIC_MASK);

	if (hard_reset) {
		gaudi2->hw_cap_initialized &=
			~(HW_CAP_DRAM | HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_MASK |
			HW_CAP_PMMU | HW_CAP_CPU | HW_CAP_CPU_Q |
			HW_CAP_SRAM_SCRAMBLER | HW_CAP_DMMU_MASK |
			HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_KDMA |
			HW_CAP_MME_MASK | HW_CAP_ROT_MASK);

		memset(gaudi2->events_stat, 0, sizeof(gaudi2->events_stat));
	} else {
		gaudi2->hw_cap_initialized &=
			~(HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_SW_RESET |
			HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_MME_MASK |
			HW_CAP_ROT_MASK);
	}

	return 0;
}

static int gaudi2_suspend(struct hl_device *hdev)
{
	int rc;

	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
	if (rc)
		dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");

	return rc;
}

static int gaudi2_resume(struct hl_device *hdev)
{
	return gaudi2_init_iatu(hdev);
}

static int gaudi2_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
		void *cpu_addr, dma_addr_t dma_addr, size_t size)
{
	int rc;

	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
			VM_DONTCOPY | VM_NORESERVE);

#ifdef _HAS_DMA_MMAP_COHERENT

	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size);
	if (rc)
		dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);

#else

	rc = remap_pfn_range(vma, vma->vm_start,
				virt_to_phys(cpu_addr) >> PAGE_SHIFT,
				size, vma->vm_page_prot);
	if (rc)
		dev_err(hdev->dev, "remap_pfn_range error %d", rc);

#endif

	return rc;
}

static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u64 hw_cap_mask = 0;
	u64 hw_tpc_cap_bit = 0;
	u64 hw_nic_cap_bit = 0;
	u64 hw_test_cap_bit = 0;

	switch (hw_queue_id) {
	case GAUDI2_QUEUE_ID_PDMA_0_0:
	case GAUDI2_QUEUE_ID_PDMA_0_1:
	case GAUDI2_QUEUE_ID_PDMA_1_0:
		hw_cap_mask = HW_CAP_PDMA_MASK;
		break;
	case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
		hw_test_cap_bit = HW_CAP_EDMA_SHIFT +
			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0) >> 2);
		break;
	case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
		hw_test_cap_bit = HW_CAP_EDMA_SHIFT + NUM_OF_EDMA_PER_DCORE +
			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0) >> 2);
		break;
	case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
		hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 2 * NUM_OF_EDMA_PER_DCORE +
			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0) >> 2);
		break;
	case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
		hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 3 * NUM_OF_EDMA_PER_DCORE +
			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0) >> 2);
		break;

	case GAUDI2_QUEUE_ID_DCORE0_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE0_MME_0_3:
		hw_test_cap_bit = HW_CAP_MME_SHIFT;
		break;

	case GAUDI2_QUEUE_ID_DCORE1_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE1_MME_0_3:
		hw_test_cap_bit = HW_CAP_MME_SHIFT + 1;
		break;

	case GAUDI2_QUEUE_ID_DCORE2_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE2_MME_0_3:
		hw_test_cap_bit = HW_CAP_MME_SHIFT + 2;
		break;

	case GAUDI2_QUEUE_ID_DCORE3_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE3_MME_0_3:
		hw_test_cap_bit = HW_CAP_MME_SHIFT + 3;
		break;

	case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_5_3:
		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT +
			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_TPC_0_0) >> 2);

		/* special case where cap bit refers to the first queue id */
		if (!hw_tpc_cap_bit)
			return !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(0));
		break;

	case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3:
		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + NUM_OF_TPC_PER_DCORE +
			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_TPC_0_0) >> 2);
		break;

	case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3:
		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (2 * NUM_OF_TPC_PER_DCORE) +
			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_TPC_0_0) >> 2);
		break;

	case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3:
		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (3 * NUM_OF_TPC_PER_DCORE) +
			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_TPC_0_0) >> 2);
		break;

	case GAUDI2_QUEUE_ID_DCORE0_TPC_6_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3:
		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (4 * NUM_OF_TPC_PER_DCORE);
		break;

	case GAUDI2_QUEUE_ID_ROT_0_0 ... GAUDI2_QUEUE_ID_ROT_1_3:
		hw_test_cap_bit = HW_CAP_ROT_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_ROT_0_0) >> 2);
		break;

	case GAUDI2_QUEUE_ID_NIC_0_0 ... GAUDI2_QUEUE_ID_NIC_23_3:
		hw_nic_cap_bit = HW_CAP_NIC_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_NIC_0_0) >> 2);

		/* special case where cap bit refers to the first queue id */
		if (!hw_nic_cap_bit)
			return !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(0));
		break;

	case GAUDI2_QUEUE_ID_CPU_PQ:
		return !!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q);

	default:
		return false;
	}

	if (hw_tpc_cap_bit)
		return !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(hw_tpc_cap_bit));

	if (hw_nic_cap_bit)
		return !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(hw_nic_cap_bit));

	if (hw_test_cap_bit)
		hw_cap_mask = BIT_ULL(hw_test_cap_bit);

	return !!(gaudi2->hw_cap_initialized & hw_cap_mask);
}

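/*
 * Worked example for the cap-bit math above: each QMAN exposes 4 PQs, so
 * dividing the queue offset by 4 (">> 2") yields the engine instance within
 * the dcore. For instance, GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0 lands on EDMA
 * instance 2 * NUM_OF_EDMA_PER_DCORE + 1 relative to HW_CAP_EDMA_SHIFT.
 */
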
static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;

	switch (arc_id) {
	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
	case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
		return !!(gaudi2->active_hw_arc & BIT_ULL(arc_id));

	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
		return !!(gaudi2->active_tpc_arc & BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0));

	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
		return !!(gaudi2->active_nic_arc & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0));

	default:
		return false;
	}
}

static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;

	switch (arc_id) {
	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
	case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
		gaudi2->active_hw_arc &= ~(BIT_ULL(arc_id));
		break;

	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
		gaudi2->active_tpc_arc &= ~(BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0));
		break;

	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
		gaudi2->active_nic_arc &= ~(BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0));
		break;

	default:
		return;
	}
}

static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;

	switch (arc_id) {
	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
	case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
		gaudi2->active_hw_arc |= BIT_ULL(arc_id);
		break;

	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
		gaudi2->active_tpc_arc |= BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0);
		break;

	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
		gaudi2->active_nic_arc |= BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0);
		break;

	default:
		return;
	}
}

6522 static void gaudi2_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
6524 struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
6525 u32 pq_offset, reg_base, db_reg_offset, db_value;
6527 if (hw_queue_id != GAUDI2_QUEUE_ID_CPU_PQ) {
6529 * QMAN has 4 successive PQ_PI registers, 1 for each of the QMAN PQs.
6530 * Masking the H/W queue ID with 0x3 extracts the QMAN internal PQ index.
6531 */
6533 pq_offset = (hw_queue_id & 0x3) * 4;
6534 reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
6535 db_reg_offset = reg_base + QM_PQ_PI_0_OFFSET + pq_offset;
6537 db_reg_offset = mmCPU_IF_PF_PQ_PI;
6542 /* ring the doorbell */
6543 WREG32(db_reg_offset, db_value);
6545 if (hw_queue_id == GAUDI2_QUEUE_ID_CPU_PQ) {
6546 /* make sure device CPU will read latest data from host */
6548 WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq),
6549 gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id);
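/*
 * A minimal sketch (not driver code) of how the doorbell register for a
 * given H/W queue is derived. Each QMAN exposes four consecutive 32-bit
 * PQ_PI registers, so the two LSBs of the queue ID select the internal PQ
 * while the rest selects the QMAN block. The helper name is hypothetical.
 */
static inline u32 __maybe_unused example_pq_pi_db_reg(u32 hw_queue_id)
{
	u32 reg_base = gaudi2_qm_blocks_bases[hw_queue_id];

	/* 4 bytes per PQ_PI register, internal PQ index is hw_queue_id & 0x3 */
	return reg_base + QM_PQ_PI_0_OFFSET + (hw_queue_id & 0x3) * 4;
}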
6553 static void gaudi2_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd)
6555 __le64 *pbd = (__le64 *) bd;
6557 /* The QMANs are in host memory, so a simple copy suffices */
6562 static void *gaudi2_dma_alloc_coherent(struct hl_device *hdev, size_t size,
6563 dma_addr_t *dma_handle, gfp_t flags)
6565 return dma_alloc_coherent(&hdev->pdev->dev, size, dma_handle, flags);
6568 static void gaudi2_dma_free_coherent(struct hl_device *hdev, size_t size,
6569 void *cpu_addr, dma_addr_t dma_handle)
6571 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, dma_handle);
6574 static int gaudi2_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
6575 u32 timeout, u64 *result)
6577 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6579 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) {
6586 timeout = GAUDI2_MSG_TO_CPU_TIMEOUT_USEC;
6588 return hl_fw_send_cpu_message(hdev, GAUDI2_QUEUE_ID_CPU_PQ, msg, len, timeout, result);
6591 static void *gaudi2_dma_pool_zalloc(struct hl_device *hdev, size_t size,
6592 gfp_t mem_flags, dma_addr_t *dma_handle)
6594 if (size > GAUDI2_DMA_POOL_BLK_SIZE)
6597 return dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
6600 static void gaudi2_dma_pool_free(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr)
6602 dma_pool_free(hdev->dma_pool, vaddr, dma_addr);
6605 static void *gaudi2_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
6606 dma_addr_t *dma_handle)
6608 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
6611 static void gaudi2_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr)
6613 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
6616 static dma_addr_t gaudi2_dma_map_single(struct hl_device *hdev, void *addr, int len,
6617 enum dma_data_direction dir)
6619 dma_addr_t dma_addr;
6621 dma_addr = dma_map_single(&hdev->pdev->dev, addr, len, dir);
6622 if (unlikely(dma_mapping_error(&hdev->pdev->dev, dma_addr)))
6628 static void gaudi2_dma_unmap_single(struct hl_device *hdev, dma_addr_t addr, int len,
6629 enum dma_data_direction dir)
6631 dma_unmap_single(&hdev->pdev->dev, addr, len, dir);
6634 static int gaudi2_validate_cb_address(struct hl_device *hdev, struct hl_cs_parser *parser)
6636 struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
6637 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6639 if (!gaudi2_is_queue_enabled(hdev, parser->hw_queue_id)) {
6640 dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
6644 /* Just check if CB address is valid */
6646 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6647 parser->user_cb_size,
6648 asic_prop->sram_user_base_address,
6649 asic_prop->sram_end_address))
6652 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6653 parser->user_cb_size,
6654 asic_prop->dram_user_base_address,
6655 asic_prop->dram_end_address))
6658 if ((gaudi2->hw_cap_initialized & HW_CAP_DMMU_MASK) &&
6659 hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6660 parser->user_cb_size,
6661 asic_prop->dmmu.start_addr,
6662 asic_prop->dmmu.end_addr))
6665 if (gaudi2->hw_cap_initialized & HW_CAP_PMMU) {
6666 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6667 parser->user_cb_size,
6668 asic_prop->pmmu.start_addr,
6669 asic_prop->pmmu.end_addr) ||
6670 hl_mem_area_inside_range(
6671 (u64) (uintptr_t) parser->user_cb,
6672 parser->user_cb_size,
6673 asic_prop->pmmu_huge.start_addr,
6674 asic_prop->pmmu_huge.end_addr))
6677 } else if (gaudi2_host_phys_addr_valid((u64) (uintptr_t) parser->user_cb)) {
6681 if (!device_iommu_mapped(&hdev->pdev->dev))
6685 dev_err(hdev->dev, "CB address %p + 0x%x for internal QMAN is not valid\n",
6686 parser->user_cb, parser->user_cb_size);
6691 static int gaudi2_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
6693 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6695 if (!parser->is_kernel_allocated_cb)
6696 return gaudi2_validate_cb_address(hdev, parser);
6698 if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
6699 dev_err(hdev->dev, "PMMU not initialized - Unsupported mode in Gaudi2\n");
6706 static int gaudi2_send_heartbeat(struct hl_device *hdev)
6708 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6710 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
6713 return hl_fw_send_heartbeat(hdev);
6716 /* This is an internal helper function, used to update the KDMA MMU props.
6717 * Should be called with the KDMA lock held.
6718 */
6719 static void gaudi2_kdma_set_mmbp_asid(struct hl_device *hdev,
6720 bool mmu_bypass, u32 asid)
6722 u32 rw_asid, rw_mmu_bp;
6724 rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
6725 (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
6727 rw_mmu_bp = (!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_SHIFT) |
6728 (!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_SHIFT);
6730 WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_ASID, rw_asid);
6731 WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP, rw_mmu_bp);
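/*
 * A minimal sketch: the RD/WR ASID packing above expressed with FIELD_PREP,
 * as the shared-MMU preparation code later in this file does. Equivalent to
 * the open-coded shifts; the helper name is hypothetical.
 */
static inline u32 __maybe_unused example_pack_rw_asid(u32 asid)
{
	return FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_MASK, asid) |
		FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_MASK, asid);
}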
6734 static void gaudi2_arm_cq_monitor(struct hl_device *hdev, u32 sob_id, u32 mon_id, u32 cq_id,
6735 u32 mon_payload, u32 sync_value)
6737 u32 sob_offset, mon_offset, sync_group_id, mode, mon_arm;
6740 sob_offset = sob_id * 4;
6741 mon_offset = mon_id * 4;
6743 /* Reset the SOB value */
6744 WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0);
6746 /* Since CQ_EN is set, this register holds the CQ ID rather than a payload address */
6747 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, cq_id);
6749 /* Since CQ_EN is set, configure this register with the CS index (the monitor payload) */
6750 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, mon_payload);
6752 sync_group_id = sob_id / 8;
6753 mask = ~(1 << (sob_id & 0x7));
6754 mode = 1; /* comparison mode is "equal to" */
6756 mon_arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, sync_value);
6757 mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode);
6758 mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask);
6759 mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sync_group_id);
6760 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, mon_arm);
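/*
 * Worked example of the MON_ARM encoding above: SOBs are grouped eight to a
 * sync group; the monitor watches one group (SID) and the 8-bit MASK clears
 * the bits of the SOBs it should ignore. For a hypothetical sob_id = 13 with
 * sync_value = 1:
 *   sync_group_id = 13 / 8 = 1, mask = ~(1 << (13 & 0x7)) = 0xdf,
 * so the armed monitor fires when SOB 13 of group 1 reaches the value 1.
 */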
6763 /* Internal helper: submits a linear copy or memset job to the KDMA engine and polls for its completion */
6764 static int gaudi2_send_job_to_kdma(struct hl_device *hdev,
6765 u64 src_addr, u64 dst_addr,
6766 u32 size, bool is_memset)
6768 u32 comp_val, commit_mask, *polling_addr, timeout, status = 0;
6769 struct hl_cq_entry *cq_base;
6774 gaudi2_arm_cq_monitor(hdev, GAUDI2_RESERVED_SOB_KDMA_COMPLETION,
6775 GAUDI2_RESERVED_MON_KDMA_COMPLETION,
6776 GAUDI2_RESERVED_CQ_KDMA_COMPLETION, 1, 1);
6778 comp_addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6779 (GAUDI2_RESERVED_SOB_KDMA_COMPLETION * sizeof(u32));
6781 comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) |
6782 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1);
6784 WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_LO, lower_32_bits(src_addr));
6785 WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_HI, upper_32_bits(src_addr));
6786 WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_LO, lower_32_bits(dst_addr));
6787 WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_HI, upper_32_bits(dst_addr));
6788 WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_LO, lower_32_bits(comp_addr));
6789 WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_HI, upper_32_bits(comp_addr));
6790 WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_WDATA, comp_val);
6791 WREG32(mmARC_FARM_KDMA_CTX_DST_TSIZE_0, size);
6793 commit_mask = FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_LIN_MASK, 1) |
6794 FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_WR_COMP_EN_MASK, 1);
6797 commit_mask |= FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_MEM_SET_MASK, 1);
6799 WREG32(mmARC_FARM_KDMA_CTX_COMMIT, commit_mask);
6801 /* Wait for completion */
6802 cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_KDMA_COMPLETION];
6803 cq_base = cq->kernel_address;
6804 polling_addr = (u32 *)&cq_base[cq->ci];
6807 /* 20 seconds of timeout for each 1MB of transfer size */
6808 timeout = ((size / SZ_1M) + 1) * USEC_PER_SEC * 20;
6810 timeout = KDMA_TIMEOUT_USEC;
6813 rc = hl_poll_timeout_memory(
6825 dev_err(hdev->dev, "Timeout while waiting for KDMA to be idle\n");
6826 WREG32(mmARC_FARM_KDMA_CFG_1, 1 << ARC_FARM_KDMA_CFG_1_HALT_SHIFT);
6830 cq->ci = hl_cq_inc_ptr(cq->ci);
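/*
 * A minimal sketch: a hypothetical caller of the KDMA job helper above,
 * performing a plain linear copy (no memset). Assumes the caller serializes
 * KDMA use, as required by the lock comment on gaudi2_kdma_set_mmbp_asid().
 */
static int __maybe_unused example_kdma_copy(struct hl_device *hdev,
					dma_addr_t src, u64 dst, u32 size)
{
	return gaudi2_send_job_to_kdma(hdev, src, dst, size, false);
}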
6835 static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val)
6839 for (i = 0 ; i < size ; i += sizeof(u32))
6840 WREG32(addr + i, val);
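/*
 * A minimal sketch: hypothetical use of the LBW memset helper above to zero
 * sixteen consecutive SOB registers. Note that size is given in bytes and is
 * consumed in 32-bit strides.
 */
static void __maybe_unused example_clear_sob_window(struct hl_device *hdev)
{
	gaudi2_memset_device_lbw(hdev, mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0,
				16 * sizeof(u32), 0);
}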
6843 static void gaudi2_qman_set_test_mode(struct hl_device *hdev, u32 hw_queue_id, bool enable)
6845 u32 reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
6848 WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED_TEST_MODE);
6849 WREG32(reg_base + QM_PQC_CFG_OFFSET, 0);
6851 WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED);
6852 WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT);
6856 static inline u32 gaudi2_test_queue_hw_queue_id_to_sob_id(struct hl_device *hdev, u32 hw_queue_id)
6858 return hdev->asic_prop.first_available_user_sob[0] +
6859 hw_queue_id - GAUDI2_QUEUE_ID_PDMA_0_0;
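/*
 * Note (editorial): the SOB chosen for a queue test is the first available
 * user SOB plus the queue's distance from GAUDI2_QUEUE_ID_PDMA_0_0, so every
 * tested queue gets a private SOB and the tests cannot clash with each other.
 */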
6862 static void gaudi2_test_queue_clear(struct hl_device *hdev, u32 hw_queue_id)
6864 u32 sob_offset = gaudi2_test_queue_hw_queue_id_to_sob_id(hdev, hw_queue_id) * 4;
6865 u32 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
6867 /* Reset the SOB value */
6868 WREG32(sob_addr, 0);
6871 static int gaudi2_test_queue_send_msg_short(struct hl_device *hdev, u32 hw_queue_id, u32 sob_val,
6872 struct gaudi2_queues_test_info *msg_info)
6874 u32 sob_offset = gaudi2_test_queue_hw_queue_id_to_sob_id(hdev, hw_queue_id) * 4;
6875 u32 tmp, sob_base = 1;
6876 struct packet_msg_short *msg_short_pkt = msg_info->kern_addr;
6877 size_t pkt_size = sizeof(struct packet_msg_short);
6880 tmp = (PACKET_MSG_SHORT << GAUDI2_PKT_CTL_OPCODE_SHIFT) |
6881 (1 << GAUDI2_PKT_CTL_EB_SHIFT) |
6882 (1 << GAUDI2_PKT_CTL_MB_SHIFT) |
6883 (sob_base << GAUDI2_PKT_SHORT_CTL_BASE_SHIFT) |
6884 (sob_offset << GAUDI2_PKT_SHORT_CTL_ADDR_SHIFT);
6886 msg_short_pkt->value = cpu_to_le32(sob_val);
6887 msg_short_pkt->ctl = cpu_to_le32(tmp);
6889 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, msg_info->dma_addr);
6892 "Failed to send msg_short packet to H/W queue %d\n", hw_queue_id);
6897 static int gaudi2_test_queue_wait_completion(struct hl_device *hdev, u32 hw_queue_id, u32 sob_val)
6899 u32 sob_offset = gaudi2_test_queue_hw_queue_id_to_sob_id(hdev, hw_queue_id) * 4;
6900 u32 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
6901 u32 timeout_usec, tmp;
6905 timeout_usec = GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC;
6907 timeout_usec = GAUDI2_TEST_QUEUE_WAIT_USEC;
6909 rc = hl_poll_timeout(
6917 if (rc == -ETIMEDOUT) {
6918 dev_err(hdev->dev, "H/W queue %d test failed (SOB_OBJ_0 == 0x%x)\n",
6926 static int gaudi2_test_cpu_queue(struct hl_device *hdev)
6928 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6931 * check the capability here, as send_cpu_message() won't update the result
6932 * value if the capability is not set
6934 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
6937 return hl_fw_test_cpu_queue(hdev);
6940 static int gaudi2_test_queues(struct hl_device *hdev)
6942 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6943 struct gaudi2_queues_test_info *msg_info;
6944 u32 sob_val = 0x5a5a;
6947 /* send test message on all enabled Qs */
6948 for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) {
6949 if (!gaudi2_is_queue_enabled(hdev, i))
6952 msg_info = &gaudi2->queues_test_info[i - GAUDI2_QUEUE_ID_PDMA_0_0];
6953 gaudi2_qman_set_test_mode(hdev, i, true);
6954 gaudi2_test_queue_clear(hdev, i);
6955 rc = gaudi2_test_queue_send_msg_short(hdev, i, sob_val, msg_info);
6960 rc = gaudi2_test_cpu_queue(hdev);
6964 /* verify that all messages were processed */
6965 for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) {
6966 if (!gaudi2_is_queue_enabled(hdev, i))
6969 rc = gaudi2_test_queue_wait_completion(hdev, i, sob_val);
6971 /* chip is not usable, no need for cleanups, just bail out with an error */
6974 gaudi2_test_queue_clear(hdev, i);
6975 gaudi2_qman_set_test_mode(hdev, i, false);
6982 static int gaudi2_compute_reset_late_init(struct hl_device *hdev)
6984 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6985 size_t irq_arr_size;
6988 gaudi2_init_arcs(hdev);
6990 rc = gaudi2_scrub_arcs_dccm(hdev);
6992 dev_err(hdev->dev, "Failed to scrub arcs DCCM\n");
6996 gaudi2_init_security(hdev);
6998 /* Unmask all IRQs since some could have been received during the soft reset */
6999 irq_arr_size = gaudi2->num_of_valid_hw_events * sizeof(gaudi2->hw_events[0]);
7000 return hl_fw_unmask_irq_arr(hdev, gaudi2->hw_events, irq_arr_size);
7003 static bool gaudi2_get_edma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7004 struct engines_data *e)
7006 u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_sts0, dma_core_sts1;
7007 struct asic_fixed_properties *prop = &hdev->asic_prop;
7008 unsigned long *mask = (unsigned long *) mask_arr;
7009 const char *edma_fmt = "%-6d%-6d%-9s%#-14x%#-15x%#x\n";
7010 bool is_idle = true, is_eng_idle;
7011 int engine_idx, i, j;
7015 hl_engine_data_sprintf(e,
7016 "\nCORE EDMA is_idle QM_GLBL_STS0 DMA_CORE_STS0 DMA_CORE_STS1\n"
7017 "---- ---- ------- ------------ ------------- -------------\n");
7019 for (i = 0; i < NUM_OF_DCORES; i++) {
7020 for (j = 0 ; j < NUM_OF_EDMA_PER_DCORE ; j++) {
7021 int seq = i * NUM_OF_EDMA_PER_DCORE + j;
7023 if (!(prop->edma_enabled_mask & BIT(seq)))
7026 engine_idx = GAUDI2_DCORE0_ENGINE_ID_EDMA_0 +
7027 i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j;
7028 offset = i * DCORE_OFFSET + j * DCORE_EDMA_OFFSET;
7030 dma_core_sts0 = RREG32(mmDCORE0_EDMA0_CORE_STS0 + offset);
7031 dma_core_sts1 = RREG32(mmDCORE0_EDMA0_CORE_STS1 + offset);
7033 qm_glbl_sts0 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS0 + offset);
7034 qm_glbl_sts1 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS1 + offset);
7035 qm_cgm_sts = RREG32(mmDCORE0_EDMA0_QM_CGM_STS + offset);
7037 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
7038 IS_DMA_IDLE(dma_core_sts0) && !IS_DMA_HALTED(dma_core_sts1);
7039 is_idle &= is_eng_idle;
7041 if (mask && !is_eng_idle)
7042 set_bit(engine_idx, mask);
7045 hl_engine_data_sprintf(e, edma_fmt, i, j, is_eng_idle ? "Y" : "N",
7046 qm_glbl_sts0, dma_core_sts0, dma_core_sts1);
7053 static bool gaudi2_get_pdma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7054 struct engines_data *e)
7056 u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_sts0, dma_core_sts1;
7057 unsigned long *mask = (unsigned long *) mask_arr;
7058 const char *pdma_fmt = "%-6d%-9s%#-14x%#-15x%#x\n";
7059 bool is_idle = true, is_eng_idle;
7064 hl_engine_data_sprintf(e,
7065 "\nPDMA is_idle QM_GLBL_STS0 DMA_CORE_STS0 DMA_CORE_STS1\n"
7066 "---- ------- ------------ ------------- -------------\n");
7068 for (i = 0 ; i < NUM_OF_PDMA ; i++) {
7069 engine_idx = GAUDI2_ENGINE_ID_PDMA_0 + i;
7070 offset = i * PDMA_OFFSET;
7071 dma_core_sts0 = RREG32(mmPDMA0_CORE_STS0 + offset);
7072 dma_core_sts1 = RREG32(mmPDMA0_CORE_STS1 + offset);
7074 qm_glbl_sts0 = RREG32(mmPDMA0_QM_GLBL_STS0 + offset);
7075 qm_glbl_sts1 = RREG32(mmPDMA0_QM_GLBL_STS1 + offset);
7076 qm_cgm_sts = RREG32(mmPDMA0_QM_CGM_STS + offset);
7078 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
7079 IS_DMA_IDLE(dma_core_sts0) && !IS_DMA_HALTED(dma_core_sts1);
7080 is_idle &= is_eng_idle;
7082 if (mask && !is_eng_idle)
7083 set_bit(engine_idx, mask);
7086 hl_engine_data_sprintf(e, pdma_fmt, i, is_eng_idle ? "Y" : "N",
7087 qm_glbl_sts0, dma_core_sts0, dma_core_sts1);
7093 static bool gaudi2_get_nic_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7094 struct engines_data *e)
7096 unsigned long *mask = (unsigned long *) mask_arr;
7097 const char *nic_fmt = "%-5d%-9s%#-14x%#-12x\n";
7098 u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
7099 bool is_idle = true, is_eng_idle;
7103 /* NIC: twelve macros in a full chip */
7104 if (e && hdev->nic_ports_mask)
7105 hl_engine_data_sprintf(e,
7106 "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n"
7107 "--- ------- ------------ ----------\n");
7109 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
7111 offset = i / 2 * NIC_OFFSET;
7113 offset += NIC_QM_OFFSET;
7115 if (!(hdev->nic_ports_mask & BIT(i)))
7118 engine_idx = GAUDI2_ENGINE_ID_NIC0_0 + i;
7121 qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
7122 qm_glbl_sts1 = RREG32(mmNIC0_QM0_GLBL_STS1 + offset);
7123 qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
7125 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
7126 is_idle &= is_eng_idle;
7128 if (mask && !is_eng_idle)
7129 set_bit(engine_idx, mask);
7132 hl_engine_data_sprintf(e, nic_fmt, i, is_eng_idle ? "Y" : "N",
7133 qm_glbl_sts0, qm_cgm_sts);
7139 static bool gaudi2_get_mme_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7140 struct engines_data *e)
7142 u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, mme_arch_sts;
7143 unsigned long *mask = (unsigned long *) mask_arr;
7144 const char *mme_fmt = "%-5d%-6s%-9s%#-14x%#x\n";
7145 bool is_idle = true, is_eng_idle;
7150 hl_engine_data_sprintf(e,
7151 "\nMME Stub is_idle QM_GLBL_STS0 MME_ARCH_STATUS\n"
7152 "--- ---- ------- ------------ ---------------\n");
7153 /* MME, one per Dcore */
7154 for (i = 0 ; i < NUM_OF_DCORES ; i++) {
7155 engine_idx = GAUDI2_DCORE0_ENGINE_ID_MME + i * GAUDI2_ENGINE_ID_DCORE_OFFSET;
7156 offset = i * DCORE_OFFSET;
7158 qm_glbl_sts0 = RREG32(mmDCORE0_MME_QM_GLBL_STS0 + offset);
7159 qm_glbl_sts1 = RREG32(mmDCORE0_MME_QM_GLBL_STS1 + offset);
7160 qm_cgm_sts = RREG32(mmDCORE0_MME_QM_CGM_STS + offset);
7162 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
7163 is_idle &= is_eng_idle;
7165 mme_arch_sts = RREG32(mmDCORE0_MME_CTRL_LO_ARCH_STATUS + offset);
7166 is_eng_idle &= IS_MME_IDLE(mme_arch_sts);
7167 is_idle &= is_eng_idle;
7170 hl_engine_data_sprintf(e, mme_fmt, i, "N",
7171 is_eng_idle ? "Y" : "N",
7175 if (mask && !is_eng_idle)
7176 set_bit(engine_idx, mask);
7182 static void gaudi2_is_tpc_engine_idle(struct hl_device *hdev, int dcore, int inst, u32 offset,
7183 struct iterate_module_ctx *ctx)
7185 struct gaudi2_tpc_idle_data *idle_data = ctx->data;
7186 u32 tpc_cfg_sts, qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
7190 if ((dcore == 0) && (inst == (NUM_DCORE0_TPC - 1)))
7191 engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_6;
7193 engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_0 +
7194 dcore * GAUDI2_ENGINE_ID_DCORE_OFFSET + inst;
7196 tpc_cfg_sts = RREG32(mmDCORE0_TPC0_CFG_STATUS + offset);
7197 qm_glbl_sts0 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS0 + offset);
7198 qm_glbl_sts1 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS1 + offset);
7199 qm_cgm_sts = RREG32(mmDCORE0_TPC0_QM_CGM_STS + offset);
7201 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
7202 IS_TPC_IDLE(tpc_cfg_sts);
7203 *(idle_data->is_idle) &= is_eng_idle;
7205 if (idle_data->mask && !is_eng_idle)
7206 set_bit(engine_idx, idle_data->mask);
7209 hl_engine_data_sprintf(idle_data->e,
7210 idle_data->tpc_fmt, dcore, inst,
7211 is_eng_idle ? "Y" : "N",
7212 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
7215 static bool gaudi2_get_tpc_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7216 struct engines_data *e)
7218 struct asic_fixed_properties *prop = &hdev->asic_prop;
7219 unsigned long *mask = (unsigned long *) mask_arr;
7220 bool is_idle = true;
7222 struct gaudi2_tpc_idle_data tpc_idle_data = {
7223 .tpc_fmt = "%-6d%-5d%-9s%#-14x%#-12x%#x\n",
7226 .is_idle = &is_idle,
7228 struct iterate_module_ctx tpc_iter = {
7229 .fn = &gaudi2_is_tpc_engine_idle,
7230 .data = &tpc_idle_data,
7233 if (e && prop->tpc_enabled_mask)
7234 hl_engine_data_sprintf(e,
7235 "\nCORE TPC is_idle QM_GLBL_STS0 QM_CGM_STS STATUS\n"
7236 "---- --- ------- ------------ ---------- ------\n");
7238 gaudi2_iterate_tpcs(hdev, &tpc_iter);
7240 return tpc_idle_data.is_idle;
7243 static bool gaudi2_get_decoder_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7244 struct engines_data *e)
7246 struct asic_fixed_properties *prop = &hdev->asic_prop;
7247 unsigned long *mask = (unsigned long *) mask_arr;
7248 const char *pcie_dec_fmt = "%-10d%-9s%#x\n";
7249 const char *dec_fmt = "%-6d%-5d%-9s%#x\n";
7250 bool is_idle = true, is_eng_idle;
7251 u32 dec_swreg15, dec_enabled_bit;
7252 int engine_idx, i, j;
7255 /* Decoders: two per Dcore plus two shared PCIe decoders */
7256 if (e && (prop->decoder_enabled_mask & (~PCIE_DEC_EN_MASK)))
7257 hl_engine_data_sprintf(e,
7258 "\nCORE DEC is_idle VSI_CMD_SWREG15\n"
7259 "---- --- ------- ---------------\n");
7261 for (i = 0 ; i < NUM_OF_DCORES ; i++) {
7262 for (j = 0 ; j < NUM_OF_DEC_PER_DCORE ; j++) {
7263 dec_enabled_bit = 1 << (i * NUM_OF_DEC_PER_DCORE + j);
7264 if (!(prop->decoder_enabled_mask & dec_enabled_bit))
7267 engine_idx = GAUDI2_DCORE0_ENGINE_ID_DEC_0 +
7268 i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j;
7269 offset = i * DCORE_OFFSET + j * DCORE_DEC_OFFSET;
7271 dec_swreg15 = RREG32(mmDCORE0_DEC0_CMD_SWREG15 + offset);
7272 is_eng_idle = IS_DEC_IDLE(dec_swreg15);
7273 is_idle &= is_eng_idle;
7275 if (mask && !is_eng_idle)
7276 set_bit(engine_idx, mask);
7279 hl_engine_data_sprintf(e, dec_fmt, i, j,
7280 is_eng_idle ? "Y" : "N", dec_swreg15);
7284 if (e && (prop->decoder_enabled_mask & PCIE_DEC_EN_MASK))
7285 hl_engine_data_sprintf(e,
7286 "\nPCIe DEC is_idle VSI_CMD_SWREG15\n"
7287 "-------- ------- ---------------\n");
7289 /* Check the shared (PCIe) decoders */
7290 for (i = 0 ; i < NUM_OF_DEC_PER_DCORE ; i++) {
7291 dec_enabled_bit = PCIE_DEC_SHIFT + i;
7292 if (!(prop->decoder_enabled_mask & BIT(dec_enabled_bit)))
7295 engine_idx = GAUDI2_PCIE_ENGINE_ID_DEC_0 + i;
7296 offset = i * DCORE_DEC_OFFSET;
7297 dec_swreg15 = RREG32(mmPCIE_DEC0_CMD_SWREG15 + offset);
7298 is_eng_idle = IS_DEC_IDLE(dec_swreg15);
7299 is_idle &= is_eng_idle;
7301 if (mask && !is_eng_idle)
7302 set_bit(engine_idx, mask);
7305 hl_engine_data_sprintf(e, pcie_dec_fmt, i,
7306 is_eng_idle ? "Y" : "N", dec_swreg15);
7312 static bool gaudi2_get_rotator_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7313 struct engines_data *e)
7315 const char *rot_fmt = "%-6d%-5d%-9s%#-14x%#-14x%#x\n";
7316 unsigned long *mask = (unsigned long *) mask_arr;
7317 u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
7318 bool is_idle = true, is_eng_idle;
7323 hl_engine_data_sprintf(e,
7324 "\nCORE ROT is_idle QM_GLBL_STS0 QM_GLBL_STS1 QM_CGM_STS\n"
7325 "---- --- ------- ------------ ------------ ----------\n");
7327 for (i = 0 ; i < NUM_OF_ROT ; i++) {
7328 engine_idx = GAUDI2_ENGINE_ID_ROT_0 + i;
7330 offset = i * ROT_OFFSET;
7332 qm_glbl_sts0 = RREG32(mmROT0_QM_GLBL_STS0 + offset);
7333 qm_glbl_sts1 = RREG32(mmROT0_QM_GLBL_STS1 + offset);
7334 qm_cgm_sts = RREG32(mmROT0_QM_CGM_STS + offset);
7336 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
7337 is_idle &= is_eng_idle;
7339 if (mask && !is_eng_idle)
7340 set_bit(engine_idx, mask);
7343 hl_engine_data_sprintf(e, rot_fmt, i, 0, is_eng_idle ? "Y" : "N",
7344 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
7350 static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7351 struct engines_data *e)
7353 bool is_idle = true;
7355 is_idle &= gaudi2_get_edma_idle_status(hdev, mask_arr, mask_len, e);
7356 is_idle &= gaudi2_get_pdma_idle_status(hdev, mask_arr, mask_len, e);
7357 is_idle &= gaudi2_get_nic_idle_status(hdev, mask_arr, mask_len, e);
7358 is_idle &= gaudi2_get_mme_idle_status(hdev, mask_arr, mask_len, e);
7359 is_idle &= gaudi2_get_tpc_idle_status(hdev, mask_arr, mask_len, e);
7360 is_idle &= gaudi2_get_decoder_idle_status(hdev, mask_arr, mask_len, e);
7361 is_idle &= gaudi2_get_rotator_idle_status(hdev, mask_arr, mask_len, e);
7366 static void gaudi2_hw_queues_lock(struct hl_device *hdev)
7367 __acquires(&gaudi2->hw_queues_lock)
7369 struct gaudi2_device *gaudi2 = hdev->asic_specific;
7371 spin_lock(&gaudi2->hw_queues_lock);
7374 static void gaudi2_hw_queues_unlock(struct hl_device *hdev)
7375 __releases(&gaudi2->hw_queues_lock)
7377 struct gaudi2_device *gaudi2 = hdev->asic_specific;
7379 spin_unlock(&gaudi2->hw_queues_lock);
7382 static u32 gaudi2_get_pci_id(struct hl_device *hdev)
7384 return hdev->pdev->device;
7387 static int gaudi2_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
7389 struct gaudi2_device *gaudi2 = hdev->asic_specific;
7391 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
7394 return hl_fw_get_eeprom_data(hdev, data, max_size);
7397 static void gaudi2_update_eq_ci(struct hl_device *hdev, u32 val)
7399 WREG32(mmCPU_IF_EQ_RD_OFFS, val);
7402 static void *gaudi2_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
7404 struct gaudi2_device *gaudi2 = hdev->asic_specific;
7407 *size = (u32) sizeof(gaudi2->events_stat_aggregate);
7408 return gaudi2->events_stat_aggregate;
7411 *size = (u32) sizeof(gaudi2->events_stat);
7412 return gaudi2->events_stat;
7415 static void gaudi2_mmu_vdec_dcore_prepare(struct hl_device *hdev, int dcore_id,
7416 int dcore_vdec_id, u32 rw_asid, u32 rw_mmu_bp)
7418 u32 offset = (mmDCORE0_VDEC1_BRDG_CTRL_BASE - mmDCORE0_VDEC0_BRDG_CTRL_BASE) *
7419 dcore_vdec_id + DCORE_OFFSET * dcore_id;
7421 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp);
7422 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid);
7424 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp);
7425 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid);
7427 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp);
7428 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid);
7430 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp);
7431 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid);
7433 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp);
7434 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid);
7437 static void gaudi2_mmu_dcore_prepare(struct hl_device *hdev, int dcore_id, u32 asid)
7439 u32 rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
7440 (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
7441 struct asic_fixed_properties *prop = &hdev->asic_prop;
7442 u32 dcore_offset = dcore_id * DCORE_OFFSET;
7443 u32 vdec_id, i, ports_offset, reg_val;
7447 edma_seq_base = dcore_id * NUM_OF_EDMA_PER_DCORE;
7448 if (prop->edma_enabled_mask & BIT(edma_seq_base)) {
7449 WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
7450 WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
7451 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0);
7452 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid);
7455 if (prop->edma_enabled_mask & BIT(edma_seq_base + 1)) {
7456 WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
7457 WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
7458 WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid);
7459 WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0);
7463 WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV + dcore_offset, asid);
7465 * Sync Mngrs on dcores 1 - 3 are exposed to the user, so the user ASID must
7466 * be used for any access type
7469 reg_val = (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_RD_SHIFT) |
7470 (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_WR_SHIFT);
7471 WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID + dcore_offset, reg_val);
7472 WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_MMU_BP + dcore_offset, 0);
7475 WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_MMU_BP + dcore_offset, 0);
7476 WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_ASID + dcore_offset, rw_asid);
7478 for (i = 0 ; i < NUM_OF_MME_SBTE_PORTS ; i++) {
7479 ports_offset = i * DCORE_MME_SBTE_OFFSET;
7480 WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_MMU_BP +
7481 dcore_offset + ports_offset, 0);
7482 WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_ASID +
7483 dcore_offset + ports_offset, rw_asid);
7486 for (i = 0 ; i < NUM_OF_MME_WB_PORTS ; i++) {
7487 ports_offset = i * DCORE_MME_WB_OFFSET;
7488 WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_MMU_BP +
7489 dcore_offset + ports_offset, 0);
7490 WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_ASID +
7491 dcore_offset + ports_offset, rw_asid);
7494 WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
7495 WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
7500 for (vdec_id = 0 ; vdec_id < NUM_OF_DEC_PER_DCORE ; vdec_id++) {
7501 if (prop->decoder_enabled_mask & BIT(dcore_id * NUM_OF_DEC_PER_DCORE + vdec_id))
7502 gaudi2_mmu_vdec_dcore_prepare(hdev, dcore_id, vdec_id, rw_asid, 0);
7506 static void gaudi2_mmu_vdec_shared_prepare(struct hl_device *hdev,
7507 int shared_vdec_id, u32 rw_asid, u32 rw_mmu_bp)
7509 u32 offset = (mmPCIE_VDEC1_BRDG_CTRL_BASE - mmPCIE_VDEC0_BRDG_CTRL_BASE) * shared_vdec_id;
7511 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp);
7512 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid);
7514 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp);
7515 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid);
7517 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp);
7518 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid);
7520 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp);
7521 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid);
7523 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp);
7524 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid);
7527 static void gaudi2_mmu_arc_farm_arc_dup_eng_prepare(struct hl_device *hdev, int arc_farm_id,
7528 u32 rw_asid, u32 rw_mmu_bp)
7530 u32 offset = (mmARC_FARM_ARC1_DUP_ENG_BASE - mmARC_FARM_ARC0_DUP_ENG_BASE) * arc_farm_id;
7532 WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_MMU_BP + offset, rw_mmu_bp);
7533 WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_ASID + offset, rw_asid);
7536 static void gaudi2_arc_mmu_prepare(struct hl_device *hdev, u32 cpu_id, u32 asid)
7538 u32 reg_base, reg_offset, reg_val = 0;
7540 reg_base = gaudi2_arc_blocks_bases[cpu_id];
7542 /* Enable MMU and configure asid for all relevant ARC regions */
7543 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_MMU_BP_MASK, 0);
7544 reg_val |= FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_0_ASID_MASK, asid);
7546 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION3_GENERAL);
7547 WREG32(reg_base + reg_offset, reg_val);
7549 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION4_HBM0_FW);
7550 WREG32(reg_base + reg_offset, reg_val);
7552 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION5_HBM1_GC_DATA);
7553 WREG32(reg_base + reg_offset, reg_val);
7555 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION6_HBM2_GC_DATA);
7556 WREG32(reg_base + reg_offset, reg_val);
7558 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION7_HBM3_GC_DATA);
7559 WREG32(reg_base + reg_offset, reg_val);
7561 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION9_PCIE);
7562 WREG32(reg_base + reg_offset, reg_val);
7564 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION10_GENERAL);
7565 WREG32(reg_base + reg_offset, reg_val);
7567 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION11_GENERAL);
7568 WREG32(reg_base + reg_offset, reg_val);
7570 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION12_GENERAL);
7571 WREG32(reg_base + reg_offset, reg_val);
7573 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION13_GENERAL);
7574 WREG32(reg_base + reg_offset, reg_val);
7576 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION14_GENERAL);
7577 WREG32(reg_base + reg_offset, reg_val);
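/*
 * A minimal sketch: the per-region writes above could equivalently be driven
 * from a table of region IDs; a behavior-identical, hypothetical form:
 */
static void __maybe_unused example_arc_regions_cfg(struct hl_device *hdev,
					u32 reg_base, u32 reg_val)
{
	static const u32 regions[] = {
		ARC_REGION3_GENERAL, ARC_REGION4_HBM0_FW,
		ARC_REGION5_HBM1_GC_DATA, ARC_REGION6_HBM2_GC_DATA,
		ARC_REGION7_HBM3_GC_DATA, ARC_REGION9_PCIE,
		ARC_REGION10_GENERAL, ARC_REGION11_GENERAL,
		ARC_REGION12_GENERAL, ARC_REGION13_GENERAL,
		ARC_REGION14_GENERAL,
	};
	int i;

	for (i = 0 ; i < ARRAY_SIZE(regions) ; i++)
		WREG32(reg_base + ARC_REGION_CFG_OFFSET(regions[i]), reg_val);
}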
7580 static int gaudi2_arc_mmu_prepare_all(struct hl_device *hdev, u32 asid)
7584 if (hdev->fw_components & FW_TYPE_BOOT_CPU)
7585 return hl_fw_cpucp_engine_core_asid_set(hdev, asid);
7587 for (i = CPU_ID_SCHED_ARC0 ; i < NUM_OF_ARC_FARMS_ARC ; i++)
7588 gaudi2_arc_mmu_prepare(hdev, i, asid);
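/*
 * Note (editorial): each QMAN exposes 4 streams and owns a single ARC, so
 * stepping the queue ID by 4 in the loop below visits every QMAN exactly once.
 */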
7590 for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) {
7591 if (!gaudi2_is_queue_enabled(hdev, i))
7594 gaudi2_arc_mmu_prepare(hdev, gaudi2_queue_id_to_arc_id[i], asid);
7600 static int gaudi2_mmu_shared_prepare(struct hl_device *hdev, u32 asid)
7602 struct asic_fixed_properties *prop = &hdev->asic_prop;
7603 u32 rw_asid, offset;
7606 rw_asid = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_MASK, asid) |
7607 FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_MASK, asid);
7609 WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_ASID, rw_asid);
7610 WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP, 0);
7611 WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_ASID, rw_asid);
7612 WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_MMU_BP, 0);
7614 WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_ASID, rw_asid);
7615 WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP, 0);
7616 WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_ASID, rw_asid);
7617 WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_MMU_BP, 0);
7620 for (i = 0 ; i < NUM_OF_ROT ; i++) {
7621 offset = i * ROT_OFFSET;
7622 WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_ASID + offset, rw_asid);
7623 WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0);
7624 RMWREG32(mmROT0_CPL_QUEUE_AWUSER + offset, asid, MMUBP_ASID_MASK);
7625 RMWREG32(mmROT0_DESC_HBW_ARUSER_LO + offset, asid, MMUBP_ASID_MASK);
7626 RMWREG32(mmROT0_DESC_HBW_AWUSER_LO + offset, asid, MMUBP_ASID_MASK);
7629 /* Shared Decoders are the last bits in the decoders mask */
7630 if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 0))
7631 gaudi2_mmu_vdec_shared_prepare(hdev, 0, rw_asid, 0);
7633 if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 1))
7634 gaudi2_mmu_vdec_shared_prepare(hdev, 1, rw_asid, 0);
7636 /* arc farm arc dup eng */
7637 for (i = 0 ; i < NUM_OF_ARC_FARMS_ARC ; i++)
7638 gaudi2_mmu_arc_farm_arc_dup_eng_prepare(hdev, i, rw_asid, 0);
7640 rc = gaudi2_arc_mmu_prepare_all(hdev, asid);
7647 static void gaudi2_tpc_mmu_prepare(struct hl_device *hdev, int dcore, int inst, u32 offset,
7648 struct iterate_module_ctx *ctx)
7650 struct gaudi2_tpc_mmu_data *mmu_data = ctx->data;
7652 WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_MMU_BP + offset, 0);
7653 WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_ASID + offset, mmu_data->rw_asid);
7654 WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0);
7655 WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_ASID + offset, mmu_data->rw_asid);
7658 /* zero the MMUBP and set the ASID */
7659 static int gaudi2_mmu_prepare(struct hl_device *hdev, u32 asid)
7661 struct gaudi2_device *gaudi2 = hdev->asic_specific;
7662 struct gaudi2_tpc_mmu_data tpc_mmu_data;
7663 struct iterate_module_ctx tpc_iter = {
7664 .fn = &gaudi2_tpc_mmu_prepare,
7665 .data = &tpc_mmu_data,
7669 if (asid & ~DCORE0_HMMU0_STLB_ASID_ASID_MASK) {
7670 dev_crit(hdev->dev, "asid %u is too big\n", asid);
7674 if (!(gaudi2->hw_cap_initialized & HW_CAP_MMU_MASK))
7677 rc = gaudi2_mmu_shared_prepare(hdev, asid);
7681 /* configure DCORE MMUs */
7682 tpc_mmu_data.rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
7683 (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
7684 gaudi2_iterate_tpcs(hdev, &tpc_iter);
7685 for (i = 0 ; i < NUM_OF_DCORES ; i++)
7686 gaudi2_mmu_dcore_prepare(hdev, i, asid);
7691 static inline bool is_info_event(u32 event)
7694 case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
7695 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S ... GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
7697 /* return in case of a NIC status event - these events are received periodically
7698 * and are not an indication of an error.
7699 */
7700 case GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG0 ... GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1:
7707 static void gaudi2_print_event(struct hl_device *hdev, u16 event_type,
7708 bool ratelimited, const char *fmt, ...)
7710 struct va_format vaf;
7713 va_start(args, fmt);
7718 dev_err_ratelimited(hdev->dev, "%s: %pV\n",
7719 gaudi2_irq_map_table[event_type].valid ?
7720 gaudi2_irq_map_table[event_type].name : "N/A Event", &vaf);
7722 dev_err(hdev->dev, "%s: %pV\n",
7723 gaudi2_irq_map_table[event_type].valid ?
7724 gaudi2_irq_map_table[event_type].name : "N/A Event", &vaf);
7729 static bool gaudi2_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7730 struct hl_eq_ecc_data *ecc_data)
7732 u64 ecc_address = 0, ecc_syndrom = 0;
7733 u8 memory_wrapper_idx = 0;
7735 ecc_address = le64_to_cpu(ecc_data->ecc_address);
7736 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7737 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7739 gaudi2_print_event(hdev, event_type, !ecc_data->is_critical,
7740 "ECC error detected. address: %#llx. Syndrom: %#llx. block id %u. critical %u.",
7741 ecc_address, ecc_syndrom, memory_wrapper_idx, ecc_data->is_critical);
7743 return !!ecc_data->is_critical;
7747 * gaudi2_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
7749 * @idx: the current pi/ci value
7750 * @q_len: the queue length (power of 2)
7752 * @return the cyclically decremented index
7754 static inline u32 gaudi2_queue_idx_dec(u32 idx, u32 q_len)
7756 u32 mask = q_len - 1;
7759 * modular decrement is equivalent to adding (queue_size - 1);
7760 * we then take the LSBs to make sure the value is in the
7761 * range [0, queue_len - 1]
7763 return (idx + q_len - 1) & mask;
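/*
 * Worked example: with q_len = 8, decrementing index 0 wraps to
 * (0 + 8 - 1) & 0x7 = 7, and decrementing 5 yields (5 + 8 - 1) & 0x7 = 4.
 * This is why q_len must be a power of 2: the mask (q_len - 1) implements
 * the modulo cheaply.
 */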
7767 * gaudi2_print_sw_config_stream_data - print SW config stream data
7769 * @hdev: pointer to the habanalabs device structure
7770 * @stream: the QMAN's stream
7771 * @qman_base: base address of QMAN registers block
7773 static void gaudi2_print_sw_config_stream_data(struct hl_device *hdev,
7774 u32 stream, u64 qman_base)
7776 u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
7777 u32 cq_ptr_lo_off, size;
7779 cq_ptr_lo_off = mmDCORE0_TPC0_QM_CQ_PTR_LO_1 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0;
7781 cq_ptr_lo = qman_base + (mmDCORE0_TPC0_QM_CQ_PTR_LO_0 - mmDCORE0_TPC0_QM_BASE) +
7782 stream * cq_ptr_lo_off;
7784 cq_ptr_hi = cq_ptr_lo + (mmDCORE0_TPC0_QM_CQ_PTR_HI_0 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0);
7786 cq_tsize = cq_ptr_lo + (mmDCORE0_TPC0_QM_CQ_TSIZE_0 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0);
7788 cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
7789 size = RREG32(cq_tsize);
7790 dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %x\n",
7791 stream, cq_ptr, size);
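/*
 * A minimal sketch: the address computation above follows a driver-wide
 * pattern - take the distance between two DCORE0_TPC0 registers as the
 * per-stream stride and rebase it onto any QMAN via qman_base. A
 * hypothetical helper for CQ_PTR_LO:
 */
static inline u64 __maybe_unused example_cq_ptr_lo_addr(u64 qman_base, u32 stream)
{
	u32 stride = mmDCORE0_TPC0_QM_CQ_PTR_LO_1 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0;

	return qman_base + (mmDCORE0_TPC0_QM_CQ_PTR_LO_0 - mmDCORE0_TPC0_QM_BASE) +
			stream * stride;
}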
7795 * gaudi2_print_last_pqes_on_err - print last PQEs on error
7797 * @hdev: pointer to the habanalabs device structure
7798 * @qid_base: first QID of the QMAN (out of 4 streams)
7799 * @stream: the QMAN's stream
7800 * @qman_base: base address of QMAN registers block
7801 * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
7803 static void gaudi2_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base, u32 stream,
7804 u64 qman_base, bool pr_sw_conf)
7806 u32 ci, qm_ci_stream_off;
7807 struct hl_hw_queue *q;
7811 q = &hdev->kernel_queues[qid_base + stream];
7813 qm_ci_stream_off = mmDCORE0_TPC0_QM_PQ_CI_1 - mmDCORE0_TPC0_QM_PQ_CI_0;
7814 pq_ci = qman_base + (mmDCORE0_TPC0_QM_PQ_CI_0 - mmDCORE0_TPC0_QM_BASE) +
7815 stream * qm_ci_stream_off;
7817 hdev->asic_funcs->hw_queues_lock(hdev);
7820 gaudi2_print_sw_config_stream_data(hdev, stream, qman_base);
7824 /* we should start printing from ci - 1 */
7825 ci = gaudi2_queue_idx_dec(ci, HL_QUEUE_LENGTH);
7827 for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
7832 bd = q->kernel_address;
7835 len = le32_to_cpu(bd->len);
7836 /* len 0 means an uninitialized entry - break */
7840 addr = le64_to_cpu(bd->ptr);
7842 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %x\n",
7843 stream, ci, addr, len);
7845 /* get previous ci, wrap if needed */
7846 ci = gaudi2_queue_idx_dec(ci, HL_QUEUE_LENGTH);
7849 hdev->asic_funcs->hw_queues_unlock(hdev);
7853 * print_qman_data_on_err - extract QMAN data on error
7855 * @hdev: pointer to the habanalabs device structure
7856 * @qid_base: first QID of the QMAN (out of 4 streams)
7857 * @stream: the QMAN's stream
7858 * @qman_base: base address of QMAN registers block
7860 * This function attempts to extract as much data as possible on a QMAN error.
7861 * For an upper CP, print the SW config stream data and the last 8 PQEs.
7862 * For the lower CP, print the SW config data and the last PQEs of ALL 4 upper CPs.
7864 static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base, u32 stream, u64 qman_base)
7868 if (stream != QMAN_STREAMS) {
7869 gaudi2_print_last_pqes_on_err(hdev, qid_base, stream, qman_base, true);
7873 gaudi2_print_sw_config_stream_data(hdev, stream, qman_base);
7875 for (i = 0 ; i < QMAN_STREAMS ; i++)
7876 gaudi2_print_last_pqes_on_err(hdev, qid_base, i, qman_base, false);
7879 static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type,
7880 u64 qman_base, u32 qid_base)
7882 u32 i, j, glbl_sts_val, arb_err_val, num_error_causes, error_count = 0;
7883 u64 glbl_sts_addr, arb_err_addr;
7886 glbl_sts_addr = qman_base + (mmDCORE0_TPC0_QM_GLBL_ERR_STS_0 - mmDCORE0_TPC0_QM_BASE);
7887 arb_err_addr = qman_base + (mmDCORE0_TPC0_QM_ARB_ERR_CAUSE - mmDCORE0_TPC0_QM_BASE);
7889 /* Iterate through all stream GLBL_ERR_STS registers + Lower CP */
7890 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7891 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7896 if (i == QMAN_STREAMS) {
7897 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7898 num_error_causes = GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE;
7900 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7901 num_error_causes = GAUDI2_NUM_OF_QM_ERR_CAUSE;
7904 for (j = 0 ; j < num_error_causes ; j++)
7905 if (glbl_sts_val & BIT(j)) {
7906 gaudi2_print_event(hdev, event_type, true,
7907 "%s. err cause: %s", reg_desc,
7909 gaudi2_qman_lower_cp_error_cause[j] :
7910 gaudi2_qman_error_cause[j]);
7914 print_qman_data_on_err(hdev, qid_base, i, qman_base);
7917 arb_err_val = RREG32(arb_err_addr);
7922 for (j = 0 ; j < GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7923 if (arb_err_val & BIT(j)) {
7924 gaudi2_print_event(hdev, event_type, true,
7925 "ARB_ERR. err cause: %s",
7926 gaudi2_qman_arb_error_cause[j]);
7935 static void gaudi2_razwi_rr_hbw_shared_printf_info(struct hl_device *hdev,
7936 u64 rtr_mstr_if_base_addr, bool is_write, char *name,
7937 enum gaudi2_engine_id id, u64 *event_mask)
7939 u32 razwi_hi, razwi_lo, razwi_xy;
7944 razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HI);
7945 razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_LO);
7946 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_XY);
7947 rd_wr_flag = HL_RAZWI_WRITE;
7949 razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HI);
7950 razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_LO);
7951 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_XY);
7952 rd_wr_flag = HL_RAZWI_READ;
7955 hl_handle_razwi(hdev, (u64)razwi_hi << 32 | razwi_lo, &eng_id, 1,
7956 rd_wr_flag | HL_RAZWI_HBW, event_mask);
7958 dev_err_ratelimited(hdev->dev,
7959 "%s-RAZWI SHARED RR HBW %s error, address %#llx, Initiator coordinates 0x%x\n",
7960 name, is_write ? "WR" : "RD", (u64)razwi_hi << 32 | razwi_lo, razwi_xy);
7963 static void gaudi2_razwi_rr_lbw_shared_printf_info(struct hl_device *hdev,
7964 u64 rtr_mstr_if_base_addr, bool is_write, char *name,
7965 enum gaudi2_engine_id id, u64 *event_mask)
7967 u64 razwi_addr = CFG_BASE;
7973 razwi_addr += RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI);
7974 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_XY);
7975 rd_wr_flag = HL_RAZWI_WRITE;
7977 razwi_addr += RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI);
7978 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_XY);
7979 rd_wr_flag = HL_RAZWI_READ;
7982 hl_handle_razwi(hdev, razwi_addr, &eng_id, 1, rd_wr_flag | HL_RAZWI_LBW, event_mask);
7983 dev_err_ratelimited(hdev->dev,
7984 "%s-RAZWI SHARED RR LBW %s error, mstr_if 0x%llx, captured address 0x%llX Initiator coordinates 0x%x\n",
7985 name, is_write ? "WR" : "RD", rtr_mstr_if_base_addr, razwi_addr,
7989 static enum gaudi2_engine_id gaudi2_razwi_calc_engine_id(struct hl_device *hdev,
7990 enum razwi_event_sources module, u8 module_idx)
7994 if (module_idx == (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES))
7995 return GAUDI2_DCORE0_ENGINE_ID_TPC_6;
7996 return (((module_idx / NUM_OF_TPC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
7997 (module_idx % NUM_OF_TPC_PER_DCORE) +
7998 (GAUDI2_DCORE0_ENGINE_ID_TPC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0));
8001 return ((GAUDI2_DCORE0_ENGINE_ID_MME - GAUDI2_DCORE0_ENGINE_ID_EDMA_0) +
8002 (module_idx * ENGINE_ID_DCORE_OFFSET));
8005 return (((module_idx / NUM_OF_EDMA_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
8006 (module_idx % NUM_OF_EDMA_PER_DCORE));
8009 return (GAUDI2_ENGINE_ID_PDMA_0 + module_idx);
8012 return (GAUDI2_ENGINE_ID_NIC0_0 + (NIC_NUMBER_OF_QM_PER_MACRO * module_idx));
8015 if (module_idx == 8)
8016 return GAUDI2_PCIE_ENGINE_ID_DEC_0;
8018 if (module_idx == 9)
8019 return GAUDI2_PCIE_ENGINE_ID_DEC_1;
8021 return (((module_idx / NUM_OF_DEC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
8022 (module_idx % NUM_OF_DEC_PER_DCORE) +
8023 (GAUDI2_DCORE0_ENGINE_ID_DEC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0));
8026 return GAUDI2_ENGINE_ID_ROT_0 + module_idx;
8029 return GAUDI2_ENGINE_ID_SIZE;
8034 * This function handles RR (range register) hit events
8035 * raised by initiators, as opposed to PSOC RAZWI.
8037 static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev,
8038 enum razwi_event_sources module, u8 module_idx,
8039 u8 module_sub_idx, u64 *event_mask)
8041 bool via_sft = false;
8042 u32 hbw_rtr_id, lbw_rtr_id, dcore_id, dcore_rtr_id, eng_id;
8043 u64 hbw_rtr_mstr_if_base_addr, lbw_rtr_mstr_if_base_addr;
8044 u32 hbw_shrd_aw = 0, hbw_shrd_ar = 0;
8045 u32 lbw_shrd_aw = 0, lbw_shrd_ar = 0;
8046 char initiator_name[64];
8050 hbw_rtr_id = gaudi2_tpc_initiator_hbw_rtr_id[module_idx];
8052 if (hl_is_fw_sw_ver_below(hdev, 1, 9) &&
8053 !hdev->asic_prop.fw_security_enabled &&
8054 ((module_idx == 0) || (module_idx == 1)))
8055 lbw_rtr_id = DCORE0_RTR0;
8057 lbw_rtr_id = gaudi2_tpc_initiator_lbw_rtr_id[module_idx];
8058 sprintf(initiator_name, "TPC_%u", module_idx);
8061 sprintf(initiator_name, "MME_%u", module_idx);
8062 switch (module_sub_idx) {
8064 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap0;
8067 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap1;
8070 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].write;
8073 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].read;
8076 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte0;
8079 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte1;
8082 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte2;
8085 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte3;
8088 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte4;
8093 lbw_rtr_id = hbw_rtr_id;
8096 hbw_rtr_mstr_if_base_addr = gaudi2_edma_initiator_hbw_sft[module_idx];
8097 dcore_id = module_idx / NUM_OF_EDMA_PER_DCORE;
8098 /* SFT has a separate MSTR_IF for LBW; only there can we
8099 * read the LBW RAZWI related registers
8100 */
8101 lbw_rtr_mstr_if_base_addr = mmSFT0_LBW_RTR_IF_MSTR_IF_RR_SHRD_HBW_BASE +
8102 dcore_id * SFT_DCORE_OFFSET;
8104 sprintf(initiator_name, "EDMA_%u", module_idx);
8107 hbw_rtr_id = gaudi2_pdma_initiator_hbw_rtr_id[module_idx];
8108 lbw_rtr_id = gaudi2_pdma_initiator_lbw_rtr_id[module_idx];
8109 sprintf(initiator_name, "PDMA_%u", module_idx);
8112 hbw_rtr_id = gaudi2_nic_initiator_hbw_rtr_id[module_idx];
8113 lbw_rtr_id = gaudi2_nic_initiator_lbw_rtr_id[module_idx];
8114 sprintf(initiator_name, "NIC_%u", module_idx);
8117 hbw_rtr_id = gaudi2_dec_initiator_hbw_rtr_id[module_idx];
8118 lbw_rtr_id = gaudi2_dec_initiator_lbw_rtr_id[module_idx];
8119 sprintf(initiator_name, "DEC_%u", module_idx);
8122 hbw_rtr_id = gaudi2_rot_initiator_hbw_rtr_id[module_idx];
8123 lbw_rtr_id = gaudi2_rot_initiator_lbw_rtr_id[module_idx];
8124 sprintf(initiator_name, "ROT_%u", module_idx);
8130 /* Find router mstr_if register base */
8132 dcore_id = hbw_rtr_id / NUM_OF_RTR_PER_DCORE;
8133 dcore_rtr_id = hbw_rtr_id % NUM_OF_RTR_PER_DCORE;
8134 hbw_rtr_mstr_if_base_addr = mmDCORE0_RTR0_CTRL_BASE +
8135 dcore_id * DCORE_OFFSET +
8136 dcore_rtr_id * DCORE_RTR_OFFSET +
8138 lbw_rtr_mstr_if_base_addr = hbw_rtr_mstr_if_base_addr +
8139 (((s32)lbw_rtr_id - hbw_rtr_id) * DCORE_RTR_OFFSET);
8142 /* Find out event cause by reading "RAZWI_HAPPENED" registers */
8143 hbw_shrd_aw = RREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED);
8144 hbw_shrd_ar = RREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED);
8145 lbw_shrd_aw = RREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED);
8146 lbw_shrd_ar = RREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED);
8148 eng_id = gaudi2_razwi_calc_engine_id(hdev, module, module_idx);
8150 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, hbw_rtr_mstr_if_base_addr, true,
8151 initiator_name, eng_id, event_mask);
8153 /* Clear event indication */
8154 WREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED, hbw_shrd_aw);
8158 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, hbw_rtr_mstr_if_base_addr, false,
8159 initiator_name, eng_id, event_mask);
8161 /* Clear event indication */
8162 WREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED, hbw_shrd_ar);
8166 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, lbw_rtr_mstr_if_base_addr, true,
8167 initiator_name, eng_id, event_mask);
8169 /* Clear event indication */
8170 WREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED, lbw_shrd_aw);
8174 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, lbw_rtr_mstr_if_base_addr, false,
8175 initiator_name, eng_id, event_mask);
8177 /* Clear event indication */
8178 WREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED, lbw_shrd_ar);
8182 static void gaudi2_check_if_razwi_happened(struct hl_device *hdev)
8184 struct asic_fixed_properties *prop = &hdev->asic_prop;
8185 u8 mod_idx, sub_mod;
8187 /* check all TPCs */
8188 for (mod_idx = 0 ; mod_idx < (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1) ; mod_idx++) {
8189 if (prop->tpc_enabled_mask & BIT(mod_idx))
8190 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, mod_idx, 0, NULL);
8193 /* check all MMEs */
8194 for (mod_idx = 0 ; mod_idx < (NUM_OF_MME_PER_DCORE * NUM_OF_DCORES) ; mod_idx++)
8195 for (sub_mod = MME_WAP0 ; sub_mod < MME_INITIATORS_MAX ; sub_mod++)
8196 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mod_idx,
8199 /* check all EDMAs */
8200 for (mod_idx = 0 ; mod_idx < (NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES) ; mod_idx++)
8201 if (prop->edma_enabled_mask & BIT(mod_idx))
8202 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_EDMA, mod_idx, 0, NULL);
8204 /* check all PDMAs */
8205 for (mod_idx = 0 ; mod_idx < NUM_OF_PDMA ; mod_idx++)
8206 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_PDMA, mod_idx, 0, NULL);
8208 /* check all NICs */
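/*
 * Note (editorial): RAZWI routing info is tracked per NIC macro and each
 * macro hosts two ports, hence the mod_idx >> 1 below.
 */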
8209 for (mod_idx = 0 ; mod_idx < NIC_NUMBER_OF_PORTS ; mod_idx++)
8210 if (hdev->nic_ports_mask & BIT(mod_idx))
8211 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_NIC, mod_idx >> 1, 0,
8214 /* check all DECs */
8215 for (mod_idx = 0 ; mod_idx < NUMBER_OF_DEC ; mod_idx++)
8216 if (prop->decoder_enabled_mask & BIT(mod_idx))
8217 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, mod_idx, 0, NULL);
8219 /* check all ROTs */
8220 for (mod_idx = 0 ; mod_idx < NUM_OF_ROT ; mod_idx++)
8221 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, mod_idx, 0, NULL);
8224 static int gaudi2_psoc_razwi_get_engines(struct gaudi2_razwi_info *razwi_info, u32 array_size,
8225 u32 axuser_xy, u32 *base, u16 *eng_id,
8229 int i, num_of_eng = 0;
8232 for (i = 0 ; i < array_size ; i++) {
8233 if (axuser_xy != razwi_info[i].axuser_xy)
8236 eng_id[num_of_eng] = razwi_info[i].eng_id;
8237 base[num_of_eng] = razwi_info[i].rtr_ctrl;
8239 str_size += snprintf(eng_name + str_size,
8240 PSOC_RAZWI_ENG_STR_SIZE - str_size, "%s",
8241 razwi_info[i].eng_name);
8243 str_size += snprintf(eng_name + str_size,
8244 PSOC_RAZWI_ENG_STR_SIZE - str_size, " or %s",
8245 razwi_info[i].eng_name);
8252 static bool gaudi2_handle_psoc_razwi_happened(struct hl_device *hdev, u32 razwi_reg,
8255 u32 axuser_xy = RAZWI_GET_AXUSER_XY(razwi_reg), addr_hi = 0, addr_lo = 0;
8256 u32 base[PSOC_RAZWI_MAX_ENG_PER_RTR];
8257 u16 num_of_eng, eng_id[PSOC_RAZWI_MAX_ENG_PER_RTR];
8258 char eng_name_str[PSOC_RAZWI_ENG_STR_SIZE];
8259 bool razwi_happened = false;
8263 num_of_eng = gaudi2_psoc_razwi_get_engines(common_razwi_info, ARRAY_SIZE(common_razwi_info),
8264 axuser_xy, base, eng_id, eng_name_str);
8266 /* If there is no match for the XY coordinates, try to find it in the MME razwi table */
8268 axuser_xy = RAZWI_GET_AXUSER_LOW_XY(razwi_reg);
8269 num_of_eng = gaudi2_psoc_razwi_get_engines(mme_razwi_info,
8270 ARRAY_SIZE(mme_razwi_info),
8271 axuser_xy, base, eng_id,
8275 for (i = 0 ; i < num_of_eng ; i++) {
8276 if (RREG32(base[i] + DEC_RAZWI_HBW_AW_SET)) {
8277 addr_hi = RREG32(base[i] + DEC_RAZWI_HBW_AW_ADDR_HI);
8278 addr_lo = RREG32(base[i] + DEC_RAZWI_HBW_AW_ADDR_LO);
8279 addr = ((u64)addr_hi << 32) + addr_lo;
8282 "PSOC HBW AW RAZWI: %s, address (aligned to 128 byte): 0x%llX\n",
8283 eng_name_str, addr);
8284 hl_handle_razwi(hdev, addr, &eng_id[0],
8285 num_of_eng, HL_RAZWI_HBW | HL_RAZWI_WRITE, event_mask);
8286 razwi_happened = true;
8290 if (RREG32(base[i] + DEC_RAZWI_HBW_AR_SET)) {
8291 addr_hi = RREG32(base[i] + DEC_RAZWI_HBW_AR_ADDR_HI);
8292 addr_lo = RREG32(base[i] + DEC_RAZWI_HBW_AR_ADDR_LO);
8293 addr = ((u64)addr_hi << 32) + addr_lo;
8296 "PSOC HBW AR RAZWI: %s, address (aligned to 128 byte): 0x%llX\n",
8297 eng_name_str, addr);
8298 hl_handle_razwi(hdev, addr, &eng_id[0],
8299 num_of_eng, HL_RAZWI_HBW | HL_RAZWI_READ, event_mask);
8300 razwi_happened = true;
8304 if (RREG32(base[i] + DEC_RAZWI_LBW_AW_SET)) {
8305 addr_lo = RREG32(base[i] + DEC_RAZWI_LBW_AW_ADDR);
8308 "PSOC LBW AW RAZWI: %s, address (aligned to 128 byte): 0x%X\n",
8309 eng_name_str, addr_lo);
8310 hl_handle_razwi(hdev, addr_lo, &eng_id[0],
8311 num_of_eng, HL_RAZWI_LBW | HL_RAZWI_WRITE, event_mask);
8312 razwi_happened = true;
8316 if (RREG32(base[i] + DEC_RAZWI_LBW_AR_SET)) {
8317 addr_lo = RREG32(base[i] + DEC_RAZWI_LBW_AR_ADDR);
8320 "PSOC LBW AR RAZWI: %s, address (aligned to 128 byte): 0x%X\n",
8321 eng_name_str, addr_lo);
8322 hl_handle_razwi(hdev, addr_lo, &eng_id[0],
8323 num_of_eng, HL_RAZWI_LBW | HL_RAZWI_READ, event_mask);
8324 razwi_happened = true;
8327 /* In the common case the loop will break when there is only one engine id, or
8328 * several engines sharing the same router. The exceptional case is a PSOC RAZWI
8329 * from EDMA, where it's possible to get an axuser id that fits 2 routers (the 2
8330 * interfaces of the SFT router). In that case the first router may not hold the
8331 * info and we will need to iterate over the other router.
8332 */
8337 return razwi_happened;
8340 /* PSOC RAZWI interrupt occurs only when trying to access a bad address */
8341 static int gaudi2_ack_psoc_razwi_event_handler(struct hl_device *hdev, u64 *event_mask)
8343 u32 razwi_mask_info, razwi_intr = 0, error_count = 0;
8345 if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX)) {
8346 razwi_intr = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT);
8351 razwi_mask_info = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_MASK_INFO);
8353 dev_err_ratelimited(hdev->dev,
8354 "PSOC RAZWI interrupt: Mask %d, AR %d, AW %d, AXUSER_L 0x%x AXUSER_H 0x%x\n",
8355 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_MASK_MASK, razwi_mask_info),
8356 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AR_MASK, razwi_mask_info),
8357 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AW_MASK, razwi_mask_info),
8358 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_L_MASK, razwi_mask_info),
8359 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_H_MASK, razwi_mask_info));
	if (gaudi2_handle_psoc_razwi_happened(hdev, razwi_mask_info, event_mask))
		error_count++;
	else
		dev_err_ratelimited(hdev->dev,
			"PSOC RAZWI interrupt: invalid razwi info (0x%x)\n",
			razwi_mask_info);
8368 /* Clear Interrupts only on pldm or if f/w doesn't handle interrupts */
	if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX))
		WREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT, razwi_intr);

	return error_count;
}
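/*
 * RAZWI_MASK_INFO packs several fields into a single 32-bit register, which
 * the handler above unpacks with FIELD_GET(). A minimal sketch of the same
 * unpacking for a hypothetical two-field layout (the EXAMPLE_* masks are
 * illustrative, not the real gaudi2 layout):
 */
#define EXAMPLE_INFO_WAS_AR_MASK	BIT(0)		/* hypothetical bit 0 */
#define EXAMPLE_INFO_AXUSER_L_MASK	GENMASK(15, 1)	/* hypothetical bits 15:1 */

static inline void example_unpack_razwi_info(u32 info, u32 *was_ar, u32 *axuser_l)
{
	*was_ar = FIELD_GET(EXAMPLE_INFO_WAS_AR_MASK, info);
	*axuser_l = FIELD_GET(EXAMPLE_INFO_AXUSER_L_MASK, info);
}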
8375 static int _gaudi2_handle_qm_sei_err(struct hl_device *hdev, u64 qman_base, u16 event_type)
8377 u32 i, sts_val, sts_clr_val = 0, error_count = 0;
8379 sts_val = RREG32(qman_base + QM_SEI_STATUS_OFFSET);
8381 for (i = 0 ; i < GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE ; i++) {
8382 if (sts_val & BIT(i)) {
8383 gaudi2_print_event(hdev, event_type, true,
8384 "err cause: %s", gaudi2_qm_sei_error_cause[i]);
8385 sts_clr_val |= BIT(i);
	WREG32(qman_base + QM_SEI_STATUS_OFFSET, sts_clr_val);

	return error_count;
}
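/*
 * The QM SEI handler above follows the acknowledgment pattern used throughout
 * this file: read a sticky status register, report every set cause bit,
 * accumulate those bits and write them back (write-1-to-clear). A condensed
 * sketch of the accumulation step (register access omitted):
 */
static inline u32 example_collect_sticky_causes(u32 sts_val, u32 num_causes)
{
	u32 i, sts_clr_val = 0;

	for (i = 0 ; i < num_causes ; i++)
		if (sts_val & BIT(i))
			sts_clr_val |= BIT(i);	/* written back as W1C */

	return sts_clr_val;
}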
8395 static int gaudi2_handle_qm_sei_err(struct hl_device *hdev, u16 event_type,
8396 bool extended_err_check, u64 *event_mask)
8398 enum razwi_event_sources module;
8399 u32 error_count = 0;
8403 switch (event_type) {
8404 case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC23_AXI_ERR_RSP:
8405 index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
8406 qman_base = mmDCORE0_TPC0_QM_BASE +
8407 (index / NUM_OF_TPC_PER_DCORE) * DCORE_OFFSET +
				(index % NUM_OF_TPC_PER_DCORE) * DCORE_TPC_OFFSET;
		module = RAZWI_TPC;
		break;
8411 case GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
		qman_base = mmDCORE0_TPC6_QM_BASE;
		module = RAZWI_TPC;
		break;
8415 case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
8416 case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
8417 case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
8418 case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
8419 index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) /
8420 (GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE -
8421 GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE);
		qman_base = mmDCORE0_MME_QM_BASE + index * DCORE_OFFSET;
		module = RAZWI_MME;
		break;
8425 case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
8426 case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
8427 index = event_type - GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP;
8428 qman_base = mmPDMA0_QM_BASE + index * PDMA_OFFSET;
		module = RAZWI_PDMA;
		break;
8431 case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
8432 case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
8433 index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE;
		qman_base = mmROT0_QM_BASE + index * ROT_OFFSET;
		module = RAZWI_ROT;
		break;
	default:
		return 0;
	}
8441 error_count = _gaudi2_handle_qm_sei_err(hdev, qman_base, event_type);
	/* There is a single event per NIC macro, so we should check both of its QMAN blocks */
8444 if (event_type >= GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE &&
8445 event_type <= GAUDI2_EVENT_NIC11_AXI_ERROR_RESPONSE)
8446 error_count += _gaudi2_handle_qm_sei_err(hdev,
8447 qman_base + NIC_QM_OFFSET, event_type);
8449 if (extended_err_check) {
8450 /* check if RAZWI happened */
8451 gaudi2_ack_module_razwi_event_handler(hdev, module, 0, 0, event_mask);
		hl_check_for_glbl_errors(hdev);
	}

	return error_count;
}
8458 static int gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
8460 u32 qid_base, error_count = 0;
8464 switch (event_type) {
8465 case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_TPC5_QM:
8466 index = event_type - GAUDI2_EVENT_TPC0_QM;
8467 qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 + index * QMAN_STREAMS;
8468 qman_base = mmDCORE0_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
8470 case GAUDI2_EVENT_TPC6_QM ... GAUDI2_EVENT_TPC11_QM:
8471 index = event_type - GAUDI2_EVENT_TPC6_QM;
8472 qid_base = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 + index * QMAN_STREAMS;
8473 qman_base = mmDCORE1_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
8475 case GAUDI2_EVENT_TPC12_QM ... GAUDI2_EVENT_TPC17_QM:
8476 index = event_type - GAUDI2_EVENT_TPC12_QM;
8477 qid_base = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 + index * QMAN_STREAMS;
8478 qman_base = mmDCORE2_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
8480 case GAUDI2_EVENT_TPC18_QM ... GAUDI2_EVENT_TPC23_QM:
8481 index = event_type - GAUDI2_EVENT_TPC18_QM;
8482 qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 + index * QMAN_STREAMS;
8483 qman_base = mmDCORE3_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
8485 case GAUDI2_EVENT_TPC24_QM:
8486 qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0;
8487 qman_base = mmDCORE0_TPC6_QM_BASE;
8489 case GAUDI2_EVENT_MME0_QM:
8490 qid_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0;
8491 qman_base = mmDCORE0_MME_QM_BASE;
8493 case GAUDI2_EVENT_MME1_QM:
8494 qid_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0;
8495 qman_base = mmDCORE1_MME_QM_BASE;
8497 case GAUDI2_EVENT_MME2_QM:
8498 qid_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0;
8499 qman_base = mmDCORE2_MME_QM_BASE;
8501 case GAUDI2_EVENT_MME3_QM:
8502 qid_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0;
8503 qman_base = mmDCORE3_MME_QM_BASE;
8505 case GAUDI2_EVENT_HDMA0_QM:
8507 qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0;
8508 qman_base = mmDCORE0_EDMA0_QM_BASE;
8510 case GAUDI2_EVENT_HDMA1_QM:
8512 qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0;
8513 qman_base = mmDCORE0_EDMA1_QM_BASE;
8515 case GAUDI2_EVENT_HDMA2_QM:
		index = 2;
		qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0;
8518 qman_base = mmDCORE1_EDMA0_QM_BASE;
8520 case GAUDI2_EVENT_HDMA3_QM:
		index = 3;
		qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0;
8523 qman_base = mmDCORE1_EDMA1_QM_BASE;
8525 case GAUDI2_EVENT_HDMA4_QM:
		index = 4;
		qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0;
8528 qman_base = mmDCORE2_EDMA0_QM_BASE;
8530 case GAUDI2_EVENT_HDMA5_QM:
		index = 5;
		qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0;
8533 qman_base = mmDCORE2_EDMA1_QM_BASE;
8535 case GAUDI2_EVENT_HDMA6_QM:
8537 qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0;
8538 qman_base = mmDCORE3_EDMA0_QM_BASE;
8540 case GAUDI2_EVENT_HDMA7_QM:
8542 qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0;
8543 qman_base = mmDCORE3_EDMA1_QM_BASE;
8545 case GAUDI2_EVENT_PDMA0_QM:
8546 qid_base = GAUDI2_QUEUE_ID_PDMA_0_0;
8547 qman_base = mmPDMA0_QM_BASE;
8549 case GAUDI2_EVENT_PDMA1_QM:
8550 qid_base = GAUDI2_QUEUE_ID_PDMA_1_0;
8551 qman_base = mmPDMA1_QM_BASE;
8553 case GAUDI2_EVENT_ROTATOR0_ROT0_QM:
8554 qid_base = GAUDI2_QUEUE_ID_ROT_0_0;
8555 qman_base = mmROT0_QM_BASE;
8557 case GAUDI2_EVENT_ROTATOR1_ROT1_QM:
8558 qid_base = GAUDI2_QUEUE_ID_ROT_1_0;
8559 qman_base = mmROT1_QM_BASE;
8565 error_count = gaudi2_handle_qman_err_generic(hdev, event_type, qman_base, qid_base);
8567 /* Handle EDMA QM SEI here because there is no AXI error response event for EDMA */
8568 if (event_type >= GAUDI2_EVENT_HDMA2_QM && event_type <= GAUDI2_EVENT_HDMA5_QM) {
8569 error_count += _gaudi2_handle_qm_sei_err(hdev, qman_base, event_type);
8570 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_EDMA, index, 0, event_mask);
	hl_check_for_glbl_errors(hdev);

	return error_count;
}
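/*
 * Every case above maps a QM event to its (qid_base, qman_base) pair with
 * simple affine arithmetic over the engine index. A compact sketch of the
 * TPC flavour of that mapping (parameters stand in for the real gaudi2
 * constants):
 */
static inline u64 example_tpc_qman_base(u64 dcore0_tpc0_qm_base, u32 tpc_index,
					u32 tpcs_per_dcore, u64 dcore_stride,
					u64 tpc_stride)
{
	/* one dcore stride per group of TPCs, one TPC stride within the dcore */
	return dcore0_tpc0_qm_base +
		(tpc_index / tpcs_per_dcore) * dcore_stride +
		(tpc_index % tpcs_per_dcore) * tpc_stride;
}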
8578 static int gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev, u16 event_type)
8580 u32 i, sts_val, sts_clr_val, error_count = 0, arc_farm;
	for (arc_farm = 0 ; arc_farm < NUM_OF_ARC_FARMS_ARC ; arc_farm++) {
		sts_clr_val = 0;
8584 sts_val = RREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_STS +
8585 (arc_farm * ARC_FARM_OFFSET));
8587 for (i = 0 ; i < GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE ; i++) {
8588 if (sts_val & BIT(i)) {
8589 gaudi2_print_event(hdev, event_type, true,
8590 "ARC FARM ARC %u err cause: %s",
8591 arc_farm, gaudi2_arc_sei_error_cause[i]);
8592 sts_clr_val |= BIT(i);
		WREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_CLR + (arc_farm * ARC_FARM_OFFSET),
				sts_clr_val);
	}
	hl_check_for_glbl_errors(hdev);

	return error_count;
}
8605 static int gaudi2_handle_cpu_sei_err(struct hl_device *hdev, u16 event_type)
8607 u32 i, sts_val, sts_clr_val = 0, error_count = 0;
8609 sts_val = RREG32(mmCPU_IF_CPU_SEI_INTR_STS);
8611 for (i = 0 ; i < GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE ; i++) {
8612 if (sts_val & BIT(i)) {
8613 gaudi2_print_event(hdev, event_type, true,
8614 "err cause: %s", gaudi2_cpu_sei_error_cause[i]);
8615 sts_clr_val |= BIT(i);
8620 hl_check_for_glbl_errors(hdev);
8622 WREG32(mmCPU_IF_CPU_SEI_INTR_CLR, sts_clr_val);
8627 static int gaudi2_handle_rot_err(struct hl_device *hdev, u8 rot_index, u16 event_type,
8628 struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause,
8631 u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data);
8632 u32 error_count = 0;
8635 for (i = 0 ; i < GAUDI2_NUM_OF_ROT_ERR_CAUSE ; i++)
8636 if (intr_cause_data & BIT(i)) {
8637 gaudi2_print_event(hdev, event_type, true,
8638 "err cause: %s", guadi2_rot_error_cause[i]);
8642 /* check if RAZWI happened */
8643 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, rot_index, 0, event_mask);
8644 hl_check_for_glbl_errors(hdev);
8649 static int gaudi2_tpc_ack_interrupts(struct hl_device *hdev, u8 tpc_index, u16 event_type,
8650 struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause,
8653 u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data);
8654 u32 error_count = 0;
8657 for (i = 0 ; i < GAUDI2_NUM_OF_TPC_INTR_CAUSE ; i++)
8658 if (intr_cause_data & BIT(i)) {
8659 gaudi2_print_event(hdev, event_type, true,
8660 "interrupt cause: %s", gaudi2_tpc_interrupts_cause[i]);
8664 /* check if RAZWI happened */
8665 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, tpc_index, 0, event_mask);
8666 hl_check_for_glbl_errors(hdev);
8671 static int gaudi2_handle_dec_err(struct hl_device *hdev, u8 dec_index, u16 event_type,
8674 u32 sts_addr, sts_val, sts_clr_val = 0, error_count = 0;
	if (dec_index < NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES)
		/* DCORE DEC */
		sts_addr = mmDCORE0_VDEC0_BRDG_CTRL_CAUSE_INTR +
				DCORE_OFFSET * (dec_index / NUM_OF_DEC_PER_DCORE) +
				DCORE_VDEC_OFFSET * (dec_index % NUM_OF_DEC_PER_DCORE);
	else
		/* PCIE DEC */
		sts_addr = mmPCIE_VDEC0_BRDG_CTRL_CAUSE_INTR + PCIE_VDEC_OFFSET *
8685 (dec_index - NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES);
8687 sts_val = RREG32(sts_addr);
8689 for (i = 0 ; i < GAUDI2_NUM_OF_DEC_ERR_CAUSE ; i++) {
8690 if (sts_val & BIT(i)) {
8691 gaudi2_print_event(hdev, event_type, true,
8692 "err cause: %s", gaudi2_dec_error_cause[i]);
8693 sts_clr_val |= BIT(i);
8698 /* check if RAZWI happened */
8699 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, dec_index, 0, event_mask);
8700 hl_check_for_glbl_errors(hdev);
	/* Write 1 to clear errors */
	WREG32(sts_addr, sts_clr_val);

	return error_count;
}
8708 static int gaudi2_handle_mme_err(struct hl_device *hdev, u8 mme_index, u16 event_type,
8711 u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0, error_count = 0;
8714 sts_addr = mmDCORE0_MME_CTRL_LO_INTR_CAUSE + DCORE_OFFSET * mme_index;
8715 sts_clr_addr = mmDCORE0_MME_CTRL_LO_INTR_CLEAR + DCORE_OFFSET * mme_index;
8717 sts_val = RREG32(sts_addr);
8719 for (i = 0 ; i < GAUDI2_NUM_OF_MME_ERR_CAUSE ; i++) {
8720 if (sts_val & BIT(i)) {
8721 gaudi2_print_event(hdev, event_type, true,
8722 "err cause: %s", guadi2_mme_error_cause[i]);
8723 sts_clr_val |= BIT(i);
8728 /* check if RAZWI happened */
8729 for (i = MME_WRITE ; i < MME_INITIATORS_MAX ; i++)
8730 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, i, event_mask);
8732 hl_check_for_glbl_errors(hdev);
8734 WREG32(sts_clr_addr, sts_clr_val);
8739 static int gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u16 event_type,
8740 u64 intr_cause_data)
8742 int i, error_count = 0;
8744 for (i = 0 ; i < GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE ; i++)
8745 if (intr_cause_data & BIT(i)) {
8746 gaudi2_print_event(hdev, event_type, true,
8747 "err cause: %s", guadi2_mme_sbte_error_cause[i]);
8751 hl_check_for_glbl_errors(hdev);
8756 static int gaudi2_handle_mme_wap_err(struct hl_device *hdev, u8 mme_index, u16 event_type,
8759 u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0, error_count = 0;
8762 sts_addr = mmDCORE0_MME_ACC_INTR_CAUSE + DCORE_OFFSET * mme_index;
8763 sts_clr_addr = mmDCORE0_MME_ACC_INTR_CLEAR + DCORE_OFFSET * mme_index;
8765 sts_val = RREG32(sts_addr);
8767 for (i = 0 ; i < GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE ; i++) {
8768 if (sts_val & BIT(i)) {
8769 gaudi2_print_event(hdev, event_type, true,
8770 "err cause: %s", guadi2_mme_wap_error_cause[i]);
8771 sts_clr_val |= BIT(i);
8776 /* check if RAZWI happened on WAP0/1 */
8777 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP0, event_mask);
8778 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP1, event_mask);
8779 hl_check_for_glbl_errors(hdev);
8781 WREG32(sts_clr_addr, sts_clr_val);
8786 static int gaudi2_handle_kdma_core_event(struct hl_device *hdev, u16 event_type,
8787 u64 intr_cause_data)
8789 u32 error_count = 0;
	/* If an AXI read or write error is received, an error is reported and
	 * an interrupt message is sent. Due to a HW errata, when reading the
	 * cause register of the KDMA engine, the reported error is always HBW
	 * even if the actual error was caused by a LBW KDMA transaction.
	 */
8797 for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++)
8798 if (intr_cause_data & BIT(i)) {
8799 gaudi2_print_event(hdev, event_type, true,
8800 "err cause: %s", gaudi2_kdma_core_interrupts_cause[i]);
8804 hl_check_for_glbl_errors(hdev);
8809 static int gaudi2_handle_dma_core_event(struct hl_device *hdev, u16 event_type, int sts_addr)
8811 u32 error_count = 0, sts_val = RREG32(sts_addr);
8814 for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++)
8815 if (sts_val & BIT(i)) {
8816 gaudi2_print_event(hdev, event_type, true,
8817 "err cause: %s", gaudi2_dma_core_interrupts_cause[i]);
8821 hl_check_for_glbl_errors(hdev);
8826 static int gaudi2_handle_pdma_core_event(struct hl_device *hdev, u16 event_type, int pdma_idx)
8830 sts_addr = mmPDMA0_CORE_ERR_CAUSE + pdma_idx * PDMA_OFFSET;
8831 return gaudi2_handle_dma_core_event(hdev, event_type, sts_addr);
8834 static int gaudi2_handle_edma_core_event(struct hl_device *hdev, u16 event_type, int edma_idx)
8836 static const int edma_event_index_map[] = {2, 3, 0, 1, 6, 7, 4, 5};
8837 u32 sts_addr, index;
8839 index = edma_event_index_map[edma_idx];
8841 sts_addr = mmDCORE0_EDMA0_CORE_ERR_CAUSE +
8842 DCORE_OFFSET * (index / NUM_OF_EDMA_PER_DCORE) +
8843 DCORE_EDMA_OFFSET * (index % NUM_OF_EDMA_PER_DCORE);
8844 return gaudi2_handle_dma_core_event(hdev, event_type, sts_addr);
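/*
 * EDMA core events arrive in an order that differs from the physical
 * dcore/engine layout, hence the static edma_event_index_map above. A short
 * sketch of the resulting two-step translation (strides are placeholders for
 * the real gaudi2 offsets):
 */
static inline u32 example_edma_err_cause_addr(u32 dcore0_edma0_base, u32 event_idx,
						u32 edma_per_dcore, u32 dcore_stride,
						u32 edma_stride)
{
	static const int event_to_engine[] = {2, 3, 0, 1, 6, 7, 4, 5};
	u32 engine = event_to_engine[event_idx];

	return dcore0_edma0_base +
		dcore_stride * (engine / edma_per_dcore) +
		edma_stride * (engine % edma_per_dcore);
}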
8847 static void gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(struct hl_device *hdev, u64 *event_mask)
8849 u32 mstr_if_base_addr = mmPCIE_MSTR_RR_MSTR_IF_RR_SHRD_HBW_BASE, razwi_happened_addr;
8851 razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED;
8852 if (RREG32(razwi_happened_addr)) {
8853 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE",
8854 GAUDI2_ENGINE_ID_PCIE, event_mask);
8855 WREG32(razwi_happened_addr, 0x1);
8858 razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED;
8859 if (RREG32(razwi_happened_addr)) {
8860 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE",
8861 GAUDI2_ENGINE_ID_PCIE, event_mask);
8862 WREG32(razwi_happened_addr, 0x1);
8865 razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED;
8866 if (RREG32(razwi_happened_addr)) {
8867 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE",
8868 GAUDI2_ENGINE_ID_PCIE, event_mask);
8869 WREG32(razwi_happened_addr, 0x1);
8872 razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED;
8873 if (RREG32(razwi_happened_addr)) {
8874 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE",
8875 GAUDI2_ENGINE_ID_PCIE, event_mask);
8876 WREG32(razwi_happened_addr, 0x1);
8880 static int gaudi2_print_pcie_addr_dec_info(struct hl_device *hdev, u16 event_type,
8881 u64 intr_cause_data, u64 *event_mask)
8883 u32 error_count = 0;
8886 for (i = 0 ; i < GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE ; i++) {
8887 if (!(intr_cause_data & BIT_ULL(i)))
8890 gaudi2_print_event(hdev, event_type, true,
8891 "err cause: %s", gaudi2_pcie_addr_dec_error_cause[i]);
8894 switch (intr_cause_data & BIT_ULL(i)) {
8895 case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK:
8896 hl_check_for_glbl_errors(hdev);
8898 case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK:
8899 gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(hdev, event_mask);
8907 static int gaudi2_handle_pif_fatal(struct hl_device *hdev, u16 event_type,
8908 u64 intr_cause_data)
8911 u32 error_count = 0;
8914 for (i = 0 ; i < GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE ; i++) {
8915 if (intr_cause_data & BIT_ULL(i)) {
8916 gaudi2_print_event(hdev, event_type, true,
8917 "err cause: %s", gaudi2_pmmu_fatal_interrupts_cause[i]);
8925 static int gaudi2_handle_hif_fatal(struct hl_device *hdev, u16 event_type, u64 intr_cause_data)
8927 u32 error_count = 0;
8930 for (i = 0 ; i < GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE ; i++) {
8931 if (intr_cause_data & BIT_ULL(i)) {
8932 gaudi2_print_event(hdev, event_type, true,
8933 "err cause: %s", gaudi2_hif_fatal_interrupts_cause[i]);
8941 static void gaudi2_handle_page_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu,
8947 valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID));
	if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_PAGE_ERR_VALID_ENTRY_MASK))
		return;
8952 val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE));
	addr = val & DCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA_63_32_MASK;
	addr <<= 32;
	addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA));

	if (!is_pmmu)
		addr = gaudi2_mmu_descramble_addr(hdev, addr);
8960 dev_err_ratelimited(hdev->dev, "%s page fault on va 0x%llx\n",
8961 is_pmmu ? "PMMU" : "HMMU", addr);
8962 hl_handle_page_fault(hdev, addr, 0, is_pmmu, event_mask);
8964 WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID), 0);
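/*
 * The fault capture spreads the VA across two registers: bits 63:32 in the
 * CAPTURE register and bits 31:0 in CAPTURE_VA, reassembled above before the
 * optional HMMU descramble. Minimal sketch of the reassembly (descrambling
 * omitted; arguments are raw register values):
 */
static inline u64 example_capture_va(u32 capture_hi, u32 capture_lo, u32 va_63_32_mask)
{
	u64 addr = capture_hi & va_63_32_mask;

	addr <<= 32;		/* move captured bits into 63:32 */
	addr |= capture_lo;	/* low 32 bits of the faulting VA */

	return addr;
}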
8967 static void gaudi2_handle_access_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu)
8972 valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID));
	if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_ACCESS_ERR_VALID_ENTRY_MASK))
		return;
8977 val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE));
	addr = val & DCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA_63_32_MASK;
	addr <<= 32;
	addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA));

	if (!is_pmmu)
		addr = gaudi2_mmu_descramble_addr(hdev, addr);
8985 dev_err_ratelimited(hdev->dev, "%s access error on va 0x%llx\n",
8986 is_pmmu ? "PMMU" : "HMMU", addr);
8987 WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID), 0);
8990 static int gaudi2_handle_mmu_spi_sei_generic(struct hl_device *hdev, u16 event_type,
8991 u64 mmu_base, bool is_pmmu, u64 *event_mask)
8993 u32 spi_sei_cause, interrupt_clr = 0x0, error_count = 0;
8996 spi_sei_cause = RREG32(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET);
8998 for (i = 0 ; i < GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE ; i++) {
8999 if (spi_sei_cause & BIT(i)) {
9000 gaudi2_print_event(hdev, event_type, true,
9001 "err cause: %s", gaudi2_mmu_spi_sei[i].cause);
			if (i == 0)
				gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, event_mask);
			else if (i == 1)
				gaudi2_handle_access_error(hdev, mmu_base, is_pmmu);
9008 if (gaudi2_mmu_spi_sei[i].clear_bit >= 0)
9009 interrupt_clr |= BIT(gaudi2_mmu_spi_sei[i].clear_bit);
9016 WREG32_AND(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET, ~spi_sei_cause);
9018 /* Clear interrupt */
9019 WREG32(mmu_base + MMU_INTERRUPT_CLR_OFFSET, interrupt_clr);
9024 static int gaudi2_handle_sm_err(struct hl_device *hdev, u16 event_type, u8 sm_index)
9026 u32 sei_cause_addr, sei_cause_val, sei_cause_cause, sei_cause_log,
9027 cq_intr_addr, cq_intr_val, cq_intr_queue_index, error_count = 0;
9030 sei_cause_addr = mmDCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE + DCORE_OFFSET * sm_index;
9031 cq_intr_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_INTR + DCORE_OFFSET * sm_index;
9033 sei_cause_val = RREG32(sei_cause_addr);
9034 sei_cause_cause = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_CAUSE_MASK, sei_cause_val);
9035 cq_intr_val = RREG32(cq_intr_addr);
9038 if (sei_cause_cause) {
9039 /* There are corresponding SEI_CAUSE_log bits for every SEI_CAUSE_cause bit */
		sei_cause_log = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_LOG_MASK,
					sei_cause_val);
9043 for (i = 0 ; i < GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE ; i++) {
9044 if (!(sei_cause_cause & BIT(i)))
9047 gaudi2_print_event(hdev, event_type, true,
9048 "err cause: %s. %s: 0x%X",
9049 gaudi2_sm_sei_cause[i].cause_name,
					gaudi2_sm_sei_cause[i].log_name,
					sei_cause_log);
9056 /* Clear SM_SEI_CAUSE */
9057 WREG32(sei_cause_addr, 0);
9061 if (cq_intr_val & DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_SEC_INTR_MASK) {
9062 cq_intr_queue_index =
			FIELD_GET(DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_INTR_QUEUE_INDEX_MASK,
					cq_intr_val);
9066 dev_err_ratelimited(hdev->dev, "SM%u err. err cause: CQ_INTR. queue index: %u\n",
9067 sm_index, cq_intr_queue_index);
9071 WREG32(cq_intr_addr, 0);
9074 hl_check_for_glbl_errors(hdev);
9079 static u64 get_hmmu_base(u16 event_type)
9081 u8 dcore, index_in_dcore;
9083 switch (event_type) {
9084 case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP:
9085 case GAUDI2_EVENT_HMMU0_SPI_BASE ... GAUDI2_EVENT_HMMU0_SECURITY_ERROR:
9089 case GAUDI2_EVENT_HMMU_1_AXI_ERR_RSP:
9090 case GAUDI2_EVENT_HMMU1_SPI_BASE ... GAUDI2_EVENT_HMMU1_SECURITY_ERROR:
9094 case GAUDI2_EVENT_HMMU_2_AXI_ERR_RSP:
9095 case GAUDI2_EVENT_HMMU2_SPI_BASE ... GAUDI2_EVENT_HMMU2_SECURITY_ERROR:
9099 case GAUDI2_EVENT_HMMU_3_AXI_ERR_RSP:
9100 case GAUDI2_EVENT_HMMU3_SPI_BASE ... GAUDI2_EVENT_HMMU3_SECURITY_ERROR:
9104 case GAUDI2_EVENT_HMMU_4_AXI_ERR_RSP:
9105 case GAUDI2_EVENT_HMMU4_SPI_BASE ... GAUDI2_EVENT_HMMU4_SECURITY_ERROR:
9109 case GAUDI2_EVENT_HMMU_5_AXI_ERR_RSP:
9110 case GAUDI2_EVENT_HMMU5_SPI_BASE ... GAUDI2_EVENT_HMMU5_SECURITY_ERROR:
9114 case GAUDI2_EVENT_HMMU_6_AXI_ERR_RSP:
9115 case GAUDI2_EVENT_HMMU6_SPI_BASE ... GAUDI2_EVENT_HMMU6_SECURITY_ERROR:
9119 case GAUDI2_EVENT_HMMU_7_AXI_ERR_RSP:
9120 case GAUDI2_EVENT_HMMU7_SPI_BASE ... GAUDI2_EVENT_HMMU7_SECURITY_ERROR:
9124 case GAUDI2_EVENT_HMMU_8_AXI_ERR_RSP:
9125 case GAUDI2_EVENT_HMMU8_SPI_BASE ... GAUDI2_EVENT_HMMU8_SECURITY_ERROR:
9129 case GAUDI2_EVENT_HMMU_9_AXI_ERR_RSP:
9130 case GAUDI2_EVENT_HMMU9_SPI_BASE ... GAUDI2_EVENT_HMMU9_SECURITY_ERROR:
9134 case GAUDI2_EVENT_HMMU_10_AXI_ERR_RSP:
9135 case GAUDI2_EVENT_HMMU10_SPI_BASE ... GAUDI2_EVENT_HMMU10_SECURITY_ERROR:
9139 case GAUDI2_EVENT_HMMU_11_AXI_ERR_RSP:
9140 case GAUDI2_EVENT_HMMU11_SPI_BASE ... GAUDI2_EVENT_HMMU11_SECURITY_ERROR:
9144 case GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
9145 case GAUDI2_EVENT_HMMU12_SPI_BASE ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
9149 case GAUDI2_EVENT_HMMU_13_AXI_ERR_RSP:
9150 case GAUDI2_EVENT_HMMU13_SPI_BASE ... GAUDI2_EVENT_HMMU13_SECURITY_ERROR:
9154 case GAUDI2_EVENT_HMMU_14_AXI_ERR_RSP:
9155 case GAUDI2_EVENT_HMMU14_SPI_BASE ... GAUDI2_EVENT_HMMU14_SECURITY_ERROR:
9159 case GAUDI2_EVENT_HMMU_15_AXI_ERR_RSP:
9160 case GAUDI2_EVENT_HMMU15_SPI_BASE ... GAUDI2_EVENT_HMMU15_SECURITY_ERROR:
	default:
		return ULONG_MAX;
	}

	return mmDCORE0_HMMU0_MMU_BASE + dcore * DCORE_OFFSET + index_in_dcore * DCORE_HMMU_OFFSET;
}
9171 static int gaudi2_handle_mmu_spi_sei_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
9173 bool is_pmmu = false;
9174 u32 error_count = 0;
9177 switch (event_type) {
9178 case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
9179 case GAUDI2_EVENT_HMMU0_SPI_BASE ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
9180 mmu_base = get_hmmu_base(event_type);
9183 case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR:
9184 case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0:
		is_pmmu = true;
		mmu_base = mmPMMU_HBW_MMU_BASE;
		break;
	default:
		return 0;
	}

	if (mmu_base == ULONG_MAX)
		return 0;
9195 error_count = gaudi2_handle_mmu_spi_sei_generic(hdev, event_type, mmu_base,
9196 is_pmmu, event_mask);
9197 hl_check_for_glbl_errors(hdev);
9203 /* returns true if hard reset is required (ECC DERR or Read parity), false otherwise (ECC SERR) */
9204 static bool gaudi2_hbm_sei_handle_read_err(struct hl_device *hdev,
9205 struct hl_eq_hbm_sei_read_err_intr_info *rd_err_data, u32 err_cnt)
9207 u32 addr, beat, beat_shift;
9210 dev_err_ratelimited(hdev->dev,
9211 "READ ERROR count: ECC SERR: %d, ECC DERR: %d, RD_PARITY: %d\n",
9212 FIELD_GET(HBM_ECC_SERR_CNTR_MASK, err_cnt),
9213 FIELD_GET(HBM_ECC_DERR_CNTR_MASK, err_cnt),
9214 FIELD_GET(HBM_RD_PARITY_CNTR_MASK, err_cnt));
9216 addr = le32_to_cpu(rd_err_data->dbg_rd_err_addr.rd_addr_val);
9217 dev_err_ratelimited(hdev->dev,
9218 "READ ERROR address: sid(%u), bg(%u), ba(%u), col(%u), row(%u)\n",
9219 FIELD_GET(HBM_RD_ADDR_SID_MASK, addr),
9220 FIELD_GET(HBM_RD_ADDR_BG_MASK, addr),
9221 FIELD_GET(HBM_RD_ADDR_BA_MASK, addr),
9222 FIELD_GET(HBM_RD_ADDR_COL_MASK, addr),
9223 FIELD_GET(HBM_RD_ADDR_ROW_MASK, addr));
9225 /* For each beat (RDQS edge), look for possible errors and print relevant info */
9226 for (beat = 0 ; beat < 4 ; beat++) {
9227 if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
9228 (HBM_RD_ERR_SERR_BEAT0_MASK << beat))
9229 dev_err_ratelimited(hdev->dev, "Beat%d ECC SERR: DM: %#x, Syndrome: %#x\n",
				beat,
				le32_to_cpu(rd_err_data->dbg_rd_err_dm),
9232 le32_to_cpu(rd_err_data->dbg_rd_err_syndrome));
9234 if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
9235 (HBM_RD_ERR_DERR_BEAT0_MASK << beat)) {
9236 dev_err_ratelimited(hdev->dev, "Beat%d ECC DERR: DM: %#x, Syndrome: %#x\n",
				beat,
				le32_to_cpu(rd_err_data->dbg_rd_err_dm),
9239 le32_to_cpu(rd_err_data->dbg_rd_err_syndrome));
9243 beat_shift = beat * HBM_RD_ERR_BEAT_SHIFT;
9244 if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
9245 (HBM_RD_ERR_PAR_ERR_BEAT0_MASK << beat_shift)) {
9246 dev_err_ratelimited(hdev->dev,
9247 "Beat%d read PARITY: DM: %#x, PAR data: %#x\n",
					beat,
					le32_to_cpu(rd_err_data->dbg_rd_err_dm),
9250 (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
9251 (HBM_RD_ERR_PAR_DATA_BEAT0_MASK << beat_shift)) >>
9252 (HBM_RD_ERR_PAR_DATA_BEAT0_SHIFT + beat_shift));
9256 dev_err_ratelimited(hdev->dev, "Beat%d DQ data:\n", beat);
9257 dev_err_ratelimited(hdev->dev, "\t0x%08x\n",
9258 le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2]));
9259 dev_err_ratelimited(hdev->dev, "\t0x%08x\n",
9260 le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2 + 1]));
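/*
 * The per-beat checks above shift the BEAT0 masks by a per-beat offset to
 * reach the matching field for beats 1..3. A condensed sketch of that
 * shifted-field extraction (the EX_* masks and shifts are illustrative, not
 * the real HBM layout):
 */
#define EX_PAR_ERR_BEAT0_MASK	BIT(16)			/* hypothetical */
#define EX_PAR_DATA_BEAT0_MASK	GENMASK(19, 18)		/* hypothetical */
#define EX_PAR_DATA_BEAT0_SHIFT	18			/* hypothetical */
#define EX_BEAT_SHIFT		4			/* hypothetical bits per beat */

static inline u32 example_beat_par_data(u32 misc, u32 beat)
{
	u32 beat_shift = beat * EX_BEAT_SHIFT;

	if (!(misc & (EX_PAR_ERR_BEAT0_MASK << beat_shift)))
		return 0;	/* no parity error latched for this beat */

	return (misc & (EX_PAR_DATA_BEAT0_MASK << beat_shift)) >>
		(EX_PAR_DATA_BEAT0_SHIFT + beat_shift);
}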
9266 static void gaudi2_hbm_sei_print_wr_par_info(struct hl_device *hdev,
9267 struct hl_eq_hbm_sei_wr_par_intr_info *wr_par_err_data, u32 err_cnt)
9269 struct hbm_sei_wr_cmd_address *wr_cmd_addr = wr_par_err_data->dbg_last_wr_cmds;
9270 u32 i, curr_addr, derr = wr_par_err_data->dbg_derr;
9272 dev_err_ratelimited(hdev->dev, "WRITE PARITY ERROR count: %d\n", err_cnt);
9274 dev_err_ratelimited(hdev->dev, "CK-0 DERR: 0x%02x, CK-1 DERR: 0x%02x\n",
9275 derr & 0x3, derr & 0xc);
9277 /* JIRA H6-3286 - the following prints may not be valid */
9278 dev_err_ratelimited(hdev->dev, "Last latched write commands addresses:\n");
9279 for (i = 0 ; i < HBM_WR_PAR_CMD_LIFO_LEN ; i++) {
9280 curr_addr = le32_to_cpu(wr_cmd_addr[i].dbg_wr_cmd_addr);
9281 dev_err_ratelimited(hdev->dev,
9282 "\twrite cmd[%u]: Address: SID(%u) BG(%u) BA(%u) COL(%u).\n",
9284 FIELD_GET(WR_PAR_LAST_CMD_SID_MASK, curr_addr),
9285 FIELD_GET(WR_PAR_LAST_CMD_BG_MASK, curr_addr),
9286 FIELD_GET(WR_PAR_LAST_CMD_BA_MASK, curr_addr),
9287 FIELD_GET(WR_PAR_LAST_CMD_COL_MASK, curr_addr));
9291 static void gaudi2_hbm_sei_print_ca_par_info(struct hl_device *hdev,
9292 struct hl_eq_hbm_sei_ca_par_intr_info *ca_par_err_data, u32 err_cnt)
9294 __le32 *col_cmd = ca_par_err_data->dbg_col;
9295 __le16 *row_cmd = ca_par_err_data->dbg_row;
9298 dev_err_ratelimited(hdev->dev, "CA ERROR count: %d\n", err_cnt);
9300 dev_err_ratelimited(hdev->dev, "Last latched C&R bus commands:\n");
9301 for (i = 0 ; i < HBM_CA_ERR_CMD_LIFO_LEN ; i++)
9302 dev_err_ratelimited(hdev->dev, "cmd%u: ROW(0x%04x) COL(0x%05x)\n", i,
9303 le16_to_cpu(row_cmd[i]) & (u16)GENMASK(13, 0),
9304 le32_to_cpu(col_cmd[i]) & (u32)GENMASK(17, 0));
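/*
 * Row commands carry 14 address bits and column commands 18, so the print
 * loop above trims the raw LIFO entries with GENMASK. Sketch of that
 * trimming:
 */
static inline void example_trim_ca_cmd(u16 raw_row, u32 raw_col, u16 *row, u32 *col)
{
	*row = raw_row & (u16)GENMASK(13, 0);	/* 14-bit row address */
	*col = raw_col & (u32)GENMASK(17, 0);	/* 18-bit column address */
}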
9307 /* Returns true if hard reset is needed or false otherwise */
9308 static bool gaudi2_handle_hbm_mc_sei_err(struct hl_device *hdev, u16 event_type,
9309 struct hl_eq_hbm_sei_data *sei_data)
9311 bool require_hard_reset = false;
9312 u32 hbm_id, mc_id, cause_idx;
9314 hbm_id = (event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 4;
9315 mc_id = ((event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 2) % 2;
9317 cause_idx = sei_data->hdr.sei_cause;
9318 if (cause_idx > GAUDI2_NUM_OF_HBM_SEI_CAUSE - 1) {
9319 gaudi2_print_event(hdev, event_type, true,
9321 "Invalid HBM SEI event cause (%d) provided by FW", cause_idx);
9325 gaudi2_print_event(hdev, event_type, !sei_data->hdr.is_critical,
9326 "System %s Error Interrupt - HBM(%u) MC(%u) MC_CH(%u) MC_PC(%u). Error cause: %s",
9327 sei_data->hdr.is_critical ? "Critical" : "Non-critical",
9328 hbm_id, mc_id, sei_data->hdr.mc_channel, sei_data->hdr.mc_pseudo_channel,
9329 hbm_mc_sei_cause[cause_idx]);
9331 /* Print error-specific info */
9332 switch (cause_idx) {
9333 case HBM_SEI_CATTRIP:
9334 require_hard_reset = true;
9337 case HBM_SEI_CMD_PARITY_EVEN:
9338 gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_even_info,
9339 le32_to_cpu(sei_data->hdr.cnt));
9340 require_hard_reset = true;
9343 case HBM_SEI_CMD_PARITY_ODD:
9344 gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_odd_info,
9345 le32_to_cpu(sei_data->hdr.cnt));
9346 require_hard_reset = true;
9349 case HBM_SEI_WRITE_DATA_PARITY_ERR:
9350 gaudi2_hbm_sei_print_wr_par_info(hdev, &sei_data->wr_parity_info,
9351 le32_to_cpu(sei_data->hdr.cnt));
9352 require_hard_reset = true;
9355 case HBM_SEI_READ_ERR:
9356 /* Unlike other SEI events, read error requires further processing of the
9357 * raw data in order to determine the root cause.
9359 require_hard_reset = gaudi2_hbm_sei_handle_read_err(hdev,
9360 &sei_data->read_err_info,
9361 le32_to_cpu(sei_data->hdr.cnt));
9368 require_hard_reset |= !!sei_data->hdr.is_critical;
9370 return require_hard_reset;
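/*
 * Four SEI event types are allocated per HBM (two MCs, one severe and one
 * non-severe event each), which is why the decode above divides the event
 * offset by 4 for the HBM id and by 2 (mod 2) for the MC id. Worked sketch:
 */
static inline void example_decode_hbm_sei_event(u16 event_offset, u32 *hbm_id, u32 *mc_id)
{
	/* event_offset = event_type - first HBM SEI event */
	*hbm_id = event_offset / 4;		/* 4 events per HBM */
	*mc_id = (event_offset / 2) % 2;	/* 2 events per MC */
}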
9373 static int gaudi2_handle_hbm_cattrip(struct hl_device *hdev, u16 event_type,
9374 u64 intr_cause_data)
9376 if (intr_cause_data) {
9377 gaudi2_print_event(hdev, event_type, true,
9378 "temperature error cause: %#llx", intr_cause_data);
9385 static int gaudi2_handle_hbm_mc_spi(struct hl_device *hdev, u64 intr_cause_data)
9387 u32 i, error_count = 0;
9389 for (i = 0 ; i < GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE ; i++)
9390 if (intr_cause_data & hbm_mc_spi[i].mask) {
9391 dev_dbg(hdev->dev, "HBM spi event: notification cause(%s)\n",
9392 hbm_mc_spi[i].cause);
9399 static void gaudi2_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
9401 ktime_t zero_time = ktime_set(0, 0);
9403 mutex_lock(&hdev->clk_throttling.lock);
9405 switch (event_type) {
9406 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S:
9407 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
9408 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
9409 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
9410 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
9411 dev_dbg_ratelimited(hdev->dev, "Clock throttling due to power consumption\n");
9414 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E:
9415 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
9416 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
		dev_dbg_ratelimited(hdev->dev, "Power envelope is safe, back to optimal clock\n");
9420 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S:
9421 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
9422 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
9423 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
9424 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
9425 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9426 dev_info_ratelimited(hdev->dev, "Clock throttling due to overheating\n");
9429 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
9430 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
9431 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
9432 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		dev_info_ratelimited(hdev->dev, "Thermal envelope is safe, back to optimal clock\n");
9437 dev_err(hdev->dev, "Received invalid clock change event %d\n", event_type);
9441 mutex_unlock(&hdev->clk_throttling.lock);
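/*
 * The throttling bookkeeping above records a start timestamp on each _S event
 * and an end timestamp on the matching _E event, while OR-ing the reason into
 * the current/aggregated masks. Minimal sketch of the start half for one
 * reason slot (struct and names are illustrative):
 */
struct example_throttle_slot {
	ktime_t start;
	ktime_t end;
};

static inline void example_throttle_start(struct example_throttle_slot *slot,
						u32 *current_reason, u32 *aggregated_reason,
						u32 reason_bit)
{
	*current_reason |= reason_bit;
	*aggregated_reason |= reason_bit;
	slot->start = ktime_get();
	slot->end = ktime_set(0, 0);	/* zero means still throttling */
}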
9444 static void gaudi2_print_out_of_sync_info(struct hl_device *hdev, u16 event_type,
9445 struct cpucp_pkt_sync_err *sync_err)
9447 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
9449 gaudi2_print_event(hdev, event_type, false,
9450 "FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d",
9451 le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci),
9452 q->pi, atomic_read(&q->ci));
9455 static int gaudi2_handle_pcie_p2p_msix(struct hl_device *hdev, u16 event_type)
9457 u32 p2p_intr, msix_gw_intr, error_count = 0;
9459 p2p_intr = RREG32(mmPCIE_WRAP_P2P_INTR);
9460 msix_gw_intr = RREG32(mmPCIE_WRAP_MSIX_GW_INTR);
	if (p2p_intr) {
		gaudi2_print_event(hdev, event_type, true,
			"pcie p2p transaction terminated due to security, req_id(0x%x)",
			RREG32(mmPCIE_WRAP_P2P_REQ_ID));

		WREG32(mmPCIE_WRAP_P2P_INTR, 0x1);
		error_count++;
	}
	if (msix_gw_intr) {
		gaudi2_print_event(hdev, event_type, true,
			"pcie msi-x gen denied due to vector num check failure, vec(0x%X)",
			RREG32(mmPCIE_WRAP_MSIX_GW_VEC));

		WREG32(mmPCIE_WRAP_MSIX_GW_INTR, 0x1);
		error_count++;
	}

	return error_count;
}
9483 static int gaudi2_handle_pcie_drain(struct hl_device *hdev,
9484 struct hl_eq_pcie_drain_ind_data *drain_data)
9486 u64 lbw_rd, lbw_wr, hbw_rd, hbw_wr, cause, error_count = 0;
9488 cause = le64_to_cpu(drain_data->intr_cause.intr_cause_data);
9489 lbw_rd = le64_to_cpu(drain_data->drain_rd_addr_lbw);
9490 lbw_wr = le64_to_cpu(drain_data->drain_wr_addr_lbw);
9491 hbw_rd = le64_to_cpu(drain_data->drain_rd_addr_hbw);
9492 hbw_wr = le64_to_cpu(drain_data->drain_wr_addr_hbw);
9494 if (cause & BIT_ULL(0)) {
9495 dev_err_ratelimited(hdev->dev,
9496 "PCIE AXI drain LBW completed, read_err %u, write_err %u\n",
9497 !!lbw_rd, !!lbw_wr);
9501 if (cause & BIT_ULL(1)) {
9502 dev_err_ratelimited(hdev->dev,
9503 "PCIE AXI drain HBW completed, raddr %#llx, waddr %#llx\n",
9511 static int gaudi2_handle_psoc_drain(struct hl_device *hdev, u64 intr_cause_data)
9513 u32 error_count = 0;
9516 for (i = 0 ; i < GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE ; i++) {
9517 if (intr_cause_data & BIT_ULL(i)) {
9518 dev_err_ratelimited(hdev->dev, "PSOC %s completed\n",
9519 gaudi2_psoc_axi_drain_interrupts_cause[i]);
9524 hl_check_for_glbl_errors(hdev);
9529 static void gaudi2_print_cpu_pkt_failure_info(struct hl_device *hdev, u16 event_type,
9530 struct cpucp_pkt_sync_err *sync_err)
9532 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
9534 gaudi2_print_event(hdev, event_type, false,
9535 "FW reported sanity check failure, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d",
9536 le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
9539 static int hl_arc_event_handle(struct hl_device *hdev, u16 event_type,
9540 struct hl_eq_engine_arc_intr_data *data)
9542 struct hl_engine_arc_dccm_queue_full_irq *q;
9543 u32 intr_type, engine_id;
9546 intr_type = le32_to_cpu(data->intr_type);
9547 engine_id = le32_to_cpu(data->engine_id);
9548 payload = le64_to_cpu(data->payload);
9550 switch (intr_type) {
9551 case ENGINE_ARC_DCCM_QUEUE_FULL_IRQ:
9552 q = (struct hl_engine_arc_dccm_queue_full_irq *) &payload;
9554 gaudi2_print_event(hdev, event_type, true,
9555 "ARC DCCM Full event: EngId: %u, Intr_type: %u, Qidx: %u",
9556 engine_id, intr_type, q->queue_index);
		break;
	default:
		gaudi2_print_event(hdev, event_type, true, "Unknown ARC event type");
		return 0;
	}

	return 1;
}
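/*
 * The 64-bit ARC payload above is reinterpreted in place as a packed,
 * interrupt-specific struct (queue-full in this case). Minimal sketch of the
 * cast-based decode with a hypothetical layout:
 */
struct example_dccm_queue_full_irq {
	u32 queue_index;	/* hypothetical field placement */
	u32 reserved;
};

static inline u32 example_arc_queue_index(u64 payload)
{
	struct example_dccm_queue_full_irq *q =
			(struct example_dccm_queue_full_irq *)&payload;

	return q->queue_index;
}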
9564 static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
9566 struct gaudi2_device *gaudi2 = hdev->asic_specific;
9567 bool reset_required = false, is_critical = false;
9568 u32 index, ctl, reset_flags = 0, error_count = 0;
9572 ctl = le32_to_cpu(eq_entry->hdr.ctl);
9573 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) >> EQ_CTL_EVENT_TYPE_SHIFT);
9575 if (event_type >= GAUDI2_EVENT_SIZE) {
		dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
			event_type, GAUDI2_EVENT_SIZE - 1);
		return;
	}
9581 gaudi2->events_stat[event_type]++;
9582 gaudi2->events_stat_aggregate[event_type]++;
9584 switch (event_type) {
9585 case GAUDI2_EVENT_PCIE_CORE_SERR ... GAUDI2_EVENT_ARC0_ECC_DERR:
9587 case GAUDI2_EVENT_ROTATOR0_SERR ... GAUDI2_EVENT_ROTATOR1_DERR:
9588 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9589 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9590 reset_required = gaudi2_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
9591 is_critical = eq_entry->ecc_data.is_critical;
9595 case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_PDMA1_QM:
9597 case GAUDI2_EVENT_ROTATOR0_ROT0_QM ... GAUDI2_EVENT_ROTATOR1_ROT1_QM:
9599 case GAUDI2_EVENT_NIC0_QM0 ... GAUDI2_EVENT_NIC11_QM1:
9600 error_count = gaudi2_handle_qman_err(hdev, event_type, &event_mask);
9601 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9604 case GAUDI2_EVENT_ARC_AXI_ERROR_RESPONSE_0:
9605 error_count = gaudi2_handle_arc_farm_sei_err(hdev, event_type);
9606 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9609 case GAUDI2_EVENT_CPU_AXI_ERR_RSP:
9610 error_count = gaudi2_handle_cpu_sei_err(hdev, event_type);
9611 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9612 event_mask |= HL_NOTIFIER_EVENT_CRITICL_FW_ERR;
9615 case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
9616 case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
9617 error_count = gaudi2_handle_qm_sei_err(hdev, event_type, true, &event_mask);
9618 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9621 case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
9622 case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
9623 index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE;
9624 error_count = gaudi2_handle_rot_err(hdev, index, event_type,
9625 &eq_entry->razwi_with_intr_cause, &event_mask);
9626 error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask);
9627 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9630 case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
9631 index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
9632 error_count = gaudi2_tpc_ack_interrupts(hdev, index, event_type,
9633 &eq_entry->razwi_with_intr_cause, &event_mask);
9634 error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask);
9635 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9638 case GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE ... GAUDI2_EVENT_DEC9_AXI_ERR_RSPONSE:
9639 index = event_type - GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE;
9640 error_count = gaudi2_handle_dec_err(hdev, index, event_type, &event_mask);
9641 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9644 case GAUDI2_EVENT_TPC0_KERNEL_ERR:
9645 case GAUDI2_EVENT_TPC1_KERNEL_ERR:
9646 case GAUDI2_EVENT_TPC2_KERNEL_ERR:
9647 case GAUDI2_EVENT_TPC3_KERNEL_ERR:
9648 case GAUDI2_EVENT_TPC4_KERNEL_ERR:
9649 case GAUDI2_EVENT_TPC5_KERNEL_ERR:
9650 case GAUDI2_EVENT_TPC6_KERNEL_ERR:
9651 case GAUDI2_EVENT_TPC7_KERNEL_ERR:
9652 case GAUDI2_EVENT_TPC8_KERNEL_ERR:
9653 case GAUDI2_EVENT_TPC9_KERNEL_ERR:
9654 case GAUDI2_EVENT_TPC10_KERNEL_ERR:
9655 case GAUDI2_EVENT_TPC11_KERNEL_ERR:
9656 case GAUDI2_EVENT_TPC12_KERNEL_ERR:
9657 case GAUDI2_EVENT_TPC13_KERNEL_ERR:
9658 case GAUDI2_EVENT_TPC14_KERNEL_ERR:
9659 case GAUDI2_EVENT_TPC15_KERNEL_ERR:
9660 case GAUDI2_EVENT_TPC16_KERNEL_ERR:
9661 case GAUDI2_EVENT_TPC17_KERNEL_ERR:
9662 case GAUDI2_EVENT_TPC18_KERNEL_ERR:
9663 case GAUDI2_EVENT_TPC19_KERNEL_ERR:
9664 case GAUDI2_EVENT_TPC20_KERNEL_ERR:
9665 case GAUDI2_EVENT_TPC21_KERNEL_ERR:
9666 case GAUDI2_EVENT_TPC22_KERNEL_ERR:
9667 case GAUDI2_EVENT_TPC23_KERNEL_ERR:
9668 case GAUDI2_EVENT_TPC24_KERNEL_ERR:
9669 index = (event_type - GAUDI2_EVENT_TPC0_KERNEL_ERR) /
9670 (GAUDI2_EVENT_TPC1_KERNEL_ERR - GAUDI2_EVENT_TPC0_KERNEL_ERR);
9671 error_count = gaudi2_tpc_ack_interrupts(hdev, index, event_type,
9672 &eq_entry->razwi_with_intr_cause, &event_mask);
9673 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9676 case GAUDI2_EVENT_DEC0_SPI:
9677 case GAUDI2_EVENT_DEC1_SPI:
9678 case GAUDI2_EVENT_DEC2_SPI:
9679 case GAUDI2_EVENT_DEC3_SPI:
9680 case GAUDI2_EVENT_DEC4_SPI:
9681 case GAUDI2_EVENT_DEC5_SPI:
9682 case GAUDI2_EVENT_DEC6_SPI:
9683 case GAUDI2_EVENT_DEC7_SPI:
9684 case GAUDI2_EVENT_DEC8_SPI:
9685 case GAUDI2_EVENT_DEC9_SPI:
9686 index = (event_type - GAUDI2_EVENT_DEC0_SPI) /
9687 (GAUDI2_EVENT_DEC1_SPI - GAUDI2_EVENT_DEC0_SPI);
9688 error_count = gaudi2_handle_dec_err(hdev, index, event_type, &event_mask);
9689 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9692 case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
9693 case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
9694 case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
9695 case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
9696 index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) /
9697 (GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE -
9698 GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE);
9699 error_count = gaudi2_handle_mme_err(hdev, index, event_type, &event_mask);
9700 error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask);
9701 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9704 case GAUDI2_EVENT_MME0_QMAN_SW_ERROR:
9705 case GAUDI2_EVENT_MME1_QMAN_SW_ERROR:
9706 case GAUDI2_EVENT_MME2_QMAN_SW_ERROR:
9707 case GAUDI2_EVENT_MME3_QMAN_SW_ERROR:
9708 index = (event_type - GAUDI2_EVENT_MME0_QMAN_SW_ERROR) /
9709 (GAUDI2_EVENT_MME1_QMAN_SW_ERROR -
9710 GAUDI2_EVENT_MME0_QMAN_SW_ERROR);
9711 error_count = gaudi2_handle_mme_err(hdev, index, event_type, &event_mask);
9712 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9715 case GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID:
9716 case GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID:
9717 case GAUDI2_EVENT_MME2_WAP_SOURCE_RESULT_INVALID:
9718 case GAUDI2_EVENT_MME3_WAP_SOURCE_RESULT_INVALID:
9719 index = (event_type - GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID) /
9720 (GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID -
9721 GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID);
9722 error_count = gaudi2_handle_mme_wap_err(hdev, index, event_type, &event_mask);
9723 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9726 case GAUDI2_EVENT_KDMA_CH0_AXI_ERR_RSP:
9727 case GAUDI2_EVENT_KDMA0_CORE:
9728 error_count = gaudi2_handle_kdma_core_event(hdev, event_type,
9729 le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9730 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9733 case GAUDI2_EVENT_HDMA2_CORE ... GAUDI2_EVENT_HDMA5_CORE:
9734 index = event_type - GAUDI2_EVENT_HDMA2_CORE;
9735 error_count = gaudi2_handle_edma_core_event(hdev, event_type, index);
9736 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9739 case GAUDI2_EVENT_PDMA0_CORE ... GAUDI2_EVENT_PDMA1_CORE:
9740 index = event_type - GAUDI2_EVENT_PDMA0_CORE;
9741 error_count = gaudi2_handle_pdma_core_event(hdev, event_type, index);
9742 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9745 case GAUDI2_EVENT_PCIE_ADDR_DEC_ERR:
9746 error_count = gaudi2_print_pcie_addr_dec_info(hdev, event_type,
9747 le64_to_cpu(eq_entry->intr_cause.intr_cause_data), &event_mask);
9748 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9749 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9752 case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
9753 case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
9754 case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR:
9755 case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0:
9756 error_count = gaudi2_handle_mmu_spi_sei_err(hdev, event_type, &event_mask);
9757 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9758 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9761 case GAUDI2_EVENT_HIF0_FATAL ... GAUDI2_EVENT_HIF12_FATAL:
9762 error_count = gaudi2_handle_hif_fatal(hdev, event_type,
9763 le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9764 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9765 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9768 case GAUDI2_EVENT_PMMU_FATAL_0:
9769 error_count = gaudi2_handle_pif_fatal(hdev, event_type,
9770 le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9771 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9772 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9775 case GAUDI2_EVENT_PSOC63_RAZWI_OR_PID_MIN_MAX_INTERRUPT:
9776 error_count = gaudi2_ack_psoc_razwi_event_handler(hdev, &event_mask);
9777 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9780 case GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE ... GAUDI2_EVENT_HBM5_MC1_SEI_NON_SEVERE:
9781 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9782 if (gaudi2_handle_hbm_mc_sei_err(hdev, event_type, &eq_entry->sei_data)) {
9783 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9784 reset_required = true;
9789 case GAUDI2_EVENT_HBM_CATTRIP_0 ... GAUDI2_EVENT_HBM_CATTRIP_5:
9790 error_count = gaudi2_handle_hbm_cattrip(hdev, event_type,
9791 le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9792 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9795 case GAUDI2_EVENT_HBM0_MC0_SPI ... GAUDI2_EVENT_HBM5_MC1_SPI:
9796 error_count = gaudi2_handle_hbm_mc_spi(hdev,
9797 le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9798 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9801 case GAUDI2_EVENT_PCIE_DRAIN_COMPLETE:
9802 error_count = gaudi2_handle_pcie_drain(hdev, &eq_entry->pcie_drain_ind_data);
9803 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9804 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9807 case GAUDI2_EVENT_PSOC59_RPM_ERROR_OR_DRAIN:
9808 error_count = gaudi2_handle_psoc_drain(hdev,
9809 le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9810 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9811 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9814 case GAUDI2_EVENT_CPU_AXI_ECC:
9815 error_count = GAUDI2_NA_EVENT_CAUSE;
9816 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9817 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9819 case GAUDI2_EVENT_CPU_L2_RAM_ECC:
9820 error_count = GAUDI2_NA_EVENT_CAUSE;
9821 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9822 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9824 case GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME0_SBTE4_AXI_ERR_RSP:
9825 case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_SBTE4_AXI_ERR_RSP:
9826 case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_SBTE4_AXI_ERR_RSP:
9827 case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_SBTE4_AXI_ERR_RSP:
9828 error_count = gaudi2_handle_mme_sbte_err(hdev, event_type,
9829 le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9830 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9832 case GAUDI2_EVENT_VM0_ALARM_A ... GAUDI2_EVENT_VM3_ALARM_B:
9833 error_count = GAUDI2_NA_EVENT_CAUSE;
9834 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9835 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9837 case GAUDI2_EVENT_PSOC_AXI_ERR_RSP:
9838 error_count = GAUDI2_NA_EVENT_CAUSE;
9839 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9840 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9842 case GAUDI2_EVENT_PSOC_PRSTN_FALL:
9843 error_count = GAUDI2_NA_EVENT_CAUSE;
9844 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9846 case GAUDI2_EVENT_PCIE_APB_TIMEOUT:
9847 error_count = GAUDI2_NA_EVENT_CAUSE;
9848 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9849 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9851 case GAUDI2_EVENT_PCIE_FATAL_ERR:
9852 error_count = GAUDI2_NA_EVENT_CAUSE;
9853 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9854 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9856 case GAUDI2_EVENT_TPC0_BMON_SPMU:
9857 case GAUDI2_EVENT_TPC1_BMON_SPMU:
9858 case GAUDI2_EVENT_TPC2_BMON_SPMU:
9859 case GAUDI2_EVENT_TPC3_BMON_SPMU:
9860 case GAUDI2_EVENT_TPC4_BMON_SPMU:
9861 case GAUDI2_EVENT_TPC5_BMON_SPMU:
9862 case GAUDI2_EVENT_TPC6_BMON_SPMU:
9863 case GAUDI2_EVENT_TPC7_BMON_SPMU:
9864 case GAUDI2_EVENT_TPC8_BMON_SPMU:
9865 case GAUDI2_EVENT_TPC9_BMON_SPMU:
9866 case GAUDI2_EVENT_TPC10_BMON_SPMU:
9867 case GAUDI2_EVENT_TPC11_BMON_SPMU:
9868 case GAUDI2_EVENT_TPC12_BMON_SPMU:
9869 case GAUDI2_EVENT_TPC13_BMON_SPMU:
9870 case GAUDI2_EVENT_TPC14_BMON_SPMU:
9871 case GAUDI2_EVENT_TPC15_BMON_SPMU:
9872 case GAUDI2_EVENT_TPC16_BMON_SPMU:
9873 case GAUDI2_EVENT_TPC17_BMON_SPMU:
9874 case GAUDI2_EVENT_TPC18_BMON_SPMU:
9875 case GAUDI2_EVENT_TPC19_BMON_SPMU:
9876 case GAUDI2_EVENT_TPC20_BMON_SPMU:
9877 case GAUDI2_EVENT_TPC21_BMON_SPMU:
9878 case GAUDI2_EVENT_TPC22_BMON_SPMU:
9879 case GAUDI2_EVENT_TPC23_BMON_SPMU:
9880 case GAUDI2_EVENT_TPC24_BMON_SPMU:
9881 case GAUDI2_EVENT_MME0_CTRL_BMON_SPMU:
9882 case GAUDI2_EVENT_MME0_SBTE_BMON_SPMU:
9883 case GAUDI2_EVENT_MME0_WAP_BMON_SPMU:
9884 case GAUDI2_EVENT_MME1_CTRL_BMON_SPMU:
9885 case GAUDI2_EVENT_MME1_SBTE_BMON_SPMU:
9886 case GAUDI2_EVENT_MME1_WAP_BMON_SPMU:
9887 case GAUDI2_EVENT_MME2_CTRL_BMON_SPMU:
9888 case GAUDI2_EVENT_MME2_SBTE_BMON_SPMU:
9889 case GAUDI2_EVENT_MME2_WAP_BMON_SPMU:
9890 case GAUDI2_EVENT_MME3_CTRL_BMON_SPMU:
9891 case GAUDI2_EVENT_MME3_SBTE_BMON_SPMU:
9892 case GAUDI2_EVENT_MME3_WAP_BMON_SPMU:
9893 case GAUDI2_EVENT_HDMA2_BM_SPMU ... GAUDI2_EVENT_PDMA1_BM_SPMU:
9895 case GAUDI2_EVENT_DEC0_BMON_SPMU:
9896 case GAUDI2_EVENT_DEC1_BMON_SPMU:
9897 case GAUDI2_EVENT_DEC2_BMON_SPMU:
9898 case GAUDI2_EVENT_DEC3_BMON_SPMU:
9899 case GAUDI2_EVENT_DEC4_BMON_SPMU:
9900 case GAUDI2_EVENT_DEC5_BMON_SPMU:
9901 case GAUDI2_EVENT_DEC6_BMON_SPMU:
9902 case GAUDI2_EVENT_DEC7_BMON_SPMU:
9903 case GAUDI2_EVENT_DEC8_BMON_SPMU:
9904 case GAUDI2_EVENT_DEC9_BMON_SPMU:
9905 case GAUDI2_EVENT_ROTATOR0_BMON_SPMU ... GAUDI2_EVENT_SM3_BMON_SPMU:
9906 error_count = GAUDI2_NA_EVENT_CAUSE;
9907 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9910 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S:
9911 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E:
9912 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S:
9913 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
9914 gaudi2_print_clk_change_info(hdev, event_type, &event_mask);
9915 error_count = GAUDI2_NA_EVENT_CAUSE;
9918 case GAUDI2_EVENT_CPU_PKT_QUEUE_OUT_SYNC:
9919 gaudi2_print_out_of_sync_info(hdev, event_type, &eq_entry->pkt_sync_err);
9920 error_count = GAUDI2_NA_EVENT_CAUSE;
9921 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9922 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9925 case GAUDI2_EVENT_PCIE_FLR_REQUESTED:
9926 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9927 error_count = GAUDI2_NA_EVENT_CAUSE;
		/* Do nothing - FW will handle it */
9931 case GAUDI2_EVENT_PCIE_P2P_MSIX:
9932 error_count = gaudi2_handle_pcie_p2p_msix(hdev, event_type);
9933 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9936 case GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE ... GAUDI2_EVENT_SM3_AXI_ERROR_RESPONSE:
9937 index = event_type - GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE;
9938 error_count = gaudi2_handle_sm_err(hdev, event_type, index);
9939 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9942 case GAUDI2_EVENT_PSOC_MME_PLL_LOCK_ERR ... GAUDI2_EVENT_DCORE2_HBM_PLL_LOCK_ERR:
9943 error_count = GAUDI2_NA_EVENT_CAUSE;
9944 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9947 case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
9948 dev_info(hdev->dev, "CPLD shutdown cause, reset reason: 0x%llx\n",
9949 le64_to_cpu(eq_entry->data[0]));
9950 error_count = GAUDI2_NA_EVENT_CAUSE;
9951 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9953 case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_EVENT:
9954 dev_err(hdev->dev, "CPLD shutdown event, reset reason: 0x%llx\n",
9955 le64_to_cpu(eq_entry->data[0]));
9956 error_count = GAUDI2_NA_EVENT_CAUSE;
9957 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9960 case GAUDI2_EVENT_CPU_PKT_SANITY_FAILED:
9961 gaudi2_print_cpu_pkt_failure_info(hdev, event_type, &eq_entry->pkt_sync_err);
9962 error_count = GAUDI2_NA_EVENT_CAUSE;
9963 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9964 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9967 case GAUDI2_EVENT_ARC_DCCM_FULL:
9968 error_count = hl_arc_event_handle(hdev, event_type, &eq_entry->arc_data);
9969 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9972 case GAUDI2_EVENT_CPU_FP32_NOT_SUPPORTED:
9973 case GAUDI2_EVENT_CPU_DEV_RESET_REQ:
9974 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9975 error_count = GAUDI2_NA_EVENT_CAUSE;
9980 if (gaudi2_irq_map_table[event_type].valid) {
			dev_err_ratelimited(hdev->dev, "Cannot find handler for event %d\n",
						event_type);
9983 error_count = GAUDI2_NA_EVENT_CAUSE;
	/* Make sure to dump an error in case no error cause was printed so far.
	 * Note that although we have counted the errors, we use this number as
	 * a boolean.
	 */
9991 if (error_count == GAUDI2_NA_EVENT_CAUSE && !is_info_event(event_type))
9992 gaudi2_print_event(hdev, event_type, true, "%d", event_type);
9993 else if (error_count == 0)
9994 gaudi2_print_event(hdev, event_type, true,
9995 "No error cause for H/W event %u", event_type);
	if ((gaudi2_irq_map_table[event_type].reset != EVENT_RESET_TYPE_NONE) ||
				reset_required) {
9999 if (reset_required ||
10000 (gaudi2_irq_map_table[event_type].reset == EVENT_RESET_TYPE_HARD))
10001 reset_flags |= HL_DRV_RESET_HARD;
10003 if (hdev->hard_reset_on_fw_events ||
				(hdev->asic_prop.fw_security_enabled && is_critical))
			goto reset_device;
	}
10008 /* Send unmask irq only for interrupts not classified as MSG */
10009 if (!gaudi2_irq_map_table[event_type].msg)
10010 hl_fw_unmask_irq(hdev, event_type);
	if (event_mask)
		hl_notifier_event_send_all(hdev, event_mask);

	return;

reset_device:
	if (hdev->asic_prop.fw_security_enabled && is_critical) {
		reset_flags |= HL_DRV_RESET_BYPASS_REQ_TO_FW;
		event_mask |= HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
	} else {
		reset_flags |= HL_DRV_RESET_DELAY;
	}
10024 /* escalate general hw errors to critical/fatal error */
10025 if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR)
10026 hl_handle_critical_hw_err(hdev, event_type, &event_mask);
10028 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
10029 hl_device_cond_reset(hdev, reset_flags, event_mask);
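/*
 * The event type is carved out of the EQ entry's ctl word with a mask and
 * shift at the top of gaudi2_handle_eqe(). Sketch with a hypothetical field
 * placement (the EX_* values are illustrative):
 */
#define EX_EQ_CTL_EVENT_TYPE_MASK	GENMASK(25, 16)	/* hypothetical */
#define EX_EQ_CTL_EVENT_TYPE_SHIFT	16		/* hypothetical */

static inline u16 example_eq_event_type(u32 ctl)
{
	return (ctl & EX_EQ_CTL_EVENT_TYPE_MASK) >> EX_EQ_CTL_EVENT_TYPE_SHIFT;
}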
10032 static int gaudi2_memset_memory_chunk_using_edma_qm(struct hl_device *hdev,
10033 struct packet_lin_dma *lin_dma_pkt, dma_addr_t pkt_dma_addr,
10034 u32 hw_queue_id, u32 size, u64 addr, u32 val)
10039 ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
10040 ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
10041 ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_WRCOMP_MASK, 1);
10042 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 1);
10044 lin_dma_pkt->ctl = cpu_to_le32(ctl);
10045 lin_dma_pkt->src_addr = cpu_to_le64(val);
10046 lin_dma_pkt->dst_addr = cpu_to_le64(addr);
10047 lin_dma_pkt->tsize = cpu_to_le32(size);
10049 pkt_size = sizeof(struct packet_lin_dma);
10051 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, pkt_dma_addr);
	if (rc)
		dev_err(hdev->dev, "Failed to send lin dma packet to H/W queue %d\n",
				hw_queue_id);

	return rc;
}
10059 static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val)
10061 u32 edma_queues_id[] = {GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0,
10062 GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0,
10063 GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0,
10064 GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0};
10065 u32 chunk_size, dcore, edma_idx, sob_offset, sob_addr, comp_val,
10066 old_mmubp, mmubp, num_of_pkts, busy, pkt_size;
10067 u64 comp_addr, cur_addr = addr, end_addr = addr + size;
10068 struct asic_fixed_properties *prop = &hdev->asic_prop;
10069 void *lin_dma_pkts_arr;
10070 dma_addr_t pkt_dma_addr;
10071 int rc = 0, dma_num = 0;
	if (prop->edma_enabled_mask == 0) {
		dev_info(hdev->dev, "none of the EDMA engines is enabled - skip dram scrubbing\n");
		return -EIO;
	}
10078 sob_offset = hdev->asic_prop.first_available_user_sob[0] * 4;
10079 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
10080 comp_addr = CFG_BASE + sob_addr;
10081 comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) |
10082 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1);
10083 mmubp = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_MASK, 1) |
10084 FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_MASK, 1);
10086 /* Calculate how many lin dma pkts we'll need */
10087 num_of_pkts = div64_u64(round_up(size, SZ_2G), SZ_2G);
10088 pkt_size = sizeof(struct packet_lin_dma);
10090 lin_dma_pkts_arr = hl_asic_dma_alloc_coherent(hdev, pkt_size * num_of_pkts,
10091 &pkt_dma_addr, GFP_KERNEL);
	if (!lin_dma_pkts_arr)
		return -ENOMEM;
	/*
	 * Set MMU bypass for the scrubbing - all EDMAs are configured the same,
	 * so save only the first one to restore later.
	 * Also set the SOB address for all EDMA cores for completion.
	 * Set the QM as trusted to allow it to access physical addresses with MMU bypass.
	 */
10101 old_mmubp = RREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP);
10102 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
10103 for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
10104 u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET;
10105 u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
10107 if (!(prop->edma_enabled_mask & BIT(edma_bit)))
10110 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP +
10111 edma_offset, mmubp);
10112 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset,
10113 lower_32_bits(comp_addr));
10114 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset,
10115 upper_32_bits(comp_addr));
			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset,
					comp_val);
10118 gaudi2_qman_set_test_mode(hdev,
10119 edma_queues_id[dcore] + 4 * edma_idx, true);
10123 WREG32(sob_addr, 0);

	while (cur_addr < end_addr) {
		for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
			for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
				u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;

				if (!(prop->edma_enabled_mask & BIT(edma_bit)))
					continue;

				chunk_size = min_t(u64, SZ_2G, end_addr - cur_addr);

				rc = gaudi2_memset_memory_chunk_using_edma_qm(hdev,
					(struct packet_lin_dma *)lin_dma_pkts_arr + dma_num,
					pkt_dma_addr + dma_num * pkt_size,
					edma_queues_id[dcore] + edma_idx * 4,
					chunk_size, cur_addr, val);
				if (rc)
					goto end;

				dma_num++;
				cur_addr += chunk_size;
				if (cur_addr == end_addr)
					break;
			}
		}
	}

	rc = hl_poll_timeout(hdev, sob_addr, busy, (busy == dma_num), 1000, 1000000);
	if (rc) {
		dev_err(hdev->dev, "DMA Timeout during HBM scrubbing\n");
		goto end;
	}
end:
	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
		for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
			u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET;
			u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;

			if (!(prop->edma_enabled_mask & BIT(edma_bit)))
				continue;

			WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + edma_offset, old_mmubp);
			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset, 0);
			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset, 0);
			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset, 0);
			gaudi2_qman_set_test_mode(hdev,
					edma_queues_id[dcore] + 4 * edma_idx, false);
		}
	}

	WREG32(sob_addr, 0);
	hl_asic_dma_free_coherent(hdev, pkt_size * num_of_pkts, lin_dma_pkts_arr, pkt_dma_addr);

	return rc;
}

static int gaudi2_scrub_device_dram(struct hl_device *hdev, u64 val)
{
	int rc;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 size = prop->dram_end_address - prop->dram_user_base_address;

	rc = gaudi2_memset_device_memory(hdev, prop->dram_user_base_address, size, val);

	if (rc)
		dev_err(hdev->dev, "Failed to scrub dram, address: 0x%llx size: %llu\n",
				prop->dram_user_base_address, size);
	return rc;
}
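
/*
 * Scrub all user-accessible device memory (SRAM first, then DRAM) with the
 * configured scrub value. This is a no-op unless memory scrubbing was
 * enabled (hdev->memory_scrub); on PLDM only a small 64KB SRAM window is
 * scrubbed to keep simulation run time reasonable.
 */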
static int gaudi2_scrub_device_mem(struct hl_device *hdev)
{
	int rc;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 val = hdev->memory_scrub_val;
	u64 addr, size;

	if (!hdev->memory_scrub)
		return 0;

	/* scrub SRAM */
	addr = prop->sram_user_base_address;
	size = hdev->pldm ? 0x10000 : (prop->sram_size - SRAM_USER_BASE_OFFSET);
	dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx, val: 0x%llx\n",
			addr, addr + size, val);
	rc = gaudi2_memset_device_memory(hdev, addr, size, val);
	if (rc) {
		dev_err(hdev->dev, "scrubbing SRAM failed (%d)\n", rc);
		return rc;
	}

	/* scrub DRAM */
	rc = gaudi2_scrub_device_dram(hdev, val);
	if (rc) {
		dev_err(hdev->dev, "scrubbing DRAM failed (%d)\n", rc);
		return rc;
	}

	return 0;
}
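
/*
 * Reset all user-exposed sync manager resources (CQ configuration,
 * monitors and sync objects) across the four dcores. Dcore0 is handled
 * separately because its first resources are reserved for the driver, so
 * the sweep there starts at the first user-available index rather than
 * at 0.
 */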
static void gaudi2_restore_user_sm_registers(struct hl_device *hdev)
{
	u64 addr, mon_sts_addr, mon_cfg_addr, cq_lbw_l_addr, cq_lbw_h_addr,
		cq_lbw_data_addr, cq_base_l_addr, cq_base_h_addr, cq_size_addr;
	u32 val, size, offset;
	int dcore_id;

	offset = hdev->asic_prop.first_available_cq[0] * 4;
	cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset;
	cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + offset;
	cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + offset;
	cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + offset;
	cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + offset;
	cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + offset;
	size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 -
			(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset);

	/* memset dcore0 CQ registers */
	gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0);
	gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0);
	gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0);
	gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0);
	gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0);
	gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0);

	cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + DCORE_OFFSET;
	cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + DCORE_OFFSET;
	cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + DCORE_OFFSET;
	cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + DCORE_OFFSET;
	cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + DCORE_OFFSET;
	cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + DCORE_OFFSET;
	size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 - mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0;

	for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
		gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0);
		gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0);
		gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0);
		gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0);
		gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0);
		gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0);

		cq_lbw_l_addr += DCORE_OFFSET;
		cq_lbw_h_addr += DCORE_OFFSET;
		cq_lbw_data_addr += DCORE_OFFSET;
		cq_base_l_addr += DCORE_OFFSET;
		cq_base_h_addr += DCORE_OFFSET;
		cq_size_addr += DCORE_OFFSET;
	}

	offset = hdev->asic_prop.first_available_user_mon[0] * 4;
	addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset;
	val = 1 << DCORE0_SYNC_MNGR_OBJS_MON_STATUS_PROT_SHIFT;
	size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - (mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset);

	/* memset dcore0 monitors */
	gaudi2_memset_device_lbw(hdev, addr, size, val);

	addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + offset;
	gaudi2_memset_device_lbw(hdev, addr, size, 0);

	mon_sts_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + DCORE_OFFSET;
	mon_cfg_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + DCORE_OFFSET;
	size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0;

	for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
		gaudi2_memset_device_lbw(hdev, mon_sts_addr, size, val);
		gaudi2_memset_device_lbw(hdev, mon_cfg_addr, size, 0);
		mon_sts_addr += DCORE_OFFSET;
		mon_cfg_addr += DCORE_OFFSET;
	}

	offset = hdev->asic_prop.first_available_user_sob[0] * 4;
	addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset;
	val = 0;
	size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 -
			(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset);

	/* memset dcore0 sobs */
	gaudi2_memset_device_lbw(hdev, addr, size, val);

	addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + DCORE_OFFSET;
	size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 - mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0;

	for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
		gaudi2_memset_device_lbw(hdev, addr, size, val);
		addr += DCORE_OFFSET;
	}

	/* Flush all WREG to prevent race */
	val = RREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset);
}

static void gaudi2_restore_user_qm_registers(struct hl_device *hdev)
{
	u32 reg_base, hw_queue_id;

	for (hw_queue_id = GAUDI2_QUEUE_ID_PDMA_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_ROT_1_0;
							hw_queue_id += NUM_OF_PQ_PER_QMAN) {
		if (!gaudi2_is_queue_enabled(hdev, hw_queue_id))
			continue;

		gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false);

		reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
		WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0);
	}

	/* Flush all WREG to prevent race */
	RREG32(mmPDMA0_QM_ARB_CFG_0);
}

static void gaudi2_restore_nic_qm_registers(struct hl_device *hdev)
{
	u32 reg_base, hw_queue_id;

	for (hw_queue_id = GAUDI2_QUEUE_ID_NIC_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_NIC_23_3;
							hw_queue_id += NUM_OF_PQ_PER_QMAN) {
		if (!gaudi2_is_queue_enabled(hdev, hw_queue_id))
			continue;

		gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false);

		reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
		WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0);
	}

	/* Flush all WREG to prevent race */
	RREG32(mmPDMA0_QM_ARB_CFG_0);
}

static int gaudi2_context_switch(struct hl_device *hdev, u32 asid)
{
	return 0;
}

static void gaudi2_restore_phase_topology(struct hl_device *hdev)
{
}

static void gaudi2_init_block_instances(struct hl_device *hdev, u32 block_idx,
						struct dup_block_ctx *cfg_ctx)
{
	u64 block_base = cfg_ctx->base + block_idx * cfg_ctx->block_off;
	u32 seq;
	int i;

	for (i = 0 ; i < cfg_ctx->instances ; i++) {
		seq = block_idx * cfg_ctx->instances + i;

		/* skip disabled instance */
		if (!(cfg_ctx->enabled_mask & BIT_ULL(seq)))
			continue;

		cfg_ctx->instance_cfg_fn(hdev, block_base + i * cfg_ctx->instance_off,
					cfg_ctx->data);
	}
}

static void gaudi2_init_blocks_with_mask(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx,
						u64 mask)
{
	int i;

	cfg_ctx->enabled_mask = mask;

	for (i = 0 ; i < cfg_ctx->blocks ; i++)
		gaudi2_init_block_instances(hdev, i, cfg_ctx);
}

void gaudi2_init_blocks(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx)
{
	gaudi2_init_blocks_with_mask(hdev, cfg_ctx, U64_MAX);
}
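
/*
 * Read device memory for debugfs via the KDMA engine: a 2MB host bounce
 * buffer is allocated, a device VA range is reserved and mapped to it
 * through the PMMU, and the requested region is then copied out in 2MB
 * chunks (KDMA job followed by a memcpy into the caller's blob). The
 * mapping, reservation and buffer are torn down in reverse order on exit.
 */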
static int gaudi2_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, void *blob_addr)
{
	void *host_mem_virtual_addr;
	dma_addr_t host_mem_dma_addr;
	u64 reserved_va_base;
	u32 pos, size_left, size_to_dma;
	struct hl_ctx *ctx;
	int rc = 0;

	/* Fetch the ctx */
	ctx = hl_get_compute_ctx(hdev);
	if (!ctx) {
		dev_err(hdev->dev, "No ctx available\n");
		return -EINVAL;
	}

	/* Allocate buffers for read and for poll */
	host_mem_virtual_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &host_mem_dma_addr,
								GFP_KERNEL | __GFP_ZERO);
	if (host_mem_virtual_addr == NULL) {
		dev_err(hdev->dev, "Failed to allocate memory for KDMA read\n");
		rc = -ENOMEM;
		goto put_ctx;
	}

	/* Reserve VM region on asic side */
	reserved_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST, SZ_2M,
						HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
	if (!reserved_va_base) {
		dev_err(hdev->dev, "Failed to reserve vmem on asic\n");
		rc = -ENOMEM;
		goto free_data_buffer;
	}

	/* Create mapping on asic side */
	mutex_lock(&hdev->mmu_lock);

	rc = hl_mmu_map_contiguous(ctx, reserved_va_base, host_mem_dma_addr, SZ_2M);
	if (rc) {
		dev_err(hdev->dev, "Failed to create mapping on asic mmu\n");
		goto unreserve_va;
	}

	rc = hl_mmu_invalidate_cache_range(hdev, false,
				      MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV,
				      ctx->asid, reserved_va_base, SZ_2M);
	if (rc) {
		hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M);
		goto unreserve_va;
	}

	mutex_unlock(&hdev->mmu_lock);

	/* Enable MMU on KDMA */
	gaudi2_kdma_set_mmbp_asid(hdev, false, ctx->asid);

	pos = 0;
	size_left = size;
	size_to_dma = SZ_2M;

	while (size_left > 0) {
		if (size_left < SZ_2M)
			size_to_dma = size_left;

		rc = gaudi2_send_job_to_kdma(hdev, addr, reserved_va_base, size_to_dma, false);
		if (rc)
			break;

		memcpy(blob_addr + pos, host_mem_virtual_addr, size_to_dma);

		if (size_left <= SZ_2M)
			break;

		pos += SZ_2M;
		addr += SZ_2M;
		size_left -= SZ_2M;
	}

	gaudi2_kdma_set_mmbp_asid(hdev, true, HL_KERNEL_ASID_ID);

	mutex_lock(&hdev->mmu_lock);

	rc = hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M);
	if (rc)
		goto unreserve_va;

	rc = hl_mmu_invalidate_cache_range(hdev, false, MMU_OP_USERPTR,
				      ctx->asid, reserved_va_base, SZ_2M);

unreserve_va:
	mutex_unlock(&hdev->mmu_lock);
	hl_unreserve_va_block(hdev, ctx, reserved_va_base, SZ_2M);
free_data_buffer:
	hl_asic_dma_free_coherent(hdev, SZ_2M, host_mem_virtual_addr, host_mem_dma_addr);
put_ctx:
	hl_ctx_put(ctx);

	return rc;
}
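
/*
 * The internal CB pool supplies the small command buffers (signal/wait CBs)
 * that the driver itself submits. Host memory is carved into a gen_pool
 * whose minimum allocation order matches the smaller of the signal/wait CB
 * sizes, and the whole pool is mapped once into the device's host VA range
 * so engines can fetch from it through the PMMU.
 */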
static int gaudi2_internal_cb_pool_init(struct hl_device *hdev, struct hl_ctx *ctx)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	int min_alloc_order, rc;

	if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU))
		return 0;

	hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
								HOST_SPACE_INTERNAL_CB_SZ,
								&hdev->internal_cb_pool_dma_addr,
								GFP_KERNEL | __GFP_ZERO);

	if (!hdev->internal_cb_pool_virt_addr)
		return -ENOMEM;

	min_alloc_order = ilog2(min(gaudi2_get_signal_cb_size(hdev),
					gaudi2_get_wait_cb_size(hdev)));

	hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
	if (!hdev->internal_cb_pool) {
		dev_err(hdev->dev, "Failed to create internal CB pool\n");
		rc = -ENOMEM;
		goto free_internal_cb_pool;
	}

	rc = gen_pool_add(hdev->internal_cb_pool, (uintptr_t) hdev->internal_cb_pool_virt_addr,
				HOST_SPACE_INTERNAL_CB_SZ, -1);
	if (rc) {
		dev_err(hdev->dev, "Failed to add memory to internal CB pool\n");
		rc = -EFAULT;
		goto destroy_internal_cb_pool;
	}

	hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST,
					HOST_SPACE_INTERNAL_CB_SZ, HL_MMU_VA_ALIGNMENT_NOT_NEEDED);

	if (!hdev->internal_cb_va_base) {
		rc = -ENOMEM;
		goto destroy_internal_cb_pool;
	}

	mutex_lock(&hdev->mmu_lock);

	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, hdev->internal_cb_pool_dma_addr,
					HOST_SPACE_INTERNAL_CB_SZ);
	if (rc)
		goto unreserve_internal_cb_pool;

	rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
	if (rc)
		goto unmap_internal_cb_pool;

	mutex_unlock(&hdev->mmu_lock);

	return 0;

unmap_internal_cb_pool:
	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
unreserve_internal_cb_pool:
	mutex_unlock(&hdev->mmu_lock);
	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
destroy_internal_cb_pool:
	gen_pool_destroy(hdev->internal_cb_pool);
free_internal_cb_pool:
	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
					hdev->internal_cb_pool_dma_addr);

	return rc;
}

static void gaudi2_internal_cb_pool_fini(struct hl_device *hdev, struct hl_ctx *ctx)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;

	if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU))
		return;

	mutex_lock(&hdev->mmu_lock);
	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
	mutex_unlock(&hdev->mmu_lock);

	gen_pool_destroy(hdev->internal_cb_pool);

	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
					hdev->internal_cb_pool_dma_addr);
}

static void gaudi2_restore_user_registers(struct hl_device *hdev)
{
	gaudi2_restore_user_sm_registers(hdev);
	gaudi2_restore_user_qm_registers(hdev);
}
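
/*
 * Each context maps the same host page (gaudi2->virt_msix_db_dma_addr) at a
 * fixed, driver-reserved device VA. This gives user submissions a
 * well-known address for virtual MSI-X doorbell writes regardless of the
 * active ASID.
 */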
static int gaudi2_map_virtual_msix_doorbell_memory(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	int rc;

	rc = hl_mmu_map_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START,
				gaudi2->virt_msix_db_dma_addr, prop->pmmu.page_size, true);
	if (rc)
		dev_err(hdev->dev, "Failed to map VA %#llx for virtual MSI-X doorbell memory\n",
			RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);

	return rc;
}

static void gaudi2_unmap_virtual_msix_doorbell_memory(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	rc = hl_mmu_unmap_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START,
				prop->pmmu.page_size, true);
	if (rc)
		dev_err(hdev->dev, "Failed to unmap VA %#llx of virtual MSI-X doorbell memory\n",
			RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);
}

static int gaudi2_ctx_init(struct hl_ctx *ctx)
{
	int rc;

	rc = gaudi2_mmu_prepare(ctx->hdev, ctx->asid);
	if (rc)
		return rc;

	/* No need to clear user registers if the device has just
	 * performed reset, we restore only nic qm registers
	 */
	if (ctx->hdev->reset_upon_device_release)
		gaudi2_restore_nic_qm_registers(ctx->hdev);
	else
		gaudi2_restore_user_registers(ctx->hdev);

	rc = gaudi2_internal_cb_pool_init(ctx->hdev, ctx);
	if (rc)
		return rc;

	rc = gaudi2_map_virtual_msix_doorbell_memory(ctx);
	if (rc)
		gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);

	return rc;
}

static void gaudi2_ctx_fini(struct hl_ctx *ctx)
{
	if (ctx->asid == HL_KERNEL_ASID_ID)
		return;

	gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);

	gaudi2_unmap_virtual_msix_doorbell_memory(ctx);
}

static int gaudi2_pre_schedule_cs(struct hl_cs *cs)
{
	struct hl_device *hdev = cs->ctx->hdev;
	int index = cs->sequence & (hdev->asic_prop.max_pending_cs - 1);
	u32 mon_payload, sob_id, mon_id;

	if (!cs_needs_completion(cs))
		return 0;

	/*
	 * First 64 SOB/MON are reserved for driver for QMAN auto completion
	 * mechanism. Each SOB/MON pair is used for a pending CS with the same
	 * cyclic index. The SOB value is increased when each of the CS jobs is
	 * completed. When the SOB reaches the number of CS jobs, the monitor
	 * generates MSI-X interrupt.
	 */

	sob_id = mon_id = index;
	mon_payload = (1 << CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT) |
				(1 << CQ_ENTRY_READY_SHIFT) | index;

	gaudi2_arm_cq_monitor(hdev, sob_id, mon_id, GAUDI2_RESERVED_CQ_CS_COMPLETION, mon_payload,
				cs->jobs_cnt);

	return 0;
}

static u32 gaudi2_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
{
	return HL_INVALID_QUEUE;
}
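
/*
 * Build a signal CB: a single MSG_SHORT packet that increments the given
 * sync object by 1 (ADD mode). The SOB "base" addressing mode is used, so
 * only the object's offset (sob_id * 4) goes into the packet, and the
 * engine-barrier bit is taken from the caller.
 */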
static u32 gaudi2_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, u32 size, bool eb)
{
	struct hl_cb *cb = data;
	struct packet_msg_short *pkt;
	u32 value, ctl, pkt_size = sizeof(*pkt);

	pkt = (struct packet_msg_short *) (uintptr_t) (cb->kernel_address + size);
	memset(pkt, 0, pkt_size);

	/* Inc by 1, Mode ADD */
	value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_MOD_MASK, 1);

	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
	ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 1); /* SOB base */
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, eb);
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return size + pkt_size;
}

static u32 gaudi2_add_mon_msg_short(struct packet_msg_short *pkt, u32 value, u16 addr)
{
	u32 ctl, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr);
	ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0); /* MON base */
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 0);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}

static u32 gaudi2_add_arm_monitor_pkt(struct hl_device *hdev, struct packet_msg_short *pkt,
					u16 sob_base, u8 sob_mask, u16 sob_val, u16 addr)
{
	u32 ctl, value, pkt_size = sizeof(*pkt);
	u8 mask;

	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
		dev_err(hdev->dev, "sob_base %u (mask %#x) is not valid\n", sob_base, sob_mask);
		return 0;
	}

	memset(pkt, 0, pkt_size);

	value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MODE_MASK, 0); /* GREATER OR EQUAL*/
	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MASK_MASK, mask);

	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr);
	ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0); /* MON base */
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}

static u32 gaudi2_add_fence_pkt(struct packet_fence *pkt)
{
	u32 ctl, cfg, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	cfg = FIELD_PREP(GAUDI2_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_ID_MASK, 2);

	ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);

	pkt->cfg = cpu_to_le32(cfg);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}
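
/*
 * Build a wait CB out of four monitor-config MSG_SHORT packets followed by
 * a FENCE packet: the first two program the monitor's payload address (low
 * and high halves of the QMAN fence register for this stream), the third
 * sets the payload value to 1, and the fourth arms the monitor on the sync
 * object(s). The FENCE packet then stalls the stream until the armed
 * monitor fires and writes the payload into the fence counter.
 */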
static u32 gaudi2_gen_wait_cb(struct hl_device *hdev, struct hl_gen_wait_properties *prop)
{
	struct hl_cb *cb = prop->data;
	void *buf = (void *) (uintptr_t) (cb->kernel_address);

	u64 monitor_base, fence_addr = 0;
	u32 stream_index, size = prop->size;
	u16 msg_addr_offset;

	stream_index = prop->q_idx % 4;
	fence_addr = CFG_BASE + gaudi2_qm_blocks_bases[prop->q_idx] +
			QM_FENCE2_OFFSET + stream_index * 4;

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	/* First monitor config packet: low address of the sync */
	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + prop->mon_id * 4) -
				monitor_base;

	size += gaudi2_add_mon_msg_short(buf + size, (u32) fence_addr, msg_addr_offset);

	/* Second monitor config packet: high address of the sync */
	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + prop->mon_id * 4) -
				monitor_base;

	size += gaudi2_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32), msg_addr_offset);

	/*
	 * Third monitor config packet: the payload, i.e. what to write when the
	 * sync triggers
	 */
	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + prop->mon_id * 4) -
				monitor_base;

	size += gaudi2_add_mon_msg_short(buf + size, 1, msg_addr_offset);

	/* Fourth monitor config packet: bind the monitor to a sync object */
	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + prop->mon_id * 4) - monitor_base;

	size += gaudi2_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base, prop->sob_mask,
						prop->sob_val, msg_addr_offset);

	/* Fence packet */
	size += gaudi2_add_fence_pkt(buf + size);

	return size;
}

static void gaudi2_reset_sob(struct hl_device *hdev, void *data)
{
	struct hl_hw_sob *hw_sob = data;

	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx, hw_sob->sob_id);

	WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4, 0);

	kref_init(&hw_sob->kref);
}

static void gaudi2_reset_sob_group(struct hl_device *hdev, u16 sob_group)
{
}

static u64 gaudi2_get_device_time(struct hl_device *hdev)
{
	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;

	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
}

static int gaudi2_collective_wait_init_cs(struct hl_cs *cs)
{
	return 0;
}

static int gaudi2_collective_wait_create_jobs(struct hl_device *hdev, struct hl_ctx *ctx,
					struct hl_cs *cs, u32 wait_queue_id,
					u32 collective_engine_id, u32 encaps_signal_offset)
{
	return -EINVAL;
}

/*
 * gaudi2_mmu_scramble_addr - converts a DRAM (non power of 2) page-size
 *                            aligned address to a DMMU page-size (64MB)
 *                            address before mapping it in the MMU.
 * The operation is performed on both the virtual and physical addresses.
 * For a device with 6 HBMs the scramble is:
 *   (addr[47:0] / 48M) * 64M + addr % 48M + addr[63:48]
 *
 * Example:
 * =============================================================================
 * Allocated DRAM  Reserved VA      Scrambled VA for MMU mapping    Scrambled PA
 * phys address                                                     in MMU last
 *                                                                  HOP
 * =============================================================================
 * PA1 0x3000000  VA1 0x9C000000  SVA1= (VA1/48M)*64M 0xD0000000  <- PA1/48M 0x1
 * PA2 0x9000000  VA2 0x9F000000  SVA2= (VA2/48M)*64M 0xD4000000  <- PA2/48M 0x3
 * =============================================================================
 */
static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 divisor, mod_va;
	u64 div_va;

	/* accept any address in the DRAM address space */
	if (hl_mem_area_inside_range(raw_addr, sizeof(raw_addr), DRAM_PHYS_BASE,
					VA_HBM_SPACE_END)) {

		divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE;
		div_va = div_u64_rem(raw_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK, divisor, &mod_va);
		return (raw_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) |
			(div_va << GAUDI2_HBM_MMU_SCRM_DIV_SHIFT) |
			(mod_va << GAUDI2_HBM_MMU_SCRM_MOD_SHIFT);
	}

	return raw_addr;
}

static u64 gaudi2_mmu_descramble_addr(struct hl_device *hdev, u64 scrambled_addr)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 divisor, mod_va;
	u64 div_va;

	/* accept any address in the DRAM address space */
	if (hl_mem_area_inside_range(scrambled_addr, sizeof(scrambled_addr), DRAM_PHYS_BASE,
					VA_HBM_SPACE_END)) {

		divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE;
		div_va = div_u64_rem(scrambled_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK,
					PAGE_SIZE_64MB, &mod_va);

		return ((scrambled_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) +
					(div_va * divisor + mod_va));
	}

	return scrambled_addr;
}
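
/*
 * Round-trip example (assuming 6 functional HBMs, i.e. a 48MB divisor, as
 * in the table above): scrambling 0x9C000000 yields
 * (0x9C000000 / 48M) * 64M = 0xD0000000, and descrambling inverts it:
 * (0xD0000000 / 64M) * 48M = 0x9C000000.
 */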

static u32 gaudi2_get_dec_base_addr(struct hl_device *hdev, u32 core_id)
{
	u32 base = 0, dcore_id, dec_id;

	if (core_id >= NUMBER_OF_DEC) {
		dev_err(hdev->dev, "Unexpected core number %d for DEC\n", core_id);
		goto out;
	}

	if (core_id < 8) {
		dcore_id = core_id / NUM_OF_DEC_PER_DCORE;
		dec_id = core_id % NUM_OF_DEC_PER_DCORE;

		base = mmDCORE0_DEC0_CMD_BASE + dcore_id * DCORE_OFFSET +
				dec_id * DCORE_VDEC_OFFSET;
	} else {
		/* PCIe Shared Decoder */
		base = mmPCIE_DEC0_CMD_BASE + ((core_id % 8) * PCIE_VDEC_OFFSET);
	}
out:
	return base;
}
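
/*
 * Look up a user-mappable HW block by its CFG-space address. The returned
 * block id is what gaudi2_block_mmap() later uses to expose exactly one
 * whole block to user space.
 */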
static int gaudi2_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
			u32 *block_size, u32 *block_id)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	int i;

	for (i = 0 ; i < NUM_USER_MAPPED_BLOCKS ; i++) {
		if (block_addr == CFG_BASE + gaudi2->mapped_blocks[i].address) {
			*block_id = i;
			if (block_size)
				*block_size = gaudi2->mapped_blocks[i].size;
			return 0;
		}
	}

	dev_err(hdev->dev, "Invalid block address %#llx", block_addr);

	return -EINVAL;
}

static int gaudi2_block_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
			u32 block_id, u32 block_size)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u64 offset_in_bar;
	u64 address;
	int rc;

	if (block_id >= NUM_USER_MAPPED_BLOCKS) {
		dev_err(hdev->dev, "Invalid block id %u", block_id);
		return -EINVAL;
	}

	/* we allow mapping only an entire block */
	if (block_size != gaudi2->mapped_blocks[block_id].size) {
		dev_err(hdev->dev, "Invalid block size %u", block_size);
		return -EINVAL;
	}

	offset_in_bar = CFG_BASE + gaudi2->mapped_blocks[block_id].address - STM_FLASH_BASE_ADDR;

	address = pci_resource_start(hdev->pdev, SRAM_CFG_BAR_ID) + offset_in_bar;

	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
			VM_DONTCOPY | VM_NORESERVE);

	rc = remap_pfn_range(vma, vma->vm_start, address >> PAGE_SHIFT,
			block_size, vma->vm_page_prot);
	if (rc)
		dev_err(hdev->dev, "remap_pfn_range error %d", rc);

	return rc;
}

static void gaudi2_enable_events_from_fw(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 irq_handler_offset = le32_to_cpu(dyn_regs->gic_host_ints_irq);

	if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)
		WREG32(irq_handler_offset,
			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_INTS_REGISTER].cpu_id);
}
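
/*
 * Translate an MMU HW-capability bit into its register-block base: one of
 * the sixteen HMMUs (four per dcore) or the single PMMU used for host
 * memory. Unknown ids return -EINVAL so callers can skip MMUs that are not
 * present.
 */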
static int gaudi2_get_mmu_base(struct hl_device *hdev, u64 mmu_id, u32 *mmu_base)
{
	switch (mmu_id) {
	case HW_CAP_DCORE0_DMMU0:
		*mmu_base = mmDCORE0_HMMU0_MMU_BASE;
		break;
	case HW_CAP_DCORE0_DMMU1:
		*mmu_base = mmDCORE0_HMMU1_MMU_BASE;
		break;
	case HW_CAP_DCORE0_DMMU2:
		*mmu_base = mmDCORE0_HMMU2_MMU_BASE;
		break;
	case HW_CAP_DCORE0_DMMU3:
		*mmu_base = mmDCORE0_HMMU3_MMU_BASE;
		break;
	case HW_CAP_DCORE1_DMMU0:
		*mmu_base = mmDCORE1_HMMU0_MMU_BASE;
		break;
	case HW_CAP_DCORE1_DMMU1:
		*mmu_base = mmDCORE1_HMMU1_MMU_BASE;
		break;
	case HW_CAP_DCORE1_DMMU2:
		*mmu_base = mmDCORE1_HMMU2_MMU_BASE;
		break;
	case HW_CAP_DCORE1_DMMU3:
		*mmu_base = mmDCORE1_HMMU3_MMU_BASE;
		break;
	case HW_CAP_DCORE2_DMMU0:
		*mmu_base = mmDCORE2_HMMU0_MMU_BASE;
		break;
	case HW_CAP_DCORE2_DMMU1:
		*mmu_base = mmDCORE2_HMMU1_MMU_BASE;
		break;
	case HW_CAP_DCORE2_DMMU2:
		*mmu_base = mmDCORE2_HMMU2_MMU_BASE;
		break;
	case HW_CAP_DCORE2_DMMU3:
		*mmu_base = mmDCORE2_HMMU3_MMU_BASE;
		break;
	case HW_CAP_DCORE3_DMMU0:
		*mmu_base = mmDCORE3_HMMU0_MMU_BASE;
		break;
	case HW_CAP_DCORE3_DMMU1:
		*mmu_base = mmDCORE3_HMMU1_MMU_BASE;
		break;
	case HW_CAP_DCORE3_DMMU2:
		*mmu_base = mmDCORE3_HMMU2_MMU_BASE;
		break;
	case HW_CAP_DCORE3_DMMU3:
		*mmu_base = mmDCORE3_HMMU3_MMU_BASE;
		break;
	case HW_CAP_PMMU:
		*mmu_base = mmPMMU_HBW_MMU_BASE;
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

static void gaudi2_ack_mmu_error(struct hl_device *hdev, u64 mmu_id)
{
	bool is_pmmu = (mmu_id == HW_CAP_PMMU);
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 mmu_base;

	if (!(gaudi2->hw_cap_initialized & mmu_id))
		return;

	if (gaudi2_get_mmu_base(hdev, mmu_id, &mmu_base))
		return;

	gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, NULL);
	gaudi2_handle_access_error(hdev, mmu_base, is_pmmu);
}

static int gaudi2_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
{
	u32 i, mmu_id, num_of_hmmus = NUM_OF_HMMU_PER_DCORE * NUM_OF_DCORES;

	/* check all HMMUs */
	for (i = 0 ; i < num_of_hmmus ; i++) {
		mmu_id = HW_CAP_DCORE0_DMMU0 << i;

		if (mmu_cap_mask & mmu_id)
			gaudi2_ack_mmu_error(hdev, mmu_id);
	}

	/* check PMMU */
	if (mmu_cap_mask & HW_CAP_PMMU)
		gaudi2_ack_mmu_error(hdev, HW_CAP_PMMU);

	return 0;
}

static void gaudi2_get_msi_info(__le32 *table)
{
	table[CPUCP_EVENT_QUEUE_MSI_TYPE] = cpu_to_le32(GAUDI2_EVENT_QUEUE_MSIX_IDX);
}
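
/*
 * The HL_GAUDI2_*_PLL indices come from the driver uapi, while the returned
 * values are the firmware's own PLL enumeration; keeping the mapping
 * explicit here lets the two numbering schemes evolve independently.
 */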
static int gaudi2_map_pll_idx_to_fw_idx(u32 pll_idx)
{
	switch (pll_idx) {
	case HL_GAUDI2_CPU_PLL: return CPU_PLL;
	case HL_GAUDI2_PCI_PLL: return PCI_PLL;
	case HL_GAUDI2_NIC_PLL: return NIC_PLL;
	case HL_GAUDI2_DMA_PLL: return DMA_PLL;
	case HL_GAUDI2_MESH_PLL: return MESH_PLL;
	case HL_GAUDI2_MME_PLL: return MME_PLL;
	case HL_GAUDI2_TPC_PLL: return TPC_PLL;
	case HL_GAUDI2_IF_PLL: return IF_PLL;
	case HL_GAUDI2_SRAM_PLL: return SRAM_PLL;
	case HL_GAUDI2_HBM_PLL: return HBM_PLL;
	case HL_GAUDI2_VID_PLL: return VID_PLL;
	case HL_GAUDI2_MSS_PLL: return MSS_PLL;
	default: return -EINVAL;
	}
}

static int gaudi2_gen_sync_to_engine_map(struct hl_device *hdev, struct hl_sync_to_engine_map *map)
{
	/* Not implemented */
	return 0;
}

static int gaudi2_monitor_valid(struct hl_mon_state_dump *mon)
{
	/* Not implemented */
	return 0;
}

static int gaudi2_print_single_monitor(char **buf, size_t *size, size_t *offset,
				struct hl_device *hdev, struct hl_mon_state_dump *mon)
{
	/* Not implemented */
	return 0;
}

static int gaudi2_print_fences_single_engine(struct hl_device *hdev, u64 base_offset,
				u64 status_base_offset, enum hl_sync_engine_type engine_type,
				u32 engine_id, char **buf, size_t *size, size_t *offset)
{
	/* Not implemented */
	return 0;
}

static struct hl_state_dump_specs_funcs gaudi2_state_dump_funcs = {
	.monitor_valid = gaudi2_monitor_valid,
	.print_single_monitor = gaudi2_print_single_monitor,
	.gen_sync_to_engine_map = gaudi2_gen_sync_to_engine_map,
	.print_fences_single_engine = gaudi2_print_fences_single_engine,
};

static void gaudi2_state_dump_init(struct hl_device *hdev)
{
	/* Not implemented */
	hdev->state_dump_specs.props = gaudi2_state_dump_specs_props;
	hdev->state_dump_specs.funcs = gaudi2_state_dump_funcs;
}

static u32 gaudi2_get_sob_addr(struct hl_device *hdev, u32 sob_id)
{
	return mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * 4;
}

static u32 *gaudi2_get_stream_master_qid_arr(void)
{
	return NULL;
}

static void gaudi2_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
				struct attribute_group *dev_vrm_attr_grp)
{
	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
	hl_sysfs_add_dev_vrm_attr(hdev, dev_vrm_attr_grp);
}

static int gaudi2_mmu_get_real_page_size(struct hl_device *hdev, struct hl_mmu_properties *mmu_prop,
						u32 page_size, u32 *real_page_size, bool is_dram_addr)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;

	/* for host pages the page size must be a multiple of the MMU page size */
	if (!is_dram_addr) {
		if (page_size % mmu_prop->page_size)
			goto page_size_err;

		*real_page_size = mmu_prop->page_size;
		return 0;
	}

	if ((page_size % prop->dram_page_size) || (prop->dram_page_size > mmu_prop->page_size))
		goto page_size_err;

	/*
	 * MMU page size is different from DRAM page size (more precisely, DMMU page is greater
	 * than DRAM page size).
	 * For this reason work with the DRAM page size and let the MMU scrambling routine handle
	 * this mismatch when calculating the address to place in the MMU page table.
	 * (in that case also make sure that the dram_page_size is not greater than the
	 * mmu page size)
	 */
	*real_page_size = prop->dram_page_size;

	return 0;

page_size_err:
	dev_err(hdev->dev, "page size of %u is not %uKB aligned, can't map\n",
							page_size, mmu_prop->page_size >> 10);
	return -EFAULT;
}

static int gaudi2_get_monitor_dump(struct hl_device *hdev, void *data)
{
	return -EOPNOTSUPP;
}

int gaudi2_send_device_activity(struct hl_device *hdev, bool open)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;

	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_send_device_activity(hdev, open);
}

static const struct hl_asic_funcs gaudi2_funcs = {
	.early_init = gaudi2_early_init,
	.early_fini = gaudi2_early_fini,
	.late_init = gaudi2_late_init,
	.late_fini = gaudi2_late_fini,
	.sw_init = gaudi2_sw_init,
	.sw_fini = gaudi2_sw_fini,
	.hw_init = gaudi2_hw_init,
	.hw_fini = gaudi2_hw_fini,
	.halt_engines = gaudi2_halt_engines,
	.suspend = gaudi2_suspend,
	.resume = gaudi2_resume,
	.mmap = gaudi2_mmap,
	.ring_doorbell = gaudi2_ring_doorbell,
	.pqe_write = gaudi2_pqe_write,
	.asic_dma_alloc_coherent = gaudi2_dma_alloc_coherent,
	.asic_dma_free_coherent = gaudi2_dma_free_coherent,
	.scrub_device_mem = gaudi2_scrub_device_mem,
	.scrub_device_dram = gaudi2_scrub_device_dram,
	.get_int_queue_base = NULL,
	.test_queues = gaudi2_test_queues,
	.asic_dma_pool_zalloc = gaudi2_dma_pool_zalloc,
	.asic_dma_pool_free = gaudi2_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = gaudi2_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = gaudi2_cpu_accessible_dma_pool_free,
	.asic_dma_unmap_single = gaudi2_dma_unmap_single,
	.asic_dma_map_single = gaudi2_dma_map_single,
	.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
	.cs_parser = gaudi2_cs_parser,
	.asic_dma_map_sgtable = hl_dma_map_sgtable,
	.add_end_of_cb_packets = NULL,
	.update_eq_ci = gaudi2_update_eq_ci,
	.context_switch = gaudi2_context_switch,
	.restore_phase_topology = gaudi2_restore_phase_topology,
	.debugfs_read_dma = gaudi2_debugfs_read_dma,
	.add_device_attr = gaudi2_add_device_attr,
	.handle_eqe = gaudi2_handle_eqe,
	.get_events_stat = gaudi2_get_events_stat,
	.read_pte = NULL,
	.write_pte = NULL,
	.mmu_invalidate_cache = gaudi2_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = gaudi2_mmu_invalidate_cache_range,
	.mmu_prefetch_cache_range = NULL,
	.send_heartbeat = gaudi2_send_heartbeat,
	.debug_coresight = gaudi2_debug_coresight,
	.is_device_idle = gaudi2_is_device_idle,
	.compute_reset_late_init = gaudi2_compute_reset_late_init,
	.hw_queues_lock = gaudi2_hw_queues_lock,
	.hw_queues_unlock = gaudi2_hw_queues_unlock,
	.get_pci_id = gaudi2_get_pci_id,
	.get_eeprom_data = gaudi2_get_eeprom_data,
	.get_monitor_dump = gaudi2_get_monitor_dump,
	.send_cpu_message = gaudi2_send_cpu_message,
	.pci_bars_map = gaudi2_pci_bars_map,
	.init_iatu = gaudi2_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = gaudi2_halt_coresight,
	.ctx_init = gaudi2_ctx_init,
	.ctx_fini = gaudi2_ctx_fini,
	.pre_schedule_cs = gaudi2_pre_schedule_cs,
	.get_queue_id_for_cq = gaudi2_get_queue_id_for_cq,
	.load_firmware_to_device = NULL,
	.load_boot_fit_to_device = NULL,
	.get_signal_cb_size = gaudi2_get_signal_cb_size,
	.get_wait_cb_size = gaudi2_get_wait_cb_size,
	.gen_signal_cb = gaudi2_gen_signal_cb,
	.gen_wait_cb = gaudi2_gen_wait_cb,
	.reset_sob = gaudi2_reset_sob,
	.reset_sob_group = gaudi2_reset_sob_group,
	.get_device_time = gaudi2_get_device_time,
	.pb_print_security_errors = gaudi2_pb_print_security_errors,
	.collective_wait_init_cs = gaudi2_collective_wait_init_cs,
	.collective_wait_create_jobs = gaudi2_collective_wait_create_jobs,
	.get_dec_base_addr = gaudi2_get_dec_base_addr,
	.scramble_addr = gaudi2_mmu_scramble_addr,
	.descramble_addr = gaudi2_mmu_descramble_addr,
	.ack_protection_bits_errors = gaudi2_ack_protection_bits_errors,
	.get_hw_block_id = gaudi2_get_hw_block_id,
	.hw_block_mmap = gaudi2_block_mmap,
	.enable_events_from_fw = gaudi2_enable_events_from_fw,
	.ack_mmu_errors = gaudi2_ack_mmu_page_fault_or_access_error,
	.get_msi_info = gaudi2_get_msi_info,
	.map_pll_idx_to_fw_idx = gaudi2_map_pll_idx_to_fw_idx,
	.init_firmware_preload_params = gaudi2_init_firmware_preload_params,
	.init_firmware_loader = gaudi2_init_firmware_loader,
	.init_cpu_scrambler_dram = gaudi2_init_scrambler_hbm,
	.state_dump_init = gaudi2_state_dump_init,
	.get_sob_addr = &gaudi2_get_sob_addr,
	.set_pci_memory_regions = gaudi2_set_pci_memory_regions,
	.get_stream_master_qid_arr = gaudi2_get_stream_master_qid_arr,
	.check_if_razwi_happened = gaudi2_check_if_razwi_happened,
	.mmu_get_real_page_size = gaudi2_mmu_get_real_page_size,
	.access_dev_mem = hl_access_dev_mem,
	.set_dram_bar_base = gaudi2_set_hbm_bar_base,
	.set_engine_cores = gaudi2_set_engine_cores,
	.set_engines = gaudi2_set_engines,
	.send_device_activity = gaudi2_send_device_activity,
	.set_dram_properties = gaudi2_set_dram_properties,
	.set_binning_masks = gaudi2_set_binning_masks,
};

void gaudi2_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &gaudi2_funcs;
}