// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2020-2022 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "gaudi2P.h"
#include "gaudi2_masks.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v2_0.h"
#include "../include/gaudi2/gaudi2_packets.h"
#include "../include/gaudi2/gaudi2_reg_map.h"
#include "../include/gaudi2/gaudi2_async_ids_map_extended.h"
#include "../include/gaudi2/arc/gaudi2_arc_common_packets.h"

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/hwmon.h>
#include <linux/iommu.h>

#define GAUDI2_DMA_POOL_BLK_SIZE		SZ_256		/* 256 bytes */

#define GAUDI2_RESET_TIMEOUT_MSEC		2000		/* 2000ms */
#define GAUDI2_RESET_POLL_TIMEOUT_USEC		50000		/* 50ms */
#define GAUDI2_PLDM_HRESET_TIMEOUT_MSEC		25000		/* 25s */
#define GAUDI2_PLDM_SRESET_TIMEOUT_MSEC		25000		/* 25s */
#define GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC	3000000		/* 3s */
#define GAUDI2_RESET_POLL_CNT			3
#define GAUDI2_RESET_WAIT_MSEC			1		/* 1ms */
#define GAUDI2_CPU_RESET_WAIT_MSEC		100		/* 100ms */
#define GAUDI2_PLDM_RESET_WAIT_MSEC		1000		/* 1s */
#define GAUDI2_CB_POOL_CB_CNT			512
#define GAUDI2_CB_POOL_CB_SIZE			SZ_128K		/* 128KB */
#define GAUDI2_MSG_TO_CPU_TIMEOUT_USEC		4000000		/* 4s */
#define GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC		25000000	/* 25s */
#define GAUDI2_TEST_QUEUE_WAIT_USEC		100000		/* 100ms */
#define GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */

#define GAUDI2_ALLOC_CPU_MEM_RETRY_CNT		3

/*
 * Since the code already has built-in support for binning of up to MAX_FAULTY_TPCS TPCs,
 * and relies on that value (for array sizes etc.), we define another value
 * for the maximum number of faulty TPCs which reflects the cluster binning requirements.
 */
#define MAX_CLUSTER_BINNING_FAULTY_TPCS		1
#define MAX_FAULTY_XBARS			1
#define MAX_FAULTY_EDMAS			1
#define MAX_FAULTY_DECODERS			1

#define GAUDI2_TPC_FULL_MASK			0x1FFFFFF
#define GAUDI2_HIF_HMMU_FULL_MASK		0xFFFF
#define GAUDI2_DECODER_FULL_MASK		0x3FF

#define GAUDI2_NUM_OF_QM_ERR_CAUSE		18
#define GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE		25
#define GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE		3
#define GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE		14
#define GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE		3
#define GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE		2
#define GAUDI2_NUM_OF_ROT_ERR_CAUSE		22
#define GAUDI2_NUM_OF_TPC_INTR_CAUSE		30
#define GAUDI2_NUM_OF_DEC_ERR_CAUSE		25
#define GAUDI2_NUM_OF_MME_ERR_CAUSE		16
#define GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE	5
#define GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE		7
#define GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE	8
#define GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE		19
#define GAUDI2_NUM_OF_HBM_SEI_CAUSE		9
#define GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE		3
#define GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE	3
#define GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE	2
#define GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE	2
#define GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE	2
#define GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE		5

#define GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 10)
#define GAUDI2_PLDM_MMU_TIMEOUT_USEC		(MMU_CONFIG_TIMEOUT_USEC * 200)
#define GAUDI2_ARB_WDT_TIMEOUT			(0x1000000)

#define GAUDI2_VDEC_TIMEOUT_USEC		10000		/* 10ms */
#define GAUDI2_PLDM_VDEC_TIMEOUT_USEC		(GAUDI2_VDEC_TIMEOUT_USEC * 100)

#define KDMA_TIMEOUT_USEC			USEC_PER_SEC
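
/*
 * Engine idle checks below follow one pattern: mask the engine's status
 * value(s) and compare against the idle mask. A DMA core is idle when it has
 * no outstanding descriptors or completions; a QMAN is idle only when its
 * GLBL_STS0, GLBL_STS1 (ARC) and CGM_STS values all match their idle masks.
 */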

#define IS_DMA_IDLE(dma_core_idle_ind_mask)	\
	(!((dma_core_idle_ind_mask) &		\
	((DCORE0_EDMA0_CORE_IDLE_IND_MASK_DESC_CNT_STS_MASK) | \
	(DCORE0_EDMA0_CORE_IDLE_IND_MASK_COMP_MASK))))

#define IS_MME_IDLE(mme_arch_sts) (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)

#define IS_TPC_IDLE(tpc_cfg_sts) (((tpc_cfg_sts) & (TPC_IDLE_MASK)) == (TPC_IDLE_MASK))

#define IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) \
	((((qm_glbl_sts0) & (QM_IDLE_MASK)) == (QM_IDLE_MASK)) && \
	(((qm_glbl_sts1) & (QM_ARC_IDLE_MASK)) == (QM_ARC_IDLE_MASK)) && \
	(((qm_cgm_sts) & (CGM_IDLE_MASK)) == (CGM_IDLE_MASK)))

#define PCIE_DEC_EN_MASK			0x300
#define DEC_WORK_STATE_IDLE			0
#define DEC_WORK_STATE_PEND			3
#define IS_DEC_IDLE(dec_swreg15) \
	(((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) == DEC_WORK_STATE_IDLE || \
	((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) == DEC_WORK_STATE_PEND)

/* HBM MMU address scrambling parameters */
#define GAUDI2_HBM_MMU_SCRM_MEM_SIZE		SZ_8M
#define GAUDI2_HBM_MMU_SCRM_DIV_SHIFT		26
#define GAUDI2_HBM_MMU_SCRM_MOD_SHIFT		0
#define GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK	DRAM_VA_HINT_MASK
#define GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR	16
#define MMU_RANGE_INV_VA_LSB_SHIFT		12
#define MMU_RANGE_INV_VA_MSB_SHIFT		44
#define MMU_RANGE_INV_EN_SHIFT			0
#define MMU_RANGE_INV_ASID_EN_SHIFT		1
#define MMU_RANGE_INV_ASID_SHIFT		2
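
/*
 * The MMU_RANGE_INV_* shifts above pack a single range-invalidation value:
 * bit 0 enables the invalidation, bit 1 enables ASID matching, the ASID
 * starts at bit 2, and the VA range LSB/MSB fields sit at bits 12 and 44.
 */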

/* The last SPI_SEI cause bit, "burst_fifo_full", is expected to be triggered in PMMU because it
 * has only a 2-entry FIFO, and hence it is not enabled for it.
 */
#define GAUDI2_PMMU_SPI_SEI_ENABLE_MASK		GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 2, 0)
#define GAUDI2_HMMU_SPI_SEI_ENABLE_MASK		GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 1, 0)
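/* With GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE == 19 these evaluate to GENMASK(17, 0) and GENMASK(18, 0). */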

#define GAUDI2_MAX_STRING_LEN			64

#define GAUDI2_VDEC_MSIX_ENTRIES		(GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM - \
						GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 1)
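/* Number of MSI-X entries occupied by the decoders: the span from the first
 * DCORE0_DEC0 "normal" entry to the last shared-decoder "abnormal" entry, inclusive.
 */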

#define ENGINE_ID_DCORE_OFFSET (GAUDI2_DCORE1_ENGINE_ID_EDMA_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0)
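/*
 * Engine IDs repeat per dcore at a fixed stride, so dcore N's instance of an
 * engine can be computed (a sketch of the intended arithmetic) as
 * base_engine_id + N * ENGINE_ID_DCORE_OFFSET.
 */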

enum hl_pmmu_fatal_cause {
	LATENCY_RD_OUT_FIFO_OVERRUN,
	LATENCY_WR_OUT_FIFO_OVERRUN,
};

enum hl_pcie_drain_ind_cause {
	LBW_AXI_DRAIN_IND,
	HBW_AXI_DRAIN_IND
};

static const u32 cluster_hmmu_hif_enabled_mask[GAUDI2_HBM_NUM] = {
	[HBM_ID0] = 0xFFFC,
	[HBM_ID1] = 0xFFCF,
	[HBM_ID2] = 0xF7F7,
	[HBM_ID3] = 0x7F7F,
	[HBM_ID4] = 0xFCFF,
	[HBM_ID5] = 0xCFFF,
};

static const u8 xbar_edge_to_hbm_cluster[EDMA_ID_SIZE] = {
	[EDMA_ID_DCORE0_INSTANCE0] = HBM_ID0,
	[EDMA_ID_DCORE1_INSTANCE0] = HBM_ID1,
	[EDMA_ID_DCORE2_INSTANCE0] = HBM_ID2,
	[EDMA_ID_DCORE3_INSTANCE0] = HBM_ID3,
};

static const u8 edma_to_hbm_cluster[EDMA_ID_SIZE] = {
	[EDMA_ID_DCORE0_INSTANCE0] = HBM_ID0,
	[EDMA_ID_DCORE0_INSTANCE1] = HBM_ID2,
	[EDMA_ID_DCORE1_INSTANCE0] = HBM_ID1,
	[EDMA_ID_DCORE1_INSTANCE1] = HBM_ID3,
	[EDMA_ID_DCORE2_INSTANCE0] = HBM_ID2,
	[EDMA_ID_DCORE2_INSTANCE1] = HBM_ID4,
	[EDMA_ID_DCORE3_INSTANCE0] = HBM_ID3,
	[EDMA_ID_DCORE3_INSTANCE1] = HBM_ID5,
};
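
/*
 * The two tables above map XBAR edges and EDMA instances to the HBM cluster
 * they depend on; presumably consulted together with the MAX_FAULTY_* binning
 * limits above when a faulty component is binned out.
 */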

static const int gaudi2_qman_async_event_id[] = {
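	/* All four streams (PI 0..3) of a QMAN share that QMAN's single event ID. */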
	[GAUDI2_QUEUE_ID_PDMA_0_0] = GAUDI2_EVENT_PDMA0_QM,
	[GAUDI2_QUEUE_ID_PDMA_0_1] = GAUDI2_EVENT_PDMA0_QM,
	[GAUDI2_QUEUE_ID_PDMA_0_2] = GAUDI2_EVENT_PDMA0_QM,
	[GAUDI2_QUEUE_ID_PDMA_0_3] = GAUDI2_EVENT_PDMA0_QM,
	[GAUDI2_QUEUE_ID_PDMA_1_0] = GAUDI2_EVENT_PDMA1_QM,
	[GAUDI2_QUEUE_ID_PDMA_1_1] = GAUDI2_EVENT_PDMA1_QM,
	[GAUDI2_QUEUE_ID_PDMA_1_2] = GAUDI2_EVENT_PDMA1_QM,
	[GAUDI2_QUEUE_ID_PDMA_1_3] = GAUDI2_EVENT_PDMA1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = GAUDI2_EVENT_HDMA0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = GAUDI2_EVENT_HDMA0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = GAUDI2_EVENT_HDMA0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = GAUDI2_EVENT_HDMA0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = GAUDI2_EVENT_HDMA1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = GAUDI2_EVENT_HDMA1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = GAUDI2_EVENT_HDMA1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = GAUDI2_EVENT_HDMA1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = GAUDI2_EVENT_MME0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = GAUDI2_EVENT_MME0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = GAUDI2_EVENT_MME0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = GAUDI2_EVENT_MME0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = GAUDI2_EVENT_TPC0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = GAUDI2_EVENT_TPC0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = GAUDI2_EVENT_TPC0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = GAUDI2_EVENT_TPC0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = GAUDI2_EVENT_TPC1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = GAUDI2_EVENT_TPC1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = GAUDI2_EVENT_TPC1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = GAUDI2_EVENT_TPC1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = GAUDI2_EVENT_TPC2_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = GAUDI2_EVENT_TPC2_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = GAUDI2_EVENT_TPC2_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = GAUDI2_EVENT_TPC2_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = GAUDI2_EVENT_TPC3_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = GAUDI2_EVENT_TPC3_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = GAUDI2_EVENT_TPC3_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = GAUDI2_EVENT_TPC3_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = GAUDI2_EVENT_TPC4_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = GAUDI2_EVENT_TPC4_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = GAUDI2_EVENT_TPC4_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = GAUDI2_EVENT_TPC4_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = GAUDI2_EVENT_TPC5_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = GAUDI2_EVENT_TPC5_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = GAUDI2_EVENT_TPC5_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = GAUDI2_EVENT_TPC5_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = GAUDI2_EVENT_TPC24_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = GAUDI2_EVENT_TPC24_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = GAUDI2_EVENT_TPC24_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = GAUDI2_EVENT_TPC24_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = GAUDI2_EVENT_HDMA2_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = GAUDI2_EVENT_HDMA2_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = GAUDI2_EVENT_HDMA2_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = GAUDI2_EVENT_HDMA2_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = GAUDI2_EVENT_HDMA3_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = GAUDI2_EVENT_HDMA3_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = GAUDI2_EVENT_HDMA3_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = GAUDI2_EVENT_HDMA3_QM,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = GAUDI2_EVENT_MME1_QM,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = GAUDI2_EVENT_MME1_QM,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = GAUDI2_EVENT_MME1_QM,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = GAUDI2_EVENT_MME1_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = GAUDI2_EVENT_TPC6_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = GAUDI2_EVENT_TPC6_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = GAUDI2_EVENT_TPC6_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = GAUDI2_EVENT_TPC6_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = GAUDI2_EVENT_TPC7_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = GAUDI2_EVENT_TPC7_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = GAUDI2_EVENT_TPC7_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = GAUDI2_EVENT_TPC7_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = GAUDI2_EVENT_TPC8_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = GAUDI2_EVENT_TPC8_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = GAUDI2_EVENT_TPC8_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = GAUDI2_EVENT_TPC8_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = GAUDI2_EVENT_TPC9_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = GAUDI2_EVENT_TPC9_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = GAUDI2_EVENT_TPC9_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = GAUDI2_EVENT_TPC9_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = GAUDI2_EVENT_TPC10_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = GAUDI2_EVENT_TPC10_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = GAUDI2_EVENT_TPC10_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = GAUDI2_EVENT_TPC10_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = GAUDI2_EVENT_TPC11_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = GAUDI2_EVENT_TPC11_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = GAUDI2_EVENT_TPC11_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = GAUDI2_EVENT_TPC11_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = GAUDI2_EVENT_HDMA4_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = GAUDI2_EVENT_HDMA4_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = GAUDI2_EVENT_HDMA4_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = GAUDI2_EVENT_HDMA4_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = GAUDI2_EVENT_HDMA5_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = GAUDI2_EVENT_HDMA5_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = GAUDI2_EVENT_HDMA5_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = GAUDI2_EVENT_HDMA5_QM,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = GAUDI2_EVENT_MME2_QM,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = GAUDI2_EVENT_MME2_QM,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = GAUDI2_EVENT_MME2_QM,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = GAUDI2_EVENT_MME2_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = GAUDI2_EVENT_TPC12_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = GAUDI2_EVENT_TPC12_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = GAUDI2_EVENT_TPC12_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = GAUDI2_EVENT_TPC12_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = GAUDI2_EVENT_TPC13_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = GAUDI2_EVENT_TPC13_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = GAUDI2_EVENT_TPC13_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = GAUDI2_EVENT_TPC13_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = GAUDI2_EVENT_TPC14_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = GAUDI2_EVENT_TPC14_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = GAUDI2_EVENT_TPC14_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = GAUDI2_EVENT_TPC14_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = GAUDI2_EVENT_TPC15_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = GAUDI2_EVENT_TPC15_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = GAUDI2_EVENT_TPC15_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = GAUDI2_EVENT_TPC15_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = GAUDI2_EVENT_TPC16_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = GAUDI2_EVENT_TPC16_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = GAUDI2_EVENT_TPC16_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = GAUDI2_EVENT_TPC16_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = GAUDI2_EVENT_TPC17_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = GAUDI2_EVENT_TPC17_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = GAUDI2_EVENT_TPC17_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = GAUDI2_EVENT_TPC17_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = GAUDI2_EVENT_HDMA6_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = GAUDI2_EVENT_HDMA6_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = GAUDI2_EVENT_HDMA6_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = GAUDI2_EVENT_HDMA6_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = GAUDI2_EVENT_HDMA7_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = GAUDI2_EVENT_HDMA7_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = GAUDI2_EVENT_HDMA7_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = GAUDI2_EVENT_HDMA7_QM,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = GAUDI2_EVENT_MME3_QM,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = GAUDI2_EVENT_MME3_QM,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = GAUDI2_EVENT_MME3_QM,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = GAUDI2_EVENT_MME3_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = GAUDI2_EVENT_TPC18_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = GAUDI2_EVENT_TPC18_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = GAUDI2_EVENT_TPC18_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = GAUDI2_EVENT_TPC18_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = GAUDI2_EVENT_TPC19_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = GAUDI2_EVENT_TPC19_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = GAUDI2_EVENT_TPC19_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = GAUDI2_EVENT_TPC19_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = GAUDI2_EVENT_TPC20_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = GAUDI2_EVENT_TPC20_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = GAUDI2_EVENT_TPC20_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = GAUDI2_EVENT_TPC20_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = GAUDI2_EVENT_TPC21_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = GAUDI2_EVENT_TPC21_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = GAUDI2_EVENT_TPC21_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = GAUDI2_EVENT_TPC21_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = GAUDI2_EVENT_TPC22_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = GAUDI2_EVENT_TPC22_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = GAUDI2_EVENT_TPC22_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = GAUDI2_EVENT_TPC22_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = GAUDI2_EVENT_TPC23_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = GAUDI2_EVENT_TPC23_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = GAUDI2_EVENT_TPC23_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = GAUDI2_EVENT_TPC23_QM,
	[GAUDI2_QUEUE_ID_NIC_0_0] = GAUDI2_EVENT_NIC0_QM0,
	[GAUDI2_QUEUE_ID_NIC_0_1] = GAUDI2_EVENT_NIC0_QM0,
	[GAUDI2_QUEUE_ID_NIC_0_2] = GAUDI2_EVENT_NIC0_QM0,
	[GAUDI2_QUEUE_ID_NIC_0_3] = GAUDI2_EVENT_NIC0_QM0,
	[GAUDI2_QUEUE_ID_NIC_1_0] = GAUDI2_EVENT_NIC0_QM1,
	[GAUDI2_QUEUE_ID_NIC_1_1] = GAUDI2_EVENT_NIC0_QM1,
	[GAUDI2_QUEUE_ID_NIC_1_2] = GAUDI2_EVENT_NIC0_QM1,
	[GAUDI2_QUEUE_ID_NIC_1_3] = GAUDI2_EVENT_NIC0_QM1,
	[GAUDI2_QUEUE_ID_NIC_2_0] = GAUDI2_EVENT_NIC1_QM0,
	[GAUDI2_QUEUE_ID_NIC_2_1] = GAUDI2_EVENT_NIC1_QM0,
	[GAUDI2_QUEUE_ID_NIC_2_2] = GAUDI2_EVENT_NIC1_QM0,
	[GAUDI2_QUEUE_ID_NIC_2_3] = GAUDI2_EVENT_NIC1_QM0,
	[GAUDI2_QUEUE_ID_NIC_3_0] = GAUDI2_EVENT_NIC1_QM1,
	[GAUDI2_QUEUE_ID_NIC_3_1] = GAUDI2_EVENT_NIC1_QM1,
	[GAUDI2_QUEUE_ID_NIC_3_2] = GAUDI2_EVENT_NIC1_QM1,
	[GAUDI2_QUEUE_ID_NIC_3_3] = GAUDI2_EVENT_NIC1_QM1,
	[GAUDI2_QUEUE_ID_NIC_4_0] = GAUDI2_EVENT_NIC2_QM0,
	[GAUDI2_QUEUE_ID_NIC_4_1] = GAUDI2_EVENT_NIC2_QM0,
	[GAUDI2_QUEUE_ID_NIC_4_2] = GAUDI2_EVENT_NIC2_QM0,
	[GAUDI2_QUEUE_ID_NIC_4_3] = GAUDI2_EVENT_NIC2_QM0,
	[GAUDI2_QUEUE_ID_NIC_5_0] = GAUDI2_EVENT_NIC2_QM1,
	[GAUDI2_QUEUE_ID_NIC_5_1] = GAUDI2_EVENT_NIC2_QM1,
	[GAUDI2_QUEUE_ID_NIC_5_2] = GAUDI2_EVENT_NIC2_QM1,
	[GAUDI2_QUEUE_ID_NIC_5_3] = GAUDI2_EVENT_NIC2_QM1,
	[GAUDI2_QUEUE_ID_NIC_6_0] = GAUDI2_EVENT_NIC3_QM0,
	[GAUDI2_QUEUE_ID_NIC_6_1] = GAUDI2_EVENT_NIC3_QM0,
	[GAUDI2_QUEUE_ID_NIC_6_2] = GAUDI2_EVENT_NIC3_QM0,
	[GAUDI2_QUEUE_ID_NIC_6_3] = GAUDI2_EVENT_NIC3_QM0,
	[GAUDI2_QUEUE_ID_NIC_7_0] = GAUDI2_EVENT_NIC3_QM1,
	[GAUDI2_QUEUE_ID_NIC_7_1] = GAUDI2_EVENT_NIC3_QM1,
	[GAUDI2_QUEUE_ID_NIC_7_2] = GAUDI2_EVENT_NIC3_QM1,
	[GAUDI2_QUEUE_ID_NIC_7_3] = GAUDI2_EVENT_NIC3_QM1,
	[GAUDI2_QUEUE_ID_NIC_8_0] = GAUDI2_EVENT_NIC4_QM0,
	[GAUDI2_QUEUE_ID_NIC_8_1] = GAUDI2_EVENT_NIC4_QM0,
	[GAUDI2_QUEUE_ID_NIC_8_2] = GAUDI2_EVENT_NIC4_QM0,
	[GAUDI2_QUEUE_ID_NIC_8_3] = GAUDI2_EVENT_NIC4_QM0,
	[GAUDI2_QUEUE_ID_NIC_9_0] = GAUDI2_EVENT_NIC4_QM1,
	[GAUDI2_QUEUE_ID_NIC_9_1] = GAUDI2_EVENT_NIC4_QM1,
	[GAUDI2_QUEUE_ID_NIC_9_2] = GAUDI2_EVENT_NIC4_QM1,
	[GAUDI2_QUEUE_ID_NIC_9_3] = GAUDI2_EVENT_NIC4_QM1,
	[GAUDI2_QUEUE_ID_NIC_10_0] = GAUDI2_EVENT_NIC5_QM0,
	[GAUDI2_QUEUE_ID_NIC_10_1] = GAUDI2_EVENT_NIC5_QM0,
	[GAUDI2_QUEUE_ID_NIC_10_2] = GAUDI2_EVENT_NIC5_QM0,
	[GAUDI2_QUEUE_ID_NIC_10_3] = GAUDI2_EVENT_NIC5_QM0,
	[GAUDI2_QUEUE_ID_NIC_11_0] = GAUDI2_EVENT_NIC5_QM1,
	[GAUDI2_QUEUE_ID_NIC_11_1] = GAUDI2_EVENT_NIC5_QM1,
	[GAUDI2_QUEUE_ID_NIC_11_2] = GAUDI2_EVENT_NIC5_QM1,
	[GAUDI2_QUEUE_ID_NIC_11_3] = GAUDI2_EVENT_NIC5_QM1,
	[GAUDI2_QUEUE_ID_NIC_12_0] = GAUDI2_EVENT_NIC6_QM0,
	[GAUDI2_QUEUE_ID_NIC_12_1] = GAUDI2_EVENT_NIC6_QM0,
	[GAUDI2_QUEUE_ID_NIC_12_2] = GAUDI2_EVENT_NIC6_QM0,
	[GAUDI2_QUEUE_ID_NIC_12_3] = GAUDI2_EVENT_NIC6_QM0,
	[GAUDI2_QUEUE_ID_NIC_13_0] = GAUDI2_EVENT_NIC6_QM1,
	[GAUDI2_QUEUE_ID_NIC_13_1] = GAUDI2_EVENT_NIC6_QM1,
	[GAUDI2_QUEUE_ID_NIC_13_2] = GAUDI2_EVENT_NIC6_QM1,
	[GAUDI2_QUEUE_ID_NIC_13_3] = GAUDI2_EVENT_NIC6_QM1,
	[GAUDI2_QUEUE_ID_NIC_14_0] = GAUDI2_EVENT_NIC7_QM0,
	[GAUDI2_QUEUE_ID_NIC_14_1] = GAUDI2_EVENT_NIC7_QM0,
	[GAUDI2_QUEUE_ID_NIC_14_2] = GAUDI2_EVENT_NIC7_QM0,
	[GAUDI2_QUEUE_ID_NIC_14_3] = GAUDI2_EVENT_NIC7_QM0,
	[GAUDI2_QUEUE_ID_NIC_15_0] = GAUDI2_EVENT_NIC7_QM1,
	[GAUDI2_QUEUE_ID_NIC_15_1] = GAUDI2_EVENT_NIC7_QM1,
	[GAUDI2_QUEUE_ID_NIC_15_2] = GAUDI2_EVENT_NIC7_QM1,
	[GAUDI2_QUEUE_ID_NIC_15_3] = GAUDI2_EVENT_NIC7_QM1,
	[GAUDI2_QUEUE_ID_NIC_16_0] = GAUDI2_EVENT_NIC8_QM0,
	[GAUDI2_QUEUE_ID_NIC_16_1] = GAUDI2_EVENT_NIC8_QM0,
	[GAUDI2_QUEUE_ID_NIC_16_2] = GAUDI2_EVENT_NIC8_QM0,
	[GAUDI2_QUEUE_ID_NIC_16_3] = GAUDI2_EVENT_NIC8_QM0,
	[GAUDI2_QUEUE_ID_NIC_17_0] = GAUDI2_EVENT_NIC8_QM1,
	[GAUDI2_QUEUE_ID_NIC_17_1] = GAUDI2_EVENT_NIC8_QM1,
	[GAUDI2_QUEUE_ID_NIC_17_2] = GAUDI2_EVENT_NIC8_QM1,
	[GAUDI2_QUEUE_ID_NIC_17_3] = GAUDI2_EVENT_NIC8_QM1,
	[GAUDI2_QUEUE_ID_NIC_18_0] = GAUDI2_EVENT_NIC9_QM0,
	[GAUDI2_QUEUE_ID_NIC_18_1] = GAUDI2_EVENT_NIC9_QM0,
	[GAUDI2_QUEUE_ID_NIC_18_2] = GAUDI2_EVENT_NIC9_QM0,
	[GAUDI2_QUEUE_ID_NIC_18_3] = GAUDI2_EVENT_NIC9_QM0,
	[GAUDI2_QUEUE_ID_NIC_19_0] = GAUDI2_EVENT_NIC9_QM1,
	[GAUDI2_QUEUE_ID_NIC_19_1] = GAUDI2_EVENT_NIC9_QM1,
	[GAUDI2_QUEUE_ID_NIC_19_2] = GAUDI2_EVENT_NIC9_QM1,
	[GAUDI2_QUEUE_ID_NIC_19_3] = GAUDI2_EVENT_NIC9_QM1,
	[GAUDI2_QUEUE_ID_NIC_20_0] = GAUDI2_EVENT_NIC10_QM0,
	[GAUDI2_QUEUE_ID_NIC_20_1] = GAUDI2_EVENT_NIC10_QM0,
	[GAUDI2_QUEUE_ID_NIC_20_2] = GAUDI2_EVENT_NIC10_QM0,
	[GAUDI2_QUEUE_ID_NIC_20_3] = GAUDI2_EVENT_NIC10_QM0,
	[GAUDI2_QUEUE_ID_NIC_21_0] = GAUDI2_EVENT_NIC10_QM1,
	[GAUDI2_QUEUE_ID_NIC_21_1] = GAUDI2_EVENT_NIC10_QM1,
	[GAUDI2_QUEUE_ID_NIC_21_2] = GAUDI2_EVENT_NIC10_QM1,
	[GAUDI2_QUEUE_ID_NIC_21_3] = GAUDI2_EVENT_NIC10_QM1,
	[GAUDI2_QUEUE_ID_NIC_22_0] = GAUDI2_EVENT_NIC11_QM0,
	[GAUDI2_QUEUE_ID_NIC_22_1] = GAUDI2_EVENT_NIC11_QM0,
	[GAUDI2_QUEUE_ID_NIC_22_2] = GAUDI2_EVENT_NIC11_QM0,
	[GAUDI2_QUEUE_ID_NIC_22_3] = GAUDI2_EVENT_NIC11_QM0,
	[GAUDI2_QUEUE_ID_NIC_23_0] = GAUDI2_EVENT_NIC11_QM1,
	[GAUDI2_QUEUE_ID_NIC_23_1] = GAUDI2_EVENT_NIC11_QM1,
	[GAUDI2_QUEUE_ID_NIC_23_2] = GAUDI2_EVENT_NIC11_QM1,
	[GAUDI2_QUEUE_ID_NIC_23_3] = GAUDI2_EVENT_NIC11_QM1,
	[GAUDI2_QUEUE_ID_ROT_0_0] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
	[GAUDI2_QUEUE_ID_ROT_0_1] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
	[GAUDI2_QUEUE_ID_ROT_0_2] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
	[GAUDI2_QUEUE_ID_ROT_0_3] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
	[GAUDI2_QUEUE_ID_ROT_1_0] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
	[GAUDI2_QUEUE_ID_ROT_1_1] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
	[GAUDI2_QUEUE_ID_ROT_1_2] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
	[GAUDI2_QUEUE_ID_ROT_1_3] = GAUDI2_EVENT_ROTATOR1_ROT1_QM
};

static const int gaudi2_dma_core_async_event_id[] = {
	[DMA_CORE_ID_EDMA0] = GAUDI2_EVENT_HDMA0_CORE,
	[DMA_CORE_ID_EDMA1] = GAUDI2_EVENT_HDMA1_CORE,
	[DMA_CORE_ID_EDMA2] = GAUDI2_EVENT_HDMA2_CORE,
	[DMA_CORE_ID_EDMA3] = GAUDI2_EVENT_HDMA3_CORE,
	[DMA_CORE_ID_EDMA4] = GAUDI2_EVENT_HDMA4_CORE,
	[DMA_CORE_ID_EDMA5] = GAUDI2_EVENT_HDMA5_CORE,
	[DMA_CORE_ID_EDMA6] = GAUDI2_EVENT_HDMA6_CORE,
	[DMA_CORE_ID_EDMA7] = GAUDI2_EVENT_HDMA7_CORE,
	[DMA_CORE_ID_PDMA0] = GAUDI2_EVENT_PDMA0_CORE,
	[DMA_CORE_ID_PDMA1] = GAUDI2_EVENT_PDMA1_CORE,
	[DMA_CORE_ID_KDMA] = GAUDI2_EVENT_KDMA0_CORE,
};

static const char * const gaudi2_qm_sei_error_cause[GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE] = {
};

static const char * const gaudi2_cpu_sei_error_cause[GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE] = {
	"AXI_TERMINATOR WR",
	"AXI_TERMINATOR RD",
	"AXI SPLIT SEI Status"
};

static const char * const gaudi2_arc_sei_error_cause[GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE] = {
	"cbu_bresp_sei_intr_cause",
	"cbu_rresp_sei_intr_cause",
	"lbu_bresp_sei_intr_cause",
	"lbu_rresp_sei_intr_cause",
	"cbu_axi_split_intr_cause",
	"lbu_axi_split_intr_cause",
	"arc_ip_excptn_sei_intr_cause",
	"dmi_bresp_sei_intr_cause",
	"aux2apb_err_sei_intr_cause",
	"cfg_lbw_wr_terminated_intr_cause",
	"cfg_lbw_rd_terminated_intr_cause",
	"cfg_dccm_wr_terminated_intr_cause",
	"cfg_dccm_rd_terminated_intr_cause",
	"cfg_hbw_rd_terminated_intr_cause"
};

static const char * const gaudi2_dec_error_cause[GAUDI2_NUM_OF_DEC_ERR_CAUSE] = {
	"msix_vcd_hbw_sei",
	"msix_l2c_hbw_sei",
	"msix_nrm_hbw_sei",
	"msix_abnrm_hbw_sei",
	"msix_vcd_lbw_sei",
	"msix_l2c_lbw_sei",
	"msix_nrm_lbw_sei",
	"msix_abnrm_lbw_sei",
	"apb_vcd_lbw_sei",
	"apb_l2c_lbw_sei",
	"apb_nrm_lbw_sei",
	"apb_abnrm_lbw_sei",
	"dec_sei",
	"dec_apb_sei",
	"trc_apb_sei",
	"lbw_mstr_if_sei",
	"axi_split_bresp_err_sei",
	"hbw_axi_wr_viol_sei",
	"hbw_axi_rd_viol_sei",
	"lbw_axi_wr_viol_sei",
	"lbw_axi_rd_viol_sei",
	"vcd_spi",
	"l2c_spi",
	"nrm_spi",
	"abnrm_spi",
};

static const char * const gaudi2_qman_error_cause[GAUDI2_NUM_OF_QM_ERR_CAUSE] = {
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP AXI LBW error",
	"CP WRREG32 or WRBULK returned error",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
	"FENCE 3 inc over max value and clipped",
	"FENCE 0 dec under min value and clipped",
	"FENCE 1 dec under min value and clipped",
	"FENCE 2 dec under min value and clipped",
	"FENCE 3 dec under min value and clipped",
};

static const char * const gaudi2_qman_lower_cp_error_cause[GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE] = {
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP AXI LBW error",
	"CP WRREG32 or WRBULK returned error",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
	"FENCE 3 inc over max value and clipped",
	"FENCE 0 dec under min value and clipped",
	"FENCE 1 dec under min value and clipped",
	"FENCE 2 dec under min value and clipped",
	"FENCE 3 dec under min value and clipped",
	"CQ_WR_IFIFO_CI_ERR",
	"ARC_CQ_WR_IFIFO_CI_ERR",
	"ARC_CQ_WR_CTL_CI_ERR",
};

static const char * const gaudi2_qman_arb_error_cause[GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE] = {
	"Choice push while full error",
	"Choice Q watchdog error",
	"MSG AXI LBW returned with error"
};

static const char * const guadi2_rot_error_cause[GAUDI2_NUM_OF_ROT_ERR_CAUSE] = {
	"qm_trace_fence_events",
	"lbw_mstr_rresp_err",
	"lbw_mstr_bresp_err",
	"hbw_mstr_rresp_err",
	"hbw_mstr_bresp_err",
	"async_arc2cpu_sei_intr",
};

static const char * const gaudi2_tpc_interrupts_cause[GAUDI2_NUM_OF_TPC_INTR_CAUSE] = {
	"tpc_address_exceed_slm",
	"tpc_spu_mac_overflow",
	"tpc_spu_addsub_overflow",
	"tpc_spu_abs_overflow",
	"tpc_spu_fma_fp_dst_nan",
	"tpc_spu_fma_fp_dst_inf",
	"tpc_spu_convert_fp_dst_nan",
	"tpc_spu_convert_fp_dst_inf",
	"tpc_spu_fp_dst_denorm",
	"tpc_vpu_mac_overflow",
	"tpc_vpu_addsub_overflow",
	"tpc_vpu_abs_overflow",
	"tpc_vpu_convert_fp_dst_nan",
	"tpc_vpu_convert_fp_dst_inf",
	"tpc_vpu_fma_fp_dst_nan",
	"tpc_vpu_fma_fp_dst_inf",
	"tpc_vpu_fp_dst_denorm",
	"tpc_illegal_instruction",
	"tpc_pc_wrap_around",
	"st_unlock_already_locked",
	"invalid_lock_access",
	"LD_L protection violation",
	"ST_L protection violation",
};

static const char * const guadi2_mme_error_cause[GAUDI2_NUM_OF_MME_ERR_CAUSE] = {
	"wap sei (wbc axi err)",
};

static const char * const guadi2_mme_sbte_error_cause[GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE] = {
	"i0",
	"i1",
	"i2",
	"i3",
	"i4",
};

static const char * const guadi2_mme_wap_error_cause[GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE] = {
};

static const char * const gaudi2_dma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
	"HBW Read returned with error RRESP",
	"HBW write returned with error BRESP",
	"LBW write returned with error BRESP",
	"descriptor_fifo_overflow",
	"KDMA SB LBW Read returned with error",
	"KDMA WBC LBW Write returned with error",
	"TRANSPOSE ENGINE DESC FIFO OVERFLOW",
	"WRONG CFG FOR COMMIT IN LIN DMA"
};

static const char * const gaudi2_kdma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
	"HBW/LBW Read returned with error RRESP",
	"HBW/LBW write returned with error BRESP",
	"LBW write returned with error BRESP",
	"descriptor_fifo_overflow",
	"KDMA SB LBW Read returned with error",
	"KDMA WBC LBW Write returned with error",
	"TRANSPOSE ENGINE DESC FIFO OVERFLOW",
	"WRONG CFG FOR COMMIT IN LIN DMA"
};

struct gaudi2_sm_sei_cause_data {
	const char *cause_name;
	const char *log_name;
	u32 log_mask;	/* mask applied to the logged value (inferred from the initializers below) */
};

static const struct gaudi2_sm_sei_cause_data
gaudi2_sm_sei_cause[GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE] = {
	{"calculated SO value overflow/underflow", "SOB group ID", 0x7FF},
	{"payload address of monitor is not aligned to 4B", "monitor addr", 0xFFFF},
	{"armed monitor write got BRESP (SLVERR or DECERR)", "AXI id", 0xFFFF},
};

static const char * const
gaudi2_pmmu_fatal_interrupts_cause[GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE] = {
	"LATENCY_RD_OUT_FIFO_OVERRUN",
	"LATENCY_WR_OUT_FIFO_OVERRUN",
};

static const char * const
gaudi2_hif_fatal_interrupts_cause[GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE] = {
	"LATENCY_RD_OUT_FIFO_OVERRUN",
	"LATENCY_WR_OUT_FIFO_OVERRUN",
};

static const char * const
gaudi2_psoc_axi_drain_interrupts_cause[GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE] = {
	"AXI drain HBW",
	"AXI drain LBW",
};

static const char * const
gaudi2_pcie_addr_dec_error_cause[GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE] = {
	"HBW error response",
	"LBW error response",
	"TLP is blocked by RR"
};

const u32 gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_SIZE] = {
	[GAUDI2_QUEUE_ID_PDMA_0_0] = mmPDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_0_1] = mmPDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_0_2] = mmPDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_0_3] = mmPDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_1_0] = mmPDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_1_1] = mmPDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_1_2] = mmPDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_1_3] = mmPDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = mmDCORE0_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = mmDCORE0_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = mmDCORE0_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = mmDCORE0_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = mmDCORE0_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = mmDCORE0_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = mmDCORE0_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = mmDCORE0_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = mmDCORE0_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = mmDCORE0_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = mmDCORE0_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = mmDCORE0_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = mmDCORE0_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = mmDCORE0_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = mmDCORE0_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = mmDCORE0_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = mmDCORE0_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = mmDCORE0_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = mmDCORE0_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = mmDCORE0_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = mmDCORE0_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = mmDCORE0_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = mmDCORE0_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = mmDCORE0_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = mmDCORE0_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = mmDCORE0_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = mmDCORE0_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = mmDCORE0_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = mmDCORE0_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = mmDCORE0_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = mmDCORE0_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = mmDCORE0_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = mmDCORE0_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = mmDCORE0_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = mmDCORE0_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = mmDCORE0_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = mmDCORE0_TPC6_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = mmDCORE0_TPC6_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = mmDCORE0_TPC6_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = mmDCORE0_TPC6_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = mmDCORE1_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = mmDCORE1_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = mmDCORE1_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = mmDCORE1_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = mmDCORE1_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = mmDCORE1_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = mmDCORE1_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = mmDCORE1_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = mmDCORE1_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = mmDCORE1_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = mmDCORE1_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = mmDCORE1_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = mmDCORE1_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = mmDCORE1_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = mmDCORE1_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = mmDCORE1_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = mmDCORE1_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = mmDCORE1_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = mmDCORE1_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = mmDCORE1_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = mmDCORE1_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = mmDCORE1_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = mmDCORE1_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = mmDCORE1_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = mmDCORE1_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = mmDCORE1_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = mmDCORE1_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = mmDCORE1_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = mmDCORE1_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = mmDCORE1_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = mmDCORE1_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = mmDCORE1_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = mmDCORE1_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = mmDCORE1_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = mmDCORE1_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = mmDCORE1_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = mmDCORE2_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = mmDCORE2_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = mmDCORE2_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = mmDCORE2_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = mmDCORE2_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = mmDCORE2_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = mmDCORE2_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = mmDCORE2_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = mmDCORE2_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = mmDCORE2_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = mmDCORE2_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = mmDCORE2_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = mmDCORE2_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = mmDCORE2_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = mmDCORE2_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = mmDCORE2_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = mmDCORE2_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = mmDCORE2_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = mmDCORE2_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = mmDCORE2_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = mmDCORE2_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = mmDCORE2_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = mmDCORE2_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = mmDCORE2_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = mmDCORE2_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = mmDCORE2_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = mmDCORE2_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = mmDCORE2_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = mmDCORE2_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = mmDCORE2_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = mmDCORE2_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = mmDCORE2_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = mmDCORE2_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = mmDCORE2_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = mmDCORE2_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = mmDCORE2_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = mmDCORE3_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = mmDCORE3_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = mmDCORE3_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = mmDCORE3_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = mmDCORE3_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = mmDCORE3_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = mmDCORE3_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = mmDCORE3_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = mmDCORE3_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = mmDCORE3_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = mmDCORE3_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = mmDCORE3_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = mmDCORE3_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = mmDCORE3_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = mmDCORE3_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = mmDCORE3_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = mmDCORE3_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = mmDCORE3_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = mmDCORE3_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = mmDCORE3_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = mmDCORE3_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = mmDCORE3_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = mmDCORE3_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = mmDCORE3_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = mmDCORE3_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = mmDCORE3_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = mmDCORE3_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = mmDCORE3_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = mmDCORE3_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = mmDCORE3_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = mmDCORE3_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = mmDCORE3_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = mmDCORE3_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = mmDCORE3_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = mmDCORE3_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = mmDCORE3_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_NIC_0_0] = mmNIC0_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_0_1] = mmNIC0_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_0_2] = mmNIC0_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_0_3] = mmNIC0_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_1_0] = mmNIC0_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_1_1] = mmNIC0_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_1_2] = mmNIC0_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_1_3] = mmNIC0_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_2_0] = mmNIC1_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_2_1] = mmNIC1_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_2_2] = mmNIC1_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_2_3] = mmNIC1_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_3_0] = mmNIC1_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_3_1] = mmNIC1_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_3_2] = mmNIC1_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_3_3] = mmNIC1_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_4_0] = mmNIC2_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_4_1] = mmNIC2_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_4_2] = mmNIC2_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_4_3] = mmNIC2_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_5_0] = mmNIC2_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_5_1] = mmNIC2_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_5_2] = mmNIC2_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_5_3] = mmNIC2_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_6_0] = mmNIC3_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_6_1] = mmNIC3_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_6_2] = mmNIC3_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_6_3] = mmNIC3_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_7_0] = mmNIC3_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_7_1] = mmNIC3_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_7_2] = mmNIC3_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_7_3] = mmNIC3_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_8_0] = mmNIC4_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_8_1] = mmNIC4_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_8_2] = mmNIC4_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_8_3] = mmNIC4_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_9_0] = mmNIC4_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_9_1] = mmNIC4_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_9_2] = mmNIC4_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_9_3] = mmNIC4_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_10_0] = mmNIC5_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_10_1] = mmNIC5_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_10_2] = mmNIC5_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_10_3] = mmNIC5_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_11_0] = mmNIC5_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_11_1] = mmNIC5_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_11_2] = mmNIC5_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_11_3] = mmNIC5_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_12_0] = mmNIC6_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_12_1] = mmNIC6_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_12_2] = mmNIC6_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_12_3] = mmNIC6_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_13_0] = mmNIC6_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_13_1] = mmNIC6_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_13_2] = mmNIC6_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_13_3] = mmNIC6_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_14_0] = mmNIC7_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_14_1] = mmNIC7_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_14_2] = mmNIC7_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_14_3] = mmNIC7_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_15_0] = mmNIC7_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_15_1] = mmNIC7_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_15_2] = mmNIC7_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_15_3] = mmNIC7_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_16_0] = mmNIC8_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_16_1] = mmNIC8_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_16_2] = mmNIC8_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_16_3] = mmNIC8_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_17_0] = mmNIC8_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_17_1] = mmNIC8_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_17_2] = mmNIC8_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_17_3] = mmNIC8_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_18_0] = mmNIC9_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_18_1] = mmNIC9_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_18_2] = mmNIC9_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_18_3] = mmNIC9_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_19_0] = mmNIC9_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_19_1] = mmNIC9_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_19_2] = mmNIC9_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_19_3] = mmNIC9_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_20_0] = mmNIC10_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_20_1] = mmNIC10_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_20_2] = mmNIC10_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_20_3] = mmNIC10_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_21_0] = mmNIC10_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_21_1] = mmNIC10_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_21_2] = mmNIC10_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_21_3] = mmNIC10_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_22_0] = mmNIC11_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_22_1] = mmNIC11_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_22_2] = mmNIC11_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_22_3] = mmNIC11_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_23_0] = mmNIC11_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_23_1] = mmNIC11_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_23_2] = mmNIC11_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_23_3] = mmNIC11_QM1_BASE,
	[GAUDI2_QUEUE_ID_ROT_0_0] = mmROT0_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_0_1] = mmROT0_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_0_2] = mmROT0_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_0_3] = mmROT0_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_1_0] = mmROT1_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_1_1] = mmROT1_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_1_2] = mmROT1_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_1_3] = mmROT1_QM_BASE
};

static const u32 gaudi2_arc_blocks_bases[NUM_ARC_CPUS] = {
	[CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_AUX_BASE,
	[CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_AUX_BASE,
	[CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_AUX_BASE,
	[CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_AUX_BASE,
	[CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_AUX_BASE,
	[CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_ARC_AUX_BASE,
	[CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_AUX_BASE,
	[CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_ARC_AUX_BASE,
	[CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_AUX_BASE,
	[CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_AUX_BASE,
	[CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_AUX_BASE,
	[CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_AUX_BASE,
	[CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_ARC_AUX1_BASE,
};

static const u32 gaudi2_arc_dccm_bases[NUM_ARC_CPUS] = {
	[CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_DCCM0_BASE,
	[CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_DCCM0_BASE,
	[CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_DCCM0_BASE,
	[CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_DCCM0_BASE,
	[CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_DCCM_BASE,
	[CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_DCCM_BASE,
	[CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_DCCM_BASE,
	[CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_DCCM_BASE,
	[CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_DCCM_BASE,
	[CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_DCCM_BASE,
	[CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_DCCM_BASE,
	[CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_DCCM_BASE,
	[CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_DCCM1_BASE,
};

const u32 gaudi2_mme_ctrl_lo_blocks_bases[MME_ID_SIZE] = {
	[MME_ID_DCORE0] = mmDCORE0_MME_CTRL_LO_BASE,
	[MME_ID_DCORE1] = mmDCORE1_MME_CTRL_LO_BASE,
	[MME_ID_DCORE2] = mmDCORE2_MME_CTRL_LO_BASE,
	[MME_ID_DCORE3] = mmDCORE3_MME_CTRL_LO_BASE,
};

static const u32 gaudi2_queue_id_to_arc_id[GAUDI2_QUEUE_ID_SIZE] = {
	[GAUDI2_QUEUE_ID_PDMA_0_0] = CPU_ID_PDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_PDMA_0_1] = CPU_ID_PDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_PDMA_0_2] = CPU_ID_PDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_PDMA_0_3] = CPU_ID_PDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_PDMA_1_0] = CPU_ID_PDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_PDMA_1_1] = CPU_ID_PDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_PDMA_1_2] = CPU_ID_PDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_PDMA_1_3] = CPU_ID_PDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = CPU_ID_MME_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = CPU_ID_MME_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = CPU_ID_MME_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = CPU_ID_MME_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = CPU_ID_TPC_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = CPU_ID_TPC_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = CPU_ID_TPC_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = CPU_ID_TPC_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = CPU_ID_TPC_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = CPU_ID_TPC_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = CPU_ID_TPC_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = CPU_ID_TPC_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = CPU_ID_TPC_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = CPU_ID_TPC_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = CPU_ID_TPC_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = CPU_ID_TPC_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = CPU_ID_TPC_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = CPU_ID_TPC_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = CPU_ID_TPC_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = CPU_ID_TPC_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = CPU_ID_TPC_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = CPU_ID_TPC_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = CPU_ID_TPC_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = CPU_ID_TPC_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = CPU_ID_TPC_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = CPU_ID_TPC_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = CPU_ID_TPC_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = CPU_ID_TPC_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = CPU_ID_TPC_QMAN_ARC24,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = CPU_ID_TPC_QMAN_ARC24,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = CPU_ID_TPC_QMAN_ARC24,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = CPU_ID_TPC_QMAN_ARC24,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = CPU_ID_SCHED_ARC4,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = CPU_ID_SCHED_ARC4,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = CPU_ID_SCHED_ARC4,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = CPU_ID_SCHED_ARC4,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = CPU_ID_TPC_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = CPU_ID_TPC_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = CPU_ID_TPC_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = CPU_ID_TPC_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = CPU_ID_TPC_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = CPU_ID_TPC_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = CPU_ID_TPC_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = CPU_ID_TPC_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = CPU_ID_TPC_QMAN_ARC8,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = CPU_ID_TPC_QMAN_ARC8,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = CPU_ID_TPC_QMAN_ARC8,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = CPU_ID_TPC_QMAN_ARC8,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = CPU_ID_TPC_QMAN_ARC9,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = CPU_ID_TPC_QMAN_ARC9,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = CPU_ID_TPC_QMAN_ARC9,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = CPU_ID_TPC_QMAN_ARC9,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = CPU_ID_TPC_QMAN_ARC10,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = CPU_ID_TPC_QMAN_ARC10,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = CPU_ID_TPC_QMAN_ARC10,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = CPU_ID_TPC_QMAN_ARC10,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = CPU_ID_TPC_QMAN_ARC11,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = CPU_ID_TPC_QMAN_ARC11,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = CPU_ID_TPC_QMAN_ARC11,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = CPU_ID_TPC_QMAN_ARC11,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = CPU_ID_MME_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = CPU_ID_MME_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = CPU_ID_MME_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = CPU_ID_MME_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = CPU_ID_TPC_QMAN_ARC12,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = CPU_ID_TPC_QMAN_ARC12,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = CPU_ID_TPC_QMAN_ARC12,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = CPU_ID_TPC_QMAN_ARC12,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = CPU_ID_TPC_QMAN_ARC13,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = CPU_ID_TPC_QMAN_ARC13,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = CPU_ID_TPC_QMAN_ARC13,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = CPU_ID_TPC_QMAN_ARC13,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = CPU_ID_TPC_QMAN_ARC14,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = CPU_ID_TPC_QMAN_ARC14,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = CPU_ID_TPC_QMAN_ARC14,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = CPU_ID_TPC_QMAN_ARC14,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = CPU_ID_TPC_QMAN_ARC15,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = CPU_ID_TPC_QMAN_ARC15,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = CPU_ID_TPC_QMAN_ARC15,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = CPU_ID_TPC_QMAN_ARC15,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = CPU_ID_TPC_QMAN_ARC16,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = CPU_ID_TPC_QMAN_ARC16,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = CPU_ID_TPC_QMAN_ARC16,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = CPU_ID_TPC_QMAN_ARC16,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = CPU_ID_TPC_QMAN_ARC17,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = CPU_ID_TPC_QMAN_ARC17,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = CPU_ID_TPC_QMAN_ARC17,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = CPU_ID_TPC_QMAN_ARC17,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = CPU_ID_SCHED_ARC5,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = CPU_ID_SCHED_ARC5,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = CPU_ID_SCHED_ARC5,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = CPU_ID_SCHED_ARC5,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = CPU_ID_TPC_QMAN_ARC18,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = CPU_ID_TPC_QMAN_ARC18,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = CPU_ID_TPC_QMAN_ARC18,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = CPU_ID_TPC_QMAN_ARC18,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = CPU_ID_TPC_QMAN_ARC19,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = CPU_ID_TPC_QMAN_ARC19,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = CPU_ID_TPC_QMAN_ARC19,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = CPU_ID_TPC_QMAN_ARC19,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = CPU_ID_TPC_QMAN_ARC20,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = CPU_ID_TPC_QMAN_ARC20,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = CPU_ID_TPC_QMAN_ARC20,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = CPU_ID_TPC_QMAN_ARC20,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = CPU_ID_TPC_QMAN_ARC21,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = CPU_ID_TPC_QMAN_ARC21,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = CPU_ID_TPC_QMAN_ARC21,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = CPU_ID_TPC_QMAN_ARC21,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = CPU_ID_TPC_QMAN_ARC22,
1277 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = CPU_ID_TPC_QMAN_ARC22,
1278 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = CPU_ID_TPC_QMAN_ARC22,
1279 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = CPU_ID_TPC_QMAN_ARC22,
1280 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = CPU_ID_TPC_QMAN_ARC23,
1281 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = CPU_ID_TPC_QMAN_ARC23,
1282 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = CPU_ID_TPC_QMAN_ARC23,
1283 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = CPU_ID_TPC_QMAN_ARC23,
1284 [GAUDI2_QUEUE_ID_NIC_0_0] = CPU_ID_NIC_QMAN_ARC0,
1285 [GAUDI2_QUEUE_ID_NIC_0_1] = CPU_ID_NIC_QMAN_ARC0,
1286 [GAUDI2_QUEUE_ID_NIC_0_2] = CPU_ID_NIC_QMAN_ARC0,
1287 [GAUDI2_QUEUE_ID_NIC_0_3] = CPU_ID_NIC_QMAN_ARC0,
1288 [GAUDI2_QUEUE_ID_NIC_1_0] = CPU_ID_NIC_QMAN_ARC1,
1289 [GAUDI2_QUEUE_ID_NIC_1_1] = CPU_ID_NIC_QMAN_ARC1,
1290 [GAUDI2_QUEUE_ID_NIC_1_2] = CPU_ID_NIC_QMAN_ARC1,
1291 [GAUDI2_QUEUE_ID_NIC_1_3] = CPU_ID_NIC_QMAN_ARC1,
1292 [GAUDI2_QUEUE_ID_NIC_2_0] = CPU_ID_NIC_QMAN_ARC2,
1293 [GAUDI2_QUEUE_ID_NIC_2_1] = CPU_ID_NIC_QMAN_ARC2,
1294 [GAUDI2_QUEUE_ID_NIC_2_2] = CPU_ID_NIC_QMAN_ARC2,
1295 [GAUDI2_QUEUE_ID_NIC_2_3] = CPU_ID_NIC_QMAN_ARC2,
1296 [GAUDI2_QUEUE_ID_NIC_3_0] = CPU_ID_NIC_QMAN_ARC3,
1297 [GAUDI2_QUEUE_ID_NIC_3_1] = CPU_ID_NIC_QMAN_ARC3,
1298 [GAUDI2_QUEUE_ID_NIC_3_2] = CPU_ID_NIC_QMAN_ARC3,
1299 [GAUDI2_QUEUE_ID_NIC_3_3] = CPU_ID_NIC_QMAN_ARC3,
1300 [GAUDI2_QUEUE_ID_NIC_4_0] = CPU_ID_NIC_QMAN_ARC4,
1301 [GAUDI2_QUEUE_ID_NIC_4_1] = CPU_ID_NIC_QMAN_ARC4,
1302 [GAUDI2_QUEUE_ID_NIC_4_2] = CPU_ID_NIC_QMAN_ARC4,
1303 [GAUDI2_QUEUE_ID_NIC_4_3] = CPU_ID_NIC_QMAN_ARC4,
1304 [GAUDI2_QUEUE_ID_NIC_5_0] = CPU_ID_NIC_QMAN_ARC5,
1305 [GAUDI2_QUEUE_ID_NIC_5_1] = CPU_ID_NIC_QMAN_ARC5,
1306 [GAUDI2_QUEUE_ID_NIC_5_2] = CPU_ID_NIC_QMAN_ARC5,
1307 [GAUDI2_QUEUE_ID_NIC_5_3] = CPU_ID_NIC_QMAN_ARC5,
1308 [GAUDI2_QUEUE_ID_NIC_6_0] = CPU_ID_NIC_QMAN_ARC6,
1309 [GAUDI2_QUEUE_ID_NIC_6_1] = CPU_ID_NIC_QMAN_ARC6,
1310 [GAUDI2_QUEUE_ID_NIC_6_2] = CPU_ID_NIC_QMAN_ARC6,
1311 [GAUDI2_QUEUE_ID_NIC_6_3] = CPU_ID_NIC_QMAN_ARC6,
1312 [GAUDI2_QUEUE_ID_NIC_7_0] = CPU_ID_NIC_QMAN_ARC7,
1313 [GAUDI2_QUEUE_ID_NIC_7_1] = CPU_ID_NIC_QMAN_ARC7,
1314 [GAUDI2_QUEUE_ID_NIC_7_2] = CPU_ID_NIC_QMAN_ARC7,
1315 [GAUDI2_QUEUE_ID_NIC_7_3] = CPU_ID_NIC_QMAN_ARC7,
1316 [GAUDI2_QUEUE_ID_NIC_8_0] = CPU_ID_NIC_QMAN_ARC8,
1317 [GAUDI2_QUEUE_ID_NIC_8_1] = CPU_ID_NIC_QMAN_ARC8,
1318 [GAUDI2_QUEUE_ID_NIC_8_2] = CPU_ID_NIC_QMAN_ARC8,
1319 [GAUDI2_QUEUE_ID_NIC_8_3] = CPU_ID_NIC_QMAN_ARC8,
1320 [GAUDI2_QUEUE_ID_NIC_9_0] = CPU_ID_NIC_QMAN_ARC9,
1321 [GAUDI2_QUEUE_ID_NIC_9_1] = CPU_ID_NIC_QMAN_ARC9,
1322 [GAUDI2_QUEUE_ID_NIC_9_2] = CPU_ID_NIC_QMAN_ARC9,
1323 [GAUDI2_QUEUE_ID_NIC_9_3] = CPU_ID_NIC_QMAN_ARC9,
1324 [GAUDI2_QUEUE_ID_NIC_10_0] = CPU_ID_NIC_QMAN_ARC10,
1325 [GAUDI2_QUEUE_ID_NIC_10_1] = CPU_ID_NIC_QMAN_ARC10,
1326 [GAUDI2_QUEUE_ID_NIC_10_2] = CPU_ID_NIC_QMAN_ARC10,
1327 [GAUDI2_QUEUE_ID_NIC_10_3] = CPU_ID_NIC_QMAN_ARC10,
1328 [GAUDI2_QUEUE_ID_NIC_11_0] = CPU_ID_NIC_QMAN_ARC11,
1329 [GAUDI2_QUEUE_ID_NIC_11_1] = CPU_ID_NIC_QMAN_ARC11,
1330 [GAUDI2_QUEUE_ID_NIC_11_2] = CPU_ID_NIC_QMAN_ARC11,
1331 [GAUDI2_QUEUE_ID_NIC_11_3] = CPU_ID_NIC_QMAN_ARC11,
1332 [GAUDI2_QUEUE_ID_NIC_12_0] = CPU_ID_NIC_QMAN_ARC12,
1333 [GAUDI2_QUEUE_ID_NIC_12_1] = CPU_ID_NIC_QMAN_ARC12,
1334 [GAUDI2_QUEUE_ID_NIC_12_2] = CPU_ID_NIC_QMAN_ARC12,
1335 [GAUDI2_QUEUE_ID_NIC_12_3] = CPU_ID_NIC_QMAN_ARC12,
1336 [GAUDI2_QUEUE_ID_NIC_13_0] = CPU_ID_NIC_QMAN_ARC13,
1337 [GAUDI2_QUEUE_ID_NIC_13_1] = CPU_ID_NIC_QMAN_ARC13,
1338 [GAUDI2_QUEUE_ID_NIC_13_2] = CPU_ID_NIC_QMAN_ARC13,
1339 [GAUDI2_QUEUE_ID_NIC_13_3] = CPU_ID_NIC_QMAN_ARC13,
1340 [GAUDI2_QUEUE_ID_NIC_14_0] = CPU_ID_NIC_QMAN_ARC14,
1341 [GAUDI2_QUEUE_ID_NIC_14_1] = CPU_ID_NIC_QMAN_ARC14,
1342 [GAUDI2_QUEUE_ID_NIC_14_2] = CPU_ID_NIC_QMAN_ARC14,
1343 [GAUDI2_QUEUE_ID_NIC_14_3] = CPU_ID_NIC_QMAN_ARC14,
1344 [GAUDI2_QUEUE_ID_NIC_15_0] = CPU_ID_NIC_QMAN_ARC15,
1345 [GAUDI2_QUEUE_ID_NIC_15_1] = CPU_ID_NIC_QMAN_ARC15,
1346 [GAUDI2_QUEUE_ID_NIC_15_2] = CPU_ID_NIC_QMAN_ARC15,
1347 [GAUDI2_QUEUE_ID_NIC_15_3] = CPU_ID_NIC_QMAN_ARC15,
1348 [GAUDI2_QUEUE_ID_NIC_16_0] = CPU_ID_NIC_QMAN_ARC16,
1349 [GAUDI2_QUEUE_ID_NIC_16_1] = CPU_ID_NIC_QMAN_ARC16,
1350 [GAUDI2_QUEUE_ID_NIC_16_2] = CPU_ID_NIC_QMAN_ARC16,
1351 [GAUDI2_QUEUE_ID_NIC_16_3] = CPU_ID_NIC_QMAN_ARC16,
1352 [GAUDI2_QUEUE_ID_NIC_17_0] = CPU_ID_NIC_QMAN_ARC17,
1353 [GAUDI2_QUEUE_ID_NIC_17_1] = CPU_ID_NIC_QMAN_ARC17,
1354 [GAUDI2_QUEUE_ID_NIC_17_2] = CPU_ID_NIC_QMAN_ARC17,
1355 [GAUDI2_QUEUE_ID_NIC_17_3] = CPU_ID_NIC_QMAN_ARC17,
1356 [GAUDI2_QUEUE_ID_NIC_18_0] = CPU_ID_NIC_QMAN_ARC18,
1357 [GAUDI2_QUEUE_ID_NIC_18_1] = CPU_ID_NIC_QMAN_ARC18,
1358 [GAUDI2_QUEUE_ID_NIC_18_2] = CPU_ID_NIC_QMAN_ARC18,
1359 [GAUDI2_QUEUE_ID_NIC_18_3] = CPU_ID_NIC_QMAN_ARC18,
1360 [GAUDI2_QUEUE_ID_NIC_19_0] = CPU_ID_NIC_QMAN_ARC19,
1361 [GAUDI2_QUEUE_ID_NIC_19_1] = CPU_ID_NIC_QMAN_ARC19,
1362 [GAUDI2_QUEUE_ID_NIC_19_2] = CPU_ID_NIC_QMAN_ARC19,
1363 [GAUDI2_QUEUE_ID_NIC_19_3] = CPU_ID_NIC_QMAN_ARC19,
1364 [GAUDI2_QUEUE_ID_NIC_20_0] = CPU_ID_NIC_QMAN_ARC20,
1365 [GAUDI2_QUEUE_ID_NIC_20_1] = CPU_ID_NIC_QMAN_ARC20,
1366 [GAUDI2_QUEUE_ID_NIC_20_2] = CPU_ID_NIC_QMAN_ARC20,
1367 [GAUDI2_QUEUE_ID_NIC_20_3] = CPU_ID_NIC_QMAN_ARC20,
1368 [GAUDI2_QUEUE_ID_NIC_21_0] = CPU_ID_NIC_QMAN_ARC21,
1369 [GAUDI2_QUEUE_ID_NIC_21_1] = CPU_ID_NIC_QMAN_ARC21,
1370 [GAUDI2_QUEUE_ID_NIC_21_2] = CPU_ID_NIC_QMAN_ARC21,
1371 [GAUDI2_QUEUE_ID_NIC_21_3] = CPU_ID_NIC_QMAN_ARC21,
1372 [GAUDI2_QUEUE_ID_NIC_22_0] = CPU_ID_NIC_QMAN_ARC22,
1373 [GAUDI2_QUEUE_ID_NIC_22_1] = CPU_ID_NIC_QMAN_ARC22,
1374 [GAUDI2_QUEUE_ID_NIC_22_2] = CPU_ID_NIC_QMAN_ARC22,
1375 [GAUDI2_QUEUE_ID_NIC_22_3] = CPU_ID_NIC_QMAN_ARC22,
1376 [GAUDI2_QUEUE_ID_NIC_23_0] = CPU_ID_NIC_QMAN_ARC23,
1377 [GAUDI2_QUEUE_ID_NIC_23_1] = CPU_ID_NIC_QMAN_ARC23,
1378 [GAUDI2_QUEUE_ID_NIC_23_2] = CPU_ID_NIC_QMAN_ARC23,
1379 [GAUDI2_QUEUE_ID_NIC_23_3] = CPU_ID_NIC_QMAN_ARC23,
1380 [GAUDI2_QUEUE_ID_ROT_0_0] = CPU_ID_ROT_QMAN_ARC0,
1381 [GAUDI2_QUEUE_ID_ROT_0_1] = CPU_ID_ROT_QMAN_ARC0,
1382 [GAUDI2_QUEUE_ID_ROT_0_2] = CPU_ID_ROT_QMAN_ARC0,
1383 [GAUDI2_QUEUE_ID_ROT_0_3] = CPU_ID_ROT_QMAN_ARC0,
1384 [GAUDI2_QUEUE_ID_ROT_1_0] = CPU_ID_ROT_QMAN_ARC1,
1385 [GAUDI2_QUEUE_ID_ROT_1_1] = CPU_ID_ROT_QMAN_ARC1,
1386 [GAUDI2_QUEUE_ID_ROT_1_2] = CPU_ID_ROT_QMAN_ARC1,
1387 [GAUDI2_QUEUE_ID_ROT_1_3] = CPU_ID_ROT_QMAN_ARC1
1390 const u32 gaudi2_dma_core_blocks_bases[DMA_CORE_ID_SIZE] = {
1391 [DMA_CORE_ID_PDMA0] = mmPDMA0_CORE_BASE,
1392 [DMA_CORE_ID_PDMA1] = mmPDMA1_CORE_BASE,
1393 [DMA_CORE_ID_EDMA0] = mmDCORE0_EDMA0_CORE_BASE,
1394 [DMA_CORE_ID_EDMA1] = mmDCORE0_EDMA1_CORE_BASE,
1395 [DMA_CORE_ID_EDMA2] = mmDCORE1_EDMA0_CORE_BASE,
1396 [DMA_CORE_ID_EDMA3] = mmDCORE1_EDMA1_CORE_BASE,
1397 [DMA_CORE_ID_EDMA4] = mmDCORE2_EDMA0_CORE_BASE,
1398 [DMA_CORE_ID_EDMA5] = mmDCORE2_EDMA1_CORE_BASE,
1399 [DMA_CORE_ID_EDMA6] = mmDCORE3_EDMA0_CORE_BASE,
1400 [DMA_CORE_ID_EDMA7] = mmDCORE3_EDMA1_CORE_BASE,
1401 [DMA_CORE_ID_KDMA] = mmARC_FARM_KDMA_BASE
1404 const u32 gaudi2_mme_acc_blocks_bases[MME_ID_SIZE] = {
1405 [MME_ID_DCORE0] = mmDCORE0_MME_ACC_BASE,
1406 [MME_ID_DCORE1] = mmDCORE1_MME_ACC_BASE,
1407 [MME_ID_DCORE2] = mmDCORE2_MME_ACC_BASE,
1408 [MME_ID_DCORE3] = mmDCORE3_MME_ACC_BASE
1411 static const u32 gaudi2_tpc_cfg_blocks_bases[TPC_ID_SIZE] = {
1412 [TPC_ID_DCORE0_TPC0] = mmDCORE0_TPC0_CFG_BASE,
1413 [TPC_ID_DCORE0_TPC1] = mmDCORE0_TPC1_CFG_BASE,
1414 [TPC_ID_DCORE0_TPC2] = mmDCORE0_TPC2_CFG_BASE,
1415 [TPC_ID_DCORE0_TPC3] = mmDCORE0_TPC3_CFG_BASE,
1416 [TPC_ID_DCORE0_TPC4] = mmDCORE0_TPC4_CFG_BASE,
1417 [TPC_ID_DCORE0_TPC5] = mmDCORE0_TPC5_CFG_BASE,
1418 [TPC_ID_DCORE1_TPC0] = mmDCORE1_TPC0_CFG_BASE,
1419 [TPC_ID_DCORE1_TPC1] = mmDCORE1_TPC1_CFG_BASE,
1420 [TPC_ID_DCORE1_TPC2] = mmDCORE1_TPC2_CFG_BASE,
1421 [TPC_ID_DCORE1_TPC3] = mmDCORE1_TPC3_CFG_BASE,
1422 [TPC_ID_DCORE1_TPC4] = mmDCORE1_TPC4_CFG_BASE,
1423 [TPC_ID_DCORE1_TPC5] = mmDCORE1_TPC5_CFG_BASE,
1424 [TPC_ID_DCORE2_TPC0] = mmDCORE2_TPC0_CFG_BASE,
1425 [TPC_ID_DCORE2_TPC1] = mmDCORE2_TPC1_CFG_BASE,
1426 [TPC_ID_DCORE2_TPC2] = mmDCORE2_TPC2_CFG_BASE,
1427 [TPC_ID_DCORE2_TPC3] = mmDCORE2_TPC3_CFG_BASE,
1428 [TPC_ID_DCORE2_TPC4] = mmDCORE2_TPC4_CFG_BASE,
1429 [TPC_ID_DCORE2_TPC5] = mmDCORE2_TPC5_CFG_BASE,
1430 [TPC_ID_DCORE3_TPC0] = mmDCORE3_TPC0_CFG_BASE,
1431 [TPC_ID_DCORE3_TPC1] = mmDCORE3_TPC1_CFG_BASE,
1432 [TPC_ID_DCORE3_TPC2] = mmDCORE3_TPC2_CFG_BASE,
1433 [TPC_ID_DCORE3_TPC3] = mmDCORE3_TPC3_CFG_BASE,
1434 [TPC_ID_DCORE3_TPC4] = mmDCORE3_TPC4_CFG_BASE,
1435 [TPC_ID_DCORE3_TPC5] = mmDCORE3_TPC5_CFG_BASE,
1436 [TPC_ID_DCORE0_TPC6] = mmDCORE0_TPC6_CFG_BASE,
1439 const u32 gaudi2_rot_blocks_bases[ROTATOR_ID_SIZE] = {
1440 [ROTATOR_ID_0] = mmROT0_BASE,
1441 [ROTATOR_ID_1] = mmROT1_BASE
1444 static const u32 gaudi2_tpc_id_to_queue_id[TPC_ID_SIZE] = {
1445 [TPC_ID_DCORE0_TPC0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0,
1446 [TPC_ID_DCORE0_TPC1] = GAUDI2_QUEUE_ID_DCORE0_TPC_1_0,
1447 [TPC_ID_DCORE0_TPC2] = GAUDI2_QUEUE_ID_DCORE0_TPC_2_0,
1448 [TPC_ID_DCORE0_TPC3] = GAUDI2_QUEUE_ID_DCORE0_TPC_3_0,
1449 [TPC_ID_DCORE0_TPC4] = GAUDI2_QUEUE_ID_DCORE0_TPC_4_0,
1450 [TPC_ID_DCORE0_TPC5] = GAUDI2_QUEUE_ID_DCORE0_TPC_5_0,
1451 [TPC_ID_DCORE1_TPC0] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0,
1452 [TPC_ID_DCORE1_TPC1] = GAUDI2_QUEUE_ID_DCORE1_TPC_1_0,
1453 [TPC_ID_DCORE1_TPC2] = GAUDI2_QUEUE_ID_DCORE1_TPC_2_0,
1454 [TPC_ID_DCORE1_TPC3] = GAUDI2_QUEUE_ID_DCORE1_TPC_3_0,
1455 [TPC_ID_DCORE1_TPC4] = GAUDI2_QUEUE_ID_DCORE1_TPC_4_0,
1456 [TPC_ID_DCORE1_TPC5] = GAUDI2_QUEUE_ID_DCORE1_TPC_5_0,
1457 [TPC_ID_DCORE2_TPC0] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0,
1458 [TPC_ID_DCORE2_TPC1] = GAUDI2_QUEUE_ID_DCORE2_TPC_1_0,
1459 [TPC_ID_DCORE2_TPC2] = GAUDI2_QUEUE_ID_DCORE2_TPC_2_0,
1460 [TPC_ID_DCORE2_TPC3] = GAUDI2_QUEUE_ID_DCORE2_TPC_3_0,
1461 [TPC_ID_DCORE2_TPC4] = GAUDI2_QUEUE_ID_DCORE2_TPC_4_0,
1462 [TPC_ID_DCORE2_TPC5] = GAUDI2_QUEUE_ID_DCORE2_TPC_5_0,
1463 [TPC_ID_DCORE3_TPC0] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0,
1464 [TPC_ID_DCORE3_TPC1] = GAUDI2_QUEUE_ID_DCORE3_TPC_1_0,
1465 [TPC_ID_DCORE3_TPC2] = GAUDI2_QUEUE_ID_DCORE3_TPC_2_0,
1466 [TPC_ID_DCORE3_TPC3] = GAUDI2_QUEUE_ID_DCORE3_TPC_3_0,
1467 [TPC_ID_DCORE3_TPC4] = GAUDI2_QUEUE_ID_DCORE3_TPC_4_0,
1468 [TPC_ID_DCORE3_TPC5] = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0,
1469 [TPC_ID_DCORE0_TPC6] = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0,
1472 static const u32 gaudi2_rot_id_to_queue_id[ROTATOR_ID_SIZE] = {
1473 [ROTATOR_ID_0] = GAUDI2_QUEUE_ID_ROT_0_0,
1474 [ROTATOR_ID_1] = GAUDI2_QUEUE_ID_ROT_1_0,
1477 const u32 edma_stream_base[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = {
1478 GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0,
1479 GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0,
1480 GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0,
1481 GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0,
1482 GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0,
1483 GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0,
1484 GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0,
1485 GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0,
1488 static const char gaudi2_vdec_irq_name[GAUDI2_VDEC_MSIX_ENTRIES][GAUDI2_MAX_STRING_LEN] = {
1489 "gaudi2 vdec 0_0", "gaudi2 vdec 0_0 abnormal",
1490 "gaudi2 vdec 0_1", "gaudi2 vdec 0_1 abnormal",
1491 "gaudi2 vdec 1_0", "gaudi2 vdec 1_0 abnormal",
1492 "gaudi2 vdec 1_1", "gaudi2 vdec 1_1 abnormal",
1493 "gaudi2 vdec 2_0", "gaudi2 vdec 2_0 abnormal",
1494 "gaudi2 vdec 2_1", "gaudi2 vdec 2_1 abnormal",
1495 "gaudi2 vdec 3_0", "gaudi2 vdec 3_0 abnormal",
1496 "gaudi2 vdec 3_1", "gaudi2 vdec 3_1 abnormal",
1497 "gaudi2 vdec s_0", "gaudi2 vdec s_0 abnormal",
1498 "gaudi2 vdec s_1", "gaudi2 vdec s_1 abnormal"
1501 static const u32 rtr_coordinates_to_rtr_id[NUM_OF_RTR_PER_DCORE * NUM_OF_DCORES] = {
1526 RTR_ID_X_Y(0, 0), /* 24 no id */
1527 RTR_ID_X_Y(0, 0), /* 25 no id */
1528 RTR_ID_X_Y(0, 0), /* 26 no id */
1529 RTR_ID_X_Y(0, 0), /* 27 no id */
1571 static const u32 gaudi2_tpc_initiator_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = {
1572 DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2, DCORE0_RTR3, DCORE0_RTR3,
1573 DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5, DCORE1_RTR4, DCORE1_RTR4,
1574 DCORE2_RTR3, DCORE2_RTR3, DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1,
1575 DCORE3_RTR4, DCORE3_RTR4, DCORE3_RTR5, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR6,
1579 static const u32 gaudi2_dec_initiator_rtr_id[NUMBER_OF_DEC] = {
1580 DCORE0_RTR0, DCORE0_RTR0, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0, DCORE2_RTR0,
1581 DCORE3_RTR7, DCORE3_RTR7, DCORE0_RTR0, DCORE0_RTR0
1584 static const u32 gaudi2_nic_initiator_rtr_id[NIC_NUMBER_OF_MACROS] = {
1585 DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0,
1586 DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, DCORE3_RTR7
1594 static const struct sft_info gaudi2_edma_initiator_sft_id[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = {
1595 {0, 0}, {1, 0}, {0, 1}, {1, 1}, {1, 2}, {1, 3}, {0, 2}, {0, 3},
1598 static const u32 gaudi2_pdma_initiator_rtr_id[NUM_OF_PDMA] = {
1599 DCORE0_RTR0, DCORE0_RTR0
1602 static const u32 gaudi2_rot_initiator_rtr_id[NUM_OF_ROT] = {
1603 DCORE2_RTR0, DCORE3_RTR7
1606 struct mme_initiators_rtr_id {
1618 enum mme_initiators {
1631 static const struct mme_initiators_rtr_id
1632 gaudi2_mme_initiator_rtr_id[NUM_OF_MME_PER_DCORE * NUM_OF_DCORES] = {
1633 { .wap0 = 5, .wap1 = 7, .write = 6, .read = 7,
1634 .sbte0 = 7, .sbte1 = 4, .sbte2 = 4, .sbte3 = 5, .sbte4 = 6},
1635 { .wap0 = 10, .wap1 = 8, .write = 9, .read = 8,
1636 .sbte0 = 11, .sbte1 = 11, .sbte2 = 10, .sbte3 = 9, .sbte4 = 8},
1637 { .wap0 = 21, .wap1 = 23, .write = 22, .read = 23,
1638 .sbte0 = 20, .sbte1 = 20, .sbte2 = 21, .sbte3 = 22, .sbte4 = 23},
1639 { .wap0 = 30, .wap1 = 28, .write = 29, .read = 30,
1640 .sbte0 = 31, .sbte1 = 31, .sbte2 = 30, .sbte3 = 29, .sbte4 = 28},
1643 enum razwi_event_sources {
1653 struct hbm_mc_error_causes {
1658 static struct hbm_mc_error_causes hbm_mc_spi[GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE] = {
1659 {HBM_MC_SPI_TEMP_PIN_CHG_MASK, "temperature pins changed"},
1660 {HBM_MC_SPI_THR_ENG_MASK, "temperature-based throttling engaged"},
1661 {HBM_MC_SPI_THR_DIS_ENG_MASK, "temperature-based throttling disengaged"},
1662 {HBM_MC_SPI_IEEE1500_COMP_MASK, "IEEE1500 op comp"},
1663 {HBM_MC_SPI_IEEE1500_PAUSED_MASK, "IEEE1500 op paused"},
1666 static const char * const hbm_mc_sei_cause[GAUDI2_NUM_OF_HBM_SEI_CAUSE] = {
1667 [HBM_SEI_CMD_PARITY_EVEN] = "SEI C/A parity even",
1668 [HBM_SEI_CMD_PARITY_ODD] = "SEI C/A parity odd",
1669 [HBM_SEI_READ_ERR] = "SEI read data error",
1670 [HBM_SEI_WRITE_DATA_PARITY_ERR] = "SEI write data parity error",
1671 [HBM_SEI_CATTRIP] = "SEI CATTRIP asserted",
1672 [HBM_SEI_MEM_BIST_FAIL] = "SEI memory BIST fail",
1673 [HBM_SEI_DFI] = "SEI DFI error",
1674 [HBM_SEI_INV_TEMP_READ_OUT] = "SEI invalid temp read",
1675 [HBM_SEI_BIST_FAIL] = "SEI BIST fail"
1678 struct mmu_spi_sei_cause {
1683 static const struct mmu_spi_sei_cause gaudi2_mmu_spi_sei[GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE] = {
1684 {"page fault", 1}, /* INTERRUPT_CLR[1] */
1685 {"page access", 1}, /* INTERRUPT_CLR[1] */
1686 {"bypass ddr", 2}, /* INTERRUPT_CLR[2] */
1687 {"multi hit", 2}, /* INTERRUPT_CLR[2] */
1688 {"mmu rei0", -1}, /* no clear register bit */
1689 {"mmu rei1", -1}, /* no clear register bit */
1690 {"stlb rei0", -1}, /* no clear register bit */
1691 {"stlb rei1", -1}, /* no clear register bit */
1692 {"rr privileged write hit", 2}, /* INTERRUPT_CLR[2] */
1693 {"rr privileged read hit", 2}, /* INTERRUPT_CLR[2] */
1694 {"rr secure write hit", 2}, /* INTERRUPT_CLR[2] */
1695 {"rr secure read hit", 2}, /* INTERRUPT_CLR[2] */
1696 {"bist_fail no use", 2}, /* INTERRUPT_CLR[2] */
1697 {"bist_fail no use", 2}, /* INTERRUPT_CLR[2] */
1698 {"bist_fail no use", 2}, /* INTERRUPT_CLR[2] */
1699 {"bist_fail no use", 2}, /* INTERRUPT_CLR[2] */
1700 {"slave error", 16}, /* INTERRUPT_CLR[16] */
1701 {"dec error", 17}, /* INTERRUPT_CLR[17] */
1702 {"burst fifo full", 2} /* INTERRUPT_CLR[2] */
1705 struct gaudi2_cache_invld_params {
1710 bool range_invalidation;
1713 struct gaudi2_tpc_idle_data {
1714 struct engines_data *e;
1715 unsigned long *mask;
1717 const char *tpc_fmt;
1720 struct gaudi2_tpc_mmu_data {
1724 static s64 gaudi2_state_dump_specs_props[SP_MAX] = {0};
1726 static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val);
1727 static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id);
1728 static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id);
1729 static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id);
1730 static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id);
1731 static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val);
1732 static int gaudi2_send_job_to_kdma(struct hl_device *hdev, u64 src_addr, u64 dst_addr, u32 size,
1734 static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr);
1736 static void gaudi2_init_scrambler_hbm(struct hl_device *hdev)
1741 static u32 gaudi2_get_signal_cb_size(struct hl_device *hdev)
1743 return sizeof(struct packet_msg_short);
1746 static u32 gaudi2_get_wait_cb_size(struct hl_device *hdev)
1748 return sizeof(struct packet_msg_short) * 4 + sizeof(struct packet_fence);
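/*
 * i.e. the wait CB built by the driver is assumed to consist of four
 * MSG_SHORT packets (presumably for monitor setup/arm) followed by a single
 * FENCE packet, matching the size computed above.
 */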
1751 void gaudi2_iterate_tpcs(struct hl_device *hdev, struct iterate_module_ctx *ctx)
1753 struct asic_fixed_properties *prop = &hdev->asic_prop;
1754 int dcore, inst, tpc_seq;
1757 /* init the return code */
1760 for (dcore = 0; dcore < NUM_OF_DCORES; dcore++) {
1761 for (inst = 0; inst < NUM_OF_TPC_PER_DCORE; inst++) {
1762 tpc_seq = dcore * NUM_OF_TPC_PER_DCORE + inst;
1764 if (!(prop->tpc_enabled_mask & BIT(tpc_seq)))
1767 offset = (DCORE_OFFSET * dcore) + (DCORE_TPC_OFFSET * inst);
1769 ctx->fn(hdev, dcore, inst, offset, ctx);
1771 dev_err(hdev->dev, "TPC iterator failed for DCORE%d TPC%d\n",
1778 if (!(prop->tpc_enabled_mask & BIT(TPC_ID_DCORE0_TPC6)))
1781 /* special check for PCI TPC (DCORE0_TPC6) */
1782 offset = DCORE_TPC_OFFSET * (NUM_DCORE0_TPC - 1);
1783 ctx->fn(hdev, 0, NUM_DCORE0_TPC - 1, offset, ctx);
1785 dev_err(hdev->dev, "TPC iterator failed for DCORE0 TPC6\n");
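/*
 * Illustrative usage sketch (not upstream code) of the iterator above. It
 * assumes iterate_module_ctx carries a caller data pointer (ctx->data)
 * alongside the fn callback, and uses the callback signature implied by the
 * ctx->fn() calls above:
 *
 *	static void count_tpc_fn(struct hl_device *hdev, int dcore, int inst,
 *					u32 offset, struct iterate_module_ctx *ctx)
 *	{
 *		(*(u32 *)ctx->data)++;
 *	}
 *
 *	u32 num_enabled_tpcs = 0;
 *	struct iterate_module_ctx tpc_iter = {
 *		.fn = count_tpc_fn,
 *		.data = &num_enabled_tpcs,
 *	};
 *
 *	gaudi2_iterate_tpcs(hdev, &tpc_iter);
 */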
1788 static bool gaudi2_host_phys_addr_valid(u64 addr)
1790 if ((addr < HOST_PHYS_BASE_0 + HOST_PHYS_SIZE_0) || (addr >= HOST_PHYS_BASE_1))
1796 static int set_number_of_functional_hbms(struct hl_device *hdev)
1798 struct asic_fixed_properties *prop = &hdev->asic_prop;
1799 u8 faulty_hbms = hweight64(hdev->dram_binning);
1801 /* check if all HBMs should be used */
1803 dev_dbg(hdev->dev, "All HBMs are in use (no binning)\n");
1804 prop->num_functional_hbms = GAUDI2_HBM_NUM;
1809 * check for the error condition in which the number of binning
1810 * candidates is higher than the maximum supported by the
1811 * driver (in which case the binning mask shall be ignored and the driver will
1814 if (faulty_hbms > MAX_FAULTY_HBMS) {
1816 "HBM binning supports max of %d faulty HBMs, supplied mask 0x%llx.\n",
1817 MAX_FAULTY_HBMS, hdev->dram_binning);
1822 * when binning is in effect, the number of functional HBMs in Gaudi2 is
1823 * always GAUDI2_HBM_NUM - 1 (at most a single HBM may be binned).
1825 prop->num_functional_hbms = GAUDI2_HBM_NUM - faulty_hbms;
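/*
 * For example, assuming GAUDI2_HBM_NUM == 6 (as implied by the "either 5 or
 * 6 HBMs" check in gaudi2_cpucp_info_get()), a binning mask with a single
 * bit set yields num_functional_hbms = 6 - 1 = 5.
 */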
1829 static int gaudi2_set_dram_properties(struct hl_device *hdev)
1831 struct asic_fixed_properties *prop = &hdev->asic_prop;
1832 u32 basic_hbm_page_size;
1835 rc = set_number_of_functional_hbms(hdev);
1840 * Due to HW bug in which TLB size is x16 smaller than expected we use a workaround
1841 * in which we are using x16 bigger page size to be able to populate the entire
1842 * HBM mappings in the TLB
1844 basic_hbm_page_size = prop->num_functional_hbms * SZ_8M;
1845 prop->dram_page_size = GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR * basic_hbm_page_size;
1846 prop->device_mem_alloc_default_page_size = prop->dram_page_size;
1847 prop->dram_size = prop->num_functional_hbms * SZ_16G;
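/*
 * Worked example, assuming 6 functional HBMs and a x16 TLB compensation
 * factor: basic_hbm_page_size = 6 * 8MB = 48MB, dram_page_size =
 * 16 * 48MB = 768MB and dram_size = 6 * 16GB = 96GB.
 */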
1848 prop->dram_base_address = DRAM_PHYS_BASE;
1849 prop->dram_end_address = prop->dram_base_address + prop->dram_size;
1850 prop->dram_supports_virtual_memory = true;
1852 prop->dram_user_base_address = DRAM_PHYS_BASE + prop->dram_page_size;
1853 prop->dram_hints_align_mask = ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK;
1854 prop->hints_dram_reserved_va_range.start_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_START;
1855 prop->hints_dram_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_END;
1857 /* since the DRAM page size differs from the DMMU page size, we need to allocate
1858 * DRAM memory in units of dram_page size and map this memory in
1859 * units of DMMU page size. We overcome this size mismatch using a
1860 * scrambling routine which takes a DRAM page and converts it to a DMMU
1863 * 1. partition the virtual address space into DRAM-page (whole) pages.
1864 * (suppose we get n such pages)
1865 * 2. limit the amount of virtual address space we got from 1 above to
1866 * a multiple of 64M as we don't want the scrambled address to cross
1867 * the DRAM virtual address space.
1868 * ( m = (n * DRAM_page_size) / DMMU_page_size).
1869 * 3. determine the end address accordingly
1870 * end_addr = start_addr + m * 48M
1872 * the DRAM address MSBs (63:48) are not part of the roundup calculation
1874 prop->dmmu.start_addr = prop->dram_base_address +
1875 (prop->dram_page_size *
1876 DIV_ROUND_UP_SECTOR_T(prop->dram_size, prop->dram_page_size));
1878 prop->dmmu.end_addr = prop->dmmu.start_addr + prop->dram_page_size *
1879 div_u64((VA_HBM_SPACE_END - prop->dmmu.start_addr), prop->dmmu.page_size);
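/*
 * With the example numbers above (768MB DRAM page, 96GB DRAM),
 * DIV_ROUND_UP_SECTOR_T(96GB, 768MB) = 128, so dmmu.start_addr lands exactly
 * at dram_base_address + dram_size and the scrambled DMMU range begins right
 * after the physical DRAM aperture.
 */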
1884 static int gaudi2_set_fixed_properties(struct hl_device *hdev)
1886 struct asic_fixed_properties *prop = &hdev->asic_prop;
1887 struct hw_queue_properties *q_props;
1888 u32 num_sync_stream_queues = 0;
1891 prop->max_queues = GAUDI2_QUEUE_ID_SIZE;
1892 prop->hw_queues_props = kcalloc(prop->max_queues, sizeof(struct hw_queue_properties),
1895 if (!prop->hw_queues_props)
1898 q_props = prop->hw_queues_props;
1900 for (i = 0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i++) {
1901 q_props[i].type = QUEUE_TYPE_HW;
1902 q_props[i].driver_only = 0;
1904 if (i >= GAUDI2_QUEUE_ID_NIC_0_0 && i <= GAUDI2_QUEUE_ID_NIC_23_3) {
1905 q_props[i].supports_sync_stream = 0;
1907 q_props[i].supports_sync_stream = 1;
1908 num_sync_stream_queues++;
1911 q_props[i].cb_alloc_flags = CB_ALLOC_USER;
1914 q_props[GAUDI2_QUEUE_ID_CPU_PQ].type = QUEUE_TYPE_CPU;
1915 q_props[GAUDI2_QUEUE_ID_CPU_PQ].driver_only = 1;
1916 q_props[GAUDI2_QUEUE_ID_CPU_PQ].cb_alloc_flags = CB_ALLOC_KERNEL;
1918 prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
1919 prop->cfg_base_address = CFG_BASE;
1920 prop->device_dma_offset_for_host_access = HOST_PHYS_BASE_0;
1921 prop->host_base_address = HOST_PHYS_BASE_0;
1922 prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE_0;
1923 prop->max_pending_cs = GAUDI2_MAX_PENDING_CS;
1924 prop->completion_queues_count = GAUDI2_RESERVED_CQ_NUMBER;
1925 prop->user_dec_intr_count = NUMBER_OF_DEC;
1926 prop->user_interrupt_count = GAUDI2_IRQ_NUM_USER_LAST - GAUDI2_IRQ_NUM_USER_FIRST + 1;
1927 prop->completion_mode = HL_COMPLETION_MODE_CS;
1928 prop->sync_stream_first_sob = GAUDI2_RESERVED_SOB_NUMBER;
1929 prop->sync_stream_first_mon = GAUDI2_RESERVED_MON_NUMBER;
1931 prop->sram_base_address = SRAM_BASE_ADDR;
1932 prop->sram_size = SRAM_SIZE;
1933 prop->sram_end_address = prop->sram_base_address + prop->sram_size;
1934 prop->sram_user_base_address = prop->sram_base_address + SRAM_USER_BASE_OFFSET;
1936 prop->hints_range_reservation = true;
1939 prop->mmu_pgt_size = 0x800000; /* 8MB */
1941 prop->mmu_pgt_size = MMU_PAGE_TABLES_INITIAL_SIZE;
1943 prop->mmu_pte_size = HL_PTE_SIZE;
1944 prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
1945 prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
1947 prop->dmmu.hop_shifts[MMU_HOP0] = DHOP0_SHIFT;
1948 prop->dmmu.hop_shifts[MMU_HOP1] = DHOP1_SHIFT;
1949 prop->dmmu.hop_shifts[MMU_HOP2] = DHOP2_SHIFT;
1950 prop->dmmu.hop_shifts[MMU_HOP3] = DHOP3_SHIFT;
1951 prop->dmmu.hop_shifts[MMU_HOP4] = DHOP4_SHIFT;
1952 prop->dmmu.hop_masks[MMU_HOP0] = DHOP0_MASK;
1953 prop->dmmu.hop_masks[MMU_HOP1] = DHOP1_MASK;
1954 prop->dmmu.hop_masks[MMU_HOP2] = DHOP2_MASK;
1955 prop->dmmu.hop_masks[MMU_HOP3] = DHOP3_MASK;
1956 prop->dmmu.hop_masks[MMU_HOP4] = DHOP4_MASK;
1957 prop->dmmu.page_size = PAGE_SIZE_1GB;
1958 prop->dmmu.num_hops = MMU_ARCH_6_HOPS;
1959 prop->dmmu.last_mask = LAST_MASK;
1960 prop->dmmu.host_resident = 1;
1961 /* TODO: this is duplicated until per-MMU props are implemented */
1962 prop->dmmu.hop_table_size = prop->mmu_hop_table_size;
1963 prop->dmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
1966 * this is done in order to be able to validate the FW descriptor (i.e. to validate that
1967 * the addresses and the space allocated for the FW image do not cross memory bounds).
1968 * for this reason we set the DRAM size to the minimum possible, and later it will
1969 * be modified according to what is reported in the cpucp info packet
1971 prop->dram_size = (GAUDI2_HBM_NUM - 1) * SZ_16G;
1973 hdev->pmmu_huge_range = true;
1974 prop->pmmu.host_resident = 1;
1975 prop->pmmu.num_hops = MMU_ARCH_6_HOPS;
1976 prop->pmmu.last_mask = LAST_MASK;
1977 /* TODO: this is duplicated until per-MMU props are implemented */
1978 prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
1979 prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
1981 prop->hints_host_reserved_va_range.start_addr = RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START;
1982 prop->hints_host_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HOST_END;
1983 prop->hints_host_hpage_reserved_va_range.start_addr =
1984 RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_START;
1985 prop->hints_host_hpage_reserved_va_range.end_addr =
1986 RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_END;
1988 if (PAGE_SIZE == SZ_64K) {
1989 prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_64K;
1990 prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_64K;
1991 prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_64K;
1992 prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_64K;
1993 prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_64K;
1994 prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_64K;
1995 prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_64K;
1996 prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_64K;
1997 prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_64K;
1998 prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_64K;
1999 prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_64K;
2000 prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_64K;
2001 prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START;
2002 prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END;
2003 prop->pmmu.page_size = PAGE_SIZE_64KB;
2005 /* shifts and masks are the same in PMMU and HPMMU */
2006 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
2007 prop->pmmu_huge.page_size = PAGE_SIZE_16MB;
2008 prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START;
2009 prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END;
2011 prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_4K;
2012 prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_4K;
2013 prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_4K;
2014 prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_4K;
2015 prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_4K;
2016 prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_4K;
2017 prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_4K;
2018 prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_4K;
2019 prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_4K;
2020 prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_4K;
2021 prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_4K;
2022 prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_4K;
2023 prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START;
2024 prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END;
2025 prop->pmmu.page_size = PAGE_SIZE_4KB;
2027 /* shifts and masks are the same in PMMU and HPMMU */
2028 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
2029 prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
2030 prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START;
2031 prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END;
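/*
 * As in the 64KB branch above, the huge-page PMMU inherits the regular PMMU
 * hop geometry and only the page sizes differ (4KB/2MB here vs 64KB/16MB),
 * keeping the device page sizes aligned with the host page size.
 */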
2034 prop->num_engine_cores = CPU_ID_MAX;
2035 prop->cfg_size = CFG_SIZE;
2036 prop->max_asid = MAX_ASID;
2037 prop->num_of_events = GAUDI2_EVENT_SIZE;
2039 prop->dc_power_default = DC_POWER_DEFAULT;
2041 prop->cb_pool_cb_cnt = GAUDI2_CB_POOL_CB_CNT;
2042 prop->cb_pool_cb_size = GAUDI2_CB_POOL_CB_SIZE;
2043 prop->pcie_dbi_base_address = CFG_BASE + mmPCIE_DBI_BASE;
2044 prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
2046 strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
2048 prop->mme_master_slave_mode = 1;
2050 prop->first_available_user_sob[0] = GAUDI2_RESERVED_SOB_NUMBER +
2051 (num_sync_stream_queues * HL_RSVD_SOBS);
2053 prop->first_available_user_mon[0] = GAUDI2_RESERVED_MON_NUMBER +
2054 (num_sync_stream_queues * HL_RSVD_MONS);
2056 prop->first_available_user_interrupt = GAUDI2_IRQ_NUM_USER_FIRST;
2058 prop->first_available_cq[0] = GAUDI2_RESERVED_CQ_NUMBER;
2060 prop->fw_cpu_boot_dev_sts0_valid = false;
2061 prop->fw_cpu_boot_dev_sts1_valid = false;
2062 prop->hard_reset_done_by_fw = false;
2063 prop->gic_interrupts_enable = true;
2065 prop->server_type = HL_SERVER_TYPE_UNKNOWN;
2067 prop->max_dec = NUMBER_OF_DEC;
2069 prop->clk_pll_index = HL_GAUDI2_MME_PLL;
2071 prop->dma_mask = 64;
2076 static int gaudi2_pci_bars_map(struct hl_device *hdev)
2078 static const char * const name[] = {"CFG_SRAM", "MSIX", "DRAM"};
2079 bool is_wc[3] = {false, false, true};
2082 rc = hl_pci_bars_map(hdev, name, is_wc);
2086 hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] + (CFG_BASE - STM_FLASH_BASE_ADDR);
2091 static u64 gaudi2_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
2093 struct gaudi2_device *gaudi2 = hdev->asic_specific;
2094 struct hl_inbound_pci_region pci_region;
2095 u64 old_addr = addr;
2098 if ((gaudi2) && (gaudi2->dram_bar_cur_addr == addr))
2101 if (hdev->asic_prop.iatu_done_by_fw)
2104 /* Inbound Region 2 - Bar 4 - Point to DRAM */
2105 pci_region.mode = PCI_BAR_MATCH_MODE;
2106 pci_region.bar = DRAM_BAR_ID;
2107 pci_region.addr = addr;
2108 rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
2113 old_addr = gaudi2->dram_bar_cur_addr;
2114 gaudi2->dram_bar_cur_addr = addr;
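/*
 * Returning the previous BAR base allows callers to follow a save/restore
 * pattern around a windowed DRAM access, e.g. (illustrative only):
 *
 *	old_base = gaudi2_set_hbm_bar_base(hdev, target_addr);
 *	... access DRAM through the BAR window ...
 *	gaudi2_set_hbm_bar_base(hdev, old_base);
 */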
2120 static int gaudi2_init_iatu(struct hl_device *hdev)
2122 struct hl_inbound_pci_region inbound_region;
2123 struct hl_outbound_pci_region outbound_region;
2124 u32 bar_addr_low, bar_addr_high;
2127 if (hdev->asic_prop.iatu_done_by_fw)
2130 /* Temporary inbound Region 0 - Bar 0 - Point to CFG
2131 * We must map this region in BAR match mode in order to
2132 * fetch BAR physical base address
2134 inbound_region.mode = PCI_BAR_MATCH_MODE;
2135 inbound_region.bar = SRAM_CFG_BAR_ID;
2136 /* Base address must be aligned to the BAR size, which is 256 MB */
2137 inbound_region.addr = STM_FLASH_BASE_ADDR - STM_FLASH_ALIGNED_OFF;
2138 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
2142 /* Fetch physical BAR address */
2143 bar_addr_high = RREG32(mmPCIE_DBI_BAR1_REG + STM_FLASH_ALIGNED_OFF);
2144 bar_addr_low = RREG32(mmPCIE_DBI_BAR0_REG + STM_FLASH_ALIGNED_OFF) & ~0xF;
2146 hdev->pcie_bar_phys[SRAM_CFG_BAR_ID] = (u64)bar_addr_high << 32 | bar_addr_low;
2148 /* Inbound Region 0 - Bar 0 - Point to CFG */
2149 inbound_region.mode = PCI_ADDRESS_MATCH_MODE;
2150 inbound_region.bar = SRAM_CFG_BAR_ID;
2151 inbound_region.offset_in_bar = 0;
2152 inbound_region.addr = STM_FLASH_BASE_ADDR;
2153 inbound_region.size = CFG_REGION_SIZE;
2154 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
2158 /* Inbound Region 1 - Bar 0 - Point to BAR0_RESERVED + SRAM */
2159 inbound_region.mode = PCI_ADDRESS_MATCH_MODE;
2160 inbound_region.bar = SRAM_CFG_BAR_ID;
2161 inbound_region.offset_in_bar = CFG_REGION_SIZE;
2162 inbound_region.addr = BAR0_RSRVD_BASE_ADDR;
2163 inbound_region.size = BAR0_RSRVD_SIZE + SRAM_SIZE;
2164 rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
2168 /* Inbound Region 2 - Bar 4 - Point to DRAM */
2169 inbound_region.mode = PCI_BAR_MATCH_MODE;
2170 inbound_region.bar = DRAM_BAR_ID;
2171 inbound_region.addr = DRAM_PHYS_BASE;
2172 rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
2176 /* Outbound Region 0 - Point to Host */
2177 outbound_region.addr = HOST_PHYS_BASE_0;
2178 outbound_region.size = HOST_PHYS_SIZE_0;
2179 rc = hl_pci_set_outbound_region(hdev, &outbound_region);
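/*
 * Resulting iATU layout: inbound region 0 maps CFG (address match, BAR 0),
 * inbound region 1 maps BAR0-reserved + SRAM (address match, BAR 0),
 * inbound region 2 maps DRAM (BAR match, BAR 4), and the single outbound
 * region exposes host physical range 0 to the device.
 */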
2184 static enum hl_device_hw_state gaudi2_get_hw_state(struct hl_device *hdev)
2186 return RREG32(mmHW_STATE);
2189 static int gaudi2_tpc_binning_init_prop(struct hl_device *hdev)
2191 struct asic_fixed_properties *prop = &hdev->asic_prop;
2194 * check for the error condition in which the number of binning candidates
2195 * is higher than the maximum supported by the driver
2197 if (hweight64(hdev->tpc_binning) > MAX_CLUSTER_BINNING_FAULTY_TPCS) {
2198 dev_err(hdev->dev, "TPC binning supports a max of %d faulty TPCs, provided mask 0x%llx\n",
2199 MAX_CLUSTER_BINNING_FAULTY_TPCS,
2204 prop->tpc_binning_mask = hdev->tpc_binning;
2205 prop->tpc_enabled_mask = GAUDI2_TPC_FULL_MASK;
2210 static int gaudi2_set_tpc_binning_masks(struct hl_device *hdev)
2212 struct asic_fixed_properties *prop = &hdev->asic_prop;
2213 struct hw_queue_properties *q_props = prop->hw_queues_props;
2214 u64 tpc_binning_mask;
2218 rc = gaudi2_tpc_binning_init_prop(hdev);
2222 tpc_binning_mask = prop->tpc_binning_mask;
2224 for (i = 0 ; i < MAX_FAULTY_TPCS ; i++) {
2225 u8 subst_seq, binned, qid_base;
2227 if (tpc_binning_mask == 0)
2230 if (subst_idx == 0) {
2231 subst_seq = TPC_ID_DCORE0_TPC6;
2232 qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0;
2234 subst_seq = TPC_ID_DCORE3_TPC5;
2235 qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0;
2239 /* clear bit from mask */
2240 binned = __ffs(tpc_binning_mask);
2242 * Coverity complains about a possible out-of-bounds access in
2245 if (binned >= TPC_ID_SIZE) {
2247 "Invalid binned TPC (binning mask: %llx)\n",
2251 clear_bit(binned, (unsigned long *)&tpc_binning_mask);
2253 /* also clear replacing TPC bit from enabled mask */
2254 clear_bit(subst_seq, (unsigned long *)&prop->tpc_enabled_mask);
2256 /* bin the substitute TPC's queues */
2257 q_props[qid_base].binned = 1;
2258 q_props[qid_base + 1].binned = 1;
2259 q_props[qid_base + 2].binned = 1;
2260 q_props[qid_base + 3].binned = 1;
2268 static int gaudi2_set_dec_binning_masks(struct hl_device *hdev)
2270 struct asic_fixed_properties *prop = &hdev->asic_prop;
2273 num_faulty = hweight32(hdev->decoder_binning);
2276 * check for the error condition in which the number of binning candidates
2277 * is higher than the maximum supported by the driver
2279 if (num_faulty > MAX_FAULTY_DECODERS) {
2280 dev_err(hdev->dev, "decoder binning supports a max of a single faulty decoder, provided mask 0x%x\n",
2281 hdev->decoder_binning);
2285 prop->decoder_binning_mask = (hdev->decoder_binning & GAUDI2_DECODER_FULL_MASK);
2287 if (prop->decoder_binning_mask)
2288 prop->decoder_enabled_mask = (GAUDI2_DECODER_FULL_MASK & ~BIT(DEC_ID_PCIE_VDEC1));
2290 prop->decoder_enabled_mask = GAUDI2_DECODER_FULL_MASK;
2295 static void gaudi2_set_dram_binning_masks(struct hl_device *hdev)
2297 struct asic_fixed_properties *prop = &hdev->asic_prop;
2299 /* check if we should override default binning */
2300 if (!hdev->dram_binning) {
2301 prop->dram_binning_mask = 0;
2302 prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK;
2306 /* set DRAM binning constraints */
2307 prop->faulty_dram_cluster_map |= hdev->dram_binning;
2308 prop->dram_binning_mask = hdev->dram_binning;
2309 prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK & ~BIT(HBM_ID5);
2312 static int gaudi2_set_edma_binning_masks(struct hl_device *hdev)
2314 struct asic_fixed_properties *prop = &hdev->asic_prop;
2315 struct hw_queue_properties *q_props;
2318 num_faulty = hweight32(hdev->edma_binning);
2321 * check for the error condition in which the number of binning candidates
2322 * is higher than the maximum supported by the driver
2324 if (num_faulty > MAX_FAULTY_EDMAS) {
2326 "EDMA binning is supported for max of single faulty EDMA, provided mask 0x%x\n",
2327 hdev->edma_binning);
2331 if (!hdev->edma_binning) {
2332 prop->edma_binning_mask = 0;
2333 prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK;
2337 seq = __ffs((unsigned long)hdev->edma_binning);
2339 /* set binning constraints */
2340 prop->faulty_dram_cluster_map |= BIT(edma_to_hbm_cluster[seq]);
2341 prop->edma_binning_mask = hdev->edma_binning;
2342 prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK & ~BIT(EDMA_ID_DCORE3_INSTANCE1);
2344 /* bin substitute EDMA's queue */
2345 q_props = prop->hw_queues_props;
2346 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0].binned = 1;
2347 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1].binned = 1;
2348 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2].binned = 1;
2349 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3].binned = 1;
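/*
 * Note: regardless of which EDMA instance the mask marks as faulty,
 * EDMA_ID_DCORE3_INSTANCE1 is always the substitute - it is the one removed
 * from the enabled mask and its queues are the ones binned.
 */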
2354 static int gaudi2_set_xbar_edge_enable_mask(struct hl_device *hdev, u32 xbar_edge_iso_mask)
2356 struct asic_fixed_properties *prop = &hdev->asic_prop;
2359 /* check if we should override default binning */
2360 if (!xbar_edge_iso_mask) {
2361 prop->xbar_edge_enabled_mask = GAUDI2_XBAR_EDGE_FULL_MASK;
2366 * note that it can be set to a value other than 0 only after the cpucp packet (i.e.
2367 * only the FW can set a redundancy value); for the user it'll always be 0.
2369 num_faulty = hweight32(xbar_edge_iso_mask);
2372 * check for the error condition in which the number of binning candidates
2373 * is higher than the maximum supported by the driver
2375 if (num_faulty > MAX_FAULTY_XBARS) {
2376 dev_err(hdev->dev, "we cannot have more than %d faulty XBAR EDGEs\n",
2381 seq = __ffs((unsigned long)xbar_edge_iso_mask);
2383 /* set binning constraints */
2384 prop->faulty_dram_cluster_map |= BIT(xbar_edge_to_hbm_cluster[seq]);
2385 prop->xbar_edge_enabled_mask = (~xbar_edge_iso_mask) & GAUDI2_XBAR_EDGE_FULL_MASK;
2390 static int gaudi2_set_cluster_binning_masks_common(struct hl_device *hdev, u8 xbar_edge_iso_mask)
2395 * mark all clusters as good; each component will "fail" a cluster
2396 * based on eFuse/user values.
2397 * If more than a single cluster is faulty, the chip is unusable
2399 hdev->asic_prop.faulty_dram_cluster_map = 0;
2401 gaudi2_set_dram_binning_masks(hdev);
2403 rc = gaudi2_set_edma_binning_masks(hdev);
2407 rc = gaudi2_set_xbar_edge_enable_mask(hdev, xbar_edge_iso_mask);
2412 /* always initially set to full mask */
2413 hdev->asic_prop.hmmu_hif_enabled_mask = GAUDI2_HIF_HMMU_FULL_MASK;
2418 static int gaudi2_set_cluster_binning_masks(struct hl_device *hdev)
2420 struct asic_fixed_properties *prop = &hdev->asic_prop;
2423 rc = gaudi2_set_cluster_binning_masks_common(hdev, prop->cpucp_info.xbar_binning_mask);
2427 /* if we have DRAM binning reported by FW we should perform cluster config */
2428 if (prop->faulty_dram_cluster_map) {
2429 u8 cluster_seq = __ffs((unsigned long)prop->faulty_dram_cluster_map);
2431 prop->hmmu_hif_enabled_mask = cluster_hmmu_hif_enabled_mask[cluster_seq];
2437 static int gaudi2_cpucp_info_get(struct hl_device *hdev)
2439 struct gaudi2_device *gaudi2 = hdev->asic_specific;
2440 struct asic_fixed_properties *prop = &hdev->asic_prop;
2445 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
2448 /* No point in asking for this information again when not doing hard reset, as the device
2449 * CPU hasn't been reset
2451 if (hdev->reset_info.in_compute_reset)
2454 rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
2459 dram_size = le64_to_cpu(prop->cpucp_info.dram_size);
2461 /* we can have either 5 or 6 HBMs; other values are invalid */
2463 if ((dram_size != ((GAUDI2_HBM_NUM - 1) * SZ_16G)) &&
2464 (dram_size != (GAUDI2_HBM_NUM * SZ_16G))) {
2466 "F/W reported invalid DRAM size %llu. Trying to use default size %llu\n",
2467 dram_size, prop->dram_size);
2468 dram_size = prop->dram_size;
2471 prop->dram_size = dram_size;
2472 prop->dram_end_address = prop->dram_base_address + dram_size;
2475 if (!strlen(prop->cpucp_info.card_name))
2476 strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
2478 /* Overwrite binning masks with the actual binning values from F/W */
2479 hdev->dram_binning = prop->cpucp_info.dram_binning_mask;
2480 hdev->edma_binning = prop->cpucp_info.edma_binning_mask;
2481 hdev->tpc_binning = le64_to_cpu(prop->cpucp_info.tpc_binning_mask);
2482 hdev->decoder_binning = lower_32_bits(le64_to_cpu(prop->cpucp_info.decoder_binning_mask));
2485 * at this point the DRAM parameters need to be updated according to data obtained
2488 rc = gaudi2_set_dram_properties(hdev);
2492 rc = gaudi2_set_cluster_binning_masks(hdev);
2496 rc = gaudi2_set_tpc_binning_masks(hdev);
2500 rc = gaudi2_set_dec_binning_masks(hdev);
2504 max_power = hl_fw_get_max_power(hdev);
2508 prop->max_power_default = (u64) max_power;
2513 static int gaudi2_fetch_psoc_frequency(struct hl_device *hdev)
2515 struct gaudi2_device *gaudi2 = hdev->asic_specific;
2516 u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS];
2519 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
2522 rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI2_CPU_PLL, pll_freq_arr);
2526 hdev->asic_prop.psoc_timestamp_frequency = pll_freq_arr[3];
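/* output index 3 of the CPU PLL output array provides the PSOC timestamp frequency */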
2531 static int gaudi2_early_init(struct hl_device *hdev)
2533 struct asic_fixed_properties *prop = &hdev->asic_prop;
2534 struct pci_dev *pdev = hdev->pdev;
2535 resource_size_t pci_bar_size;
2538 rc = gaudi2_set_fixed_properties(hdev);
2542 /* Check BAR sizes */
2543 pci_bar_size = pci_resource_len(pdev, SRAM_CFG_BAR_ID);
2545 if (pci_bar_size != CFG_BAR_SIZE) {
2546 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
2547 SRAM_CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
2549 goto free_queue_props;
2552 pci_bar_size = pci_resource_len(pdev, MSIX_BAR_ID);
2553 if (pci_bar_size != MSIX_BAR_SIZE) {
2554 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
2555 MSIX_BAR_ID, &pci_bar_size, MSIX_BAR_SIZE);
2557 goto free_queue_props;
2560 prop->dram_pci_bar_size = pci_resource_len(pdev, DRAM_BAR_ID);
2561 hdev->dram_pci_bar_start = pci_resource_start(pdev, DRAM_BAR_ID);
2564 * Only in pldm does the driver configure the iATU
2567 hdev->asic_prop.iatu_done_by_fw = false;
2569 hdev->asic_prop.iatu_done_by_fw = true;
2571 rc = hl_pci_init(hdev);
2573 goto free_queue_props;
2575 /* Before continuing with the initialization, we need to read the preboot
2576 * version to determine whether we run with a security-enabled firmware
2578 rc = hl_fw_read_preboot_status(hdev);
2580 if (hdev->reset_on_preboot_fail)
2581 hdev->asic_funcs->hw_fini(hdev, true, false);
2585 if (gaudi2_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
2586 dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
2587 hdev->asic_funcs->hw_fini(hdev, true, false);
2595 kfree(hdev->asic_prop.hw_queues_props);
2599 static int gaudi2_early_fini(struct hl_device *hdev)
2601 kfree(hdev->asic_prop.hw_queues_props);
2607 static bool gaudi2_is_arc_nic_owned(u64 arc_id)
2610 case CPU_ID_NIC_QMAN_ARC0 ... CPU_ID_NIC_QMAN_ARC23:
2617 static bool gaudi2_is_arc_tpc_owned(u64 arc_id)
2620 case CPU_ID_TPC_QMAN_ARC0 ... CPU_ID_TPC_QMAN_ARC24:
2627 static void gaudi2_init_arcs(struct hl_device *hdev)
2629 struct gaudi2_device *gaudi2 = hdev->asic_specific;
2633 for (i = CPU_ID_SCHED_ARC0 ; i <= CPU_ID_SCHED_ARC3 ; i++) {
2634 if (gaudi2_is_arc_enabled(hdev, i))
2637 gaudi2_set_arc_id_cap(hdev, i);
2640 for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) {
2641 if (!gaudi2_is_queue_enabled(hdev, i))
2644 arc_id = gaudi2_queue_id_to_arc_id[i];
2645 if (gaudi2_is_arc_enabled(hdev, arc_id))
2648 if (gaudi2_is_arc_nic_owned(arc_id) &&
2649 !(hdev->nic_ports_mask & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0)))
2652 if (gaudi2_is_arc_tpc_owned(arc_id) && !(gaudi2->tpc_hw_cap_initialized &
2653 BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0)))
2656 gaudi2_set_arc_id_cap(hdev, arc_id);
2660 static int gaudi2_scrub_arc_dccm(struct hl_device *hdev, u32 cpu_id)
2662 u32 reg_base, reg_val;
2666 case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC3:
2667 /* Each ARC scheduler has 2 consecutive DCCM blocks */
2668 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
2669 ARC_DCCM_BLOCK_SIZE * 2, true);
2673 case CPU_ID_SCHED_ARC4:
2674 case CPU_ID_SCHED_ARC5:
2675 case CPU_ID_MME_QMAN_ARC0:
2676 case CPU_ID_MME_QMAN_ARC1:
2677 reg_base = gaudi2_arc_blocks_bases[cpu_id];
2679 /* Scrub lower DCCM block */
2680 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
2681 ARC_DCCM_BLOCK_SIZE, true);
2685 /* Switch to upper DCCM block */
2686 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 1);
2687 WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val);
2689 /* Scrub upper DCCM block */
2690 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
2691 ARC_DCCM_BLOCK_SIZE, true);
2695 /* Switch to lower DCCM block */
2696 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 0);
2697 WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val);
2700 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
2701 ARC_DCCM_BLOCK_SIZE, true);
2709 static void gaudi2_scrub_arcs_dccm(struct hl_device *hdev)
2713 for (arc_id = CPU_ID_SCHED_ARC0 ; arc_id < CPU_ID_MAX ; arc_id++) {
2714 if (!gaudi2_is_arc_enabled(hdev, arc_id))
2717 gaudi2_scrub_arc_dccm(hdev, arc_id);
2721 static int gaudi2_late_init(struct hl_device *hdev)
2723 struct gaudi2_device *gaudi2 = hdev->asic_specific;
2726 hdev->asic_prop.supports_advanced_cpucp_rc = true;
2728 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS,
2729 gaudi2->virt_msix_db_dma_addr);
2731 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
2735 rc = gaudi2_fetch_psoc_frequency(hdev);
2737 dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
2738 goto disable_pci_access;
2741 gaudi2_init_arcs(hdev);
2742 gaudi2_scrub_arcs_dccm(hdev);
2743 gaudi2_init_security(hdev);
2748 hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
2753 static void gaudi2_late_fini(struct hl_device *hdev)
2755 hl_hwmon_release_resources(hdev);
2758 static void gaudi2_user_mapped_dec_init(struct gaudi2_device *gaudi2, u32 start_idx)
2760 struct user_mapped_block *blocks = gaudi2->mapped_blocks;
2762 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC0_CMD_BASE, HL_BLOCK_SIZE);
2763 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC1_CMD_BASE, HL_BLOCK_SIZE);
2764 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC0_CMD_BASE, HL_BLOCK_SIZE);
2765 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC1_CMD_BASE, HL_BLOCK_SIZE);
2766 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC0_CMD_BASE, HL_BLOCK_SIZE);
2767 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC1_CMD_BASE, HL_BLOCK_SIZE);
2768 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC0_CMD_BASE, HL_BLOCK_SIZE);
2769 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC1_CMD_BASE, HL_BLOCK_SIZE);
2770 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmPCIE_DEC0_CMD_BASE, HL_BLOCK_SIZE);
2771 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx], mmPCIE_DEC1_CMD_BASE, HL_BLOCK_SIZE);
2774 static void gaudi2_user_mapped_blocks_init(struct hl_device *hdev)
2776 struct gaudi2_device *gaudi2 = hdev->asic_specific;
2777 struct user_mapped_block *blocks = gaudi2->mapped_blocks;
2778 u32 block_size, umr_start_idx, num_umr_blocks;
2781 for (i = 0 ; i < NUM_ARC_CPUS ; i++) {
2782 if (i >= CPU_ID_SCHED_ARC0 && i <= CPU_ID_SCHED_ARC3)
2783 block_size = ARC_DCCM_BLOCK_SIZE * 2;
2785 block_size = ARC_DCCM_BLOCK_SIZE;
2787 blocks[i].address = gaudi2_arc_dccm_bases[i];
2788 blocks[i].size = block_size;
2791 blocks[NUM_ARC_CPUS].address = mmARC_FARM_ARC0_ACP_ENG_BASE;
2792 blocks[NUM_ARC_CPUS].size = HL_BLOCK_SIZE;
2794 blocks[NUM_ARC_CPUS + 1].address = mmARC_FARM_ARC1_ACP_ENG_BASE;
2795 blocks[NUM_ARC_CPUS + 1].size = HL_BLOCK_SIZE;
2797 blocks[NUM_ARC_CPUS + 2].address = mmARC_FARM_ARC2_ACP_ENG_BASE;
2798 blocks[NUM_ARC_CPUS + 2].size = HL_BLOCK_SIZE;
2800 blocks[NUM_ARC_CPUS + 3].address = mmARC_FARM_ARC3_ACP_ENG_BASE;
2801 blocks[NUM_ARC_CPUS + 3].size = HL_BLOCK_SIZE;
2803 blocks[NUM_ARC_CPUS + 4].address = mmDCORE0_MME_QM_ARC_ACP_ENG_BASE;
2804 blocks[NUM_ARC_CPUS + 4].size = HL_BLOCK_SIZE;
2806 blocks[NUM_ARC_CPUS + 5].address = mmDCORE1_MME_QM_ARC_ACP_ENG_BASE;
2807 blocks[NUM_ARC_CPUS + 5].size = HL_BLOCK_SIZE;
2809 blocks[NUM_ARC_CPUS + 6].address = mmDCORE2_MME_QM_ARC_ACP_ENG_BASE;
2810 blocks[NUM_ARC_CPUS + 6].size = HL_BLOCK_SIZE;
2812 blocks[NUM_ARC_CPUS + 7].address = mmDCORE3_MME_QM_ARC_ACP_ENG_BASE;
2813 blocks[NUM_ARC_CPUS + 7].size = HL_BLOCK_SIZE;
2815 umr_start_idx = NUM_ARC_CPUS + NUM_OF_USER_ACP_BLOCKS;
2816 num_umr_blocks = NIC_NUMBER_OF_ENGINES * NUM_OF_USER_NIC_UMR_BLOCKS;
2817 for (i = 0 ; i < num_umr_blocks ; i++) {
2818 u8 nic_id, umr_block_id;
2820 nic_id = i / NUM_OF_USER_NIC_UMR_BLOCKS;
2821 umr_block_id = i % NUM_OF_USER_NIC_UMR_BLOCKS;
2823 blocks[umr_start_idx + i].address =
2824 mmNIC0_UMR0_0_UNSECURE_DOORBELL0_BASE +
2825 (nic_id / NIC_NUMBER_OF_QM_PER_MACRO) * NIC_OFFSET +
2826 (nic_id % NIC_NUMBER_OF_QM_PER_MACRO) * NIC_QM_OFFSET +
2827 umr_block_id * NIC_UMR_OFFSET;
2828 blocks[umr_start_idx + i].size = HL_BLOCK_SIZE;
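/*
 * The UMR block address above is composed hierarchically: the NIC macro
 * offset (nic_id / QMs-per-macro), the QM offset within the macro
 * (nic_id % QMs-per-macro) and finally the UMR block offset within the QM.
 */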
2831 /* Expose decoder HW configuration block to user */
2832 gaudi2_user_mapped_dec_init(gaudi2, USR_MAPPED_BLK_DEC_START_IDX);
2834 for (i = 1; i < NUM_OF_DCORES; ++i) {
2835 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].size = SM_OBJS_BLOCK_SIZE;
2836 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].size = HL_BLOCK_SIZE;
2838 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].address =
2839 mmDCORE0_SYNC_MNGR_OBJS_BASE + i * DCORE_OFFSET;
2841 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].address =
2842 mmDCORE0_SYNC_MNGR_GLBL_BASE + i * DCORE_OFFSET;
2846 static int gaudi2_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
2848 dma_addr_t dma_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
2849 void *virt_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {};
2852 /* The device ARC works with 32-bit addresses, and because there is a single HW register
2853 * that holds the extension bits (49..28), these bits must be identical in all the allocated
2857 for (i = 0 ; i < GAUDI2_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
2858 virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
2859 &dma_addr_arr[i], GFP_KERNEL | __GFP_ZERO);
2860 if (!virt_addr_arr[i]) {
2862 goto free_dma_mem_arr;
2865 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
2866 if (GAUDI2_ARC_PCI_MSB_ADDR(dma_addr_arr[i]) == GAUDI2_ARC_PCI_MSB_ADDR(end_addr))
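/*
 * Since bits 49..28 must be identical across the buffer, the allocation
 * must not cross a 2^28 = 256MB aligned boundary; on a miss the buffer is
 * kept (to be freed below) and the allocation is simply retried, up to
 * GAUDI2_ALLOC_CPU_MEM_RETRY_CNT times.
 */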
2870 if (i == GAUDI2_ALLOC_CPU_MEM_RETRY_CNT) {
2872 "MSB of ARC accessible DMA memory are not identical in all range\n");
2874 goto free_dma_mem_arr;
2877 hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
2878 hdev->cpu_accessible_dma_address = dma_addr_arr[i];
2881 for (j = 0 ; j < i ; j++)
2882 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
2888 static void gaudi2_set_pci_memory_regions(struct hl_device *hdev)
2890 struct asic_fixed_properties *prop = &hdev->asic_prop;
2891 struct pci_mem_region *region;
2894 region = &hdev->pci_mem_region[PCI_REGION_CFG];
2895 region->region_base = CFG_BASE;
2896 region->region_size = CFG_SIZE;
2897 region->offset_in_bar = CFG_BASE - STM_FLASH_BASE_ADDR;
2898 region->bar_size = CFG_BAR_SIZE;
2899 region->bar_id = SRAM_CFG_BAR_ID;
2903 region = &hdev->pci_mem_region[PCI_REGION_SRAM];
2904 region->region_base = SRAM_BASE_ADDR;
2905 region->region_size = SRAM_SIZE;
2906 region->offset_in_bar = CFG_REGION_SIZE + BAR0_RSRVD_SIZE;
2907 region->bar_size = CFG_BAR_SIZE;
2908 region->bar_id = SRAM_CFG_BAR_ID;
2912 region = &hdev->pci_mem_region[PCI_REGION_DRAM];
2913 region->region_base = DRAM_PHYS_BASE;
2914 region->region_size = hdev->asic_prop.dram_size;
2915 region->offset_in_bar = 0;
2916 region->bar_size = prop->dram_pci_bar_size;
2917 region->bar_id = DRAM_BAR_ID;
2921 static void gaudi2_user_interrupt_setup(struct hl_device *hdev)
2923 struct asic_fixed_properties *prop = &hdev->asic_prop;
2926 /* Initialize common user CQ interrupt */
2927 HL_USR_INTR_STRUCT_INIT(hdev->common_user_cq_interrupt, hdev,
2928 HL_COMMON_USER_CQ_INTERRUPT_ID, false);
2930 /* Initialize common decoder interrupt */
2931 HL_USR_INTR_STRUCT_INIT(hdev->common_decoder_interrupt, hdev,
2932 HL_COMMON_DEC_INTERRUPT_ID, true);
2934 /* User interrupts structure holds both decoder and user interrupts from various engines.
2935 * We first initialize the decoder interrupts and then we add the user interrupts.
2936 * The only limitation is that the last decoder interrupt id must be smaller
2937 * than GAUDI2_IRQ_NUM_USER_FIRST. This is checked at compilation time.
2940 /* Initialize decoder interrupts; expose only the normal interrupts,
2941 * as the error interrupts are to be handled by the driver
2943 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, j = 0 ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_NRM;
2945 HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i, true);
2947 for (i = GAUDI2_IRQ_NUM_USER_FIRST, k = 0 ; k < prop->user_interrupt_count; i++, j++, k++)
2948 HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i, false);
2951 static inline int gaudi2_get_non_zero_random_int(void)
2953 int rand = get_random_u32();
2955 return rand ? rand : 1;
static int gaudi2_sw_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi2_device *gaudi2;
	int i, rc;

	/* Allocate device structure */
	gaudi2 = kzalloc(sizeof(*gaudi2), GFP_KERNEL);
	if (!gaudi2)
		return -ENOMEM;

	for (i = 0 ; i < ARRAY_SIZE(gaudi2_irq_map_table) ; i++) {
		if (gaudi2_irq_map_table[i].msg || !gaudi2_irq_map_table[i].valid)
			continue;

		if (gaudi2->num_of_valid_hw_events == GAUDI2_EVENT_SIZE) {
			dev_err(hdev->dev, "H/W events array exceeds the limit of %u events\n",
				GAUDI2_EVENT_SIZE);
			rc = -EINVAL;
			goto free_gaudi2_device;
		}

		gaudi2->hw_events[gaudi2->num_of_valid_hw_events++] = gaudi2_irq_map_table[i].fc_id;
	}

	for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++)
		gaudi2->lfsr_rand_seeds[i] = gaudi2_get_non_zero_random_int();

	gaudi2->cpucp_info_get = gaudi2_cpucp_info_get;

	hdev->asic_specific = gaudi2;

	/* Create DMA pool for small allocations.
	 * Use DEVICE_CACHE_LINE_SIZE for alignment since the NIC memory-mapped
	 * PI/CI registers allocated from this pool have this restriction
	 */
	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), &hdev->pdev->dev,
					GAUDI2_DMA_POOL_BLK_SIZE, DEVICE_CACHE_LINE_SIZE, 0);
	if (!hdev->dma_pool) {
		dev_err(hdev->dev, "failed to create DMA pool\n");
		rc = -ENOMEM;
		goto free_gaudi2_device;
	}

	rc = gaudi2_alloc_cpu_accessible_dma_mem(hdev);
	if (rc)
		goto free_dma_pool;

	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
	if (!hdev->cpu_accessible_dma_pool) {
		dev_err(hdev->dev, "Failed to create CPU accessible DMA pool\n");
		rc = -ENOMEM;
		goto free_cpu_dma_mem;
	}

	rc = gen_pool_add(hdev->cpu_accessible_dma_pool, (uintptr_t) hdev->cpu_accessible_dma_mem,
				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
	if (rc) {
		dev_err(hdev->dev, "Failed to add memory to CPU accessible DMA pool\n");
		rc = -EFAULT;
		goto free_cpu_accessible_dma_pool;
	}

	gaudi2->virt_msix_db_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev, prop->pmmu.page_size,
								&gaudi2->virt_msix_db_dma_addr);
	if (!gaudi2->virt_msix_db_cpu_addr) {
		dev_err(hdev->dev, "Failed to allocate DMA memory for virtual MSI-X doorbell\n");
		rc = -ENOMEM;
		goto free_cpu_accessible_dma_pool;
	}

	spin_lock_init(&gaudi2->hw_queues_lock);

	gaudi2->scratchpad_kernel_address = hl_asic_dma_alloc_coherent(hdev, PAGE_SIZE,
							&gaudi2->scratchpad_bus_address,
							GFP_KERNEL | __GFP_ZERO);
	if (!gaudi2->scratchpad_kernel_address) {
		rc = -ENOMEM;
		goto free_virt_msix_db_mem;
	}

	gaudi2_user_mapped_blocks_init(hdev);

	/* Initialize user interrupts */
	gaudi2_user_interrupt_setup(hdev);

	hdev->supports_coresight = true;
	hdev->supports_sync_stream = true;
	hdev->supports_cb_mapping = true;
	hdev->supports_wait_for_multi_cs = false;

	prop->supports_compute_reset = true;

	hdev->asic_funcs->set_pci_memory_regions(hdev);

	return 0;

free_virt_msix_db_mem:
	hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);
free_cpu_accessible_dma_pool:
	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
free_cpu_dma_mem:
	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
					hdev->cpu_accessible_dma_address);
free_dma_pool:
	dma_pool_destroy(hdev->dma_pool);
free_gaudi2_device:
	kfree(gaudi2);
	return rc;
}
static int gaudi2_sw_fini(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi2_device *gaudi2 = hdev->asic_specific;

	hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);

	gen_pool_destroy(hdev->cpu_accessible_dma_pool);

	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
					hdev->cpu_accessible_dma_address);

	hl_asic_dma_free_coherent(hdev, PAGE_SIZE, gaudi2->scratchpad_kernel_address,
					gaudi2->scratchpad_bus_address);

	dma_pool_destroy(hdev->dma_pool);

	kfree(gaudi2);

	return 0;
}
static void gaudi2_stop_qman_common(struct hl_device *hdev, u32 reg_base)
{
	WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_STOP |
						QM_GLBL_CFG1_CQF_STOP |
						QM_GLBL_CFG1_CP_STOP);

	/* stop also the ARC */
	WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_STOP);
}

static void gaudi2_flush_qman_common(struct hl_device *hdev, u32 reg_base)
{
	WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_FLUSH |
						QM_GLBL_CFG1_CQF_FLUSH |
						QM_GLBL_CFG1_CP_FLUSH);
}

static void gaudi2_flush_qman_arc_common(struct hl_device *hdev, u32 reg_base)
{
	WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_FLUSH);
}

/**
 * gaudi2_clear_qm_fence_counters_common - clear QM's fence counters
 *
 * @hdev: pointer to the habanalabs device structure
 * @queue_id: queue to clear fence counters to
 * @skip_fence: if true set maximum fence value to all fence counters to avoid
 *              getting stuck on any fence value. otherwise set all fence
 *              counters to 0 (standard clear of fence counters)
 */
static void gaudi2_clear_qm_fence_counters_common(struct hl_device *hdev, u32 queue_id,
						bool skip_fence)
{
	u32 size, reg_base;
	u32 addr, val;

	reg_base = gaudi2_qm_blocks_bases[queue_id];

	addr = reg_base + QM_CP_FENCE0_CNT_0_OFFSET;
	size = mmPDMA0_QM_CP_BARRIER_CFG - mmPDMA0_QM_CP_FENCE0_CNT_0;

	/*
	 * in case we want to make sure that a QM that is stuck on a fence will
	 * be released, we should set the fence counter to a higher value than
	 * the one the QM is waiting for. To comply with any fence counter of
	 * any value, we set the maximum fence value to all counters
	 */
	val = skip_fence ? U32_MAX : 0;
	gaudi2_memset_device_lbw(hdev, addr, size, val);
}
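/*
 * Worked example for the memset span above: QM_CP_FENCE0_CNT_0 is the first
 * fence counter register and mmPDMA0_QM_CP_BARRIER_CFG is the first register
 * after that array, so
 *	size = mmPDMA0_QM_CP_BARRIER_CFG - mmPDMA0_QM_CP_FENCE0_CNT_0
 * presumably covers every FENCE0..FENCE3 counter of every CP in a single LBW
 * memset. With skip_fence, writing U32_MAX satisfies any target value a
 * stuck CP could be waiting on, since a higher counter releases the fence.
 */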
static void gaudi2_qman_manual_flush_common(struct hl_device *hdev, u32 queue_id)
{
	u32 reg_base = gaudi2_qm_blocks_bases[queue_id];

	gaudi2_clear_qm_fence_counters_common(hdev, queue_id, true);
	gaudi2_flush_qman_common(hdev, reg_base);
	gaudi2_flush_qman_arc_common(hdev, reg_base);
}

static void gaudi2_stop_dma_qmans(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	int dcore, inst;

	if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
		goto stop_edma_qmans;

	/* Stop CPs of PDMA QMANs */
	gaudi2_stop_qman_common(hdev, mmPDMA0_QM_BASE);
	gaudi2_stop_qman_common(hdev, mmPDMA1_QM_BASE);

stop_edma_qmans:
	if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
		return;

	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
			u32 qm_base;

			if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
				continue;

			qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET +
					inst * DCORE_EDMA_OFFSET;

			/* Stop CPs of EDMA QMANs */
			gaudi2_stop_qman_common(hdev, qm_base);
		}
	}
}
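/*
 * EDMA block address arithmetic used above, for illustration: each DCORE is
 * DCORE_OFFSET apart and each EDMA instance inside a DCORE is
 * DCORE_EDMA_OFFSET apart, so e.g. DCORE2 EDMA1 lives at
 *	mmDCORE0_EDMA0_QM_BASE + 2 * DCORE_OFFSET + 1 * DCORE_EDMA_OFFSET
 * and its capability bit is
 *	BIT_ULL(HW_CAP_EDMA_SHIFT + 2 * NUM_OF_EDMA_PER_DCORE + 1).
 */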
static void gaudi2_stop_mme_qmans(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 offset, i;

	offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE;

	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i)))
			continue;

		gaudi2_stop_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset));
	}
}

static void gaudi2_stop_tpc_qmans(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 reg_base;
	int i;

	if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
		return;

	for (i = 0 ; i < TPC_ID_SIZE ; i++) {
		if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
			continue;

		reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]];
		gaudi2_stop_qman_common(hdev, reg_base);
	}
}

static void gaudi2_stop_rot_qmans(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 reg_base;
	int i;

	if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
		return;

	for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
			continue;

		reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]];
		gaudi2_stop_qman_common(hdev, reg_base);
	}
}

static void gaudi2_stop_nic_qmans(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 reg_base, queue_id;
	int i;

	if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
		return;

	queue_id = GAUDI2_QUEUE_ID_NIC_0_0;

	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
		if (!(hdev->nic_ports_mask & BIT(i)))
			continue;

		reg_base = gaudi2_qm_blocks_bases[queue_id];
		gaudi2_stop_qman_common(hdev, reg_base);
	}
}
static void gaudi2_stall_dma_common(struct hl_device *hdev, u32 reg_base)
{
	u32 reg_val;

	reg_val = FIELD_PREP(PDMA0_CORE_CFG_1_HALT_MASK, 0x1);
	WREG32(reg_base + DMA_CORE_CFG_1_OFFSET, reg_val);
}

static void gaudi2_dma_stall(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	int dcore, inst;

	if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
		goto stall_edma;

	gaudi2_stall_dma_common(hdev, mmPDMA0_CORE_BASE);
	gaudi2_stall_dma_common(hdev, mmPDMA1_CORE_BASE);

stall_edma:
	if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
		return;

	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
			u32 core_base;

			if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
				continue;

			core_base = mmDCORE0_EDMA0_CORE_BASE + dcore * DCORE_OFFSET +
					inst * DCORE_EDMA_OFFSET;

			/* Stall CPs of EDMA QMANs */
			gaudi2_stall_dma_common(hdev, core_base);
		}
	}
}

static void gaudi2_mme_stall(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 offset, i;

	offset = mmDCORE1_MME_CTRL_LO_QM_STALL - mmDCORE0_MME_CTRL_LO_QM_STALL;

	for (i = 0 ; i < NUM_OF_DCORES ; i++)
		if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i))
			WREG32(mmDCORE0_MME_CTRL_LO_QM_STALL + (i * offset), 1);
}

static void gaudi2_tpc_stall(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 reg_base;
	int i;

	if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
		return;

	for (i = 0 ; i < TPC_ID_SIZE ; i++) {
		if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
			continue;

		reg_base = gaudi2_tpc_cfg_blocks_bases[i];
		WREG32(reg_base + TPC_CFG_STALL_OFFSET, 1);
	}
}

static void gaudi2_rotator_stall(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 reg_val;
	int i;

	if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
		return;

	reg_val = FIELD_PREP(ROT_MSS_HALT_WBC_MASK, 0x1) |
			FIELD_PREP(ROT_MSS_HALT_RSB_MASK, 0x1) |
			FIELD_PREP(ROT_MSS_HALT_MRSB_MASK, 0x1);

	for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
			continue;

		WREG32(mmROT0_MSS_HALT + i * ROT_OFFSET, reg_val);
	}
}
static void gaudi2_disable_qman_common(struct hl_device *hdev, u32 reg_base)
{
	WREG32(reg_base + QM_GLBL_CFG0_OFFSET, 0);
}

static void gaudi2_disable_dma_qmans(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	int dcore, inst;

	if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
		goto stop_edma_qmans;

	gaudi2_disable_qman_common(hdev, mmPDMA0_QM_BASE);
	gaudi2_disable_qman_common(hdev, mmPDMA1_QM_BASE);

stop_edma_qmans:
	if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
		return;

	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
			u32 qm_base;

			if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
				continue;

			qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET +
					inst * DCORE_EDMA_OFFSET;

			/* Disable CPs of EDMA QMANs */
			gaudi2_disable_qman_common(hdev, qm_base);
		}
	}
}

static void gaudi2_disable_mme_qmans(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 offset, i;

	offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE;

	for (i = 0 ; i < NUM_OF_DCORES ; i++)
		if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i))
			gaudi2_disable_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset));
}

static void gaudi2_disable_tpc_qmans(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 reg_base;
	int i;

	if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
		return;

	for (i = 0 ; i < TPC_ID_SIZE ; i++) {
		if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
			continue;

		reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]];
		gaudi2_disable_qman_common(hdev, reg_base);
	}
}

static void gaudi2_disable_rot_qmans(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 reg_base;
	int i;

	if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
		return;

	for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
			continue;

		reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]];
		gaudi2_disable_qman_common(hdev, reg_base);
	}
}

static void gaudi2_disable_nic_qmans(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 reg_base, queue_id;
	int i;

	if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
		return;

	queue_id = GAUDI2_QUEUE_ID_NIC_0_0;

	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
		if (!(hdev->nic_ports_mask & BIT(i)))
			continue;

		reg_base = gaudi2_qm_blocks_bases[queue_id];
		gaudi2_disable_qman_common(hdev, reg_base);
	}
}
static void gaudi2_enable_timestamp(struct hl_device *hdev)
{
	/* Disable the timestamp counter */
	WREG32(mmPSOC_TIMESTAMP_BASE, 0);

	/* Zero the lower/upper parts of the 64-bit counter */
	WREG32(mmPSOC_TIMESTAMP_BASE + 0xC, 0);
	WREG32(mmPSOC_TIMESTAMP_BASE + 0x8, 0);

	/* Enable the counter */
	WREG32(mmPSOC_TIMESTAMP_BASE, 1);
}

static void gaudi2_disable_timestamp(struct hl_device *hdev)
{
	/* Disable the timestamp counter */
	WREG32(mmPSOC_TIMESTAMP_BASE, 0);
}
static const char *gaudi2_irq_name(u16 irq_number)
{
	switch (irq_number) {
	case GAUDI2_IRQ_NUM_EVENT_QUEUE:
		return "gaudi2 cpu eq";
	case GAUDI2_IRQ_NUM_COMPLETION:
		return "gaudi2 completion";
	case GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ... GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM:
		return gaudi2_vdec_irq_name[irq_number - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM];
	case GAUDI2_IRQ_NUM_USER_FIRST ... GAUDI2_IRQ_NUM_USER_LAST:
		return "gaudi2 user completion";
	default:
		return "invalid";
	}
}
static void gaudi2_dec_disable_msix(struct hl_device *hdev, u32 max_irq_num)
{
	int i, irq, relative_idx;
	struct hl_dec *dec;

	for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i < max_irq_num ; i++) {
		irq = pci_irq_vector(hdev->pdev, i);
		relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM;

		dec = hdev->dec + relative_idx / 2;

		/* We pass different structures depending on the irq handler. For the abnormal
		 * interrupt we pass hl_dec and for the regular interrupt we pass the relevant
		 * user_interrupt entry
		 */
		free_irq(irq, ((relative_idx % 2) ?
				(void *) dec :
				(void *) &hdev->user_interrupt[dec->core_id]));
	}
}

static int gaudi2_dec_enable_msix(struct hl_device *hdev)
{
	int rc, i, irq_init_cnt, irq, relative_idx;
	irq_handler_t irq_handler;
	struct hl_dec *dec;

	for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, irq_init_cnt = 0;
			i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM;
			i++, irq_init_cnt++) {

		irq = pci_irq_vector(hdev->pdev, i);
		relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM;

		irq_handler = (relative_idx % 2) ?
				hl_irq_handler_dec_abnrm :
				hl_irq_handler_user_interrupt;

		dec = hdev->dec + relative_idx / 2;

		/* We pass different structures depending on the irq handler. For the abnormal
		 * interrupt we pass hl_dec and for the regular interrupt we pass the relevant
		 * user_interrupt entry
		 */
		rc = request_irq(irq, irq_handler, 0, gaudi2_irq_name(i),
				((relative_idx % 2) ?
				(void *) dec :
				(void *) &hdev->user_interrupt[dec->core_id]));
		if (rc) {
			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
			goto free_dec_irqs;
		}
	}

	return 0;

free_dec_irqs:
	gaudi2_dec_disable_msix(hdev, (GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + irq_init_cnt));
	return rc;
}
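/*
 * Vector parity used by the two functions above, for illustration: decoder
 * vectors are allocated in (NRM, ABNRM) pairs starting at
 * GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM. So for relative_idx = 5:
 *	relative_idx / 2 == 2 -> hdev->dec[2]
 *	relative_idx % 2 == 1 -> abnormal interrupt, cookie is the hl_dec
 * while even indices are normal completions whose cookie is the matching
 * user_interrupt entry.
 */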
static int gaudi2_enable_msix(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	int rc, irq, i, j, user_irq_init_cnt;
	irq_handler_t irq_handler;
	struct hl_cq *cq;

	if (gaudi2->hw_cap_initialized & HW_CAP_MSIX)
		return 0;

	rc = pci_alloc_irq_vectors(hdev->pdev, GAUDI2_MSIX_ENTRIES, GAUDI2_MSIX_ENTRIES,
					PCI_IRQ_MSIX);
	if (rc < 0) {
		dev_err(hdev->dev, "MSI-X: Failed to enable support -- %d/%d\n",
			GAUDI2_MSIX_ENTRIES, rc);
		return rc;
	}

	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
	cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
	rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_COMPLETION), cq);
	if (rc) {
		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
		goto free_irq_vectors;
	}

	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
	rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_EVENT_QUEUE),
			&hdev->event_queue);
	if (rc) {
		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
		goto free_completion_irq;
	}

	rc = gaudi2_dec_enable_msix(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to enable decoder IRQ");
		goto free_event_irq;
	}

	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, user_irq_init_cnt = 0;
			user_irq_init_cnt < prop->user_interrupt_count;
			i++, j++, user_irq_init_cnt++) {

		irq = pci_irq_vector(hdev->pdev, i);
		irq_handler = hl_irq_handler_user_interrupt;

		rc = request_irq(irq, irq_handler, 0, gaudi2_irq_name(i), &hdev->user_interrupt[j]);
		if (rc) {
			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
			goto free_user_irq;
		}
	}

	gaudi2->hw_cap_initialized |= HW_CAP_MSIX;

	return 0;

free_user_irq:
	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count;
			i < GAUDI2_IRQ_NUM_USER_FIRST + user_irq_init_cnt ; i++, j++) {

		irq = pci_irq_vector(hdev->pdev, i);
		free_irq(irq, &hdev->user_interrupt[j]);
	}

	gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1);

free_event_irq:
	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
	free_irq(irq, &hdev->event_queue);

free_completion_irq:
	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
	free_irq(irq, cq);

free_irq_vectors:
	pci_free_irq_vectors(hdev->pdev);

	return rc;
}
static void gaudi2_sync_irqs(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	int i, j, irq;

	if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX))
		return;

	/* Wait for all pending IRQs to be finished */
	synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION));

	for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM ; i++) {
		irq = pci_irq_vector(hdev->pdev, i);
		synchronize_irq(irq);
	}

	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = 0 ; j < hdev->asic_prop.user_interrupt_count;
										i++, j++) {
		irq = pci_irq_vector(hdev->pdev, i);
		synchronize_irq(irq);
	}

	synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE));
}

static void gaudi2_disable_msix(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	struct hl_cq *cq;
	int irq, i, j, k;

	if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX))
		return;

	gaudi2_sync_irqs(hdev);

	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
	free_irq(irq, &hdev->event_queue);

	gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1);

	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, k = 0;
			k < hdev->asic_prop.user_interrupt_count ; i++, j++, k++) {

		irq = pci_irq_vector(hdev->pdev, i);
		free_irq(irq, &hdev->user_interrupt[j]);
	}

	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
	cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
	free_irq(irq, cq);

	pci_free_irq_vectors(hdev->pdev);

	gaudi2->hw_cap_initialized &= ~HW_CAP_MSIX;
}
static void gaudi2_stop_dcore_dec(struct hl_device *hdev, int dcore_id)
{
	u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1);
	u32 graceful_pend_mask = DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK;
	u32 timeout_usec, dec_id, dec_bit, offset, graceful;
	int rc;

	if (hdev->pldm)
		timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC;
	else
		timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC;

	for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
		dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id;
		if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
			continue;

		offset = dcore_id * DCORE_OFFSET + dec_id * DCORE_VDEC_OFFSET;

		WREG32(mmDCORE0_DEC0_CMD_SWREG16 + offset, 0);

		WREG32(mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val);

		/* Wait till all traffic from the decoder stops
		 * before applying core reset.
		 */
		rc = hl_poll_timeout(
				hdev,
				mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset,
				graceful,
				(graceful & graceful_pend_mask),
				100,
				timeout_usec);
		if (rc)
			dev_err(hdev->dev,
				"Failed to stop traffic from DCORE%d Decoder %d\n",
				dcore_id, dec_id);
	}
}

static void gaudi2_stop_pcie_dec(struct hl_device *hdev)
{
	u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1);
	u32 graceful_pend_mask = PCIE_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK;
	u32 timeout_usec, dec_id, dec_bit, offset, graceful;
	int rc;

	if (hdev->pldm)
		timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC;
	else
		timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC;

	for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
		dec_bit = PCIE_DEC_SHIFT + dec_id;
		if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
			continue;

		offset = dec_id * PCIE_VDEC_OFFSET;

		WREG32(mmPCIE_DEC0_CMD_SWREG16 + offset, 0);

		WREG32(mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val);

		/* Wait till all traffic from the decoder stops
		 * before applying core reset.
		 */
		rc = hl_poll_timeout(
				hdev,
				mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset,
				graceful,
				(graceful & graceful_pend_mask),
				100,
				timeout_usec);
		if (rc)
			dev_err(hdev->dev,
				"Failed to stop traffic from PCIe Decoder %d\n",
				dec_id);
	}
}

static void gaudi2_stop_dec(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	int dcore_id;

	if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == 0)
		return;

	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
		gaudi2_stop_dcore_dec(hdev, dcore_id);

	gaudi2_stop_pcie_dec(hdev);
}
static void gaudi2_set_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
{
	u32 reg_base, reg_val;

	reg_base = gaudi2_arc_blocks_bases[cpu_id];
	if (run_mode == HL_ENGINE_CORE_RUN)
		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 1);
	else
		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_HALT_REQ_MASK, 1);

	WREG32(reg_base + ARC_HALT_REQ_OFFSET, reg_val);
}

static void gaudi2_halt_arcs(struct hl_device *hdev)
{
	u16 arc_id;

	for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++) {
		if (gaudi2_is_arc_enabled(hdev, arc_id))
			gaudi2_set_arc_running_mode(hdev, arc_id, HL_ENGINE_CORE_HALT);
	}
}

static int gaudi2_verify_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
{
	int rc;
	u32 reg_base, val, ack_mask, timeout_usec = 100000;

	if (hdev->pldm)
		timeout_usec *= 100;

	reg_base = gaudi2_arc_blocks_bases[cpu_id];
	if (run_mode == HL_ENGINE_CORE_RUN)
		ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_RUN_ACK_MASK;
	else
		ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_HALT_ACK_MASK;

	rc = hl_poll_timeout(hdev, reg_base + ARC_HALT_ACK_OFFSET,
				val, ((val & ack_mask) == ack_mask),
				1000, timeout_usec);

	if (run_mode == HL_ENGINE_CORE_RUN) {
		/* Clear the request once the core acknowledged it is running */
		val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 0);
		WREG32(reg_base + ARC_HALT_REQ_OFFSET, val);
	}

	return rc;
}

static void gaudi2_reset_arcs(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u16 arc_id;

	if (!gaudi2)
		return;

	for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++)
		if (gaudi2_is_arc_enabled(hdev, arc_id))
			gaudi2_clr_arc_id_cap(hdev, arc_id);
}
static void gaudi2_nic_qmans_manual_flush(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 queue_id;
	int i;

	if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
		return;

	queue_id = GAUDI2_QUEUE_ID_NIC_0_0;

	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
		if (!(hdev->nic_ports_mask & BIT(i)))
			continue;

		gaudi2_qman_manual_flush_common(hdev, queue_id);
	}
}

static int gaudi2_set_engine_cores(struct hl_device *hdev, u32 *core_ids,
					u32 num_cores, u32 core_command)
{
	int i, rc;

	for (i = 0 ; i < num_cores ; i++) {
		if (gaudi2_is_arc_enabled(hdev, core_ids[i]))
			gaudi2_set_arc_running_mode(hdev, core_ids[i], core_command);
	}

	for (i = 0 ; i < num_cores ; i++) {
		if (gaudi2_is_arc_enabled(hdev, core_ids[i])) {
			rc = gaudi2_verify_arc_running_mode(hdev, core_ids[i], core_command);

			if (rc) {
				dev_err(hdev->dev, "failed to %s arc: %d\n",
					(core_command == HL_ENGINE_CORE_HALT) ?
					"HALT" : "RUN", core_ids[i]);
				return -1;
			}
		}
	}

	return 0;
}
static void gaudi2_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
{
	u32 wait_timeout_ms;

	if (hdev->pldm)
		wait_timeout_ms = GAUDI2_PLDM_RESET_WAIT_MSEC;
	else
		wait_timeout_ms = GAUDI2_RESET_WAIT_MSEC;

	if (fw_reset)
		goto skip_engines;

	gaudi2_stop_dma_qmans(hdev);
	gaudi2_stop_mme_qmans(hdev);
	gaudi2_stop_tpc_qmans(hdev);
	gaudi2_stop_rot_qmans(hdev);
	gaudi2_stop_nic_qmans(hdev);
	msleep(wait_timeout_ms);

	gaudi2_halt_arcs(hdev);
	gaudi2_dma_stall(hdev);
	gaudi2_mme_stall(hdev);
	gaudi2_tpc_stall(hdev);
	gaudi2_rotator_stall(hdev);

	msleep(wait_timeout_ms);

	gaudi2_stop_dec(hdev);

	/*
	 * in case of soft reset do a manual flush for QMANs (currently called
	 * only for NIC QMANs)
	 */
	if (!hard_reset)
		gaudi2_nic_qmans_manual_flush(hdev);

	gaudi2_disable_dma_qmans(hdev);
	gaudi2_disable_mme_qmans(hdev);
	gaudi2_disable_tpc_qmans(hdev);
	gaudi2_disable_rot_qmans(hdev);
	gaudi2_disable_nic_qmans(hdev);
	gaudi2_disable_timestamp(hdev);

skip_engines:
	if (hard_reset) {
		gaudi2_disable_msix(hdev);
		return;
	}

	gaudi2_sync_irqs(hdev);
}
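/*
 * Note on ordering in gaudi2_halt_engines(): the QMANs are stopped first so
 * no new work is fetched, then the engines themselves are stalled, and only
 * afterwards are the QMANs flushed and disabled. The two
 * msleep(wait_timeout_ms) calls give in-flight transactions time to drain
 * between the stages.
 */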
static void gaudi2_init_firmware_preload_params(struct hl_device *hdev)
{
	struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;

	pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
	pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
	pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
	pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
	pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
	pre_fw_load->wait_for_preboot_timeout = GAUDI2_PREBOOT_REQ_TIMEOUT_USEC;
}

static void gaudi2_init_firmware_loader(struct hl_device *hdev)
{
	struct fw_load_mgr *fw_loader = &hdev->fw_loader;
	struct dynamic_fw_load_mgr *dynamic_loader;
	struct cpu_dyn_regs *dyn_regs;

	/* fill common fields */
	fw_loader->fw_comp_loaded = FW_TYPE_NONE;
	fw_loader->boot_fit_img.image_name = GAUDI2_BOOT_FIT_FILE;
	fw_loader->linux_img.image_name = GAUDI2_LINUX_FW_FILE;
	fw_loader->boot_fit_timeout = GAUDI2_BOOT_FIT_REQ_TIMEOUT_USEC;
	fw_loader->skip_bmc = false;
	fw_loader->sram_bar_id = SRAM_CFG_BAR_ID;
	fw_loader->dram_bar_id = DRAM_BAR_ID;
	fw_loader->cpu_timeout = GAUDI2_CPU_TIMEOUT_USEC;

	/* here we update initial values for a few specific dynamic regs (as
	 * before reading the first descriptor from FW those values have to be
	 * hard-coded). In later stages of the protocol those values will be
	 * updated automatically by reading the FW descriptor, so the data
	 * there will always be up-to-date
	 */
	dynamic_loader = &hdev->fw_loader.dynamic_loader;
	dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
	dyn_regs->kmd_msg_to_cpu = cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
	dyn_regs->cpu_cmd_status_to_host = cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
	dynamic_loader->wait_for_bl_timeout = GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC;
}
static int gaudi2_init_cpu(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	int rc;

	if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
		return 0;

	if (gaudi2->hw_cap_initialized & HW_CAP_CPU)
		return 0;

	rc = hl_fw_init_cpu(hdev);
	if (rc)
		return rc;

	gaudi2->hw_cap_initialized |= HW_CAP_CPU;

	return 0;
}

static int gaudi2_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
{
	struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	struct cpu_dyn_regs *dyn_regs;
	struct hl_eq *eq;
	u32 status;
	int err;

	if (!hdev->cpu_queues_enable)
		return 0;

	if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)
		return 0;

	eq = &hdev->event_queue;

	dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;

	WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
	WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));

	WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
	WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));

	WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW, lower_32_bits(hdev->cpu_accessible_dma_address));
	WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH, upper_32_bits(hdev->cpu_accessible_dma_address));

	WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
	WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
	WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);

	/* Used for EQ CI */
	WREG32(mmCPU_IF_EQ_RD_OFFS, 0);

	WREG32(mmCPU_IF_PF_PQ_PI, 0);

	WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);

	/* Let the ARC know we are ready as it is now handling those queues */

	WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq),
		gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id);

	err = hl_poll_timeout(
		hdev,
		mmCPU_IF_QUEUE_INIT,
		status,
		(status == PQ_INIT_STATUS_READY_FOR_HOST),
		1000,
		cpu_timeout);

	if (err) {
		dev_err(hdev->dev, "Failed to communicate with device CPU (timeout)\n");
		return -EIO;
	}

	/* update FW application security bits */
	if (prop->fw_cpu_boot_dev_sts0_valid)
		prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);

	if (prop->fw_cpu_boot_dev_sts1_valid)
		prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);

	gaudi2->hw_cap_initialized |= HW_CAP_CPU_Q;
	return 0;
}
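/*
 * Handshake illustration for gaudi2_init_cpu_queues() above: the host
 * publishes the PQ/EQ/CQ ring addresses and lengths, writes
 * PQ_INIT_STATUS_READY_FOR_CP to mmCPU_IF_QUEUE_INIT and kicks the ARC via
 * the gic_host_pi_upd_irq doorbell; the device CPU acknowledges by flipping
 * the same register to PQ_INIT_STATUS_READY_FOR_HOST, which the
 * hl_poll_timeout() above waits for (up to cpu_timeout usec).
 */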
static void gaudi2_init_qman_pq(struct hl_device *hdev, u32 reg_base,
				u32 queue_id_base)
{
	struct hl_hw_queue *q;
	u32 pq_id, pq_offset;

	for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) {
		q = &hdev->kernel_queues[queue_id_base + pq_id];
		pq_offset = pq_id * 4;

		WREG32(reg_base + QM_PQ_BASE_LO_0_OFFSET + pq_offset,
				lower_32_bits(q->bus_address));
		WREG32(reg_base + QM_PQ_BASE_HI_0_OFFSET + pq_offset,
				upper_32_bits(q->bus_address));
		WREG32(reg_base + QM_PQ_SIZE_0_OFFSET + pq_offset, ilog2(HL_QUEUE_LENGTH));
		WREG32(reg_base + QM_PQ_PI_0_OFFSET + pq_offset, 0);
		WREG32(reg_base + QM_PQ_CI_0_OFFSET + pq_offset, 0);
	}
}

static void gaudi2_init_qman_cp(struct hl_device *hdev, u32 reg_base)
{
	u32 cp_id, cp_offset, mtr_base_lo, mtr_base_hi, so_base_lo, so_base_hi;

	mtr_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);

	for (cp_id = 0 ; cp_id < NUM_OF_CP_PER_QMAN; cp_id++) {
		cp_offset = cp_id * 4;

		WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_LO_0_OFFSET + cp_offset, mtr_base_lo);
		WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_HI_0_OFFSET + cp_offset, mtr_base_hi);
		WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_LO_0_OFFSET + cp_offset, so_base_lo);
		WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_HI_0_OFFSET + cp_offset, so_base_hi);
	}

	/* allow QMANs to accept work from ARC CQF */
	WREG32(reg_base + QM_CP_CFG_OFFSET, FIELD_PREP(PDMA0_QM_CP_CFG_SWITCH_EN_MASK, 0x1));
}

static void gaudi2_init_qman_pqc(struct hl_device *hdev, u32 reg_base,
				u32 queue_id_base)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 pq_id, pq_offset, so_base_lo, so_base_hi;

	so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);

	for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) {
		pq_offset = pq_id * 4;

		/* Configure QMAN HBW to scratchpad as it is not needed */
		WREG32(reg_base + QM_PQC_HBW_BASE_LO_0_OFFSET + pq_offset,
				lower_32_bits(gaudi2->scratchpad_bus_address));
		WREG32(reg_base + QM_PQC_HBW_BASE_HI_0_OFFSET + pq_offset,
				upper_32_bits(gaudi2->scratchpad_bus_address));
		WREG32(reg_base + QM_PQC_SIZE_0_OFFSET + pq_offset,
				ilog2(PAGE_SIZE / sizeof(struct hl_cq_entry)));

		WREG32(reg_base + QM_PQC_PI_0_OFFSET + pq_offset, 0);
		WREG32(reg_base + QM_PQC_LBW_WDATA_0_OFFSET + pq_offset, QM_PQC_LBW_WDATA);
		WREG32(reg_base + QM_PQC_LBW_BASE_LO_0_OFFSET + pq_offset, so_base_lo);
		WREG32(reg_base + QM_PQC_LBW_BASE_HI_0_OFFSET + pq_offset, so_base_hi);
	}

	/* Enable QMAN H/W completion */
	WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT);
}
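/*
 * Stride note for the three QMAN init helpers above: the per-PQ and per-CP
 * registers are laid out as arrays of 32-bit registers placed 4 bytes apart,
 * so entry N of e.g. QM_PQ_PI_0 lives at QM_PQ_PI_0_OFFSET + N * 4; pq_offset
 * and cp_offset are therefore simply the index times 4.
 */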
static u32 gaudi2_get_dyn_sp_reg(struct hl_device *hdev, u32 queue_id_base)
{
	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 sp_reg_addr;

	switch (queue_id_base) {
	case GAUDI2_QUEUE_ID_PDMA_0_0...GAUDI2_QUEUE_ID_PDMA_1_3:
		fallthrough;
	case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
		fallthrough;
	case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
		fallthrough;
	case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
		fallthrough;
	case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
		sp_reg_addr = le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
		break;
	case GAUDI2_QUEUE_ID_DCORE0_MME_0_0...GAUDI2_QUEUE_ID_DCORE0_MME_0_3:
		fallthrough;
	case GAUDI2_QUEUE_ID_DCORE1_MME_0_0...GAUDI2_QUEUE_ID_DCORE1_MME_0_3:
		fallthrough;
	case GAUDI2_QUEUE_ID_DCORE2_MME_0_0...GAUDI2_QUEUE_ID_DCORE2_MME_0_3:
		fallthrough;
	case GAUDI2_QUEUE_ID_DCORE3_MME_0_0...GAUDI2_QUEUE_ID_DCORE3_MME_0_3:
		sp_reg_addr = le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
		break;
	case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3:
		fallthrough;
	case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3:
		fallthrough;
	case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3:
		fallthrough;
	case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3:
		sp_reg_addr = le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
		break;
	case GAUDI2_QUEUE_ID_ROT_0_0...GAUDI2_QUEUE_ID_ROT_1_3:
		sp_reg_addr = le32_to_cpu(dyn_regs->gic_rot_qm_irq_ctrl);
		break;
	case GAUDI2_QUEUE_ID_NIC_0_0...GAUDI2_QUEUE_ID_NIC_23_3:
		sp_reg_addr = le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
		break;
	default:
		dev_err(hdev->dev, "Unexpected h/w queue %d\n", queue_id_base);
		return 0;
	}

	return sp_reg_addr;
}
static void gaudi2_init_qman_common(struct hl_device *hdev, u32 reg_base,
					u32 queue_id_base)
{
	u32 glbl_prot = QMAN_MAKE_TRUSTED, irq_handler_offset;
	int map_table_entry;

	WREG32(reg_base + QM_GLBL_PROT_OFFSET, glbl_prot);

	irq_handler_offset = gaudi2_get_dyn_sp_reg(hdev, queue_id_base);
	WREG32(reg_base + QM_GLBL_ERR_ADDR_LO_OFFSET, lower_32_bits(CFG_BASE + irq_handler_offset));
	WREG32(reg_base + QM_GLBL_ERR_ADDR_HI_OFFSET, upper_32_bits(CFG_BASE + irq_handler_offset));

	map_table_entry = gaudi2_qman_async_event_id[queue_id_base];
	WREG32(reg_base + QM_GLBL_ERR_WDATA_OFFSET,
		gaudi2_irq_map_table[map_table_entry].cpu_id);

	WREG32(reg_base + QM_ARB_ERR_MSG_EN_OFFSET, QM_ARB_ERR_MSG_EN_MASK);

	WREG32(reg_base + QM_ARB_SLV_CHOISE_WDT_OFFSET, GAUDI2_ARB_WDT_TIMEOUT);
	WREG32(reg_base + QM_GLBL_CFG1_OFFSET, 0);
	WREG32(reg_base + QM_GLBL_CFG2_OFFSET, 0);

	/* Enable the QMAN channel.
	 * PDMA QMAN configuration is different, as we do not allow user to
	 * access some of the CPs.
	 * PDMA0: CP2/3 are reserved for the ARC usage.
	 * PDMA1: CP1/2/3 are reserved for the ARC usage.
	 */
	if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0])
		WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA1_QMAN_ENABLE);
	else if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0])
		WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA0_QMAN_ENABLE);
	else
		WREG32(reg_base + QM_GLBL_CFG0_OFFSET, QMAN_ENABLE);
}

static void gaudi2_init_qman(struct hl_device *hdev, u32 reg_base,
				u32 queue_id_base)
{
	u32 pq_id;

	for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++)
		hdev->kernel_queues[queue_id_base + pq_id].cq_id = GAUDI2_RESERVED_CQ_CS_COMPLETION;

	gaudi2_init_qman_pq(hdev, reg_base, queue_id_base);
	gaudi2_init_qman_cp(hdev, reg_base);
	gaudi2_init_qman_pqc(hdev, reg_base, queue_id_base);
	gaudi2_init_qman_common(hdev, reg_base, queue_id_base);
}
static void gaudi2_init_dma_core(struct hl_device *hdev, u32 reg_base,
				u32 dma_core_id, bool is_secure)
{
	u32 prot, irq_handler_offset;
	struct cpu_dyn_regs *dyn_regs;
	int map_table_entry;

	prot = 1 << ARC_FARM_KDMA_PROT_ERR_VAL_SHIFT;
	if (is_secure)
		prot |= 1 << ARC_FARM_KDMA_PROT_VAL_SHIFT;

	WREG32(reg_base + DMA_CORE_PROT_OFFSET, prot);

	dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	irq_handler_offset = le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);

	WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_LO_OFFSET,
			lower_32_bits(CFG_BASE + irq_handler_offset));

	WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_HI_OFFSET,
			upper_32_bits(CFG_BASE + irq_handler_offset));

	map_table_entry = gaudi2_dma_core_async_event_id[dma_core_id];
	WREG32(reg_base + DMA_CORE_ERRMSG_WDATA_OFFSET,
		gaudi2_irq_map_table[map_table_entry].cpu_id);

	/* Enable the DMA channel */
	WREG32(reg_base + DMA_CORE_CFG_0_OFFSET, 1 << ARC_FARM_KDMA_CFG_0_EN_SHIFT);
}

static void gaudi2_init_kdma(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 reg_base;

	if ((gaudi2->hw_cap_initialized & HW_CAP_KDMA) == HW_CAP_KDMA)
		return;

	reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_KDMA];

	gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_KDMA, true);

	gaudi2->hw_cap_initialized |= HW_CAP_KDMA;
}

static void gaudi2_init_pdma(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 reg_base;

	if ((gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK) == HW_CAP_PDMA_MASK)
		return;

	reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA0];
	gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA0, false);

	reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0];
	gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_0_0);

	reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA1];
	gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA1, false);

	reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0];
	gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_1_0);

	gaudi2->hw_cap_initialized |= HW_CAP_PDMA_MASK;
}
static void gaudi2_init_edma_instance(struct hl_device *hdev, u8 seq)
{
	u32 reg_base, base_edma_core_id, base_edma_qman_id;

	base_edma_core_id = DMA_CORE_ID_EDMA0 + seq;
	base_edma_qman_id = edma_stream_base[seq];

	reg_base = gaudi2_dma_core_blocks_bases[base_edma_core_id];
	gaudi2_init_dma_core(hdev, reg_base, base_edma_core_id, false);

	reg_base = gaudi2_qm_blocks_bases[base_edma_qman_id];
	gaudi2_init_qman(hdev, reg_base, base_edma_qman_id);
}

static void gaudi2_init_edma(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	int dcore, inst;

	if ((gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK) == HW_CAP_EDMA_MASK)
		return;

	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;

			if (!(prop->edma_enabled_mask & BIT(seq)))
				continue;

			gaudi2_init_edma_instance(hdev, seq);

			gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_EDMA_SHIFT + seq);
		}
	}
}
/**
 * gaudi2_arm_monitors_for_virt_msix_db() - Arm monitors for writing to the virtual MSI-X doorbell.
 * @hdev: pointer to habanalabs device structure.
 * @sob_id: sync object ID.
 * @first_mon_id: ID of first monitor out of 3 consecutive monitors.
 * @interrupt_id: interrupt ID.
 *
 * Some initiators cannot have HBW address in their completion address registers, and thus cannot
 * write directly to the HBW host memory of the virtual MSI-X doorbell.
 * Instead, they are configured to LBW write to a sync object, and a monitor will do the HBW write.
 *
 * The mechanism in the sync manager block is composed of a master monitor with 3 messages.
 * In addition to the HBW write, the other 2 messages are for preparing the monitor for the next
 * completion, by decrementing the sync object value and re-arming the monitor.
 */
static void gaudi2_arm_monitors_for_virt_msix_db(struct hl_device *hdev, u32 sob_id,
							u32 first_mon_id, u32 interrupt_id)
{
	u32 sob_offset, first_mon_offset, mon_offset, payload, sob_group, mode, arm, config;
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u64 addr;
	u8 mask;

	/* Reset the SOB value */
	sob_offset = sob_id * sizeof(u32);
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0);

	/* Configure 3 monitors:
	 * 1. Write interrupt ID to the virtual MSI-X doorbell (master monitor)
	 * 2. Decrement SOB value by 1.
	 * 3. Re-arm the master monitor.
	 */

	first_mon_offset = first_mon_id * sizeof(u32);

	/* 2nd monitor: Decrement SOB value by 1 */
	mon_offset = first_mon_offset + sizeof(u32);

	addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));

	payload = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 0x7FFF) | /* "-1" */
			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_MASK, 1) |
			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1);
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);

	/* 3rd monitor: Re-arm the master monitor */
	mon_offset = first_mon_offset + 2 * sizeof(u32);

	addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + first_mon_offset;
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));

	sob_group = sob_id / 8;
	mask = ~BIT(sob_id & 0x7);
	mode = 0; /* comparison mode is "greater than or equal to" */
	arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sob_group) |
		FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask) |
		FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode) |
		FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, 1);

	payload = arm;
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);

	/* 1st monitor (master): Write interrupt ID to the virtual MSI-X doorbell */
	mon_offset = first_mon_offset;

	config = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_WR_NUM_MASK, 2); /* "2": 3 writes */
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + mon_offset, config);

	addr = gaudi2->virt_msix_db_dma_addr;
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));

	payload = interrupt_id;
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);

	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, arm);
}
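/*
 * Flow illustration, e.g. for a decoder's normal completions (IDs as passed
 * by gaudi2_prepare_sm_for_virt_msix_db() below):
 *	1. The decoder LBW-writes "+1" to its reserved SOB.
 *	2. The armed master monitor sees SOB >= 1 and issues its 3 writes:
 *	   the interrupt ID to the virtual MSI-X doorbell (which triggers the
 *	   matching MSI-X vector), "-1" to the SOB, and the saved ARM value
 *	   back into the master monitor's MON_ARM register.
 *	3. The monitor is thereby re-armed for the next completion.
 */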
static void gaudi2_prepare_sm_for_virt_msix_db(struct hl_device *hdev)
{
	u32 decoder_id, sob_id, first_mon_id, interrupt_id;
	struct asic_fixed_properties *prop = &hdev->asic_prop;

	/* Decoder normal/abnormal interrupts */
	for (decoder_id = 0 ; decoder_id < NUMBER_OF_DEC ; ++decoder_id) {
		if (!(prop->decoder_enabled_mask & BIT(decoder_id)))
			continue;

		sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id;
		first_mon_id = GAUDI2_RESERVED_MON_DEC_NRM_FIRST + 3 * decoder_id;
		interrupt_id = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 2 * decoder_id;
		gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id);

		sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id;
		first_mon_id = GAUDI2_RESERVED_MON_DEC_ABNRM_FIRST + 3 * decoder_id;
		interrupt_id += 1;
		gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id);
	}
}
static void gaudi2_init_sm(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u64 cq_address;
	u32 reg_val;
	int i;

	/* Enable HBW/LBW CQ for completion monitors */
	reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1);
	reg_val |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_LBW_EN_MASK, 1);

	for (i = 0 ; i < GAUDI2_MAX_PENDING_CS ; i++)
		WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val);

	/* Enable only HBW CQ for KDMA completion monitor */
	reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1);
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val);

	/* Init CQ0 DB - configure the monitor to trigger MSI-X interrupt */
	WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0, lower_32_bits(gaudi2->virt_msix_db_dma_addr));
	WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0, upper_32_bits(gaudi2->virt_msix_db_dma_addr));
	WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0, GAUDI2_IRQ_NUM_COMPLETION);

	for (i = 0 ; i < GAUDI2_RESERVED_CQ_NUMBER ; i++) {
		cq_address = hdev->completion_queue[i].bus_address;

		WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + (4 * i),
			lower_32_bits(cq_address));
		WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + (4 * i),
			upper_32_bits(cq_address));
		WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + (4 * i),
			ilog2(HL_CQ_SIZE_IN_BYTES));
	}

	/* Configure kernel ASID and MMU BP */
	WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_SEC, 0x10000);
	WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV, 0);

	/* Initialize sync objects and monitors which are used for the virtual MSI-X doorbell */
	gaudi2_prepare_sm_for_virt_msix_db(hdev);
}
static void gaudi2_init_mme_acc(struct hl_device *hdev, u32 reg_base)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 reg_val;
	int i;

	reg_val = FIELD_PREP(MME_ACC_INTR_MASK_WBC_ERR_RESP_MASK, 0);
	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_POS_INF_MASK, 1);
	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NEG_INF_MASK, 1);
	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NAN_MASK, 1);
	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_POS_INF_MASK, 1);
	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_NEG_INF_MASK, 1);

	WREG32(reg_base + MME_ACC_INTR_MASK_OFFSET, reg_val);
	WREG32(reg_base + MME_ACC_AP_LFSR_POLY_OFFSET, 0x80DEADAF);

	for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++) {
		WREG32(reg_base + MME_ACC_AP_LFSR_SEED_SEL_OFFSET, i);
		WREG32(reg_base + MME_ACC_AP_LFSR_SEED_WDATA_OFFSET, gaudi2->lfsr_rand_seeds[i]);
	}
}

static void gaudi2_init_dcore_mme(struct hl_device *hdev, int dcore_id,
				bool config_qman_only)
{
	u32 queue_id_base, reg_base;

	switch (dcore_id) {
	case 0:
		queue_id_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0;
		break;
	case 1:
		queue_id_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0;
		break;
	case 2:
		queue_id_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0;
		break;
	case 3:
		queue_id_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0;
		break;
	default:
		dev_err(hdev->dev, "Invalid dcore id %u\n", dcore_id);
		return;
	}

	if (!config_qman_only) {
		reg_base = gaudi2_mme_acc_blocks_bases[dcore_id];
		gaudi2_init_mme_acc(hdev, reg_base);
	}

	reg_base = gaudi2_qm_blocks_bases[queue_id_base];
	gaudi2_init_qman(hdev, reg_base, queue_id_base);
}

static void gaudi2_init_mme(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	int i;

	if ((gaudi2->hw_cap_initialized & HW_CAP_MME_MASK) == HW_CAP_MME_MASK)
		return;

	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
		gaudi2_init_dcore_mme(hdev, i, false);

		gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_MME_SHIFT + i);
	}
}

static void gaudi2_init_tpc_cfg(struct hl_device *hdev, u32 reg_base)
{
	/* Mask arithmetic and QM interrupts in TPC */
	WREG32(reg_base + TPC_CFG_TPC_INTR_MASK_OFFSET, 0x23FFFE);

	/* Set 16 cache lines */
	WREG32(reg_base + TPC_CFG_MSS_CONFIG_OFFSET,
			2 << DCORE0_TPC0_CFG_MSS_CONFIG_ICACHE_FETCH_LINE_NUM_SHIFT);
}
struct gaudi2_tpc_init_cfg_data {
	enum gaudi2_queue_id dcore_tpc_qid_base[NUM_OF_DCORES];
};

static void gaudi2_init_tpc_config(struct hl_device *hdev, int dcore, int inst,
					u32 offset, struct iterate_module_ctx *ctx)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	struct gaudi2_tpc_init_cfg_data *cfg_data = ctx->data;
	u32 queue_id_base;
	u8 seq;

	queue_id_base = cfg_data->dcore_tpc_qid_base[dcore] + (inst * NUM_OF_PQ_PER_QMAN);

	if (dcore == 0 && inst == (NUM_DCORE0_TPC - 1))
		/* gets last sequence number */
		seq = NUM_OF_DCORES * NUM_OF_TPC_PER_DCORE;
	else
		seq = dcore * NUM_OF_TPC_PER_DCORE + inst;

	gaudi2_init_tpc_cfg(hdev, mmDCORE0_TPC0_CFG_BASE + offset);
	gaudi2_init_qman(hdev, mmDCORE0_TPC0_QM_BASE + offset, queue_id_base);

	gaudi2->tpc_hw_cap_initialized |= BIT_ULL(HW_CAP_TPC_SHIFT + seq);
}

static void gaudi2_init_tpc(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	struct gaudi2_tpc_init_cfg_data init_cfg_data;
	struct iterate_module_ctx tpc_iter;

	if (!hdev->asic_prop.tpc_enabled_mask)
		return;

	if ((gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK) == HW_CAP_TPC_MASK)
		return;

	init_cfg_data.dcore_tpc_qid_base[0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0;
	init_cfg_data.dcore_tpc_qid_base[1] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0;
	init_cfg_data.dcore_tpc_qid_base[2] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0;
	init_cfg_data.dcore_tpc_qid_base[3] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0;
	tpc_iter.fn = &gaudi2_init_tpc_config;
	tpc_iter.data = &init_cfg_data;
	gaudi2_iterate_tpcs(hdev, &tpc_iter);
}
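/*
 * Sequence numbering example for gaudi2_init_tpc_config() above: normally
 * seq = dcore * NUM_OF_TPC_PER_DCORE + inst, e.g. DCORE2 TPC3 gives
 * seq = 2 * NUM_OF_TPC_PER_DCORE + 3. The one extra TPC that only DCORE0
 * has (inst == NUM_DCORE0_TPC - 1) falls outside that formula and takes the
 * last sequence number, NUM_OF_DCORES * NUM_OF_TPC_PER_DCORE.
 */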
static void gaudi2_init_rotator(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 i, reg_base, queue_id;

	queue_id = GAUDI2_QUEUE_ID_ROT_0_0;

	for (i = 0 ; i < NUM_OF_ROT ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
		reg_base = gaudi2_qm_blocks_bases[queue_id];
		gaudi2_init_qman(hdev, reg_base, queue_id);

		gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_ROT_SHIFT + i);
	}
}

static void gaudi2_init_vdec_brdg_ctrl(struct hl_device *hdev, u64 base_addr, u32 decoder_id)
{
	u32 sob_id;

	/* VCMD normal interrupt */
	sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id;
	WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_AWADDR,
			mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32));
	WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE);

	/* VCMD abnormal interrupt */
	sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id;
	WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_AWADDR,
			mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32));
	WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE);
}
static void gaudi2_init_dec(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 dcore_id, dec_id, dec_bit;
	u64 base_addr;

	if (!hdev->asic_prop.decoder_enabled_mask)
		return;

	if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == HW_CAP_DEC_MASK)
		return;

	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
		for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
			dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id;

			if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
				continue;

			base_addr = mmDCORE0_DEC0_CMD_BASE +
					BRDG_CTRL_BLOCK_OFFSET +
					dcore_id * DCORE_OFFSET +
					dec_id * DCORE_VDEC_OFFSET;

			gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit);

			gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit);
		}

	for (dec_id = 0 ; dec_id < NUM_OF_PCIE_VDEC ; dec_id++) {
		dec_bit = PCIE_DEC_SHIFT + dec_id;
		if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
			continue;

		base_addr = mmPCIE_DEC0_CMD_BASE + BRDG_CTRL_BLOCK_OFFSET +
				dec_id * DCORE_VDEC_OFFSET;

		gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit);

		gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit);
	}
}
static int gaudi2_mmu_update_asid_hop0_addr(struct hl_device *hdev,
					u32 stlb_base, u32 asid, u64 phys_addr)
{
	u32 status, timeout_usec;
	int rc;

	if (hdev->pldm || !hdev->pdev)
		timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	WREG32(stlb_base + STLB_ASID_OFFSET, asid);
	WREG32(stlb_base + STLB_HOP0_PA43_12_OFFSET, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
	WREG32(stlb_base + STLB_HOP0_PA63_44_OFFSET, phys_addr >> MMU_HOP0_PA63_44_SHIFT);
	WREG32(stlb_base + STLB_BUSY_OFFSET, 0x80000000);

	rc = hl_poll_timeout(
		hdev,
		stlb_base + STLB_BUSY_OFFSET,
		status,
		!(status & 0x80000000),
		1000,
		timeout_usec);

	if (rc) {
		dev_err(hdev->dev, "Timeout during MMU hop0 config of asid %d\n", asid);
		return rc;
	}

	return 0;
}
static void gaudi2_mmu_send_invalidate_cache_cmd(struct hl_device *hdev, u32 stlb_base,
						u32 start_offset, u32 inv_start_val,
						u32 flags)
{
	/* clear PMMU mem line cache (only needed in mmu range invalidation) */
	if (flags & MMU_OP_CLEAR_MEMCACHE)
		WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INVALIDATION, 0x1);

	if (flags & MMU_OP_SKIP_LOW_CACHE_INV)
		return;

	WREG32(stlb_base + start_offset, inv_start_val);
}
static int gaudi2_mmu_invalidate_cache_status_poll(struct hl_device *hdev, u32 stlb_base,
						struct gaudi2_cache_invld_params *inv_params)
{
	u32 status, timeout_usec, start_offset;
	int rc;

	timeout_usec = (hdev->pldm) ? GAUDI2_PLDM_MMU_TIMEOUT_USEC :
					GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC;

	/* poll PMMU mem line cache (only needed in mmu range invalidation) */
	if (inv_params->flags & MMU_OP_CLEAR_MEMCACHE) {
		rc = hl_poll_timeout(
			hdev,
			mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS,
			status,
			status & 0x1,
			1000,
			timeout_usec);

		if (rc)
			return rc;

		/* Need to manually reset the status to 0 */
		WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS, 0x0);
	}

	/* Lower cache does not work with cache lines, hence we can skip its
	 * invalidation upon map and invalidate only upon unmap
	 */
	if (inv_params->flags & MMU_OP_SKIP_LOW_CACHE_INV)
		return 0;

	start_offset = inv_params->range_invalidation ?
			STLB_RANGE_CACHE_INVALIDATION_OFFSET : STLB_INV_ALL_START_OFFSET;

	rc = hl_poll_timeout(
		hdev,
		stlb_base + start_offset,
		status,
		!(status & 0x1),
		1000,
		timeout_usec);

	return rc;
}
bool gaudi2_is_hmmu_enabled(struct hl_device *hdev, int dcore_id, int hmmu_id)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 hw_cap;

	hw_cap = HW_CAP_DCORE0_DMMU0 << (NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id);

	if (gaudi2->hw_cap_initialized & hw_cap)
		return true;

	return false;
}

/* this function shall be called only for HMMUs for which capability bit is set */
static inline u32 get_hmmu_stlb_base(int dcore_id, int hmmu_id)
{
	u32 offset;

	offset = (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET);
	return (u32)(mmDCORE0_HMMU0_STLB_BASE + offset);
}
static void gaudi2_mmu_invalidate_cache_trigger(struct hl_device *hdev, u32 stlb_base,
						struct gaudi2_cache_invld_params *inv_params)
{
	u32 start_offset;

	if (inv_params->range_invalidation) {
		/* Set the addresses range
		 * Note: the start address we set in the register is not
		 * included in the invalidation range, by design.
		 * That's why we need to set a lower address than the one we
		 * actually want to be included in the range invalidation.
		 */
		u64 start = inv_params->start_va - 1;

		start_offset = STLB_RANGE_CACHE_INVALIDATION_OFFSET;

		WREG32(stlb_base + STLB_RANGE_INV_START_LSB_OFFSET,
				start >> MMU_RANGE_INV_VA_LSB_SHIFT);

		WREG32(stlb_base + STLB_RANGE_INV_START_MSB_OFFSET,
				start >> MMU_RANGE_INV_VA_MSB_SHIFT);

		WREG32(stlb_base + STLB_RANGE_INV_END_LSB_OFFSET,
				inv_params->end_va >> MMU_RANGE_INV_VA_LSB_SHIFT);

		WREG32(stlb_base + STLB_RANGE_INV_END_MSB_OFFSET,
				inv_params->end_va >> MMU_RANGE_INV_VA_MSB_SHIFT);
	} else {
		start_offset = STLB_INV_ALL_START_OFFSET;
	}

	gaudi2_mmu_send_invalidate_cache_cmd(hdev, stlb_base, start_offset,
						inv_params->inv_start_val, inv_params->flags);
}

static inline void gaudi2_hmmu_invalidate_cache_trigger(struct hl_device *hdev,
						int dcore_id, int hmmu_id,
						struct gaudi2_cache_invld_params *inv_params)
{
	u32 stlb_base = get_hmmu_stlb_base(dcore_id, hmmu_id);

	gaudi2_mmu_invalidate_cache_trigger(hdev, stlb_base, inv_params);
}

static inline int gaudi2_hmmu_invalidate_cache_status_poll(struct hl_device *hdev,
						int dcore_id, int hmmu_id,
						struct gaudi2_cache_invld_params *inv_params)
{
	u32 stlb_base = get_hmmu_stlb_base(dcore_id, hmmu_id);

	return gaudi2_mmu_invalidate_cache_status_poll(hdev, stlb_base, inv_params);
}
static int gaudi2_hmmus_invalidate_cache(struct hl_device *hdev,
						struct gaudi2_cache_invld_params *inv_params)
{
	int dcore_id, hmmu_id;

	/* first send all invalidation commands */
	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
		for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) {
			if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id))
				continue;

			gaudi2_hmmu_invalidate_cache_trigger(hdev, dcore_id, hmmu_id, inv_params);
		}
	}

	/* next, poll all invalidations status */
	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
		for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) {
			int rc;

			if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id))
				continue;

			rc = gaudi2_hmmu_invalidate_cache_status_poll(hdev, dcore_id, hmmu_id,
										inv_params);
			if (rc)
				return rc;
		}
	}

	return 0;
}
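/*
 * Design note on gaudi2_hmmus_invalidate_cache(): all enabled HMMUs are
 * triggered first and polled only afterwards, so the invalidations proceed
 * in parallel across the device instead of paying the full invalidation
 * latency once per HMMU.
 */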
static int gaudi2_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	struct gaudi2_cache_invld_params invld_params;
	int rc = 0;

	if (hdev->reset_info.hard_reset_pending)
		return rc;

	invld_params.range_invalidation = false;
	invld_params.inv_start_val = 1;

	if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
		invld_params.flags = flags;
		gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params);
		rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE,
										&invld_params);
	} else if (flags & MMU_OP_PHYS_PACK) {
		invld_params.flags = 0;
		rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params);
	}

	return rc;
}
static int gaudi2_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard,
				u32 flags, u32 asid, u64 va, u64 size)
{
	struct gaudi2_cache_invld_params invld_params = {0};
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u64 start_va, end_va;
	u32 inv_start_val;
	int rc = 0;

	if (hdev->reset_info.hard_reset_pending)
		return rc;

	inv_start_val = (1 << MMU_RANGE_INV_EN_SHIFT |
			1 << MMU_RANGE_INV_ASID_EN_SHIFT |
			asid << MMU_RANGE_INV_ASID_SHIFT);
	start_va = va;
	end_va = start_va + size;

	if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
		/* As range invalidation does not support a zero address, we
		 * fall back to a full invalidation in that case
		 */
		if (start_va) {
			invld_params.range_invalidation = true;
			invld_params.start_va = start_va;
			invld_params.end_va = end_va;
			invld_params.inv_start_val = inv_start_val;
			invld_params.flags = flags | MMU_OP_CLEAR_MEMCACHE;
		} else {
			invld_params.range_invalidation = false;
			invld_params.inv_start_val = 1;
			invld_params.flags = flags;
		}

		gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params);
		rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE,
										&invld_params);
		if (rc)
			return rc;

	} else if (flags & MMU_OP_PHYS_PACK) {
		invld_params.start_va = gaudi2_mmu_scramble_addr(hdev, start_va);
		invld_params.end_va = gaudi2_mmu_scramble_addr(hdev, end_va);
		invld_params.inv_start_val = inv_start_val;
		invld_params.flags = flags;
		rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params);
	}

	return rc;
}
static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 hop0_addr;
	u32 asid, max_asid = prop->max_asid;
	int rc;

	/* it takes too much time to init all of the ASIDs on palladium */
	if (hdev->pldm)
		max_asid = min((u32) 8, max_asid);

	for (asid = 0 ; asid < max_asid ; asid++) {
		hop0_addr = hdev->mmu_priv.hr.mmu_asid_hop0[asid].phys_addr;
		rc = gaudi2_mmu_update_asid_hop0_addr(hdev, stlb_base, asid, hop0_addr);
		if (rc) {
			dev_err(hdev->dev, "failed to set hop0 addr for asid %d\n", asid);
			return rc;
		}
	}

	return 0;
}
static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb_base)
{
	u32 status, timeout_usec;
	int rc;

	if (hdev->pldm || !hdev->pdev)
		timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC;

	WREG32(stlb_base + STLB_INV_ALL_START_OFFSET, 1);

	rc = hl_poll_timeout(
		hdev,
		stlb_base + STLB_SRAM_INIT_OFFSET,
		status,
		!status,
		1000,
		timeout_usec);

	if (rc)
		dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU SRAM init\n");

	rc = gaudi2_mmu_update_hop0_addr(hdev, stlb_base);
	if (rc)
		return rc;

	WREG32(mmu_base + MMU_BYPASS_OFFSET, 0);

	rc = hl_poll_timeout(
		hdev,
		stlb_base + STLB_INV_ALL_START_OFFSET,
		status,
		!status,
		1000,
		timeout_usec);

	if (rc)
		dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU invalidate all\n");

	WREG32(mmu_base + MMU_ENABLE_OFFSET, 1);

	return rc;
}
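/*
 * Sequence recap for gaudi2_mmu_init_common(), derived from the code above:
 * 1. start an "invalidate all" and wait for the STLB SRAM init to finish,
 * 2. program the per-ASID hop0 addresses,
 * 3. clear the MMU bypass and wait for the invalidation to complete,
 * 4. only then enable the MMU.
 */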
static int gaudi2_pci_mmu_init(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 mmu_base, stlb_base;
	int rc;

	if (gaudi2->hw_cap_initialized & HW_CAP_PMMU)
		return 0;

	mmu_base = mmPMMU_HBW_MMU_BASE;
	stlb_base = mmPMMU_HBW_STLB_BASE;

	RMWREG32(stlb_base + STLB_HOP_CONFIGURATION_OFFSET,
		(0 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_SHIFT) |
		(5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_SHIFT) |
		(4 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_SHIFT) |
		(5 << PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_SHIFT) |
		(5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_SHIFT),
		PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK |
		PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK |
		PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK |
		PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_MASK |
		PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK);

	WREG32(stlb_base + STLB_LL_LOOKUP_MASK_63_32_OFFSET, 0);

	if (PAGE_SIZE == SZ_64K) {
		/* Set page sizes to 64K on hop5 and 16M on hop4 + enable 8 bit hops */
		RMWREG32(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET,
			FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK, 4) |
			FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK, 3) |
			FIELD_PREP(
				DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK,
				1),
			DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK |
			DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK |
			DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK);
	}

	WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_PMMU_SPI_SEI_ENABLE_MASK);

	rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base);
	if (rc)
		return rc;

	gaudi2->hw_cap_initialized |= HW_CAP_PMMU;

	return 0;
}
static int gaudi2_dcore_hmmu_init(struct hl_device *hdev, int dcore_id,
				int hmmu_id)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 offset, mmu_base, stlb_base, hw_cap;
	u8 dmmu_seq;
	int rc;

	dmmu_seq = NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id;
	hw_cap = HW_CAP_DCORE0_DMMU0 << dmmu_seq;

	/*
	 * return if DMMU is already initialized or if it's not out of
	 * isolation (due to cluster binning)
	 */
	if ((gaudi2->hw_cap_initialized & hw_cap) || !(prop->hmmu_hif_enabled_mask & BIT(dmmu_seq)))
		return 0;

	offset = (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET);
	mmu_base = mmDCORE0_HMMU0_MMU_BASE + offset;
	stlb_base = mmDCORE0_HMMU0_STLB_BASE + offset;

	RMWREG32(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET, 5 /* 64MB */,
			MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK);

	RMWREG32(stlb_base + STLB_HOP_CONFIGURATION_OFFSET,
		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK, 0) |
		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK, 3) |
		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK, 3) |
		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK, 3) |
		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK, 3),
		DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK |
		DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK |
		DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK |
		DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK |
		DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK);

	RMWREG32(stlb_base + STLB_HOP_CONFIGURATION_OFFSET, 1,
			STLB_HOP_CONFIGURATION_ONLY_LARGE_PAGE_MASK);

	WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_HMMU_SPI_SEI_ENABLE_MASK);

	rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base);
	if (rc)
		return rc;

	gaudi2->hw_cap_initialized |= hw_cap;

	return 0;
}
static int gaudi2_hbm_mmu_init(struct hl_device *hdev)
{
	int rc, dcore_id, hmmu_id;

	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
		for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) {
			rc = gaudi2_dcore_hmmu_init(hdev, dcore_id, hmmu_id);
			if (rc)
				return rc;
		}

	return 0;
}
static int gaudi2_mmu_init(struct hl_device *hdev)
{
	int rc;

	rc = gaudi2_pci_mmu_init(hdev);
	if (rc)
		return rc;

	rc = gaudi2_hbm_mmu_init(hdev);
	if (rc)
		return rc;

	return 0;
}
static int gaudi2_hw_init(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	int rc;

	/* Let's mark in the H/W that we have reached this point. We check
	 * this value in the reset_before_init function to understand whether
	 * we need to reset the chip before doing H/W init. This register is
	 * cleared by the H/W upon H/W reset
	 */
	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);

	/* Perform read from the device to make sure device is up */
	RREG32(mmHW_STATE);

	/* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
	 * So we set it here and if anyone tries to move it later to
	 * a different address, there will be an error
	 */
	if (hdev->asic_prop.iatu_done_by_fw)
		gaudi2->dram_bar_cur_addr = DRAM_PHYS_BASE;

	/*
	 * Before pushing u-boot/linux to device, need to set the hbm bar to
	 * base address of dram
	 */
	if (gaudi2_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
		dev_err(hdev->dev, "failed to map HBM bar to DRAM base address\n");
		return -EIO;
	}

	rc = gaudi2_init_cpu(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize CPU\n");
		return rc;
	}

	gaudi2_init_scrambler_hbm(hdev);
	gaudi2_init_kdma(hdev);

	rc = gaudi2_init_cpu_queues(hdev, GAUDI2_CPU_TIMEOUT_USEC);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n", rc);
		return rc;
	}

	rc = gaudi2->cpucp_info_get(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get cpucp info\n");
		return rc;
	}

	rc = gaudi2_mmu_init(hdev);
	if (rc)
		return rc;

	gaudi2_init_pdma(hdev);
	gaudi2_init_edma(hdev);
	gaudi2_init_sm(hdev);
	gaudi2_init_tpc(hdev);
	gaudi2_init_mme(hdev);
	gaudi2_init_rotator(hdev);
	gaudi2_init_dec(hdev);
	gaudi2_enable_timestamp(hdev);

	rc = gaudi2_coresight_init(hdev);
	if (rc)
		goto disable_queues;

	rc = gaudi2_enable_msix(hdev);
	if (rc)
		goto disable_queues;

	/* Perform read from the device to flush all configuration */
	RREG32(mmHW_STATE);

	return 0;

disable_queues:
	gaudi2_disable_dma_qmans(hdev);
	gaudi2_disable_mme_qmans(hdev);
	gaudi2_disable_tpc_qmans(hdev);
	gaudi2_disable_rot_qmans(hdev);
	gaudi2_disable_nic_qmans(hdev);

	gaudi2_disable_timestamp(hdev);

	return rc;
}
/**
 * gaudi2_send_hard_reset_cmd - common function to handle reset
 *
 * @hdev: pointer to the habanalabs device structure
 *
 * This function handles the various possible scenarios for reset.
 * It considers whether the reset is handled by driver or FW and which FW
 * components are loaded.
 */
static void gaudi2_send_hard_reset_cmd(struct hl_device *hdev)
{
	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	bool heartbeat_reset, preboot_only, cpu_initialized = false;
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 cpu_boot_status;

	preboot_only = (hdev->fw_loader.fw_comp_loaded == FW_TYPE_PREBOOT_CPU);
	heartbeat_reset = (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT);

	/*
	 * Handle a corner case where the failure happened while loading the CPU
	 * management app, but the driver didn't detect any failure while loading
	 * the FW. In that scenario the driver will send only HALT_MACHINE, and
	 * no one will respond to the request since the FW is already back in
	 * preboot and cannot handle such a command.
	 * In this case, the next time the management app loads it will check the
	 * events register, which will still hold the halt indication, and will
	 * reboot the device.
	 * The solution is to let preboot clear all relevant registers before the
	 * next boot, once the driver sends COMMS_RST_DEV.
	 */
	cpu_boot_status = RREG32(mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS);

	if (gaudi2 && (gaudi2->hw_cap_initialized & HW_CAP_CPU) &&
			(cpu_boot_status == CPU_BOOT_STATUS_SRAM_AVAIL))
		cpu_initialized = true;

	/*
	 * When Linux/Bootfit exist, this write to the SP can be interpreted in
	 * two ways:
	 * 1. FW reset: FW initiates the reset sequence
	 * 2. driver reset: FW will start the HALT sequence (the preparations for
	 *                  the reset but not the reset itself, as it is not
	 *                  implemented on their part) and the LKD will wait to
	 *                  let FW complete the sequence before issuing the reset
	 */
	if (!preboot_only && cpu_initialized) {
		WREG32(le32_to_cpu(dyn_regs->gic_host_halt_irq),
			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_HALT_MACHINE].cpu_id);

		msleep(GAUDI2_CPU_RESET_WAIT_MSEC);
	}

	/*
	 * When working with preboot (without Linux/Bootfit) we can
	 * communicate only using the COMMS commands to issue halt/reset.
	 *
	 * For the case in which we are working with Linux/Bootfit this is a
	 * hail-mary attempt to revive the card in the small chance that the
	 * f/w has experienced a watchdog event, which caused it to return back
	 * to preboot. In that case, triggering reset through GIC won't help.
	 * We need to trigger the reset as if Linux wasn't loaded.
	 *
	 * We do it only if the reset cause was HB, because that would be the
	 * indication of such an event.
	 *
	 * In case the watchdog hasn't expired but we still got HB, then this
	 * won't do any damage.
	 */
	if (heartbeat_reset || preboot_only || !cpu_initialized) {
		if (hdev->asic_prop.hard_reset_done_by_fw)
			hl_fw_ask_hard_reset_without_linux(hdev);
		else
			hl_fw_ask_halt_machine_without_linux(hdev);
	}
}
/**
 * gaudi2_execute_hard_reset - execute hard reset by driver/FW
 *
 * @hdev: pointer to the habanalabs device structure
 * @reset_sleep_ms: sleep time in msec after reset
 *
 * This function executes hard reset based on whether the driver or FW should
 * perform the reset.
 */
static void gaudi2_execute_hard_reset(struct hl_device *hdev, u32 reset_sleep_ms)
{
	if (hdev->asic_prop.hard_reset_done_by_fw) {
		gaudi2_send_hard_reset_cmd(hdev);
		return;
	}

	/* Set device to handle FLR by H/W as we will put the device
	 * CPU to halt mode
	 */
	WREG32(mmPCIE_AUX_FLR_CTRL,
			(PCIE_AUX_FLR_CTRL_HW_CTRL_MASK | PCIE_AUX_FLR_CTRL_INT_MASK_MASK));

	gaudi2_send_hard_reset_cmd(hdev);

	WREG32(mmPSOC_RESET_CONF_SW_ALL_RST, 1);
}
/**
 * gaudi2_execute_soft_reset - execute soft reset by driver/FW
 *
 * @hdev: pointer to the habanalabs device structure
 * @reset_sleep_ms: sleep time in msec after reset
 * @driver_performs_reset: true if driver should perform reset instead of f/w.
 *
 * This function executes soft reset based on whether the driver or FW should
 * perform the reset.
 */
static void gaudi2_execute_soft_reset(struct hl_device *hdev, u32 reset_sleep_ms,
						bool driver_performs_reset)
{
	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;

	if (!driver_performs_reset) {
		/* set SP to indicate reset request sent to FW */
		if (dyn_regs->cpu_rst_status)
			WREG32(le32_to_cpu(dyn_regs->cpu_rst_status), CPU_RST_STATUS_NA);
		else
			WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA);

		WREG32(le32_to_cpu(dyn_regs->gic_host_soft_rst_irq),
			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_SOFT_RESET].cpu_id);
		return;
	}

	/* Block access to engines, QMANs and SM during reset, these
	 * RRs will be reconfigured after soft reset.
	 * PCIE_MSIX is left unsecured to allow NIC packets processing during the reset.
	 */
	gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 1,
					mmDCORE0_TPC0_QM_DCCM_BASE, mmPCIE_MSIX_BASE);

	gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 2,
					mmPCIE_MSIX_BASE + HL_BLOCK_SIZE,
					mmPCIE_VDEC1_MSTR_IF_RR_SHRD_HBW_BASE + HL_BLOCK_SIZE);

	WREG32(mmPSOC_RESET_CONF_SOFT_RST, 1);
}
static void gaudi2_poll_btm_indication(struct hl_device *hdev, u32 reset_sleep_ms,
								u32 poll_timeout_us)
{
	int i, rc = 0;
	u32 reg_val;

	/* without this sleep reset will not work */
	msleep(reset_sleep_ms);

	/* We poll the BTM done indication multiple times after reset due to
	 * a HW errata 'GAUDI2_0300'
	 */
	for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++)
		rc = hl_poll_timeout(
			hdev,
			mmPSOC_GLOBAL_CONF_BTM_FSM,
			reg_val,
			reg_val == 0,
			1000,
			poll_timeout_us);

	if (rc)
		dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", reg_val);
}
static void gaudi2_get_soft_rst_done_indication(struct hl_device *hdev, u32 poll_timeout_us)
{
	int i, rc = 0;
	u32 reg_val;

	for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++)
		rc = hl_poll_timeout(
			hdev,
			mmCPU_RST_STATUS_TO_HOST,
			reg_val,
			reg_val == CPU_RST_STATUS_SOFT_RST_DONE,
			1000,
			poll_timeout_us);

	if (rc)
		dev_err(hdev->dev, "Timeout while waiting for FW to complete soft reset (0x%x)\n",
				reg_val);
}
static void gaudi2_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 poll_timeout_us, reset_sleep_ms;
	bool driver_performs_reset = false;

	if (hdev->pldm) {
		reset_sleep_ms = hard_reset ? GAUDI2_PLDM_HRESET_TIMEOUT_MSEC :
						GAUDI2_PLDM_SRESET_TIMEOUT_MSEC;
		poll_timeout_us = GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC;
	} else {
		reset_sleep_ms = GAUDI2_RESET_TIMEOUT_MSEC;
		poll_timeout_us = GAUDI2_RESET_POLL_TIMEOUT_USEC;
	}

	if (fw_reset)
		goto skip_reset;

	gaudi2_reset_arcs(hdev);

	if (hard_reset) {
		driver_performs_reset = !hdev->asic_prop.hard_reset_done_by_fw;
		gaudi2_execute_hard_reset(hdev, reset_sleep_ms);
	} else {
		/*
		 * As we also have to support working with preboot only (which does
		 * not support soft reset), we have to make sure that security is
		 * disabled before letting the driver do the reset. The user shall
		 * control the BFE flags to avoid asking for soft reset on a
		 * secured device with preboot only.
		 */
		driver_performs_reset = (hdev->fw_components == FW_TYPE_PREBOOT_CPU &&
							!hdev->asic_prop.fw_security_enabled);
		gaudi2_execute_soft_reset(hdev, reset_sleep_ms, driver_performs_reset);
	}

skip_reset:
	if (driver_performs_reset || hard_reset)
		gaudi2_poll_btm_indication(hdev, reset_sleep_ms, poll_timeout_us);
	else
		gaudi2_get_soft_rst_done_indication(hdev, poll_timeout_us);

	if (!gaudi2)
		return;

	gaudi2->dec_hw_cap_initialized &= ~(HW_CAP_DEC_MASK);
	gaudi2->tpc_hw_cap_initialized &= ~(HW_CAP_TPC_MASK);

	/*
	 * Clear NIC capability mask in order for driver to re-configure
	 * NIC QMANs. NIC ports will not be re-configured during soft
	 * reset as we call gaudi2_nic_init only during hard reset
	 */
	gaudi2->nic_hw_cap_initialized &= ~(HW_CAP_NIC_MASK);

	if (hard_reset) {
		gaudi2->hw_cap_initialized &=
			~(HW_CAP_DRAM | HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_MASK |
			HW_CAP_PMMU | HW_CAP_CPU | HW_CAP_CPU_Q |
			HW_CAP_SRAM_SCRAMBLER | HW_CAP_DMMU_MASK |
			HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_KDMA |
			HW_CAP_MME_MASK | HW_CAP_ROT_MASK);

		memset(gaudi2->events_stat, 0, sizeof(gaudi2->events_stat));
	} else {
		gaudi2->hw_cap_initialized &=
			~(HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_SW_RESET |
			HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_MME_MASK |
			HW_CAP_ROT_MASK);
	}
}
static int gaudi2_suspend(struct hl_device *hdev)
{
	int rc;

	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
	if (rc)
		dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");

	return rc;
}
static int gaudi2_resume(struct hl_device *hdev)
{
	return gaudi2_init_iatu(hdev);
}
static int gaudi2_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
		void *cpu_addr, dma_addr_t dma_addr, size_t size)
{
	int rc;

	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
			VM_DONTCOPY | VM_NORESERVE;

#ifdef _HAS_DMA_MMAP_COHERENT

	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size);
	if (rc)
		dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);

#else

	rc = remap_pfn_range(vma, vma->vm_start,
				virt_to_phys(cpu_addr) >> PAGE_SHIFT,
				size, vma->vm_page_prot);
	if (rc)
		dev_err(hdev->dev, "remap_pfn_range error %d", rc);

#endif

	return rc;
}
static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u64 hw_cap_mask = 0;
	u64 hw_tpc_cap_bit = 0;
	u64 hw_nic_cap_bit = 0;
	u64 hw_test_cap_bit = 0;

	switch (hw_queue_id) {
	case GAUDI2_QUEUE_ID_PDMA_0_0:
	case GAUDI2_QUEUE_ID_PDMA_0_1:
	case GAUDI2_QUEUE_ID_PDMA_1_0:
		hw_cap_mask = HW_CAP_PDMA_MASK;
		break;
	case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
		hw_test_cap_bit = HW_CAP_EDMA_SHIFT +
			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0) >> 2);
		break;
	case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
		hw_test_cap_bit = HW_CAP_EDMA_SHIFT + NUM_OF_EDMA_PER_DCORE +
			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0) >> 2);
		break;
	case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
		hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 2 * NUM_OF_EDMA_PER_DCORE +
			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0) >> 2);
		break;
	case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
		hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 3 * NUM_OF_EDMA_PER_DCORE +
			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0) >> 2);
		break;

	case GAUDI2_QUEUE_ID_DCORE0_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE0_MME_0_3:
		hw_test_cap_bit = HW_CAP_MME_SHIFT;
		break;

	case GAUDI2_QUEUE_ID_DCORE1_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE1_MME_0_3:
		hw_test_cap_bit = HW_CAP_MME_SHIFT + 1;
		break;

	case GAUDI2_QUEUE_ID_DCORE2_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE2_MME_0_3:
		hw_test_cap_bit = HW_CAP_MME_SHIFT + 2;
		break;

	case GAUDI2_QUEUE_ID_DCORE3_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE3_MME_0_3:
		hw_test_cap_bit = HW_CAP_MME_SHIFT + 3;
		break;

	case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_5_3:
		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT +
			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_TPC_0_0) >> 2);

		/* special case where cap bit refers to the first queue id */
		if (!hw_tpc_cap_bit)
			return !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(0));
		break;

	case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3:
		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + NUM_OF_TPC_PER_DCORE +
			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_TPC_0_0) >> 2);
		break;

	case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3:
		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (2 * NUM_OF_TPC_PER_DCORE) +
			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_TPC_0_0) >> 2);
		break;

	case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3:
		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (3 * NUM_OF_TPC_PER_DCORE) +
			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_TPC_0_0) >> 2);
		break;

	case GAUDI2_QUEUE_ID_DCORE0_TPC_6_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3:
		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (4 * NUM_OF_TPC_PER_DCORE);
		break;

	case GAUDI2_QUEUE_ID_ROT_0_0 ... GAUDI2_QUEUE_ID_ROT_1_3:
		hw_test_cap_bit = HW_CAP_ROT_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_ROT_0_0) >> 2);
		break;

	case GAUDI2_QUEUE_ID_NIC_0_0 ... GAUDI2_QUEUE_ID_NIC_23_3:
		hw_nic_cap_bit = HW_CAP_NIC_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_NIC_0_0) >> 2);

		/* special case where cap bit refers to the first queue id */
		if (!hw_nic_cap_bit)
			return !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(0));
		break;

	case GAUDI2_QUEUE_ID_CPU_PQ:
		return !!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q);

	default:
		return false;
	}

	if (hw_tpc_cap_bit)
		return !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(hw_tpc_cap_bit));

	if (hw_nic_cap_bit)
		return !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(hw_nic_cap_bit));

	if (hw_test_cap_bit)
		hw_cap_mask = BIT_ULL(hw_test_cap_bit);

	return !!(gaudi2->hw_cap_initialized & hw_cap_mask);
}
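/*
 * Mapping note (worked example, assuming the queue layout implied by the case
 * ranges above): each engine exposes four consecutive queue IDs, hence the
 * ">> 2" when deriving a capability bit. E.g. GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0,
 * which is four queue IDs after GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0, maps to
 * capability bit HW_CAP_EDMA_SHIFT + 1.
 */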
static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;

	switch (arc_id) {
	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
	case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
		return !!(gaudi2->active_hw_arc & BIT_ULL(arc_id));

	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
		return !!(gaudi2->active_tpc_arc & BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0));

	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
		return !!(gaudi2->active_nic_arc & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0));

	default:
		return false;
	}
}
static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;

	switch (arc_id) {
	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
	case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
		gaudi2->active_hw_arc &= ~(BIT_ULL(arc_id));
		break;

	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
		gaudi2->active_tpc_arc &= ~(BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0));
		break;

	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
		gaudi2->active_nic_arc &= ~(BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0));
		break;

	default:
		return;
	}
}
static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;

	switch (arc_id) {
	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
	case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
		gaudi2->active_hw_arc |= BIT_ULL(arc_id);
		break;

	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
		gaudi2->active_tpc_arc |= BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0);
		break;

	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
		gaudi2->active_nic_arc |= BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0);
		break;

	default:
		return;
	}
}
static void gaudi2_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
{
	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 pq_offset, reg_base, db_reg_offset, db_value;

	if (hw_queue_id != GAUDI2_QUEUE_ID_CPU_PQ) {
		/*
		 * QMAN has 4 successive PQ_PI registers, 1 for each of the QMAN PQs.
		 * Masking the H/W queue ID with 0x3 extracts the QMAN internal PQ
		 * number.
		 */
		pq_offset = (hw_queue_id & 0x3) * 4;
		reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
		db_reg_offset = reg_base + QM_PQ_PI_0_OFFSET + pq_offset;
	} else {
		db_reg_offset = mmCPU_IF_PF_PQ_PI;
	}

	db_value = pi;

	/* ring the doorbell */
	WREG32(db_reg_offset, db_value);

	if (hw_queue_id == GAUDI2_QUEUE_ID_CPU_PQ) {
		/* make sure device CPU will read latest data from host */
		mb();
		WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq),
			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id);
	}
}
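/*
 * Worked example for the PQ_PI addressing above (a sketch): hw_queue_id 6 has
 * internal PQ number 6 & 0x3 = 2, so pq_offset is 8 and the doorbell lands in
 * the third PQ_PI register of that QMAN block.
 */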
static void gaudi2_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd)
{
	__le64 *pbd = (__le64 *) bd;

	/* The QMANs are on the host memory so a simple copy suffices */
	pqe[0] = pbd[0];
	pqe[1] = pbd[1];
}
static void *gaudi2_dma_alloc_coherent(struct hl_device *hdev, size_t size,
					dma_addr_t *dma_handle, gfp_t flags)
{
	return dma_alloc_coherent(&hdev->pdev->dev, size, dma_handle, flags);
}

static void gaudi2_dma_free_coherent(struct hl_device *hdev, size_t size,
					void *cpu_addr, dma_addr_t dma_handle)
{
	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, dma_handle);
}
static int gaudi2_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
				u32 timeout, u64 *result)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;

	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) {
		if (result)
			*result = 0;
		return 0;
	}

	if (!timeout)
		timeout = GAUDI2_MSG_TO_CPU_TIMEOUT_USEC;

	return hl_fw_send_cpu_message(hdev, GAUDI2_QUEUE_ID_CPU_PQ, msg, len, timeout, result);
}
static void *gaudi2_dma_pool_zalloc(struct hl_device *hdev, size_t size,
				gfp_t mem_flags, dma_addr_t *dma_handle)
{
	if (size > GAUDI2_DMA_POOL_BLK_SIZE)
		return NULL;

	return dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
}

static void gaudi2_dma_pool_free(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr)
{
	dma_pool_free(hdev->dma_pool, vaddr, dma_addr);
}

static void *gaudi2_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
						dma_addr_t *dma_handle)
{
	return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
}

static void gaudi2_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr)
{
	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
}
static dma_addr_t gaudi2_dma_map_single(struct hl_device *hdev, void *addr, int len,
					enum dma_data_direction dir)
{
	dma_addr_t dma_addr;

	dma_addr = dma_map_single(&hdev->pdev->dev, addr, len, dir);
	if (unlikely(dma_mapping_error(&hdev->pdev->dev, dma_addr)))
		return 0;

	return dma_addr;
}

static void gaudi2_dma_unmap_single(struct hl_device *hdev, dma_addr_t addr, int len,
					enum dma_data_direction dir)
{
	dma_unmap_single(&hdev->pdev->dev, addr, len, dir);
}
static int gaudi2_validate_cb_address(struct hl_device *hdev, struct hl_cs_parser *parser)
{
	struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
	struct gaudi2_device *gaudi2 = hdev->asic_specific;

	if (!gaudi2_is_queue_enabled(hdev, parser->hw_queue_id)) {
		dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
		return -EINVAL;
	}

	/* Just check if CB address is valid */

	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
					parser->user_cb_size,
					asic_prop->sram_user_base_address,
					asic_prop->sram_end_address))
		return 0;

	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
					parser->user_cb_size,
					asic_prop->dram_user_base_address,
					asic_prop->dram_end_address))
		return 0;

	if ((gaudi2->hw_cap_initialized & HW_CAP_DMMU_MASK) &&
		hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
						parser->user_cb_size,
						asic_prop->dmmu.start_addr,
						asic_prop->dmmu.end_addr))
		return 0;

	if (gaudi2->hw_cap_initialized & HW_CAP_PMMU) {
		if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
					parser->user_cb_size,
					asic_prop->pmmu.start_addr,
					asic_prop->pmmu.end_addr) ||
			hl_mem_area_inside_range(
					(u64) (uintptr_t) parser->user_cb,
					parser->user_cb_size,
					asic_prop->pmmu_huge.start_addr,
					asic_prop->pmmu_huge.end_addr))
			return 0;

	} else if (gaudi2_host_phys_addr_valid((u64) (uintptr_t) parser->user_cb)) {
		if (!hdev->pdev)
			return 0;

		if (!device_iommu_mapped(&hdev->pdev->dev))
			return 0;
	}

	dev_err(hdev->dev, "CB address %p + 0x%x for internal QMAN is not valid\n",
		parser->user_cb, parser->user_cb_size);

	return -EFAULT;
}
static int gaudi2_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;

	if (!parser->is_kernel_allocated_cb)
		return gaudi2_validate_cb_address(hdev, parser);

	if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
		dev_err(hdev->dev, "PMMU not initialized - Unsupported mode in Gaudi2\n");
		return -EINVAL;
	}

	return 0;
}
static int gaudi2_send_heartbeat(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;

	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_send_heartbeat(hdev);
}
/* This is an internal helper function, used to update the KDMA MMU properties.
 * Should be called while holding a proper KDMA lock.
 */
static void gaudi2_kdma_set_mmbp_asid(struct hl_device *hdev,
					bool mmu_bypass, u32 asid)
{
	u32 rw_asid, rw_mmu_bp;

	rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
		(asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);

	rw_mmu_bp = (!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_SHIFT) |
		(!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_SHIFT);

	WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_ASID, rw_asid);
	WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP, rw_mmu_bp);
}
static void gaudi2_arm_cq_monitor(struct hl_device *hdev, u32 sob_id, u32 mon_id, u32 cq_id,
						u32 mon_payload, u32 sync_value)
{
	u32 sob_offset, mon_offset, sync_group_id, mode, mon_arm;
	u8 mask;

	sob_offset = sob_id * 4;
	mon_offset = mon_id * 4;

	/* Reset the SOB value */
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0);

	/* Configure this address with CQ_ID 0 because CQ_EN is set */
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, cq_id);

	/* Configure this address with CS index because CQ_EN is set */
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, mon_payload);

	sync_group_id = sob_id / 8;
	mask = ~(1 << (sob_id & 0x7));
	mode = 1; /* comparison mode is "equal to" */

	mon_arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, sync_value);
	mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode);
	mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask);
	mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sync_group_id);
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, mon_arm);
}
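/*
 * MON_ARM encoding example (illustrative): for sob_id = 15 the monitor is
 * armed on sync group 15 / 8 = 1 with mask ~(1 << 7) = 0x7f; per the mask
 * polarity used here, the single cleared bit leaves only that SOB within the
 * group under observation, and SOD holds the value to compare against with
 * the "equal to" mode set above.
 */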
/* Internal helper that issues a single KDMA transfer (copy or memset) and
 * busy-waits for its completion via the reserved KDMA SOB/monitor/CQ.
 */
static int gaudi2_send_job_to_kdma(struct hl_device *hdev,
					u64 src_addr, u64 dst_addr,
					u32 size, bool is_memset)
{
	u32 comp_val, commit_mask, *polling_addr, timeout, status = 0;
	struct hl_cq_entry *cq_base;
	struct hl_cq *cq;
	u64 comp_addr;
	int rc;

	gaudi2_arm_cq_monitor(hdev, GAUDI2_RESERVED_SOB_KDMA_COMPLETION,
				GAUDI2_RESERVED_MON_KDMA_COMPLETION,
				GAUDI2_RESERVED_CQ_KDMA_COMPLETION, 1, 1);

	comp_addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			(GAUDI2_RESERVED_SOB_KDMA_COMPLETION * sizeof(u32));

	comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) |
			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1);

	WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_LO, lower_32_bits(src_addr));
	WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_HI, upper_32_bits(src_addr));
	WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_LO, lower_32_bits(dst_addr));
	WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_HI, upper_32_bits(dst_addr));
	WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_LO, lower_32_bits(comp_addr));
	WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_HI, upper_32_bits(comp_addr));
	WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_WDATA, comp_val);
	WREG32(mmARC_FARM_KDMA_CTX_DST_TSIZE_0, size);

	commit_mask = FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_LIN_MASK, 1) |
			FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_WR_COMP_EN_MASK, 1);

	if (is_memset)
		commit_mask |= FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_MEM_SET_MASK, 1);

	WREG32(mmARC_FARM_KDMA_CTX_COMMIT, commit_mask);

	/* Wait for completion */
	cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_KDMA_COMPLETION];
	cq_base = cq->kernel_address;
	polling_addr = (u32 *)&cq_base[cq->ci];

	if (hdev->pldm)
		/* allow 20 seconds of timeout for each 1MB */
		timeout = ((size / SZ_1M) + 1) * USEC_PER_SEC * 20;
	else
		timeout = KDMA_TIMEOUT_USEC;

	/* Polling */
	rc = hl_poll_timeout_memory(
			hdev,
			polling_addr,
			status,
			(status == 1),
			1000,
			timeout,
			true);

	*polling_addr = 0;

	if (rc) {
		dev_err(hdev->dev, "Timeout while waiting for KDMA to be idle\n");
		WREG32(mmARC_FARM_KDMA_CFG_1, 1 << ARC_FARM_KDMA_CFG_1_HALT_SHIFT);
		return rc;
	}

	cq->ci = hl_cq_inc_ptr(cq->ci);

	return 0;
}
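/*
 * Timeout arithmetic example for the function above (a sketch): on Palladium
 * a 4MB transfer gets ((4MB / 1MB) + 1) * 20s = 100 seconds, while on real
 * silicon the fixed KDMA_TIMEOUT_USEC (one second) is used.
 */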
static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val)
{
	u32 i;

	for (i = 0 ; i < size ; i += sizeof(u32))
		WREG32(addr + i, val);
}
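/*
 * Usage sketch for gaudi2_memset_device_lbw() (hypothetical values, not taken
 * from a real call site): zeroing 64 consecutive SOBs, 4 bytes each, could
 * look like
 *	gaudi2_memset_device_lbw(hdev, mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0, 64 * 4, 0);
 * where size is in bytes and is written in u32 strides over the LBW path.
 */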
static void gaudi2_qman_set_test_mode(struct hl_device *hdev, u32 hw_queue_id, bool enable)
{
	u32 reg_base = gaudi2_qm_blocks_bases[hw_queue_id];

	if (enable) {
		WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED_TEST_MODE);
		WREG32(reg_base + QM_PQC_CFG_OFFSET, 0);
	} else {
		WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED);
		WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT);
	}
}
static int gaudi2_test_queue(struct hl_device *hdev, u32 hw_queue_id)
{
	u32 sob_offset = hdev->asic_prop.first_available_user_sob[0] * 4;
	u32 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
	u32 timeout_usec, tmp, sob_base = 1, sob_val = 0x5a5a;
	struct packet_msg_short *msg_short_pkt;
	dma_addr_t pkt_dma_addr;
	size_t pkt_size;
	int rc;

	if (hdev->pldm)
		timeout_usec = GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC;
	else
		timeout_usec = GAUDI2_TEST_QUEUE_WAIT_USEC;

	pkt_size = sizeof(*msg_short_pkt);
	msg_short_pkt = hl_asic_dma_pool_zalloc(hdev, pkt_size, GFP_KERNEL, &pkt_dma_addr);
	if (!msg_short_pkt) {
		dev_err(hdev->dev, "Failed to allocate packet for H/W queue %d testing\n",
			hw_queue_id);
		return -ENOMEM;
	}

	tmp = (PACKET_MSG_SHORT << GAUDI2_PKT_CTL_OPCODE_SHIFT) |
		(1 << GAUDI2_PKT_CTL_EB_SHIFT) |
		(1 << GAUDI2_PKT_CTL_MB_SHIFT) |
		(sob_base << GAUDI2_PKT_SHORT_CTL_BASE_SHIFT) |
		(sob_offset << GAUDI2_PKT_SHORT_CTL_ADDR_SHIFT);

	msg_short_pkt->value = cpu_to_le32(sob_val);
	msg_short_pkt->ctl = cpu_to_le32(tmp);

	/* Reset the SOB value */
	WREG32(sob_addr, 0);

	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, pkt_dma_addr);
	if (rc) {
		dev_err(hdev->dev, "Failed to send msg_short packet to H/W queue %d\n",
			hw_queue_id);
		goto free_pkt;
	}

	rc = hl_poll_timeout(
			hdev,
			sob_addr,
			tmp,
			(tmp == sob_val),
			1000,
			timeout_usec);

	if (rc == -ETIMEDOUT) {
		dev_err(hdev->dev, "H/W queue %d test failed (SOB_OBJ_0 == 0x%x)\n",
			hw_queue_id, tmp);
		rc = -EIO;
	}

	/* Reset the SOB value */
	WREG32(sob_addr, 0);

free_pkt:
	hl_asic_dma_pool_free(hdev, (void *) msg_short_pkt, pkt_dma_addr);
	return rc;
}
static int gaudi2_test_cpu_queue(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;

	/*
	 * check capability here as send_cpu_message() won't update the result
	 * value if no capability
	 */
	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_test_cpu_queue(hdev);
}
static int gaudi2_test_queues(struct hl_device *hdev)
{
	int i, rc, ret_val = 0;

	for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i++) {
		if (!gaudi2_is_queue_enabled(hdev, i))
			continue;

		gaudi2_qman_set_test_mode(hdev, i, true);
		rc = gaudi2_test_queue(hdev, i);
		gaudi2_qman_set_test_mode(hdev, i, false);

		if (rc) {
			ret_val = -EINVAL;
			goto done;
		}
	}

	rc = gaudi2_test_cpu_queue(hdev);
	if (rc)
		ret_val = -EINVAL;

done:
	return ret_val;
}
static int gaudi2_compute_reset_late_init(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	size_t irq_arr_size;

	/* TODO: missing gaudi2_nic_resume.
	 * Until implemented nic_hw_cap_initialized will remain zeroed
	 */
	gaudi2_init_arcs(hdev);
	gaudi2_scrub_arcs_dccm(hdev);
	gaudi2_init_security(hdev);

	/* Unmask all IRQs since some could have been received during the soft reset */
	irq_arr_size = gaudi2->num_of_valid_hw_events * sizeof(gaudi2->hw_events[0]);
	return hl_fw_unmask_irq_arr(hdev, gaudi2->hw_events, irq_arr_size);
}
static void gaudi2_is_tpc_engine_idle(struct hl_device *hdev, int dcore, int inst, u32 offset,
					struct iterate_module_ctx *ctx)
{
	struct gaudi2_tpc_idle_data *idle_data = ctx->data;
	u32 tpc_cfg_sts, qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
	bool is_eng_idle;
	int engine_idx;

	if ((dcore == 0) && (inst == (NUM_DCORE0_TPC - 1)))
		engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_6;
	else
		engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_0 +
				dcore * GAUDI2_ENGINE_ID_DCORE_OFFSET + inst;

	tpc_cfg_sts = RREG32(mmDCORE0_TPC0_CFG_STATUS + offset);
	qm_glbl_sts0 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS0 + offset);
	qm_glbl_sts1 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS1 + offset);
	qm_cgm_sts = RREG32(mmDCORE0_TPC0_QM_CGM_STS + offset);

	is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
						IS_TPC_IDLE(tpc_cfg_sts);
	*(idle_data->is_idle) &= is_eng_idle;

	if (idle_data->mask && !is_eng_idle)
		set_bit(engine_idx, idle_data->mask);

	if (idle_data->e)
		hl_engine_data_sprintf(idle_data->e,
					idle_data->tpc_fmt, dcore, inst,
					is_eng_idle ? "Y" : "N",
					qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
}
static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
					struct engines_data *e)
{
	u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_idle_ind_mask,
		mme_arch_sts, dec_swreg15, dec_enabled_bit;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	const char *rot_fmt = "%-6d%-5d%-9s%#-14x%#-12x%s\n";
	unsigned long *mask = (unsigned long *) mask_arr;
	const char *edma_fmt = "%-6d%-6d%-9s%#-14x%#x\n";
	const char *mme_fmt = "%-5d%-6s%-9s%#-14x%#x\n";
	const char *nic_fmt = "%-5d%-9s%#-14x%#-12x\n";
	const char *pdma_fmt = "%-6d%-9s%#-14x%#x\n";
	const char *pcie_dec_fmt = "%-10d%-9s%#x\n";
	const char *dec_fmt = "%-6d%-5d%-9s%#x\n";
	bool is_idle = true, is_eng_idle;
	u64 offset;

	struct gaudi2_tpc_idle_data tpc_idle_data = {
		.tpc_fmt = "%-6d%-5d%-9s%#-14x%#-12x%#x\n",
		.e = e,
		.mask = mask,
		.is_idle = &is_idle,
	};
	struct iterate_module_ctx tpc_iter = {
		.fn = &gaudi2_is_tpc_engine_idle,
		.data = &tpc_idle_data,
	};

	int engine_idx, i, j;

	/* EDMA, Two engines per Dcore */
	if (e)
		hl_engine_data_sprintf(e,
			"\nCORE  EDMA  is_idle  QM_GLBL_STS0  DMA_CORE_IDLE_IND_MASK\n"
			"----  ----  -------  ------------  ----------------------\n");

	for (i = 0; i < NUM_OF_DCORES; i++) {
		for (j = 0 ; j < NUM_OF_EDMA_PER_DCORE ; j++) {
			int seq = i * NUM_OF_EDMA_PER_DCORE + j;

			if (!(prop->edma_enabled_mask & BIT(seq)))
				continue;

			engine_idx = GAUDI2_DCORE0_ENGINE_ID_EDMA_0 +
					i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j;
			offset = i * DCORE_OFFSET + j * DCORE_EDMA_OFFSET;

			dma_core_idle_ind_mask =
					RREG32(mmDCORE0_EDMA0_CORE_IDLE_IND_MASK + offset);

			qm_glbl_sts0 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS0 + offset);
			qm_glbl_sts1 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS1 + offset);
			qm_cgm_sts = RREG32(mmDCORE0_EDMA0_QM_CGM_STS + offset);

			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
					IS_DMA_IDLE(dma_core_idle_ind_mask);
			is_idle &= is_eng_idle;

			if (mask && !is_eng_idle)
				set_bit(engine_idx, mask);

			if (e)
				hl_engine_data_sprintf(e, edma_fmt, i, j,
							is_eng_idle ? "Y" : "N",
							qm_glbl_sts0,
							dma_core_idle_ind_mask);
		}
	}

	/* PDMA, Two engines in Full chip */
	if (e)
		hl_engine_data_sprintf(e,
			"\nPDMA  is_idle  QM_GLBL_STS0  DMA_CORE_IDLE_IND_MASK\n"
			"----  -------  ------------  ----------------------\n");

	for (i = 0 ; i < NUM_OF_PDMA ; i++) {
		engine_idx = GAUDI2_ENGINE_ID_PDMA_0 + i;
		offset = i * PDMA_OFFSET;
		dma_core_idle_ind_mask = RREG32(mmPDMA0_CORE_IDLE_IND_MASK + offset);

		qm_glbl_sts0 = RREG32(mmPDMA0_QM_GLBL_STS0 + offset);
		qm_glbl_sts1 = RREG32(mmPDMA0_QM_GLBL_STS1 + offset);
		qm_cgm_sts = RREG32(mmPDMA0_QM_CGM_STS + offset);

		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
				IS_DMA_IDLE(dma_core_idle_ind_mask);
		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(engine_idx, mask);

		if (e)
			hl_engine_data_sprintf(e, pdma_fmt, i, is_eng_idle ? "Y" : "N",
						qm_glbl_sts0, dma_core_idle_ind_mask);
	}

	/* NIC, twelve macros in Full chip */
	if (e && hdev->nic_ports_mask)
		hl_engine_data_sprintf(e,
			"\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
			"---  -------  ------------  ----------\n");

	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
		if (!(i & 1))
			offset = i / 2 * NIC_OFFSET;
		else
			offset += NIC_QM_OFFSET;

		if (!(hdev->nic_ports_mask & BIT(i)))
			continue;

		engine_idx = GAUDI2_ENGINE_ID_NIC0_0 + i;

		qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
		qm_glbl_sts1 = RREG32(mmNIC0_QM0_GLBL_STS1 + offset);
		qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);

		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(engine_idx, mask);

		if (e)
			hl_engine_data_sprintf(e, nic_fmt, i, is_eng_idle ? "Y" : "N",
						qm_glbl_sts0, qm_cgm_sts);
	}

	if (e)
		hl_engine_data_sprintf(e,
			"\nMME  Stub  is_idle  QM_GLBL_STS0  MME_ARCH_STATUS\n"
			"---  ----  -------  ------------  ---------------\n");
	/* MME, one per Dcore */
	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
		engine_idx = GAUDI2_DCORE0_ENGINE_ID_MME + i * GAUDI2_ENGINE_ID_DCORE_OFFSET;
		offset = i * DCORE_OFFSET;

		qm_glbl_sts0 = RREG32(mmDCORE0_MME_QM_GLBL_STS0 + offset);
		qm_glbl_sts1 = RREG32(mmDCORE0_MME_QM_GLBL_STS1 + offset);
		qm_cgm_sts = RREG32(mmDCORE0_MME_QM_CGM_STS + offset);

		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
		is_idle &= is_eng_idle;

		mme_arch_sts = RREG32(mmDCORE0_MME_CTRL_LO_ARCH_STATUS + offset);
		is_eng_idle &= IS_MME_IDLE(mme_arch_sts);
		is_idle &= is_eng_idle;

		if (e)
			hl_engine_data_sprintf(e, mme_fmt, i, "N",
				is_eng_idle ? "Y" : "N",
				qm_glbl_sts0,
				mme_arch_sts);

		if (mask && !is_eng_idle)
			set_bit(engine_idx, mask);
	}

	/*
	 * TPC
	 */
	if (e && prop->tpc_enabled_mask)
		hl_engine_data_sprintf(e,
			"\nCORE  TPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_IDLE_IND_MASK\n"
			"----  ---  --------  ------------  ----------  ----------------------\n");

	gaudi2_iterate_tpcs(hdev, &tpc_iter);

	/* Decoders, two per Dcore plus two shared PCIe decoders */
	if (e && (prop->decoder_enabled_mask & (~PCIE_DEC_EN_MASK)))
		hl_engine_data_sprintf(e,
			"\nCORE  DEC  is_idle  VSI_CMD_SWREG15\n"
			"----  ---  -------  ---------------\n");

	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
		for (j = 0 ; j < NUM_OF_DEC_PER_DCORE ; j++) {
			dec_enabled_bit = 1 << (i * NUM_OF_DEC_PER_DCORE + j);
			if (!(prop->decoder_enabled_mask & dec_enabled_bit))
				continue;

			engine_idx = GAUDI2_DCORE0_ENGINE_ID_DEC_0 +
					i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j;
			offset = i * DCORE_OFFSET + j * DCORE_DEC_OFFSET;

			dec_swreg15 = RREG32(mmDCORE0_DEC0_CMD_SWREG15 + offset);
			is_eng_idle = IS_DEC_IDLE(dec_swreg15);
			is_idle &= is_eng_idle;

			if (mask && !is_eng_idle)
				set_bit(engine_idx, mask);

			if (e)
				hl_engine_data_sprintf(e, dec_fmt, i, j,
							is_eng_idle ? "Y" : "N", dec_swreg15);
		}
	}

	if (e && (prop->decoder_enabled_mask & PCIE_DEC_EN_MASK))
		hl_engine_data_sprintf(e,
			"\nPCIe DEC  is_idle  VSI_CMD_SWREG15\n"
			"--------  -------  ---------------\n");

	/* Check shared(PCIe) decoders */
	for (i = 0 ; i < NUM_OF_DEC_PER_DCORE ; i++) {
		dec_enabled_bit = PCIE_DEC_SHIFT + i;
		if (!(prop->decoder_enabled_mask & BIT(dec_enabled_bit)))
			continue;

		engine_idx = GAUDI2_PCIE_ENGINE_ID_DEC_0 + i;
		offset = i * DCORE_DEC_OFFSET;
		dec_swreg15 = RREG32(mmPCIE_DEC0_CMD_SWREG15 + offset);
		is_eng_idle = IS_DEC_IDLE(dec_swreg15);
		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(engine_idx, mask);

		if (e)
			hl_engine_data_sprintf(e, pcie_dec_fmt, i,
						is_eng_idle ? "Y" : "N", dec_swreg15);
	}

	if (e)
		hl_engine_data_sprintf(e,
			"\nCORE  ROT  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
			"----  ---  -------  ------------  ----------  -------------\n");

	for (i = 0 ; i < NUM_OF_ROT ; i++) {
		engine_idx = GAUDI2_ENGINE_ID_ROT_0 + i;

		offset = i * ROT_OFFSET;

		qm_glbl_sts0 = RREG32(mmROT0_QM_GLBL_STS0 + offset);
		qm_glbl_sts1 = RREG32(mmROT0_QM_GLBL_STS1 + offset);
		qm_cgm_sts = RREG32(mmROT0_QM_CGM_STS + offset);

		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(engine_idx, mask);

		if (e)
			hl_engine_data_sprintf(e, rot_fmt, i, 0, is_eng_idle ? "Y" : "N",
						qm_glbl_sts0, qm_cgm_sts, "-");
	}

	return is_idle;
}
static void gaudi2_hw_queues_lock(struct hl_device *hdev)
	__acquires(&gaudi2->hw_queues_lock)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;

	spin_lock(&gaudi2->hw_queues_lock);
}

static void gaudi2_hw_queues_unlock(struct hl_device *hdev)
	__releases(&gaudi2->hw_queues_lock)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;

	spin_unlock(&gaudi2->hw_queues_lock);
}

static u32 gaudi2_get_pci_id(struct hl_device *hdev)
{
	return hdev->pdev->device;
}

static int gaudi2_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;

	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_get_eeprom_data(hdev, data, max_size);
}
static void gaudi2_update_eq_ci(struct hl_device *hdev, u32 val)
{
	WREG32(mmCPU_IF_EQ_RD_OFFS, val);
}

static void *gaudi2_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;

	if (aggregate) {
		*size = (u32) sizeof(gaudi2->events_stat_aggregate);
		return gaudi2->events_stat_aggregate;
	}

	*size = (u32) sizeof(gaudi2->events_stat);
	return gaudi2->events_stat;
}
static void gaudi2_mmu_vdec_dcore_prepare(struct hl_device *hdev, int dcore_id,
				int dcore_vdec_id, u32 rw_asid, u32 rw_mmu_bp)
{
	u32 offset = (mmDCORE0_VDEC1_BRDG_CTRL_BASE - mmDCORE0_VDEC0_BRDG_CTRL_BASE) *
			dcore_vdec_id + DCORE_OFFSET * dcore_id;

	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp);
	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid);

	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp);
	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid);

	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp);
	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid);

	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp);
	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid);

	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp);
	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid);
}
static void gaudi2_mmu_dcore_prepare(struct hl_device *hdev, int dcore_id, u32 asid)
{
	u32 rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
			(asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 dcore_offset = dcore_id * DCORE_OFFSET;
	u32 vdec_id, i, ports_offset, reg_val;
	u8 edma_seq_base;

	/* EDMA */
	edma_seq_base = dcore_id * NUM_OF_EDMA_PER_DCORE;
	if (prop->edma_enabled_mask & BIT(edma_seq_base)) {
		WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
		WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
		WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0);
		WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid);
	}

	if (prop->edma_enabled_mask & BIT(edma_seq_base + 1)) {
		WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
		WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
		WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid);
		WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0);
	}

	/* Sync Mngr */
	WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV + dcore_offset, asid);
	/*
	 * Sync Mngrs on dcores 1 - 3 are exposed to user, so must use user ASID
	 * for any access type
	 */
	if (dcore_id > 0) {
		reg_val = (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_RD_SHIFT) |
			(asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_WR_SHIFT);
		WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID + dcore_offset, reg_val);
		WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_MMU_BP + dcore_offset, 0);
	}

	WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_MMU_BP + dcore_offset, 0);
	WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_ASID + dcore_offset, rw_asid);

	for (i = 0 ; i < NUM_OF_MME_SBTE_PORTS ; i++) {
		ports_offset = i * DCORE_MME_SBTE_OFFSET;
		WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_MMU_BP +
				dcore_offset + ports_offset, 0);
		WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_ASID +
				dcore_offset + ports_offset, rw_asid);
	}

	for (i = 0 ; i < NUM_OF_MME_WB_PORTS ; i++) {
		ports_offset = i * DCORE_MME_WB_OFFSET;
		WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_MMU_BP +
				dcore_offset + ports_offset, 0);
		WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_ASID +
				dcore_offset + ports_offset, rw_asid);
	}

	WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
	WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);

	/*
	 * Decoders
	 */
	for (vdec_id = 0 ; vdec_id < NUM_OF_DEC_PER_DCORE ; vdec_id++) {
		if (prop->decoder_enabled_mask & BIT(dcore_id * NUM_OF_DEC_PER_DCORE + vdec_id))
			gaudi2_mmu_vdec_dcore_prepare(hdev, dcore_id, vdec_id, rw_asid, 0);
	}
}
static void gudi2_mmu_vdec_shared_prepare(struct hl_device *hdev,
				int shared_vdec_id, u32 rw_asid, u32 rw_mmu_bp)
{
	u32 offset = (mmPCIE_VDEC1_BRDG_CTRL_BASE - mmPCIE_VDEC0_BRDG_CTRL_BASE) * shared_vdec_id;

	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp);
	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid);

	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp);
	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid);

	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp);
	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid);

	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp);
	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid);

	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp);
	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid);
}
static void gudi2_mmu_arc_farm_arc_dup_eng_prepare(struct hl_device *hdev, int arc_farm_id,
							u32 rw_asid, u32 rw_mmu_bp)
{
	u32 offset = (mmARC_FARM_ARC1_DUP_ENG_BASE - mmARC_FARM_ARC0_DUP_ENG_BASE) * arc_farm_id;

	WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_MMU_BP + offset, rw_mmu_bp);
	WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_ASID + offset, rw_asid);
}
static void gaudi2_arc_mmu_prepare(struct hl_device *hdev, u32 cpu_id, u32 asid)
{
	u32 reg_base, reg_offset, reg_val = 0;

	reg_base = gaudi2_arc_blocks_bases[cpu_id];

	/* Enable MMU and configure asid for all relevant ARC regions */
	reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_MMU_BP_MASK, 0);
	reg_val |= FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_0_ASID_MASK, asid);

	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION3_GENERAL);
	WREG32(reg_base + reg_offset, reg_val);

	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION4_HBM0_FW);
	WREG32(reg_base + reg_offset, reg_val);

	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION5_HBM1_GC_DATA);
	WREG32(reg_base + reg_offset, reg_val);

	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION6_HBM2_GC_DATA);
	WREG32(reg_base + reg_offset, reg_val);

	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION7_HBM3_GC_DATA);
	WREG32(reg_base + reg_offset, reg_val);

	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION9_PCIE);
	WREG32(reg_base + reg_offset, reg_val);

	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION10_GENERAL);
	WREG32(reg_base + reg_offset, reg_val);

	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION11_GENERAL);
	WREG32(reg_base + reg_offset, reg_val);

	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION12_GENERAL);
	WREG32(reg_base + reg_offset, reg_val);

	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION13_GENERAL);
	WREG32(reg_base + reg_offset, reg_val);

	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION14_GENERAL);
	WREG32(reg_base + reg_offset, reg_val);
}
static int gaudi2_arc_mmu_prepare_all(struct hl_device *hdev, u32 asid)
{
	int i;

	if (hdev->fw_components & FW_TYPE_BOOT_CPU)
		return hl_fw_cpucp_engine_core_asid_set(hdev, asid);

	for (i = CPU_ID_SCHED_ARC0 ; i < NUM_OF_ARC_FARMS_ARC ; i++)
		gaudi2_arc_mmu_prepare(hdev, i, asid);

	for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) {
		if (!gaudi2_is_queue_enabled(hdev, i))
			continue;

		gaudi2_arc_mmu_prepare(hdev, gaudi2_queue_id_to_arc_id[i], asid);
	}

	return 0;
}
static int gaudi2_mmu_shared_prepare(struct hl_device *hdev, u32 asid)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 rw_asid, offset;
	int rc, i;

	rw_asid = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_MASK, asid) |
			FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_MASK, asid);

	WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_ASID, rw_asid);
	WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP, 0);
	WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_ASID, rw_asid);
	WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_MMU_BP, 0);

	WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_ASID, rw_asid);
	WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP, 0);
	WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_ASID, rw_asid);
	WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_MMU_BP, 0);

	/* ROT */
	for (i = 0 ; i < NUM_OF_ROT ; i++) {
		offset = i * ROT_OFFSET;
		WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_ASID + offset, rw_asid);
		WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0);
		RMWREG32(mmROT0_CPL_QUEUE_AWUSER + offset, asid, MMUBP_ASID_MASK);
		RMWREG32(mmROT0_DESC_HBW_ARUSER_LO + offset, asid, MMUBP_ASID_MASK);
		RMWREG32(mmROT0_DESC_HBW_AWUSER_LO + offset, asid, MMUBP_ASID_MASK);
	}

	/* Shared Decoders are the last bits in the decoders mask */
	if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 0))
		gudi2_mmu_vdec_shared_prepare(hdev, 0, rw_asid, 0);

	if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 1))
		gudi2_mmu_vdec_shared_prepare(hdev, 1, rw_asid, 0);

	/* arc farm arc dup eng */
	for (i = 0 ; i < NUM_OF_ARC_FARMS_ARC ; i++)
		gudi2_mmu_arc_farm_arc_dup_eng_prepare(hdev, i, rw_asid, 0);

	rc = gaudi2_arc_mmu_prepare_all(hdev, asid);
	if (rc)
		return rc;

	return 0;
}
static void gaudi2_tpc_mmu_prepare(struct hl_device *hdev, int dcore, int inst, u32 offset,
					struct iterate_module_ctx *ctx)
{
	struct gaudi2_tpc_mmu_data *mmu_data = ctx->data;

	WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_MMU_BP + offset, 0);
	WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_ASID + offset, mmu_data->rw_asid);
	WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0);
	WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_ASID + offset, mmu_data->rw_asid);
}
/* zero the MMUBP and set the ASID */
static int gaudi2_mmu_prepare(struct hl_device *hdev, u32 asid)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	struct gaudi2_tpc_mmu_data tpc_mmu_data;
	struct iterate_module_ctx tpc_iter = {
		.fn = &gaudi2_tpc_mmu_prepare,
		.data = &tpc_mmu_data,
	};
	int rc, i;

	if (asid & ~DCORE0_HMMU0_STLB_ASID_ASID_MASK) {
		dev_crit(hdev->dev, "asid %u is too big\n", asid);
		return -EINVAL;
	}

	if (!(gaudi2->hw_cap_initialized & HW_CAP_MMU_MASK))
		return 0;

	rc = gaudi2_mmu_shared_prepare(hdev, asid);
	if (rc)
		return rc;

	/* configure DCORE MMUs */
	tpc_mmu_data.rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
				(asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
	gaudi2_iterate_tpcs(hdev, &tpc_iter);
	for (i = 0 ; i < NUM_OF_DCORES ; i++)
		gaudi2_mmu_dcore_prepare(hdev, i, asid);

	return 0;
}
static inline bool is_info_event(u32 event)
{
	switch (event) {
	case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S ... GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
		return true;
	default:
		return false;
	}
}
6812 static void gaudi2_print_irq_info(struct hl_device *hdev, u16 event_type)
6815 bool event_valid = false;
6817 /* return in case of NIC status event - these events are received periodically and not as
6818 * an indication to an error, thus not printed.
6820 if (event_type >= GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG0 &&
6821 event_type <= GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1)
6824 if (gaudi2_irq_map_table[event_type].valid) {
6825 snprintf(desc, sizeof(desc), "%s", gaudi2_irq_map_table[event_type].name);
6830 snprintf(desc, sizeof(desc), "N/A");
6832 if (is_info_event(event_type))
6833 dev_info_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
6836 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
6840 static bool gaudi2_handle_ecc_event(struct hl_device *hdev, u16 event_type,
6841 struct hl_eq_ecc_data *ecc_data)
6843 u64 ecc_address = 0, ecc_syndrom = 0;
6844 u8 memory_wrapper_idx = 0;
6846 ecc_address = le64_to_cpu(ecc_data->ecc_address);
6847 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
6848 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
6851 "ECC error detected. address: %#llx. Syndrom: %#llx. block id %u. critical %u.\n",
6852 ecc_address, ecc_syndrom, memory_wrapper_idx, ecc_data->is_critical);
6854 return !!ecc_data->is_critical;
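/*
 * Editor's note (illustrative, not from the original source): the "!!"
 * normalizes the is_critical flag into a boolean, e.g. !!0x4 == true and
 * !!0 == false; the caller is assumed to use the returned value when
 * deciding whether a hard reset is required.
 */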
6858 * gaudi2_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
6860 * @idx: the current pi/ci value
6861 * @q_len: the queue length (power of 2)
6863 * @return the cyclically decremented index
6865 static inline u32 gaudi2_queue_idx_dec(u32 idx, u32 q_len)
6867 u32 mask = q_len - 1;
6870 * a modular decrement is equivalent to adding (queue_size - 1);
6871 * we then take the LSBs to make sure the value stays in the
6872 * range [0, queue_len - 1]
6874 return (idx + q_len - 1) & mask;
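/*
 * Illustrative sketch (editor's note, not part of the driver): with
 * q_len = 8 the mask is 0x7, so:
 *
 *	gaudi2_queue_idx_dec(0, 8) == (0 + 7) & 0x7 == 7
 *	gaudi2_queue_idx_dec(3, 8) == (3 + 7) & 0x7 == 2
 *
 * i.e. a branchless decrement that wraps around, relying on q_len being a
 * power of 2.
 */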
6878 * gaudi2_print_sw_config_stream_data - print SW config stream data
6880 * @hdev: pointer to the habanalabs device structure
6881 * @stream: the QMAN's stream
6882 * @qman_base: base address of QMAN registers block
6884 static void gaudi2_print_sw_config_stream_data(struct hl_device *hdev,
6885 u32 stream, u64 qman_base)
6887 u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
6888 u32 cq_ptr_lo_off, size;
6890 cq_ptr_lo_off = mmDCORE0_TPC0_QM_CQ_PTR_LO_1 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0;
6892 cq_ptr_lo = qman_base + (mmDCORE0_TPC0_QM_CQ_PTR_LO_0 - mmDCORE0_TPC0_QM_BASE) +
6893 stream * cq_ptr_lo_off;
6895 cq_ptr_hi = cq_ptr_lo + (mmDCORE0_TPC0_QM_CQ_PTR_HI_0 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0);
6897 cq_tsize = cq_ptr_lo + (mmDCORE0_TPC0_QM_CQ_TSIZE_0 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0);
6899 cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
6900 size = RREG32(cq_tsize);
6901 dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %x\n",
6902 stream, cq_ptr, size);
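/*
 * Illustrative sketch (editor's note, not part of the driver): per-stream
 * registers sit at a fixed stride, so the CQ_PTR_LO address for stream N in
 * any QMAN block is recovered as:
 *
 *	qman_base + (mmDCORE0_TPC0_QM_CQ_PTR_LO_0 - mmDCORE0_TPC0_QM_BASE) +
 *		N * (mmDCORE0_TPC0_QM_CQ_PTR_LO_1 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0)
 *
 * The DCORE0_TPC0 offsets are usable here for every QMAN because all QMAN
 * blocks share the same register layout.
 */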
6906 * gaudi2_print_last_pqes_on_err - print last PQEs on error
6908 * @hdev: pointer to the habanalabs device structure
6909 * @qid_base: first QID of the QMAN (out of 4 streams)
6910 * @stream: the QMAN's stream
6911 * @qman_base: base address of QMAN registers block
6912 * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
6914 static void gaudi2_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base, u32 stream,
6915 u64 qman_base, bool pr_sw_conf)
6917 u32 ci, qm_ci_stream_off;
6918 struct hl_hw_queue *q;
6922 q = &hdev->kernel_queues[qid_base + stream];
6924 qm_ci_stream_off = mmDCORE0_TPC0_QM_PQ_CI_1 - mmDCORE0_TPC0_QM_PQ_CI_0;
6925 pq_ci = qman_base + (mmDCORE0_TPC0_QM_PQ_CI_0 - mmDCORE0_TPC0_QM_BASE) +
6926 stream * qm_ci_stream_off;
6928 hdev->asic_funcs->hw_queues_lock(hdev);
6931 gaudi2_print_sw_config_stream_data(hdev, stream, qman_base);
6935 /* we should start printing from ci - 1 */
6936 ci = gaudi2_queue_idx_dec(ci, HL_QUEUE_LENGTH);
6938 for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
6943 bd = q->kernel_address;
6946 len = le32_to_cpu(bd->len);
6947 /* len 0 means an uninitialized entry - break */
6951 addr = le64_to_cpu(bd->ptr);
6953 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %x\n",
6954 stream, ci, addr, len);
6956 /* get previous ci, wrap if needed */
6957 ci = gaudi2_queue_idx_dec(ci, HL_QUEUE_LENGTH);
6960 hdev->asic_funcs->hw_queues_unlock(hdev);
6964 * print_qman_data_on_err - extract QMAN data on error
6966 * @hdev: pointer to the habanalabs device structure
6967 * @qid_base: first QID of the QMAN (out of 4 streams)
6968 * @stream: the QMAN's stream
6969 * @qman_base: base address of QMAN registers block
6971 * This function attempts to extract as much data as possible on a QMAN error.
6972 * For an upper CP, print the SW config stream data and the last 8 PQEs.
6973 * For the lower CP, print the SW config stream data and the last PQEs of ALL 4 upper CPs.
6975 static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base, u32 stream, u64 qman_base)
6979 if (stream != QMAN_STREAMS) {
6980 gaudi2_print_last_pqes_on_err(hdev, qid_base, stream, qman_base, true);
6981 return;
6984 gaudi2_print_sw_config_stream_data(hdev, stream, qman_base);
6986 for (i = 0 ; i < QMAN_STREAMS ; i++)
6987 gaudi2_print_last_pqes_on_err(hdev, qid_base, i, qman_base, false);
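/*
 * Editor's note (illustrative assumption): stream == QMAN_STREAMS acts here
 * as the lower-CP sentinel. With QMAN_STREAMS == 4, streams 0-3 are the
 * upper CPs, and a call with stream == 4 falls through to the loop that
 * prints the last PQEs of all four upper CPs.
 */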
6990 static void gaudi2_handle_qman_err_generic(struct hl_device *hdev, const char *qm_name,
6991 u64 qman_base, u32 qid_base)
6993 u32 i, j, glbl_sts_val, arb_err_val, num_error_causes;
6994 u64 glbl_sts_addr, arb_err_addr;
6997 glbl_sts_addr = qman_base + (mmDCORE0_TPC0_QM_GLBL_ERR_STS_0 - mmDCORE0_TPC0_QM_BASE);
6998 arb_err_addr = qman_base + (mmDCORE0_TPC0_QM_ARB_ERR_CAUSE - mmDCORE0_TPC0_QM_BASE);
7000 /* Iterate through all stream GLBL_ERR_STS registers + Lower CP */
7001 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7002 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7007 if (i == QMAN_STREAMS) {
7008 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7009 num_error_causes = GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE;
7011 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7012 num_error_causes = GAUDI2_NUM_OF_QM_ERR_CAUSE;
7015 for (j = 0 ; j < num_error_causes ; j++)
7016 if (glbl_sts_val & BIT(j))
7017 dev_err_ratelimited(hdev->dev, "%s %s. err cause: %s\n",
7020 gaudi2_qman_lower_cp_error_cause[j] :
7021 gaudi2_qman_error_cause[j]);
7023 print_qman_data_on_err(hdev, qid_base, i, qman_base);
7026 arb_err_val = RREG32(arb_err_addr);
7031 for (j = 0 ; j < GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7032 if (arb_err_val & BIT(j))
7033 dev_err_ratelimited(hdev->dev, "%s ARB_ERR. err cause: %s\n",
7034 qm_name, gaudi2_qman_arb_error_cause[j]);
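/*
 * Illustrative sketch (editor's note, not part of the driver): both loops
 * above follow the generic "status bit -> cause string" decode pattern:
 *
 *	val = 0x5;			(bits 0 and 2 set)
 *	for (j = 0 ; j < n ; j++)
 *		if (val & BIT(j))
 *			...		(reports cause[0] and cause[2])
 */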
7038 static void gaudi2_razwi_rr_hbw_shared_printf_info(struct hl_device *hdev,
7039 u64 rtr_mstr_if_base_addr, bool is_write, char *name,
7040 bool read_razwi_regs, struct hl_eq_razwi_info *razwi_info,
7041 enum gaudi2_engine_id id, u64 *event_mask)
7043 u32 razwi_hi, razwi_lo, razwi_xy;
7048 if (read_razwi_regs) {
7049 razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HI);
7050 razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_LO);
7051 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_XY);
7053 razwi_hi = le32_to_cpu(razwi_info->hbw.rr_aw_razwi_hi_reg);
7054 razwi_lo = le32_to_cpu(razwi_info->hbw.rr_aw_razwi_lo_reg);
7055 razwi_xy = le32_to_cpu(razwi_info->hbw.rr_aw_razwi_id_reg);
7057 rd_wr_flag = HL_RAZWI_WRITE;
7059 if (read_razwi_regs) {
7060 razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HI);
7061 razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_LO);
7062 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_XY);
7064 razwi_hi = le32_to_cpu(razwi_info->hbw.rr_ar_razwi_hi_reg);
7065 razwi_lo = le32_to_cpu(razwi_info->hbw.rr_ar_razwi_lo_reg);
7066 razwi_xy = le32_to_cpu(razwi_info->hbw.rr_ar_razwi_id_reg);
7068 rd_wr_flag = HL_RAZWI_READ;
7071 hl_handle_razwi(hdev, (u64)razwi_hi << 32 | razwi_lo, &eng_id, 1,
7072 rd_wr_flag | HL_RAZWI_HBW, event_mask);
7074 dev_err_ratelimited(hdev->dev,
7075 "%s-RAZWI SHARED RR HBW %s error, address %#llx, Initiator coordinates 0x%x\n",
7076 name, is_write ? "WR" : "RD", (u64)razwi_hi << 32 | razwi_lo, razwi_xy);
7079 static void gaudi2_razwi_rr_lbw_shared_printf_info(struct hl_device *hdev,
7080 u64 rtr_mstr_if_base_addr, bool is_write, char *name,
7081 bool read_razwi_regs, struct hl_eq_razwi_info *razwi_info,
7082 enum gaudi2_engine_id id, u64 *event_mask)
7084 u32 razwi_addr, razwi_xy;
7089 if (read_razwi_regs) {
7090 razwi_addr = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI);
7091 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_XY);
7093 razwi_addr = le32_to_cpu(razwi_info->lbw.rr_aw_razwi_reg);
7094 razwi_xy = le32_to_cpu(razwi_info->lbw.rr_aw_razwi_id_reg);
7097 rd_wr_flag = HL_RAZWI_WRITE;
7099 if (read_razwi_regs) {
7100 razwi_addr = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI);
7101 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_XY);
7103 razwi_addr = le32_to_cpu(razwi_info->lbw.rr_ar_razwi_reg);
7104 razwi_xy = le32_to_cpu(razwi_info->lbw.rr_ar_razwi_id_reg);
7107 rd_wr_flag = HL_RAZWI_READ;
7110 hl_handle_razwi(hdev, razwi_addr, &eng_id, 1, rd_wr_flag | HL_RAZWI_LBW, event_mask);
7111 dev_err_ratelimited(hdev->dev,
7112 "%s-RAZWI SHARED RR LBW %s error, mstr_if 0x%llx, captured address 0x%x Initiator coordinates 0x%x\n",
7113 name, is_write ? "WR" : "RD", rtr_mstr_if_base_addr, razwi_addr,
7117 static enum gaudi2_engine_id gaudi2_razwi_calc_engine_id(struct hl_device *hdev,
7118 enum razwi_event_sources module, u8 module_idx)
7122 if (module_idx == (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES))
7123 return GAUDI2_DCORE0_ENGINE_ID_TPC_6;
7124 return (((module_idx / NUM_OF_TPC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
7125 (module_idx % NUM_OF_TPC_PER_DCORE) +
7126 (GAUDI2_DCORE0_ENGINE_ID_TPC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0));
7129 return ((GAUDI2_DCORE0_ENGINE_ID_MME - GAUDI2_DCORE0_ENGINE_ID_EDMA_0) +
7130 (module_idx * ENGINE_ID_DCORE_OFFSET));
7133 return (((module_idx / NUM_OF_EDMA_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
7134 (module_idx % NUM_OF_EDMA_PER_DCORE));
7137 return (GAUDI2_ENGINE_ID_PDMA_0 + module_idx);
7140 return (GAUDI2_ENGINE_ID_NIC0_0 + (NIC_NUMBER_OF_QM_PER_MACRO * module_idx));
7143 if (module_idx == 8)
7144 return GAUDI2_PCIE_ENGINE_ID_DEC_0;
7146 if (module_idx == 9)
7147 return GAUDI2_PCIE_ENGINE_ID_DEC_1;
7149 return (((module_idx / NUM_OF_DEC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
7150 (module_idx % NUM_OF_DEC_PER_DCORE) +
7151 (GAUDI2_DCORE0_ENGINE_ID_DEC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0));
7154 return GAUDI2_ENGINE_ID_ROT_0 + module_idx;
7157 return GAUDI2_ENGINE_ID_SIZE;
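/*
 * Illustrative sketch (editor's note, not part of the driver): assuming
 * NUM_OF_TPC_PER_DCORE == 6 and an engine-id enum that starts at
 * GAUDI2_DCORE0_ENGINE_ID_EDMA_0 with one ENGINE_ID_DCORE_OFFSET block per
 * dcore, a TPC module_idx of 7 maps to dcore 1 (7 / 6) and local TPC 1
 * (7 % 6), i.e. GAUDI2_DCORE1_ENGINE_ID_TPC_1. TPC24, the shared TPC, is
 * special-cased above to GAUDI2_DCORE0_ENGINE_ID_TPC_6.
 */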
7162 * This function handles RR (Range Register) hit events
7163 * raised by initiators, not PSOC RAZWI.
7165 static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev,
7166 enum razwi_event_sources module, u8 module_idx,
7167 u8 module_sub_idx, struct hl_eq_razwi_info *razwi_info,
7170 bool via_sft = false, read_razwi_regs = false;
7171 u32 rtr_id, dcore_id, dcore_rtr_id, sft_id, eng_id;
7172 u64 rtr_mstr_if_base_addr;
7173 u32 hbw_shrd_aw = 0, hbw_shrd_ar = 0;
7174 u32 lbw_shrd_aw = 0, lbw_shrd_ar = 0;
7175 char initiator_name[64];
7177 if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX) || !razwi_info)
7178 read_razwi_regs = true;
7182 rtr_id = gaudi2_tpc_initiator_rtr_id[module_idx];
7183 sprintf(initiator_name, "TPC_%u", module_idx);
7186 sprintf(initiator_name, "MME_%u", module_idx);
7187 switch (module_sub_idx) {
7189 rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap0;
7192 rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap1;
7195 rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].write;
7198 rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].read;
7201 rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte0;
7204 rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte1;
7207 rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte2;
7210 rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte3;
7213 rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte4;
7220 sft_id = gaudi2_edma_initiator_sft_id[module_idx].interface_id;
7221 dcore_id = gaudi2_edma_initiator_sft_id[module_idx].dcore_id;
7223 sprintf(initiator_name, "EDMA_%u", module_idx);
7226 rtr_id = gaudi2_pdma_initiator_rtr_id[module_idx];
7227 sprintf(initiator_name, "PDMA_%u", module_idx);
7230 rtr_id = gaudi2_nic_initiator_rtr_id[module_idx];
7231 sprintf(initiator_name, "NIC_%u", module_idx);
7234 rtr_id = gaudi2_dec_initiator_rtr_id[module_idx];
7235 sprintf(initiator_name, "DEC_%u", module_idx);
7238 rtr_id = gaudi2_rot_initiator_rtr_id[module_idx];
7239 sprintf(initiator_name, "ROT_%u", module_idx);
7245 if (!read_razwi_regs) {
7246 if (le32_to_cpu(razwi_info->razwi_happened_mask) & RAZWI_HAPPENED_HBW) {
7247 hbw_shrd_aw = le32_to_cpu(razwi_info->razwi_happened_mask) &
7249 hbw_shrd_ar = le32_to_cpu(razwi_info->razwi_happened_mask) &
7251 } else if (le32_to_cpu(razwi_info->razwi_happened_mask) & RAZWI_HAPPENED_LBW) {
7252 lbw_shrd_aw = le32_to_cpu(razwi_info->razwi_happened_mask) &
7254 lbw_shrd_ar = le32_to_cpu(razwi_info->razwi_happened_mask) &
7257 rtr_mstr_if_base_addr = 0;
7262 /* Find router mstr_if register base */
7264 rtr_mstr_if_base_addr = mmSFT0_HBW_RTR_IF0_RTR_CTRL_BASE +
7265 dcore_id * SFT_DCORE_OFFSET +
7266 sft_id * SFT_IF_OFFSET +
7269 dcore_id = rtr_id / NUM_OF_RTR_PER_DCORE;
7270 dcore_rtr_id = rtr_id % NUM_OF_RTR_PER_DCORE;
7271 rtr_mstr_if_base_addr = mmDCORE0_RTR0_CTRL_BASE +
7272 dcore_id * DCORE_OFFSET +
7273 dcore_rtr_id * DCORE_RTR_OFFSET +
7277 /* Find out event cause by reading "RAZWI_HAPPENED" registers */
7278 hbw_shrd_aw = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED);
7280 hbw_shrd_ar = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED);
7283 /* SFT has a separate MSTR_IF for LBW; only there can we
7284 * read the LBW RAZWI-related registers
7288 base = mmSFT0_HBW_RTR_IF0_RTR_CTRL_BASE + dcore_id * SFT_DCORE_OFFSET +
7289 RTR_LBW_MSTR_IF_OFFSET;
7291 lbw_shrd_aw = RREG32(base + RR_SHRD_LBW_AW_RAZWI_HAPPENED);
7293 lbw_shrd_ar = RREG32(base + RR_SHRD_LBW_AR_RAZWI_HAPPENED);
7295 lbw_shrd_aw = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED);
7297 lbw_shrd_ar = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED);
7301 /* check if there is no RR RAZWI indication at all */
7302 if (!hbw_shrd_aw && !hbw_shrd_ar && !lbw_shrd_aw && !lbw_shrd_ar)
7303 return;
7305 eng_id = gaudi2_razwi_calc_engine_id(hdev, module, module_idx);
7307 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, true,
7308 initiator_name, read_razwi_regs, razwi_info,
7309 eng_id, event_mask);
7311 /* Clear event indication */
7312 if (read_razwi_regs)
7313 WREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED, hbw_shrd_aw);
7317 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, false,
7318 initiator_name, read_razwi_regs, razwi_info,
7319 eng_id, event_mask);
7321 /* Clear event indication */
7322 if (read_razwi_regs)
7323 WREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED, hbw_shrd_ar);
7327 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, true,
7328 initiator_name, read_razwi_regs, razwi_info,
7329 eng_id, event_mask);
7331 /* Clear event indication */
7332 if (read_razwi_regs)
7333 WREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED, lbw_shrd_aw);
7337 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, false,
7338 initiator_name, read_razwi_regs, razwi_info,
7339 eng_id, event_mask);
7341 /* Clear event indication */
7342 if (read_razwi_regs)
7343 WREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED, lbw_shrd_ar);
7347 static void gaudi2_check_if_razwi_happened(struct hl_device *hdev)
7349 struct asic_fixed_properties *prop = &hdev->asic_prop;
7350 u8 mod_idx, sub_mod;
7352 /* check all TPCs */
7353 for (mod_idx = 0 ; mod_idx < (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1) ; mod_idx++) {
7354 if (prop->tpc_enabled_mask & BIT(mod_idx))
7355 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, mod_idx, 0, NULL,
7359 /* check all MMEs */
7360 for (mod_idx = 0 ; mod_idx < (NUM_OF_MME_PER_DCORE * NUM_OF_DCORES) ; mod_idx++)
7361 for (sub_mod = MME_WAP0 ; sub_mod < MME_INITIATORS_MAX ; sub_mod++)
7362 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mod_idx,
7363 sub_mod, NULL, NULL);
7365 /* check all EDMAs */
7366 for (mod_idx = 0 ; mod_idx < (NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES) ; mod_idx++)
7367 if (prop->edma_enabled_mask & BIT(mod_idx))
7368 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_EDMA, mod_idx, 0, NULL,
7371 /* check all PDMAs */
7372 for (mod_idx = 0 ; mod_idx < NUM_OF_PDMA ; mod_idx++)
7373 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_PDMA, mod_idx, 0, NULL,
7376 /* check all NICs */
7377 for (mod_idx = 0 ; mod_idx < NIC_NUMBER_OF_PORTS ; mod_idx++)
7378 if (hdev->nic_ports_mask & BIT(mod_idx))
7379 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_NIC, mod_idx >> 1, 0,
7382 /* check all DECs */
7383 for (mod_idx = 0 ; mod_idx < NUMBER_OF_DEC ; mod_idx++)
7384 if (prop->decoder_enabled_mask & BIT(mod_idx))
7385 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, mod_idx, 0, NULL,
7388 /* check all ROTs */
7389 for (mod_idx = 0 ; mod_idx < NUM_OF_ROT ; mod_idx++)
7390 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, mod_idx, 0, NULL, NULL);
7393 static const char *gaudi2_get_initiators_name(u32 rtr_id)
7397 return "DEC0/1/8/9, TPC24, PDMA0/1, PMMU, PCIE_IF, EDMA0/2, HMMU0/2/4/6, CPU";
7405 return "MME0_SBTE0/1";
7407 return "MME0_WAP0/SBTE2";
7409 return "MME0_CTRL_WR/SBTE3";
7411 return "MME0_WAP1/CTRL_RD/SBTE4";
7413 return "MME1_WAP1/CTRL_RD/SBTE4";
7415 return "MME1_CTRL_WR/SBTE3";
7417 return "MME1_WAP0/SBTE2";
7419 return "MME1_SBTE0/1";
7427 return "DEC2/3, NIC0/1/2/3/4, ARC_FARM, KDMA, EDMA1/3, HMMU1/3/5/7";
7429 return "DEC4/5, NIC5/6/7/8, EDMA4/6, HMMU8/10/12/14, ROT0";
7437 return "MME2_SBTE0/1";
7439 return "MME2_WAP0/SBTE2";
7441 return "MME2_CTRL_WR/SBTE3";
7443 return "MME2_WAP1/CTRL_RD/SBTE4";
7445 return "MME3_WAP1/CTRL_RD/SBTE4";
7447 return "MME3_CTRL_WR/SBTE3";
7449 return "MME3_WAP0/SBTE2";
7451 return "MME3_SBTE0/1";
7459 return "DEC6/7, NIC9/10/11, EDMA5/7, HMMU9/11/13/15, ROT1, PSOC";
7465 static u16 gaudi2_get_razwi_initiators(u32 rtr_id, u16 *engines)
7469 engines[0] = GAUDI2_DCORE0_ENGINE_ID_DEC_0;
7470 engines[1] = GAUDI2_DCORE0_ENGINE_ID_DEC_1;
7471 engines[2] = GAUDI2_PCIE_ENGINE_ID_DEC_0;
7472 engines[3] = GAUDI2_PCIE_ENGINE_ID_DEC_1;
7473 engines[4] = GAUDI2_DCORE0_ENGINE_ID_TPC_6;
7474 engines[5] = GAUDI2_ENGINE_ID_PDMA_0;
7475 engines[6] = GAUDI2_ENGINE_ID_PDMA_1;
7476 engines[7] = GAUDI2_ENGINE_ID_PCIE;
7477 engines[8] = GAUDI2_DCORE0_ENGINE_ID_EDMA_0;
7478 engines[9] = GAUDI2_DCORE1_ENGINE_ID_EDMA_0;
7479 engines[10] = GAUDI2_ENGINE_ID_PSOC;
7483 engines[0] = GAUDI2_DCORE0_ENGINE_ID_TPC_0;
7484 engines[1] = GAUDI2_DCORE0_ENGINE_ID_TPC_1;
7488 engines[0] = GAUDI2_DCORE0_ENGINE_ID_TPC_2;
7489 engines[1] = GAUDI2_DCORE0_ENGINE_ID_TPC_3;
7493 engines[0] = GAUDI2_DCORE0_ENGINE_ID_TPC_4;
7494 engines[1] = GAUDI2_DCORE0_ENGINE_ID_TPC_5;
7501 engines[0] = GAUDI2_DCORE0_ENGINE_ID_MME;
7508 engines[0] = GAUDI2_DCORE1_ENGINE_ID_MME;
7512 engines[0] = GAUDI2_DCORE1_ENGINE_ID_TPC_4;
7513 engines[1] = GAUDI2_DCORE1_ENGINE_ID_TPC_5;
7517 engines[0] = GAUDI2_DCORE1_ENGINE_ID_TPC_2;
7518 engines[1] = GAUDI2_DCORE1_ENGINE_ID_TPC_3;
7522 engines[0] = GAUDI2_DCORE1_ENGINE_ID_TPC_0;
7523 engines[1] = GAUDI2_DCORE1_ENGINE_ID_TPC_1;
7527 engines[0] = GAUDI2_DCORE1_ENGINE_ID_DEC_0;
7528 engines[1] = GAUDI2_DCORE1_ENGINE_ID_DEC_1;
7529 engines[2] = GAUDI2_ENGINE_ID_NIC0_0;
7530 engines[3] = GAUDI2_ENGINE_ID_NIC1_0;
7531 engines[4] = GAUDI2_ENGINE_ID_NIC2_0;
7532 engines[5] = GAUDI2_ENGINE_ID_NIC3_0;
7533 engines[6] = GAUDI2_ENGINE_ID_NIC4_0;
7534 engines[7] = GAUDI2_ENGINE_ID_ARC_FARM;
7535 engines[8] = GAUDI2_ENGINE_ID_KDMA;
7536 engines[9] = GAUDI2_DCORE0_ENGINE_ID_EDMA_1;
7537 engines[10] = GAUDI2_DCORE1_ENGINE_ID_EDMA_1;
7541 engines[0] = GAUDI2_DCORE2_ENGINE_ID_DEC_0;
7542 engines[1] = GAUDI2_DCORE2_ENGINE_ID_DEC_1;
7543 engines[2] = GAUDI2_ENGINE_ID_NIC5_0;
7544 engines[3] = GAUDI2_ENGINE_ID_NIC6_0;
7545 engines[4] = GAUDI2_ENGINE_ID_NIC7_0;
7546 engines[5] = GAUDI2_ENGINE_ID_NIC8_0;
7547 engines[6] = GAUDI2_DCORE2_ENGINE_ID_EDMA_0;
7548 engines[7] = GAUDI2_DCORE3_ENGINE_ID_EDMA_0;
7549 engines[8] = GAUDI2_ENGINE_ID_ROT_0;
7553 engines[0] = GAUDI2_DCORE2_ENGINE_ID_TPC_4;
7554 engines[1] = GAUDI2_DCORE2_ENGINE_ID_TPC_5;
7558 engines[0] = GAUDI2_DCORE2_ENGINE_ID_TPC_2;
7559 engines[1] = GAUDI2_DCORE2_ENGINE_ID_TPC_3;
7563 engines[0] = GAUDI2_DCORE2_ENGINE_ID_TPC_0;
7564 engines[1] = GAUDI2_DCORE2_ENGINE_ID_TPC_1;
7571 engines[0] = GAUDI2_DCORE2_ENGINE_ID_MME;
7577 engines[0] = GAUDI2_DCORE3_ENGINE_ID_MME;
7580 engines[0] = GAUDI2_DCORE3_ENGINE_ID_TPC_0;
7581 engines[1] = GAUDI2_DCORE3_ENGINE_ID_TPC_1;
7584 engines[0] = GAUDI2_DCORE3_ENGINE_ID_TPC_2;
7585 engines[1] = GAUDI2_DCORE3_ENGINE_ID_TPC_3;
7588 engines[0] = GAUDI2_DCORE3_ENGINE_ID_TPC_4;
7589 engines[1] = GAUDI2_DCORE3_ENGINE_ID_TPC_5;
7592 engines[0] = GAUDI2_DCORE3_ENGINE_ID_DEC_0;
7593 engines[1] = GAUDI2_DCORE3_ENGINE_ID_DEC_1;
7594 engines[2] = GAUDI2_ENGINE_ID_NIC9_0;
7595 engines[3] = GAUDI2_ENGINE_ID_NIC10_0;
7596 engines[4] = GAUDI2_ENGINE_ID_NIC11_0;
7597 engines[5] = GAUDI2_DCORE2_ENGINE_ID_EDMA_1;
7598 engines[6] = GAUDI2_DCORE3_ENGINE_ID_EDMA_1;
7599 engines[7] = GAUDI2_ENGINE_ID_ROT_1;
7600 engines[8] = GAUDI2_ENGINE_ID_ROT_0;
7607 static void gaudi2_razwi_unmapped_addr_hbw_printf_info(struct hl_device *hdev, u32 rtr_id,
7608 u64 rtr_ctrl_base_addr, bool is_write,
7611 u16 engines[HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR], num_of_eng;
7612 u32 razwi_hi, razwi_lo;
7615 num_of_eng = gaudi2_get_razwi_initiators(rtr_id, &engines[0]);
7618 razwi_hi = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_ADDR_HI);
7619 razwi_lo = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_ADDR_LO);
7620 rd_wr_flag = HL_RAZWI_WRITE;
7622 /* Clear set indication */
7623 WREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_SET, 0x1);
7625 razwi_hi = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_ADDR_HI);
7626 razwi_lo = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_ADDR_LO);
7627 rd_wr_flag = HL_RAZWI_READ;
7629 /* Clear set indication */
7630 WREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_SET, 0x1);
7633 hl_handle_razwi(hdev, (u64)razwi_hi << 32 | razwi_lo, &engines[0], num_of_eng,
7634 rd_wr_flag | HL_RAZWI_HBW, event_mask);
7635 dev_err_ratelimited(hdev->dev,
7636 "RAZWI PSOC unmapped HBW %s error, rtr id %u, address %#llx\n",
7637 is_write ? "WR" : "RD", rtr_id, (u64)razwi_hi << 32 | razwi_lo);
7639 dev_err_ratelimited(hdev->dev,
7640 "Initiators: %s\n", gaudi2_get_initiators_name(rtr_id));
7643 static void gaudi2_razwi_unmapped_addr_lbw_printf_info(struct hl_device *hdev, u32 rtr_id,
7644 u64 rtr_ctrl_base_addr, bool is_write,
7647 u16 engines[HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR], num_of_eng;
7651 num_of_eng = gaudi2_get_razwi_initiators(rtr_id, &engines[0]);
7654 razwi_addr = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_ADDR);
7655 rd_wr_flag = HL_RAZWI_WRITE;
7657 /* Clear set indication */
7658 WREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_SET, 0x1);
7660 razwi_addr = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_ADDR);
7661 rd_wr_flag = HL_RAZWI_READ;
7663 /* Clear set indication */
7664 WREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_SET, 0x1);
7667 hl_handle_razwi(hdev, razwi_addr, &engines[0], num_of_eng, rd_wr_flag | HL_RAZWI_LBW,
7669 dev_err_ratelimited(hdev->dev,
7670 "RAZWI PSOC unmapped LBW %s error, rtr id %u, address %#x\n",
7671 is_write ? "WR" : "RD", rtr_id, razwi_addr);
7673 dev_err_ratelimited(hdev->dev,
7674 "Initiators: %s\n", gaudi2_get_initiators_name(rtr_id));
7677 /* PSOC RAZWI interrupt occurs only when trying to access a bad address */
7678 static void gaudi2_ack_psoc_razwi_event_handler(struct hl_device *hdev, u64 *event_mask)
7680 u32 hbw_aw_set, hbw_ar_set, lbw_aw_set, lbw_ar_set, rtr_id, dcore_id, dcore_rtr_id, xy,
7681 razwi_mask_info, razwi_intr = 0;
7682 int rtr_map_arr_len = NUM_OF_RTR_PER_DCORE * NUM_OF_DCORES;
7683 u64 rtr_ctrl_base_addr;
7685 if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX)) {
7686 razwi_intr = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT);
7691 razwi_mask_info = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_MASK_INFO);
7692 xy = FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_L_MASK, razwi_mask_info);
7694 dev_err_ratelimited(hdev->dev,
7695 "PSOC RAZWI interrupt: Mask %d, AR %d, AW %d, AXUSER_L 0x%x AXUSER_H 0x%x\n",
7696 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_MASK_MASK, razwi_mask_info),
7697 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AR_MASK, razwi_mask_info),
7698 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AW_MASK, razwi_mask_info),
7700 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_H_MASK, razwi_mask_info));
7703 dev_err_ratelimited(hdev->dev,
7704 "PSOC RAZWI interrupt: received event from 0 rtr coordinates\n");
7708 /* Find router id by router coordinates */
7709 for (rtr_id = 0 ; rtr_id < rtr_map_arr_len ; rtr_id++)
7710 if (rtr_coordinates_to_rtr_id[rtr_id] == xy)
7713 if (rtr_id == rtr_map_arr_len) {
7714 dev_err_ratelimited(hdev->dev,
7715 "PSOC RAZWI interrupt: invalid rtr coordinates (0x%x)\n", xy);
7719 /* Find router mstr_if register base */
7720 dcore_id = rtr_id / NUM_OF_RTR_PER_DCORE;
7721 dcore_rtr_id = rtr_id % NUM_OF_RTR_PER_DCORE;
7722 rtr_ctrl_base_addr = mmDCORE0_RTR0_CTRL_BASE + dcore_id * DCORE_OFFSET +
7723 dcore_rtr_id * DCORE_RTR_OFFSET;
7725 hbw_aw_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_SET);
7726 hbw_ar_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_SET);
7727 lbw_aw_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_SET);
7728 lbw_ar_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_SET);
7731 gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, rtr_id,
7732 rtr_ctrl_base_addr, true, event_mask);
7735 gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, rtr_id,
7736 rtr_ctrl_base_addr, false, event_mask);
7739 gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_id,
7740 rtr_ctrl_base_addr, true, event_mask);
7743 gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_id,
7744 rtr_ctrl_base_addr, false, event_mask);
7747 /* Clear Interrupts only on pldm or if f/w doesn't handle interrupts */
7748 if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX))
7749 WREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT, razwi_intr);
7752 static void _gaudi2_handle_qm_sei_err(struct hl_device *hdev, u64 qman_base)
7754 u32 i, sts_val, sts_clr_val = 0;
7756 sts_val = RREG32(qman_base + QM_SEI_STATUS_OFFSET);
7758 for (i = 0 ; i < GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE ; i++) {
7759 if (sts_val & BIT(i)) {
7760 dev_err_ratelimited(hdev->dev, "QM SEI. err cause: %s\n",
7761 gaudi2_qm_sei_error_cause[i]);
7762 sts_clr_val |= BIT(i);
7766 WREG32(qman_base + QM_SEI_STATUS_OFFSET, sts_clr_val);
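/*
 * Editor's note (illustrative, not part of the driver): this is the usual
 * read / decode / clear sequence, assuming the status register is
 * write-1-to-clear - only the cause bits actually observed are written back:
 *
 *	sts = RREG32(status);		(say 0x3: causes 0 and 1)
 *	...report causes 0 and 1...
 *	WREG32(status, 0x3);		(clears exactly those bits)
 */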
7769 static void gaudi2_handle_qm_sei_err(struct hl_device *hdev, u16 event_type,
7770 struct hl_eq_razwi_info *razwi_info, u64 *event_mask)
7772 enum razwi_event_sources module;
7776 switch (event_type) {
7777 case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC23_AXI_ERR_RSP:
7778 index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
7779 qman_base = mmDCORE0_TPC0_QM_BASE +
7780 (index / NUM_OF_TPC_PER_DCORE) * DCORE_OFFSET +
7781 (index % NUM_OF_TPC_PER_DCORE) * DCORE_TPC_OFFSET;
7784 case GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
7785 qman_base = mmDCORE0_TPC6_QM_BASE;
7788 case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
7789 case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
7790 case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
7791 case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
7792 index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) /
7793 (GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE -
7794 GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE);
7795 qman_base = mmDCORE0_MME_QM_BASE + index * DCORE_OFFSET;
7798 case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
7799 case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
7800 index = event_type - GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP;
7801 qman_base = mmPDMA0_QM_BASE + index * PDMA_OFFSET;
7802 module = RAZWI_PDMA;
7804 case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
7805 case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
7806 index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE;
7807 qman_base = mmROT0_QM_BASE + index * ROT_OFFSET;
7814 _gaudi2_handle_qm_sei_err(hdev, qman_base);
7816 /* There is a single event per NIC macro, so we should check both of its QMAN blocks */
7817 if (event_type >= GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE &&
7818 event_type <= GAUDI2_EVENT_NIC11_AXI_ERROR_RESPONSE)
7819 _gaudi2_handle_qm_sei_err(hdev, qman_base + NIC_QM_OFFSET);
7821 /* check if RAZWI happened */
7823 gaudi2_ack_module_razwi_event_handler(hdev, module, 0, 0, razwi_info, event_mask);
7826 static void gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type)
7833 switch (event_type) {
7834 case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_TPC5_QM:
7835 index = event_type - GAUDI2_EVENT_TPC0_QM;
7836 qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 + index * QMAN_STREAMS;
7837 qman_base = mmDCORE0_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
7838 snprintf(desc, ARRAY_SIZE(desc), "DCORE0_TPC%d_QM", index);
7840 case GAUDI2_EVENT_TPC6_QM ... GAUDI2_EVENT_TPC11_QM:
7841 index = event_type - GAUDI2_EVENT_TPC6_QM;
7842 qid_base = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 + index * QMAN_STREAMS;
7843 qman_base = mmDCORE1_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
7844 snprintf(desc, ARRAY_SIZE(desc), "DCORE1_TPC%d_QM", index);
7846 case GAUDI2_EVENT_TPC12_QM ... GAUDI2_EVENT_TPC17_QM:
7847 index = event_type - GAUDI2_EVENT_TPC12_QM;
7848 qid_base = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 + index * QMAN_STREAMS;
7849 qman_base = mmDCORE2_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
7850 snprintf(desc, ARRAY_SIZE(desc), "DCORE2_TPC%d_QM", index);
7852 case GAUDI2_EVENT_TPC18_QM ... GAUDI2_EVENT_TPC23_QM:
7853 index = event_type - GAUDI2_EVENT_TPC18_QM;
7854 qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 + index * QMAN_STREAMS;
7855 qman_base = mmDCORE3_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
7856 snprintf(desc, ARRAY_SIZE(desc), "DCORE3_TPC%d_QM", index);
7858 case GAUDI2_EVENT_TPC24_QM:
7859 qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0;
7860 qman_base = mmDCORE0_TPC6_QM_BASE;
7861 snprintf(desc, ARRAY_SIZE(desc), "DCORE0_TPC6_QM");
7863 case GAUDI2_EVENT_MME0_QM:
7864 qid_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0;
7865 qman_base = mmDCORE0_MME_QM_BASE;
7866 snprintf(desc, ARRAY_SIZE(desc), "DCORE0_MME_QM");
7868 case GAUDI2_EVENT_MME1_QM:
7869 qid_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0;
7870 qman_base = mmDCORE1_MME_QM_BASE;
7871 snprintf(desc, ARRAY_SIZE(desc), "DCORE1_MME_QM");
7873 case GAUDI2_EVENT_MME2_QM:
7874 qid_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0;
7875 qman_base = mmDCORE2_MME_QM_BASE;
7876 snprintf(desc, ARRAY_SIZE(desc), "DCORE2_MME_QM");
7878 case GAUDI2_EVENT_MME3_QM:
7879 qid_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0;
7880 qman_base = mmDCORE3_MME_QM_BASE;
7881 snprintf(desc, ARRAY_SIZE(desc), "DCORE3_MME_QM");
7883 case GAUDI2_EVENT_HDMA0_QM:
7884 qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0;
7885 qman_base = mmDCORE0_EDMA0_QM_BASE;
7886 snprintf(desc, ARRAY_SIZE(desc), "DCORE0_EDMA0_QM");
7888 case GAUDI2_EVENT_HDMA1_QM:
7889 qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0;
7890 qman_base = mmDCORE0_EDMA1_QM_BASE;
7891 snprintf(desc, ARRAY_SIZE(desc), "DCORE0_EDMA1_QM");
7893 case GAUDI2_EVENT_HDMA2_QM:
7894 qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0;
7895 qman_base = mmDCORE1_EDMA0_QM_BASE;
7896 snprintf(desc, ARRAY_SIZE(desc), "DCORE1_EDMA0_QM");
7898 case GAUDI2_EVENT_HDMA3_QM:
7899 qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0;
7900 qman_base = mmDCORE1_EDMA1_QM_BASE;
7901 snprintf(desc, ARRAY_SIZE(desc), "DCORE1_EDMA1_QM");
7903 case GAUDI2_EVENT_HDMA4_QM:
7904 qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0;
7905 qman_base = mmDCORE2_EDMA0_QM_BASE;
7906 snprintf(desc, ARRAY_SIZE(desc), "DCORE2_EDMA0_QM");
7908 case GAUDI2_EVENT_HDMA5_QM:
7909 qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0;
7910 qman_base = mmDCORE2_EDMA1_QM_BASE;
7911 snprintf(desc, ARRAY_SIZE(desc), "DCORE2_EDMA1_QM");
7913 case GAUDI2_EVENT_HDMA6_QM:
7914 qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0;
7915 qman_base = mmDCORE3_EDMA0_QM_BASE;
7916 snprintf(desc, ARRAY_SIZE(desc), "DCORE3_EDMA0_QM");
7918 case GAUDI2_EVENT_HDMA7_QM:
7919 qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0;
7920 qman_base = mmDCORE3_EDMA1_QM_BASE;
7921 snprintf(desc, ARRAY_SIZE(desc), "DCORE3_EDMA1_QM");
7923 case GAUDI2_EVENT_PDMA0_QM:
7924 qid_base = GAUDI2_QUEUE_ID_PDMA_0_0;
7925 qman_base = mmPDMA0_QM_BASE;
7926 snprintf(desc, ARRAY_SIZE(desc), "PDMA0_QM");
7928 case GAUDI2_EVENT_PDMA1_QM:
7929 qid_base = GAUDI2_QUEUE_ID_PDMA_1_0;
7930 qman_base = mmPDMA1_QM_BASE;
7931 snprintf(desc, ARRAY_SIZE(desc), "PDMA1_QM");
7933 case GAUDI2_EVENT_ROTATOR0_ROT0_QM:
7934 qid_base = GAUDI2_QUEUE_ID_ROT_0_0;
7935 qman_base = mmROT0_QM_BASE;
7936 snprintf(desc, ARRAY_SIZE(desc), "ROTATOR0_QM");
7938 case GAUDI2_EVENT_ROTATOR1_ROT1_QM:
7939 qid_base = GAUDI2_QUEUE_ID_ROT_1_0;
7940 qman_base = mmROT1_QM_BASE;
7941 snprintf(desc, ARRAY_SIZE(desc), "ROTATOR1_QM");
7947 gaudi2_handle_qman_err_generic(hdev, desc, qman_base, qid_base);
7949 /* Handle EDMA QM SEI here because there is no AXI error response event for EDMA */
7950 if (event_type >= GAUDI2_EVENT_HDMA2_QM && event_type <= GAUDI2_EVENT_HDMA5_QM)
7951 _gaudi2_handle_qm_sei_err(hdev, qman_base);
7954 static void gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev)
7956 u32 i, sts_val, sts_clr_val = 0;
7958 sts_val = RREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_STS);
7960 for (i = 0 ; i < GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE ; i++) {
7961 if (sts_val & BIT(i)) {
7962 dev_err_ratelimited(hdev->dev, "ARC SEI. err cause: %s\n",
7963 gaudi2_arc_sei_error_cause[i]);
7964 sts_clr_val |= BIT(i);
7968 WREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_CLR, sts_clr_val);
7971 static void gaudi2_handle_cpu_sei_err(struct hl_device *hdev)
7973 u32 i, sts_val, sts_clr_val = 0;
7975 sts_val = RREG32(mmCPU_IF_CPU_SEI_INTR_STS);
7977 for (i = 0 ; i < GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE ; i++) {
7978 if (sts_val & BIT(i)) {
7979 dev_err_ratelimited(hdev->dev, "CPU SEI. err cause: %s\n",
7980 gaudi2_cpu_sei_error_cause[i]);
7981 sts_clr_val |= BIT(i);
7985 WREG32(mmCPU_IF_CPU_SEI_INTR_CLR, sts_clr_val);
7988 static void gaudi2_handle_rot_err(struct hl_device *hdev, u8 rot_index,
7989 struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause,
7992 u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data);
7995 for (i = 0 ; i < GAUDI2_NUM_OF_ROT_ERR_CAUSE ; i++)
7996 if (intr_cause_data & BIT(i))
7997 dev_err_ratelimited(hdev->dev, "ROT%u. err cause: %s\n",
7998 rot_index, guadi2_rot_error_cause[i]);
8000 /* check if RAZWI happened */
8001 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, rot_index, 0,
8002 &razwi_with_intr_cause->razwi_info, event_mask);
8005 static void gaudi2_tpc_ack_interrupts(struct hl_device *hdev, u8 tpc_index, char *interrupt_name,
8006 struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause,
8009 u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data);
8012 for (i = 0 ; i < GAUDI2_NUM_OF_TPC_INTR_CAUSE ; i++)
8013 if (intr_cause_data & BIT(i))
8014 dev_err_ratelimited(hdev->dev, "TPC%d_%s interrupt cause: %s\n",
8015 tpc_index, interrupt_name, gaudi2_tpc_interrupts_cause[i]);
8017 /* check if RAZWI happened */
8018 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, tpc_index, 0,
8019 &razwi_with_intr_cause->razwi_info, event_mask);
8022 static void gaudi2_handle_dec_err(struct hl_device *hdev, u8 dec_index, const char *interrupt_name,
8023 struct hl_eq_razwi_info *razwi_info, u64 *event_mask)
8025 u32 sts_addr, sts_val, sts_clr_val = 0;
8028 if (dec_index < NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES)
8030 sts_addr = mmDCORE0_VDEC0_BRDG_CTRL_CAUSE_INTR +
8031 DCORE_OFFSET * (dec_index / NUM_OF_DEC_PER_DCORE) +
8032 DCORE_VDEC_OFFSET * (dec_index % NUM_OF_DEC_PER_DCORE);
8035 sts_addr = mmPCIE_VDEC0_BRDG_CTRL_CAUSE_INTR + PCIE_VDEC_OFFSET *
8036 (dec_index - NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES);
8038 sts_val = RREG32(sts_addr);
8040 for (i = 0 ; i < GAUDI2_NUM_OF_DEC_ERR_CAUSE ; i++) {
8041 if (sts_val & BIT(i)) {
8042 dev_err_ratelimited(hdev->dev, "DEC%u_%s err cause: %s\n",
8043 dec_index, interrupt_name, gaudi2_dec_error_cause[i]);
8044 sts_clr_val |= BIT(i);
8048 /* check if RAZWI happened */
8049 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, dec_index, 0, razwi_info,
8052 /* Write 1 to clear the errors */
8053 WREG32(sts_addr, sts_clr_val);
8056 static void gaudi2_handle_mme_err(struct hl_device *hdev, u8 mme_index, const char *interrupt_name,
8057 struct hl_eq_razwi_info *razwi_info, u64 *event_mask)
8059 u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0;
8062 sts_addr = mmDCORE0_MME_CTRL_LO_INTR_CAUSE + DCORE_OFFSET * mme_index;
8063 sts_clr_addr = mmDCORE0_MME_CTRL_LO_INTR_CLEAR + DCORE_OFFSET * mme_index;
8065 sts_val = RREG32(sts_addr);
8067 for (i = 0 ; i < GAUDI2_NUM_OF_MME_ERR_CAUSE ; i++) {
8068 if (sts_val & BIT(i)) {
8069 dev_err_ratelimited(hdev->dev, "MME%u_%s err cause: %s\n",
8070 mme_index, interrupt_name, guadi2_mme_error_cause[i]);
8071 sts_clr_val |= BIT(i);
8075 /* check if RAZWI happened */
8076 for (i = MME_WRITE ; i < MME_INITIATORS_MAX ; i++)
8077 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, i, razwi_info,
8080 WREG32(sts_clr_addr, sts_clr_val);
8083 static void gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u8 mme_index, u8 sbte_index,
8084 u64 intr_cause_data)
8088 for (i = 0 ; i < GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE ; i++)
8089 if (intr_cause_data & BIT(i))
8090 dev_err_ratelimited(hdev->dev, "MME%uSBTE%u_AXI_ERR_RSP err cause: %s\n",
8091 mme_index, sbte_index, guadi2_mme_sbte_error_cause[i]);
8094 static void gaudi2_handle_mme_wap_err(struct hl_device *hdev, u8 mme_index,
8095 struct hl_eq_razwi_info *razwi_info, u64 *event_mask)
8097 u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0;
8100 sts_addr = mmDCORE0_MME_ACC_INTR_CAUSE + DCORE_OFFSET * mme_index;
8101 sts_clr_addr = mmDCORE0_MME_ACC_INTR_CLEAR + DCORE_OFFSET * mme_index;
8103 sts_val = RREG32(sts_addr);
8105 for (i = 0 ; i < GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE ; i++) {
8106 if (sts_val & BIT(i)) {
8107 dev_err_ratelimited(hdev->dev,
8108 "MME%u_WAP_SOURCE_RESULT_INVALID err cause: %s\n",
8109 mme_index, guadi2_mme_wap_error_cause[i]);
8110 sts_clr_val |= BIT(i);
8114 /* check if RAZWI happened on WAP0/1 */
8115 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP0, razwi_info,
8117 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP1, razwi_info,
8120 WREG32(sts_clr_addr, sts_clr_val);
8123 static void gaudi2_handle_kdma_core_event(struct hl_device *hdev, u64 intr_cause_data)
8127 /* If an AXI read or write error is received, an error is reported and an
8128 * interrupt message is sent. Due to a HW erratum, when reading the cause
8129 * register of the KDMA engine, the reported error is always HBW even if
8130 * the actual error was caused by an LBW KDMA transaction.
8132 for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++)
8133 if (intr_cause_data & BIT(i))
8134 dev_err_ratelimited(hdev->dev, "kdma core err cause: %s\n",
8135 gaudi2_kdma_core_interrupts_cause[i]);
8138 static void gaudi2_handle_dma_core_event(struct hl_device *hdev, u64 intr_cause_data)
8142 for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++)
8143 if (intr_cause_data & BIT(i))
8144 dev_err_ratelimited(hdev->dev, "dma core err cause: %s\n",
8145 gaudi2_dma_core_interrupts_cause[i]);
8148 static void gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(struct hl_device *hdev, u64 *event_mask)
8150 u32 mstr_if_base_addr = mmPCIE_MSTR_RR_MSTR_IF_RR_SHRD_HBW_BASE, razwi_happened_addr;
8152 razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED;
8153 if (RREG32(razwi_happened_addr)) {
8154 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE", true,
8155 NULL, GAUDI2_ENGINE_ID_PCIE, event_mask);
8156 WREG32(razwi_happened_addr, 0x1);
8159 razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED;
8160 if (RREG32(razwi_happened_addr)) {
8161 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE", true,
8162 NULL, GAUDI2_ENGINE_ID_PCIE, event_mask);
8163 WREG32(razwi_happened_addr, 0x1);
8166 razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED;
8167 if (RREG32(razwi_happened_addr)) {
8168 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE", true,
8169 NULL, GAUDI2_ENGINE_ID_PCIE, event_mask);
8170 WREG32(razwi_happened_addr, 0x1);
8173 razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED;
8174 if (RREG32(razwi_happened_addr)) {
8175 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE", true,
8176 NULL, GAUDI2_ENGINE_ID_PCIE, event_mask);
8177 WREG32(razwi_happened_addr, 0x1);
8181 static void gaudi2_print_pcie_addr_dec_info(struct hl_device *hdev, u64 intr_cause_data,
8186 for (i = 0 ; i < GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE ; i++) {
8187 if (!(intr_cause_data & BIT_ULL(i)))
8190 dev_err_ratelimited(hdev->dev, "PCIE ADDR DEC Error: %s\n",
8191 gaudi2_pcie_addr_dec_error_cause[i]);
8193 switch (intr_cause_data & BIT_ULL(i)) {
8194 case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK:
8196 case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK:
8197 gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(hdev, event_mask);
8203 static void gaudi2_handle_pif_fatal(struct hl_device *hdev, u64 intr_cause_data)
8208 for (i = 0 ; i < GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE ; i++) {
8209 if (intr_cause_data & BIT_ULL(i))
8210 dev_err_ratelimited(hdev->dev, "PMMU PIF err cause: %s\n",
8211 gaudi2_pmmu_fatal_interrupts_cause[i]);
8215 static void gaudi2_handle_hif_fatal(struct hl_device *hdev, u16 event_type, u64 intr_cause_data)
8217 u32 dcore_id, hif_id;
8220 dcore_id = (event_type - GAUDI2_EVENT_HIF0_FATAL) / 4;
8221 hif_id = (event_type - GAUDI2_EVENT_HIF0_FATAL) % 4;
8223 for (i = 0 ; i < GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE ; i++) {
8224 if (intr_cause_data & BIT_ULL(i))
8225 dev_err_ratelimited(hdev->dev, "DCORE%u_HIF%u: %s\n", dcore_id, hif_id,
8226 gaudi2_hif_fatal_interrupts_cause[i]);
8230 static void gaudi2_handle_page_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu,
8236 valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID));
8238 if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_PAGE_ERR_VALID_ENTRY_MASK))
8239 return;
8241 val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE));
8242 addr = val & DCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA_63_32_MASK;
8243 addr <<= 32;
8244 addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA));
8246 dev_err_ratelimited(hdev->dev, "%s page fault on va 0x%llx\n",
8247 is_pmmu ? "PMMU" : "HMMU", addr);
8248 hl_handle_page_fault(hdev, addr, 0, is_pmmu, event_mask);
8250 WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE), 0);
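/*
 * Illustrative sketch (editor's note, not part of the driver): the faulting
 * VA is captured split across two registers, so for hi = 0xab (VA bits
 * 63:32) and lo = 0xcdef0000 the reconstruction is:
 *
 *	addr = ((u64)0xab << 32) | 0xcdef0000;	(== 0xabcdef0000)
 */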
8253 static void gaudi2_handle_access_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu)
8258 valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID));
8260 if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_ACCESS_ERR_VALID_ENTRY_MASK))
8261 return;
8263 val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE));
8264 addr = val & DCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA_63_32_MASK;
8265 addr <<= 32;
8266 addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA));
8268 dev_err_ratelimited(hdev->dev, "%s access error on va 0x%llx\n",
8269 is_pmmu ? "PMMU" : "HMMU", addr);
8270 WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE), 0);
8273 static void gaudi2_handle_mmu_spi_sei_generic(struct hl_device *hdev, const char *mmu_name,
8274 u64 mmu_base, bool is_pmmu, u64 *event_mask)
8276 u32 spi_sei_cause, interrupt_clr = 0x0;
8279 spi_sei_cause = RREG32(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET);
8281 for (i = 0 ; i < GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE ; i++) {
8282 if (spi_sei_cause & BIT(i)) {
8283 dev_err_ratelimited(hdev->dev, "%s SPI_SEI ERR. err cause: %s\n",
8284 mmu_name, gaudi2_mmu_spi_sei[i].cause);
8287 gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, event_mask);
8289 gaudi2_handle_access_error(hdev, mmu_base, is_pmmu);
8291 if (gaudi2_mmu_spi_sei[i].clear_bit >= 0)
8292 interrupt_clr |= BIT(gaudi2_mmu_spi_sei[i].clear_bit);
8297 WREG32_AND(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET, ~spi_sei_cause);
8299 /* Clear interrupt */
8300 WREG32(mmu_base + MMU_INTERRUPT_CLR_OFFSET, interrupt_clr);
8303 static void gaudi2_handle_sm_err(struct hl_device *hdev, u8 sm_index)
8305 u32 sei_cause_addr, sei_cause_val, sei_cause_cause, sei_cause_log;
8306 u32 cq_intr_addr, cq_intr_val, cq_intr_queue_index;
8309 sei_cause_addr = mmDCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE + DCORE_OFFSET * sm_index;
8310 cq_intr_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_INTR + DCORE_OFFSET * sm_index;
8312 sei_cause_val = RREG32(sei_cause_addr);
8313 sei_cause_cause = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_CAUSE_MASK, sei_cause_val);
8314 cq_intr_val = RREG32(cq_intr_addr);
8317 if (sei_cause_cause) {
8318 /* There are corresponding SEI_CAUSE_log bits for every SEI_CAUSE_cause bit */
8319 sei_cause_log = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_LOG_MASK,
8322 for (i = 0 ; i < GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE ; i++) {
8323 if (!(sei_cause_cause & BIT(i)))
8326 dev_err_ratelimited(hdev->dev, "SM%u SEI ERR. err cause: %s. %s: 0x%X\n",
8328 gaudi2_sm_sei_cause[i].cause_name,
8329 gaudi2_sm_sei_cause[i].log_name,
8330 sei_cause_log & gaudi2_sm_sei_cause[i].log_mask);
8334 /* Clear SM_SEI_CAUSE */
8335 WREG32(sei_cause_addr, 0);
8339 if (cq_intr_val & DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_SEC_INTR_MASK) {
8340 cq_intr_queue_index =
8341 FIELD_GET(DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_INTR_QUEUE_INDEX_MASK,
8344 dev_err_ratelimited(hdev->dev, "SM%u err. err cause: CQ_INTR. queue index: %u\n",
8345 sm_index, cq_intr_queue_index);
8348 WREG32(cq_intr_addr, 0);
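/*
 * Illustrative sketch (editor's note, not part of the driver): FIELD_GET()
 * masks and right-shifts a field in one step. For an example queue-index
 * field at bits [15:8] and a register value of 0x1a00:
 *
 *	FIELD_GET(GENMASK(15, 8), 0x1a00) == 0x1a
 */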
8352 static void gaudi2_handle_mmu_spi_sei_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
8354 bool is_pmmu = false;
8359 switch (event_type) {
8360 case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU3_SECURITY_ERROR:
8361 index = (event_type - GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM) / 3;
8362 mmu_base = mmDCORE0_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8363 snprintf(desc, ARRAY_SIZE(desc), "DCORE0_HMMU%d", index);
8365 case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_3_AXI_ERR_RSP:
8366 index = (event_type - GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP);
8367 mmu_base = mmDCORE0_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8368 snprintf(desc, ARRAY_SIZE(desc), "DCORE0_HMMU%d", index);
8370 case GAUDI2_EVENT_HMMU8_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU11_SECURITY_ERROR:
8371 index = (event_type - GAUDI2_EVENT_HMMU8_PAGE_FAULT_WR_PERM) / 3;
8372 mmu_base = mmDCORE1_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8373 snprintf(desc, ARRAY_SIZE(desc), "DCORE1_HMMU%d", index);
8375 case GAUDI2_EVENT_HMMU_8_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_11_AXI_ERR_RSP:
8376 index = (event_type - GAUDI2_EVENT_HMMU_8_AXI_ERR_RSP);
8377 mmu_base = mmDCORE1_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8378 snprintf(desc, ARRAY_SIZE(desc), "DCORE1_HMMU%d", index);
8380 case GAUDI2_EVENT_HMMU7_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU4_SECURITY_ERROR:
8381 index = (event_type - GAUDI2_EVENT_HMMU7_PAGE_FAULT_WR_PERM) / 3;
8382 mmu_base = mmDCORE2_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8383 snprintf(desc, ARRAY_SIZE(desc), "DCORE2_HMMU%d", index);
8385 case GAUDI2_EVENT_HMMU_7_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_4_AXI_ERR_RSP:
8386 index = (event_type - GAUDI2_EVENT_HMMU_7_AXI_ERR_RSP);
8387 mmu_base = mmDCORE2_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8388 snprintf(desc, ARRAY_SIZE(desc), "DCORE2_HMMU%d", index);
8390 case GAUDI2_EVENT_HMMU15_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
8391 index = (event_type - GAUDI2_EVENT_HMMU15_PAGE_FAULT_WR_PERM) / 3;
8392 mmu_base = mmDCORE3_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8393 snprintf(desc, ARRAY_SIZE(desc), "DCORE3_HMMU%d", index);
8395 case GAUDI2_EVENT_HMMU_15_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
8396 index = (event_type - GAUDI2_EVENT_HMMU_15_AXI_ERR_RSP);
8397 mmu_base = mmDCORE3_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8398 snprintf(desc, ARRAY_SIZE(desc), "DCORE3_HMMU%d", index);
8400 case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR:
8401 case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0:
8403 mmu_base = mmPMMU_HBW_MMU_BASE;
8404 snprintf(desc, ARRAY_SIZE(desc), "PMMU");
8410 gaudi2_handle_mmu_spi_sei_generic(hdev, desc, mmu_base, is_pmmu, event_mask);
8414 /* returns true if hard reset is required (ECC DERR or Read parity), false otherwise (ECC SERR) */
8415 static bool gaudi2_hbm_sei_handle_read_err(struct hl_device *hdev,
8416 struct hl_eq_hbm_sei_read_err_intr_info *rd_err_data, u32 err_cnt)
8418 u32 addr, beat, beat_shift;
8421 dev_err_ratelimited(hdev->dev,
8422 "READ ERROR count: ECC SERR: %d, ECC DERR: %d, RD_PARITY: %d\n",
8423 FIELD_GET(HBM_ECC_SERR_CNTR_MASK, err_cnt),
8424 FIELD_GET(HBM_ECC_DERR_CNTR_MASK, err_cnt),
8425 FIELD_GET(HBM_RD_PARITY_CNTR_MASK, err_cnt));
8427 addr = le32_to_cpu(rd_err_data->dbg_rd_err_addr.rd_addr_val);
8428 dev_err_ratelimited(hdev->dev,
8429 "READ ERROR address: sid(%u), bg(%u), ba(%u), col(%u), row(%u)\n",
8430 FIELD_GET(HBM_RD_ADDR_SID_MASK, addr),
8431 FIELD_GET(HBM_RD_ADDR_BG_MASK, addr),
8432 FIELD_GET(HBM_RD_ADDR_BA_MASK, addr),
8433 FIELD_GET(HBM_RD_ADDR_COL_MASK, addr),
8434 FIELD_GET(HBM_RD_ADDR_ROW_MASK, addr));
8436 /* For each beat (RDQS edge), look for possible errors and print relevant info */
8437 for (beat = 0 ; beat < 4 ; beat++) {
8438 if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
8439 (HBM_RD_ERR_SERR_BEAT0_MASK << beat))
8440 dev_err_ratelimited(hdev->dev, "Beat%d ECC SERR: DM: %#x, Syndrome: %#x\n",
8442 le32_to_cpu(rd_err_data->dbg_rd_err_dm),
8443 le32_to_cpu(rd_err_data->dbg_rd_err_syndrome));
8445 if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
8446 (HBM_RD_ERR_DERR_BEAT0_MASK << beat)) {
8447 dev_err_ratelimited(hdev->dev, "Beat%d ECC DERR: DM: %#x, Syndrome: %#x\n",
8449 le32_to_cpu(rd_err_data->dbg_rd_err_dm),
8450 le32_to_cpu(rd_err_data->dbg_rd_err_syndrome));
8454 beat_shift = beat * HBM_RD_ERR_BEAT_SHIFT;
8455 if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
8456 (HBM_RD_ERR_PAR_ERR_BEAT0_MASK << beat_shift)) {
8457 dev_err_ratelimited(hdev->dev,
8458 "Beat%d read PARITY: DM: %#x, PAR data: %#x\n",
8460 le32_to_cpu(rd_err_data->dbg_rd_err_dm),
8461 (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
8462 (HBM_RD_ERR_PAR_DATA_BEAT0_MASK << beat_shift)) >>
8463 (HBM_RD_ERR_PAR_DATA_BEAT0_SHIFT + beat_shift));
8467 dev_err_ratelimited(hdev->dev, "Beat%d DQ data:\n", beat);
8468 dev_err_ratelimited(hdev->dev, "\t0x%08x\n",
8469 le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2]));
8470 dev_err_ratelimited(hdev->dev, "\t0x%08x\n",
8471 le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2 + 1]));
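/*
 * Editor's note (illustrative, not part of the driver): the per-beat parity
 * fields are strided by HBM_RD_ERR_BEAT_SHIFT, so for beat 2 the bit tested
 * is HBM_RD_ERR_PAR_ERR_BEAT0_MASK << (2 * HBM_RD_ERR_BEAT_SHIFT) and the
 * matching PAR data bits are shifted down by
 * HBM_RD_ERR_PAR_DATA_BEAT0_SHIFT + 2 * HBM_RD_ERR_BEAT_SHIFT, mirroring
 * the beat_shift arithmetic above.
 */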
8477 static void gaudi2_hbm_sei_print_wr_par_info(struct hl_device *hdev,
8478 struct hl_eq_hbm_sei_wr_par_intr_info *wr_par_err_data, u32 err_cnt)
8480 struct hbm_sei_wr_cmd_address *wr_cmd_addr = wr_par_err_data->dbg_last_wr_cmds;
8481 u32 i, curr_addr, derr = wr_par_err_data->dbg_derr;
8483 dev_err_ratelimited(hdev->dev, "WRITE PARITY ERROR count: %d\n", err_cnt);
8485 dev_err_ratelimited(hdev->dev, "CK-0 DERR: 0x%02x, CK-1 DERR: 0x%02x\n",
8486 derr & 0x3, derr & 0xc);
8488 /* JIRA H6-3286 - the following prints may not be valid */
8489 dev_err_ratelimited(hdev->dev, "Last latched write commands addresses:\n");
8490 for (i = 0 ; i < HBM_WR_PAR_CMD_LIFO_LEN ; i++) {
8491 curr_addr = le32_to_cpu(wr_cmd_addr[i].dbg_wr_cmd_addr);
8492 dev_err_ratelimited(hdev->dev,
8493 "\twrite cmd[%u]: Address: SID(%u) BG(%u) BA(%u) COL(%u).\n",
8495 FIELD_GET(WR_PAR_LAST_CMD_SID_MASK, curr_addr),
8496 FIELD_GET(WR_PAR_LAST_CMD_BG_MASK, curr_addr),
8497 FIELD_GET(WR_PAR_LAST_CMD_BA_MASK, curr_addr),
8498 FIELD_GET(WR_PAR_LAST_CMD_COL_MASK, curr_addr));
8502 static void gaudi2_hbm_sei_print_ca_par_info(struct hl_device *hdev,
8503 struct hl_eq_hbm_sei_ca_par_intr_info *ca_par_err_data, u32 err_cnt)
8505 __le32 *col_cmd = ca_par_err_data->dbg_col;
8506 __le16 *row_cmd = ca_par_err_data->dbg_row;
8509 dev_err_ratelimited(hdev->dev, "CA ERROR count: %d\n", err_cnt);
8511 dev_err_ratelimited(hdev->dev, "Last latched C&R bus commands:\n");
8512 for (i = 0 ; i < HBM_CA_ERR_CMD_LIFO_LEN ; i++)
8513 dev_err_ratelimited(hdev->dev, "cmd%u: ROW(0x%04x) COL(0x%05x)\n", i,
8514 le16_to_cpu(row_cmd[i]) & (u16)GENMASK(13, 0),
8515 le32_to_cpu(col_cmd[i]) & (u32)GENMASK(17, 0));
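/*
 * Editor's note (illustrative, not part of the driver): GENMASK(13, 0)
 * keeps the 14 row-address bits and GENMASK(17, 0) the 18 column-command
 * bits, e.g. 0xabcd & GENMASK(13, 0) == 0x2bcd.
 */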
8518 /* Returns true if hard reset is needed or false otherwise */
8519 static bool gaudi2_handle_hbm_mc_sei_err(struct hl_device *hdev, u16 event_type,
8520 struct hl_eq_hbm_sei_data *sei_data)
8522 bool require_hard_reset = false;
8523 u32 hbm_id, mc_id, cause_idx;
8525 hbm_id = (event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 4;
8526 mc_id = ((event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 2) % 2;
8528 cause_idx = sei_data->hdr.sei_cause;
8529 if (cause_idx > GAUDI2_NUM_OF_HBM_SEI_CAUSE - 1) {
8530 dev_err_ratelimited(hdev->dev, "Invalid HBM SEI event cause (%d) provided by FW\n",
8535 if (sei_data->hdr.is_critical)
8537 "System Critical Error Interrupt - HBM(%u) MC(%u) MC_CH(%u) MC_PC(%u). Error cause: %s\n",
8538 hbm_id, mc_id, sei_data->hdr.mc_channel, sei_data->hdr.mc_pseudo_channel,
8539 hbm_mc_sei_cause[cause_idx]);
8542 dev_err_ratelimited(hdev->dev,
8543 "System Non-Critical Error Interrupt - HBM(%u) MC(%u) MC_CH(%u) MC_PC(%u). Error cause: %s\n",
8544 hbm_id, mc_id, sei_data->hdr.mc_channel, sei_data->hdr.mc_pseudo_channel,
8545 hbm_mc_sei_cause[cause_idx]);
8547 /* Print error-specific info */
8548 switch (cause_idx) {
8549 case HBM_SEI_CATTRIP:
8550 require_hard_reset = true;
8553 case HBM_SEI_CMD_PARITY_EVEN:
8554 gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_even_info,
8555 le32_to_cpu(sei_data->hdr.cnt));
8556 require_hard_reset = true;
8559 case HBM_SEI_CMD_PARITY_ODD:
8560 gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_odd_info,
8561 le32_to_cpu(sei_data->hdr.cnt));
8562 require_hard_reset = true;
8565 case HBM_SEI_WRITE_DATA_PARITY_ERR:
8566 gaudi2_hbm_sei_print_wr_par_info(hdev, &sei_data->wr_parity_info,
8567 le32_to_cpu(sei_data->hdr.cnt));
8568 require_hard_reset = true;
8571 case HBM_SEI_READ_ERR:
8572 /* Unlike other SEI events, a read error requires further processing of the
8573 * raw data in order to determine the root cause.
8575 require_hard_reset = gaudi2_hbm_sei_handle_read_err(hdev,
8576 &sei_data->read_err_info,
8577 le32_to_cpu(sei_data->hdr.cnt));
8584 require_hard_reset |= !!sei_data->hdr.is_critical;
8586 return require_hard_reset;
8589 static void gaudi2_handle_hbm_cattrip(struct hl_device *hdev, u64 intr_cause_data)
8592 "HBM catastrophic temperature error (CATTRIP) cause %#llx\n",
8596 static void gaudi2_handle_hbm_mc_spi(struct hl_device *hdev, u64 intr_cause_data)
8600 for (i = 0 ; i < GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE ; i++)
8601 if (intr_cause_data & hbm_mc_spi[i].mask)
8602 dev_dbg(hdev->dev, "HBM spi event: notification cause(%s)\n",
8603 hbm_mc_spi[i].cause);
8606 static void gaudi2_print_clk_change_info(struct hl_device *hdev, u16 event_type)
8608 ktime_t zero_time = ktime_set(0, 0);
8610 mutex_lock(&hdev->clk_throttling.lock);
8612 switch (event_type) {
8613 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S:
8614 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
8615 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
8616 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
8617 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
8618 dev_info_ratelimited(hdev->dev, "Clock throttling due to power consumption\n");
8621 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E:
8622 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
8623 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
8624 dev_info_ratelimited(hdev->dev, "Power envelop is safe, back to optimal clock\n");
8627 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S:
8628 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
8629 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
8630 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
8631 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
8632 dev_info_ratelimited(hdev->dev, "Clock throttling due to overheating\n");
8635 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
8636 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
8637 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
8638 dev_info_ratelimited(hdev->dev, "Thermal envelop is safe, back to optimal clock\n");
8642 dev_err(hdev->dev, "Received invalid clock change event %d\n", event_type);
8646 mutex_unlock(&hdev->clk_throttling.lock);
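/*
 * Editor's note (illustrative, not part of the driver): each throttling
 * window is bracketed by an _S/_E event pair - _S records
 * start = ktime_get() and zeroes end, _E records end = ktime_get() - so
 * ktime_sub(end, start) later yields the duration of that throttling
 * window per reason.
 */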
static void gaudi2_print_out_of_sync_info(struct hl_device *hdev,
					struct cpucp_pkt_sync_err *sync_err)
{
	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];

	dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n",
		le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
}
static void gaudi2_handle_pcie_p2p_msix(struct hl_device *hdev)
{
	u32 p2p_intr, msix_gw_intr;

	p2p_intr = RREG32(mmPCIE_WRAP_P2P_INTR);
	msix_gw_intr = RREG32(mmPCIE_WRAP_MSIX_GW_INTR);

	if (p2p_intr) {
		dev_err_ratelimited(hdev->dev,
			"pcie p2p transaction terminated due to security, req_id(0x%x)\n",
			RREG32(mmPCIE_WRAP_P2P_REQ_ID));

		WREG32(mmPCIE_WRAP_P2P_INTR, 0x1);
	}

	if (msix_gw_intr) {
		dev_err_ratelimited(hdev->dev,
			"pcie msi-x gen denied due to vector num check failure, vec(0x%X)\n",
			RREG32(mmPCIE_WRAP_MSIX_GW_VEC));

		WREG32(mmPCIE_WRAP_MSIX_GW_INTR, 0x1);
	}
}
static void gaudi2_handle_pcie_drain(struct hl_device *hdev,
			struct hl_eq_pcie_drain_ind_data *drain_data)
{
	u64 lbw_rd, lbw_wr, hbw_rd, hbw_wr, cause;

	cause = le64_to_cpu(drain_data->intr_cause.intr_cause_data);
	lbw_rd = le64_to_cpu(drain_data->drain_rd_addr_lbw);
	lbw_wr = le64_to_cpu(drain_data->drain_wr_addr_lbw);
	hbw_rd = le64_to_cpu(drain_data->drain_rd_addr_hbw);
	hbw_wr = le64_to_cpu(drain_data->drain_wr_addr_hbw);

	if (cause & BIT_ULL(0))
		dev_err_ratelimited(hdev->dev,
			"PCIE AXI drain LBW completed, read_err %u, write_err %u\n",
			!!lbw_rd, !!lbw_wr);

	if (cause & BIT_ULL(1))
		dev_err_ratelimited(hdev->dev,
			"PCIE AXI drain HBW completed, raddr %#llx, waddr %#llx\n",
			hbw_rd, hbw_wr);
}
static void gaudi2_handle_psoc_drain(struct hl_device *hdev, u64 intr_cause_data)
{
	int i;

	for (i = 0 ; i < GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE ; i++) {
		if (intr_cause_data & BIT_ULL(i))
			dev_err_ratelimited(hdev->dev, "PSOC %s completed\n",
				gaudi2_psoc_axi_drain_interrupts_cause[i]);
	}
}
static void gaudi2_print_cpu_pkt_failure_info(struct hl_device *hdev,
					struct cpucp_pkt_sync_err *sync_err)
{
	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];

	dev_warn(hdev->dev,
		"FW reported sanity check failure, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n",
		le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
}
static void hl_arc_event_handle(struct hl_device *hdev,
					struct hl_eq_engine_arc_intr_data *data)
{
	struct hl_engine_arc_dccm_queue_full_irq *q;
	u32 intr_type, engine_id;
	u64 payload;

	intr_type = le32_to_cpu(data->intr_type);
	engine_id = le32_to_cpu(data->engine_id);
	payload = le64_to_cpu(data->payload);

	switch (intr_type) {
	case ENGINE_ARC_DCCM_QUEUE_FULL_IRQ:
		q = (struct hl_engine_arc_dccm_queue_full_irq *) &payload;

		dev_err_ratelimited(hdev->dev,
				"ARC DCCM Full event: EngId: %u, Intr_type: %u, Qidx: %u\n",
				engine_id, intr_type, q->queue_index);
		break;
	default:
		dev_err_ratelimited(hdev->dev, "Unknown ARC event type\n");
	}
}
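
/*
 * Main event-queue dispatcher: decode the event type from the EQ entry
 * header, update the event statistics, and route the event to its handler.
 * Each case accumulates notifier bits in event_mask and may mark the event
 * as requiring a (possibly critical) hard reset, which is acted upon at the
 * bottom of the function.
 */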
static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	bool reset_required = false, is_critical = false;
	u32 ctl, reset_flags = HL_DRV_RESET_HARD;
	int index, sbte_index;
	u64 event_mask = 0;
	u16 event_type;

	ctl = le32_to_cpu(eq_entry->hdr.ctl);
	event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) >> EQ_CTL_EVENT_TYPE_SHIFT);

	if (event_type >= GAUDI2_EVENT_SIZE) {
		dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
				event_type, GAUDI2_EVENT_SIZE - 1);
		return;
	}

	gaudi2->events_stat[event_type]++;
	gaudi2->events_stat_aggregate[event_type]++;

	gaudi2_print_irq_info(hdev, event_type);

	switch (event_type) {
	case GAUDI2_EVENT_PCIE_CORE_SERR ... GAUDI2_EVENT_ARC0_ECC_DERR:
		fallthrough;
	case GAUDI2_EVENT_ROTATOR0_SERR ... GAUDI2_EVENT_ROTATOR1_DERR:
		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		reset_required = gaudi2_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
		is_critical = eq_entry->ecc_data.is_critical;
		break;

	case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_PDMA1_QM:
		fallthrough;
	case GAUDI2_EVENT_ROTATOR0_ROT0_QM ... GAUDI2_EVENT_ROTATOR1_ROT1_QM:
		fallthrough;
	case GAUDI2_EVENT_NIC0_QM0 ... GAUDI2_EVENT_NIC11_QM1:
		gaudi2_handle_qman_err(hdev, event_type);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;
	case GAUDI2_EVENT_ARC_AXI_ERROR_RESPONSE_0:
		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
		gaudi2_handle_arc_farm_sei_err(hdev);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	case GAUDI2_EVENT_CPU_AXI_ERR_RSP:
		gaudi2_handle_cpu_sei_err(hdev);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
	case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
		gaudi2_handle_qm_sei_err(hdev, event_type, &eq_entry->razwi_info, &event_mask);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
	case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
		index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE;
		gaudi2_handle_rot_err(hdev, index, &eq_entry->razwi_with_intr_cause, &event_mask);
		gaudi2_handle_qm_sei_err(hdev, event_type, NULL, &event_mask);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;
	case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
		index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
		gaudi2_tpc_ack_interrupts(hdev, index, "AXI_ERR_RSP",
						&eq_entry->razwi_with_intr_cause, &event_mask);
		gaudi2_handle_qm_sei_err(hdev, event_type, NULL, &event_mask);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE ... GAUDI2_EVENT_DEC9_AXI_ERR_RSPONSE:
		index = event_type - GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE;
		gaudi2_handle_dec_err(hdev, index, "AXI_ERR_RESPONSE", &eq_entry->razwi_info,
					&event_mask);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI2_EVENT_TPC0_KERNEL_ERR:
	case GAUDI2_EVENT_TPC1_KERNEL_ERR:
	case GAUDI2_EVENT_TPC2_KERNEL_ERR:
	case GAUDI2_EVENT_TPC3_KERNEL_ERR:
	case GAUDI2_EVENT_TPC4_KERNEL_ERR:
	case GAUDI2_EVENT_TPC5_KERNEL_ERR:
	case GAUDI2_EVENT_TPC6_KERNEL_ERR:
	case GAUDI2_EVENT_TPC7_KERNEL_ERR:
	case GAUDI2_EVENT_TPC8_KERNEL_ERR:
	case GAUDI2_EVENT_TPC9_KERNEL_ERR:
	case GAUDI2_EVENT_TPC10_KERNEL_ERR:
	case GAUDI2_EVENT_TPC11_KERNEL_ERR:
	case GAUDI2_EVENT_TPC12_KERNEL_ERR:
	case GAUDI2_EVENT_TPC13_KERNEL_ERR:
	case GAUDI2_EVENT_TPC14_KERNEL_ERR:
	case GAUDI2_EVENT_TPC15_KERNEL_ERR:
	case GAUDI2_EVENT_TPC16_KERNEL_ERR:
	case GAUDI2_EVENT_TPC17_KERNEL_ERR:
	case GAUDI2_EVENT_TPC18_KERNEL_ERR:
	case GAUDI2_EVENT_TPC19_KERNEL_ERR:
	case GAUDI2_EVENT_TPC20_KERNEL_ERR:
	case GAUDI2_EVENT_TPC21_KERNEL_ERR:
	case GAUDI2_EVENT_TPC22_KERNEL_ERR:
	case GAUDI2_EVENT_TPC23_KERNEL_ERR:
	case GAUDI2_EVENT_TPC24_KERNEL_ERR:
		index = (event_type - GAUDI2_EVENT_TPC0_KERNEL_ERR) /
			(GAUDI2_EVENT_TPC1_KERNEL_ERR - GAUDI2_EVENT_TPC0_KERNEL_ERR);
		gaudi2_tpc_ack_interrupts(hdev, index, "KRN_ERR", &eq_entry->razwi_with_intr_cause,
						&event_mask);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;
	case GAUDI2_EVENT_DEC0_SPI:
	case GAUDI2_EVENT_DEC1_SPI:
	case GAUDI2_EVENT_DEC2_SPI:
	case GAUDI2_EVENT_DEC3_SPI:
	case GAUDI2_EVENT_DEC4_SPI:
	case GAUDI2_EVENT_DEC5_SPI:
	case GAUDI2_EVENT_DEC6_SPI:
	case GAUDI2_EVENT_DEC7_SPI:
	case GAUDI2_EVENT_DEC8_SPI:
	case GAUDI2_EVENT_DEC9_SPI:
		index = (event_type - GAUDI2_EVENT_DEC0_SPI) /
				(GAUDI2_EVENT_DEC1_SPI - GAUDI2_EVENT_DEC0_SPI);
		gaudi2_handle_dec_err(hdev, index, "SPI", &eq_entry->razwi_info, &event_mask);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
	case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
	case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
	case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
		index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) /
				(GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE -
					GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE);
		gaudi2_handle_mme_err(hdev, index,
				"CTRL_AXI_ERROR_RESPONSE", &eq_entry->razwi_info, &event_mask);
		gaudi2_handle_qm_sei_err(hdev, event_type, NULL, &event_mask);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI2_EVENT_MME0_QMAN_SW_ERROR:
	case GAUDI2_EVENT_MME1_QMAN_SW_ERROR:
	case GAUDI2_EVENT_MME2_QMAN_SW_ERROR:
	case GAUDI2_EVENT_MME3_QMAN_SW_ERROR:
		index = (event_type - GAUDI2_EVENT_MME0_QMAN_SW_ERROR) /
				(GAUDI2_EVENT_MME1_QMAN_SW_ERROR -
					GAUDI2_EVENT_MME0_QMAN_SW_ERROR);
		gaudi2_handle_mme_err(hdev, index, "QMAN_SW_ERROR", &eq_entry->razwi_info,
					&event_mask);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID:
	case GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID:
	case GAUDI2_EVENT_MME2_WAP_SOURCE_RESULT_INVALID:
	case GAUDI2_EVENT_MME3_WAP_SOURCE_RESULT_INVALID:
		index = (event_type - GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID) /
				(GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID -
					GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID);
		gaudi2_handle_mme_wap_err(hdev, index, &eq_entry->razwi_info, &event_mask);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;
	case GAUDI2_EVENT_KDMA_CH0_AXI_ERR_RSP:
	case GAUDI2_EVENT_KDMA0_CORE:
		gaudi2_handle_kdma_core_event(hdev,
					le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	case GAUDI2_EVENT_HDMA2_CORE ... GAUDI2_EVENT_PDMA1_CORE:
		gaudi2_handle_dma_core_event(hdev,
					le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI2_EVENT_PCIE_ADDR_DEC_ERR:
		gaudi2_print_pcie_addr_dec_info(hdev,
				le64_to_cpu(eq_entry->intr_cause.intr_cause_data), &event_mask);
		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
	case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
	case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR:
	case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0:
		gaudi2_handle_mmu_spi_sei_err(hdev, event_type, &event_mask);
		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI2_EVENT_HIF0_FATAL ... GAUDI2_EVENT_HIF12_FATAL:
		gaudi2_handle_hif_fatal(hdev, event_type,
				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	case GAUDI2_EVENT_PMMU_FATAL_0:
		gaudi2_handle_pif_fatal(hdev,
				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	case GAUDI2_EVENT_PSOC63_RAZWI_OR_PID_MIN_MAX_INTERRUPT:
		gaudi2_ack_psoc_razwi_event_handler(hdev, &event_mask);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE ... GAUDI2_EVENT_HBM5_MC1_SEI_NON_SEVERE:
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		if (gaudi2_handle_hbm_mc_sei_err(hdev, event_type, &eq_entry->sei_data)) {
			reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
			reset_required = true;
		}
		break;

	case GAUDI2_EVENT_HBM_CATTRIP_0 ... GAUDI2_EVENT_HBM_CATTRIP_5:
		gaudi2_handle_hbm_cattrip(hdev, le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	case GAUDI2_EVENT_HBM0_MC0_SPI ... GAUDI2_EVENT_HBM5_MC1_SPI:
		gaudi2_handle_hbm_mc_spi(hdev, le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	case GAUDI2_EVENT_PCIE_DRAIN_COMPLETE:
		gaudi2_handle_pcie_drain(hdev, &eq_entry->pcie_drain_ind_data);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	case GAUDI2_EVENT_PSOC59_RPM_ERROR_OR_DRAIN:
		gaudi2_handle_psoc_drain(hdev, le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;
	case GAUDI2_EVENT_CPU_AXI_ECC:
		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	case GAUDI2_EVENT_CPU_L2_RAM_ECC:
		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	case GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME0_SBTE4_AXI_ERR_RSP:
	case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_SBTE4_AXI_ERR_RSP:
	case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_SBTE4_AXI_ERR_RSP:
	case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_SBTE4_AXI_ERR_RSP:
		index = (event_type - GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP) /
				(GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP -
					GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP);
		sbte_index = (event_type - GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP) %
				(GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP -
					GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP);
		gaudi2_handle_mme_sbte_err(hdev, index, sbte_index,
						le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI2_EVENT_VM0_ALARM_A ... GAUDI2_EVENT_VM3_ALARM_B:
		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	case GAUDI2_EVENT_PSOC_AXI_ERR_RSP:
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	case GAUDI2_EVENT_PSOC_PRSTN_FALL:
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	case GAUDI2_EVENT_PCIE_APB_TIMEOUT:
		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	case GAUDI2_EVENT_PCIE_FATAL_ERR:
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;
	case GAUDI2_EVENT_TPC0_BMON_SPMU:
	case GAUDI2_EVENT_TPC1_BMON_SPMU:
	case GAUDI2_EVENT_TPC2_BMON_SPMU:
	case GAUDI2_EVENT_TPC3_BMON_SPMU:
	case GAUDI2_EVENT_TPC4_BMON_SPMU:
	case GAUDI2_EVENT_TPC5_BMON_SPMU:
	case GAUDI2_EVENT_TPC6_BMON_SPMU:
	case GAUDI2_EVENT_TPC7_BMON_SPMU:
	case GAUDI2_EVENT_TPC8_BMON_SPMU:
	case GAUDI2_EVENT_TPC9_BMON_SPMU:
	case GAUDI2_EVENT_TPC10_BMON_SPMU:
	case GAUDI2_EVENT_TPC11_BMON_SPMU:
	case GAUDI2_EVENT_TPC12_BMON_SPMU:
	case GAUDI2_EVENT_TPC13_BMON_SPMU:
	case GAUDI2_EVENT_TPC14_BMON_SPMU:
	case GAUDI2_EVENT_TPC15_BMON_SPMU:
	case GAUDI2_EVENT_TPC16_BMON_SPMU:
	case GAUDI2_EVENT_TPC17_BMON_SPMU:
	case GAUDI2_EVENT_TPC18_BMON_SPMU:
	case GAUDI2_EVENT_TPC19_BMON_SPMU:
	case GAUDI2_EVENT_TPC20_BMON_SPMU:
	case GAUDI2_EVENT_TPC21_BMON_SPMU:
	case GAUDI2_EVENT_TPC22_BMON_SPMU:
	case GAUDI2_EVENT_TPC23_BMON_SPMU:
	case GAUDI2_EVENT_TPC24_BMON_SPMU:
	case GAUDI2_EVENT_MME0_CTRL_BMON_SPMU:
	case GAUDI2_EVENT_MME0_SBTE_BMON_SPMU:
	case GAUDI2_EVENT_MME0_WAP_BMON_SPMU:
	case GAUDI2_EVENT_MME1_CTRL_BMON_SPMU:
	case GAUDI2_EVENT_MME1_SBTE_BMON_SPMU:
	case GAUDI2_EVENT_MME1_WAP_BMON_SPMU:
	case GAUDI2_EVENT_MME2_CTRL_BMON_SPMU:
	case GAUDI2_EVENT_MME2_SBTE_BMON_SPMU:
	case GAUDI2_EVENT_MME2_WAP_BMON_SPMU:
	case GAUDI2_EVENT_MME3_CTRL_BMON_SPMU:
	case GAUDI2_EVENT_MME3_SBTE_BMON_SPMU:
	case GAUDI2_EVENT_MME3_WAP_BMON_SPMU:
	case GAUDI2_EVENT_HDMA2_BM_SPMU ... GAUDI2_EVENT_PDMA1_BM_SPMU:
		fallthrough;
	case GAUDI2_EVENT_DEC0_BMON_SPMU:
	case GAUDI2_EVENT_DEC1_BMON_SPMU:
	case GAUDI2_EVENT_DEC2_BMON_SPMU:
	case GAUDI2_EVENT_DEC3_BMON_SPMU:
	case GAUDI2_EVENT_DEC4_BMON_SPMU:
	case GAUDI2_EVENT_DEC5_BMON_SPMU:
	case GAUDI2_EVENT_DEC6_BMON_SPMU:
	case GAUDI2_EVENT_DEC7_BMON_SPMU:
	case GAUDI2_EVENT_DEC8_BMON_SPMU:
	case GAUDI2_EVENT_DEC9_BMON_SPMU:
	case GAUDI2_EVENT_ROTATOR0_BMON_SPMU ... GAUDI2_EVENT_SM3_BMON_SPMU:
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;
	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S:
	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E:
	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S:
	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
		gaudi2_print_clk_change_info(hdev, event_type);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI2_EVENT_CPU_PKT_QUEUE_OUT_SYNC:
		gaudi2_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	case GAUDI2_EVENT_PCIE_FLR_REQUESTED:
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		/* Do nothing - FW will handle it */
		break;

	case GAUDI2_EVENT_PCIE_P2P_MSIX:
		gaudi2_handle_pcie_p2p_msix(hdev);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE ... GAUDI2_EVENT_SM3_AXI_ERROR_RESPONSE:
		index = event_type - GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE;
		gaudi2_handle_sm_err(hdev, index);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI2_EVENT_PSOC_MME_PLL_LOCK_ERR ... GAUDI2_EVENT_DCORE2_HBM_PLL_LOCK_ERR:
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
		dev_info(hdev->dev, "CPLD shutdown cause, reset reason: 0x%llx\n",
					le64_to_cpu(eq_entry->data[0]));
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_EVENT:
		dev_err(hdev->dev, "CPLD shutdown event, reset reason: 0x%llx\n",
					le64_to_cpu(eq_entry->data[0]));
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	case GAUDI2_EVENT_CPU_PKT_SANITY_FAILED:
		gaudi2_print_cpu_pkt_failure_info(hdev, &eq_entry->pkt_sync_err);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	case GAUDI2_EVENT_ARC_DCCM_FULL:
		hl_arc_event_handle(hdev, &eq_entry->arc_data);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI2_EVENT_CPU_FP32_NOT_SUPPORTED:
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		is_critical = true;
		break;
	default:
		if (gaudi2_irq_map_table[event_type].valid)
			dev_err_ratelimited(hdev->dev, "Cannot find handler for event %d\n",
						event_type);
	}

	if ((gaudi2_irq_map_table[event_type].reset || reset_required) &&
				(hdev->hard_reset_on_fw_events ||
				(hdev->asic_prop.fw_security_enabled && is_critical)))
		goto reset_device;

	/* Send unmask irq only for interrupts not classified as MSG */
	if (!gaudi2_irq_map_table[event_type].msg)
		hl_fw_unmask_irq(hdev, event_type);

	if (event_mask)
		hl_notifier_event_send_all(hdev, event_mask);

	return;

reset_device:
	if (hdev->asic_prop.fw_security_enabled && is_critical) {
		reset_flags |= HL_DRV_RESET_BYPASS_REQ_TO_FW;
		event_mask |= HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
	} else {
		reset_flags |= HL_DRV_RESET_DELAY;
	}
	event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
	hl_device_cond_reset(hdev, reset_flags, event_mask);
}
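
/*
 * Build a single LIN_DMA packet in memset mode: with the MEMSET bit set the
 * engine treats src_addr as the fill value rather than a source address, and
 * the WRCOMP bit makes the engine issue a write-completion message (used by
 * the caller to count finished packets on a sync object).
 */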
static int gaudi2_memset_memory_chunk_using_edma_qm(struct hl_device *hdev,
	struct packet_lin_dma *lin_dma_pkt, dma_addr_t pkt_dma_addr,
	u32 hw_queue_id, u32 size, u64 addr, u32 val)
{
	u32 ctl, pkt_size;
	int rc = 0;

	ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
	ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
	ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_WRCOMP_MASK, 1);
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 1);

	lin_dma_pkt->ctl = cpu_to_le32(ctl);
	lin_dma_pkt->src_addr = cpu_to_le64(val);
	lin_dma_pkt->dst_addr = cpu_to_le64(addr);
	lin_dma_pkt->tsize = cpu_to_le32(size);

	pkt_size = sizeof(struct packet_lin_dma);

	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, pkt_dma_addr);
	if (rc)
		dev_err(hdev->dev, "Failed to send lin dma packet to H/W queue %d\n",
				hw_queue_id);

	return rc;
}
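
/*
 * Scrub a device-memory range by spreading LIN_DMA memset packets, up to
 * 2GB each, round-robin across all enabled EDMA engines. A rough example:
 * scrubbing 5GB issues three packets (2GB, 2GB and 1GB); every
 * write-completion increments the sync object at comp_addr by 1, so the
 * scrub is done when the SOB value equals dma_num.
 */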
static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val)
{
	u32 edma_queues_id[] = {GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0,
				GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0,
				GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0,
				GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0};
	u32 chunk_size, dcore, edma_idx, sob_offset, sob_addr, comp_val,
		old_mmubp, mmubp, num_of_pkts, busy, pkt_size;
	u64 comp_addr, cur_addr = addr, end_addr = addr + size;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	void *lin_dma_pkts_arr;
	dma_addr_t pkt_dma_addr;
	int rc = 0, dma_num = 0;

	if (prop->edma_enabled_mask == 0) {
		dev_info(hdev->dev, "none of the EDMA engines is enabled - skip dram scrubbing\n");
		return -EIO;
	}

	sob_offset = hdev->asic_prop.first_available_user_sob[0] * 4;
	sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
	comp_addr = CFG_BASE + sob_addr;
	comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) |
		FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1);
	mmubp = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_MASK, 1) |
		FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_MASK, 1);

	/* Calculate how many lin dma pkts we'll need */
	num_of_pkts = div64_u64(round_up(size, SZ_2G), SZ_2G);
	pkt_size = sizeof(struct packet_lin_dma);

	lin_dma_pkts_arr = hl_asic_dma_alloc_coherent(hdev, pkt_size * num_of_pkts,
					&pkt_dma_addr, GFP_KERNEL);
	if (!lin_dma_pkts_arr)
		return -ENOMEM;

	/*
	 * set mmu bypass for the scrubbing - all ddmas are configured the same so save
	 * only the first one to restore later
	 * also set the sob addr for all edma cores for completion.
	 * set QM as trusted to allow it to access physical address with MMU bp.
	 */
	old_mmubp = RREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP);
	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
		for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
			u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET;
			u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;

			if (!(prop->edma_enabled_mask & BIT(edma_bit)))
				continue;

			WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP +
					edma_offset, mmubp);
			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset,
					lower_32_bits(comp_addr));
			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset,
					upper_32_bits(comp_addr));
			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset,
					comp_val);
			gaudi2_qman_set_test_mode(hdev,
					edma_queues_id[dcore] + 4 * edma_idx, true);
		}
	}

	WREG32(sob_addr, 0);

	while (cur_addr < end_addr) {
		for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
			for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
				u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;

				if (!(prop->edma_enabled_mask & BIT(edma_bit)))
					continue;

				chunk_size = min_t(u64, SZ_2G, end_addr - cur_addr);

				rc = gaudi2_memset_memory_chunk_using_edma_qm(hdev,
					(struct packet_lin_dma *)lin_dma_pkts_arr + dma_num,
					pkt_dma_addr + dma_num * pkt_size,
					edma_queues_id[dcore] + edma_idx * 4,
					chunk_size, cur_addr, val);
				if (rc)
					goto end;

				dma_num++;
				cur_addr += chunk_size;
				if (cur_addr == end_addr)
					break;
			}
		}
	}

	rc = hl_poll_timeout(hdev, sob_addr, busy, (busy == dma_num), 1000, 1000000);
	if (rc) {
		dev_err(hdev->dev, "DMA Timeout during HBM scrubbing\n");
		goto end;
	}
end:
	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
		for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
			u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET;
			u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;

			if (!(prop->edma_enabled_mask & BIT(edma_bit)))
				continue;

			WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + edma_offset, old_mmubp);
			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset, 0);
			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset, 0);
			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset, 0);
			gaudi2_qman_set_test_mode(hdev,
					edma_queues_id[dcore] + 4 * edma_idx, false);
		}
	}

	WREG32(sob_addr, 0);
	hl_asic_dma_free_coherent(hdev, pkt_size * num_of_pkts, lin_dma_pkts_arr, pkt_dma_addr);

	return rc;
}
static int gaudi2_scrub_device_dram(struct hl_device *hdev, u64 val)
{
	int rc;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 size = prop->dram_end_address - prop->dram_user_base_address;

	rc = gaudi2_memset_device_memory(hdev, prop->dram_user_base_address, size, val);

	if (rc)
		dev_err(hdev->dev, "Failed to scrub dram, address: 0x%llx size: %llu\n",
				prop->dram_user_base_address, size);
	return rc;
}
static int gaudi2_scrub_device_mem(struct hl_device *hdev)
{
	int rc;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 val = hdev->memory_scrub_val;
	u64 addr, size;

	if (!hdev->memory_scrub)
		return 0;

	/* scrub SRAM */
	addr = prop->sram_user_base_address;
	size = hdev->pldm ? 0x10000 : (prop->sram_size - SRAM_USER_BASE_OFFSET);
	dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx, val: 0x%llx\n",
			addr, addr + size, val);
	rc = gaudi2_memset_device_memory(hdev, addr, size, val);
	if (rc) {
		dev_err(hdev->dev, "scrubbing SRAM failed (%d)\n", rc);
		return rc;
	}

	/* scrub DRAM */
	rc = gaudi2_scrub_device_dram(hdev, val);
	if (rc) {
		dev_err(hdev->dev, "scrubbing DRAM failed (%d)\n", rc);
		return rc;
	}

	return 0;
}
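
/*
 * Clear the user-available sync manager resources (CQs, monitors and SOBs).
 * For dcore0 the first_available_* offsets are applied so that resources
 * reserved for the driver are left intact; the remaining dcores are cleared
 * over their full range.
 */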
static void gaudi2_restore_user_sm_registers(struct hl_device *hdev)
{
	u64 addr, mon_sts_addr, mon_cfg_addr, cq_lbw_l_addr, cq_lbw_h_addr,
		cq_lbw_data_addr, cq_base_l_addr, cq_base_h_addr, cq_size_addr;
	u32 val, size, offset;
	int dcore_id;

	offset = hdev->asic_prop.first_available_cq[0] * 4;
	cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset;
	cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + offset;
	cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + offset;
	cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + offset;
	cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + offset;
	cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + offset;
	size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 -
			(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset);

	/* memset dcore0 CQ registers */
	gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0);
	gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0);
	gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0);
	gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0);
	gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0);
	gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0);

	cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + DCORE_OFFSET;
	cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + DCORE_OFFSET;
	cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + DCORE_OFFSET;
	cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + DCORE_OFFSET;
	cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + DCORE_OFFSET;
	cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + DCORE_OFFSET;
	size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 - mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0;

	for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
		gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0);
		gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0);
		gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0);
		gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0);
		gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0);
		gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0);

		cq_lbw_l_addr += DCORE_OFFSET;
		cq_lbw_h_addr += DCORE_OFFSET;
		cq_lbw_data_addr += DCORE_OFFSET;
		cq_base_l_addr += DCORE_OFFSET;
		cq_base_h_addr += DCORE_OFFSET;
		cq_size_addr += DCORE_OFFSET;
	}

	offset = hdev->asic_prop.first_available_user_mon[0] * 4;
	addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset;
	val = 1 << DCORE0_SYNC_MNGR_OBJS_MON_STATUS_PROT_SHIFT;
	size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - (mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset);

	/* memset dcore0 monitors */
	gaudi2_memset_device_lbw(hdev, addr, size, val);

	addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + offset;
	gaudi2_memset_device_lbw(hdev, addr, size, 0);

	mon_sts_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + DCORE_OFFSET;
	mon_cfg_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + DCORE_OFFSET;
	size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0;

	for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
		gaudi2_memset_device_lbw(hdev, mon_sts_addr, size, val);
		gaudi2_memset_device_lbw(hdev, mon_cfg_addr, size, 0);
		mon_sts_addr += DCORE_OFFSET;
		mon_cfg_addr += DCORE_OFFSET;
	}

	offset = hdev->asic_prop.first_available_user_sob[0] * 4;
	addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset;
	val = 0;
	size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 -
			(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset);

	/* memset dcore0 sobs */
	gaudi2_memset_device_lbw(hdev, addr, size, val);

	addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + DCORE_OFFSET;
	size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 - mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0;

	for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
		gaudi2_memset_device_lbw(hdev, addr, size, val);
		addr += DCORE_OFFSET;
	}

	/* Flush all WREG to prevent race */
	val = RREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset);
}
static void gaudi2_restore_user_qm_registers(struct hl_device *hdev)
{
	u32 reg_base, hw_queue_id;

	for (hw_queue_id = GAUDI2_QUEUE_ID_PDMA_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_ROT_1_0;
							hw_queue_id += NUM_OF_PQ_PER_QMAN) {
		if (!gaudi2_is_queue_enabled(hdev, hw_queue_id))
			continue;

		gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false);

		reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
		WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0);
	}

	/* Flush all WREG to prevent race */
	RREG32(mmPDMA0_QM_ARB_CFG_0);
}
static void gaudi2_restore_nic_qm_registers(struct hl_device *hdev)
{
	u32 reg_base, hw_queue_id;

	for (hw_queue_id = GAUDI2_QUEUE_ID_NIC_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_NIC_23_3;
							hw_queue_id += NUM_OF_PQ_PER_QMAN) {
		if (!gaudi2_is_queue_enabled(hdev, hw_queue_id))
			continue;

		gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false);

		reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
		WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0);
	}

	/* Flush all WREG to prevent race */
	RREG32(mmPDMA0_QM_ARB_CFG_0);
}
static int gaudi2_context_switch(struct hl_device *hdev, u32 asid)
{
	return 0;
}

static void gaudi2_restore_phase_topology(struct hl_device *hdev)
{
}
static void gaudi2_init_block_instances(struct hl_device *hdev, u32 block_idx,
						struct dup_block_ctx *cfg_ctx)
{
	u64 block_base = cfg_ctx->base + block_idx * cfg_ctx->block_off;
	u8 seq;
	int i;

	for (i = 0 ; i < cfg_ctx->instances ; i++) {
		seq = block_idx * cfg_ctx->instances + i;

		/* skip disabled instance */
		if (!(cfg_ctx->enabled_mask & BIT_ULL(seq)))
			continue;

		cfg_ctx->instance_cfg_fn(hdev, block_base + i * cfg_ctx->instance_off,
					cfg_ctx->data);
	}
}

static void gaudi2_init_blocks_with_mask(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx,
						u64 mask)
{
	int i;

	cfg_ctx->enabled_mask = mask;

	for (i = 0 ; i < cfg_ctx->blocks ; i++)
		gaudi2_init_block_instances(hdev, i, cfg_ctx);
}

void gaudi2_init_blocks(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx)
{
	gaudi2_init_blocks_with_mask(hdev, cfg_ctx, U64_MAX);
}
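
/*
 * Read device memory via KDMA for debugfs: a 2MB host bounce buffer is
 * mapped into the compute context's VA space, then the requested range is
 * copied chunk by chunk (up to SZ_2M per KDMA job) from the device into the
 * bounce buffer and memcpy'd out to blob_addr.
 */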
static int gaudi2_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, void *blob_addr)
{
	void *host_mem_virtual_addr;
	dma_addr_t host_mem_dma_addr;
	u64 reserved_va_base;
	u32 pos, size_left, size_to_dma;
	struct hl_ctx *ctx;
	int rc = 0;

	ctx = hl_get_compute_ctx(hdev);
	if (!ctx) {
		dev_err(hdev->dev, "No ctx available\n");
		return -EINVAL;
	}

	/* Allocate buffers for read and for poll */
	host_mem_virtual_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &host_mem_dma_addr,
								GFP_KERNEL | __GFP_ZERO);
	if (host_mem_virtual_addr == NULL) {
		dev_err(hdev->dev, "Failed to allocate memory for KDMA read\n");
		rc = -ENOMEM;
		goto put_ctx;
	}

	/* Reserve VM region on asic side */
	reserved_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST, SZ_2M,
						HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
	if (!reserved_va_base) {
		dev_err(hdev->dev, "Failed to reserve vmem on asic\n");
		rc = -ENOMEM;
		goto free_data_buffer;
	}

	/* Create mapping on asic side */
	mutex_lock(&hdev->mmu_lock);
	rc = hl_mmu_map_contiguous(ctx, reserved_va_base, host_mem_dma_addr, SZ_2M);
	hl_mmu_invalidate_cache_range(hdev, false,
				      MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV,
				      ctx->asid, reserved_va_base, SZ_2M);
	mutex_unlock(&hdev->mmu_lock);
	if (rc) {
		dev_err(hdev->dev, "Failed to create mapping on asic mmu\n");
		goto unreserve_va;
	}

	/* Enable MMU on KDMA */
	gaudi2_kdma_set_mmbp_asid(hdev, false, ctx->asid);

	pos = 0;
	size_left = size;
	size_to_dma = SZ_2M;

	while (size_left > 0) {
		if (size_left < SZ_2M)
			size_to_dma = size_left;

		rc = gaudi2_send_job_to_kdma(hdev, addr, reserved_va_base, size_to_dma, false);
		if (rc)
			break;

		memcpy(blob_addr + pos, host_mem_virtual_addr, size_to_dma);

		if (size_left <= SZ_2M)
			break;

		pos += SZ_2M;
		addr += SZ_2M;
		size_left -= SZ_2M;
	}

	gaudi2_kdma_set_mmbp_asid(hdev, true, HL_KERNEL_ASID_ID);

	mutex_lock(&hdev->mmu_lock);
	hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M);
	hl_mmu_invalidate_cache_range(hdev, false, MMU_OP_USERPTR,
				      ctx->asid, reserved_va_base, SZ_2M);
	mutex_unlock(&hdev->mmu_lock);
unreserve_va:
	hl_unreserve_va_block(hdev, ctx, reserved_va_base, SZ_2M);
free_data_buffer:
	hl_asic_dma_free_coherent(hdev, SZ_2M, host_mem_virtual_addr, host_mem_dma_addr);
put_ctx:
	hl_ctx_put(ctx);

	return rc;
}
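
/*
 * The internal CB pool backs the driver-generated signal/wait command
 * buffers: one coherent host allocation is handed to a gen_pool allocator
 * (granularity is the smaller of the signal/wait CB sizes) and mapped
 * contiguously into the device's host VA range.
 */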
static int gaudi2_internal_cb_pool_init(struct hl_device *hdev, struct hl_ctx *ctx)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	int min_alloc_order, rc;

	if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU))
		return 0;

	hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
								HOST_SPACE_INTERNAL_CB_SZ,
								&hdev->internal_cb_pool_dma_addr,
								GFP_KERNEL | __GFP_ZERO);

	if (!hdev->internal_cb_pool_virt_addr)
		return -ENOMEM;

	min_alloc_order = ilog2(min(gaudi2_get_signal_cb_size(hdev),
					gaudi2_get_wait_cb_size(hdev)));

	hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
	if (!hdev->internal_cb_pool) {
		dev_err(hdev->dev, "Failed to create internal CB pool\n");
		rc = -ENOMEM;
		goto free_internal_cb_pool;
	}

	rc = gen_pool_add(hdev->internal_cb_pool, (uintptr_t) hdev->internal_cb_pool_virt_addr,
				HOST_SPACE_INTERNAL_CB_SZ, -1);
	if (rc) {
		dev_err(hdev->dev, "Failed to add memory to internal CB pool\n");
		rc = -EFAULT;
		goto destroy_internal_cb_pool;
	}

	hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST,
					HOST_SPACE_INTERNAL_CB_SZ, HL_MMU_VA_ALIGNMENT_NOT_NEEDED);

	if (!hdev->internal_cb_va_base) {
		rc = -ENOMEM;
		goto destroy_internal_cb_pool;
	}

	mutex_lock(&hdev->mmu_lock);
	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, hdev->internal_cb_pool_dma_addr,
					HOST_SPACE_INTERNAL_CB_SZ);
	hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
	mutex_unlock(&hdev->mmu_lock);

	if (rc)
		goto unreserve_internal_cb_pool;

	return 0;

unreserve_internal_cb_pool:
	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
destroy_internal_cb_pool:
	gen_pool_destroy(hdev->internal_cb_pool);
free_internal_cb_pool:
	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
					hdev->internal_cb_pool_dma_addr);

	return rc;
}
static void gaudi2_internal_cb_pool_fini(struct hl_device *hdev, struct hl_ctx *ctx)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;

	if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU))
		return;

	mutex_lock(&hdev->mmu_lock);
	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
	mutex_unlock(&hdev->mmu_lock);

	gen_pool_destroy(hdev->internal_cb_pool);

	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
					hdev->internal_cb_pool_dma_addr);
}
static void gaudi2_restore_user_registers(struct hl_device *hdev)
{
	gaudi2_restore_user_sm_registers(hdev);
	gaudi2_restore_user_qm_registers(hdev);
}
static int gaudi2_map_virtual_msix_doorbell_memory(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	int rc;

	rc = hl_mmu_map_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START,
				gaudi2->virt_msix_db_dma_addr, prop->pmmu.page_size, true);
	if (rc)
		dev_err(hdev->dev, "Failed to map VA %#llx for virtual MSI-X doorbell memory\n",
			RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);

	return rc;
}
static void gaudi2_unmap_virtual_msix_doorbell_memory(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	rc = hl_mmu_unmap_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START,
				prop->pmmu.page_size, true);
	if (rc)
		dev_err(hdev->dev, "Failed to unmap VA %#llx of virtual MSI-X doorbell memory\n",
			RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);
}
static int gaudi2_ctx_init(struct hl_ctx *ctx)
{
	int rc;

	rc = gaudi2_mmu_prepare(ctx->hdev, ctx->asid);
	if (rc)
		return rc;

	/* No need to clear user registers if the device has just
	 * performed reset, we restore only nic qm registers
	 */
	if (ctx->hdev->reset_upon_device_release)
		gaudi2_restore_nic_qm_registers(ctx->hdev);
	else
		gaudi2_restore_user_registers(ctx->hdev);

	rc = gaudi2_internal_cb_pool_init(ctx->hdev, ctx);
	if (rc)
		return rc;

	rc = gaudi2_map_virtual_msix_doorbell_memory(ctx);
	if (rc)
		gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);

	return rc;
}
static void gaudi2_ctx_fini(struct hl_ctx *ctx)
{
	if (ctx->asid == HL_KERNEL_ASID_ID)
		return;

	gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);

	gaudi2_unmap_virtual_msix_doorbell_memory(ctx);
}
static int gaudi2_pre_schedule_cs(struct hl_cs *cs)
{
	struct hl_device *hdev = cs->ctx->hdev;
	int index = cs->sequence & (hdev->asic_prop.max_pending_cs - 1);
	u32 mon_payload, sob_id, mon_id;

	if (!cs_needs_completion(cs))
		return 0;

	/*
	 * First 64 SOB/MON are reserved for driver for QMAN auto completion
	 * mechanism. Each SOB/MON pair is used for a pending CS with the same
	 * cyclic index. The SOB value is increased when each of the CS jobs is
	 * completed. When the SOB reaches the number of CS jobs, the monitor
	 * generates MSI-X interrupt.
	 */

	sob_id = mon_id = index;
	mon_payload = (1 << CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT) |
			(1 << CQ_ENTRY_READY_SHIFT) | index;

	gaudi2_arm_cq_monitor(hdev, sob_id, mon_id, GAUDI2_RESERVED_CQ_CS_COMPLETION, mon_payload,
				cs->jobs_cnt);

	return 0;
}
static u32 gaudi2_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
{
	return HL_INVALID_QUEUE;
}
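
/*
 * A signal CB is a single MSG_SHORT packet that adds 1 to the target sync
 * object (SOB base, ADD mode), with the engine barrier taken from the
 * caller and the message barrier always set.
 */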
static u32 gaudi2_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, u32 size, bool eb)
{
	struct hl_cb *cb = data;
	struct packet_msg_short *pkt;
	u32 value, ctl, pkt_size = sizeof(*pkt);

	pkt = (struct packet_msg_short *) (uintptr_t) (cb->kernel_address + size);
	memset(pkt, 0, pkt_size);

	/* Inc by 1, Mode ADD */
	value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_MOD_MASK, 1);

	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
	ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 1); /* SOB base */
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, eb);
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return size + pkt_size;
}
static u32 gaudi2_add_mon_msg_short(struct packet_msg_short *pkt, u32 value, u16 addr)
{
	u32 ctl, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr);
	ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0); /* MON base */
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 0);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}
static u32 gaudi2_add_arm_monitor_pkt(struct hl_device *hdev, struct packet_msg_short *pkt,
					u16 sob_base, u8 sob_mask, u16 sob_val, u16 addr)
{
	u32 ctl, value, pkt_size = sizeof(*pkt);
	u8 mask;

	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
		dev_err(hdev->dev, "sob_base %u (mask %#x) is not valid\n", sob_base, sob_mask);
		return 0;
	}

	memset(pkt, 0, pkt_size);

	value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MODE_MASK, 0); /* GREATER OR EQUAL */
	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MASK_MASK, mask);

	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr);
	ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0); /* MON base */
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}
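
/*
 * The FENCE packet below blocks the queue until fence counter ID 2 reaches
 * the target value of 1, then decrements it by 1; it is the final packet of
 * the wait CB assembled in gaudi2_gen_wait_cb() below.
 */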
static u32 gaudi2_add_fence_pkt(struct packet_fence *pkt)
{
	u32 ctl, cfg, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	cfg = FIELD_PREP(GAUDI2_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_ID_MASK, 2);

	ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);

	pkt->cfg = cpu_to_le32(cfg);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}
static u32 gaudi2_gen_wait_cb(struct hl_device *hdev, struct hl_gen_wait_properties *prop)
{
	struct hl_cb *cb = prop->data;
	void *buf = (void *) (uintptr_t) (cb->kernel_address);

	u64 monitor_base, fence_addr = 0;
	u32 stream_index, size = prop->size;
	u16 msg_addr_offset;

	stream_index = prop->q_idx % 4;
	fence_addr = CFG_BASE + gaudi2_qm_blocks_bases[prop->q_idx] +
			QM_FENCE2_OFFSET + stream_index * 4;

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	/* First monitor config packet: low address of the sync */
	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + prop->mon_id * 4) -
				monitor_base;

	size += gaudi2_add_mon_msg_short(buf + size, (u32) fence_addr, msg_addr_offset);

	/* Second monitor config packet: high address of the sync */
	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + prop->mon_id * 4) -
				monitor_base;

	size += gaudi2_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32), msg_addr_offset);

	/*
	 * Third monitor config packet: the payload, i.e. what to write when the
	 * sync triggers
	 */
	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + prop->mon_id * 4) -
				monitor_base;

	size += gaudi2_add_mon_msg_short(buf + size, 1, msg_addr_offset);

	/* Fourth monitor config packet: bind the monitor to a sync object */
	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + prop->mon_id * 4) - monitor_base;

	size += gaudi2_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base, prop->sob_mask,
						prop->sob_val, msg_addr_offset);

	/* Fence packet */
	size += gaudi2_add_fence_pkt(buf + size);

	return size;
}
static void gaudi2_reset_sob(struct hl_device *hdev, void *data)
{
	struct hl_hw_sob *hw_sob = data;

	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx, hw_sob->sob_id);

	WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4, 0);

	kref_init(&hw_sob->kref);
}

static void gaudi2_reset_sob_group(struct hl_device *hdev, u16 sob_group)
{
}
static u64 gaudi2_get_device_time(struct hl_device *hdev)
{
	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;

	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
}

static int gaudi2_collective_wait_init_cs(struct hl_cs *cs)
{
	return 0;
}
static int gaudi2_collective_wait_create_jobs(struct hl_device *hdev, struct hl_ctx *ctx,
					struct hl_cs *cs, u32 wait_queue_id,
					u32 collective_engine_id, u32 encaps_signal_offset)
{
	return -EINVAL;
}

/*
 * hl_mmu_scramble - converts a dram (non power of 2) page-size aligned address
 *                   to DMMU page-size address (64MB) before mapping it in
 *                   the MMU.
 * The operation is performed on both the virtual and physical addresses.
 * for device with 6 HBMs the scramble is:
 * (addr[47:0] / 48M) * 64M + addr % 48M + addr[63:48]
 *
 * Example:
 * =============================================================================
 * Allocated DRAM  Reserved VA     Scrambled VA for MMU mapping     Scrambled PA
 * Phys address                                                     in MMU last
 *                                                                  HOP
 * =============================================================================
 * PA1 0x3000000   VA1 0x9C000000  SVA1 = (VA1/48M)*64M 0xD0000000  <- PA1/48M 0x1
 * PA2 0x9000000   VA2 0x9F000000  SVA2 = (VA2/48M)*64M 0xD4000000  <- PA2/48M 0x3
 * =============================================================================
 */
static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 divisor, mod_va;
	u64 div_va;

	/* accept any address in the DRAM address space */
	if (hl_mem_area_inside_range(raw_addr, sizeof(raw_addr), DRAM_PHYS_BASE,
									VA_HBM_SPACE_END)) {
		divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE;
		div_va = div_u64_rem(raw_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK, divisor, &mod_va);
		return (raw_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) |
			(div_va << GAUDI2_HBM_MMU_SCRM_DIV_SHIFT) |
			(mod_va << GAUDI2_HBM_MMU_SCRM_MOD_SHIFT);
	}

	return raw_addr;
}
static u64 gaudi2_mmu_descramble_addr(struct hl_device *hdev, u64 scrambled_addr)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 divisor, mod_va;
	u64 div_va;

	/* accept any address in the DRAM address space */
	if (hl_mem_area_inside_range(scrambled_addr, sizeof(scrambled_addr), DRAM_PHYS_BASE,
									VA_HBM_SPACE_END)) {
		divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE;
		div_va = div_u64_rem(scrambled_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK,
					PAGE_SIZE_64MB, &mod_va);

		return ((scrambled_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) +
					(div_va * divisor + mod_va));
	}

	return scrambled_addr;
}
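
/*
 * Round-trip sketch, following the scramble formula documented above for a
 * device with 6 functional HBMs (divisor = 48MB):
 * scramble(0x9000000) = (0x9000000 / 48M) * 64M + 0 = 0xC000000, and
 * descramble(0xC000000) = (0xC000000 / 64M) * 48M + 0 = 0x9000000.
 */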
static u32 gaudi2_get_dec_base_addr(struct hl_device *hdev, u32 core_id)
{
	u32 base = 0, dcore_id, dec_id;

	if (core_id >= NUMBER_OF_DEC) {
		dev_err(hdev->dev, "Unexpected core number %d for DEC\n", core_id);
		goto out;
	}

	if (core_id < 8) {
		dcore_id = core_id / NUM_OF_DEC_PER_DCORE;
		dec_id = core_id % NUM_OF_DEC_PER_DCORE;

		base = mmDCORE0_DEC0_CMD_BASE + dcore_id * DCORE_OFFSET +
				dec_id * DCORE_VDEC_OFFSET;
	} else {
		/* PCIe Shared Decoder */
		base = mmPCIE_DEC0_CMD_BASE + ((core_id % 8) * PCIE_VDEC_OFFSET);
	}
out:
	return base;
}
static int gaudi2_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
			u32 *block_size, u32 *block_id)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	int i;

	for (i = 0 ; i < NUM_USER_MAPPED_BLOCKS ; i++) {
		if (block_addr == CFG_BASE + gaudi2->mapped_blocks[i].address) {
			*block_id = i;
			if (block_size)
				*block_size = gaudi2->mapped_blocks[i].size;
			return 0;
		}
	}

	dev_err(hdev->dev, "Invalid block address %#llx", block_addr);

	return -EINVAL;
}
static int gaudi2_block_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
			u32 block_id, u32 block_size)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u64 offset_in_bar;
	u64 address;
	int rc;

	if (block_id >= NUM_USER_MAPPED_BLOCKS) {
		dev_err(hdev->dev, "Invalid block id %u", block_id);
		return -EINVAL;
	}

	/* we allow mapping only an entire block */
	if (block_size != gaudi2->mapped_blocks[block_id].size) {
		dev_err(hdev->dev, "Invalid block size %u", block_size);
		return -EINVAL;
	}

	offset_in_bar = CFG_BASE + gaudi2->mapped_blocks[block_id].address - STM_FLASH_BASE_ADDR;

	address = pci_resource_start(hdev->pdev, SRAM_CFG_BAR_ID) + offset_in_bar;

	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
			VM_DONTCOPY | VM_NORESERVE;

	rc = remap_pfn_range(vma, vma->vm_start, address >> PAGE_SHIFT,
			block_size, vma->vm_page_prot);
	if (rc)
		dev_err(hdev->dev, "remap_pfn_range error %d", rc);

	return rc;
}
static void gaudi2_enable_events_from_fw(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;

	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 irq_handler_offset = le32_to_cpu(dyn_regs->gic_host_ints_irq);

	if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)
		WREG32(irq_handler_offset,
			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_INTS_REGISTER].cpu_id);
}
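
/*
 * mmu_id is an HW_CAP_* capability bit: one bit per HMMU (four per dcore)
 * plus HW_CAP_PMMU, each mapped here to its register block base.
 */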
static int gaudi2_get_mmu_base(struct hl_device *hdev, u64 mmu_id, u32 *mmu_base)
{
	switch (mmu_id) {
	case HW_CAP_DCORE0_DMMU0:
		*mmu_base = mmDCORE0_HMMU0_MMU_BASE;
		break;
	case HW_CAP_DCORE0_DMMU1:
		*mmu_base = mmDCORE0_HMMU1_MMU_BASE;
		break;
	case HW_CAP_DCORE0_DMMU2:
		*mmu_base = mmDCORE0_HMMU2_MMU_BASE;
		break;
	case HW_CAP_DCORE0_DMMU3:
		*mmu_base = mmDCORE0_HMMU3_MMU_BASE;
		break;
	case HW_CAP_DCORE1_DMMU0:
		*mmu_base = mmDCORE1_HMMU0_MMU_BASE;
		break;
	case HW_CAP_DCORE1_DMMU1:
		*mmu_base = mmDCORE1_HMMU1_MMU_BASE;
		break;
	case HW_CAP_DCORE1_DMMU2:
		*mmu_base = mmDCORE1_HMMU2_MMU_BASE;
		break;
	case HW_CAP_DCORE1_DMMU3:
		*mmu_base = mmDCORE1_HMMU3_MMU_BASE;
		break;
	case HW_CAP_DCORE2_DMMU0:
		*mmu_base = mmDCORE2_HMMU0_MMU_BASE;
		break;
	case HW_CAP_DCORE2_DMMU1:
		*mmu_base = mmDCORE2_HMMU1_MMU_BASE;
		break;
	case HW_CAP_DCORE2_DMMU2:
		*mmu_base = mmDCORE2_HMMU2_MMU_BASE;
		break;
	case HW_CAP_DCORE2_DMMU3:
		*mmu_base = mmDCORE2_HMMU3_MMU_BASE;
		break;
	case HW_CAP_DCORE3_DMMU0:
		*mmu_base = mmDCORE3_HMMU0_MMU_BASE;
		break;
	case HW_CAP_DCORE3_DMMU1:
		*mmu_base = mmDCORE3_HMMU1_MMU_BASE;
		break;
	case HW_CAP_DCORE3_DMMU2:
		*mmu_base = mmDCORE3_HMMU2_MMU_BASE;
		break;
	case HW_CAP_DCORE3_DMMU3:
		*mmu_base = mmDCORE3_HMMU3_MMU_BASE;
		break;
	case HW_CAP_PMMU:
		*mmu_base = mmPMMU_HBW_MMU_BASE;
		break;
	default:
		return -EINVAL;
	}

	return 0;
}
static void gaudi2_ack_mmu_error(struct hl_device *hdev, u64 mmu_id)
{
	bool is_pmmu = (mmu_id == HW_CAP_PMMU);
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 mmu_base;

	if (!(gaudi2->hw_cap_initialized & mmu_id))
		return;

	if (gaudi2_get_mmu_base(hdev, mmu_id, &mmu_base))
		return;

	gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, NULL);
	gaudi2_handle_access_error(hdev, mmu_base, is_pmmu);
}
static int gaudi2_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
{
	u32 i, mmu_id, num_of_hmmus = NUM_OF_HMMU_PER_DCORE * NUM_OF_DCORES;

	/* check all HMMUs */
	for (i = 0 ; i < num_of_hmmus ; i++) {
		mmu_id = HW_CAP_DCORE0_DMMU0 << i;

		if (mmu_cap_mask & mmu_id)
			gaudi2_ack_mmu_error(hdev, mmu_id);
	}

	/* check PMMU */
	if (mmu_cap_mask & HW_CAP_PMMU)
		gaudi2_ack_mmu_error(hdev, HW_CAP_PMMU);

	return 0;
}
static void gaudi2_get_msi_info(__le32 *table)
{
	table[CPUCP_EVENT_QUEUE_MSI_TYPE] = cpu_to_le32(GAUDI2_EVENT_QUEUE_MSIX_IDX);
}
static int gaudi2_map_pll_idx_to_fw_idx(u32 pll_idx)
{
	switch (pll_idx) {
	case HL_GAUDI2_CPU_PLL: return CPU_PLL;
	case HL_GAUDI2_PCI_PLL: return PCI_PLL;
	case HL_GAUDI2_NIC_PLL: return NIC_PLL;
	case HL_GAUDI2_DMA_PLL: return DMA_PLL;
	case HL_GAUDI2_MESH_PLL: return MESH_PLL;
	case HL_GAUDI2_MME_PLL: return MME_PLL;
	case HL_GAUDI2_TPC_PLL: return TPC_PLL;
	case HL_GAUDI2_IF_PLL: return IF_PLL;
	case HL_GAUDI2_SRAM_PLL: return SRAM_PLL;
	case HL_GAUDI2_HBM_PLL: return HBM_PLL;
	case HL_GAUDI2_VID_PLL: return VID_PLL;
	case HL_GAUDI2_MSS_PLL: return MSS_PLL;
	default: return -EINVAL;
	}
}
static int gaudi2_gen_sync_to_engine_map(struct hl_device *hdev, struct hl_sync_to_engine_map *map)
{
	/* Not implemented */
	return 0;
}

static int gaudi2_monitor_valid(struct hl_mon_state_dump *mon)
{
	/* Not implemented */
	return 0;
}

static int gaudi2_print_single_monitor(char **buf, size_t *size, size_t *offset,
				struct hl_device *hdev, struct hl_mon_state_dump *mon)
{
	/* Not implemented */
	return 0;
}

static int gaudi2_print_fences_single_engine(struct hl_device *hdev, u64 base_offset,
				u64 status_base_offset, enum hl_sync_engine_type engine_type,
				u32 engine_id, char **buf, size_t *size, size_t *offset)
{
	/* Not implemented */
	return 0;
}
static struct hl_state_dump_specs_funcs gaudi2_state_dump_funcs = {
	.monitor_valid = gaudi2_monitor_valid,
	.print_single_monitor = gaudi2_print_single_monitor,
	.gen_sync_to_engine_map = gaudi2_gen_sync_to_engine_map,
	.print_fences_single_engine = gaudi2_print_fences_single_engine,
};

static void gaudi2_state_dump_init(struct hl_device *hdev)
{
	/* Not implemented */
	hdev->state_dump_specs.props = gaudi2_state_dump_specs_props;
	hdev->state_dump_specs.funcs = gaudi2_state_dump_funcs;
}
static u32 gaudi2_get_sob_addr(struct hl_device *hdev, u32 sob_id)
{
	return mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * 4;
}

static u32 *gaudi2_get_stream_master_qid_arr(void)
{
	return NULL;
}
static void gaudi2_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
				struct attribute_group *dev_vrm_attr_grp)
{
	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
	hl_sysfs_add_dev_vrm_attr(hdev, dev_vrm_attr_grp);
}
static int gaudi2_mmu_get_real_page_size(struct hl_device *hdev, struct hl_mmu_properties *mmu_prop,
						u32 page_size, u32 *real_page_size, bool is_dram_addr)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;

	/* for host pages the page size must be a multiple of the MMU page size */
	if (!is_dram_addr) {
		if (page_size % mmu_prop->page_size)
			goto page_size_err;

		*real_page_size = mmu_prop->page_size;
		return 0;
	}

	if ((page_size % prop->dram_page_size) || (prop->dram_page_size > mmu_prop->page_size))
		goto page_size_err;

	/*
	 * MMU page size is different from DRAM page size (more precisely, DMMU page is greater
	 * than DRAM page size).
	 * for this reason work with the DRAM page size and let the MMU scrambling routine handle
	 * this mismatch when calculating the address to place in the MMU page table.
	 * (in that case also make sure that the dram_page_size is not greater than the
	 * mmu page size)
	 */
	*real_page_size = prop->dram_page_size;

	return 0;

page_size_err:
	dev_err(hdev->dev, "page size of %u is not %uKB aligned, can't map\n",
							page_size, mmu_prop->page_size >> 10);
	return -EFAULT;
}
static int gaudi2_get_monitor_dump(struct hl_device *hdev, void *data)
{
	return -EOPNOTSUPP;
}
int gaudi2_send_device_activity(struct hl_device *hdev, bool open)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;

	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_send_device_activity(hdev, open);
}
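
/*
 * ASIC function table wiring the Gaudi2-specific implementations into the
 * common habanalabs driver core. NULL entries are flows that are either
 * handled entirely by common code or not supported on this ASIC.
 */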
static const struct hl_asic_funcs gaudi2_funcs = {
	.early_init = gaudi2_early_init,
	.early_fini = gaudi2_early_fini,
	.late_init = gaudi2_late_init,
	.late_fini = gaudi2_late_fini,
	.sw_init = gaudi2_sw_init,
	.sw_fini = gaudi2_sw_fini,
	.hw_init = gaudi2_hw_init,
	.hw_fini = gaudi2_hw_fini,
	.halt_engines = gaudi2_halt_engines,
	.suspend = gaudi2_suspend,
	.resume = gaudi2_resume,
	.mmap = gaudi2_mmap,
	.ring_doorbell = gaudi2_ring_doorbell,
	.pqe_write = gaudi2_pqe_write,
	.asic_dma_alloc_coherent = gaudi2_dma_alloc_coherent,
	.asic_dma_free_coherent = gaudi2_dma_free_coherent,
	.scrub_device_mem = gaudi2_scrub_device_mem,
	.scrub_device_dram = gaudi2_scrub_device_dram,
	.get_int_queue_base = NULL,
	.test_queues = gaudi2_test_queues,
	.asic_dma_pool_zalloc = gaudi2_dma_pool_zalloc,
	.asic_dma_pool_free = gaudi2_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = gaudi2_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = gaudi2_cpu_accessible_dma_pool_free,
	.asic_dma_unmap_single = gaudi2_dma_unmap_single,
	.asic_dma_map_single = gaudi2_dma_map_single,
	.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
	.cs_parser = gaudi2_cs_parser,
	.asic_dma_map_sgtable = hl_dma_map_sgtable,
	.add_end_of_cb_packets = NULL,
	.update_eq_ci = gaudi2_update_eq_ci,
	.context_switch = gaudi2_context_switch,
	.restore_phase_topology = gaudi2_restore_phase_topology,
	.debugfs_read_dma = gaudi2_debugfs_read_dma,
	.add_device_attr = gaudi2_add_device_attr,
	.handle_eqe = gaudi2_handle_eqe,
	.get_events_stat = gaudi2_get_events_stat,
	.read_pte = NULL,
	.write_pte = NULL,
	.mmu_invalidate_cache = gaudi2_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = gaudi2_mmu_invalidate_cache_range,
	.mmu_prefetch_cache_range = NULL,
	.send_heartbeat = gaudi2_send_heartbeat,
	.debug_coresight = gaudi2_debug_coresight,
	.is_device_idle = gaudi2_is_device_idle,
	.compute_reset_late_init = gaudi2_compute_reset_late_init,
	.hw_queues_lock = gaudi2_hw_queues_lock,
	.hw_queues_unlock = gaudi2_hw_queues_unlock,
	.get_pci_id = gaudi2_get_pci_id,
	.get_eeprom_data = gaudi2_get_eeprom_data,
	.get_monitor_dump = gaudi2_get_monitor_dump,
	.send_cpu_message = gaudi2_send_cpu_message,
	.pci_bars_map = gaudi2_pci_bars_map,
	.init_iatu = gaudi2_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = gaudi2_halt_coresight,
	.ctx_init = gaudi2_ctx_init,
	.ctx_fini = gaudi2_ctx_fini,
	.pre_schedule_cs = gaudi2_pre_schedule_cs,
	.get_queue_id_for_cq = gaudi2_get_queue_id_for_cq,
	.load_firmware_to_device = NULL,
	.load_boot_fit_to_device = NULL,
	.get_signal_cb_size = gaudi2_get_signal_cb_size,
	.get_wait_cb_size = gaudi2_get_wait_cb_size,
	.gen_signal_cb = gaudi2_gen_signal_cb,
	.gen_wait_cb = gaudi2_gen_wait_cb,
	.reset_sob = gaudi2_reset_sob,
	.reset_sob_group = gaudi2_reset_sob_group,
	.get_device_time = gaudi2_get_device_time,
	.pb_print_security_errors = gaudi2_pb_print_security_errors,
	.collective_wait_init_cs = gaudi2_collective_wait_init_cs,
	.collective_wait_create_jobs = gaudi2_collective_wait_create_jobs,
	.get_dec_base_addr = gaudi2_get_dec_base_addr,
	.scramble_addr = gaudi2_mmu_scramble_addr,
	.descramble_addr = gaudi2_mmu_descramble_addr,
	.ack_protection_bits_errors = gaudi2_ack_protection_bits_errors,
	.get_hw_block_id = gaudi2_get_hw_block_id,
	.hw_block_mmap = gaudi2_block_mmap,
	.enable_events_from_fw = gaudi2_enable_events_from_fw,
	.ack_mmu_errors = gaudi2_ack_mmu_page_fault_or_access_error,
	.get_msi_info = gaudi2_get_msi_info,
	.map_pll_idx_to_fw_idx = gaudi2_map_pll_idx_to_fw_idx,
	.init_firmware_preload_params = gaudi2_init_firmware_preload_params,
	.init_firmware_loader = gaudi2_init_firmware_loader,
	.init_cpu_scrambler_dram = gaudi2_init_scrambler_hbm,
	.state_dump_init = gaudi2_state_dump_init,
	.get_sob_addr = &gaudi2_get_sob_addr,
	.set_pci_memory_regions = gaudi2_set_pci_memory_regions,
	.get_stream_master_qid_arr = gaudi2_get_stream_master_qid_arr,
	.check_if_razwi_happened = gaudi2_check_if_razwi_happened,
	.mmu_get_real_page_size = gaudi2_mmu_get_real_page_size,
	.access_dev_mem = hl_access_dev_mem,
	.set_dram_bar_base = gaudi2_set_hbm_bar_base,
	.set_engine_cores = gaudi2_set_engine_cores,
	.send_device_activity = gaudi2_send_device_activity,
};

void gaudi2_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &gaudi2_funcs;
}