// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2020-2022 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "gaudi2P.h"
#include "gaudi2_masks.h"
#include "../include/gaudi2/gaudi2_special_blocks.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v2_0.h"
#include "../include/gaudi2/gaudi2_packets.h"
#include "../include/gaudi2/gaudi2_reg_map.h"
#include "../include/gaudi2/gaudi2_async_ids_map_extended.h"
#include "../include/gaudi2/arc/gaudi2_arc_common_packets.h"

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/hwmon.h>
#include <linux/iommu.h>

#define GAUDI2_DMA_POOL_BLK_SIZE                SZ_256          /* 256 bytes */

#define GAUDI2_RESET_TIMEOUT_MSEC               2000            /* 2000ms */

#define GAUDI2_RESET_POLL_TIMEOUT_USEC          500000          /* 500ms */
#define GAUDI2_PLDM_HRESET_TIMEOUT_MSEC         25000           /* 25s */
#define GAUDI2_PLDM_SRESET_TIMEOUT_MSEC         25000           /* 25s */
#define GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC     3000000         /* 3s */
#define GAUDI2_RESET_POLL_CNT                   3
#define GAUDI2_RESET_WAIT_MSEC                  1               /* 1ms */
#define GAUDI2_CPU_RESET_WAIT_MSEC              100             /* 100ms */
#define GAUDI2_PLDM_RESET_WAIT_MSEC             1000            /* 1s */
#define GAUDI2_CB_POOL_CB_CNT                   512
#define GAUDI2_CB_POOL_CB_SIZE                  SZ_128K         /* 128KB */
#define GAUDI2_MSG_TO_CPU_TIMEOUT_USEC          4000000         /* 4s */
#define GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC         25000000        /* 25s */
#define GAUDI2_TEST_QUEUE_WAIT_USEC             100000          /* 100ms */
#define GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC        1000000         /* 1s */

#define GAUDI2_ALLOC_CPU_MEM_RETRY_CNT          3
/*
 * Since the code already has built-in support for binning of up to MAX_FAULTY_TPCS TPCs,
 * and relies on that value (for array sizes etc.), we define another value
 * for the maximum number of faulty TPCs which reflects the cluster binning requirements.
 */
#define MAX_CLUSTER_BINNING_FAULTY_TPCS         1
#define MAX_FAULTY_XBARS                        1
#define MAX_FAULTY_EDMAS                        1
#define MAX_FAULTY_DECODERS                     1

#define GAUDI2_TPC_FULL_MASK                    0x1FFFFFF
#define GAUDI2_HIF_HMMU_FULL_MASK               0xFFFF
#define GAUDI2_DECODER_FULL_MASK                0x3FF

#define GAUDI2_NA_EVENT_CAUSE                   0xFF
#define GAUDI2_NUM_OF_QM_ERR_CAUSE              18
#define GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE          25
#define GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE          3
#define GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE         14
#define GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE         3
#define GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE          2
#define GAUDI2_NUM_OF_ROT_ERR_CAUSE             22
#define GAUDI2_NUM_OF_TPC_INTR_CAUSE            30
#define GAUDI2_NUM_OF_DEC_ERR_CAUSE             25
#define GAUDI2_NUM_OF_MME_ERR_CAUSE             16
#define GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE        5
#define GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE         7
#define GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE       8
#define GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE         19
#define GAUDI2_NUM_OF_HBM_SEI_CAUSE             9
#define GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE          3
#define GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE   3
#define GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE      2
#define GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE       2
#define GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE       2
#define GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE          5

#define GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC       (MMU_CONFIG_TIMEOUT_USEC * 10)
#define GAUDI2_PLDM_MMU_TIMEOUT_USEC            (MMU_CONFIG_TIMEOUT_USEC * 200)
#define GAUDI2_ARB_WDT_TIMEOUT                  (0x1000000)

#define GAUDI2_VDEC_TIMEOUT_USEC                10000           /* 10ms */
#define GAUDI2_PLDM_VDEC_TIMEOUT_USEC           (GAUDI2_VDEC_TIMEOUT_USEC * 100)

#define KDMA_TIMEOUT_USEC                       USEC_PER_SEC

#define IS_DMA_IDLE(dma_core_sts0)      \
        (!((dma_core_sts0) & (DCORE0_EDMA0_CORE_STS0_BUSY_MASK)))

#define IS_DMA_HALTED(dma_core_sts1)    \
        ((dma_core_sts1) & (DCORE0_EDMA0_CORE_STS1_IS_HALT_MASK))

#define IS_MME_IDLE(mme_arch_sts) (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)

#define IS_TPC_IDLE(tpc_cfg_sts) (((tpc_cfg_sts) & (TPC_IDLE_MASK)) == (TPC_IDLE_MASK))

#define IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) \
        ((((qm_glbl_sts0) & (QM_IDLE_MASK)) == (QM_IDLE_MASK)) && \
        (((qm_glbl_sts1) & (QM_ARC_IDLE_MASK)) == (QM_ARC_IDLE_MASK)) && \
        (((qm_cgm_sts) & (CGM_IDLE_MASK)) == (CGM_IDLE_MASK)))

#define PCIE_DEC_EN_MASK                        0x300
#define DEC_WORK_STATE_IDLE                     0
#define DEC_WORK_STATE_PEND                     3
#define IS_DEC_IDLE(dec_swreg15) \
        (((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) == DEC_WORK_STATE_IDLE || \
        ((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) == DEC_WORK_STATE_PEND)
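
/*
 * Illustrative sketch (not part of the original source): the idle checks
 * above are fed with raw status registers read from each engine block, e.g.
 * when scanning queue managers in an is-device-idle pass. The *_OFFSET
 * names here are placeholders, not real register definitions:
 *
 *	qm_glbl_sts0 = RREG32(qm_base + QM_GLBL_STS0_OFFSET);
 *	qm_glbl_sts1 = RREG32(qm_base + QM_GLBL_STS1_OFFSET);
 *	qm_cgm_sts = RREG32(qm_base + QM_CGM_STS_OFFSET);
 *	is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
 */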

/* HBM MMU address scrambling parameters */
#define GAUDI2_HBM_MMU_SCRM_MEM_SIZE            SZ_8M
#define GAUDI2_HBM_MMU_SCRM_DIV_SHIFT           26
#define GAUDI2_HBM_MMU_SCRM_MOD_SHIFT           0
#define GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK        DRAM_VA_HINT_MASK
#define GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR  16
#define MMU_RANGE_INV_VA_LSB_SHIFT              12
#define MMU_RANGE_INV_VA_MSB_SHIFT              44
#define MMU_RANGE_INV_EN_SHIFT                  0
#define MMU_RANGE_INV_ASID_EN_SHIFT             1
#define MMU_RANGE_INV_ASID_SHIFT                2

/* The last SPI_SEI cause bit, "burst_fifo_full", is expected to be triggered in the PMMU
 * because it has only a 2-entry FIFO, and hence it is not enabled for it.
 */
#define GAUDI2_PMMU_SPI_SEI_ENABLE_MASK         GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 2, 0)
#define GAUDI2_HMMU_SPI_SEI_ENABLE_MASK         GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 1, 0)
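
/*
 * With GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE == 19, the masks above evaluate to
 * GENMASK(17, 0) == 0x3ffff for the PMMU (bit 18, "burst_fifo_full", left
 * disabled) and GENMASK(18, 0) == 0x7ffff for the HMMUs.
 */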

#define GAUDI2_MAX_STRING_LEN                   64

#define GAUDI2_VDEC_MSIX_ENTRIES                (GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM - \
                                                        GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 1)

#define ENGINE_ID_DCORE_OFFSET (GAUDI2_DCORE1_ENGINE_ID_EDMA_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0)

/* RAZWI initiator coordinates */
#define RAZWI_GET_AXUSER_XY(x) \
        ((x & 0xF8001FF0) >> 4)

#define RAZWI_GET_AXUSER_LOW_XY(x) \
        ((x & 0x00001FF0) >> 4)

#define RAZWI_INITIATOR_AXUER_L_X_SHIFT         0
#define RAZWI_INITIATOR_AXUER_L_X_MASK          0x1F
#define RAZWI_INITIATOR_AXUER_L_Y_SHIFT         5
#define RAZWI_INITIATOR_AXUER_L_Y_MASK          0xF

#define RAZWI_INITIATOR_AXUER_H_X_SHIFT         23
#define RAZWI_INITIATOR_AXUER_H_X_MASK          0x1F

#define RAZWI_INITIATOR_ID_X_Y_LOW(x, y) \
        ((((y) & RAZWI_INITIATOR_AXUER_L_Y_MASK) << RAZWI_INITIATOR_AXUER_L_Y_SHIFT) | \
                (((x) & RAZWI_INITIATOR_AXUER_L_X_MASK) << RAZWI_INITIATOR_AXUER_L_X_SHIFT))

#define RAZWI_INITIATOR_ID_X_HIGH(x) \
                (((x) & RAZWI_INITIATOR_AXUER_H_X_MASK) << RAZWI_INITIATOR_AXUER_H_X_SHIFT)

#define RAZWI_INITIATOR_ID_X_Y(xl, yl, xh) \
        (RAZWI_INITIATOR_ID_X_Y_LOW(xl, yl) | RAZWI_INITIATOR_ID_X_HIGH(xh))
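
/*
 * Worked example (illustrative, not from the original source): for the
 * first DEC0 entry below, RAZWI_INITIATOR_ID_X_Y(2, 4, 0) packs X_low=2
 * into bits [4:0] and Y_low=4 into bits [8:5], with X_high=0 in bits
 * [27:23], yielding 0x82. A captured AXUSER register value is reduced to
 * the same bit layout with RAZWI_GET_AXUSER_XY() before being matched
 * against these packed IDs.
 */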

#define PSOC_RAZWI_ENG_STR_SIZE 128
#define PSOC_RAZWI_MAX_ENG_PER_RTR 5

struct gaudi2_razwi_info {
        u32 axuser_xy;
        u32 rtr_ctrl;
        u16 eng_id;
        char *eng_name;
};

static struct gaudi2_razwi_info common_razwi_info[] = {
                {RAZWI_INITIATOR_ID_X_Y(2, 4, 0), mmDCORE0_RTR0_CTRL_BASE,
                                GAUDI2_DCORE0_ENGINE_ID_DEC_0, "DEC0"},
                {RAZWI_INITIATOR_ID_X_Y(2, 4, 4), mmDCORE0_RTR0_CTRL_BASE,
                                GAUDI2_DCORE0_ENGINE_ID_DEC_1, "DEC1"},
                {RAZWI_INITIATOR_ID_X_Y(17, 4, 18), mmDCORE1_RTR7_CTRL_BASE,
                                GAUDI2_DCORE1_ENGINE_ID_DEC_0, "DEC2"},
                {RAZWI_INITIATOR_ID_X_Y(17, 4, 14), mmDCORE1_RTR7_CTRL_BASE,
                                GAUDI2_DCORE1_ENGINE_ID_DEC_1, "DEC3"},
                {RAZWI_INITIATOR_ID_X_Y(2, 11, 0), mmDCORE2_RTR0_CTRL_BASE,
                                GAUDI2_DCORE2_ENGINE_ID_DEC_0, "DEC4"},
                {RAZWI_INITIATOR_ID_X_Y(2, 11, 4), mmDCORE2_RTR0_CTRL_BASE,
                                GAUDI2_DCORE2_ENGINE_ID_DEC_1, "DEC5"},
                {RAZWI_INITIATOR_ID_X_Y(17, 11, 18), mmDCORE3_RTR7_CTRL_BASE,
                                GAUDI2_DCORE3_ENGINE_ID_DEC_0, "DEC6"},
                {RAZWI_INITIATOR_ID_X_Y(17, 11, 14), mmDCORE3_RTR7_CTRL_BASE,
                                GAUDI2_DCORE3_ENGINE_ID_DEC_1, "DEC7"},
                {RAZWI_INITIATOR_ID_X_Y(2, 4, 6), mmDCORE0_RTR0_CTRL_BASE,
                                GAUDI2_PCIE_ENGINE_ID_DEC_0, "DEC8"},
                {RAZWI_INITIATOR_ID_X_Y(2, 4, 7), mmDCORE0_RTR0_CTRL_BASE,
                                GAUDI2_PCIE_ENGINE_ID_DEC_1, "DEC9"},
                {RAZWI_INITIATOR_ID_X_Y(3, 4, 2), mmDCORE0_RTR1_CTRL_BASE,
                                GAUDI2_DCORE0_ENGINE_ID_TPC_0, "TPC0"},
                {RAZWI_INITIATOR_ID_X_Y(3, 4, 4), mmDCORE0_RTR1_CTRL_BASE,
                                GAUDI2_DCORE0_ENGINE_ID_TPC_1, "TPC1"},
                {RAZWI_INITIATOR_ID_X_Y(4, 4, 2), mmDCORE0_RTR2_CTRL_BASE,
                                GAUDI2_DCORE0_ENGINE_ID_TPC_2, "TPC2"},
                {RAZWI_INITIATOR_ID_X_Y(4, 4, 4), mmDCORE0_RTR2_CTRL_BASE,
                                GAUDI2_DCORE0_ENGINE_ID_TPC_3, "TPC3"},
                {RAZWI_INITIATOR_ID_X_Y(5, 4, 2), mmDCORE0_RTR3_CTRL_BASE,
                                GAUDI2_DCORE0_ENGINE_ID_TPC_4, "TPC4"},
                {RAZWI_INITIATOR_ID_X_Y(5, 4, 4), mmDCORE0_RTR3_CTRL_BASE,
                                GAUDI2_DCORE0_ENGINE_ID_TPC_5, "TPC5"},
                {RAZWI_INITIATOR_ID_X_Y(16, 4, 14), mmDCORE1_RTR6_CTRL_BASE,
                                GAUDI2_DCORE1_ENGINE_ID_TPC_0, "TPC6"},
                {RAZWI_INITIATOR_ID_X_Y(16, 4, 16), mmDCORE1_RTR6_CTRL_BASE,
                                GAUDI2_DCORE1_ENGINE_ID_TPC_1, "TPC7"},
                {RAZWI_INITIATOR_ID_X_Y(15, 4, 14), mmDCORE1_RTR5_CTRL_BASE,
                                GAUDI2_DCORE1_ENGINE_ID_TPC_2, "TPC8"},
                {RAZWI_INITIATOR_ID_X_Y(15, 4, 16), mmDCORE1_RTR5_CTRL_BASE,
                                GAUDI2_DCORE1_ENGINE_ID_TPC_3, "TPC9"},
                {RAZWI_INITIATOR_ID_X_Y(14, 4, 14), mmDCORE1_RTR4_CTRL_BASE,
                                GAUDI2_DCORE1_ENGINE_ID_TPC_4, "TPC10"},
                {RAZWI_INITIATOR_ID_X_Y(14, 4, 16), mmDCORE1_RTR4_CTRL_BASE,
                                GAUDI2_DCORE1_ENGINE_ID_TPC_5, "TPC11"},
                {RAZWI_INITIATOR_ID_X_Y(5, 11, 2), mmDCORE2_RTR3_CTRL_BASE,
                                GAUDI2_DCORE2_ENGINE_ID_TPC_0, "TPC12"},
                {RAZWI_INITIATOR_ID_X_Y(5, 11, 4), mmDCORE2_RTR3_CTRL_BASE,
                                GAUDI2_DCORE2_ENGINE_ID_TPC_1, "TPC13"},
                {RAZWI_INITIATOR_ID_X_Y(4, 11, 2), mmDCORE2_RTR2_CTRL_BASE,
                                GAUDI2_DCORE2_ENGINE_ID_TPC_2, "TPC14"},
                {RAZWI_INITIATOR_ID_X_Y(4, 11, 4), mmDCORE2_RTR2_CTRL_BASE,
                                GAUDI2_DCORE2_ENGINE_ID_TPC_3, "TPC15"},
                {RAZWI_INITIATOR_ID_X_Y(3, 11, 2), mmDCORE2_RTR1_CTRL_BASE,
                                GAUDI2_DCORE2_ENGINE_ID_TPC_4, "TPC16"},
                {RAZWI_INITIATOR_ID_X_Y(3, 11, 4), mmDCORE2_RTR1_CTRL_BASE,
                                GAUDI2_DCORE2_ENGINE_ID_TPC_5, "TPC17"},
                {RAZWI_INITIATOR_ID_X_Y(14, 11, 14), mmDCORE3_RTR4_CTRL_BASE,
                                GAUDI2_DCORE3_ENGINE_ID_TPC_0, "TPC18"},
                {RAZWI_INITIATOR_ID_X_Y(14, 11, 16), mmDCORE3_RTR4_CTRL_BASE,
                                GAUDI2_DCORE3_ENGINE_ID_TPC_1, "TPC19"},
                {RAZWI_INITIATOR_ID_X_Y(15, 11, 14), mmDCORE3_RTR5_CTRL_BASE,
                                GAUDI2_DCORE3_ENGINE_ID_TPC_2, "TPC20"},
                {RAZWI_INITIATOR_ID_X_Y(15, 11, 16), mmDCORE3_RTR5_CTRL_BASE,
                                GAUDI2_DCORE3_ENGINE_ID_TPC_3, "TPC21"},
                {RAZWI_INITIATOR_ID_X_Y(16, 11, 14), mmDCORE3_RTR6_CTRL_BASE,
                                GAUDI2_DCORE3_ENGINE_ID_TPC_4, "TPC22"},
                {RAZWI_INITIATOR_ID_X_Y(16, 11, 16), mmDCORE3_RTR6_CTRL_BASE,
                                GAUDI2_DCORE3_ENGINE_ID_TPC_5, "TPC23"},
                {RAZWI_INITIATOR_ID_X_Y(2, 4, 2), mmDCORE0_RTR0_CTRL_BASE,
                                GAUDI2_DCORE0_ENGINE_ID_TPC_6, "TPC24"},
                {RAZWI_INITIATOR_ID_X_Y(17, 4, 8), mmDCORE1_RTR7_CTRL_BASE,
                                GAUDI2_ENGINE_ID_NIC0_0, "NIC0"},
                {RAZWI_INITIATOR_ID_X_Y(17, 4, 10), mmDCORE1_RTR7_CTRL_BASE,
                                GAUDI2_ENGINE_ID_NIC0_1, "NIC1"},
                {RAZWI_INITIATOR_ID_X_Y(17, 4, 12), mmDCORE1_RTR7_CTRL_BASE,
                                GAUDI2_ENGINE_ID_NIC1_0, "NIC2"},
                {RAZWI_INITIATOR_ID_X_Y(17, 4, 14), mmDCORE1_RTR7_CTRL_BASE,
                                GAUDI2_ENGINE_ID_NIC1_1, "NIC3"},
                {RAZWI_INITIATOR_ID_X_Y(17, 4, 15), mmDCORE1_RTR7_CTRL_BASE,
                                GAUDI2_ENGINE_ID_NIC2_0, "NIC4"},
                {RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE,
                                GAUDI2_ENGINE_ID_NIC2_1, "NIC5"},
                {RAZWI_INITIATOR_ID_X_Y(2, 11, 4), mmDCORE2_RTR0_CTRL_BASE,
                                GAUDI2_ENGINE_ID_NIC3_0, "NIC6"},
                {RAZWI_INITIATOR_ID_X_Y(2, 11, 6), mmDCORE2_RTR0_CTRL_BASE,
                                GAUDI2_ENGINE_ID_NIC3_1, "NIC7"},
                {RAZWI_INITIATOR_ID_X_Y(2, 11, 8), mmDCORE2_RTR0_CTRL_BASE,
                                GAUDI2_ENGINE_ID_NIC4_0, "NIC8"},
                {RAZWI_INITIATOR_ID_X_Y(17, 11, 12), mmDCORE3_RTR7_CTRL_BASE,
                                GAUDI2_ENGINE_ID_NIC4_1, "NIC9"},
                {RAZWI_INITIATOR_ID_X_Y(17, 11, 14), mmDCORE3_RTR7_CTRL_BASE,
                                GAUDI2_ENGINE_ID_NIC5_0, "NIC10"},
                {RAZWI_INITIATOR_ID_X_Y(17, 11, 16), mmDCORE3_RTR7_CTRL_BASE,
                                GAUDI2_ENGINE_ID_NIC5_1, "NIC11"},
                {RAZWI_INITIATOR_ID_X_Y(2, 4, 2), mmDCORE0_RTR0_CTRL_BASE,
                                GAUDI2_ENGINE_ID_PDMA_0, "PDMA0"},
                {RAZWI_INITIATOR_ID_X_Y(2, 4, 3), mmDCORE0_RTR0_CTRL_BASE,
                                GAUDI2_ENGINE_ID_PDMA_1, "PDMA1"},
                {RAZWI_INITIATOR_ID_X_Y(2, 4, 4), mmDCORE0_RTR0_CTRL_BASE,
                                GAUDI2_ENGINE_ID_SIZE, "PMMU"},
                {RAZWI_INITIATOR_ID_X_Y(2, 4, 5), mmDCORE0_RTR0_CTRL_BASE,
                                GAUDI2_ENGINE_ID_SIZE, "PCIE"},
                {RAZWI_INITIATOR_ID_X_Y(17, 4, 16), mmDCORE1_RTR7_CTRL_BASE,
                                GAUDI2_ENGINE_ID_ARC_FARM, "ARC_FARM"},
                {RAZWI_INITIATOR_ID_X_Y(17, 4, 17), mmDCORE1_RTR7_CTRL_BASE,
                                GAUDI2_ENGINE_ID_KDMA, "KDMA"},
                {RAZWI_INITIATOR_ID_X_Y(1, 5, 1), mmSFT0_HBW_RTR_IF1_RTR_CTRL_BASE,
                                GAUDI2_DCORE0_ENGINE_ID_EDMA_0, "EDMA0"},
                {RAZWI_INITIATOR_ID_X_Y(1, 5, 1), mmSFT0_HBW_RTR_IF0_RTR_CTRL_BASE,
                                GAUDI2_DCORE0_ENGINE_ID_EDMA_1, "EDMA1"},
                {RAZWI_INITIATOR_ID_X_Y(18, 5, 18), mmSFT1_HBW_RTR_IF1_RTR_CTRL_BASE,
                                GAUDI2_DCORE1_ENGINE_ID_EDMA_0, "EDMA2"},
                {RAZWI_INITIATOR_ID_X_Y(18, 5, 18), mmSFT1_HBW_RTR_IF0_RTR_CTRL_BASE,
                                GAUDI2_DCORE1_ENGINE_ID_EDMA_1, "EDMA3"},
                {RAZWI_INITIATOR_ID_X_Y(1, 10, 1), mmSFT2_HBW_RTR_IF0_RTR_CTRL_BASE,
                                GAUDI2_DCORE2_ENGINE_ID_EDMA_0, "EDMA4"},
                {RAZWI_INITIATOR_ID_X_Y(1, 10, 1), mmSFT2_HBW_RTR_IF1_RTR_CTRL_BASE,
                                GAUDI2_DCORE2_ENGINE_ID_EDMA_1, "EDMA5"},
                {RAZWI_INITIATOR_ID_X_Y(18, 10, 18), mmSFT3_HBW_RTR_IF0_RTR_CTRL_BASE,
                                GAUDI2_DCORE3_ENGINE_ID_EDMA_0, "EDMA6"},
                {RAZWI_INITIATOR_ID_X_Y(18, 10, 18), mmSFT3_HBW_RTR_IF1_RTR_CTRL_BASE,
                                GAUDI2_DCORE3_ENGINE_ID_EDMA_1, "EDMA7"},
                {RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
                                GAUDI2_ENGINE_ID_SIZE, "HMMU0"},
                {RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
                                GAUDI2_ENGINE_ID_SIZE, "HMMU1"},
                {RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
                                GAUDI2_ENGINE_ID_SIZE, "HMMU2"},
                {RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
                                GAUDI2_ENGINE_ID_SIZE, "HMMU3"},
                {RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
                                GAUDI2_ENGINE_ID_SIZE, "HMMU4"},
                {RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
                                GAUDI2_ENGINE_ID_SIZE, "HMMU5"},
                {RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
                                GAUDI2_ENGINE_ID_SIZE, "HMMU6"},
                {RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
                                GAUDI2_ENGINE_ID_SIZE, "HMMU7"},
                {RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
                                GAUDI2_ENGINE_ID_SIZE, "HMMU8"},
                {RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
                                GAUDI2_ENGINE_ID_SIZE, "HMMU9"},
                {RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
                                GAUDI2_ENGINE_ID_SIZE, "HMMU10"},
                {RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
                                GAUDI2_ENGINE_ID_SIZE, "HMMU11"},
                {RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
                                GAUDI2_ENGINE_ID_SIZE, "HMMU12"},
                {RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
                                GAUDI2_ENGINE_ID_SIZE, "HMMU13"},
                {RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
                                GAUDI2_ENGINE_ID_SIZE, "HMMU14"},
                {RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
                                GAUDI2_ENGINE_ID_SIZE, "HMMU15"},
                {RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE,
                                GAUDI2_ENGINE_ID_ROT_0, "ROT0"},
                {RAZWI_INITIATOR_ID_X_Y(17, 11, 16), mmDCORE3_RTR7_CTRL_BASE,
                                GAUDI2_ENGINE_ID_ROT_1, "ROT1"},
                {RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE,
                                GAUDI2_ENGINE_ID_PSOC, "CPU"},
                {RAZWI_INITIATOR_ID_X_Y(17, 11, 11), mmDCORE3_RTR7_CTRL_BASE,
                                GAUDI2_ENGINE_ID_PSOC, "PSOC"}
};
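
/*
 * Entries above that use GAUDI2_ENGINE_ID_SIZE as their eng_id (PMMU, PCIE
 * and the HMMUs) mark initiators that have no user-visible engine ID;
 * presumably such RAZWI events are reported by initiator name only.
 */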

static struct gaudi2_razwi_info mme_razwi_info[] = {
                /* MME X high coordinate is N/A, hence using only low coordinates */
                {RAZWI_INITIATOR_ID_X_Y_LOW(7, 4), mmDCORE0_RTR5_CTRL_BASE,
                                GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_WAP0"},
                {RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE,
                                GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_WAP1"},
                {RAZWI_INITIATOR_ID_X_Y_LOW(8, 4), mmDCORE0_RTR6_CTRL_BASE,
                                GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_CTRL_WR"},
                {RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE,
                                GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_CTRL_RD"},
                {RAZWI_INITIATOR_ID_X_Y_LOW(6, 4), mmDCORE0_RTR4_CTRL_BASE,
                                GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE0"},
                {RAZWI_INITIATOR_ID_X_Y_LOW(6, 4), mmDCORE0_RTR4_CTRL_BASE,
                                GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE1"},
                {RAZWI_INITIATOR_ID_X_Y_LOW(7, 4), mmDCORE0_RTR5_CTRL_BASE,
                                GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE2"},
                {RAZWI_INITIATOR_ID_X_Y_LOW(8, 4), mmDCORE0_RTR6_CTRL_BASE,
                                GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE3"},
                {RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE,
                                GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE4"},
                {RAZWI_INITIATOR_ID_X_Y_LOW(12, 4), mmDCORE1_RTR2_CTRL_BASE,
                                GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_WAP0"},
                {RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE,
                                GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_WAP1"},
                {RAZWI_INITIATOR_ID_X_Y_LOW(11, 4), mmDCORE1_RTR1_CTRL_BASE,
                                GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_CTRL_WR"},
                {RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE,
                                GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_CTRL_RD"},
                {RAZWI_INITIATOR_ID_X_Y_LOW(13, 4), mmDCORE1_RTR3_CTRL_BASE,
                                GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE0"},
                {RAZWI_INITIATOR_ID_X_Y_LOW(13, 4), mmDCORE1_RTR3_CTRL_BASE,
                                GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE1"},
                {RAZWI_INITIATOR_ID_X_Y_LOW(12, 4), mmDCORE1_RTR2_CTRL_BASE,
                                GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE2"},
                {RAZWI_INITIATOR_ID_X_Y_LOW(11, 4), mmDCORE1_RTR1_CTRL_BASE,
                                GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE3"},
                {RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE,
                                GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE4"},
                {RAZWI_INITIATOR_ID_X_Y_LOW(7, 11), mmDCORE2_RTR5_CTRL_BASE,
                                GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_WAP0"},
                {RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE,
                                GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_WAP1"},
                {RAZWI_INITIATOR_ID_X_Y_LOW(8, 11), mmDCORE2_RTR6_CTRL_BASE,
                                GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_CTRL_WR"},
                {RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE,
                                GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_CTRL_RD"},
                {RAZWI_INITIATOR_ID_X_Y_LOW(6, 11), mmDCORE2_RTR4_CTRL_BASE,
                                GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE0"},
                {RAZWI_INITIATOR_ID_X_Y_LOW(6, 11), mmDCORE2_RTR4_CTRL_BASE,
                                GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE1"},
                {RAZWI_INITIATOR_ID_X_Y_LOW(7, 11), mmDCORE2_RTR5_CTRL_BASE,
                                GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE2"},
                {RAZWI_INITIATOR_ID_X_Y_LOW(8, 11), mmDCORE2_RTR6_CTRL_BASE,
                                GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE3"},
                {RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE,
                                GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE4"},
                {RAZWI_INITIATOR_ID_X_Y_LOW(12, 11), mmDCORE3_RTR2_CTRL_BASE,
                                GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_WAP0"},
                {RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE,
                                GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_WAP1"},
                {RAZWI_INITIATOR_ID_X_Y_LOW(11, 11), mmDCORE3_RTR1_CTRL_BASE,
                                GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_CTRL_WR"},
                {RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE,
                                GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_CTRL_RD"},
                {RAZWI_INITIATOR_ID_X_Y_LOW(13, 11), mmDCORE3_RTR3_CTRL_BASE,
                                GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE0"},
                {RAZWI_INITIATOR_ID_X_Y_LOW(13, 11), mmDCORE3_RTR3_CTRL_BASE,
                                GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE1"},
                {RAZWI_INITIATOR_ID_X_Y_LOW(12, 11), mmDCORE3_RTR2_CTRL_BASE,
                                GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE2"},
                {RAZWI_INITIATOR_ID_X_Y_LOW(11, 11), mmDCORE3_RTR1_CTRL_BASE,
                                GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE3"},
                {RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE,
                                GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE4"}
};

enum hl_pmmu_fatal_cause {
        LATENCY_RD_OUT_FIFO_OVERRUN,
        LATENCY_WR_OUT_FIFO_OVERRUN,
};

enum hl_pcie_drain_ind_cause {
        LBW_AXI_DRAIN_IND,
        HBW_AXI_DRAIN_IND
};

static const u32 cluster_hmmu_hif_enabled_mask[GAUDI2_HBM_NUM] = {
        [HBM_ID0] = 0xFFFC,
        [HBM_ID1] = 0xFFCF,
        [HBM_ID2] = 0xF7F7,
        [HBM_ID3] = 0x7F7F,
        [HBM_ID4] = 0xFCFF,
        [HBM_ID5] = 0xCFFF,
};
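
/*
 * Presumed usage (sketch, not taken from the original source): when the HBM
 * cluster given by the index is binned out, the mask lists the HIF/HMMU
 * units that remain enabled, e.g. 0xFFFC for HBM_ID0 clears the two HIF
 * bits served by that HBM out of GAUDI2_HIF_HMMU_FULL_MASK.
 */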

static const u8 xbar_edge_to_hbm_cluster[EDMA_ID_SIZE] = {
        [0] = HBM_ID0,
        [1] = HBM_ID1,
        [2] = HBM_ID4,
        [3] = HBM_ID5,
};

static const u8 edma_to_hbm_cluster[EDMA_ID_SIZE] = {
        [EDMA_ID_DCORE0_INSTANCE0] = HBM_ID0,
        [EDMA_ID_DCORE0_INSTANCE1] = HBM_ID2,
        [EDMA_ID_DCORE1_INSTANCE0] = HBM_ID1,
        [EDMA_ID_DCORE1_INSTANCE1] = HBM_ID3,
        [EDMA_ID_DCORE2_INSTANCE0] = HBM_ID2,
        [EDMA_ID_DCORE2_INSTANCE1] = HBM_ID4,
        [EDMA_ID_DCORE3_INSTANCE0] = HBM_ID3,
        [EDMA_ID_DCORE3_INSTANCE1] = HBM_ID5,
};

static const int gaudi2_qman_async_event_id[] = {
        [GAUDI2_QUEUE_ID_PDMA_0_0] = GAUDI2_EVENT_PDMA0_QM,
        [GAUDI2_QUEUE_ID_PDMA_0_1] = GAUDI2_EVENT_PDMA0_QM,
        [GAUDI2_QUEUE_ID_PDMA_0_2] = GAUDI2_EVENT_PDMA0_QM,
        [GAUDI2_QUEUE_ID_PDMA_0_3] = GAUDI2_EVENT_PDMA0_QM,
        [GAUDI2_QUEUE_ID_PDMA_1_0] = GAUDI2_EVENT_PDMA1_QM,
        [GAUDI2_QUEUE_ID_PDMA_1_1] = GAUDI2_EVENT_PDMA1_QM,
        [GAUDI2_QUEUE_ID_PDMA_1_2] = GAUDI2_EVENT_PDMA1_QM,
        [GAUDI2_QUEUE_ID_PDMA_1_3] = GAUDI2_EVENT_PDMA1_QM,
        [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = GAUDI2_EVENT_HDMA0_QM,
        [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = GAUDI2_EVENT_HDMA0_QM,
        [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = GAUDI2_EVENT_HDMA0_QM,
        [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = GAUDI2_EVENT_HDMA0_QM,
        [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = GAUDI2_EVENT_HDMA1_QM,
        [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = GAUDI2_EVENT_HDMA1_QM,
        [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = GAUDI2_EVENT_HDMA1_QM,
        [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = GAUDI2_EVENT_HDMA1_QM,
        [GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = GAUDI2_EVENT_MME0_QM,
        [GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = GAUDI2_EVENT_MME0_QM,
        [GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = GAUDI2_EVENT_MME0_QM,
        [GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = GAUDI2_EVENT_MME0_QM,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = GAUDI2_EVENT_TPC0_QM,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = GAUDI2_EVENT_TPC0_QM,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = GAUDI2_EVENT_TPC0_QM,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = GAUDI2_EVENT_TPC0_QM,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = GAUDI2_EVENT_TPC1_QM,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = GAUDI2_EVENT_TPC1_QM,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = GAUDI2_EVENT_TPC1_QM,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = GAUDI2_EVENT_TPC1_QM,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = GAUDI2_EVENT_TPC2_QM,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = GAUDI2_EVENT_TPC2_QM,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = GAUDI2_EVENT_TPC2_QM,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = GAUDI2_EVENT_TPC2_QM,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = GAUDI2_EVENT_TPC3_QM,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = GAUDI2_EVENT_TPC3_QM,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = GAUDI2_EVENT_TPC3_QM,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = GAUDI2_EVENT_TPC3_QM,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = GAUDI2_EVENT_TPC4_QM,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = GAUDI2_EVENT_TPC4_QM,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = GAUDI2_EVENT_TPC4_QM,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = GAUDI2_EVENT_TPC4_QM,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = GAUDI2_EVENT_TPC5_QM,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = GAUDI2_EVENT_TPC5_QM,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = GAUDI2_EVENT_TPC5_QM,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = GAUDI2_EVENT_TPC5_QM,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = GAUDI2_EVENT_TPC24_QM,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = GAUDI2_EVENT_TPC24_QM,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = GAUDI2_EVENT_TPC24_QM,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = GAUDI2_EVENT_TPC24_QM,
        [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = GAUDI2_EVENT_HDMA2_QM,
        [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = GAUDI2_EVENT_HDMA2_QM,
        [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = GAUDI2_EVENT_HDMA2_QM,
        [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = GAUDI2_EVENT_HDMA2_QM,
        [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = GAUDI2_EVENT_HDMA3_QM,
        [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = GAUDI2_EVENT_HDMA3_QM,
        [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = GAUDI2_EVENT_HDMA3_QM,
        [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = GAUDI2_EVENT_HDMA3_QM,
        [GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = GAUDI2_EVENT_MME1_QM,
        [GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = GAUDI2_EVENT_MME1_QM,
        [GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = GAUDI2_EVENT_MME1_QM,
        [GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = GAUDI2_EVENT_MME1_QM,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = GAUDI2_EVENT_TPC6_QM,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = GAUDI2_EVENT_TPC6_QM,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = GAUDI2_EVENT_TPC6_QM,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = GAUDI2_EVENT_TPC6_QM,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = GAUDI2_EVENT_TPC7_QM,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = GAUDI2_EVENT_TPC7_QM,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = GAUDI2_EVENT_TPC7_QM,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = GAUDI2_EVENT_TPC7_QM,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = GAUDI2_EVENT_TPC8_QM,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = GAUDI2_EVENT_TPC8_QM,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = GAUDI2_EVENT_TPC8_QM,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = GAUDI2_EVENT_TPC8_QM,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = GAUDI2_EVENT_TPC9_QM,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = GAUDI2_EVENT_TPC9_QM,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = GAUDI2_EVENT_TPC9_QM,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = GAUDI2_EVENT_TPC9_QM,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = GAUDI2_EVENT_TPC10_QM,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = GAUDI2_EVENT_TPC10_QM,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = GAUDI2_EVENT_TPC10_QM,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = GAUDI2_EVENT_TPC10_QM,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = GAUDI2_EVENT_TPC11_QM,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = GAUDI2_EVENT_TPC11_QM,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = GAUDI2_EVENT_TPC11_QM,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = GAUDI2_EVENT_TPC11_QM,
        [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = GAUDI2_EVENT_HDMA4_QM,
        [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = GAUDI2_EVENT_HDMA4_QM,
        [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = GAUDI2_EVENT_HDMA4_QM,
        [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = GAUDI2_EVENT_HDMA4_QM,
        [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = GAUDI2_EVENT_HDMA5_QM,
        [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = GAUDI2_EVENT_HDMA5_QM,
        [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = GAUDI2_EVENT_HDMA5_QM,
        [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = GAUDI2_EVENT_HDMA5_QM,
        [GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = GAUDI2_EVENT_MME2_QM,
        [GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = GAUDI2_EVENT_MME2_QM,
        [GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = GAUDI2_EVENT_MME2_QM,
        [GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = GAUDI2_EVENT_MME2_QM,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = GAUDI2_EVENT_TPC12_QM,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = GAUDI2_EVENT_TPC12_QM,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = GAUDI2_EVENT_TPC12_QM,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = GAUDI2_EVENT_TPC12_QM,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = GAUDI2_EVENT_TPC13_QM,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = GAUDI2_EVENT_TPC13_QM,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = GAUDI2_EVENT_TPC13_QM,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = GAUDI2_EVENT_TPC13_QM,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = GAUDI2_EVENT_TPC14_QM,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = GAUDI2_EVENT_TPC14_QM,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = GAUDI2_EVENT_TPC14_QM,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = GAUDI2_EVENT_TPC14_QM,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = GAUDI2_EVENT_TPC15_QM,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = GAUDI2_EVENT_TPC15_QM,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = GAUDI2_EVENT_TPC15_QM,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = GAUDI2_EVENT_TPC15_QM,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = GAUDI2_EVENT_TPC16_QM,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = GAUDI2_EVENT_TPC16_QM,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = GAUDI2_EVENT_TPC16_QM,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = GAUDI2_EVENT_TPC16_QM,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = GAUDI2_EVENT_TPC17_QM,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = GAUDI2_EVENT_TPC17_QM,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = GAUDI2_EVENT_TPC17_QM,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = GAUDI2_EVENT_TPC17_QM,
        [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = GAUDI2_EVENT_HDMA6_QM,
        [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = GAUDI2_EVENT_HDMA6_QM,
        [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = GAUDI2_EVENT_HDMA6_QM,
        [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = GAUDI2_EVENT_HDMA6_QM,
        [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = GAUDI2_EVENT_HDMA7_QM,
        [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = GAUDI2_EVENT_HDMA7_QM,
        [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = GAUDI2_EVENT_HDMA7_QM,
        [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = GAUDI2_EVENT_HDMA7_QM,
        [GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = GAUDI2_EVENT_MME3_QM,
        [GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = GAUDI2_EVENT_MME3_QM,
        [GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = GAUDI2_EVENT_MME3_QM,
        [GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = GAUDI2_EVENT_MME3_QM,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = GAUDI2_EVENT_TPC18_QM,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = GAUDI2_EVENT_TPC18_QM,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = GAUDI2_EVENT_TPC18_QM,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = GAUDI2_EVENT_TPC18_QM,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = GAUDI2_EVENT_TPC19_QM,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = GAUDI2_EVENT_TPC19_QM,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = GAUDI2_EVENT_TPC19_QM,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = GAUDI2_EVENT_TPC19_QM,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = GAUDI2_EVENT_TPC20_QM,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = GAUDI2_EVENT_TPC20_QM,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = GAUDI2_EVENT_TPC20_QM,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = GAUDI2_EVENT_TPC20_QM,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = GAUDI2_EVENT_TPC21_QM,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = GAUDI2_EVENT_TPC21_QM,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = GAUDI2_EVENT_TPC21_QM,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = GAUDI2_EVENT_TPC21_QM,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = GAUDI2_EVENT_TPC22_QM,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = GAUDI2_EVENT_TPC22_QM,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = GAUDI2_EVENT_TPC22_QM,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = GAUDI2_EVENT_TPC22_QM,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = GAUDI2_EVENT_TPC23_QM,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = GAUDI2_EVENT_TPC23_QM,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = GAUDI2_EVENT_TPC23_QM,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = GAUDI2_EVENT_TPC23_QM,
        [GAUDI2_QUEUE_ID_NIC_0_0] = GAUDI2_EVENT_NIC0_QM0,
        [GAUDI2_QUEUE_ID_NIC_0_1] = GAUDI2_EVENT_NIC0_QM0,
        [GAUDI2_QUEUE_ID_NIC_0_2] = GAUDI2_EVENT_NIC0_QM0,
        [GAUDI2_QUEUE_ID_NIC_0_3] = GAUDI2_EVENT_NIC0_QM0,
        [GAUDI2_QUEUE_ID_NIC_1_0] = GAUDI2_EVENT_NIC0_QM1,
        [GAUDI2_QUEUE_ID_NIC_1_1] = GAUDI2_EVENT_NIC0_QM1,
        [GAUDI2_QUEUE_ID_NIC_1_2] = GAUDI2_EVENT_NIC0_QM1,
        [GAUDI2_QUEUE_ID_NIC_1_3] = GAUDI2_EVENT_NIC0_QM1,
        [GAUDI2_QUEUE_ID_NIC_2_0] = GAUDI2_EVENT_NIC1_QM0,
        [GAUDI2_QUEUE_ID_NIC_2_1] = GAUDI2_EVENT_NIC1_QM0,
        [GAUDI2_QUEUE_ID_NIC_2_2] = GAUDI2_EVENT_NIC1_QM0,
        [GAUDI2_QUEUE_ID_NIC_2_3] = GAUDI2_EVENT_NIC1_QM0,
        [GAUDI2_QUEUE_ID_NIC_3_0] = GAUDI2_EVENT_NIC1_QM1,
        [GAUDI2_QUEUE_ID_NIC_3_1] = GAUDI2_EVENT_NIC1_QM1,
        [GAUDI2_QUEUE_ID_NIC_3_2] = GAUDI2_EVENT_NIC1_QM1,
        [GAUDI2_QUEUE_ID_NIC_3_3] = GAUDI2_EVENT_NIC1_QM1,
        [GAUDI2_QUEUE_ID_NIC_4_0] = GAUDI2_EVENT_NIC2_QM0,
        [GAUDI2_QUEUE_ID_NIC_4_1] = GAUDI2_EVENT_NIC2_QM0,
        [GAUDI2_QUEUE_ID_NIC_4_2] = GAUDI2_EVENT_NIC2_QM0,
        [GAUDI2_QUEUE_ID_NIC_4_3] = GAUDI2_EVENT_NIC2_QM0,
        [GAUDI2_QUEUE_ID_NIC_5_0] = GAUDI2_EVENT_NIC2_QM1,
        [GAUDI2_QUEUE_ID_NIC_5_1] = GAUDI2_EVENT_NIC2_QM1,
        [GAUDI2_QUEUE_ID_NIC_5_2] = GAUDI2_EVENT_NIC2_QM1,
        [GAUDI2_QUEUE_ID_NIC_5_3] = GAUDI2_EVENT_NIC2_QM1,
        [GAUDI2_QUEUE_ID_NIC_6_0] = GAUDI2_EVENT_NIC3_QM0,
        [GAUDI2_QUEUE_ID_NIC_6_1] = GAUDI2_EVENT_NIC3_QM0,
        [GAUDI2_QUEUE_ID_NIC_6_2] = GAUDI2_EVENT_NIC3_QM0,
        [GAUDI2_QUEUE_ID_NIC_6_3] = GAUDI2_EVENT_NIC3_QM0,
        [GAUDI2_QUEUE_ID_NIC_7_0] = GAUDI2_EVENT_NIC3_QM1,
        [GAUDI2_QUEUE_ID_NIC_7_1] = GAUDI2_EVENT_NIC3_QM1,
        [GAUDI2_QUEUE_ID_NIC_7_2] = GAUDI2_EVENT_NIC3_QM1,
        [GAUDI2_QUEUE_ID_NIC_7_3] = GAUDI2_EVENT_NIC3_QM1,
        [GAUDI2_QUEUE_ID_NIC_8_0] = GAUDI2_EVENT_NIC4_QM0,
        [GAUDI2_QUEUE_ID_NIC_8_1] = GAUDI2_EVENT_NIC4_QM0,
        [GAUDI2_QUEUE_ID_NIC_8_2] = GAUDI2_EVENT_NIC4_QM0,
        [GAUDI2_QUEUE_ID_NIC_8_3] = GAUDI2_EVENT_NIC4_QM0,
        [GAUDI2_QUEUE_ID_NIC_9_0] = GAUDI2_EVENT_NIC4_QM1,
        [GAUDI2_QUEUE_ID_NIC_9_1] = GAUDI2_EVENT_NIC4_QM1,
        [GAUDI2_QUEUE_ID_NIC_9_2] = GAUDI2_EVENT_NIC4_QM1,
        [GAUDI2_QUEUE_ID_NIC_9_3] = GAUDI2_EVENT_NIC4_QM1,
        [GAUDI2_QUEUE_ID_NIC_10_0] = GAUDI2_EVENT_NIC5_QM0,
        [GAUDI2_QUEUE_ID_NIC_10_1] = GAUDI2_EVENT_NIC5_QM0,
        [GAUDI2_QUEUE_ID_NIC_10_2] = GAUDI2_EVENT_NIC5_QM0,
        [GAUDI2_QUEUE_ID_NIC_10_3] = GAUDI2_EVENT_NIC5_QM0,
        [GAUDI2_QUEUE_ID_NIC_11_0] = GAUDI2_EVENT_NIC5_QM1,
        [GAUDI2_QUEUE_ID_NIC_11_1] = GAUDI2_EVENT_NIC5_QM1,
        [GAUDI2_QUEUE_ID_NIC_11_2] = GAUDI2_EVENT_NIC5_QM1,
        [GAUDI2_QUEUE_ID_NIC_11_3] = GAUDI2_EVENT_NIC5_QM1,
        [GAUDI2_QUEUE_ID_NIC_12_0] = GAUDI2_EVENT_NIC6_QM0,
        [GAUDI2_QUEUE_ID_NIC_12_1] = GAUDI2_EVENT_NIC6_QM0,
        [GAUDI2_QUEUE_ID_NIC_12_2] = GAUDI2_EVENT_NIC6_QM0,
        [GAUDI2_QUEUE_ID_NIC_12_3] = GAUDI2_EVENT_NIC6_QM0,
        [GAUDI2_QUEUE_ID_NIC_13_0] = GAUDI2_EVENT_NIC6_QM1,
        [GAUDI2_QUEUE_ID_NIC_13_1] = GAUDI2_EVENT_NIC6_QM1,
        [GAUDI2_QUEUE_ID_NIC_13_2] = GAUDI2_EVENT_NIC6_QM1,
        [GAUDI2_QUEUE_ID_NIC_13_3] = GAUDI2_EVENT_NIC6_QM1,
        [GAUDI2_QUEUE_ID_NIC_14_0] = GAUDI2_EVENT_NIC7_QM0,
        [GAUDI2_QUEUE_ID_NIC_14_1] = GAUDI2_EVENT_NIC7_QM0,
        [GAUDI2_QUEUE_ID_NIC_14_2] = GAUDI2_EVENT_NIC7_QM0,
        [GAUDI2_QUEUE_ID_NIC_14_3] = GAUDI2_EVENT_NIC7_QM0,
        [GAUDI2_QUEUE_ID_NIC_15_0] = GAUDI2_EVENT_NIC7_QM1,
        [GAUDI2_QUEUE_ID_NIC_15_1] = GAUDI2_EVENT_NIC7_QM1,
        [GAUDI2_QUEUE_ID_NIC_15_2] = GAUDI2_EVENT_NIC7_QM1,
        [GAUDI2_QUEUE_ID_NIC_15_3] = GAUDI2_EVENT_NIC7_QM1,
        [GAUDI2_QUEUE_ID_NIC_16_0] = GAUDI2_EVENT_NIC8_QM0,
        [GAUDI2_QUEUE_ID_NIC_16_1] = GAUDI2_EVENT_NIC8_QM0,
        [GAUDI2_QUEUE_ID_NIC_16_2] = GAUDI2_EVENT_NIC8_QM0,
        [GAUDI2_QUEUE_ID_NIC_16_3] = GAUDI2_EVENT_NIC8_QM0,
        [GAUDI2_QUEUE_ID_NIC_17_0] = GAUDI2_EVENT_NIC8_QM1,
        [GAUDI2_QUEUE_ID_NIC_17_1] = GAUDI2_EVENT_NIC8_QM1,
        [GAUDI2_QUEUE_ID_NIC_17_2] = GAUDI2_EVENT_NIC8_QM1,
        [GAUDI2_QUEUE_ID_NIC_17_3] = GAUDI2_EVENT_NIC8_QM1,
        [GAUDI2_QUEUE_ID_NIC_18_0] = GAUDI2_EVENT_NIC9_QM0,
        [GAUDI2_QUEUE_ID_NIC_18_1] = GAUDI2_EVENT_NIC9_QM0,
        [GAUDI2_QUEUE_ID_NIC_18_2] = GAUDI2_EVENT_NIC9_QM0,
        [GAUDI2_QUEUE_ID_NIC_18_3] = GAUDI2_EVENT_NIC9_QM0,
        [GAUDI2_QUEUE_ID_NIC_19_0] = GAUDI2_EVENT_NIC9_QM1,
        [GAUDI2_QUEUE_ID_NIC_19_1] = GAUDI2_EVENT_NIC9_QM1,
        [GAUDI2_QUEUE_ID_NIC_19_2] = GAUDI2_EVENT_NIC9_QM1,
        [GAUDI2_QUEUE_ID_NIC_19_3] = GAUDI2_EVENT_NIC9_QM1,
        [GAUDI2_QUEUE_ID_NIC_20_0] = GAUDI2_EVENT_NIC10_QM0,
        [GAUDI2_QUEUE_ID_NIC_20_1] = GAUDI2_EVENT_NIC10_QM0,
        [GAUDI2_QUEUE_ID_NIC_20_2] = GAUDI2_EVENT_NIC10_QM0,
        [GAUDI2_QUEUE_ID_NIC_20_3] = GAUDI2_EVENT_NIC10_QM0,
        [GAUDI2_QUEUE_ID_NIC_21_0] = GAUDI2_EVENT_NIC10_QM1,
        [GAUDI2_QUEUE_ID_NIC_21_1] = GAUDI2_EVENT_NIC10_QM1,
        [GAUDI2_QUEUE_ID_NIC_21_2] = GAUDI2_EVENT_NIC10_QM1,
        [GAUDI2_QUEUE_ID_NIC_21_3] = GAUDI2_EVENT_NIC10_QM1,
        [GAUDI2_QUEUE_ID_NIC_22_0] = GAUDI2_EVENT_NIC11_QM0,
        [GAUDI2_QUEUE_ID_NIC_22_1] = GAUDI2_EVENT_NIC11_QM0,
        [GAUDI2_QUEUE_ID_NIC_22_2] = GAUDI2_EVENT_NIC11_QM0,
        [GAUDI2_QUEUE_ID_NIC_22_3] = GAUDI2_EVENT_NIC11_QM0,
        [GAUDI2_QUEUE_ID_NIC_23_0] = GAUDI2_EVENT_NIC11_QM1,
        [GAUDI2_QUEUE_ID_NIC_23_1] = GAUDI2_EVENT_NIC11_QM1,
        [GAUDI2_QUEUE_ID_NIC_23_2] = GAUDI2_EVENT_NIC11_QM1,
        [GAUDI2_QUEUE_ID_NIC_23_3] = GAUDI2_EVENT_NIC11_QM1,
        [GAUDI2_QUEUE_ID_ROT_0_0] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
        [GAUDI2_QUEUE_ID_ROT_0_1] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
        [GAUDI2_QUEUE_ID_ROT_0_2] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
        [GAUDI2_QUEUE_ID_ROT_0_3] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
        [GAUDI2_QUEUE_ID_ROT_1_0] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
        [GAUDI2_QUEUE_ID_ROT_1_1] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
        [GAUDI2_QUEUE_ID_ROT_1_2] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
        [GAUDI2_QUEUE_ID_ROT_1_3] = GAUDI2_EVENT_ROTATOR1_ROT1_QM
};
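
/*
 * Usage sketch (illustrative): all four streams of a queue manager share
 * one async event ID, so a queue can be translated to its QM event with a
 * direct lookup:
 *
 *	event_id = gaudi2_qman_async_event_id[queue_id];
 */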

static const int gaudi2_dma_core_async_event_id[] = {
        [DMA_CORE_ID_EDMA0] = GAUDI2_EVENT_HDMA0_CORE,
        [DMA_CORE_ID_EDMA1] = GAUDI2_EVENT_HDMA1_CORE,
        [DMA_CORE_ID_EDMA2] = GAUDI2_EVENT_HDMA2_CORE,
        [DMA_CORE_ID_EDMA3] = GAUDI2_EVENT_HDMA3_CORE,
        [DMA_CORE_ID_EDMA4] = GAUDI2_EVENT_HDMA4_CORE,
        [DMA_CORE_ID_EDMA5] = GAUDI2_EVENT_HDMA5_CORE,
        [DMA_CORE_ID_EDMA6] = GAUDI2_EVENT_HDMA6_CORE,
        [DMA_CORE_ID_EDMA7] = GAUDI2_EVENT_HDMA7_CORE,
        [DMA_CORE_ID_PDMA0] = GAUDI2_EVENT_PDMA0_CORE,
        [DMA_CORE_ID_PDMA1] = GAUDI2_EVENT_PDMA1_CORE,
        [DMA_CORE_ID_KDMA] = GAUDI2_EVENT_KDMA0_CORE,
};

static const char * const gaudi2_qm_sei_error_cause[GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE] = {
        "qman sei intr",
        "arc sei intr"
};

static const char * const gaudi2_cpu_sei_error_cause[GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE] = {
        "AXI_TERMINATOR WR",
        "AXI_TERMINATOR RD",
        "AXI SPLIT SEI Status"
};

static const char * const gaudi2_arc_sei_error_cause[GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE] = {
        "cbu_bresp_sei_intr_cause",
        "cbu_rresp_sei_intr_cause",
        "lbu_bresp_sei_intr_cause",
        "lbu_rresp_sei_intr_cause",
        "cbu_axi_split_intr_cause",
        "lbu_axi_split_intr_cause",
        "arc_ip_excptn_sei_intr_cause",
        "dmi_bresp_sei_intr_cause",
        "aux2apb_err_sei_intr_cause",
        "cfg_lbw_wr_terminated_intr_cause",
        "cfg_lbw_rd_terminated_intr_cause",
        "cfg_dccm_wr_terminated_intr_cause",
        "cfg_dccm_rd_terminated_intr_cause",
        "cfg_hbw_rd_terminated_intr_cause"
};

static const char * const gaudi2_dec_error_cause[GAUDI2_NUM_OF_DEC_ERR_CAUSE] = {
        "msix_vcd_hbw_sei",
        "msix_l2c_hbw_sei",
        "msix_nrm_hbw_sei",
        "msix_abnrm_hbw_sei",
        "msix_vcd_lbw_sei",
        "msix_l2c_lbw_sei",
        "msix_nrm_lbw_sei",
        "msix_abnrm_lbw_sei",
        "apb_vcd_lbw_sei",
        "apb_l2c_lbw_sei",
        "apb_nrm_lbw_sei",
        "apb_abnrm_lbw_sei",
        "dec_sei",
        "dec_apb_sei",
        "trc_apb_sei",
        "lbw_mstr_if_sei",
        "axi_split_bresp_err_sei",
        "hbw_axi_wr_viol_sei",
        "hbw_axi_rd_viol_sei",
        "lbw_axi_wr_viol_sei",
        "lbw_axi_rd_viol_sei",
        "vcd_spi",
        "l2c_spi",
        "nrm_spi",
        "abnrm_spi",
};

static const char * const gaudi2_qman_error_cause[GAUDI2_NUM_OF_QM_ERR_CAUSE] = {
        "PQ AXI HBW error",
        "CQ AXI HBW error",
        "CP AXI HBW error",
        "CP error due to undefined OPCODE",
        "CP encountered STOP OPCODE",
        "CP AXI LBW error",
        "CP WRREG32 or WRBULK returned error",
        "N/A",
        "FENCE 0 inc over max value and clipped",
        "FENCE 1 inc over max value and clipped",
        "FENCE 2 inc over max value and clipped",
        "FENCE 3 inc over max value and clipped",
        "FENCE 0 dec under min value and clipped",
        "FENCE 1 dec under min value and clipped",
        "FENCE 2 dec under min value and clipped",
        "FENCE 3 dec under min value and clipped",
        "CPDMA Up overflow",
        "PQC L2H error"
};
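
/*
 * Presumed decoding pattern for the cause tables in this file (sketch, not
 * from the original source): an error cause register is walked bit by bit
 * and each set bit is reported with its matching string, e.g.:
 *
 *	for (i = 0 ; i < GAUDI2_NUM_OF_QM_ERR_CAUSE ; i++)
 *		if (glbl_sts_val & BIT(i))
 *			dev_err_ratelimited(hdev->dev, "QM error: %s\n",
 *						gaudi2_qman_error_cause[i]);
 */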
803
804 static const char * const gaudi2_qman_lower_cp_error_cause[GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE] = {
805         "RSVD0",
806         "CQ AXI HBW error",
807         "CP AXI HBW error",
808         "CP error due to undefined OPCODE",
809         "CP encountered STOP OPCODE",
810         "CP AXI LBW error",
811         "CP WRREG32 or WRBULK returned error",
812         "N/A",
813         "FENCE 0 inc over max value and clipped",
814         "FENCE 1 inc over max value and clipped",
815         "FENCE 2 inc over max value and clipped",
816         "FENCE 3 inc over max value and clipped",
817         "FENCE 0 dec under min value and clipped",
818         "FENCE 1 dec under min value and clipped",
819         "FENCE 2 dec under min value and clipped",
820         "FENCE 3 dec under min value and clipped",
821         "CPDMA Up overflow",
822         "RSVD17",
823         "CQ_WR_IFIFO_CI_ERR",
824         "CQ_WR_CTL_CI_ERR",
825         "ARC_CQF_RD_ERR",
826         "ARC_CQ_WR_IFIFO_CI_ERR",
827         "ARC_CQ_WR_CTL_CI_ERR",
828         "ARC_AXI_ERR",
829         "CP_SWITCH_WDT_ERR"
830 };
831
832 static const char * const gaudi2_qman_arb_error_cause[GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE] = {
833         "Choice push while full error",
834         "Choice Q watchdog error",
835         "MSG AXI LBW returned with error"
836 };
837
838 static const char * const guadi2_rot_error_cause[GAUDI2_NUM_OF_ROT_ERR_CAUSE] = {
839         "qm_axi_err",
840         "qm_trace_fence_events",
841         "qm_sw_err",
842         "qm_cp_sw_stop",
843         "lbw_mstr_rresp_err",
844         "lbw_mstr_bresp_err",
845         "lbw_msg_slverr",
846         "hbw_msg_slverr",
847         "wbc_slverr",
848         "hbw_mstr_rresp_err",
849         "hbw_mstr_bresp_err",
850         "sb_resp_intr",
851         "mrsb_resp_intr",
852         "core_dw_status_0",
853         "core_dw_status_1",
854         "core_dw_status_2",
855         "core_dw_status_3",
856         "core_dw_status_4",
857         "core_dw_status_5",
858         "core_dw_status_6",
859         "core_dw_status_7",
860         "async_arc2cpu_sei_intr",
861 };
862
863 static const char * const gaudi2_tpc_interrupts_cause[GAUDI2_NUM_OF_TPC_INTR_CAUSE] = {
864         "tpc_address_exceed_slm",
865         "tpc_div_by_0",
866         "tpc_spu_mac_overflow",
867         "tpc_spu_addsub_overflow",
868         "tpc_spu_abs_overflow",
869         "tpc_spu_fma_fp_dst_nan",
870         "tpc_spu_fma_fp_dst_inf",
871         "tpc_spu_convert_fp_dst_nan",
872         "tpc_spu_convert_fp_dst_inf",
873         "tpc_spu_fp_dst_denorm",
874         "tpc_vpu_mac_overflow",
875         "tpc_vpu_addsub_overflow",
876         "tpc_vpu_abs_overflow",
877         "tpc_vpu_convert_fp_dst_nan",
878         "tpc_vpu_convert_fp_dst_inf",
879         "tpc_vpu_fma_fp_dst_nan",
880         "tpc_vpu_fma_fp_dst_inf",
881         "tpc_vpu_fp_dst_denorm",
882         "tpc_assertions",
883         "tpc_illegal_instruction",
884         "tpc_pc_wrap_around",
885         "tpc_qm_sw_err",
886         "tpc_hbw_rresp_err",
887         "tpc_hbw_bresp_err",
888         "tpc_lbw_rresp_err",
889         "tpc_lbw_bresp_err",
890         "st_unlock_already_locked",
891         "invalid_lock_access",
892         "LD_L protection violation",
893         "ST_L protection violation",
894 };
895
896 static const char * const guadi2_mme_error_cause[GAUDI2_NUM_OF_MME_ERR_CAUSE] = {
897         "agu_resp_intr",
898         "qman_axi_err",
899         "wap sei (wbc axi err)",
900         "arc sei",
901         "cfg access error",
902         "qm_sw_err",
903         "sbte_dbg_intr_0",
904         "sbte_dbg_intr_1",
905         "sbte_dbg_intr_2",
906         "sbte_dbg_intr_3",
907         "sbte_dbg_intr_4",
908         "sbte_prtn_intr_0",
909         "sbte_prtn_intr_1",
910         "sbte_prtn_intr_2",
911         "sbte_prtn_intr_3",
912         "sbte_prtn_intr_4",
913 };
914
915 static const char * const guadi2_mme_sbte_error_cause[GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE] = {
916         "i0",
917         "i1",
918         "i2",
919         "i3",
920         "i4",
921 };
922
923 static const char * const guadi2_mme_wap_error_cause[GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE] = {
924         "WBC ERR RESP_0",
925         "WBC ERR RESP_1",
926         "AP SOURCE POS INF",
927         "AP SOURCE NEG INF",
928         "AP SOURCE NAN",
929         "AP RESULT POS INF",
930         "AP RESULT NEG INF",
931 };
932
933 static const char * const gaudi2_dma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
934         "HBW Read returned with error RRESP",
935         "HBW write returned with error BRESP",
936         "LBW write returned with error BRESP",
937         "descriptor_fifo_overflow",
938         "KDMA SB LBW Read returned with error",
939         "KDMA WBC LBW Write returned with error",
940         "TRANSPOSE ENGINE DESC FIFO OVERFLOW",
941         "WRONG CFG FOR COMMIT IN LIN DMA"
942 };
943
944 static const char * const gaudi2_kdma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
945         "HBW/LBW Read returned with error RRESP",
946         "HBW/LBW write returned with error BRESP",
947         "LBW write returned with error BRESP",
948         "descriptor_fifo_overflow",
949         "KDMA SB LBW Read returned with error",
950         "KDMA WBC LBW Write returned with error",
951         "TRANSPOSE ENGINE DESC FIFO OVERFLOW",
952         "WRONG CFG FOR COMMIT IN LIN DMA"
953 };
954
955 struct gaudi2_sm_sei_cause_data {
956         const char *cause_name;
957         const char *log_name;
958 };
959
960 static const struct gaudi2_sm_sei_cause_data
961 gaudi2_sm_sei_cause[GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE] = {
962         {"calculated SO value overflow/underflow", "SOB ID"},
963         {"payload address of monitor is not aligned to 4B", "monitor addr"},
964         {"armed monitor write got BRESP (SLVERR or DECERR)", "AXI id"},
965 };

static const char * const
gaudi2_pmmu_fatal_interrupts_cause[GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE] = {
        "LATENCY_RD_OUT_FIFO_OVERRUN",
        "LATENCY_WR_OUT_FIFO_OVERRUN",
};

static const char * const
gaudi2_hif_fatal_interrupts_cause[GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE] = {
        "LATENCY_RD_OUT_FIFO_OVERRUN",
        "LATENCY_WR_OUT_FIFO_OVERRUN",
};

static const char * const
gaudi2_psoc_axi_drain_interrupts_cause[GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE] = {
        "AXI drain HBW",
        "AXI drain LBW",
};

static const char * const
gaudi2_pcie_addr_dec_error_cause[GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE] = {
        "HBW error response",
        "LBW error response",
        "TLP is blocked by RR"
};

const u32 gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_SIZE] = {
        [GAUDI2_QUEUE_ID_PDMA_0_0] = mmPDMA0_QM_BASE,
        [GAUDI2_QUEUE_ID_PDMA_0_1] = mmPDMA0_QM_BASE,
        [GAUDI2_QUEUE_ID_PDMA_0_2] = mmPDMA0_QM_BASE,
        [GAUDI2_QUEUE_ID_PDMA_0_3] = mmPDMA0_QM_BASE,
        [GAUDI2_QUEUE_ID_PDMA_1_0] = mmPDMA1_QM_BASE,
        [GAUDI2_QUEUE_ID_PDMA_1_1] = mmPDMA1_QM_BASE,
        [GAUDI2_QUEUE_ID_PDMA_1_2] = mmPDMA1_QM_BASE,
        [GAUDI2_QUEUE_ID_PDMA_1_3] = mmPDMA1_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = mmDCORE0_EDMA0_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = mmDCORE0_EDMA0_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = mmDCORE0_EDMA0_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = mmDCORE0_EDMA0_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = mmDCORE0_EDMA1_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = mmDCORE0_EDMA1_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = mmDCORE0_EDMA1_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = mmDCORE0_EDMA1_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = mmDCORE0_MME_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = mmDCORE0_MME_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = mmDCORE0_MME_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = mmDCORE0_MME_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = mmDCORE0_TPC0_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = mmDCORE0_TPC0_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = mmDCORE0_TPC0_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = mmDCORE0_TPC0_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = mmDCORE0_TPC1_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = mmDCORE0_TPC1_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = mmDCORE0_TPC1_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = mmDCORE0_TPC1_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = mmDCORE0_TPC2_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = mmDCORE0_TPC2_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = mmDCORE0_TPC2_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = mmDCORE0_TPC2_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = mmDCORE0_TPC3_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = mmDCORE0_TPC3_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = mmDCORE0_TPC3_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = mmDCORE0_TPC3_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = mmDCORE0_TPC4_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = mmDCORE0_TPC4_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = mmDCORE0_TPC4_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = mmDCORE0_TPC4_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = mmDCORE0_TPC5_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = mmDCORE0_TPC5_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = mmDCORE0_TPC5_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = mmDCORE0_TPC5_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = mmDCORE0_TPC6_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = mmDCORE0_TPC6_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = mmDCORE0_TPC6_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = mmDCORE0_TPC6_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = mmDCORE1_EDMA0_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = mmDCORE1_EDMA0_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = mmDCORE1_EDMA0_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = mmDCORE1_EDMA0_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = mmDCORE1_EDMA1_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = mmDCORE1_EDMA1_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = mmDCORE1_EDMA1_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = mmDCORE1_EDMA1_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = mmDCORE1_MME_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = mmDCORE1_MME_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = mmDCORE1_MME_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = mmDCORE1_MME_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = mmDCORE1_TPC0_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = mmDCORE1_TPC0_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = mmDCORE1_TPC0_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = mmDCORE1_TPC0_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = mmDCORE1_TPC1_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = mmDCORE1_TPC1_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = mmDCORE1_TPC1_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = mmDCORE1_TPC1_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = mmDCORE1_TPC2_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = mmDCORE1_TPC2_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = mmDCORE1_TPC2_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = mmDCORE1_TPC2_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = mmDCORE1_TPC3_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = mmDCORE1_TPC3_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = mmDCORE1_TPC3_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = mmDCORE1_TPC3_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = mmDCORE1_TPC4_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = mmDCORE1_TPC4_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = mmDCORE1_TPC4_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = mmDCORE1_TPC4_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = mmDCORE1_TPC5_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = mmDCORE1_TPC5_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = mmDCORE1_TPC5_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = mmDCORE1_TPC5_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = mmDCORE2_EDMA0_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = mmDCORE2_EDMA0_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = mmDCORE2_EDMA0_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = mmDCORE2_EDMA0_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = mmDCORE2_EDMA1_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = mmDCORE2_EDMA1_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = mmDCORE2_EDMA1_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = mmDCORE2_EDMA1_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = mmDCORE2_MME_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = mmDCORE2_MME_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = mmDCORE2_MME_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = mmDCORE2_MME_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = mmDCORE2_TPC0_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = mmDCORE2_TPC0_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = mmDCORE2_TPC0_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = mmDCORE2_TPC0_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = mmDCORE2_TPC1_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = mmDCORE2_TPC1_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = mmDCORE2_TPC1_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = mmDCORE2_TPC1_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = mmDCORE2_TPC2_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = mmDCORE2_TPC2_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = mmDCORE2_TPC2_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = mmDCORE2_TPC2_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = mmDCORE2_TPC3_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = mmDCORE2_TPC3_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = mmDCORE2_TPC3_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = mmDCORE2_TPC3_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = mmDCORE2_TPC4_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = mmDCORE2_TPC4_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = mmDCORE2_TPC4_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = mmDCORE2_TPC4_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = mmDCORE2_TPC5_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = mmDCORE2_TPC5_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = mmDCORE2_TPC5_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = mmDCORE2_TPC5_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = mmDCORE3_EDMA0_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = mmDCORE3_EDMA0_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = mmDCORE3_EDMA0_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = mmDCORE3_EDMA0_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = mmDCORE3_EDMA1_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = mmDCORE3_EDMA1_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = mmDCORE3_EDMA1_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = mmDCORE3_EDMA1_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = mmDCORE3_MME_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = mmDCORE3_MME_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = mmDCORE3_MME_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = mmDCORE3_MME_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = mmDCORE3_TPC0_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = mmDCORE3_TPC0_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = mmDCORE3_TPC0_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = mmDCORE3_TPC0_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = mmDCORE3_TPC1_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = mmDCORE3_TPC1_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = mmDCORE3_TPC1_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = mmDCORE3_TPC1_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = mmDCORE3_TPC2_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = mmDCORE3_TPC2_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = mmDCORE3_TPC2_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = mmDCORE3_TPC2_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = mmDCORE3_TPC3_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = mmDCORE3_TPC3_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = mmDCORE3_TPC3_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = mmDCORE3_TPC3_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = mmDCORE3_TPC4_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = mmDCORE3_TPC4_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = mmDCORE3_TPC4_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = mmDCORE3_TPC4_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = mmDCORE3_TPC5_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = mmDCORE3_TPC5_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = mmDCORE3_TPC5_QM_BASE,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = mmDCORE3_TPC5_QM_BASE,
        [GAUDI2_QUEUE_ID_NIC_0_0] = mmNIC0_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_0_1] = mmNIC0_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_0_2] = mmNIC0_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_0_3] = mmNIC0_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_1_0] = mmNIC0_QM1_BASE,
        [GAUDI2_QUEUE_ID_NIC_1_1] = mmNIC0_QM1_BASE,
        [GAUDI2_QUEUE_ID_NIC_1_2] = mmNIC0_QM1_BASE,
        [GAUDI2_QUEUE_ID_NIC_1_3] = mmNIC0_QM1_BASE,
        [GAUDI2_QUEUE_ID_NIC_2_0] = mmNIC1_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_2_1] = mmNIC1_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_2_2] = mmNIC1_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_2_3] = mmNIC1_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_3_0] = mmNIC1_QM1_BASE,
        [GAUDI2_QUEUE_ID_NIC_3_1] = mmNIC1_QM1_BASE,
        [GAUDI2_QUEUE_ID_NIC_3_2] = mmNIC1_QM1_BASE,
        [GAUDI2_QUEUE_ID_NIC_3_3] = mmNIC1_QM1_BASE,
        [GAUDI2_QUEUE_ID_NIC_4_0] = mmNIC2_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_4_1] = mmNIC2_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_4_2] = mmNIC2_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_4_3] = mmNIC2_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_5_0] = mmNIC2_QM1_BASE,
        [GAUDI2_QUEUE_ID_NIC_5_1] = mmNIC2_QM1_BASE,
        [GAUDI2_QUEUE_ID_NIC_5_2] = mmNIC2_QM1_BASE,
        [GAUDI2_QUEUE_ID_NIC_5_3] = mmNIC2_QM1_BASE,
        [GAUDI2_QUEUE_ID_NIC_6_0] = mmNIC3_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_6_1] = mmNIC3_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_6_2] = mmNIC3_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_6_3] = mmNIC3_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_7_0] = mmNIC3_QM1_BASE,
        [GAUDI2_QUEUE_ID_NIC_7_1] = mmNIC3_QM1_BASE,
        [GAUDI2_QUEUE_ID_NIC_7_2] = mmNIC3_QM1_BASE,
        [GAUDI2_QUEUE_ID_NIC_7_3] = mmNIC3_QM1_BASE,
        [GAUDI2_QUEUE_ID_NIC_8_0] = mmNIC4_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_8_1] = mmNIC4_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_8_2] = mmNIC4_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_8_3] = mmNIC4_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_9_0] = mmNIC4_QM1_BASE,
        [GAUDI2_QUEUE_ID_NIC_9_1] = mmNIC4_QM1_BASE,
        [GAUDI2_QUEUE_ID_NIC_9_2] = mmNIC4_QM1_BASE,
        [GAUDI2_QUEUE_ID_NIC_9_3] = mmNIC4_QM1_BASE,
        [GAUDI2_QUEUE_ID_NIC_10_0] = mmNIC5_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_10_1] = mmNIC5_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_10_2] = mmNIC5_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_10_3] = mmNIC5_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_11_0] = mmNIC5_QM1_BASE,
        [GAUDI2_QUEUE_ID_NIC_11_1] = mmNIC5_QM1_BASE,
        [GAUDI2_QUEUE_ID_NIC_11_2] = mmNIC5_QM1_BASE,
        [GAUDI2_QUEUE_ID_NIC_11_3] = mmNIC5_QM1_BASE,
        [GAUDI2_QUEUE_ID_NIC_12_0] = mmNIC6_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_12_1] = mmNIC6_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_12_2] = mmNIC6_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_12_3] = mmNIC6_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_13_0] = mmNIC6_QM1_BASE,
        [GAUDI2_QUEUE_ID_NIC_13_1] = mmNIC6_QM1_BASE,
        [GAUDI2_QUEUE_ID_NIC_13_2] = mmNIC6_QM1_BASE,
        [GAUDI2_QUEUE_ID_NIC_13_3] = mmNIC6_QM1_BASE,
        [GAUDI2_QUEUE_ID_NIC_14_0] = mmNIC7_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_14_1] = mmNIC7_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_14_2] = mmNIC7_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_14_3] = mmNIC7_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_15_0] = mmNIC7_QM1_BASE,
        [GAUDI2_QUEUE_ID_NIC_15_1] = mmNIC7_QM1_BASE,
        [GAUDI2_QUEUE_ID_NIC_15_2] = mmNIC7_QM1_BASE,
        [GAUDI2_QUEUE_ID_NIC_15_3] = mmNIC7_QM1_BASE,
        [GAUDI2_QUEUE_ID_NIC_16_0] = mmNIC8_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_16_1] = mmNIC8_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_16_2] = mmNIC8_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_16_3] = mmNIC8_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_17_0] = mmNIC8_QM1_BASE,
        [GAUDI2_QUEUE_ID_NIC_17_1] = mmNIC8_QM1_BASE,
        [GAUDI2_QUEUE_ID_NIC_17_2] = mmNIC8_QM1_BASE,
        [GAUDI2_QUEUE_ID_NIC_17_3] = mmNIC8_QM1_BASE,
        [GAUDI2_QUEUE_ID_NIC_18_0] = mmNIC9_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_18_1] = mmNIC9_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_18_2] = mmNIC9_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_18_3] = mmNIC9_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_19_0] = mmNIC9_QM1_BASE,
        [GAUDI2_QUEUE_ID_NIC_19_1] = mmNIC9_QM1_BASE,
        [GAUDI2_QUEUE_ID_NIC_19_2] = mmNIC9_QM1_BASE,
        [GAUDI2_QUEUE_ID_NIC_19_3] = mmNIC9_QM1_BASE,
        [GAUDI2_QUEUE_ID_NIC_20_0] = mmNIC10_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_20_1] = mmNIC10_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_20_2] = mmNIC10_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_20_3] = mmNIC10_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_21_0] = mmNIC10_QM1_BASE,
        [GAUDI2_QUEUE_ID_NIC_21_1] = mmNIC10_QM1_BASE,
        [GAUDI2_QUEUE_ID_NIC_21_2] = mmNIC10_QM1_BASE,
        [GAUDI2_QUEUE_ID_NIC_21_3] = mmNIC10_QM1_BASE,
        [GAUDI2_QUEUE_ID_NIC_22_0] = mmNIC11_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_22_1] = mmNIC11_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_22_2] = mmNIC11_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_22_3] = mmNIC11_QM0_BASE,
        [GAUDI2_QUEUE_ID_NIC_23_0] = mmNIC11_QM1_BASE,
        [GAUDI2_QUEUE_ID_NIC_23_1] = mmNIC11_QM1_BASE,
        [GAUDI2_QUEUE_ID_NIC_23_2] = mmNIC11_QM1_BASE,
        [GAUDI2_QUEUE_ID_NIC_23_3] = mmNIC11_QM1_BASE,
        [GAUDI2_QUEUE_ID_ROT_0_0] = mmROT0_QM_BASE,
        [GAUDI2_QUEUE_ID_ROT_0_1] = mmROT0_QM_BASE,
        [GAUDI2_QUEUE_ID_ROT_0_2] = mmROT0_QM_BASE,
        [GAUDI2_QUEUE_ID_ROT_0_3] = mmROT0_QM_BASE,
        [GAUDI2_QUEUE_ID_ROT_1_0] = mmROT1_QM_BASE,
        [GAUDI2_QUEUE_ID_ROT_1_1] = mmROT1_QM_BASE,
        [GAUDI2_QUEUE_ID_ROT_1_2] = mmROT1_QM_BASE,
        [GAUDI2_QUEUE_ID_ROT_1_3] = mmROT1_QM_BASE
};
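
/*
 * Illustrative sketch, not part of the upstream driver: the table above is
 * indexed directly by queue ID, and all four streams of a queue manager share
 * one QM block base (e.g. GAUDI2_QUEUE_ID_PDMA_0_0..PDMA_0_3 all resolve to
 * mmPDMA0_QM_BASE). The helper name is hypothetical.
 */
static inline u32 gaudi2_example_qm_base(u32 queue_id)
{
        /* 0 is returned here as an "invalid base" sentinel for the sketch */
        if (queue_id >= GAUDI2_QUEUE_ID_SIZE)
                return 0;

        return gaudi2_qm_blocks_bases[queue_id];
}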

static const u32 gaudi2_arc_blocks_bases[NUM_ARC_CPUS] = {
        [CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_AUX_BASE,
        [CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_AUX_BASE,
        [CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_AUX_BASE,
        [CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_AUX_BASE,
        [CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_AUX_BASE,
        [CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_AUX_BASE,
        [CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_ARC_AUX_BASE,
        [CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_ARC_AUX_BASE,
        [CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_ARC_AUX_BASE,
        [CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_ARC_AUX_BASE,
        [CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_ARC_AUX_BASE,
        [CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_ARC_AUX_BASE,
        [CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_ARC_AUX_BASE,
        [CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_ARC_AUX_BASE,
        [CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_ARC_AUX_BASE,
        [CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_ARC_AUX_BASE,
        [CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_ARC_AUX_BASE,
        [CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_ARC_AUX_BASE,
        [CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_ARC_AUX_BASE,
        [CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_ARC_AUX_BASE,
        [CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_ARC_AUX_BASE,
        [CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_ARC_AUX_BASE,
        [CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_ARC_AUX_BASE,
        [CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_ARC_AUX_BASE,
        [CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_ARC_AUX_BASE,
        [CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_ARC_AUX_BASE,
        [CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_ARC_AUX_BASE,
        [CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_ARC_AUX_BASE,
        [CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_ARC_AUX_BASE,
        [CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_ARC_AUX_BASE,
        [CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_ARC_AUX_BASE,
        [CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_AUX_BASE,
        [CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_AUX_BASE,
        [CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_ARC_AUX_BASE,
        [CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_ARC_AUX_BASE,
        [CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_ARC_AUX_BASE,
        [CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_ARC_AUX_BASE,
        [CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_ARC_AUX_BASE,
        [CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_ARC_AUX_BASE,
        [CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_ARC_AUX_BASE,
        [CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_ARC_AUX_BASE,
        [CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_AUX_BASE,
        [CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_AUX_BASE,
        [CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_AUX_BASE,
        [CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_AUX_BASE,
        [CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_ARC_AUX0_BASE,
        [CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_ARC_AUX1_BASE,
        [CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_ARC_AUX0_BASE,
        [CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_ARC_AUX1_BASE,
        [CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_ARC_AUX0_BASE,
        [CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_ARC_AUX1_BASE,
        [CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_ARC_AUX0_BASE,
        [CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_ARC_AUX1_BASE,
        [CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_ARC_AUX0_BASE,
        [CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_ARC_AUX1_BASE,
        [CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_ARC_AUX0_BASE,
        [CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_ARC_AUX1_BASE,
        [CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_ARC_AUX0_BASE,
        [CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_ARC_AUX1_BASE,
        [CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_ARC_AUX0_BASE,
        [CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_ARC_AUX1_BASE,
        [CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_ARC_AUX0_BASE,
        [CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_ARC_AUX1_BASE,
        [CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_ARC_AUX0_BASE,
        [CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_ARC_AUX1_BASE,
        [CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_ARC_AUX0_BASE,
        [CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_ARC_AUX1_BASE,
        [CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_ARC_AUX0_BASE,
        [CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_ARC_AUX1_BASE,
};

static const u32 gaudi2_arc_dccm_bases[NUM_ARC_CPUS] = {
        [CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_DCCM0_BASE,
        [CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_DCCM0_BASE,
        [CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_DCCM0_BASE,
        [CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_DCCM0_BASE,
        [CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_DCCM_BASE,
        [CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_DCCM_BASE,
        [CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_DCCM_BASE,
        [CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_DCCM_BASE,
        [CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_DCCM_BASE,
        [CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_DCCM_BASE,
        [CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_DCCM_BASE,
        [CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_DCCM_BASE,
        [CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_DCCM_BASE,
        [CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_DCCM_BASE,
        [CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_DCCM_BASE,
        [CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_DCCM_BASE,
        [CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_DCCM_BASE,
        [CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_DCCM_BASE,
        [CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_DCCM_BASE,
        [CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_DCCM_BASE,
        [CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_DCCM_BASE,
        [CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_DCCM_BASE,
        [CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_DCCM_BASE,
        [CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_DCCM_BASE,
        [CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_DCCM_BASE,
        [CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_DCCM_BASE,
        [CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_DCCM_BASE,
        [CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_DCCM_BASE,
        [CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_DCCM_BASE,
        [CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_DCCM_BASE,
        [CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_DCCM_BASE,
        [CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_DCCM_BASE,
        [CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_DCCM_BASE,
        [CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_DCCM_BASE,
        [CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_DCCM_BASE,
        [CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_DCCM_BASE,
        [CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_DCCM_BASE,
        [CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_DCCM_BASE,
        [CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_DCCM_BASE,
        [CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_DCCM_BASE,
        [CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_DCCM_BASE,
        [CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_DCCM_BASE,
        [CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_DCCM_BASE,
        [CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_DCCM_BASE,
        [CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_DCCM_BASE,
        [CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_DCCM0_BASE,
        [CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_DCCM1_BASE,
        [CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_DCCM0_BASE,
        [CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_DCCM1_BASE,
        [CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_DCCM0_BASE,
        [CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_DCCM1_BASE,
        [CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_DCCM0_BASE,
        [CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_DCCM1_BASE,
        [CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_DCCM0_BASE,
        [CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_DCCM1_BASE,
        [CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_DCCM0_BASE,
        [CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_DCCM1_BASE,
        [CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_DCCM0_BASE,
        [CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_DCCM1_BASE,
        [CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_DCCM0_BASE,
        [CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_DCCM1_BASE,
        [CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_DCCM0_BASE,
        [CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_DCCM1_BASE,
        [CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_DCCM0_BASE,
        [CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_DCCM1_BASE,
        [CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_DCCM0_BASE,
        [CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_DCCM1_BASE,
        [CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_DCCM0_BASE,
        [CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_DCCM1_BASE,
};
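
/*
 * Illustrative sketch, not part of the upstream driver: the two tables above
 * are parallel, both indexed by ARC CPU ID, so a single index yields both the
 * AUX register block and the DCCM of a given ARC. The helper name is
 * hypothetical.
 */
static inline bool gaudi2_example_arc_bases(u32 cpu_id, u32 *aux_base,
                                                u32 *dccm_base)
{
        if (cpu_id >= NUM_ARC_CPUS)
                return false;

        *aux_base = gaudi2_arc_blocks_bases[cpu_id];
        *dccm_base = gaudi2_arc_dccm_bases[cpu_id];
        return true;
}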

const u32 gaudi2_mme_ctrl_lo_blocks_bases[MME_ID_SIZE] = {
        [MME_ID_DCORE0] = mmDCORE0_MME_CTRL_LO_BASE,
        [MME_ID_DCORE1] = mmDCORE1_MME_CTRL_LO_BASE,
        [MME_ID_DCORE2] = mmDCORE2_MME_CTRL_LO_BASE,
        [MME_ID_DCORE3] = mmDCORE3_MME_CTRL_LO_BASE,
};

static const u32 gaudi2_queue_id_to_arc_id[GAUDI2_QUEUE_ID_SIZE] = {
        [GAUDI2_QUEUE_ID_PDMA_0_0] = CPU_ID_PDMA_QMAN_ARC0,
        [GAUDI2_QUEUE_ID_PDMA_0_1] = CPU_ID_PDMA_QMAN_ARC0,
        [GAUDI2_QUEUE_ID_PDMA_0_2] = CPU_ID_PDMA_QMAN_ARC0,
        [GAUDI2_QUEUE_ID_PDMA_0_3] = CPU_ID_PDMA_QMAN_ARC0,
        [GAUDI2_QUEUE_ID_PDMA_1_0] = CPU_ID_PDMA_QMAN_ARC1,
        [GAUDI2_QUEUE_ID_PDMA_1_1] = CPU_ID_PDMA_QMAN_ARC1,
        [GAUDI2_QUEUE_ID_PDMA_1_2] = CPU_ID_PDMA_QMAN_ARC1,
        [GAUDI2_QUEUE_ID_PDMA_1_3] = CPU_ID_PDMA_QMAN_ARC1,
        [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC0,
        [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC0,
        [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC0,
        [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC0,
        [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC1,
        [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC1,
        [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC1,
        [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC1,
        [GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = CPU_ID_MME_QMAN_ARC0,
        [GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = CPU_ID_MME_QMAN_ARC0,
        [GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = CPU_ID_MME_QMAN_ARC0,
        [GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = CPU_ID_MME_QMAN_ARC0,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = CPU_ID_TPC_QMAN_ARC0,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = CPU_ID_TPC_QMAN_ARC0,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = CPU_ID_TPC_QMAN_ARC0,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = CPU_ID_TPC_QMAN_ARC0,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = CPU_ID_TPC_QMAN_ARC1,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = CPU_ID_TPC_QMAN_ARC1,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = CPU_ID_TPC_QMAN_ARC1,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = CPU_ID_TPC_QMAN_ARC1,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = CPU_ID_TPC_QMAN_ARC2,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = CPU_ID_TPC_QMAN_ARC2,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = CPU_ID_TPC_QMAN_ARC2,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = CPU_ID_TPC_QMAN_ARC2,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = CPU_ID_TPC_QMAN_ARC3,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = CPU_ID_TPC_QMAN_ARC3,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = CPU_ID_TPC_QMAN_ARC3,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = CPU_ID_TPC_QMAN_ARC3,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = CPU_ID_TPC_QMAN_ARC4,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = CPU_ID_TPC_QMAN_ARC4,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = CPU_ID_TPC_QMAN_ARC4,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = CPU_ID_TPC_QMAN_ARC4,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = CPU_ID_TPC_QMAN_ARC5,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = CPU_ID_TPC_QMAN_ARC5,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = CPU_ID_TPC_QMAN_ARC5,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = CPU_ID_TPC_QMAN_ARC5,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = CPU_ID_TPC_QMAN_ARC24,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = CPU_ID_TPC_QMAN_ARC24,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = CPU_ID_TPC_QMAN_ARC24,
        [GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = CPU_ID_TPC_QMAN_ARC24,
        [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC2,
        [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC2,
        [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC2,
        [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC2,
        [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC3,
        [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC3,
        [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC3,
        [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC3,
        [GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = CPU_ID_SCHED_ARC4,
        [GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = CPU_ID_SCHED_ARC4,
        [GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = CPU_ID_SCHED_ARC4,
        [GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = CPU_ID_SCHED_ARC4,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = CPU_ID_TPC_QMAN_ARC6,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = CPU_ID_TPC_QMAN_ARC6,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = CPU_ID_TPC_QMAN_ARC6,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = CPU_ID_TPC_QMAN_ARC6,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = CPU_ID_TPC_QMAN_ARC7,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = CPU_ID_TPC_QMAN_ARC7,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = CPU_ID_TPC_QMAN_ARC7,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = CPU_ID_TPC_QMAN_ARC7,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = CPU_ID_TPC_QMAN_ARC8,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = CPU_ID_TPC_QMAN_ARC8,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = CPU_ID_TPC_QMAN_ARC8,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = CPU_ID_TPC_QMAN_ARC8,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = CPU_ID_TPC_QMAN_ARC9,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = CPU_ID_TPC_QMAN_ARC9,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = CPU_ID_TPC_QMAN_ARC9,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = CPU_ID_TPC_QMAN_ARC9,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = CPU_ID_TPC_QMAN_ARC10,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = CPU_ID_TPC_QMAN_ARC10,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = CPU_ID_TPC_QMAN_ARC10,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = CPU_ID_TPC_QMAN_ARC10,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = CPU_ID_TPC_QMAN_ARC11,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = CPU_ID_TPC_QMAN_ARC11,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = CPU_ID_TPC_QMAN_ARC11,
        [GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = CPU_ID_TPC_QMAN_ARC11,
        [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC4,
        [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC4,
        [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC4,
        [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC4,
        [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC5,
        [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC5,
        [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC5,
        [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC5,
        [GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = CPU_ID_MME_QMAN_ARC1,
        [GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = CPU_ID_MME_QMAN_ARC1,
        [GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = CPU_ID_MME_QMAN_ARC1,
        [GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = CPU_ID_MME_QMAN_ARC1,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = CPU_ID_TPC_QMAN_ARC12,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = CPU_ID_TPC_QMAN_ARC12,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = CPU_ID_TPC_QMAN_ARC12,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = CPU_ID_TPC_QMAN_ARC12,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = CPU_ID_TPC_QMAN_ARC13,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = CPU_ID_TPC_QMAN_ARC13,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = CPU_ID_TPC_QMAN_ARC13,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = CPU_ID_TPC_QMAN_ARC13,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = CPU_ID_TPC_QMAN_ARC14,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = CPU_ID_TPC_QMAN_ARC14,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = CPU_ID_TPC_QMAN_ARC14,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = CPU_ID_TPC_QMAN_ARC14,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = CPU_ID_TPC_QMAN_ARC15,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = CPU_ID_TPC_QMAN_ARC15,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = CPU_ID_TPC_QMAN_ARC15,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = CPU_ID_TPC_QMAN_ARC15,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = CPU_ID_TPC_QMAN_ARC16,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = CPU_ID_TPC_QMAN_ARC16,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = CPU_ID_TPC_QMAN_ARC16,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = CPU_ID_TPC_QMAN_ARC16,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = CPU_ID_TPC_QMAN_ARC17,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = CPU_ID_TPC_QMAN_ARC17,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = CPU_ID_TPC_QMAN_ARC17,
        [GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = CPU_ID_TPC_QMAN_ARC17,
        [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC6,
        [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC6,
        [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC6,
        [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC6,
        [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC7,
        [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC7,
        [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC7,
        [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC7,
        [GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = CPU_ID_SCHED_ARC5,
        [GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = CPU_ID_SCHED_ARC5,
        [GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = CPU_ID_SCHED_ARC5,
        [GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = CPU_ID_SCHED_ARC5,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = CPU_ID_TPC_QMAN_ARC18,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = CPU_ID_TPC_QMAN_ARC18,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = CPU_ID_TPC_QMAN_ARC18,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = CPU_ID_TPC_QMAN_ARC18,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = CPU_ID_TPC_QMAN_ARC19,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = CPU_ID_TPC_QMAN_ARC19,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = CPU_ID_TPC_QMAN_ARC19,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = CPU_ID_TPC_QMAN_ARC19,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = CPU_ID_TPC_QMAN_ARC20,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = CPU_ID_TPC_QMAN_ARC20,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = CPU_ID_TPC_QMAN_ARC20,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = CPU_ID_TPC_QMAN_ARC20,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = CPU_ID_TPC_QMAN_ARC21,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = CPU_ID_TPC_QMAN_ARC21,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = CPU_ID_TPC_QMAN_ARC21,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = CPU_ID_TPC_QMAN_ARC21,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = CPU_ID_TPC_QMAN_ARC22,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = CPU_ID_TPC_QMAN_ARC22,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = CPU_ID_TPC_QMAN_ARC22,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = CPU_ID_TPC_QMAN_ARC22,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = CPU_ID_TPC_QMAN_ARC23,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = CPU_ID_TPC_QMAN_ARC23,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = CPU_ID_TPC_QMAN_ARC23,
        [GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = CPU_ID_TPC_QMAN_ARC23,
        [GAUDI2_QUEUE_ID_NIC_0_0] = CPU_ID_NIC_QMAN_ARC0,
        [GAUDI2_QUEUE_ID_NIC_0_1] = CPU_ID_NIC_QMAN_ARC0,
        [GAUDI2_QUEUE_ID_NIC_0_2] = CPU_ID_NIC_QMAN_ARC0,
        [GAUDI2_QUEUE_ID_NIC_0_3] = CPU_ID_NIC_QMAN_ARC0,
        [GAUDI2_QUEUE_ID_NIC_1_0] = CPU_ID_NIC_QMAN_ARC1,
        [GAUDI2_QUEUE_ID_NIC_1_1] = CPU_ID_NIC_QMAN_ARC1,
        [GAUDI2_QUEUE_ID_NIC_1_2] = CPU_ID_NIC_QMAN_ARC1,
        [GAUDI2_QUEUE_ID_NIC_1_3] = CPU_ID_NIC_QMAN_ARC1,
        [GAUDI2_QUEUE_ID_NIC_2_0] = CPU_ID_NIC_QMAN_ARC2,
        [GAUDI2_QUEUE_ID_NIC_2_1] = CPU_ID_NIC_QMAN_ARC2,
        [GAUDI2_QUEUE_ID_NIC_2_2] = CPU_ID_NIC_QMAN_ARC2,
        [GAUDI2_QUEUE_ID_NIC_2_3] = CPU_ID_NIC_QMAN_ARC2,
        [GAUDI2_QUEUE_ID_NIC_3_0] = CPU_ID_NIC_QMAN_ARC3,
        [GAUDI2_QUEUE_ID_NIC_3_1] = CPU_ID_NIC_QMAN_ARC3,
        [GAUDI2_QUEUE_ID_NIC_3_2] = CPU_ID_NIC_QMAN_ARC3,
        [GAUDI2_QUEUE_ID_NIC_3_3] = CPU_ID_NIC_QMAN_ARC3,
        [GAUDI2_QUEUE_ID_NIC_4_0] = CPU_ID_NIC_QMAN_ARC4,
        [GAUDI2_QUEUE_ID_NIC_4_1] = CPU_ID_NIC_QMAN_ARC4,
        [GAUDI2_QUEUE_ID_NIC_4_2] = CPU_ID_NIC_QMAN_ARC4,
        [GAUDI2_QUEUE_ID_NIC_4_3] = CPU_ID_NIC_QMAN_ARC4,
        [GAUDI2_QUEUE_ID_NIC_5_0] = CPU_ID_NIC_QMAN_ARC5,
        [GAUDI2_QUEUE_ID_NIC_5_1] = CPU_ID_NIC_QMAN_ARC5,
        [GAUDI2_QUEUE_ID_NIC_5_2] = CPU_ID_NIC_QMAN_ARC5,
        [GAUDI2_QUEUE_ID_NIC_5_3] = CPU_ID_NIC_QMAN_ARC5,
        [GAUDI2_QUEUE_ID_NIC_6_0] = CPU_ID_NIC_QMAN_ARC6,
        [GAUDI2_QUEUE_ID_NIC_6_1] = CPU_ID_NIC_QMAN_ARC6,
        [GAUDI2_QUEUE_ID_NIC_6_2] = CPU_ID_NIC_QMAN_ARC6,
        [GAUDI2_QUEUE_ID_NIC_6_3] = CPU_ID_NIC_QMAN_ARC6,
        [GAUDI2_QUEUE_ID_NIC_7_0] = CPU_ID_NIC_QMAN_ARC7,
        [GAUDI2_QUEUE_ID_NIC_7_1] = CPU_ID_NIC_QMAN_ARC7,
        [GAUDI2_QUEUE_ID_NIC_7_2] = CPU_ID_NIC_QMAN_ARC7,
        [GAUDI2_QUEUE_ID_NIC_7_3] = CPU_ID_NIC_QMAN_ARC7,
        [GAUDI2_QUEUE_ID_NIC_8_0] = CPU_ID_NIC_QMAN_ARC8,
        [GAUDI2_QUEUE_ID_NIC_8_1] = CPU_ID_NIC_QMAN_ARC8,
        [GAUDI2_QUEUE_ID_NIC_8_2] = CPU_ID_NIC_QMAN_ARC8,
        [GAUDI2_QUEUE_ID_NIC_8_3] = CPU_ID_NIC_QMAN_ARC8,
        [GAUDI2_QUEUE_ID_NIC_9_0] = CPU_ID_NIC_QMAN_ARC9,
        [GAUDI2_QUEUE_ID_NIC_9_1] = CPU_ID_NIC_QMAN_ARC9,
        [GAUDI2_QUEUE_ID_NIC_9_2] = CPU_ID_NIC_QMAN_ARC9,
        [GAUDI2_QUEUE_ID_NIC_9_3] = CPU_ID_NIC_QMAN_ARC9,
        [GAUDI2_QUEUE_ID_NIC_10_0] = CPU_ID_NIC_QMAN_ARC10,
        [GAUDI2_QUEUE_ID_NIC_10_1] = CPU_ID_NIC_QMAN_ARC10,
        [GAUDI2_QUEUE_ID_NIC_10_2] = CPU_ID_NIC_QMAN_ARC10,
        [GAUDI2_QUEUE_ID_NIC_10_3] = CPU_ID_NIC_QMAN_ARC10,
        [GAUDI2_QUEUE_ID_NIC_11_0] = CPU_ID_NIC_QMAN_ARC11,
        [GAUDI2_QUEUE_ID_NIC_11_1] = CPU_ID_NIC_QMAN_ARC11,
        [GAUDI2_QUEUE_ID_NIC_11_2] = CPU_ID_NIC_QMAN_ARC11,
        [GAUDI2_QUEUE_ID_NIC_11_3] = CPU_ID_NIC_QMAN_ARC11,
        [GAUDI2_QUEUE_ID_NIC_12_0] = CPU_ID_NIC_QMAN_ARC12,
        [GAUDI2_QUEUE_ID_NIC_12_1] = CPU_ID_NIC_QMAN_ARC12,
        [GAUDI2_QUEUE_ID_NIC_12_2] = CPU_ID_NIC_QMAN_ARC12,
        [GAUDI2_QUEUE_ID_NIC_12_3] = CPU_ID_NIC_QMAN_ARC12,
        [GAUDI2_QUEUE_ID_NIC_13_0] = CPU_ID_NIC_QMAN_ARC13,
        [GAUDI2_QUEUE_ID_NIC_13_1] = CPU_ID_NIC_QMAN_ARC13,
        [GAUDI2_QUEUE_ID_NIC_13_2] = CPU_ID_NIC_QMAN_ARC13,
        [GAUDI2_QUEUE_ID_NIC_13_3] = CPU_ID_NIC_QMAN_ARC13,
        [GAUDI2_QUEUE_ID_NIC_14_0] = CPU_ID_NIC_QMAN_ARC14,
        [GAUDI2_QUEUE_ID_NIC_14_1] = CPU_ID_NIC_QMAN_ARC14,
        [GAUDI2_QUEUE_ID_NIC_14_2] = CPU_ID_NIC_QMAN_ARC14,
        [GAUDI2_QUEUE_ID_NIC_14_3] = CPU_ID_NIC_QMAN_ARC14,
        [GAUDI2_QUEUE_ID_NIC_15_0] = CPU_ID_NIC_QMAN_ARC15,
        [GAUDI2_QUEUE_ID_NIC_15_1] = CPU_ID_NIC_QMAN_ARC15,
        [GAUDI2_QUEUE_ID_NIC_15_2] = CPU_ID_NIC_QMAN_ARC15,
        [GAUDI2_QUEUE_ID_NIC_15_3] = CPU_ID_NIC_QMAN_ARC15,
        [GAUDI2_QUEUE_ID_NIC_16_0] = CPU_ID_NIC_QMAN_ARC16,
        [GAUDI2_QUEUE_ID_NIC_16_1] = CPU_ID_NIC_QMAN_ARC16,
        [GAUDI2_QUEUE_ID_NIC_16_2] = CPU_ID_NIC_QMAN_ARC16,
        [GAUDI2_QUEUE_ID_NIC_16_3] = CPU_ID_NIC_QMAN_ARC16,
        [GAUDI2_QUEUE_ID_NIC_17_0] = CPU_ID_NIC_QMAN_ARC17,
        [GAUDI2_QUEUE_ID_NIC_17_1] = CPU_ID_NIC_QMAN_ARC17,
        [GAUDI2_QUEUE_ID_NIC_17_2] = CPU_ID_NIC_QMAN_ARC17,
        [GAUDI2_QUEUE_ID_NIC_17_3] = CPU_ID_NIC_QMAN_ARC17,
        [GAUDI2_QUEUE_ID_NIC_18_0] = CPU_ID_NIC_QMAN_ARC18,
        [GAUDI2_QUEUE_ID_NIC_18_1] = CPU_ID_NIC_QMAN_ARC18,
        [GAUDI2_QUEUE_ID_NIC_18_2] = CPU_ID_NIC_QMAN_ARC18,
        [GAUDI2_QUEUE_ID_NIC_18_3] = CPU_ID_NIC_QMAN_ARC18,
        [GAUDI2_QUEUE_ID_NIC_19_0] = CPU_ID_NIC_QMAN_ARC19,
        [GAUDI2_QUEUE_ID_NIC_19_1] = CPU_ID_NIC_QMAN_ARC19,
        [GAUDI2_QUEUE_ID_NIC_19_2] = CPU_ID_NIC_QMAN_ARC19,
        [GAUDI2_QUEUE_ID_NIC_19_3] = CPU_ID_NIC_QMAN_ARC19,
        [GAUDI2_QUEUE_ID_NIC_20_0] = CPU_ID_NIC_QMAN_ARC20,
        [GAUDI2_QUEUE_ID_NIC_20_1] = CPU_ID_NIC_QMAN_ARC20,
        [GAUDI2_QUEUE_ID_NIC_20_2] = CPU_ID_NIC_QMAN_ARC20,
        [GAUDI2_QUEUE_ID_NIC_20_3] = CPU_ID_NIC_QMAN_ARC20,
        [GAUDI2_QUEUE_ID_NIC_21_0] = CPU_ID_NIC_QMAN_ARC21,
        [GAUDI2_QUEUE_ID_NIC_21_1] = CPU_ID_NIC_QMAN_ARC21,
        [GAUDI2_QUEUE_ID_NIC_21_2] = CPU_ID_NIC_QMAN_ARC21,
        [GAUDI2_QUEUE_ID_NIC_21_3] = CPU_ID_NIC_QMAN_ARC21,
        [GAUDI2_QUEUE_ID_NIC_22_0] = CPU_ID_NIC_QMAN_ARC22,
        [GAUDI2_QUEUE_ID_NIC_22_1] = CPU_ID_NIC_QMAN_ARC22,
        [GAUDI2_QUEUE_ID_NIC_22_2] = CPU_ID_NIC_QMAN_ARC22,
        [GAUDI2_QUEUE_ID_NIC_22_3] = CPU_ID_NIC_QMAN_ARC22,
        [GAUDI2_QUEUE_ID_NIC_23_0] = CPU_ID_NIC_QMAN_ARC23,
        [GAUDI2_QUEUE_ID_NIC_23_1] = CPU_ID_NIC_QMAN_ARC23,
        [GAUDI2_QUEUE_ID_NIC_23_2] = CPU_ID_NIC_QMAN_ARC23,
        [GAUDI2_QUEUE_ID_NIC_23_3] = CPU_ID_NIC_QMAN_ARC23,
        [GAUDI2_QUEUE_ID_ROT_0_0] = CPU_ID_ROT_QMAN_ARC0,
        [GAUDI2_QUEUE_ID_ROT_0_1] = CPU_ID_ROT_QMAN_ARC0,
        [GAUDI2_QUEUE_ID_ROT_0_2] = CPU_ID_ROT_QMAN_ARC0,
        [GAUDI2_QUEUE_ID_ROT_0_3] = CPU_ID_ROT_QMAN_ARC0,
        [GAUDI2_QUEUE_ID_ROT_1_0] = CPU_ID_ROT_QMAN_ARC1,
        [GAUDI2_QUEUE_ID_ROT_1_1] = CPU_ID_ROT_QMAN_ARC1,
        [GAUDI2_QUEUE_ID_ROT_1_2] = CPU_ID_ROT_QMAN_ARC1,
        [GAUDI2_QUEUE_ID_ROT_1_3] = CPU_ID_ROT_QMAN_ARC1
};
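
/*
 * Illustrative sketch, not part of the upstream driver: chaining the lookup
 * tables, a queue ID resolves first to its owning ARC CPU and from there to
 * that ARC's DCCM base. The helper name is hypothetical.
 */
static inline u32 gaudi2_example_queue_dccm_base(u32 queue_id)
{
        u32 cpu_id;

        /* 0 serves as an "invalid base" sentinel for the sketch */
        if (queue_id >= GAUDI2_QUEUE_ID_SIZE)
                return 0;

        cpu_id = gaudi2_queue_id_to_arc_id[queue_id];
        return gaudi2_arc_dccm_bases[cpu_id];
}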

const u32 gaudi2_dma_core_blocks_bases[DMA_CORE_ID_SIZE] = {
        [DMA_CORE_ID_PDMA0] = mmPDMA0_CORE_BASE,
        [DMA_CORE_ID_PDMA1] = mmPDMA1_CORE_BASE,
        [DMA_CORE_ID_EDMA0] = mmDCORE0_EDMA0_CORE_BASE,
        [DMA_CORE_ID_EDMA1] = mmDCORE0_EDMA1_CORE_BASE,
        [DMA_CORE_ID_EDMA2] = mmDCORE1_EDMA0_CORE_BASE,
        [DMA_CORE_ID_EDMA3] = mmDCORE1_EDMA1_CORE_BASE,
        [DMA_CORE_ID_EDMA4] = mmDCORE2_EDMA0_CORE_BASE,
        [DMA_CORE_ID_EDMA5] = mmDCORE2_EDMA1_CORE_BASE,
        [DMA_CORE_ID_EDMA6] = mmDCORE3_EDMA0_CORE_BASE,
        [DMA_CORE_ID_EDMA7] = mmDCORE3_EDMA1_CORE_BASE,
        [DMA_CORE_ID_KDMA] = mmARC_FARM_KDMA_BASE
};

const u32 gaudi2_mme_acc_blocks_bases[MME_ID_SIZE] = {
        [MME_ID_DCORE0] = mmDCORE0_MME_ACC_BASE,
        [MME_ID_DCORE1] = mmDCORE1_MME_ACC_BASE,
        [MME_ID_DCORE2] = mmDCORE2_MME_ACC_BASE,
        [MME_ID_DCORE3] = mmDCORE3_MME_ACC_BASE
};

static const u32 gaudi2_tpc_cfg_blocks_bases[TPC_ID_SIZE] = {
        [TPC_ID_DCORE0_TPC0] = mmDCORE0_TPC0_CFG_BASE,
        [TPC_ID_DCORE0_TPC1] = mmDCORE0_TPC1_CFG_BASE,
        [TPC_ID_DCORE0_TPC2] = mmDCORE0_TPC2_CFG_BASE,
        [TPC_ID_DCORE0_TPC3] = mmDCORE0_TPC3_CFG_BASE,
        [TPC_ID_DCORE0_TPC4] = mmDCORE0_TPC4_CFG_BASE,
        [TPC_ID_DCORE0_TPC5] = mmDCORE0_TPC5_CFG_BASE,
        [TPC_ID_DCORE1_TPC0] = mmDCORE1_TPC0_CFG_BASE,
        [TPC_ID_DCORE1_TPC1] = mmDCORE1_TPC1_CFG_BASE,
        [TPC_ID_DCORE1_TPC2] = mmDCORE1_TPC2_CFG_BASE,
        [TPC_ID_DCORE1_TPC3] = mmDCORE1_TPC3_CFG_BASE,
        [TPC_ID_DCORE1_TPC4] = mmDCORE1_TPC4_CFG_BASE,
        [TPC_ID_DCORE1_TPC5] = mmDCORE1_TPC5_CFG_BASE,
        [TPC_ID_DCORE2_TPC0] = mmDCORE2_TPC0_CFG_BASE,
        [TPC_ID_DCORE2_TPC1] = mmDCORE2_TPC1_CFG_BASE,
        [TPC_ID_DCORE2_TPC2] = mmDCORE2_TPC2_CFG_BASE,
        [TPC_ID_DCORE2_TPC3] = mmDCORE2_TPC3_CFG_BASE,
        [TPC_ID_DCORE2_TPC4] = mmDCORE2_TPC4_CFG_BASE,
        [TPC_ID_DCORE2_TPC5] = mmDCORE2_TPC5_CFG_BASE,
        [TPC_ID_DCORE3_TPC0] = mmDCORE3_TPC0_CFG_BASE,
        [TPC_ID_DCORE3_TPC1] = mmDCORE3_TPC1_CFG_BASE,
        [TPC_ID_DCORE3_TPC2] = mmDCORE3_TPC2_CFG_BASE,
        [TPC_ID_DCORE3_TPC3] = mmDCORE3_TPC3_CFG_BASE,
        [TPC_ID_DCORE3_TPC4] = mmDCORE3_TPC4_CFG_BASE,
        [TPC_ID_DCORE3_TPC5] = mmDCORE3_TPC5_CFG_BASE,
        [TPC_ID_DCORE0_TPC6] = mmDCORE0_TPC6_CFG_BASE,
};

static const u32 gaudi2_tpc_eml_cfg_blocks_bases[TPC_ID_SIZE] = {
        [TPC_ID_DCORE0_TPC0] = mmDCORE0_TPC0_EML_CFG_BASE,
        [TPC_ID_DCORE0_TPC1] = mmDCORE0_TPC1_EML_CFG_BASE,
        [TPC_ID_DCORE0_TPC2] = mmDCORE0_TPC2_EML_CFG_BASE,
        [TPC_ID_DCORE0_TPC3] = mmDCORE0_TPC3_EML_CFG_BASE,
        [TPC_ID_DCORE0_TPC4] = mmDCORE0_TPC4_EML_CFG_BASE,
        [TPC_ID_DCORE0_TPC5] = mmDCORE0_TPC5_EML_CFG_BASE,
        [TPC_ID_DCORE1_TPC0] = mmDCORE1_TPC0_EML_CFG_BASE,
        [TPC_ID_DCORE1_TPC1] = mmDCORE1_TPC1_EML_CFG_BASE,
        [TPC_ID_DCORE1_TPC2] = mmDCORE1_TPC2_EML_CFG_BASE,
        [TPC_ID_DCORE1_TPC3] = mmDCORE1_TPC3_EML_CFG_BASE,
        [TPC_ID_DCORE1_TPC4] = mmDCORE1_TPC4_EML_CFG_BASE,
        [TPC_ID_DCORE1_TPC5] = mmDCORE1_TPC5_EML_CFG_BASE,
        [TPC_ID_DCORE2_TPC0] = mmDCORE2_TPC0_EML_CFG_BASE,
        [TPC_ID_DCORE2_TPC1] = mmDCORE2_TPC1_EML_CFG_BASE,
        [TPC_ID_DCORE2_TPC2] = mmDCORE2_TPC2_EML_CFG_BASE,
        [TPC_ID_DCORE2_TPC3] = mmDCORE2_TPC3_EML_CFG_BASE,
        [TPC_ID_DCORE2_TPC4] = mmDCORE2_TPC4_EML_CFG_BASE,
        [TPC_ID_DCORE2_TPC5] = mmDCORE2_TPC5_EML_CFG_BASE,
        [TPC_ID_DCORE3_TPC0] = mmDCORE3_TPC0_EML_CFG_BASE,
        [TPC_ID_DCORE3_TPC1] = mmDCORE3_TPC1_EML_CFG_BASE,
        [TPC_ID_DCORE3_TPC2] = mmDCORE3_TPC2_EML_CFG_BASE,
        [TPC_ID_DCORE3_TPC3] = mmDCORE3_TPC3_EML_CFG_BASE,
        [TPC_ID_DCORE3_TPC4] = mmDCORE3_TPC4_EML_CFG_BASE,
        [TPC_ID_DCORE3_TPC5] = mmDCORE3_TPC5_EML_CFG_BASE,
        [TPC_ID_DCORE0_TPC6] = mmDCORE0_TPC6_EML_CFG_BASE,
};

const u32 gaudi2_rot_blocks_bases[ROTATOR_ID_SIZE] = {
        [ROTATOR_ID_0] = mmROT0_BASE,
        [ROTATOR_ID_1] = mmROT1_BASE
};

static const u32 gaudi2_tpc_id_to_queue_id[TPC_ID_SIZE] = {
        [TPC_ID_DCORE0_TPC0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0,
        [TPC_ID_DCORE0_TPC1] = GAUDI2_QUEUE_ID_DCORE0_TPC_1_0,
        [TPC_ID_DCORE0_TPC2] = GAUDI2_QUEUE_ID_DCORE0_TPC_2_0,
        [TPC_ID_DCORE0_TPC3] = GAUDI2_QUEUE_ID_DCORE0_TPC_3_0,
        [TPC_ID_DCORE0_TPC4] = GAUDI2_QUEUE_ID_DCORE0_TPC_4_0,
        [TPC_ID_DCORE0_TPC5] = GAUDI2_QUEUE_ID_DCORE0_TPC_5_0,
        [TPC_ID_DCORE1_TPC0] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0,
        [TPC_ID_DCORE1_TPC1] = GAUDI2_QUEUE_ID_DCORE1_TPC_1_0,
        [TPC_ID_DCORE1_TPC2] = GAUDI2_QUEUE_ID_DCORE1_TPC_2_0,
        [TPC_ID_DCORE1_TPC3] = GAUDI2_QUEUE_ID_DCORE1_TPC_3_0,
        [TPC_ID_DCORE1_TPC4] = GAUDI2_QUEUE_ID_DCORE1_TPC_4_0,
        [TPC_ID_DCORE1_TPC5] = GAUDI2_QUEUE_ID_DCORE1_TPC_5_0,
        [TPC_ID_DCORE2_TPC0] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0,
        [TPC_ID_DCORE2_TPC1] = GAUDI2_QUEUE_ID_DCORE2_TPC_1_0,
        [TPC_ID_DCORE2_TPC2] = GAUDI2_QUEUE_ID_DCORE2_TPC_2_0,
        [TPC_ID_DCORE2_TPC3] = GAUDI2_QUEUE_ID_DCORE2_TPC_3_0,
        [TPC_ID_DCORE2_TPC4] = GAUDI2_QUEUE_ID_DCORE2_TPC_4_0,
        [TPC_ID_DCORE2_TPC5] = GAUDI2_QUEUE_ID_DCORE2_TPC_5_0,
        [TPC_ID_DCORE3_TPC0] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0,
        [TPC_ID_DCORE3_TPC1] = GAUDI2_QUEUE_ID_DCORE3_TPC_1_0,
        [TPC_ID_DCORE3_TPC2] = GAUDI2_QUEUE_ID_DCORE3_TPC_2_0,
        [TPC_ID_DCORE3_TPC3] = GAUDI2_QUEUE_ID_DCORE3_TPC_3_0,
        [TPC_ID_DCORE3_TPC4] = GAUDI2_QUEUE_ID_DCORE3_TPC_4_0,
        [TPC_ID_DCORE3_TPC5] = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0,
        [TPC_ID_DCORE0_TPC6] = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0,
};

static const u32 gaudi2_rot_id_to_queue_id[ROTATOR_ID_SIZE] = {
        [ROTATOR_ID_0] = GAUDI2_QUEUE_ID_ROT_0_0,
        [ROTATOR_ID_1] = GAUDI2_QUEUE_ID_ROT_1_0,
};

static const u32 gaudi2_tpc_engine_id_to_tpc_id[] = {
        [GAUDI2_DCORE0_ENGINE_ID_TPC_0] = TPC_ID_DCORE0_TPC0,
        [GAUDI2_DCORE0_ENGINE_ID_TPC_1] = TPC_ID_DCORE0_TPC1,
        [GAUDI2_DCORE0_ENGINE_ID_TPC_2] = TPC_ID_DCORE0_TPC2,
        [GAUDI2_DCORE0_ENGINE_ID_TPC_3] = TPC_ID_DCORE0_TPC3,
        [GAUDI2_DCORE0_ENGINE_ID_TPC_4] = TPC_ID_DCORE0_TPC4,
        [GAUDI2_DCORE0_ENGINE_ID_TPC_5] = TPC_ID_DCORE0_TPC5,
        [GAUDI2_DCORE1_ENGINE_ID_TPC_0] = TPC_ID_DCORE1_TPC0,
        [GAUDI2_DCORE1_ENGINE_ID_TPC_1] = TPC_ID_DCORE1_TPC1,
        [GAUDI2_DCORE1_ENGINE_ID_TPC_2] = TPC_ID_DCORE1_TPC2,
        [GAUDI2_DCORE1_ENGINE_ID_TPC_3] = TPC_ID_DCORE1_TPC3,
        [GAUDI2_DCORE1_ENGINE_ID_TPC_4] = TPC_ID_DCORE1_TPC4,
        [GAUDI2_DCORE1_ENGINE_ID_TPC_5] = TPC_ID_DCORE1_TPC5,
        [GAUDI2_DCORE2_ENGINE_ID_TPC_0] = TPC_ID_DCORE2_TPC0,
        [GAUDI2_DCORE2_ENGINE_ID_TPC_1] = TPC_ID_DCORE2_TPC1,
        [GAUDI2_DCORE2_ENGINE_ID_TPC_2] = TPC_ID_DCORE2_TPC2,
        [GAUDI2_DCORE2_ENGINE_ID_TPC_3] = TPC_ID_DCORE2_TPC3,
        [GAUDI2_DCORE2_ENGINE_ID_TPC_4] = TPC_ID_DCORE2_TPC4,
        [GAUDI2_DCORE2_ENGINE_ID_TPC_5] = TPC_ID_DCORE2_TPC5,
        [GAUDI2_DCORE3_ENGINE_ID_TPC_0] = TPC_ID_DCORE3_TPC0,
        [GAUDI2_DCORE3_ENGINE_ID_TPC_1] = TPC_ID_DCORE3_TPC1,
        [GAUDI2_DCORE3_ENGINE_ID_TPC_2] = TPC_ID_DCORE3_TPC2,
        [GAUDI2_DCORE3_ENGINE_ID_TPC_3] = TPC_ID_DCORE3_TPC3,
        [GAUDI2_DCORE3_ENGINE_ID_TPC_4] = TPC_ID_DCORE3_TPC4,
        [GAUDI2_DCORE3_ENGINE_ID_TPC_5] = TPC_ID_DCORE3_TPC5,
        /* the PCI TPC is placed last (mapped like HW) */
        [GAUDI2_DCORE0_ENGINE_ID_TPC_6] = TPC_ID_DCORE0_TPC6,
};

static const u32 gaudi2_mme_engine_id_to_mme_id[] = {
        [GAUDI2_DCORE0_ENGINE_ID_MME] = MME_ID_DCORE0,
        [GAUDI2_DCORE1_ENGINE_ID_MME] = MME_ID_DCORE1,
        [GAUDI2_DCORE2_ENGINE_ID_MME] = MME_ID_DCORE2,
        [GAUDI2_DCORE3_ENGINE_ID_MME] = MME_ID_DCORE3,
};

static const u32 gaudi2_edma_engine_id_to_edma_id[] = {
        [GAUDI2_ENGINE_ID_PDMA_0] = DMA_CORE_ID_PDMA0,
        [GAUDI2_ENGINE_ID_PDMA_1] = DMA_CORE_ID_PDMA1,
        [GAUDI2_DCORE0_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA0,
        [GAUDI2_DCORE0_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA1,
        [GAUDI2_DCORE1_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA2,
        [GAUDI2_DCORE1_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA3,
        [GAUDI2_DCORE2_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA4,
        [GAUDI2_DCORE2_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA5,
        [GAUDI2_DCORE3_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA6,
        [GAUDI2_DCORE3_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA7,
        [GAUDI2_ENGINE_ID_KDMA] = DMA_CORE_ID_KDMA,
};

const u32 edma_stream_base[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = {
        GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0,
        GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0,
        GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0,
        GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0,
        GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0,
        GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0,
        GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0,
        GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0,
};
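
/*
 * Illustrative sketch, not part of the upstream driver: edma_stream_base is
 * laid out dcore-major, so the first stream of EDMA instance 'edma' in dcore
 * 'dcore' sits at index dcore * NUM_OF_EDMA_PER_DCORE + edma. The helper name
 * is hypothetical.
 */
static inline u32 gaudi2_example_edma_first_stream(u32 dcore, u32 edma)
{
        /* 0 serves as an "invalid queue" sentinel for the sketch */
        if (dcore >= NUM_OF_DCORES || edma >= NUM_OF_EDMA_PER_DCORE)
                return 0;

        return edma_stream_base[dcore * NUM_OF_EDMA_PER_DCORE + edma];
}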

static const char gaudi2_vdec_irq_name[GAUDI2_VDEC_MSIX_ENTRIES][GAUDI2_MAX_STRING_LEN] = {
        "gaudi2 vdec 0_0", "gaudi2 vdec 0_0 abnormal",
        "gaudi2 vdec 0_1", "gaudi2 vdec 0_1 abnormal",
        "gaudi2 vdec 1_0", "gaudi2 vdec 1_0 abnormal",
        "gaudi2 vdec 1_1", "gaudi2 vdec 1_1 abnormal",
        "gaudi2 vdec 2_0", "gaudi2 vdec 2_0 abnormal",
        "gaudi2 vdec 2_1", "gaudi2 vdec 2_1 abnormal",
        "gaudi2 vdec 3_0", "gaudi2 vdec 3_0 abnormal",
        "gaudi2 vdec 3_1", "gaudi2 vdec 3_1 abnormal",
        "gaudi2 vdec s_0", "gaudi2 vdec s_0 abnormal",
        "gaudi2 vdec s_1", "gaudi2 vdec s_1 abnormal"
};

enum rtr_id {
        DCORE0_RTR0,
        DCORE0_RTR1,
        DCORE0_RTR2,
        DCORE0_RTR3,
        DCORE0_RTR4,
        DCORE0_RTR5,
        DCORE0_RTR6,
        DCORE0_RTR7,
        DCORE1_RTR0,
        DCORE1_RTR1,
        DCORE1_RTR2,
        DCORE1_RTR3,
        DCORE1_RTR4,
        DCORE1_RTR5,
        DCORE1_RTR6,
        DCORE1_RTR7,
        DCORE2_RTR0,
        DCORE2_RTR1,
        DCORE2_RTR2,
        DCORE2_RTR3,
        DCORE2_RTR4,
        DCORE2_RTR5,
        DCORE2_RTR6,
        DCORE2_RTR7,
        DCORE3_RTR0,
        DCORE3_RTR1,
        DCORE3_RTR2,
        DCORE3_RTR3,
        DCORE3_RTR4,
        DCORE3_RTR5,
        DCORE3_RTR6,
        DCORE3_RTR7,
};

static const u32 gaudi2_tpc_initiator_hbw_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = {
        DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2, DCORE0_RTR3, DCORE0_RTR3,
        DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5, DCORE1_RTR4, DCORE1_RTR4,
        DCORE2_RTR3, DCORE2_RTR3, DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1,
        DCORE3_RTR4, DCORE3_RTR4, DCORE3_RTR5, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR6,
        DCORE0_RTR0
};

static const u32 gaudi2_tpc_initiator_lbw_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = {
        DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2,
        DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5,
        DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1, DCORE2_RTR0, DCORE2_RTR0,
        DCORE3_RTR5, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR6, DCORE3_RTR7, DCORE3_RTR7,
        DCORE0_RTR0
};
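
/*
 * Illustrative sketch, not part of the upstream driver: both router tables
 * above are indexed by TPC ID, with the PCI TPC occupying the extra last slot
 * (index NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES). The helper name is
 * hypothetical.
 */
static inline void gaudi2_example_tpc_rtr_ids(u32 tpc_id, u32 *hbw_rtr,
                                                u32 *lbw_rtr)
{
        /* Silently skip out-of-range IDs; callers keep their previous values */
        if (tpc_id > NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES)
                return;

        *hbw_rtr = gaudi2_tpc_initiator_hbw_rtr_id[tpc_id];
        *lbw_rtr = gaudi2_tpc_initiator_lbw_rtr_id[tpc_id];
}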

static const u32 gaudi2_dec_initiator_hbw_rtr_id[NUMBER_OF_DEC] = {
        DCORE0_RTR0, DCORE0_RTR0, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0, DCORE2_RTR0,
        DCORE3_RTR7, DCORE3_RTR7, DCORE0_RTR0, DCORE0_RTR0
};

static const u32 gaudi2_dec_initiator_lbw_rtr_id[NUMBER_OF_DEC] = {
        DCORE0_RTR1, DCORE0_RTR1, DCORE1_RTR6, DCORE1_RTR6, DCORE2_RTR1, DCORE2_RTR1,
        DCORE3_RTR6, DCORE3_RTR6, DCORE0_RTR0, DCORE0_RTR0
};

static const u32 gaudi2_nic_initiator_hbw_rtr_id[NIC_NUMBER_OF_MACROS] = {
        DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0,
        DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, DCORE3_RTR7
};

static const u32 gaudi2_nic_initiator_lbw_rtr_id[NIC_NUMBER_OF_MACROS] = {
        DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0,
        DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, DCORE3_RTR7
};

static const u32 gaudi2_edma_initiator_hbw_sft[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = {
        mmSFT0_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE,
        mmSFT0_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
        mmSFT1_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE,
        mmSFT1_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
        mmSFT2_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
        mmSFT2_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE,
        mmSFT3_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
        mmSFT3_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE
};

static const u32 gaudi2_pdma_initiator_hbw_rtr_id[NUM_OF_PDMA] = {
        DCORE0_RTR0, DCORE0_RTR0
};

static const u32 gaudi2_pdma_initiator_lbw_rtr_id[NUM_OF_PDMA] = {
        DCORE0_RTR2, DCORE0_RTR2
};

static const u32 gaudi2_rot_initiator_hbw_rtr_id[NUM_OF_ROT] = {
        DCORE2_RTR0, DCORE3_RTR7
};

static const u32 gaudi2_rot_initiator_lbw_rtr_id[NUM_OF_ROT] = {
        DCORE2_RTR2, DCORE3_RTR5
};

struct mme_initiators_rtr_id {
        u32 wap0;
        u32 wap1;
        u32 write;
        u32 read;
        u32 sbte0;
        u32 sbte1;
        u32 sbte2;
        u32 sbte3;
        u32 sbte4;
};

enum mme_initiators {
        MME_WAP0 = 0,
        MME_WAP1,
        MME_WRITE,
        MME_READ,
        MME_SBTE0,
        MME_SBTE1,
        MME_SBTE2,
        MME_SBTE3,
        MME_SBTE4,
        MME_INITIATORS_MAX
};

static const struct mme_initiators_rtr_id
gaudi2_mme_initiator_rtr_id[NUM_OF_MME_PER_DCORE * NUM_OF_DCORES] = {
        { .wap0 = 5, .wap1 = 7, .write = 6, .read = 7,
        .sbte0 = 7, .sbte1 = 4, .sbte2 = 4, .sbte3 = 5, .sbte4 = 6},
        { .wap0 = 10, .wap1 = 8, .write = 9, .read = 8,
        .sbte0 = 11, .sbte1 = 11, .sbte2 = 10, .sbte3 = 9, .sbte4 = 8},
        { .wap0 = 21, .wap1 = 23, .write = 22, .read = 23,
        .sbte0 = 20, .sbte1 = 20, .sbte2 = 21, .sbte3 = 22, .sbte4 = 23},
        { .wap0 = 30, .wap1 = 28, .write = 29, .read = 30,
        .sbte0 = 31, .sbte1 = 31, .sbte2 = 30, .sbte3 = 29, .sbte4 = 28},
};
1992
1993 enum razwi_event_sources {
1994         RAZWI_TPC,
1995         RAZWI_MME,
1996         RAZWI_EDMA,
1997         RAZWI_PDMA,
1998         RAZWI_NIC,
1999         RAZWI_DEC,
2000         RAZWI_ROT
2001 };
2002
2003 struct hbm_mc_error_causes {
2004         u32 mask;
2005         char cause[50];
2006 };
2007
2008 static struct hl_special_block_info gaudi2_special_blocks[] = GAUDI2_SPECIAL_BLOCKS;
2009
2010 /* The special blocks iterator is currently used to configure security protection bits
2011  * and to read global errors. Most HW blocks are addressable, and those that aren't (N/A)
2012  * must be skipped. The following configurations are commonly used for both PB config
2013  * and global error reading, since currently they both share the same settings.
2014  * Once that changes, we must remember to use separate configurations for each.
2015  */
2016 static int gaudi2_iterator_skip_block_types[] = {
2017                 GAUDI2_BLOCK_TYPE_PLL,
2018                 GAUDI2_BLOCK_TYPE_EU_BIST,
2019                 GAUDI2_BLOCK_TYPE_HBM,
2020                 GAUDI2_BLOCK_TYPE_XFT
2021 };
2022
2023 static struct range gaudi2_iterator_skip_block_ranges[] = {
2024                 /* Skip all PSOC blocks except for PSOC_GLOBAL_CONF */
2025                 {mmPSOC_I2C_M0_BASE, mmPSOC_EFUSE_BASE},
2026                 {mmPSOC_BTL_BASE, mmPSOC_MSTR_IF_RR_SHRD_HBW_BASE},
2027                 /* Skip all CPU blocks except for CPU_IF */
2028                 {mmCPU_CA53_CFG_BASE, mmCPU_CA53_CFG_BASE},
2029                 {mmCPU_TIMESTAMP_BASE, mmCPU_MSTR_IF_RR_SHRD_HBW_BASE}
2030 };
2031
2032 static struct hbm_mc_error_causes hbm_mc_spi[GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE] = {
2033         {HBM_MC_SPI_TEMP_PIN_CHG_MASK, "temperature pins changed"},
2034         {HBM_MC_SPI_THR_ENG_MASK, "temperature-based throttling engaged"},
2035         {HBM_MC_SPI_THR_DIS_ENG_MASK, "temperature-based throttling disengaged"},
2036         {HBM_MC_SPI_IEEE1500_COMP_MASK, "IEEE1500 op comp"},
2037         {HBM_MC_SPI_IEEE1500_PAUSED_MASK, "IEEE1500 op paused"},
2038 };
2039
2040 static const char * const hbm_mc_sei_cause[GAUDI2_NUM_OF_HBM_SEI_CAUSE] = {
2041         [HBM_SEI_CMD_PARITY_EVEN] = "SEI C/A parity even",
2042         [HBM_SEI_CMD_PARITY_ODD] = "SEI C/A parity odd",
2043         [HBM_SEI_READ_ERR] = "SEI read data error",
2044         [HBM_SEI_WRITE_DATA_PARITY_ERR] = "SEI write data parity error",
2045         [HBM_SEI_CATTRIP] = "SEI CATTRIP asserted",
2046         [HBM_SEI_MEM_BIST_FAIL] = "SEI memory BIST fail",
2047         [HBM_SEI_DFI] = "SEI DFI error",
2048         [HBM_SEI_INV_TEMP_READ_OUT] = "SEI invalid temp read",
2049         [HBM_SEI_BIST_FAIL] = "SEI BIST fail"
2050 };
2051
2052 struct mmu_spi_sei_cause {
2053         char cause[50];
2054         int clear_bit;
2055 };
2056
2057 static const struct mmu_spi_sei_cause gaudi2_mmu_spi_sei[GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE] = {
2058         {"page fault", 1},              /* INTERRUPT_CLR[1] */
2059         {"page access", 1},             /* INTERRUPT_CLR[1] */
2060         {"bypass ddr", 2},              /* INTERRUPT_CLR[2] */
2061         {"multi hit", 2},               /* INTERRUPT_CLR[2] */
2062         {"mmu rei0", -1},               /* no clear register bit */
2063         {"mmu rei1", -1},               /* no clear register bit */
2064         {"stlb rei0", -1},              /* no clear register bit */
2065         {"stlb rei1", -1},              /* no clear register bit */
2066         {"rr privileged write hit", 2}, /* INTERRUPT_CLR[2] */
2067         {"rr privileged read hit", 2},  /* INTERRUPT_CLR[2] */
2068         {"rr secure write hit", 2},     /* INTERRUPT_CLR[2] */
2069         {"rr secure read hit", 2},      /* INTERRUPT_CLR[2] */
2070         {"bist_fail no use", 2},        /* INTERRUPT_CLR[2] */
2071         {"bist_fail no use", 2},        /* INTERRUPT_CLR[2] */
2072         {"bist_fail no use", 2},        /* INTERRUPT_CLR[2] */
2073         {"bist_fail no use", 2},        /* INTERRUPT_CLR[2] */
2074         {"slave error", 16},            /* INTERRUPT_CLR[16] */
2075         {"dec error", 17},              /* INTERRUPT_CLR[17] */
2076         {"burst fifo full", 2}          /* INTERRUPT_CLR[2] */
2077 };
2078
2079 struct gaudi2_cache_invld_params {
2080         u64 start_va;
2081         u64 end_va;
2082         u32 inv_start_val;
2083         u32 flags;
2084         bool range_invalidation;
2085 };
2086
2087 struct gaudi2_tpc_idle_data {
2088         struct engines_data *e;
2089         unsigned long *mask;
2090         bool *is_idle;
2091         const char *tpc_fmt;
2092 };
2093
2094 struct gaudi2_tpc_mmu_data {
2095         u32 rw_asid;
2096 };
2097
2098 static s64 gaudi2_state_dump_specs_props[SP_MAX] = {0};
2099
2100 static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val);
2101 static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id);
2102 static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id);
2103 static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id);
2104 static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id);
2105 static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val);
2106 static int gaudi2_send_job_to_kdma(struct hl_device *hdev, u64 src_addr, u64 dst_addr, u32 size,
2107                                                                                 bool is_memset);
2108 static bool gaudi2_get_tpc_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
2109                 struct engines_data *e);
2110 static bool gaudi2_get_mme_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
2111                 struct engines_data *e);
2112 static bool gaudi2_get_edma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
2113                 struct engines_data *e);
2114 static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr);
2115 static u64 gaudi2_mmu_descramble_addr(struct hl_device *hdev, u64 scrambled_addr);
2116
2117 static void gaudi2_init_scrambler_hbm(struct hl_device *hdev)
2118 {
2119
2120 }
2121
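/* Note on the two CB size helpers below: the signal CB is a single MSG_SHORT
 * packet that updates the signal SOB, while the wait CB appears to consist of
 * four MSG_SHORT packets that configure and arm a monitor plus one FENCE
 * packet that stalls the stream until the monitor fires, which is what the
 * size calculations express.
 */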
2122 static u32 gaudi2_get_signal_cb_size(struct hl_device *hdev)
2123 {
2124         return sizeof(struct packet_msg_short);
2125 }
2126
2127 static u32 gaudi2_get_wait_cb_size(struct hl_device *hdev)
2128 {
2129         return sizeof(struct packet_msg_short) * 4 + sizeof(struct packet_fence);
2130 }
2131
2132 void gaudi2_iterate_tpcs(struct hl_device *hdev, struct iterate_module_ctx *ctx)
2133 {
2134         struct asic_fixed_properties *prop = &hdev->asic_prop;
2135         int dcore, inst, tpc_seq;
2136         u32 offset;
2137
2138         /* init the return code */
2139         ctx->rc = 0;
2140
2141         for (dcore = 0; dcore < NUM_OF_DCORES; dcore++) {
2142                 for (inst = 0; inst < NUM_OF_TPC_PER_DCORE; inst++) {
2143                         tpc_seq = dcore * NUM_OF_TPC_PER_DCORE + inst;
2144
2145                         if (!(prop->tpc_enabled_mask & BIT(tpc_seq)))
2146                                 continue;
2147
2148                         offset = (DCORE_OFFSET * dcore) + (DCORE_TPC_OFFSET * inst);
2149
2150                         ctx->fn(hdev, dcore, inst, offset, ctx);
2151                         if (ctx->rc) {
2152                                 dev_err(hdev->dev, "TPC iterator failed for DCORE%d TPC%d\n",
2153                                                         dcore, inst);
2154                                 return;
2155                         }
2156                 }
2157         }
2158
2159         if (!(prop->tpc_enabled_mask & BIT(TPC_ID_DCORE0_TPC6)))
2160                 return;
2161
2162         /* special check for PCI TPC (DCORE0_TPC6) */
2163         offset = DCORE_TPC_OFFSET * (NUM_DCORE0_TPC - 1);
2164         ctx->fn(hdev, 0, NUM_DCORE0_TPC - 1, offset, ctx);
2165         if (ctx->rc)
2166                 dev_err(hdev->dev, "TPC iterator failed for DCORE0 TPC6\n");
2167 }
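
/* Illustrative usage sketch for the TPC iterator (not called from this file;
 * the callback signature is taken from the ctx->fn() invocation above, and the
 * fn/data/rc field layout of struct iterate_module_ctx is assumed from
 * habanalabs.h):
 *
 *	static void count_tpc(struct hl_device *hdev, int dcore, int inst,
 *				u32 offset, struct iterate_module_ctx *ctx)
 *	{
 *		(*(u32 *)ctx->data)++;
 *	}
 *
 *	u32 enabled_tpc_cnt = 0;
 *	struct iterate_module_ctx ctx = {
 *		.fn = count_tpc,
 *		.data = &enabled_tpc_cnt,
 *	};
 *
 *	gaudi2_iterate_tpcs(hdev, &ctx);
 */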
2168
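/* A host physical address is valid as long as it does not fall in the hole
 * between the two host physical ranges, i.e. in
 * [HOST_PHYS_BASE_0 + HOST_PHYS_SIZE_0, HOST_PHYS_BASE_1).
 */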
2169 static bool gaudi2_host_phys_addr_valid(u64 addr)
2170 {
2171         if ((addr < HOST_PHYS_BASE_0 + HOST_PHYS_SIZE_0) || (addr >= HOST_PHYS_BASE_1))
2172                 return true;
2173
2174         return false;
2175 }
2176
2177 static int set_number_of_functional_hbms(struct hl_device *hdev)
2178 {
2179         struct asic_fixed_properties *prop = &hdev->asic_prop;
2180         u8 faulty_hbms = hweight64(hdev->dram_binning);
2181
2182         /* check if all HBMs should be used */
2183         if (!faulty_hbms) {
2184                 dev_dbg(hdev->dev, "All HBMs are in use (no binning)\n");
2185                 prop->num_functional_hbms = GAUDI2_HBM_NUM;
2186                 return 0;
2187         }
2188
2189         /*
2190          * check for the error condition in which the number of binning
2191          * candidates is higher than the maximum supported by the
2192          * driver, in which case the binning mask is rejected and an
2193          * error is returned
2194          */
2195         if (faulty_hbms > MAX_FAULTY_HBMS) {
2196                 dev_err(hdev->dev,
2197                         "HBM binning supports max of %d faulty HBMs, supplied mask 0x%llx.\n",
2198                         MAX_FAULTY_HBMS, hdev->dram_binning);
2199                 return -EINVAL;
2200         }
2201
2202         /*
2203          * the binning mask is valid: the number of functional HBMs is
2204          * GAUDI2_HBM_NUM minus the number of faulty ones
2205          */
2206         prop->num_functional_hbms = GAUDI2_HBM_NUM - faulty_hbms;
2207         return 0;
2208 }
2209
2210 static int gaudi2_set_dram_properties(struct hl_device *hdev)
2211 {
2212         struct asic_fixed_properties *prop = &hdev->asic_prop;
2213         u32 basic_hbm_page_size;
2214         int rc;
2215
2216         rc = set_number_of_functional_hbms(hdev);
2217         if (rc)
2218                 return -EINVAL;
2219
2220         /*
2221          * Due to a HW bug in which the TLB is x16 smaller than expected, we use a workaround
2222          * of an x16 bigger page size, so that the entire HBM mapping can be populated
2223          * in the TLB
2224          */
2225         basic_hbm_page_size = prop->num_functional_hbms * SZ_8M;
2226         prop->dram_page_size = GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR * basic_hbm_page_size;
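        /* Worked example (illustrative, assuming the compensation factor is the
         * x16 mentioned above): with all 6 HBMs functional the basic page is
         * 6 * 8MB = 48MB, giving an effective DRAM page size of 768MB; with one
         * HBM binned out it is 5 * 8MB * 16 = 640MB.
         */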
2227         prop->device_mem_alloc_default_page_size = prop->dram_page_size;
2228         prop->dram_size = prop->num_functional_hbms * SZ_16G;
2229         prop->dram_base_address = DRAM_PHYS_BASE;
2230         prop->dram_end_address = prop->dram_base_address + prop->dram_size;
2231         prop->dram_supports_virtual_memory = true;
2232
2233         prop->dram_user_base_address = DRAM_PHYS_BASE + prop->dram_page_size;
2234         prop->dram_hints_align_mask = ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK;
2235         prop->hints_dram_reserved_va_range.start_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_START;
2236         prop->hints_dram_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_END;
2237
2238         /* since DRAM page size differs from DMMU page size we need to allocate
2239          * DRAM memory in units of dram_page size and mapping this memory in
2240          * units of DMMU page size. we overcome this size mismatch using a
2241          * scrambling routine which takes a DRAM page and converts it to a DMMU
2242          * page.
2243          * We therefore:
2244          * 1. partition the virtual address space into DRAM-page (whole) pages.
2245          *    (suppose we get n such pages)
2246          * 2. limit the amount of virtual address space we got from 1 above to
2247          *    a multiple of 64M as we don't want the scrambled address to cross
2248          *    the DRAM virtual address space.
2249          *    ( m = (n * DRAM_page_size) / DMMU_page_size).
2250          * 3. determine the end address accordingly
2251          *    end_addr = start_addr + m * 48M
2252          *
2253          *    the DRAM address MSBs (63:48) are not part of the roundup calculation
2254          */
2255         prop->dmmu.start_addr = prop->dram_base_address +
2256                         (prop->dram_page_size *
2257                                 DIV_ROUND_UP_SECTOR_T(prop->dram_size, prop->dram_page_size));
2258
2259         prop->dmmu.end_addr = prop->dmmu.start_addr + prop->dram_page_size *
2260                         div_u64((VA_HBM_SPACE_END - prop->dmmu.start_addr), prop->dmmu.page_size);
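        /* e.g. (illustrative, continuing the 768MB-page example above): 96GB of
         * DRAM holds 128 whole DRAM pages, so the DMMU virtual window starts
         * 96GB above the DRAM base, and the end address is computed so the
         * scrambled range stays below VA_HBM_SPACE_END.
         */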
2261
2262         return 0;
2263 }
2264
2265 static int gaudi2_set_fixed_properties(struct hl_device *hdev)
2266 {
2267         struct asic_fixed_properties *prop = &hdev->asic_prop;
2268         struct hw_queue_properties *q_props;
2269         u32 num_sync_stream_queues = 0;
2270         int i;
2271
2272         prop->max_queues = GAUDI2_QUEUE_ID_SIZE;
2273         prop->hw_queues_props = kcalloc(prop->max_queues, sizeof(struct hw_queue_properties),
2274                                         GFP_KERNEL);
2275
2276         if (!prop->hw_queues_props)
2277                 return -ENOMEM;
2278
2279         q_props = prop->hw_queues_props;
2280
2281         for (i = 0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i++) {
2282                 q_props[i].type = QUEUE_TYPE_HW;
2283                 q_props[i].driver_only = 0;
2284
2285                 if (i >= GAUDI2_QUEUE_ID_NIC_0_0 && i <= GAUDI2_QUEUE_ID_NIC_23_3) {
2286                         q_props[i].supports_sync_stream = 0;
2287                 } else {
2288                         q_props[i].supports_sync_stream = 1;
2289                         num_sync_stream_queues++;
2290                 }
2291
2292                 q_props[i].cb_alloc_flags = CB_ALLOC_USER;
2293         }
2294
2295         q_props[GAUDI2_QUEUE_ID_CPU_PQ].type = QUEUE_TYPE_CPU;
2296         q_props[GAUDI2_QUEUE_ID_CPU_PQ].driver_only = 1;
2297         q_props[GAUDI2_QUEUE_ID_CPU_PQ].cb_alloc_flags = CB_ALLOC_KERNEL;
2298
2299         prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
2300         prop->cfg_base_address = CFG_BASE;
2301         prop->device_dma_offset_for_host_access = HOST_PHYS_BASE_0;
2302         prop->host_base_address = HOST_PHYS_BASE_0;
2303         prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE_0;
2304         prop->max_pending_cs = GAUDI2_MAX_PENDING_CS;
2305         prop->completion_queues_count = GAUDI2_RESERVED_CQ_NUMBER;
2306         prop->user_dec_intr_count = NUMBER_OF_DEC;
2307         prop->user_interrupt_count = GAUDI2_IRQ_NUM_USER_LAST - GAUDI2_IRQ_NUM_USER_FIRST + 1;
2308         prop->completion_mode = HL_COMPLETION_MODE_CS;
2309         prop->sync_stream_first_sob = GAUDI2_RESERVED_SOB_NUMBER;
2310         prop->sync_stream_first_mon = GAUDI2_RESERVED_MON_NUMBER;
2311
2312         prop->sram_base_address = SRAM_BASE_ADDR;
2313         prop->sram_size = SRAM_SIZE;
2314         prop->sram_end_address = prop->sram_base_address + prop->sram_size;
2315         prop->sram_user_base_address = prop->sram_base_address + SRAM_USER_BASE_OFFSET;
2316
2317         prop->hints_range_reservation = true;
2318
2319         prop->rotator_enabled_mask = BIT(NUM_OF_ROT) - 1;
2320
2321         if (hdev->pldm)
2322                 prop->mmu_pgt_size = 0x800000; /* 8MB */
2323         else
2324                 prop->mmu_pgt_size = MMU_PAGE_TABLES_INITIAL_SIZE;
2325
2326         prop->mmu_pte_size = HL_PTE_SIZE;
2327         prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
2328         prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
2329
2330         prop->dmmu.hop_shifts[MMU_HOP0] = DHOP0_SHIFT;
2331         prop->dmmu.hop_shifts[MMU_HOP1] = DHOP1_SHIFT;
2332         prop->dmmu.hop_shifts[MMU_HOP2] = DHOP2_SHIFT;
2333         prop->dmmu.hop_shifts[MMU_HOP3] = DHOP3_SHIFT;
2334         prop->dmmu.hop_shifts[MMU_HOP4] = DHOP4_SHIFT;
2335         prop->dmmu.hop_masks[MMU_HOP0] = DHOP0_MASK;
2336         prop->dmmu.hop_masks[MMU_HOP1] = DHOP1_MASK;
2337         prop->dmmu.hop_masks[MMU_HOP2] = DHOP2_MASK;
2338         prop->dmmu.hop_masks[MMU_HOP3] = DHOP3_MASK;
2339         prop->dmmu.hop_masks[MMU_HOP4] = DHOP4_MASK;
2340         prop->dmmu.page_size = PAGE_SIZE_1GB;
2341         prop->dmmu.num_hops = MMU_ARCH_6_HOPS;
2342         prop->dmmu.last_mask = LAST_MASK;
2343         prop->dmmu.host_resident = 1;
2344         prop->dmmu.hop_table_size = prop->mmu_hop_table_size;
2345         prop->dmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
2346
2347         /*
2348          * this is done in order to be able to validate the FW descriptor (i.e. to validate
2349          * that the addresses and allocated space for the FW image do not cross memory bounds).
2350          * for this reason we set the DRAM size to the minimum possible, and later it will
2351          * be modified according to what is reported in the cpucp info packet
2352          */
2353         prop->dram_size = (GAUDI2_HBM_NUM - 1) * SZ_16G;
2354
2355         hdev->pmmu_huge_range = true;
2356         prop->pmmu.host_resident = 1;
2357         prop->pmmu.num_hops = MMU_ARCH_6_HOPS;
2358         prop->pmmu.last_mask = LAST_MASK;
2359         prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
2360         prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
2361
2362         prop->hints_host_reserved_va_range.start_addr = RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START;
2363         prop->hints_host_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HOST_END;
2364         prop->hints_host_hpage_reserved_va_range.start_addr =
2365                         RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_START;
2366         prop->hints_host_hpage_reserved_va_range.end_addr =
2367                         RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_END;
2368
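        /* The PMMU page size follows the host kernel page size, presumably so
         * host pages can be mapped one-to-one: 64KB device pages (16MB huge
         * pages) on 64KB-page kernels, 4KB (2MB huge) otherwise.
         */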
2369         if (PAGE_SIZE == SZ_64K) {
2370                 prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_64K;
2371                 prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_64K;
2372                 prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_64K;
2373                 prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_64K;
2374                 prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_64K;
2375                 prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_64K;
2376                 prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_64K;
2377                 prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_64K;
2378                 prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_64K;
2379                 prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_64K;
2380                 prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_64K;
2381                 prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_64K;
2382                 prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START;
2383                 prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END;
2384                 prop->pmmu.page_size = PAGE_SIZE_64KB;
2385
2386                 /* shifts and masks are the same in PMMU and HPMMU */
2387                 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
2388                 prop->pmmu_huge.page_size = PAGE_SIZE_16MB;
2389                 prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START;
2390                 prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END;
2391         } else {
2392                 prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_4K;
2393                 prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_4K;
2394                 prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_4K;
2395                 prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_4K;
2396                 prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_4K;
2397                 prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_4K;
2398                 prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_4K;
2399                 prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_4K;
2400                 prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_4K;
2401                 prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_4K;
2402                 prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_4K;
2403                 prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_4K;
2404                 prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START;
2405                 prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END;
2406                 prop->pmmu.page_size = PAGE_SIZE_4KB;
2407
2408                 /* shifts and masks are the same in PMMU and HPMMU */
2409                 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
2410                 prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
2411                 prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START;
2412                 prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END;
2413         }
2414
2415         prop->max_num_of_engines = GAUDI2_ENGINE_ID_SIZE;
2416         prop->num_engine_cores = CPU_ID_MAX;
2417         prop->cfg_size = CFG_SIZE;
2418         prop->max_asid = MAX_ASID;
2419         prop->num_of_events = GAUDI2_EVENT_SIZE;
2420
2421         prop->supports_engine_modes = true;
2422
2423         prop->dc_power_default = DC_POWER_DEFAULT;
2424
2425         prop->cb_pool_cb_cnt = GAUDI2_CB_POOL_CB_CNT;
2426         prop->cb_pool_cb_size = GAUDI2_CB_POOL_CB_SIZE;
2427         prop->pcie_dbi_base_address = CFG_BASE + mmPCIE_DBI_BASE;
2428         prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
2429
2430         strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
2431
2432         prop->mme_master_slave_mode = 1;
2433
2434         prop->first_available_user_sob[0] = GAUDI2_RESERVED_SOB_NUMBER +
2435                                         (num_sync_stream_queues * HL_RSVD_SOBS);
2436
2437         prop->first_available_user_mon[0] = GAUDI2_RESERVED_MON_NUMBER +
2438                                         (num_sync_stream_queues * HL_RSVD_MONS);
2439
2440         prop->first_available_user_interrupt = GAUDI2_IRQ_NUM_USER_FIRST;
2441         prop->tpc_interrupt_id = GAUDI2_IRQ_NUM_TPC_ASSERT;
2442         prop->eq_interrupt_id = GAUDI2_IRQ_NUM_EVENT_QUEUE;
2443
2444         prop->first_available_cq[0] = GAUDI2_RESERVED_CQ_NUMBER;
2445
2446         prop->fw_cpu_boot_dev_sts0_valid = false;
2447         prop->fw_cpu_boot_dev_sts1_valid = false;
2448         prop->hard_reset_done_by_fw = false;
2449         prop->gic_interrupts_enable = true;
2450
2451         prop->server_type = HL_SERVER_TYPE_UNKNOWN;
2452
2453         prop->max_dec = NUMBER_OF_DEC;
2454
2455         prop->clk_pll_index = HL_GAUDI2_MME_PLL;
2456
2457         prop->dma_mask = 64;
2458
2459         prop->hbw_flush_reg = mmPCIE_WRAP_SPECIAL_GLBL_SPARE_0;
2460
2461         return 0;
2462 }
2463
2464 static int gaudi2_pci_bars_map(struct hl_device *hdev)
2465 {
2466         static const char * const name[] = {"CFG_SRAM", "MSIX", "DRAM"};
2467         bool is_wc[3] = {false, false, true};
2468         int rc;
2469
2470         rc = hl_pci_bars_map(hdev, name, is_wc);
2471         if (rc)
2472                 return rc;
2473
2474         hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] + (CFG_BASE - STM_FLASH_BASE_ADDR);
2475
2476         return 0;
2477 }
2478
2479 static u64 gaudi2_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
2480 {
2481         struct gaudi2_device *gaudi2 = hdev->asic_specific;
2482         struct hl_inbound_pci_region pci_region;
2483         u64 old_addr = addr;
2484         int rc;
2485
2486         if ((gaudi2) && (gaudi2->dram_bar_cur_addr == addr))
2487                 return old_addr;
2488
2489         if (hdev->asic_prop.iatu_done_by_fw)
2490                 return U64_MAX;
2491
2492         /* Inbound Region 2 - Bar 4 - Point to DRAM */
2493         pci_region.mode = PCI_BAR_MATCH_MODE;
2494         pci_region.bar = DRAM_BAR_ID;
2495         pci_region.addr = addr;
2496         rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
2497         if (rc)
2498                 return U64_MAX;
2499
2500         if (gaudi2) {
2501                 old_addr = gaudi2->dram_bar_cur_addr;
2502                 gaudi2->dram_bar_cur_addr = addr;
2503         }
2504
2505         return old_addr;
2506 }
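
/* Illustrative usage sketch (hypothetical caller, not part of this file):
 * the return value is the previous BAR base, so callers can restore it when
 * they are done accessing DRAM through the BAR window:
 *
 *	u64 old_base = gaudi2_set_hbm_bar_base(hdev,
 *				dram_addr & ~(prop->dram_pci_bar_size - 1));
 *
 *	if (old_base == U64_MAX)
 *		return -EIO;
 *	... read/write DRAM through the BAR ...
 *	gaudi2_set_hbm_bar_base(hdev, old_base);
 */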
2507
2508 static int gaudi2_init_iatu(struct hl_device *hdev)
2509 {
2510         struct hl_inbound_pci_region inbound_region;
2511         struct hl_outbound_pci_region outbound_region;
2512         u32 bar_addr_low, bar_addr_high;
2513         int rc;
2514
2515         if (hdev->asic_prop.iatu_done_by_fw)
2516                 return 0;
2517
2518         /* Temporary inbound Region 0 - Bar 0 - Point to CFG
2519          * We must map this region in BAR match mode in order to
2520          * fetch BAR physical base address
2521          */
2522         inbound_region.mode = PCI_BAR_MATCH_MODE;
2523         inbound_region.bar = SRAM_CFG_BAR_ID;
2524         /* Base address must be aligned to Bar size which is 256 MB */
2525         inbound_region.addr = STM_FLASH_BASE_ADDR - STM_FLASH_ALIGNED_OFF;
2526         rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
2527         if (rc)
2528                 return rc;
2529
2530         /* Fetch physical BAR address */
2531         bar_addr_high = RREG32(mmPCIE_DBI_BAR1_REG + STM_FLASH_ALIGNED_OFF);
2532         bar_addr_low = RREG32(mmPCIE_DBI_BAR0_REG + STM_FLASH_ALIGNED_OFF) & ~0xF;
2533
2534         hdev->pcie_bar_phys[SRAM_CFG_BAR_ID] = (u64)bar_addr_high << 32 | bar_addr_low;
2535
2536         /* Inbound Region 0 - Bar 0 - Point to CFG */
2537         inbound_region.mode = PCI_ADDRESS_MATCH_MODE;
2538         inbound_region.bar = SRAM_CFG_BAR_ID;
2539         inbound_region.offset_in_bar = 0;
2540         inbound_region.addr = STM_FLASH_BASE_ADDR;
2541         inbound_region.size = CFG_REGION_SIZE;
2542         rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
2543         if (rc)
2544                 return rc;
2545
2546         /* Inbound Region 1 - Bar 0 - Point to BAR0_RESERVED + SRAM */
2547         inbound_region.mode = PCI_ADDRESS_MATCH_MODE;
2548         inbound_region.bar = SRAM_CFG_BAR_ID;
2549         inbound_region.offset_in_bar = CFG_REGION_SIZE;
2550         inbound_region.addr = BAR0_RSRVD_BASE_ADDR;
2551         inbound_region.size = BAR0_RSRVD_SIZE + SRAM_SIZE;
2552         rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
2553         if (rc)
2554                 return rc;
2555
2556         /* Inbound Region 2 - Bar 4 - Point to DRAM */
2557         inbound_region.mode = PCI_BAR_MATCH_MODE;
2558         inbound_region.bar = DRAM_BAR_ID;
2559         inbound_region.addr = DRAM_PHYS_BASE;
2560         rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
2561         if (rc)
2562                 return rc;
2563
2564         /* Outbound Region 0 - Point to Host */
2565         outbound_region.addr = HOST_PHYS_BASE_0;
2566         outbound_region.size = HOST_PHYS_SIZE_0;
2567         rc = hl_pci_set_outbound_region(hdev, &outbound_region);
2568
2569         return rc;
2570 }
2571
2572 static enum hl_device_hw_state gaudi2_get_hw_state(struct hl_device *hdev)
2573 {
2574         return RREG32(mmHW_STATE);
2575 }
2576
2577 static int gaudi2_tpc_binning_init_prop(struct hl_device *hdev)
2578 {
2579         struct asic_fixed_properties *prop = &hdev->asic_prop;
2580
2581         /*
2582          * check for the error condition in which the number of binning candidates
2583          * is higher than the maximum supported by the driver
2584          */
2585         if (hweight64(hdev->tpc_binning) > MAX_CLUSTER_BINNING_FAULTY_TPCS) {
2586                 dev_err(hdev->dev, "TPC binning is supported for max of %d faulty TPCs, provided mask 0x%llx\n",
2587                                         MAX_CLUSTER_BINNING_FAULTY_TPCS,
2588                                         hdev->tpc_binning);
2589                 return -EINVAL;
2590         }
2591
2592         prop->tpc_binning_mask = hdev->tpc_binning;
2593         prop->tpc_enabled_mask = GAUDI2_TPC_FULL_MASK;
2594
2595         return 0;
2596 }
2597
2598 static int gaudi2_set_tpc_binning_masks(struct hl_device *hdev)
2599 {
2600         struct asic_fixed_properties *prop = &hdev->asic_prop;
2601         struct hw_queue_properties *q_props = prop->hw_queues_props;
2602         u64 tpc_binning_mask;
2603         u8 subst_idx = 0;
2604         int i, rc;
2605
2606         rc = gaudi2_tpc_binning_init_prop(hdev);
2607         if (rc)
2608                 return rc;
2609
2610         tpc_binning_mask = prop->tpc_binning_mask;
2611
2612         for (i = 0 ; i < MAX_FAULTY_TPCS ; i++) {
2613                 u8 subst_seq, binned, qid_base;
2614
2615                 if (tpc_binning_mask == 0)
2616                         break;
2617
2618                 if (subst_idx == 0) {
2619                         subst_seq = TPC_ID_DCORE0_TPC6;
2620                         qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0;
2621                 } else {
2622                         subst_seq = TPC_ID_DCORE3_TPC5;
2623                         qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0;
2624                 }
2625
2626
2627                 /* clear bit from mask */
2628                 binned = __ffs(tpc_binning_mask);
2629                 /*
2630                  * Coverity complains about possible out-of-bound access in
2631                  * clear_bit
2632                  */
2633                 if (binned >= TPC_ID_SIZE) {
2634                         dev_err(hdev->dev,
2635                                 "Invalid binned TPC (binning mask: %llx)\n",
2636                                 tpc_binning_mask);
2637                         return -EINVAL;
2638                 }
2639                 clear_bit(binned, (unsigned long *)&tpc_binning_mask);
2640
2641                 /* also clear replacing TPC bit from enabled mask */
2642                 clear_bit(subst_seq, (unsigned long *)&prop->tpc_enabled_mask);
2643
2644                 /* bin the substitute TPC's queues */
2645                 q_props[qid_base].binned = 1;
2646                 q_props[qid_base + 1].binned = 1;
2647                 q_props[qid_base + 2].binned = 1;
2648                 q_props[qid_base + 3].binned = 1;
2649
2650                 subst_idx++;
2651         }
2652
2653         return 0;
2654 }
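
/* Worked example (illustrative, assuming the TPC_ID enumeration orders six
 * TPCs per dcore): tpc_binning = BIT(8) marks DCORE1_TPC2 as faulty, so the
 * first substitute (DCORE0_TPC6) is cleared from tpc_enabled_mask and its four
 * queues, GAUDI2_QUEUE_ID_DCORE0_TPC_6_0..3, are flagged as binned.
 */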
2655
2656 static int gaudi2_set_dec_binning_masks(struct hl_device *hdev)
2657 {
2658         struct asic_fixed_properties *prop = &hdev->asic_prop;
2659         u8 num_faulty;
2660
2661         num_faulty = hweight32(hdev->decoder_binning);
2662
2663         /*
2664          * check for the error condition in which the number of binning candidates
2665          * is higher than the maximum supported by the driver
2666          */
2667         if (num_faulty > MAX_FAULTY_DECODERS) {
2668                 dev_err(hdev->dev, "decoder binning is supported for max of single faulty decoder, provided mask 0x%x\n",
2669                                                 hdev->decoder_binning);
2670                 return -EINVAL;
2671         }
2672
2673         prop->decoder_binning_mask = (hdev->decoder_binning & GAUDI2_DECODER_FULL_MASK);
2674
2675         if (prop->decoder_binning_mask)
2676                 prop->decoder_enabled_mask = (GAUDI2_DECODER_FULL_MASK & ~BIT(DEC_ID_PCIE_VDEC1));
2677         else
2678                 prop->decoder_enabled_mask = GAUDI2_DECODER_FULL_MASK;
2679
2680         return 0;
2681 }
2682
2683 static void gaudi2_set_dram_binning_masks(struct hl_device *hdev)
2684 {
2685         struct asic_fixed_properties *prop = &hdev->asic_prop;
2686
2687         /* check if we should override default binning */
2688         if (!hdev->dram_binning) {
2689                 prop->dram_binning_mask = 0;
2690                 prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK;
2691                 return;
2692         }
2693
2694         /* set DRAM binning constraints */
2695         prop->faulty_dram_cluster_map |= hdev->dram_binning;
2696         prop->dram_binning_mask = hdev->dram_binning;
2697         prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK & ~BIT(HBM_ID5);
2698 }
2699
2700 static int gaudi2_set_edma_binning_masks(struct hl_device *hdev)
2701 {
2702         struct asic_fixed_properties *prop = &hdev->asic_prop;
2703         struct hw_queue_properties *q_props;
2704         u8 seq, num_faulty;
2705
2706         num_faulty = hweight32(hdev->edma_binning);
2707
2708         /*
2709          * check for the error condition in which the number of binning candidates
2710          * is higher than the maximum supported by the driver
2711          */
2712         if (num_faulty > MAX_FAULTY_EDMAS) {
2713                 dev_err(hdev->dev,
2714                         "EDMA binning is supported for max of single faulty EDMA, provided mask 0x%x\n",
2715                         hdev->edma_binning);
2716                 return -EINVAL;
2717         }
2718
2719         if (!hdev->edma_binning) {
2720                 prop->edma_binning_mask = 0;
2721                 prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK;
2722                 return 0;
2723         }
2724
2725         seq = __ffs((unsigned long)hdev->edma_binning);
2726
2727         /* set binning constraints */
2728         prop->faulty_dram_cluster_map |= BIT(edma_to_hbm_cluster[seq]);
2729         prop->edma_binning_mask = hdev->edma_binning;
2730         prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK & ~BIT(EDMA_ID_DCORE3_INSTANCE1);
2731
2732         /* bin the substitute EDMA's queues */
2733         q_props = prop->hw_queues_props;
2734         q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0].binned = 1;
2735         q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1].binned = 1;
2736         q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2].binned = 1;
2737         q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3].binned = 1;
2738
2739         return 0;
2740 }
2741
2742 static int gaudi2_set_xbar_edge_enable_mask(struct hl_device *hdev, u32 xbar_edge_iso_mask)
2743 {
2744         struct asic_fixed_properties *prop = &hdev->asic_prop;
2745         u8 num_faulty, seq;
2746
2747         /* check if we should override default binning */
2748         if (!xbar_edge_iso_mask) {
2749                 prop->xbar_edge_enabled_mask = GAUDI2_XBAR_EDGE_FULL_MASK;
2750                 return 0;
2751         }
2752
2753         /*
2754          * note that this mask can be set to a value other than 0 only after the cpucp
2755          * packet (i.e. only the FW can set a redundancy value); for the user it is always 0.
2756          */
2757         num_faulty = hweight32(xbar_edge_iso_mask);
2758
2759         /*
2760          * check for the error condition in which the number of binning candidates
2761          * is higher than the maximum supported by the driver
2762          */
2763         if (num_faulty > MAX_FAULTY_XBARS) {
2764                 dev_err(hdev->dev, "we cannot have more than %d faulty XBAR EDGE\n",
2765                                                                         MAX_FAULTY_XBARS);
2766                 return -EINVAL;
2767         }
2768
2769         seq = __ffs((unsigned long)xbar_edge_iso_mask);
2770
2771         /* set binning constraints */
2772         prop->faulty_dram_cluster_map |= BIT(xbar_edge_to_hbm_cluster[seq]);
2773         prop->xbar_edge_enabled_mask = (~xbar_edge_iso_mask) & GAUDI2_XBAR_EDGE_FULL_MASK;
2774
2775         return 0;
2776 }
2777
2778 static int gaudi2_set_cluster_binning_masks_common(struct hl_device *hdev, u8 xbar_edge_iso_mask)
2779 {
2780         int rc;
2781
2782         /*
2783          * mark all clusters as good; each component will "fail" a cluster
2784          * based on eFuse/user values.
2785          * If more than a single cluster is faulty, the chip is unusable
2786          */
2787         hdev->asic_prop.faulty_dram_cluster_map = 0;
2788
2789         gaudi2_set_dram_binning_masks(hdev);
2790
2791         rc = gaudi2_set_edma_binning_masks(hdev);
2792         if (rc)
2793                 return rc;
2794
2795         rc = gaudi2_set_xbar_edge_enable_mask(hdev, xbar_edge_iso_mask);
2796         if (rc)
2797                 return rc;
2798
2799
2800         /* always initially set to full mask */
2801         hdev->asic_prop.hmmu_hif_enabled_mask = GAUDI2_HIF_HMMU_FULL_MASK;
2802
2803         return 0;
2804 }
2805
2806 static int gaudi2_set_cluster_binning_masks(struct hl_device *hdev)
2807 {
2808         struct asic_fixed_properties *prop = &hdev->asic_prop;
2809         int rc;
2810
2811         rc = gaudi2_set_cluster_binning_masks_common(hdev, prop->cpucp_info.xbar_binning_mask);
2812         if (rc)
2813                 return rc;
2814
2815         /* if the FW reported DRAM binning we should perform cluster configuration */
2816         if (prop->faulty_dram_cluster_map) {
2817                 u8 cluster_seq = __ffs((unsigned long)prop->faulty_dram_cluster_map);
2818
2819                 prop->hmmu_hif_enabled_mask = cluster_hmmu_hif_enabled_mask[cluster_seq];
2820         }
2821
2822         return 0;
2823 }
2824
2825 static int gaudi2_set_binning_masks(struct hl_device *hdev)
2826 {
2827         int rc;
2828
2829         rc = gaudi2_set_cluster_binning_masks(hdev);
2830         if (rc)
2831                 return rc;
2832
2833         rc = gaudi2_set_tpc_binning_masks(hdev);
2834         if (rc)
2835                 return rc;
2836
2837         rc = gaudi2_set_dec_binning_masks(hdev);
2838         if (rc)
2839                 return rc;
2840
2841         return 0;
2842 }
2843
2844 static int gaudi2_cpucp_info_get(struct hl_device *hdev)
2845 {
2846         struct gaudi2_device *gaudi2 = hdev->asic_specific;
2847         struct asic_fixed_properties *prop = &hdev->asic_prop;
2848         long max_power;
2849         u64 dram_size;
2850         int rc;
2851
2852         if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
2853                 return 0;
2854
2855         /* No point in asking for this information again when not doing a hard reset, as the
2856          * device CPU hasn't been reset
2857          */
2858         if (hdev->reset_info.in_compute_reset)
2859                 return 0;
2860
2861         rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
2862                                                                                 mmCPU_BOOT_ERR1);
2863         if (rc)
2864                 return rc;
2865
2866         dram_size = le64_to_cpu(prop->cpucp_info.dram_size);
2867         if (dram_size) {
2868                 /* we can have either 5 or 6 HBMs; other values are invalid */
2869
2870                 if ((dram_size != ((GAUDI2_HBM_NUM - 1) * SZ_16G)) &&
2871                                         (dram_size != (GAUDI2_HBM_NUM * SZ_16G))) {
2872                         dev_err(hdev->dev,
2873                                 "F/W reported invalid DRAM size %llu. Trying to use default size %llu\n",
2874                                 dram_size, prop->dram_size);
2875                         dram_size = prop->dram_size;
2876                 }
2877
2878                 prop->dram_size = dram_size;
2879                 prop->dram_end_address = prop->dram_base_address + dram_size;
2880         }
2881
2882         if (!strlen(prop->cpucp_info.card_name))
2883                 strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
2884
2885         /* Overwrite binning masks with the actual binning values from F/W */
2886         hdev->dram_binning = prop->cpucp_info.dram_binning_mask;
2887         hdev->edma_binning = prop->cpucp_info.edma_binning_mask;
2888         hdev->tpc_binning = le64_to_cpu(prop->cpucp_info.tpc_binning_mask);
2889         hdev->decoder_binning = lower_32_bits(le64_to_cpu(prop->cpucp_info.decoder_binning_mask));
2890
2891         dev_dbg(hdev->dev, "Read binning masks: tpc: 0x%llx, dram: 0x%llx, edma: 0x%x, dec: 0x%x\n",
2892                         hdev->tpc_binning, hdev->dram_binning, hdev->edma_binning,
2893                         hdev->decoder_binning);
2894
2895         /*
2896          * at this point the DRAM parameters need to be updated according to data obtained
2897          * from the FW
2898          */
2899         rc = hdev->asic_funcs->set_dram_properties(hdev);
2900         if (rc)
2901                 return rc;
2902
2903         rc = hdev->asic_funcs->set_binning_masks(hdev);
2904         if (rc)
2905                 return rc;
2906
2907         max_power = hl_fw_get_max_power(hdev);
2908         if (max_power < 0)
2909                 return max_power;
2910
2911         prop->max_power_default = (u64) max_power;
2912
2913         return 0;
2914 }
2915
2916 static int gaudi2_fetch_psoc_frequency(struct hl_device *hdev)
2917 {
2918         struct gaudi2_device *gaudi2 = hdev->asic_specific;
2919         u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS];
2920         int rc;
2921
2922         if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
2923                 return 0;
2924
2925         rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI2_CPU_PLL, pll_freq_arr);
2926         if (rc)
2927                 return rc;
2928
2929         hdev->asic_prop.psoc_timestamp_frequency = pll_freq_arr[3];
2930
2931         return 0;
2932 }
2933
2934 static int gaudi2_early_init(struct hl_device *hdev)
2935 {
2936         struct asic_fixed_properties *prop = &hdev->asic_prop;
2937         struct pci_dev *pdev = hdev->pdev;
2938         resource_size_t pci_bar_size;
2939         int rc;
2940
2941         rc = gaudi2_set_fixed_properties(hdev);
2942         if (rc)
2943                 return rc;
2944
2945         /* Check BAR sizes */
2946         pci_bar_size = pci_resource_len(pdev, SRAM_CFG_BAR_ID);
2947
2948         if (pci_bar_size != CFG_BAR_SIZE) {
2949                 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
2950                         SRAM_CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
2951                 rc = -ENODEV;
2952                 goto free_queue_props;
2953         }
2954
2955         pci_bar_size = pci_resource_len(pdev, MSIX_BAR_ID);
2956         if (pci_bar_size != MSIX_BAR_SIZE) {
2957                 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
2958                         MSIX_BAR_ID, &pci_bar_size, MSIX_BAR_SIZE);
2959                 rc = -ENODEV;
2960                 goto free_queue_props;
2961         }
2962
2963         prop->dram_pci_bar_size = pci_resource_len(pdev, DRAM_BAR_ID);
2964         hdev->dram_pci_bar_start = pci_resource_start(pdev, DRAM_BAR_ID);
2965
2966         /*
2967          * Only in pldm does the driver configure the iATU; otherwise the FW does it
2968          */
2969         if (hdev->pldm)
2970                 hdev->asic_prop.iatu_done_by_fw = false;
2971         else
2972                 hdev->asic_prop.iatu_done_by_fw = true;
2973
2974         rc = hl_pci_init(hdev);
2975         if (rc)
2976                 goto free_queue_props;
2977
2978         /* Before continuing in the initialization, we need to read the preboot
2979          * version to determine whether we run with a security-enabled firmware
2980          */
2981         rc = hl_fw_read_preboot_status(hdev);
2982         if (rc) {
2983                 if (hdev->reset_on_preboot_fail)
2984                         /* we are already on failure flow, so don't check if hw_fini fails. */
2985                         hdev->asic_funcs->hw_fini(hdev, true, false);
2986                 goto pci_fini;
2987         }
2988
2989         if (gaudi2_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
2990                 dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
2991                 rc = hdev->asic_funcs->hw_fini(hdev, true, false);
2992                 if (rc) {
2993                         dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
2994                         goto pci_fini;
2995                 }
2996         }
2997
2998         return 0;
2999
3000 pci_fini:
3001         hl_pci_fini(hdev);
3002 free_queue_props:
3003         kfree(hdev->asic_prop.hw_queues_props);
3004         return rc;
3005 }
3006
3007 static int gaudi2_early_fini(struct hl_device *hdev)
3008 {
3009         kfree(hdev->asic_prop.hw_queues_props);
3010         hl_pci_fini(hdev);
3011
3012         return 0;
3013 }
3014
3015 static bool gaudi2_is_arc_nic_owned(u64 arc_id)
3016 {
3017         switch (arc_id) {
3018         case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
3019                 return true;
3020         default:
3021                 return false;
3022         }
3023 }
3024
3025 static bool gaudi2_is_arc_tpc_owned(u64 arc_id)
3026 {
3027         switch (arc_id) {
3028         case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
3029                 return true;
3030         default:
3031                 return false;
3032         }
3033 }
3034
3035 static void gaudi2_init_arcs(struct hl_device *hdev)
3036 {
3037         struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3038         struct gaudi2_device *gaudi2 = hdev->asic_specific;
3039         u64 arc_id;
3040         u32 i;
3041
3042         for (i = CPU_ID_SCHED_ARC0 ; i <= CPU_ID_SCHED_ARC3 ; i++) {
3043                 if (gaudi2_is_arc_enabled(hdev, i))
3044                         continue;
3045
3046                 gaudi2_set_arc_id_cap(hdev, i);
3047         }
3048
3049         for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) {
3050                 if (!gaudi2_is_queue_enabled(hdev, i))
3051                         continue;
3052
3053                 arc_id = gaudi2_queue_id_to_arc_id[i];
3054                 if (gaudi2_is_arc_enabled(hdev, arc_id))
3055                         continue;
3056
3057                 if (gaudi2_is_arc_nic_owned(arc_id) &&
3058                                 !(hdev->nic_ports_mask & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0)))
3059                         continue;
3060
3061                 if (gaudi2_is_arc_tpc_owned(arc_id) && !(gaudi2->tpc_hw_cap_initialized &
3062                                                         BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0)))
3063                         continue;
3064
3065                 gaudi2_set_arc_id_cap(hdev, arc_id);
3066         }
3067
3068         /* Fetch the engine-core interrupt register address published by the FW */
3069         hdev->asic_prop.engine_core_interrupt_reg_addr =
3070                 CFG_BASE + le32_to_cpu(dyn_regs->eng_arc_irq_ctrl);
3071 }
3072
3073 static int gaudi2_scrub_arc_dccm(struct hl_device *hdev, u32 cpu_id)
3074 {
3075         u32 reg_base, reg_val;
3076         int rc;
3077
3078         switch (cpu_id) {
3079         case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC3:
3080                 /* Each ARC scheduler has 2 consecutive DCCM blocks */
3081                 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
3082                                                 ARC_DCCM_BLOCK_SIZE * 2, true);
3083                 if (rc)
3084                         return rc;
3085                 break;
3086         case CPU_ID_SCHED_ARC4:
3087         case CPU_ID_SCHED_ARC5:
3088         case CPU_ID_MME_QMAN_ARC0:
3089         case CPU_ID_MME_QMAN_ARC1:
3090                 reg_base = gaudi2_arc_blocks_bases[cpu_id];
3091
3092                 /* Scrub lower DCCM block */
3093                 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
3094                                                 ARC_DCCM_BLOCK_SIZE, true);
3095                 if (rc)
3096                         return rc;
3097
3098                 /* Switch to upper DCCM block */
3099                 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 1);
3100                 WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val);
3101
3102                 /* Scrub upper DCCM block */
3103                 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
3104                                                 ARC_DCCM_BLOCK_SIZE, true);
3105                 if (rc)
3106                         return rc;
3107
3108                 /* Switch to lower DCCM block */
3109                 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 0);
3110                 WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val);
3111                 break;
3112         default:
3113                 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
3114                                                 ARC_DCCM_BLOCK_SIZE, true);
3115                 if (rc)
3116                         return rc;
3117         }
3118
3119         return 0;
3120 }
3121
3122 static int gaudi2_scrub_arcs_dccm(struct hl_device *hdev)
3123 {
3124         u16 arc_id;
3125         int rc;
3126
3127         for (arc_id = CPU_ID_SCHED_ARC0 ; arc_id < CPU_ID_MAX ; arc_id++) {
3128                 if (!gaudi2_is_arc_enabled(hdev, arc_id))
3129                         continue;
3130
3131                 rc = gaudi2_scrub_arc_dccm(hdev, arc_id);
3132                 if (rc)
3133                         return rc;
3134         }
3135
3136         return 0;
3137 }
3138
3139 static int gaudi2_late_init(struct hl_device *hdev)
3140 {
3141         struct gaudi2_device *gaudi2 = hdev->asic_specific;
3142         int rc;
3143
3144         hdev->asic_prop.supports_advanced_cpucp_rc = true;
3145
3146         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS,
3147                                         gaudi2->virt_msix_db_dma_addr);
3148         if (rc) {
3149                 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
3150                 return rc;
3151         }
3152
3153         rc = gaudi2_fetch_psoc_frequency(hdev);
3154         if (rc) {
3155                 dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
3156                 goto disable_pci_access;
3157         }
3158
3159         gaudi2_init_arcs(hdev);
3160
3161         rc = gaudi2_scrub_arcs_dccm(hdev);
3162         if (rc) {
3163                 dev_err(hdev->dev, "Failed to scrub arcs DCCM\n");
3164                 goto disable_pci_access;
3165         }
3166
3167         gaudi2_init_security(hdev);
3168
3169         return 0;
3170
3171 disable_pci_access:
3172         hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
3173
3174         return rc;
3175 }
3176
3177 static void gaudi2_late_fini(struct hl_device *hdev)
3178 {
3179         hl_hwmon_release_resources(hdev);
3180 }
3181
3182 static void gaudi2_user_mapped_dec_init(struct gaudi2_device *gaudi2, u32 start_idx)
3183 {
3184         struct user_mapped_block *blocks = gaudi2->mapped_blocks;
3185
3186         HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3187         HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3188         HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3189         HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3190         HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3191         HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3192         HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3193         HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3194         HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmPCIE_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3195         HL_USR_MAPPED_BLK_INIT(&blocks[start_idx], mmPCIE_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3196 }
3197
3198 static void gaudi2_user_mapped_blocks_init(struct hl_device *hdev)
3199 {
3200         struct gaudi2_device *gaudi2 = hdev->asic_specific;
3201         struct user_mapped_block *blocks = gaudi2->mapped_blocks;
3202         u32 block_size, umr_start_idx, num_umr_blocks;
3203         int i;
3204
3205         for (i = 0 ; i < NUM_ARC_CPUS ; i++) {
3206                 if (i >= CPU_ID_SCHED_ARC0 && i <= CPU_ID_SCHED_ARC3)
3207                         block_size = ARC_DCCM_BLOCK_SIZE * 2;
3208                 else
3209                         block_size = ARC_DCCM_BLOCK_SIZE;
3210
3211                 blocks[i].address = gaudi2_arc_dccm_bases[i];
3212                 blocks[i].size = block_size;
3213         }
3214
3215         blocks[NUM_ARC_CPUS].address = mmARC_FARM_ARC0_ACP_ENG_BASE;
3216         blocks[NUM_ARC_CPUS].size = HL_BLOCK_SIZE;
3217
3218         blocks[NUM_ARC_CPUS + 1].address = mmARC_FARM_ARC1_ACP_ENG_BASE;
3219         blocks[NUM_ARC_CPUS + 1].size = HL_BLOCK_SIZE;
3220
3221         blocks[NUM_ARC_CPUS + 2].address = mmARC_FARM_ARC2_ACP_ENG_BASE;
3222         blocks[NUM_ARC_CPUS + 2].size = HL_BLOCK_SIZE;
3223
3224         blocks[NUM_ARC_CPUS + 3].address = mmARC_FARM_ARC3_ACP_ENG_BASE;
3225         blocks[NUM_ARC_CPUS + 3].size = HL_BLOCK_SIZE;
3226
3227         blocks[NUM_ARC_CPUS + 4].address = mmDCORE0_MME_QM_ARC_ACP_ENG_BASE;
3228         blocks[NUM_ARC_CPUS + 4].size = HL_BLOCK_SIZE;
3229
3230         blocks[NUM_ARC_CPUS + 5].address = mmDCORE1_MME_QM_ARC_ACP_ENG_BASE;
3231         blocks[NUM_ARC_CPUS + 5].size = HL_BLOCK_SIZE;
3232
3233         blocks[NUM_ARC_CPUS + 6].address = mmDCORE2_MME_QM_ARC_ACP_ENG_BASE;
3234         blocks[NUM_ARC_CPUS + 6].size = HL_BLOCK_SIZE;
3235
3236         blocks[NUM_ARC_CPUS + 7].address = mmDCORE3_MME_QM_ARC_ACP_ENG_BASE;
3237         blocks[NUM_ARC_CPUS + 7].size = HL_BLOCK_SIZE;
3238
3239         umr_start_idx = NUM_ARC_CPUS + NUM_OF_USER_ACP_BLOCKS;
3240         num_umr_blocks = NIC_NUMBER_OF_ENGINES * NUM_OF_USER_NIC_UMR_BLOCKS;
3241         for (i = 0 ; i < num_umr_blocks ; i++) {
3242                 u8 nic_id, umr_block_id;
3243
3244                 nic_id = i / NUM_OF_USER_NIC_UMR_BLOCKS;
3245                 umr_block_id = i % NUM_OF_USER_NIC_UMR_BLOCKS;
3246
3247                 blocks[umr_start_idx + i].address =
3248                         mmNIC0_UMR0_0_UNSECURE_DOORBELL0_BASE +
3249                         (nic_id / NIC_NUMBER_OF_QM_PER_MACRO) * NIC_OFFSET +
3250                         (nic_id % NIC_NUMBER_OF_QM_PER_MACRO) * NIC_QM_OFFSET +
3251                         umr_block_id * NIC_UMR_OFFSET;
3252                 blocks[umr_start_idx + i].size = HL_BLOCK_SIZE;
3253         }
3254
3255         /* Expose decoder HW configuration block to user */
3256         gaudi2_user_mapped_dec_init(gaudi2, USR_MAPPED_BLK_DEC_START_IDX);
3257
3258         for (i = 1; i < NUM_OF_DCORES; ++i) {
3259                 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].size = SM_OBJS_BLOCK_SIZE;
3260                 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].size = HL_BLOCK_SIZE;
3261
3262                 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].address =
3263                                                 mmDCORE0_SYNC_MNGR_OBJS_BASE + i * DCORE_OFFSET;
3264
3265                 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].address =
3266                                                 mmDCORE0_SYNC_MNGR_GLBL_BASE + i * DCORE_OFFSET;
3267         }
3268 }
3269
3270 static int gaudi2_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
3271 {
3272         dma_addr_t dma_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
3273         void *virt_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {};
3274         int i, j, rc = 0;
3275
3276         /* The device ARC works with 32-bit addresses, and because a single HW register
3277          * holds the extension bits (49..28), these bits must be identical throughout the
3278          * allocated range.
3279          */
3280
3281         for (i = 0 ; i < GAUDI2_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
3282                 virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
3283                                                         &dma_addr_arr[i], GFP_KERNEL | __GFP_ZERO);
3284                 if (!virt_addr_arr[i]) {
3285                         rc = -ENOMEM;
3286                         goto free_dma_mem_arr;
3287                 }
3288
3289                 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
3290                 if (GAUDI2_ARC_PCI_MSB_ADDR(dma_addr_arr[i]) == GAUDI2_ARC_PCI_MSB_ADDR(end_addr))
3291                         break;
3292         }
3293
3294         if (i == GAUDI2_ALLOC_CPU_MEM_RETRY_CNT) {
3295                 dev_err(hdev->dev,
3296                         "MSB of ARC accessible DMA memory is not identical across the allocated range\n");
3297                 rc = -EFAULT;
3298                 goto free_dma_mem_arr;
3299         }
3300
3301         hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
3302         hdev->cpu_accessible_dma_address = dma_addr_arr[i];
3303
3304 free_dma_mem_arr:
3305         for (j = 0 ; j < i ; j++)
3306                 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
3307                                                 dma_addr_arr[j]);
3308
3309         return rc;
3310 }
3311
3312 static void gaudi2_set_pci_memory_regions(struct hl_device *hdev)
3313 {
3314         struct asic_fixed_properties *prop = &hdev->asic_prop;
3315         struct pci_mem_region *region;
3316
3317         /* CFG */
3318         region = &hdev->pci_mem_region[PCI_REGION_CFG];
3319         region->region_base = CFG_BASE;
3320         region->region_size = CFG_SIZE;
3321         region->offset_in_bar = CFG_BASE - STM_FLASH_BASE_ADDR;
3322         region->bar_size = CFG_BAR_SIZE;
3323         region->bar_id = SRAM_CFG_BAR_ID;
3324         region->used = 1;
3325
3326         /* SRAM */
3327         region = &hdev->pci_mem_region[PCI_REGION_SRAM];
3328         region->region_base = SRAM_BASE_ADDR;
3329         region->region_size = SRAM_SIZE;
3330         region->offset_in_bar = CFG_REGION_SIZE + BAR0_RSRVD_SIZE;
3331         region->bar_size = CFG_BAR_SIZE;
3332         region->bar_id = SRAM_CFG_BAR_ID;
3333         region->used = 1;
3334
3335         /* DRAM */
3336         region = &hdev->pci_mem_region[PCI_REGION_DRAM];
3337         region->region_base = DRAM_PHYS_BASE;
3338         region->region_size = hdev->asic_prop.dram_size;
3339         region->offset_in_bar = 0;
3340         region->bar_size = prop->dram_pci_bar_size;
3341         region->bar_id = DRAM_BAR_ID;
3342         region->used = 1;
3343 }
3344
3345 static void gaudi2_user_interrupt_setup(struct hl_device *hdev)
3346 {
3347         struct asic_fixed_properties *prop = &hdev->asic_prop;
3348         int i, j, k;
3349
3350         /* Initialize TPC interrupt */
3351         HL_USR_INTR_STRUCT_INIT(hdev->tpc_interrupt, hdev, 0, HL_USR_INTERRUPT_TPC);
3352
3353         /* Initialize unexpected error interrupt */
3354         HL_USR_INTR_STRUCT_INIT(hdev->unexpected_error_interrupt, hdev, 0,
3355                                                 HL_USR_INTERRUPT_UNEXPECTED);
3356
3357         /* Initialize common user CQ interrupt */
3358         HL_USR_INTR_STRUCT_INIT(hdev->common_user_cq_interrupt, hdev,
3359                                 HL_COMMON_USER_CQ_INTERRUPT_ID, HL_USR_INTERRUPT_CQ);
3360
3361         /* Initialize common decoder interrupt */
3362         HL_USR_INTR_STRUCT_INIT(hdev->common_decoder_interrupt, hdev,
3363                                 HL_COMMON_DEC_INTERRUPT_ID, HL_USR_INTERRUPT_DECODER);
3364
3365         /* The user interrupts array holds both decoder and user interrupts from various engines.
3366          * We first initialize the decoder interrupts and then add the user interrupts.
3367          * The only limitation is that the last decoder interrupt ID must be smaller
3368          * than GAUDI2_IRQ_NUM_USER_FIRST. This is checked at compile time.
3369          */
3370
3371         /* Initialize decoder interrupts; expose only the normal interrupts to the user,
3372          * while the error interrupts are handled by the driver
3373          */
3374         for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, j = 0 ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_NRM;
3375                                                                                 i += 2, j++)
3376                 HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i,
3377                                                 HL_USR_INTERRUPT_DECODER);
3378
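        /* 'j' continues from the decoder loop so the user CQ interrupts are placed
         * right after the decoder entries in the array
         */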
3379         for (i = GAUDI2_IRQ_NUM_USER_FIRST, k = 0 ; k < prop->user_interrupt_count; i++, j++, k++)
3380                 HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i, HL_USR_INTERRUPT_CQ);
3381 }
3382
3383 static inline int gaudi2_get_non_zero_random_int(void)
3384 {
3385         int rand = get_random_u32();
3386
3387         return rand ? rand : 1;
3388 }
3389
3390 static void gaudi2_special_blocks_free(struct hl_device *hdev)
3391 {
3392         struct asic_fixed_properties *prop = &hdev->asic_prop;
3393         struct hl_skip_blocks_cfg *skip_special_blocks_cfg =
3394                         &prop->skip_special_blocks_cfg;
3395
3396         kfree(prop->special_blocks);
3397         kfree(skip_special_blocks_cfg->block_types);
3398         kfree(skip_special_blocks_cfg->block_ranges);
3399 }
3400
3401 static void gaudi2_special_blocks_iterator_free(struct hl_device *hdev)
3402 {
3403         gaudi2_special_blocks_free(hdev);
3404 }
3405
3406 static bool gaudi2_special_block_skip(struct hl_device *hdev,
3407                 struct hl_special_blocks_cfg *special_blocks_cfg,
3408                 u32 blk_idx, u32 major, u32 minor, u32 sub_minor)
3409 {
3410         return false;
3411 }
3412
3413 static int gaudi2_special_blocks_config(struct hl_device *hdev)
3414 {
3415         struct asic_fixed_properties *prop = &hdev->asic_prop;
3416         int i, rc;
3417
3418         /* Configure Special blocks */
3419         prop->glbl_err_cause_num = GAUDI2_NUM_OF_GLBL_ERR_CAUSE;
3420         prop->num_of_special_blocks = ARRAY_SIZE(gaudi2_special_blocks);
3421         prop->special_blocks = kmalloc_array(prop->num_of_special_blocks,
3422                         sizeof(*prop->special_blocks), GFP_KERNEL);
3423         if (!prop->special_blocks)
3424                 return -ENOMEM;
3425
3426         for (i = 0 ; i < prop->num_of_special_blocks ; i++)
3427                 memcpy(&prop->special_blocks[i], &gaudi2_special_blocks[i],
3428                                 sizeof(*prop->special_blocks));
3429
3430         /* Configure when to skip Special blocks */
3431         memset(&prop->skip_special_blocks_cfg, 0, sizeof(prop->skip_special_blocks_cfg));
3432         prop->skip_special_blocks_cfg.skip_block_hook = gaudi2_special_block_skip;
3433
3434         if (ARRAY_SIZE(gaudi2_iterator_skip_block_types)) {
3435                 prop->skip_special_blocks_cfg.block_types =
3436                                 kmalloc_array(ARRAY_SIZE(gaudi2_iterator_skip_block_types),
3437                                         sizeof(gaudi2_iterator_skip_block_types[0]), GFP_KERNEL);
3438                 if (!prop->skip_special_blocks_cfg.block_types) {
3439                         rc = -ENOMEM;
3440                         goto free_special_blocks;
3441                 }
3442
3443                 memcpy(prop->skip_special_blocks_cfg.block_types, gaudi2_iterator_skip_block_types,
3444                                 sizeof(gaudi2_iterator_skip_block_types));
3445
3446                 prop->skip_special_blocks_cfg.block_types_len =
3447                                         ARRAY_SIZE(gaudi2_iterator_skip_block_types);
3448         }
3449
3450         if (ARRAY_SIZE(gaudi2_iterator_skip_block_ranges)) {
3451                 prop->skip_special_blocks_cfg.block_ranges =
3452                                 kmalloc_array(ARRAY_SIZE(gaudi2_iterator_skip_block_ranges),
3453                                         sizeof(gaudi2_iterator_skip_block_ranges[0]), GFP_KERNEL);
3454                 if (!prop->skip_special_blocks_cfg.block_ranges) {
3455                         rc = -ENOMEM;
3456                         goto free_skip_special_blocks_types;
3457                 }
3458
3459                 for (i = 0 ; i < ARRAY_SIZE(gaudi2_iterator_skip_block_ranges) ; i++)
3460                         memcpy(&prop->skip_special_blocks_cfg.block_ranges[i],
3461                                         &gaudi2_iterator_skip_block_ranges[i],
3462                                         sizeof(struct range));
3463
3464                 prop->skip_special_blocks_cfg.block_ranges_len =
3465                                         ARRAY_SIZE(gaudi2_iterator_skip_block_ranges);
3466         }
3467
3468         return 0;
3469
3470 free_skip_special_blocks_types:
3471         kfree(prop->skip_special_blocks_cfg.block_types);
3472 free_special_blocks:
3473         kfree(prop->special_blocks);
3474
3475         return rc;
3476 }
3477
3478 static int gaudi2_special_blocks_iterator_config(struct hl_device *hdev)
3479 {
3480         return gaudi2_special_blocks_config(hdev);
3481 }
3482
3483 static void gaudi2_test_queues_msgs_free(struct hl_device *hdev)
3484 {
3485         struct gaudi2_device *gaudi2 = hdev->asic_specific;
3486         struct gaudi2_queues_test_info *msg_info = gaudi2->queues_test_info;
3487         int i;
3488
3489         for (i = 0 ; i < GAUDI2_NUM_TESTED_QS ; i++) {
3490                 /* bail out if we have reached the allocation failure point */
3491                 if (!msg_info[i].kern_addr)
3492                         break;
3493
3494                 hl_asic_dma_pool_free(hdev, msg_info[i].kern_addr, msg_info[i].dma_addr);
3495                 msg_info[i].kern_addr = NULL;
3496         }
3497 }
3498
3499 static int gaudi2_test_queues_msgs_alloc(struct hl_device *hdev)
3500 {
3501         struct gaudi2_device *gaudi2 = hdev->asic_specific;
3502         struct gaudi2_queues_test_info *msg_info = gaudi2->queues_test_info;
3503         int i, rc;
3504
3505         /* allocate a message-short buffer for each queue we intend to test */
3506         for (i = 0 ; i < GAUDI2_NUM_TESTED_QS ; i++) {
3507                 msg_info[i].kern_addr =
3508                         (void *)hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_short),
3509                                                         GFP_KERNEL, &msg_info[i].dma_addr);
3510                 if (!msg_info[i].kern_addr) {
3511                         dev_err(hdev->dev,
3512                                 "Failed to allocate dma memory for H/W queue %d testing\n", i);
3513                         rc = -ENOMEM;
3514                         goto err_exit;
3515                 }
3516         }
3517
3518         return 0;
3519
3520 err_exit:
3521         gaudi2_test_queues_msgs_free(hdev);
3522         return rc;
3523 }
3524
3525 static int gaudi2_sw_init(struct hl_device *hdev)
3526 {
3527         struct asic_fixed_properties *prop = &hdev->asic_prop;
3528         struct gaudi2_device *gaudi2;
3529         int i, rc;
3530
3531         /* Allocate device structure */
3532         gaudi2 = kzalloc(sizeof(*gaudi2), GFP_KERNEL);
3533         if (!gaudi2)
3534                 return -ENOMEM;
3535
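        /* Build the H/W events array from all valid, non-message entries of the IRQ map table */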
3536         for (i = 0 ; i < ARRAY_SIZE(gaudi2_irq_map_table) ; i++) {
3537                 if (gaudi2_irq_map_table[i].msg || !gaudi2_irq_map_table[i].valid)
3538                         continue;
3539
3540                 if (gaudi2->num_of_valid_hw_events == GAUDI2_EVENT_SIZE) {
3541                         dev_err(hdev->dev, "H/W events array exceeds the limit of %u events\n",
3542                                 GAUDI2_EVENT_SIZE);
3543                         rc = -EINVAL;
3544                         goto free_gaudi2_device;
3545                 }
3546
3547                 gaudi2->hw_events[gaudi2->num_of_valid_hw_events++] = gaudi2_irq_map_table[i].fc_id;
3548         }
3549
3550         for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++)
3551                 gaudi2->lfsr_rand_seeds[i] = gaudi2_get_non_zero_random_int();
3552
3553         gaudi2->cpucp_info_get = gaudi2_cpucp_info_get;
3554
3555         hdev->asic_specific = gaudi2;
3556
3557         /* Create DMA pool for small allocations.
3558          * Use DEVICE_CACHE_LINE_SIZE for alignment since the NIC memory-mapped
3559          * PI/CI registers allocated from this pool have this restriction
3560          */
3561         hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), &hdev->pdev->dev,
3562                                         GAUDI2_DMA_POOL_BLK_SIZE, DEVICE_CACHE_LINE_SIZE, 0);
3563         if (!hdev->dma_pool) {
3564                 dev_err(hdev->dev, "failed to create DMA pool\n");
3565                 rc = -ENOMEM;
3566                 goto free_gaudi2_device;
3567         }
3568
3569         rc = gaudi2_alloc_cpu_accessible_dma_mem(hdev);
3570         if (rc)
3571                 goto free_dma_pool;
3572
3573         hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
3574         if (!hdev->cpu_accessible_dma_pool) {
3575                 dev_err(hdev->dev, "Failed to create CPU accessible DMA pool\n");
3576                 rc = -ENOMEM;
3577                 goto free_cpu_dma_mem;
3578         }
3579
3580         rc = gen_pool_add(hdev->cpu_accessible_dma_pool, (uintptr_t) hdev->cpu_accessible_dma_mem,
3581                                 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
3582         if (rc) {
3583                 dev_err(hdev->dev, "Failed to add memory to CPU accessible DMA pool\n");
3584                 rc = -EFAULT;
3585                 goto free_cpu_accessible_dma_pool;
3586         }
3587
3588         gaudi2->virt_msix_db_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev, prop->pmmu.page_size,
3589                                                                 &gaudi2->virt_msix_db_dma_addr);
3590         if (!gaudi2->virt_msix_db_cpu_addr) {
3591                 dev_err(hdev->dev, "Failed to allocate DMA memory for virtual MSI-X doorbell\n");
3592                 rc = -ENOMEM;
3593                 goto free_cpu_accessible_dma_pool;
3594         }
3595
3596         spin_lock_init(&gaudi2->hw_queues_lock);
3597
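        /* Allocate a DMA-coherent scratchpad page for internal driver use */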
3598         gaudi2->scratchpad_kernel_address = hl_asic_dma_alloc_coherent(hdev, PAGE_SIZE,
3599                                                         &gaudi2->scratchpad_bus_address,
3600                                                         GFP_KERNEL | __GFP_ZERO);
3601         if (!gaudi2->scratchpad_kernel_address) {
3602                 rc = -ENOMEM;
3603                 goto free_virt_msix_db_mem;
3604         }
3605
3606         gaudi2_user_mapped_blocks_init(hdev);
3607
3608         /* Initialize user interrupts */
3609         gaudi2_user_interrupt_setup(hdev);
3610
3611         hdev->supports_coresight = true;
3612         hdev->supports_sync_stream = true;
3613         hdev->supports_cb_mapping = true;
3614         hdev->supports_wait_for_multi_cs = false;
3615
3616         prop->supports_compute_reset = true;
3617
3618         hdev->asic_funcs->set_pci_memory_regions(hdev);
3619
3620         rc = gaudi2_special_blocks_iterator_config(hdev);
3621         if (rc)
3622                 goto free_scratchpad_mem;
3623
3624         rc = gaudi2_test_queues_msgs_alloc(hdev);
3625         if (rc)
3626                 goto special_blocks_free;
3627
3628         return 0;
3629
3630 special_blocks_free:
3631         gaudi2_special_blocks_iterator_free(hdev);
3632 free_scratchpad_mem:
3633         hl_asic_dma_free_coherent(hdev, PAGE_SIZE, gaudi2->scratchpad_kernel_address,
3634                                   gaudi2->scratchpad_bus_address);
3635 free_virt_msix_db_mem:
3636         hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);
3637 free_cpu_accessible_dma_pool:
3638         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
3639 free_cpu_dma_mem:
3640         hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
3641                                         hdev->cpu_accessible_dma_address);
3642 free_dma_pool:
3643         dma_pool_destroy(hdev->dma_pool);
3644 free_gaudi2_device:
3645         kfree(gaudi2);
3646         return rc;
3647 }
3648
3649 static int gaudi2_sw_fini(struct hl_device *hdev)
3650 {
3651         struct asic_fixed_properties *prop = &hdev->asic_prop;
3652         struct gaudi2_device *gaudi2 = hdev->asic_specific;
3653
3654         gaudi2_test_queues_msgs_free(hdev);
3655
3656         gaudi2_special_blocks_iterator_free(hdev);
3657
3658         hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);
3659
3660         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
3661
3662         hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
3663                                                 hdev->cpu_accessible_dma_address);
3664
3665         hl_asic_dma_free_coherent(hdev, PAGE_SIZE, gaudi2->scratchpad_kernel_address,
3666                                         gaudi2->scratchpad_bus_address);
3667
3668         dma_pool_destroy(hdev->dma_pool);
3669
3670         kfree(gaudi2);
3671
3672         return 0;
3673 }
3674
3675 static void gaudi2_stop_qman_common(struct hl_device *hdev, u32 reg_base)
3676 {
3677         WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_STOP |
3678                                                 QM_GLBL_CFG1_CQF_STOP |
3679                                                 QM_GLBL_CFG1_CP_STOP);
3680
3681         /* also stop the ARC */
3682         WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_STOP);
3683 }
3684
3685 static void gaudi2_flush_qman_common(struct hl_device *hdev, u32 reg_base)
3686 {
3687         WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_FLUSH |
3688                                                 QM_GLBL_CFG1_CQF_FLUSH |
3689                                                 QM_GLBL_CFG1_CP_FLUSH);
3690 }
3691
3692 static void gaudi2_flush_qman_arc_common(struct hl_device *hdev, u32 reg_base)
3693 {
3694         WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_FLUSH);
3695 }
3696
3697 /**
3698  * gaudi2_clear_qm_fence_counters_common - clear QM's fence counters
3699  *
3700  * @hdev: pointer to the habanalabs device structure
3701  * @queue_id: queue whose fence counters should be cleared
3702  * @skip_fence: if true, set the maximum fence value in all fence counters to
3703  *              avoid getting stuck on any fence value. Otherwise, set all fence
3704  *              counters to 0 (a standard clear of the fence counters)
3705  */
3706 static void gaudi2_clear_qm_fence_counters_common(struct hl_device *hdev, u32 queue_id,
3707                                                 bool skip_fence)
3708 {
3709         u32 size, reg_base;
3710         u32 addr, val;
3711
3712         reg_base = gaudi2_qm_blocks_bases[queue_id];
3713
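        /* The fence counters occupy the register range from FENCE0_CNT_0
         * up to (but not including) the barrier config register
         */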
3714         addr = reg_base + QM_CP_FENCE0_CNT_0_OFFSET;
3715         size = mmPDMA0_QM_CP_BARRIER_CFG - mmPDMA0_QM_CP_FENCE0_CNT_0;
3716
3717         /*
3718          * To guarantee that a QM stuck on a fence is released, the fence
3719          * counter must be set to a value higher than the one the QM is
3720          * waiting for. Setting all counters to the maximum value satisfies
3721          * a fence of any value.
3722          */
3723         val = skip_fence ? U32_MAX : 0;
3724         gaudi2_memset_device_lbw(hdev, addr, size, val);
3725 }
3726
3727 static void gaudi2_qman_manual_flush_common(struct hl_device *hdev, u32 queue_id)
3728 {
3729         u32 reg_base = gaudi2_qm_blocks_bases[queue_id];
3730
3731         gaudi2_clear_qm_fence_counters_common(hdev, queue_id, true);
3732         gaudi2_flush_qman_common(hdev, reg_base);
3733         gaudi2_flush_qman_arc_common(hdev, reg_base);
3734 }
3735
3736 static void gaudi2_stop_dma_qmans(struct hl_device *hdev)
3737 {
3738         struct gaudi2_device *gaudi2 = hdev->asic_specific;
3739         int dcore, inst;
3740
3741         if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3742                 goto stop_edma_qmans;
3743
3744         /* Stop CPs of PDMA QMANs */
3745         gaudi2_stop_qman_common(hdev, mmPDMA0_QM_BASE);
3746         gaudi2_stop_qman_common(hdev, mmPDMA1_QM_BASE);
3747
3748 stop_edma_qmans:
3749         if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3750                 return;
3751
3752         for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3753                 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3754                         u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3755                         u32 qm_base;
3756
3757                         if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3758                                 continue;
3759
3760                         qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET +
3761                                         inst * DCORE_EDMA_OFFSET;
3762
3763                         /* Stop CPs of EDMA QMANs */
3764                         gaudi2_stop_qman_common(hdev, qm_base);
3765                 }
3766         }
3767 }
3768
3769 static void gaudi2_stop_mme_qmans(struct hl_device *hdev)
3770 {
3771         struct gaudi2_device *gaudi2 = hdev->asic_specific;
3772         u32 offset, i;
3773
3774         offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE;
3775
3776         for (i = 0 ; i < NUM_OF_DCORES ; i++) {
3777                 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i)))
3778                         continue;
3779
3780                 gaudi2_stop_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset));
3781         }
3782 }
3783
3784 static void gaudi2_stop_tpc_qmans(struct hl_device *hdev)
3785 {
3786         struct gaudi2_device *gaudi2 = hdev->asic_specific;
3787         u32 reg_base;
3788         int i;
3789
3790         if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
3791                 return;
3792
3793         for (i = 0 ; i < TPC_ID_SIZE ; i++) {
3794                 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
3795                         continue;
3796
3797                 reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]];
3798                 gaudi2_stop_qman_common(hdev, reg_base);
3799         }
3800 }
3801
3802 static void gaudi2_stop_rot_qmans(struct hl_device *hdev)
3803 {
3804         struct gaudi2_device *gaudi2 = hdev->asic_specific;
3805         u32 reg_base;
3806         int i;
3807
3808         if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
3809                 return;
3810
3811         for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
3812                 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
3813                         continue;
3814
3815                 reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]];
3816                 gaudi2_stop_qman_common(hdev, reg_base);
3817         }
3818 }
3819
3820 static void gaudi2_stop_nic_qmans(struct hl_device *hdev)
3821 {
3822         struct gaudi2_device *gaudi2 = hdev->asic_specific;
3823         u32 reg_base, queue_id;
3824         int i;
3825
3826         if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
3827                 return;
3828
3829         queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
3830
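        /* Each NIC engine owns NUM_OF_PQ_PER_QMAN consecutive queue IDs */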
3831         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
3832                 if (!(hdev->nic_ports_mask & BIT(i)))
3833                         continue;
3834
3835                 reg_base = gaudi2_qm_blocks_bases[queue_id];
3836                 gaudi2_stop_qman_common(hdev, reg_base);
3837         }
3838 }
3839
3840 static void gaudi2_stall_dma_common(struct hl_device *hdev, u32 reg_base)
3841 {
3842         u32 reg_val;
3843
3844         reg_val = FIELD_PREP(PDMA0_CORE_CFG_1_HALT_MASK, 0x1);
3845         WREG32(reg_base + DMA_CORE_CFG_1_OFFSET, reg_val);
3846 }
3847
3848 static void gaudi2_dma_stall(struct hl_device *hdev)
3849 {
3850         struct gaudi2_device *gaudi2 = hdev->asic_specific;
3851         int dcore, inst;
3852
3853         if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3854                 goto stall_edma;
3855
3856         gaudi2_stall_dma_common(hdev, mmPDMA0_CORE_BASE);
3857         gaudi2_stall_dma_common(hdev, mmPDMA1_CORE_BASE);
3858
3859 stall_edma:
3860         if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3861                 return;
3862
3863         for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3864                 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3865                         u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3866                         u32 core_base;
3867
3868                         if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3869                                 continue;
3870
3871                         core_base = mmDCORE0_EDMA0_CORE_BASE + dcore * DCORE_OFFSET +
3872                                         inst * DCORE_EDMA_OFFSET;
3873
3874                         /* Stall the EDMA engine cores */
3875                         gaudi2_stall_dma_common(hdev, core_base);
3876                 }
3877         }
3878 }
3879
3880 static void gaudi2_mme_stall(struct hl_device *hdev)
3881 {
3882         struct gaudi2_device *gaudi2 = hdev->asic_specific;
3883         u32 offset, i;
3884
3885         offset = mmDCORE1_MME_CTRL_LO_QM_STALL - mmDCORE0_MME_CTRL_LO_QM_STALL;
3886
3887         for (i = 0 ; i < NUM_OF_DCORES ; i++)
3888                 if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i))
3889                         WREG32(mmDCORE0_MME_CTRL_LO_QM_STALL + (i * offset), 1);
3890 }
3891
3892 static void gaudi2_tpc_stall(struct hl_device *hdev)
3893 {
3894         struct gaudi2_device *gaudi2 = hdev->asic_specific;
3895         u32 reg_base;
3896         int i;
3897
3898         if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
3899                 return;
3900
3901         for (i = 0 ; i < TPC_ID_SIZE ; i++) {
3902                 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
3903                         continue;
3904
3905                 reg_base = gaudi2_tpc_cfg_blocks_bases[i];
3906                 WREG32(reg_base + TPC_CFG_STALL_OFFSET, 1);
3907         }
3908 }
3909
3910 static void gaudi2_rotator_stall(struct hl_device *hdev)
3911 {
3912         struct gaudi2_device *gaudi2 = hdev->asic_specific;
3913         u32 reg_val;
3914         int i;
3915
3916         if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
3917                 return;
3918
3919         reg_val = FIELD_PREP(ROT_MSS_HALT_WBC_MASK, 0x1) |
3920                         FIELD_PREP(ROT_MSS_HALT_RSB_MASK, 0x1) |
3921                         FIELD_PREP(ROT_MSS_HALT_MRSB_MASK, 0x1);
3922
3923         for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
3924                 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
3925                         continue;
3926
3927                 WREG32(mmROT0_MSS_HALT + i * ROT_OFFSET, reg_val);
3928         }
3929 }
3930
3931 static void gaudi2_disable_qman_common(struct hl_device *hdev, u32 reg_base)
3932 {
3933         WREG32(reg_base + QM_GLBL_CFG0_OFFSET, 0);
3934 }
3935
3936 static void gaudi2_disable_dma_qmans(struct hl_device *hdev)
3937 {
3938         struct gaudi2_device *gaudi2 = hdev->asic_specific;
3939         int dcore, inst;
3940
3941         if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3942                 goto stop_edma_qmans;
3943
3944         gaudi2_disable_qman_common(hdev, mmPDMA0_QM_BASE);
3945         gaudi2_disable_qman_common(hdev, mmPDMA1_QM_BASE);
3946
3947 stop_edma_qmans:
3948         if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3949                 return;
3950
3951         for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3952                 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3953                         u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3954                         u32 qm_base;
3955
3956                         if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3957                                 continue;
3958
3959                         qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET +
3960                                         inst * DCORE_EDMA_OFFSET;
3961
3962                         /* Disable CPs of EDMA QMANs */
3963                         gaudi2_disable_qman_common(hdev, qm_base);
3964                 }
3965         }
3966 }
3967
3968 static void gaudi2_disable_mme_qmans(struct hl_device *hdev)
3969 {
3970         struct gaudi2_device *gaudi2 = hdev->asic_specific;
3971         u32 offset, i;
3972
3973         offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE;
3974
3975         for (i = 0 ; i < NUM_OF_DCORES ; i++)
3976                 if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i))
3977                         gaudi2_disable_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset));
3978 }
3979
3980 static void gaudi2_disable_tpc_qmans(struct hl_device *hdev)
3981 {
3982         struct gaudi2_device *gaudi2 = hdev->asic_specific;
3983         u32 reg_base;
3984         int i;
3985
3986         if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
3987                 return;
3988
3989         for (i = 0 ; i < TPC_ID_SIZE ; i++) {
3990                 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
3991                         continue;
3992
3993                 reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]];
3994                 gaudi2_disable_qman_common(hdev, reg_base);
3995         }
3996 }
3997
3998 static void gaudi2_disable_rot_qmans(struct hl_device *hdev)
3999 {
4000         struct gaudi2_device *gaudi2 = hdev->asic_specific;
4001         u32 reg_base;
4002         int i;
4003
4004         if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
4005                 return;
4006
4007         for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
4008                 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
4009                         continue;
4010
4011                 reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]];
4012                 gaudi2_disable_qman_common(hdev, reg_base);
4013         }
4014 }
4015
4016 static void gaudi2_disable_nic_qmans(struct hl_device *hdev)
4017 {
4018         struct gaudi2_device *gaudi2 = hdev->asic_specific;
4019         u32 reg_base, queue_id;
4020         int i;
4021
4022         if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
4023                 return;
4024
4025         queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
4026
4027         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
4028                 if (!(hdev->nic_ports_mask & BIT(i)))
4029                         continue;
4030
4031                 reg_base = gaudi2_qm_blocks_bases[queue_id];
4032                 gaudi2_disable_qman_common(hdev, reg_base);
4033         }
4034 }
4035
4036 static void gaudi2_enable_timestamp(struct hl_device *hdev)
4037 {
4038         /* Disable the timestamp counter */
4039         WREG32(mmPSOC_TIMESTAMP_BASE, 0);
4040
4041         /* Zero the lower/upper parts of the 64-bit counter */
4042         WREG32(mmPSOC_TIMESTAMP_BASE + 0xC, 0);
4043         WREG32(mmPSOC_TIMESTAMP_BASE + 0x8, 0);
4044
4045         /* Enable the counter */
4046         WREG32(mmPSOC_TIMESTAMP_BASE, 1);
4047 }
4048
4049 static void gaudi2_disable_timestamp(struct hl_device *hdev)
4050 {
4051         /* Disable the timestamp counter */
4052         WREG32(mmPSOC_TIMESTAMP_BASE, 0);
4053 }
4054
4055 static const char *gaudi2_irq_name(u16 irq_number)
4056 {
4057         switch (irq_number) {
4058         case GAUDI2_IRQ_NUM_EVENT_QUEUE:
4059                 return "gaudi2 cpu eq";
4060         case GAUDI2_IRQ_NUM_COMPLETION:
4061                 return "gaudi2 completion";
4062         case GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ... GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM:
4063                 return gaudi2_vdec_irq_name[irq_number - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM];
4064         case GAUDI2_IRQ_NUM_TPC_ASSERT:
4065                 return "gaudi2 tpc assert";
4066         case GAUDI2_IRQ_NUM_UNEXPECTED_ERROR:
4067                 return "gaudi2 unexpected error";
4068         case GAUDI2_IRQ_NUM_USER_FIRST ... GAUDI2_IRQ_NUM_USER_LAST:
4069                 return "gaudi2 user completion";
4070         default:
4071                 return "invalid";
4072         }
4073 }
4074
4075 static void gaudi2_dec_disable_msix(struct hl_device *hdev, u32 max_irq_num)
4076 {
4077         int i, irq, relative_idx;
4078         struct hl_dec *dec;
4079
4080         for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i < max_irq_num ; i++) {
4081                 irq = pci_irq_vector(hdev->pdev, i);
4082                 relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM;
4083
4084                 dec = hdev->dec + relative_idx / 2;
4085
4086                 /* We pass different structures depending on the irq handler. For the abnormal
4087                  * interrupt we pass hl_dec and for the regular interrupt we pass the relevant
4088                  * user_interrupt entry
4089                  */
4090                 free_irq(irq, ((relative_idx % 2) ?
4091                                 (void *) dec :
4092                                 (void *) &hdev->user_interrupt[dec->core_id]));
4093         }
4094 }
4095
4096 static int gaudi2_dec_enable_msix(struct hl_device *hdev)
4097 {
4098         int rc, i, irq_init_cnt, irq, relative_idx;
4099         struct hl_dec *dec;
4100
4101         for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, irq_init_cnt = 0;
4102                         i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM;
4103                         i++, irq_init_cnt++) {
4104
4105                 irq = pci_irq_vector(hdev->pdev, i);
4106                 relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM;
4107
4108                 /* We pass different structures depending on the irq handler. For the abnormal
4109                  * interrupt we pass hl_dec and for the regular interrupt we pass the relevant
4110                  * user_interrupt entry
4111                  *
4112                  * TODO: change the dec abnrm to threaded irq
4113                  */
4114
4115                 dec = hdev->dec + relative_idx / 2;
4116                 if (relative_idx % 2) {
4117                         rc = request_irq(irq, hl_irq_handler_dec_abnrm, 0,
4118                                                 gaudi2_irq_name(i), (void *) dec);
4119                 } else {
4120                         rc = request_threaded_irq(irq, hl_irq_handler_user_interrupt,
4121                                         hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
4122                                         gaudi2_irq_name(i),
4123                                         (void *) &hdev->user_interrupt[dec->core_id]);
4124                 }
4125
4126                 if (rc) {
4127                         dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4128                         goto free_dec_irqs;
4129                 }
4130         }
4131
4132         return 0;
4133
4134 free_dec_irqs:
4135         gaudi2_dec_disable_msix(hdev, (GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + irq_init_cnt));
4136         return rc;
4137 }
4138
4139 static int gaudi2_enable_msix(struct hl_device *hdev)
4140 {
4141         struct asic_fixed_properties *prop = &hdev->asic_prop;
4142         struct gaudi2_device *gaudi2 = hdev->asic_specific;
4143         int rc, irq, i, j, user_irq_init_cnt;
4144         struct hl_cq *cq;
4145
4146         if (gaudi2->hw_cap_initialized & HW_CAP_MSIX)
4147                 return 0;
4148
4149         rc = pci_alloc_irq_vectors(hdev->pdev, GAUDI2_MSIX_ENTRIES, GAUDI2_MSIX_ENTRIES,
4150                                         PCI_IRQ_MSIX);
4151         if (rc < 0) {
4152                 dev_err(hdev->dev, "MSI-X: Failed to enable support -- %d/%d\n",
4153                         GAUDI2_MSIX_ENTRIES, rc);
4154                 return rc;
4155         }
4156
4157         irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
4158         cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
4159         rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_COMPLETION), cq);
4160         if (rc) {
4161                 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4162                 goto free_irq_vectors;
4163         }
4164
4165         irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
4166         rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_EVENT_QUEUE),
4167                         &hdev->event_queue);
4168         if (rc) {
4169                 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4170                 goto free_completion_irq;
4171         }
4172
4173         rc = gaudi2_dec_enable_msix(hdev);
4174         if (rc) {
4175                 dev_err(hdev->dev, "Failed to enable decoder IRQ");
4176                 goto free_event_irq;
4177         }
4178
4179         irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
4180         rc = request_threaded_irq(irq, hl_irq_handler_user_interrupt,
4181                         hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
4182                         gaudi2_irq_name(GAUDI2_IRQ_NUM_TPC_ASSERT), &hdev->tpc_interrupt);
4183         if (rc) {
4184                 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4185                 goto free_dec_irq;
4186         }
4187
4188         irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
4189         rc = request_irq(irq, hl_irq_handler_user_interrupt, 0,
4190                         gaudi2_irq_name(GAUDI2_IRQ_NUM_UNEXPECTED_ERROR),
4191                                         &hdev->unexpected_error_interrupt);
4192         if (rc) {
4193                 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4194                 goto free_tpc_irq;
4195         }
4196
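        /* User CQ interrupts are stored right after the decoder entries in the
         * user_interrupt array, hence 'j' starts at user_dec_intr_count
         */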
4197         for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, user_irq_init_cnt = 0;
4198                         user_irq_init_cnt < prop->user_interrupt_count;
4199                         i++, j++, user_irq_init_cnt++) {
4200
4201                 irq = pci_irq_vector(hdev->pdev, i);
4202                 rc = request_threaded_irq(irq, hl_irq_handler_user_interrupt,
4203                                                 hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
4204                                                 gaudi2_irq_name(i), &hdev->user_interrupt[j]);
4205
4206                 if (rc) {
4207                         dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4208                         goto free_user_irq;
4209                 }
4210         }
4211
4212         gaudi2->hw_cap_initialized |= HW_CAP_MSIX;
4213
4214         return 0;
4215
4216 free_user_irq:
4217         for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count;
4218                         i < GAUDI2_IRQ_NUM_USER_FIRST + user_irq_init_cnt ; i++, j++) {
4219
4220                 irq = pci_irq_vector(hdev->pdev, i);
4221                 free_irq(irq, &hdev->user_interrupt[j]);
4222         }
4223         irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
4224         free_irq(irq, &hdev->unexpected_error_interrupt);
4225 free_tpc_irq:
4226         irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
4227         free_irq(irq, &hdev->tpc_interrupt);
4228 free_dec_irq:
4229         gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_DEC_LAST + 1);
4230 free_event_irq:
4231         irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
4232         free_irq(irq, &hdev->event_queue);
4233
4234 free_completion_irq:
4235         irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
4236         free_irq(irq, cq);
4237
4238 free_irq_vectors:
4239         pci_free_irq_vectors(hdev->pdev);
4240
4241         return rc;
4242 }
4243
4244 static void gaudi2_sync_irqs(struct hl_device *hdev)
4245 {
4246         struct gaudi2_device *gaudi2 = hdev->asic_specific;
4247         int i, j;
4248         int irq;
4249
4250         if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX))
4251                 return;
4252
4253         /* Wait for all pending IRQ handlers to finish */
4254         synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION));
4255
4256         for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM ; i++) {
4257                 irq = pci_irq_vector(hdev->pdev, i);
4258                 synchronize_irq(irq);
4259         }
4260
4261         synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT));
4262         synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR));
4263
4264         for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = 0 ; j < hdev->asic_prop.user_interrupt_count;
4265                                                                                 i++, j++) {
4266                 irq = pci_irq_vector(hdev->pdev, i);
4267                 synchronize_irq(irq);
4268         }
4269
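        /* Finally, wait for the event queue interrupt handler to finish */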
4270         synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE));
4271 }
4272
4273 static void gaudi2_disable_msix(struct hl_device *hdev)
4274 {
4275         struct asic_fixed_properties *prop = &hdev->asic_prop;
4276         struct gaudi2_device *gaudi2 = hdev->asic_specific;
4277         struct hl_cq *cq;
4278         int irq, i, j, k;
4279
4280         if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX))
4281                 return;
4282
4283         gaudi2_sync_irqs(hdev);
4284
4285         irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
4286         free_irq(irq, &hdev->event_queue);
4287
4288         gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1);
4289
4290         irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
4291         free_irq(irq, &hdev->tpc_interrupt);
4292
4293         irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
4294         free_irq(irq, &hdev->unexpected_error_interrupt);
4295
4296         for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, k = 0;
4297                         k < hdev->asic_prop.user_interrupt_count ; i++, j++, k++) {
4298
4299                 irq = pci_irq_vector(hdev->pdev, i);
4300                 free_irq(irq, &hdev->user_interrupt[j]);
4301         }
4302
4303         irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
4304         cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
4305         free_irq(irq, cq);
4306
4307         pci_free_irq_vectors(hdev->pdev);
4308
4309         gaudi2->hw_cap_initialized &= ~HW_CAP_MSIX;
4310 }
4311
4312 static void gaudi2_stop_dcore_dec(struct hl_device *hdev, int dcore_id)
4313 {
4314         u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1);
4315         u32 graceful_pend_mask = DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK;
4316         u32 timeout_usec, dec_id, dec_bit, offset, graceful;
4317         int rc;
4318
4319         if (hdev->pldm)
4320                 timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC;
4321         else
4322                 timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC;
4323
4324         for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
4325                 dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id;
4326                 if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
4327                         continue;
4328
4329                 offset = dcore_id * DCORE_OFFSET + dec_id * DCORE_VDEC_OFFSET;
4330
4331                 WREG32(mmDCORE0_DEC0_CMD_SWREG16 + offset, 0);
4332
4333                 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val);
4334
4335                 /* Wait until all traffic from the decoder stops
4336                  * before applying core reset.
4337                  */
4338                 rc = hl_poll_timeout(
4339                                 hdev,
4340                                 mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset,
4341                                 graceful,
4342                                 (graceful & graceful_pend_mask),
4343                                 100,
4344                                 timeout_usec);
4345                 if (rc)
4346                         dev_err(hdev->dev,
4347                                 "Failed to stop traffic from DCORE%d Decoder %d\n",
4348                                 dcore_id, dec_id);
4349         }
4350 }
4351
4352 static void gaudi2_stop_pcie_dec(struct hl_device *hdev)
4353 {
4354         u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1);
4355         u32 graceful_pend_mask = PCIE_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK;
4356         u32 timeout_usec, dec_id, dec_bit, offset, graceful;
4357         int rc;
4358
4359         if (hdev->pldm)
4360                 timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC;
4361         else
4362                 timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC;
4363
4364         for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
4365                 dec_bit = PCIE_DEC_SHIFT + dec_id;
4366                 if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
4367                         continue;
4368
4369                 offset = dec_id * PCIE_VDEC_OFFSET;
4370
4371                 WREG32(mmPCIE_DEC0_CMD_SWREG16 + offset, 0);
4372
4373                 WREG32(mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val);
4374
4375                 /* Wait until all traffic from the decoder stops
4376                  * before applying core reset.
4377                  */
4378                 rc = hl_poll_timeout(
4379                                 hdev,
4380                                 mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset,
4381                                 graceful,
4382                                 (graceful & graceful_pend_mask),
4383                                 100,
4384                                 timeout_usec);
4385                 if (rc)
4386                         dev_err(hdev->dev,
4387                                 "Failed to stop traffic from PCIe Decoder %d\n",
4388                                 dec_id);
4389         }
4390 }
4391
4392 static void gaudi2_stop_dec(struct hl_device *hdev)
4393 {
4394         struct gaudi2_device *gaudi2 = hdev->asic_specific;
4395         int dcore_id;
4396
4397         if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == 0)
4398                 return;
4399
4400         for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
4401                 gaudi2_stop_dcore_dec(hdev, dcore_id);
4402
4403         gaudi2_stop_pcie_dec(hdev);
4404 }
4405
4406 static void gaudi2_set_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
4407 {
4408         u32 reg_base, reg_val;
4409
4410         reg_base = gaudi2_arc_blocks_bases[cpu_id];
4411         if (run_mode == HL_ENGINE_CORE_RUN)
4412                 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 1);
4413         else
4414                 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_HALT_REQ_MASK, 1);
4415
4416         WREG32(reg_base + ARC_HALT_REQ_OFFSET, reg_val);
4417 }
4418
4419 static void gaudi2_halt_arcs(struct hl_device *hdev)
4420 {
4421         u16 arc_id;
4422
4423         for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++) {
4424                 if (gaudi2_is_arc_enabled(hdev, arc_id))
4425                         gaudi2_set_arc_running_mode(hdev, arc_id, HL_ENGINE_CORE_HALT);
4426         }
4427 }
4428
4429 static int gaudi2_verify_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
4430 {
4431         int rc;
4432         u32 reg_base, val, ack_mask, timeout_usec = 100000;
4433
4434         if (hdev->pldm)
4435                 timeout_usec *= 100;
4436
4437         reg_base = gaudi2_arc_blocks_bases[cpu_id];
4438         if (run_mode == HL_ENGINE_CORE_RUN)
4439                 ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_RUN_ACK_MASK;
4440         else
4441                 ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_HALT_ACK_MASK;
4442
4443         rc = hl_poll_timeout(hdev, reg_base + ARC_HALT_ACK_OFFSET,
4444                                 val, ((val & ack_mask) == ack_mask),
4445                                 1000, timeout_usec);
4446
4447         if (!rc) {
4448                 /* Clear the run/halt request */
4449                 val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 0);
4450                 WREG32(reg_base + ARC_HALT_REQ_OFFSET, val);
4451         }
4452
4453         return rc;
4454 }
4455
4456 static void gaudi2_reset_arcs(struct hl_device *hdev)
4457 {
4458         struct gaudi2_device *gaudi2 = hdev->asic_specific;
4459         u16 arc_id;
4460
4461         if (!gaudi2)
4462                 return;
4463
4464         for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++)
4465                 if (gaudi2_is_arc_enabled(hdev, arc_id))
4466                         gaudi2_clr_arc_id_cap(hdev, arc_id);
4467 }
4468
4469 static void gaudi2_nic_qmans_manual_flush(struct hl_device *hdev)
4470 {
4471         struct gaudi2_device *gaudi2 = hdev->asic_specific;
4472         u32 queue_id;
4473         int i;
4474
4475         if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
4476                 return;
4477
4478         queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
4479
4480         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
4481                 if (!(hdev->nic_ports_mask & BIT(i)))
4482                         continue;
4483
4484                 gaudi2_qman_manual_flush_common(hdev, queue_id);
4485         }
4486 }
4487
4488 static int gaudi2_set_engine_cores(struct hl_device *hdev, u32 *core_ids,
4489                                         u32 num_cores, u32 core_command)
4490 {
4491         int i, rc;
4492
4493         for (i = 0 ; i < num_cores ; i++) {
4494                 if (gaudi2_is_arc_enabled(hdev, core_ids[i]))
4495                         gaudi2_set_arc_running_mode(hdev, core_ids[i], core_command);
4496         }
4497
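        /* After requesting the new mode from all cores, verify each core acknowledged it */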
4498         for (i = 0 ; i < num_cores ; i++) {
4499                 if (gaudi2_is_arc_enabled(hdev, core_ids[i])) {
4500                         rc = gaudi2_verify_arc_running_mode(hdev, core_ids[i], core_command);
4501
4502                         if (rc) {
4503                                 dev_err(hdev->dev, "failed to %s arc: %d\n",
4504                                         (core_command == HL_ENGINE_CORE_HALT) ?
4505                                         "HALT" : "RUN", core_ids[i]);
4506                                 return rc;
4507                         }
4508                 }
4509         }
4510
4511         return 0;
4512 }
4513
4514 static int gaudi2_set_tpc_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command)
4515 {
4516         struct gaudi2_device *gaudi2 = hdev->asic_specific;
4517         u32 reg_base, reg_addr, reg_val, tpc_id;
4518
4519         if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
4520                 return 0;
4521
4522         tpc_id = gaudi2_tpc_engine_id_to_tpc_id[engine_id];
4523         if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + tpc_id)))
4524                 return 0;
4525
4526         reg_base = gaudi2_tpc_cfg_blocks_bases[tpc_id];
4527         reg_addr = reg_base + TPC_CFG_STALL_OFFSET;
4528         reg_val = FIELD_PREP(DCORE0_TPC0_CFG_TPC_STALL_V_MASK,
4529                         !!(engine_command == HL_ENGINE_STALL));
4530         WREG32(reg_addr, reg_val);
4531
4532         if (engine_command == HL_ENGINE_RESUME) {
4533                 reg_base = gaudi2_tpc_eml_cfg_blocks_bases[tpc_id];
4534                 reg_addr = reg_base + TPC_EML_CFG_DBG_CNT_OFFSET;
4535                 RMWREG32(reg_addr, 0x1, DCORE0_TPC0_EML_CFG_DBG_CNT_DBG_EXIT_MASK);
4536         }
4537
4538         return 0;
4539 }
4540
4541 static int gaudi2_set_mme_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command)
4542 {
4543         struct gaudi2_device *gaudi2 = hdev->asic_specific;
4544         u32 reg_base, reg_addr, reg_val, mme_id;
4545
4546         mme_id = gaudi2_mme_engine_id_to_mme_id[engine_id];
4547         if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + mme_id)))
4548                 return 0;
4549
4550         reg_base = gaudi2_mme_ctrl_lo_blocks_bases[mme_id];
4551         reg_addr = reg_base + MME_CTRL_LO_QM_STALL_OFFSET;
4552         reg_val = FIELD_PREP(DCORE0_MME_CTRL_LO_QM_STALL_V_MASK,
4553                         !!(engine_command == HL_ENGINE_STALL));
4554         WREG32(reg_addr, reg_val);
4555
4556         return 0;
4557 }
4558
4559 static int gaudi2_set_edma_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command)
4560 {
4561         struct gaudi2_device *gaudi2 = hdev->asic_specific;
4562         u32 reg_base, reg_addr, reg_val, edma_id;
4563
4564         if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
4565                 return 0;
4566
4567         edma_id = gaudi2_edma_engine_id_to_edma_id[engine_id];
4568         if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + edma_id)))
4569                 return 0;
4570
4571         reg_base = gaudi2_dma_core_blocks_bases[edma_id];
4572         reg_addr = reg_base + EDMA_CORE_CFG_STALL_OFFSET;
4573         reg_val = FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_HALT_MASK,
4574                         !!(engine_command == HL_ENGINE_STALL));
4575         WREG32(reg_addr, reg_val);
4576
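        /* When stalling, set the flush bit as well so the engine is drained */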
4577         if (engine_command == HL_ENGINE_STALL) {
4578                 reg_val = FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_HALT_MASK, 0x1) |
4579                                 FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_FLUSH_MASK, 0x1);
4580                 WREG32(reg_addr, reg_val);
4581         }
4582
4583         return 0;
4584 }
4585
4586 static int gaudi2_set_engine_modes(struct hl_device *hdev,
4587                 u32 *engine_ids, u32 num_engines, u32 engine_command)
4588 {
4589         int i, rc;
4590
4591         for (i = 0 ; i < num_engines ; ++i) {
4592                 switch (engine_ids[i]) {
4593                 case GAUDI2_DCORE0_ENGINE_ID_TPC_0 ... GAUDI2_DCORE0_ENGINE_ID_TPC_5:
4594                 case GAUDI2_DCORE1_ENGINE_ID_TPC_0 ... GAUDI2_DCORE1_ENGINE_ID_TPC_5:
4595                 case GAUDI2_DCORE2_ENGINE_ID_TPC_0 ... GAUDI2_DCORE2_ENGINE_ID_TPC_5:
4596                 case GAUDI2_DCORE3_ENGINE_ID_TPC_0 ... GAUDI2_DCORE3_ENGINE_ID_TPC_5:
4597                         rc = gaudi2_set_tpc_engine_mode(hdev, engine_ids[i], engine_command);
4598                         if (rc)
4599                                 return rc;
4600
4601                         break;
4602                 case GAUDI2_DCORE0_ENGINE_ID_MME:
4603                 case GAUDI2_DCORE1_ENGINE_ID_MME:
4604                 case GAUDI2_DCORE2_ENGINE_ID_MME:
4605                 case GAUDI2_DCORE3_ENGINE_ID_MME:
4606                         rc = gaudi2_set_mme_engine_mode(hdev, engine_ids[i], engine_command);
4607                         if (rc)
4608                                 return rc;
4609
4610                         break;
4611                 case GAUDI2_DCORE0_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE0_ENGINE_ID_EDMA_1:
4612                 case GAUDI2_DCORE1_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE1_ENGINE_ID_EDMA_1:
4613                 case GAUDI2_DCORE2_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE2_ENGINE_ID_EDMA_1:
4614                 case GAUDI2_DCORE3_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE3_ENGINE_ID_EDMA_1:
4615                         rc = gaudi2_set_edma_engine_mode(hdev, engine_ids[i], engine_command);
4616                         if (rc)
4617                                 return rc;
4618
4619                         break;
4620                 default:
4621                         dev_err(hdev->dev, "Invalid engine ID %u\n", engine_ids[i]);
4622                         return -EINVAL;
4623                 }
4624         }
4625
4626         return 0;
4627 }
4628
4629 static int gaudi2_set_engines(struct hl_device *hdev, u32 *engine_ids,
4630                                         u32 num_engines, u32 engine_command)
4631 {
4632         switch (engine_command) {
4633         case HL_ENGINE_CORE_HALT:
4634         case HL_ENGINE_CORE_RUN:
4635                 return gaudi2_set_engine_cores(hdev, engine_ids, num_engines, engine_command);
4636
4637         case HL_ENGINE_STALL:
4638         case HL_ENGINE_RESUME:
4639                 return gaudi2_set_engine_modes(hdev, engine_ids, num_engines, engine_command);
4640
4641         default:
4642                 dev_err(hdev->dev, "failed to execute command id %u\n", engine_command);
4643                 return -EINVAL;
4644         }
4645 }
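
/*
 * Illustrative usage sketch (hypothetical caller, not part of the driver):
 * stall a single TPC through the dispatch above and resume it afterwards.
 * In the real driver this path is reached via the ASIC function table
 * rather than by a direct call.
 */
static int example_stall_and_resume_tpc(struct hl_device *hdev)
{
        u32 ids[] = { GAUDI2_DCORE0_ENGINE_ID_TPC_0 };
        int rc;

        rc = gaudi2_set_engines(hdev, ids, ARRAY_SIZE(ids), HL_ENGINE_STALL);
        if (rc)
                return rc;

        /* ... inspection/recovery work would happen here ... */

        return gaudi2_set_engines(hdev, ids, ARRAY_SIZE(ids), HL_ENGINE_RESUME);
}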
4646
4647 static void gaudi2_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4648 {
4649         u32 wait_timeout_ms;
4650
4651         if (hdev->pldm)
4652                 wait_timeout_ms = GAUDI2_PLDM_RESET_WAIT_MSEC;
4653         else
4654                 wait_timeout_ms = GAUDI2_RESET_WAIT_MSEC;
4655
4656         if (fw_reset)
4657                 goto skip_engines;
4658
4659         gaudi2_stop_dma_qmans(hdev);
4660         gaudi2_stop_mme_qmans(hdev);
4661         gaudi2_stop_tpc_qmans(hdev);
4662         gaudi2_stop_rot_qmans(hdev);
4663         gaudi2_stop_nic_qmans(hdev);
4664         msleep(wait_timeout_ms);
4665
4666         gaudi2_halt_arcs(hdev);
4667         gaudi2_dma_stall(hdev);
4668         gaudi2_mme_stall(hdev);
4669         gaudi2_tpc_stall(hdev);
4670         gaudi2_rotator_stall(hdev);
4671
4672         msleep(wait_timeout_ms);
4673
4674         gaudi2_stop_dec(hdev);
4675
4676         /*
4677          * In case of soft reset, do a manual flush for the QMANs (currently
4678          * done only for the NIC QMANs).
4679          */
4680         if (!hard_reset)
4681                 gaudi2_nic_qmans_manual_flush(hdev);
4682
4683         gaudi2_disable_dma_qmans(hdev);
4684         gaudi2_disable_mme_qmans(hdev);
4685         gaudi2_disable_tpc_qmans(hdev);
4686         gaudi2_disable_rot_qmans(hdev);
4687         gaudi2_disable_nic_qmans(hdev);
4688         gaudi2_disable_timestamp(hdev);
4689
4690 skip_engines:
4691         if (hard_reset) {
4692                 gaudi2_disable_msix(hdev);
4693                 return;
4694         }
4695
4696         gaudi2_sync_irqs(hdev);
4697 }
4698
4699 static void gaudi2_init_firmware_preload_params(struct hl_device *hdev)
4700 {
4701         struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
4702
4703         pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
4704         pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
4705         pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
4706         pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
4707         pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
4708         pre_fw_load->wait_for_preboot_timeout = GAUDI2_PREBOOT_REQ_TIMEOUT_USEC;
4709 }
4710
4711 static void gaudi2_init_firmware_loader(struct hl_device *hdev)
4712 {
4713         struct fw_load_mgr *fw_loader = &hdev->fw_loader;
4714         struct dynamic_fw_load_mgr *dynamic_loader;
4715         struct cpu_dyn_regs *dyn_regs;
4716
4717         /* fill common fields */
4718         fw_loader->fw_comp_loaded = FW_TYPE_NONE;
4719         fw_loader->boot_fit_img.image_name = GAUDI2_BOOT_FIT_FILE;
4720         fw_loader->linux_img.image_name = GAUDI2_LINUX_FW_FILE;
4721         fw_loader->boot_fit_timeout = GAUDI2_BOOT_FIT_REQ_TIMEOUT_USEC;
4722         fw_loader->skip_bmc = false;
4723         fw_loader->sram_bar_id = SRAM_CFG_BAR_ID;
4724         fw_loader->dram_bar_id = DRAM_BAR_ID;
4725         fw_loader->cpu_timeout = GAUDI2_CPU_TIMEOUT_USEC;
4726
4727         /* Here we set initial values for a few specific dynamic regs (before
4728          * reading the first descriptor from FW, those values have to be
4729          * hard-coded). In later stages of the protocol those values will be
4730          * updated automatically by reading the FW descriptor, so the data
4731          * there will always be up-to-date.
4732          */
4733         dynamic_loader = &hdev->fw_loader.dynamic_loader;
4734         dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
4735         dyn_regs->kmd_msg_to_cpu = cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
4736         dyn_regs->cpu_cmd_status_to_host = cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
4737         dynamic_loader->wait_for_bl_timeout = GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC;
4738 }
4739
4740 static int gaudi2_init_cpu(struct hl_device *hdev)
4741 {
4742         struct gaudi2_device *gaudi2 = hdev->asic_specific;
4743         int rc;
4744
4745         if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
4746                 return 0;
4747
4748         if (gaudi2->hw_cap_initialized & HW_CAP_CPU)
4749                 return 0;
4750
4751         rc = hl_fw_init_cpu(hdev);
4752         if (rc)
4753                 return rc;
4754
4755         gaudi2->hw_cap_initialized |= HW_CAP_CPU;
4756
4757         return 0;
4758 }
4759
4760 static int gaudi2_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
4761 {
4762         struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
4763         struct asic_fixed_properties *prop = &hdev->asic_prop;
4764         struct gaudi2_device *gaudi2 = hdev->asic_specific;
4765         struct cpu_dyn_regs *dyn_regs;
4766         struct hl_eq *eq;
4767         u32 status;
4768         int err;
4769
4770         if (!hdev->cpu_queues_enable)
4771                 return 0;
4772
4773         if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)
4774                 return 0;
4775
4776         eq = &hdev->event_queue;
4777
4778         dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4779
4780         WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
4781         WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
4782
4783         WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
4784         WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
4785
4786         WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW, lower_32_bits(hdev->cpu_accessible_dma_address));
4787         WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH, upper_32_bits(hdev->cpu_accessible_dma_address));
4788
4789         WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
4790         WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
4791         WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
4792
4793         /* Used for EQ CI */
4794         WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
4795
4796         WREG32(mmCPU_IF_PF_PQ_PI, 0);
4797
4798         WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
4799
4800         /* Let the ARC know we are ready as it is now handling those queues */
4801
4802         WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq),
4803                 gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id);
4804
4805         err = hl_poll_timeout(
4806                 hdev,
4807                 mmCPU_IF_QUEUE_INIT,
4808                 status,
4809                 (status == PQ_INIT_STATUS_READY_FOR_HOST),
4810                 1000,
4811                 cpu_timeout);
4812
4813         if (err) {
4814                 dev_err(hdev->dev, "Failed to communicate with device CPU (timeout)\n");
4815                 return -EIO;
4816         }
4817
4818         /* update FW application security bits */
4819         if (prop->fw_cpu_boot_dev_sts0_valid)
4820                 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
4821
4822         if (prop->fw_cpu_boot_dev_sts1_valid)
4823                 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
4824
4825         gaudi2->hw_cap_initialized |= HW_CAP_CPU_Q;
4826         return 0;
4827 }
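
/*
 * Illustrative model of the hl_poll_timeout() pattern used above (a sketch,
 * not the real macro): re-read a status register every sleep_us microseconds
 * until the expected value shows up or timeout_us elapses. read_status() is
 * a hypothetical stand-in for the RREG32() done by the macro.
 */
static int example_poll_queue_init(u32 (*read_status)(void), u32 sleep_us,
                                        u32 timeout_us)
{
        u32 elapsed_us = 0;

        for (;;) {
                if (read_status() == PQ_INIT_STATUS_READY_FOR_HOST)
                        return 0;

                if (elapsed_us >= timeout_us)
                        return -ETIMEDOUT;

                usleep_range(sleep_us, sleep_us + 10);
                elapsed_us += sleep_us;
        }
}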
4828
4829 static void gaudi2_init_qman_pq(struct hl_device *hdev, u32 reg_base,
4830                                 u32 queue_id_base)
4831 {
4832         struct hl_hw_queue *q;
4833         u32 pq_id, pq_offset;
4834
4835         for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) {
4836                 q = &hdev->kernel_queues[queue_id_base + pq_id];
4837                 pq_offset = pq_id * 4;
4838
4839                 WREG32(reg_base + QM_PQ_BASE_LO_0_OFFSET + pq_offset,
4840                                 lower_32_bits(q->bus_address));
4841                 WREG32(reg_base + QM_PQ_BASE_HI_0_OFFSET + pq_offset,
4842                                 upper_32_bits(q->bus_address));
4843                 WREG32(reg_base + QM_PQ_SIZE_0_OFFSET + pq_offset, ilog2(HL_QUEUE_LENGTH));
4844                 WREG32(reg_base + QM_PQ_PI_0_OFFSET + pq_offset, 0);
4845                 WREG32(reg_base + QM_PQ_CI_0_OFFSET + pq_offset, 0);
4846         }
4847 }
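
/*
 * Illustrative sketch: the PQ size register above is programmed with log2 of
 * the element count, e.g. a hypothetical 4096-entry queue is written as 12.
 * This open-coded loop mirrors what ilog2() returns for power-of-two inputs.
 */
static u32 example_pq_size_field(u32 queue_length)
{
        u32 log = 0;

        while (queue_length >>= 1)
                log++;

        return log;     /* example_pq_size_field(4096) == 12 */
}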
4848
4849 static void gaudi2_init_qman_cp(struct hl_device *hdev, u32 reg_base)
4850 {
4851         u32 cp_id, cp_offset, mtr_base_lo, mtr_base_hi, so_base_lo, so_base_hi;
4852
4853         mtr_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
4854         mtr_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
4855         so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4856         so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4857
4858         for (cp_id = 0 ; cp_id < NUM_OF_CP_PER_QMAN; cp_id++) {
4859                 cp_offset = cp_id * 4;
4860
4861                 WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_LO_0_OFFSET + cp_offset, mtr_base_lo);
4862                 WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_HI_0_OFFSET + cp_offset, mtr_base_hi);
4863                 WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_LO_0_OFFSET + cp_offset, so_base_lo);
4864                 WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_HI_0_OFFSET + cp_offset, so_base_hi);
4865         }
4866
4867         /* allow QMANs to accept work from ARC CQF */
4868         WREG32(reg_base + QM_CP_CFG_OFFSET, FIELD_PREP(PDMA0_QM_CP_CFG_SWITCH_EN_MASK, 0x1));
4869 }
4870
4871 static void gaudi2_init_qman_pqc(struct hl_device *hdev, u32 reg_base,
4872                                 u32 queue_id_base)
4873 {
4874         struct gaudi2_device *gaudi2 = hdev->asic_specific;
4875         u32 pq_id, pq_offset, so_base_lo, so_base_hi;
4876
4877         so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4878         so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4879
4880         for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) {
4881                 pq_offset = pq_id * 4;
4882
4883                 /* Point the QMAN HBW completion address at the scratchpad, as it is not needed */
4884                 WREG32(reg_base + QM_PQC_HBW_BASE_LO_0_OFFSET + pq_offset,
4885                                 lower_32_bits(gaudi2->scratchpad_bus_address));
4886                 WREG32(reg_base + QM_PQC_HBW_BASE_HI_0_OFFSET + pq_offset,
4887                                 upper_32_bits(gaudi2->scratchpad_bus_address));
4888                 WREG32(reg_base + QM_PQC_SIZE_0_OFFSET + pq_offset,
4889                                 ilog2(PAGE_SIZE / sizeof(struct hl_cq_entry)));
4890
4891                 WREG32(reg_base + QM_PQC_PI_0_OFFSET + pq_offset, 0);
4892                 WREG32(reg_base + QM_PQC_LBW_WDATA_0_OFFSET + pq_offset, QM_PQC_LBW_WDATA);
4893                 WREG32(reg_base + QM_PQC_LBW_BASE_LO_0_OFFSET + pq_offset, so_base_lo);
4894                 WREG32(reg_base + QM_PQC_LBW_BASE_HI_0_OFFSET + pq_offset, so_base_hi);
4895         }
4896
4897         /* Enable QMAN H/W completion */
4898         WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT);
4899 }
4900
4901 static u32 gaudi2_get_dyn_sp_reg(struct hl_device *hdev, u32 queue_id_base)
4902 {
4903         struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4904         u32 sp_reg_addr;
4905
4906         switch (queue_id_base) {
4907         case GAUDI2_QUEUE_ID_PDMA_0_0...GAUDI2_QUEUE_ID_PDMA_1_3:
4908                 fallthrough;
4909         case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
4910                 fallthrough;
4911         case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
4912                 fallthrough;
4913         case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
4914                 fallthrough;
4915         case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
4916                 sp_reg_addr = le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
4917                 break;
4918         case GAUDI2_QUEUE_ID_DCORE0_MME_0_0...GAUDI2_QUEUE_ID_DCORE0_MME_0_3:
4919                 fallthrough;
4920         case GAUDI2_QUEUE_ID_DCORE1_MME_0_0...GAUDI2_QUEUE_ID_DCORE1_MME_0_3:
4921                 fallthrough;
4922         case GAUDI2_QUEUE_ID_DCORE2_MME_0_0...GAUDI2_QUEUE_ID_DCORE2_MME_0_3:
4923                 fallthrough;
4924         case GAUDI2_QUEUE_ID_DCORE3_MME_0_0...GAUDI2_QUEUE_ID_DCORE3_MME_0_3:
4925                 sp_reg_addr = le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
4926                 break;
4927         case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3:
4928                 fallthrough;
4929         case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3:
4930                 fallthrough;
4931         case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3:
4932                 fallthrough;
4933         case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3:
4934                 sp_reg_addr = le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
4935                 break;
4936         case GAUDI2_QUEUE_ID_ROT_0_0...GAUDI2_QUEUE_ID_ROT_1_3:
4937                 sp_reg_addr = le32_to_cpu(dyn_regs->gic_rot_qm_irq_ctrl);
4938                 break;
4939         case GAUDI2_QUEUE_ID_NIC_0_0...GAUDI2_QUEUE_ID_NIC_23_3:
4940                 sp_reg_addr = le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
4941                 break;
4942         default:
4943                 dev_err(hdev->dev, "Unexpected h/w queue %d\n", queue_id_base);
4944                 return 0;
4945         }
4946
4947         return sp_reg_addr;
4948 }
4949
4950 static void gaudi2_init_qman_common(struct hl_device *hdev, u32 reg_base,
4951                                         u32 queue_id_base)
4952 {
4953         u32 glbl_prot = QMAN_MAKE_TRUSTED, irq_handler_offset;
4954         int map_table_entry;
4955
4956         WREG32(reg_base + QM_GLBL_PROT_OFFSET, glbl_prot);
4957
4958         irq_handler_offset = gaudi2_get_dyn_sp_reg(hdev, queue_id_base);
4959         WREG32(reg_base + QM_GLBL_ERR_ADDR_LO_OFFSET, lower_32_bits(CFG_BASE + irq_handler_offset));
4960         WREG32(reg_base + QM_GLBL_ERR_ADDR_HI_OFFSET, upper_32_bits(CFG_BASE + irq_handler_offset));
4961
4962         map_table_entry = gaudi2_qman_async_event_id[queue_id_base];
4963         WREG32(reg_base + QM_GLBL_ERR_WDATA_OFFSET,
4964                 gaudi2_irq_map_table[map_table_entry].cpu_id);
4965
4966         WREG32(reg_base + QM_ARB_ERR_MSG_EN_OFFSET, QM_ARB_ERR_MSG_EN_MASK);
4967
4968         WREG32(reg_base + QM_ARB_SLV_CHOISE_WDT_OFFSET, GAUDI2_ARB_WDT_TIMEOUT);
4969         WREG32(reg_base + QM_GLBL_CFG1_OFFSET, 0);
4970         WREG32(reg_base + QM_GLBL_CFG2_OFFSET, 0);
4971
4972         /* Enable the QMAN channel.
4973          * PDMA QMAN configuration is different, as we do not allow the user
4974          * to access some of the CPs.
4975          * PDMA0: CP2/3 are reserved for the ARC usage.
4976          * PDMA1: CP1/2/3 are reserved for the ARC usage.
4977          */
4978         if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0])
4979                 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA1_QMAN_ENABLE);
4980         else if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0])
4981                 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA0_QMAN_ENABLE);
4982         else
4983                 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, QMAN_ENABLE);
4984 }
4985
4986 static void gaudi2_init_qman(struct hl_device *hdev, u32 reg_base,
4987                 u32 queue_id_base)
4988 {
4989         u32 pq_id;
4990
4991         for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++)
4992                 hdev->kernel_queues[queue_id_base + pq_id].cq_id = GAUDI2_RESERVED_CQ_CS_COMPLETION;
4993
4994         gaudi2_init_qman_pq(hdev, reg_base, queue_id_base);
4995         gaudi2_init_qman_cp(hdev, reg_base);
4996         gaudi2_init_qman_pqc(hdev, reg_base, queue_id_base);
4997         gaudi2_init_qman_common(hdev, reg_base, queue_id_base);
4998 }
4999
5000 static void gaudi2_init_dma_core(struct hl_device *hdev, u32 reg_base,
5001                                 u32 dma_core_id, bool is_secure)
5002 {
5003         u32 prot, irq_handler_offset;
5004         struct cpu_dyn_regs *dyn_regs;
5005         int map_table_entry;
5006
5007         prot = 1 << ARC_FARM_KDMA_PROT_ERR_VAL_SHIFT;
5008         if (is_secure)
5009                 prot |= 1 << ARC_FARM_KDMA_PROT_VAL_SHIFT;
5010
5011         WREG32(reg_base + DMA_CORE_PROT_OFFSET, prot);
5012
5013         dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
5014         irq_handler_offset = le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
5015
5016         WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_LO_OFFSET,
5017                         lower_32_bits(CFG_BASE + irq_handler_offset));
5018
5019         WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_HI_OFFSET,
5020                         upper_32_bits(CFG_BASE + irq_handler_offset));
5021
5022         map_table_entry = gaudi2_dma_core_async_event_id[dma_core_id];
5023         WREG32(reg_base + DMA_CORE_ERRMSG_WDATA_OFFSET,
5024                 gaudi2_irq_map_table[map_table_entry].cpu_id);
5025
5026         /* Enable the DMA channel */
5027         WREG32(reg_base + DMA_CORE_CFG_0_OFFSET, 1 << ARC_FARM_KDMA_CFG_0_EN_SHIFT);
5028 }
5029
5030 static void gaudi2_init_kdma(struct hl_device *hdev)
5031 {
5032         struct gaudi2_device *gaudi2 = hdev->asic_specific;
5033         u32 reg_base;
5034
5035         if ((gaudi2->hw_cap_initialized & HW_CAP_KDMA) == HW_CAP_KDMA)
5036                 return;
5037
5038         reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_KDMA];
5039
5040         gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_KDMA, true);
5041
5042         gaudi2->hw_cap_initialized |= HW_CAP_KDMA;
5043 }
5044
5045 static void gaudi2_init_pdma(struct hl_device *hdev)
5046 {
5047         struct gaudi2_device *gaudi2 = hdev->asic_specific;
5048         u32 reg_base;
5049
5050         if ((gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK) == HW_CAP_PDMA_MASK)
5051                 return;
5052
5053         reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA0];
5054         gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA0, false);
5055
5056         reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0];
5057         gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_0_0);
5058
5059         reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA1];
5060         gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA1, false);
5061
5062         reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0];
5063         gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_1_0);
5064
5065         gaudi2->hw_cap_initialized |= HW_CAP_PDMA_MASK;
5066 }
5067
5068 static void gaudi2_init_edma_instance(struct hl_device *hdev, u8 seq)
5069 {
5070         u32 reg_base, base_edma_core_id, base_edma_qman_id;
5071
5072         base_edma_core_id = DMA_CORE_ID_EDMA0 + seq;
5073         base_edma_qman_id = edma_stream_base[seq];
5074
5075         reg_base = gaudi2_dma_core_blocks_bases[base_edma_core_id];
5076         gaudi2_init_dma_core(hdev, reg_base, base_edma_core_id, false);
5077
5078         reg_base = gaudi2_qm_blocks_bases[base_edma_qman_id];
5079         gaudi2_init_qman(hdev, reg_base, base_edma_qman_id);
5080 }
5081
5082 static void gaudi2_init_edma(struct hl_device *hdev)
5083 {
5084         struct asic_fixed_properties *prop = &hdev->asic_prop;
5085         struct gaudi2_device *gaudi2 = hdev->asic_specific;
5086         int dcore, inst;
5087
5088         if ((gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK) == HW_CAP_EDMA_MASK)
5089                 return;
5090
5091         for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
5092                 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
5093                         u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
5094
5095                         if (!(prop->edma_enabled_mask & BIT(seq)))
5096                                 continue;
5097
5098                         gaudi2_init_edma_instance(hdev, seq);
5099
5100                         gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_EDMA_SHIFT + seq);
5101                 }
5102         }
5103 }
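
/*
 * Illustrative sketch of the capability bookkeeping above (not driver code):
 * each EDMA instance owns one bit starting at HW_CAP_EDMA_SHIFT, and
 * HW_CAP_EDMA_MASK covers all instances, so "fully initialized" is an exact
 * mask compare.
 */
static u64 example_mark_edma_initialized(u64 hw_cap_initialized, u8 seq)
{
        return hw_cap_initialized | BIT_ULL(HW_CAP_EDMA_SHIFT + seq);
}

static bool example_all_edmas_initialized(u64 hw_cap_initialized)
{
        return (hw_cap_initialized & HW_CAP_EDMA_MASK) == HW_CAP_EDMA_MASK;
}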
5104
5105 /*
5106  * gaudi2_arm_monitors_for_virt_msix_db() - Arm monitors for writing to the virtual MSI-X doorbell.
5107  * @hdev: pointer to habanalabs device structure.
5108  * @sob_id: sync object ID.
5109  * @first_mon_id: ID of first monitor out of 3 consecutive monitors.
5110  * @interrupt_id: interrupt ID.
5111  *
5112  * Some initiators cannot have an HBW address in their completion address registers, and thus
5113  * cannot write directly to the HBW host memory of the virtual MSI-X doorbell.
5114  * Instead, they are configured to LBW write to a sync object, and a monitor will do the HBW write.
5115  *
5116  * The mechanism in the sync manager block is composed of a master monitor with 3 messages.
5117  * In addition to the HBW write, the other 2 messages are for preparing the monitor for the next
5118  * completion, by decrementing the sync object value and re-arming the monitor.
5119  */
5120 static void gaudi2_arm_monitors_for_virt_msix_db(struct hl_device *hdev, u32 sob_id,
5121                                                         u32 first_mon_id, u32 interrupt_id)
5122 {
5123         u32 sob_offset, first_mon_offset, mon_offset, payload, sob_group, mode, arm, config;
5124         struct gaudi2_device *gaudi2 = hdev->asic_specific;
5125         u64 addr;
5126         u8 mask;
5127
5128         /* Reset the SOB value */
5129         sob_offset = sob_id * sizeof(u32);
5130         WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0);
5131
5132         /* Configure 3 monitors:
5133          * 1. Write interrupt ID to the virtual MSI-X doorbell (master monitor)
5134          * 2. Decrement SOB value by 1.
5135          * 3. Re-arm the master monitor.
5136          */
5137
5138         first_mon_offset = first_mon_id * sizeof(u32);
5139
5140         /* 2nd monitor: Decrement SOB value by 1 */
5141         mon_offset = first_mon_offset + sizeof(u32);
5142
5143         addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
5144         WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
5145         WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
5146
5147         payload = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 0x7FFF) | /* "-1" */
5148                         FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_MASK, 1) |
5149                         FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1);
5150         WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
5151
5152         /* 3rd monitor: Re-arm the master monitor */
5153         mon_offset = first_mon_offset + 2 * sizeof(u32);
5154
5155         addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + first_mon_offset;
5156         WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
5157         WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
5158
5159         sob_group = sob_id / 8;
5160         mask = ~BIT(sob_id & 0x7);
5161         mode = 0; /* comparison mode is "greater than or equal to" */
5162         arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sob_group) |
5163                         FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask) |
5164                         FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode) |
5165                         FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, 1);
5166
5167         payload = arm;
5168         WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
5169
5170         /* 1st monitor (master): Write interrupt ID to the virtual MSI-X doorbell */
5171         mon_offset = first_mon_offset;
5172
5173         config = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_WR_NUM_MASK, 2); /* "2": 3 writes */
5174         WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + mon_offset, config);
5175
5176         addr = gaudi2->virt_msix_db_dma_addr;
5177         WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
5178         WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
5179
5180         payload = interrupt_id;
5181         WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
5182
5183         WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, arm);
5184 }
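
/*
 * Illustrative sketch of the two bit tricks above (not driver code). The
 * monitor payload 0x7FFF with the sign bit set acts as "-1" assuming the
 * sync object value wraps at 15 bits, and the arm mask clears exactly one
 * bit so that only SOB (sob_id % 8) within its group of 8 is watched.
 */
static u16 example_sob_decrement(u16 sob_val)
{
        return (sob_val + 0x7FFF) & 0x7FFF;     /* e.g. 5 -> 4 */
}

static u8 example_mon_arm_mask(u32 sob_id)
{
        return ~BIT(sob_id & 0x7);
}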
5185
5186 static void gaudi2_prepare_sm_for_virt_msix_db(struct hl_device *hdev)
5187 {
5188         u32 decoder_id, sob_id, first_mon_id, interrupt_id;
5189         struct asic_fixed_properties *prop = &hdev->asic_prop;
5190
5191         /* Decoder normal/abnormal interrupts */
5192         for (decoder_id = 0 ; decoder_id < NUMBER_OF_DEC ; ++decoder_id) {
5193                 if (!(prop->decoder_enabled_mask & BIT(decoder_id)))
5194                         continue;
5195
5196                 sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id;
5197                 first_mon_id = GAUDI2_RESERVED_MON_DEC_NRM_FIRST + 3 * decoder_id;
5198                 interrupt_id = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 2 * decoder_id;
5199                 gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id);
5200
5201                 sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id;
5202                 first_mon_id = GAUDI2_RESERVED_MON_DEC_ABNRM_FIRST + 3 * decoder_id;
5203                 interrupt_id += 1;
5204                 gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id);
5205         }
5206 }
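
/*
 * Illustrative sketch of the decoder-to-interrupt mapping above: each
 * enabled decoder consumes two consecutive interrupt IDs, normal first and
 * abnormal right after (a sketch mirroring the loop above, not driver code).
 */
static u32 example_dec_interrupt_id(u32 decoder_id, bool abnormal)
{
        return GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 2 * decoder_id + (abnormal ? 1 : 0);
}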
5207
5208 static void gaudi2_init_sm(struct hl_device *hdev)
5209 {
5210         struct gaudi2_device *gaudi2 = hdev->asic_specific;
5211         u64 cq_address;
5212         u32 reg_val;
5213         int i;
5214
5215         /* Enable HBW/LBW CQ for completion monitors */
5216         reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1);
5217         reg_val |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_LBW_EN_MASK, 1);
5218
5219         for (i = 0 ; i < GAUDI2_MAX_PENDING_CS ; i++)
5220                 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val);
5221
5222         /* Enable only HBW CQ for KDMA completion monitor */
5223         reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1);
5224         WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val);
5225
5226         /* Init CQ0 DB - configure the monitor to trigger MSI-X interrupt */
5227         WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0, lower_32_bits(gaudi2->virt_msix_db_dma_addr));
5228         WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0, upper_32_bits(gaudi2->virt_msix_db_dma_addr));
5229         WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0, GAUDI2_IRQ_NUM_COMPLETION);
5230
5231         for (i = 0 ; i < GAUDI2_RESERVED_CQ_NUMBER ; i++) {
5232                 cq_address =
5233                         hdev->completion_queue[i].bus_address;
5234
5235                 WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + (4 * i),
5236                                                         lower_32_bits(cq_address));
5237                 WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + (4 * i),
5238                                                         upper_32_bits(cq_address));
5239                 WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + (4 * i),
5240                                                         ilog2(HL_CQ_SIZE_IN_BYTES));
5241         }
5242
5243         /* Configure the kernel ASID and MMU bypass */
5244         WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_SEC, 0x10000);
5245         WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV, 0);
5246
5247         /* Initialize sync objects and monitors which are used for the virtual MSI-X doorbell */
5248         gaudi2_prepare_sm_for_virt_msix_db(hdev);
5249 }
5250
5251 static void gaudi2_init_mme_acc(struct hl_device *hdev, u32 reg_base)
5252 {
5253         struct gaudi2_device *gaudi2 = hdev->asic_specific;
5254         u32 reg_val;
5255         int i;
5256
5257         reg_val = FIELD_PREP(MME_ACC_INTR_MASK_WBC_ERR_RESP_MASK, 0);
5258         reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_POS_INF_MASK, 1);
5259         reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NEG_INF_MASK, 1);
5260         reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NAN_MASK, 1);
5261         reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_POS_INF_MASK, 1);
5262         reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_NEG_INF_MASK, 1);
5263
5264         WREG32(reg_base + MME_ACC_INTR_MASK_OFFSET, reg_val);
5265         WREG32(reg_base + MME_ACC_AP_LFSR_POLY_OFFSET, 0x80DEADAF);
5266
5267         for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++) {
5268                 WREG32(reg_base + MME_ACC_AP_LFSR_SEED_SEL_OFFSET, i);
5269                 WREG32(reg_base + MME_ACC_AP_LFSR_SEED_WDATA_OFFSET, gaudi2->lfsr_rand_seeds[i]);
5270         }
5271 }
5272
5273 static void gaudi2_init_dcore_mme(struct hl_device *hdev, int dcore_id,
5274                                                         bool config_qman_only)
5275 {
5276         u32 queue_id_base, reg_base;
5277
5278         switch (dcore_id) {
5279         case 0:
5280                 queue_id_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0;
5281                 break;
5282         case 1:
5283                 queue_id_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0;
5284                 break;
5285         case 2:
5286                 queue_id_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0;
5287                 break;
5288         case 3:
5289                 queue_id_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0;
5290                 break;
5291         default:
5292                 dev_err(hdev->dev, "Invalid dcore id %u\n", dcore_id);
5293                 return;
5294         }
5295
5296         if (!config_qman_only) {
5297                 reg_base = gaudi2_mme_acc_blocks_bases[dcore_id];
5298                 gaudi2_init_mme_acc(hdev, reg_base);
5299         }
5300
5301         reg_base = gaudi2_qm_blocks_bases[queue_id_base];
5302         gaudi2_init_qman(hdev, reg_base, queue_id_base);
5303 }
5304
5305 static void gaudi2_init_mme(struct hl_device *hdev)
5306 {
5307         struct gaudi2_device *gaudi2 = hdev->asic_specific;
5308         int i;
5309
5310         if ((gaudi2->hw_cap_initialized & HW_CAP_MME_MASK) == HW_CAP_MME_MASK)
5311                 return;
5312
5313         for (i = 0 ; i < NUM_OF_DCORES ; i++) {
5314                 gaudi2_init_dcore_mme(hdev, i, false);
5315
5316                 gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_MME_SHIFT + i);
5317         }
5318 }
5319
5320 static void gaudi2_init_tpc_cfg(struct hl_device *hdev, u32 reg_base)
5321 {
5322         /* Mask arithmetic and QM interrupts in TPC */
5323         WREG32(reg_base + TPC_CFG_TPC_INTR_MASK_OFFSET, 0x23FFFE);
5324
5325         /* Set 16 cache lines */
5326         WREG32(reg_base + TPC_CFG_MSS_CONFIG_OFFSET,
5327                         2 << DCORE0_TPC0_CFG_MSS_CONFIG_ICACHE_FETCH_LINE_NUM_SHIFT);
5328 }
5329
5330 struct gaudi2_tpc_init_cfg_data {
5331         enum gaudi2_queue_id dcore_tpc_qid_base[NUM_OF_DCORES];
5332 };
5333
5334 static void gaudi2_init_tpc_config(struct hl_device *hdev, int dcore, int inst,
5335                                         u32 offset, struct iterate_module_ctx *ctx)
5336 {
5337         struct gaudi2_device *gaudi2 = hdev->asic_specific;
5338         struct gaudi2_tpc_init_cfg_data *cfg_data = ctx->data;
5339         u32 queue_id_base;
5340         u8 seq;
5341
5342         queue_id_base = cfg_data->dcore_tpc_qid_base[dcore] + (inst * NUM_OF_PQ_PER_QMAN);
5343
5344         if (dcore == 0 && inst == (NUM_DCORE0_TPC - 1))
5345                 /* get the last sequence number */
5346                 seq = NUM_OF_DCORES * NUM_OF_TPC_PER_DCORE;
5347         else
5348                 seq = dcore * NUM_OF_TPC_PER_DCORE + inst;
5349
5350         gaudi2_init_tpc_cfg(hdev, mmDCORE0_TPC0_CFG_BASE + offset);
5351         gaudi2_init_qman(hdev, mmDCORE0_TPC0_QM_BASE + offset, queue_id_base);
5352
5353         gaudi2->tpc_hw_cap_initialized |= BIT_ULL(HW_CAP_TPC_SHIFT + seq);
5354 }
5355
5356 static void gaudi2_init_tpc(struct hl_device *hdev)
5357 {
5358         struct gaudi2_device *gaudi2 = hdev->asic_specific;
5359         struct gaudi2_tpc_init_cfg_data init_cfg_data;
5360         struct iterate_module_ctx tpc_iter;
5361
5362         if (!hdev->asic_prop.tpc_enabled_mask)
5363                 return;
5364
5365         if ((gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK) == HW_CAP_TPC_MASK)
5366                 return;
5367
5368         init_cfg_data.dcore_tpc_qid_base[0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0;
5369         init_cfg_data.dcore_tpc_qid_base[1] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0;
5370         init_cfg_data.dcore_tpc_qid_base[2] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0;
5371         init_cfg_data.dcore_tpc_qid_base[3] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0;
5372         tpc_iter.fn = &gaudi2_init_tpc_config;
5373         tpc_iter.data = &init_cfg_data;
5374         gaudi2_iterate_tpcs(hdev, &tpc_iter);
5375 }
5376
5377 static void gaudi2_init_rotator(struct hl_device *hdev)
5378 {
5379         struct gaudi2_device *gaudi2 = hdev->asic_specific;
5380         u32 i, reg_base, queue_id;
5381
5382         queue_id = GAUDI2_QUEUE_ID_ROT_0_0;
5383
5384         for (i = 0 ; i < NUM_OF_ROT ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
5385                 reg_base = gaudi2_qm_blocks_bases[queue_id];
5386                 gaudi2_init_qman(hdev, reg_base, queue_id);
5387
5388                 gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_ROT_SHIFT + i);
5389         }
5390 }
5391
5392 static void gaudi2_init_vdec_brdg_ctrl(struct hl_device *hdev, u64 base_addr, u32 decoder_id)
5393 {
5394         u32 sob_id;
5395
5396         /* VCMD normal interrupt */
5397         sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id;
5398         WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_AWADDR,
5399                         mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32));
5400         WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE);
5401
5402         /* VCMD abnormal interrupt */
5403         sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id;
5404         WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_AWADDR,
5405                         mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32));
5406         WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE);
5407 }
5408
5409 static void gaudi2_init_dec(struct hl_device *hdev)
5410 {
5411         struct gaudi2_device *gaudi2 = hdev->asic_specific;
5412         u32 dcore_id, dec_id, dec_bit;
5413         u64 base_addr;
5414
5415         if (!hdev->asic_prop.decoder_enabled_mask)
5416                 return;
5417
5418         if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == HW_CAP_DEC_MASK)
5419                 return;
5420
5421         for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
5422                 for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
5423                         dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id;
5424
5425                         if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
5426                                 continue;
5427
5428                         base_addr = mmDCORE0_DEC0_CMD_BASE +
5429                                         BRDG_CTRL_BLOCK_OFFSET +
5430                                         dcore_id * DCORE_OFFSET +
5431                                         dec_id * DCORE_VDEC_OFFSET;
5432
5433                         gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit);
5434
5435                         gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit);
5436                 }
5437
5438         for (dec_id = 0 ; dec_id < NUM_OF_PCIE_VDEC ; dec_id++) {
5439                 dec_bit = PCIE_DEC_SHIFT + dec_id;
5440                 if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
5441                         continue;
5442
5443                 base_addr = mmPCIE_DEC0_CMD_BASE + BRDG_CTRL_BLOCK_OFFSET +
5444                                 dec_id * DCORE_VDEC_OFFSET;
5445
5446                 gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit);
5447
5448                 gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit);
5449         }
5450 }
5451
5452 static int gaudi2_mmu_update_asid_hop0_addr(struct hl_device *hdev,
5453                                         u32 stlb_base, u32 asid, u64 phys_addr)
5454 {
5455         u32 status, timeout_usec;
5456         int rc;
5457
5458         if (hdev->pldm || !hdev->pdev)
5459                 timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC;
5460         else
5461                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
5462
5463         WREG32(stlb_base + STLB_ASID_OFFSET, asid);
5464         WREG32(stlb_base + STLB_HOP0_PA43_12_OFFSET, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
5465         WREG32(stlb_base + STLB_HOP0_PA63_44_OFFSET, phys_addr >> MMU_HOP0_PA63_44_SHIFT);
5466         WREG32(stlb_base + STLB_BUSY_OFFSET, 0x80000000);
5467
5468         rc = hl_poll_timeout(
5469                 hdev,
5470                 stlb_base + STLB_BUSY_OFFSET,
5471                 status,
5472                 !(status & 0x80000000),
5473                 1000,
5474                 timeout_usec);
5475
5476         if (rc) {
5477                 dev_err(hdev->dev, "Timeout during MMU hop0 config of asid %d\n", asid);
5478                 return rc;
5479         }
5480
5481         return 0;
5482 }
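
/*
 * Illustrative sketch of the hop0 address split programmed above: the hop0
 * physical address is written as two register fields whose widths are
 * implied by the register names (a sketch, not driver code).
 */
static void example_split_hop0_pa(u64 phys_addr, u32 *pa43_12, u32 *pa63_44)
{
        *pa43_12 = phys_addr >> MMU_HOP0_PA43_12_SHIFT;
        *pa63_44 = phys_addr >> MMU_HOP0_PA63_44_SHIFT;
}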
5483
5484 static void gaudi2_mmu_send_invalidate_cache_cmd(struct hl_device *hdev, u32 stlb_base,
5485                                         u32 start_offset, u32 inv_start_val,
5486                                         u32 flags)
5487 {
5488         /* clear PMMU mem line cache (only needed in mmu range invalidation) */
5489         if (flags & MMU_OP_CLEAR_MEMCACHE)
5490                 WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INVALIDATION, 0x1);
5491
5492         if (flags & MMU_OP_SKIP_LOW_CACHE_INV)
5493                 return;
5494
5495         WREG32(stlb_base + start_offset, inv_start_val);
5496 }
5497
5498 static int gaudi2_mmu_invalidate_cache_status_poll(struct hl_device *hdev, u32 stlb_base,
5499                                                 struct gaudi2_cache_invld_params *inv_params)
5500 {
5501         u32 status, timeout_usec, start_offset;
5502         int rc;
5503
5504         timeout_usec = (hdev->pldm) ? GAUDI2_PLDM_MMU_TIMEOUT_USEC :
5505                                         GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC;
5506
5507         /* poll PMMU mem line cache (only needed in mmu range invalidation) */
5508         if (inv_params->flags & MMU_OP_CLEAR_MEMCACHE) {
5509                 rc = hl_poll_timeout(
5510                         hdev,
5511                         mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS,
5512                         status,
5513                         status & 0x1,
5514                         1000,
5515                         timeout_usec);
5516
5517                 if (rc)
5518                         return rc;
5519
5520                 /* Need to manually reset the status to 0 */
5521                 WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS, 0x0);
5522         }
5523
5524         /* The lower cache does not work with cache lines, hence we can skip
5525          * its invalidation upon map and invalidate only upon unmap.
5526          */
5527         if (inv_params->flags & MMU_OP_SKIP_LOW_CACHE_INV)
5528                 return 0;
5529
5530         start_offset = inv_params->range_invalidation ?
5531                         STLB_RANGE_CACHE_INVALIDATION_OFFSET : STLB_INV_ALL_START_OFFSET;
5532
5533         rc = hl_poll_timeout(
5534                 hdev,
5535                 stlb_base + start_offset,
5536                 status,
5537                 !(status & 0x1),
5538                 1000,
5539                 timeout_usec);
5540
5541         return rc;
5542 }
5543
5544 bool gaudi2_is_hmmu_enabled(struct hl_device *hdev, int dcore_id, int hmmu_id)
5545 {
5546         struct gaudi2_device *gaudi2 = hdev->asic_specific;
5547         u32 hw_cap;
5548
5549         hw_cap = HW_CAP_DCORE0_DMMU0 << (NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id);
5550
5551         if (gaudi2->hw_cap_initialized & hw_cap)
5552                 return true;
5553
5554         return false;
5555 }
5556
5557 /* this function shall be called only for HMMUs for which the capability bit is set */
5558 static inline u32 get_hmmu_stlb_base(int dcore_id, int hmmu_id)
5559 {
5560         u32 offset;
5561
5562         offset = (u32)(dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET);
5563         return (u32)(mmDCORE0_HMMU0_STLB_BASE + offset);
5564 }
5565
5566 static void gaudi2_mmu_invalidate_cache_trigger(struct hl_device *hdev, u32 stlb_base,
5567                                                 struct gaudi2_cache_invld_params *inv_params)
5568 {
5569         u32 start_offset;
5570
5571         if (inv_params->range_invalidation) {
5572                 /* Set the address range.
5573                  * Note: the start address we set in the register is not included
5574                  * in the invalidation range, by design.
5575                  * That's why we need to set an address lower than the one we
5576                  * actually want to be included in the range invalidation.
5577                  */
5578                 u64 start = inv_params->start_va - 1;
5579
5580                 start_offset = STLB_RANGE_CACHE_INVALIDATION_OFFSET;
5581
5582                 WREG32(stlb_base + STLB_RANGE_INV_START_LSB_OFFSET,
5583                                 start >> MMU_RANGE_INV_VA_LSB_SHIFT);
5584
5585                 WREG32(stlb_base + STLB_RANGE_INV_START_MSB_OFFSET,
5586                                 start >> MMU_RANGE_INV_VA_MSB_SHIFT);
5587
5588                 WREG32(stlb_base + STLB_RANGE_INV_END_LSB_OFFSET,
5589                                 inv_params->end_va >> MMU_RANGE_INV_VA_LSB_SHIFT);
5590
5591                 WREG32(stlb_base + STLB_RANGE_INV_END_MSB_OFFSET,
5592                                 inv_params->end_va >> MMU_RANGE_INV_VA_MSB_SHIFT);
5593         } else {
5594                 start_offset = STLB_INV_ALL_START_OFFSET;
5595         }
5596
5597         gaudi2_mmu_send_invalidate_cache_cmd(hdev, stlb_base, start_offset,
5598                                                 inv_params->inv_start_val, inv_params->flags);
5599 }
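
/*
 * Illustrative sketch of the exclusive-start range programming above (not
 * driver code): since the programmed start address is excluded from the
 * invalidation range, start_va - 1 is written so that start_va itself is
 * covered, and each bound is split into its LSB/MSB register halves.
 */
static void example_range_inv_bounds(u64 start_va, u64 end_va, u32 regs[4])
{
        u64 start = start_va - 1;       /* start is exclusive by design */

        regs[0] = start >> MMU_RANGE_INV_VA_LSB_SHIFT;
        regs[1] = start >> MMU_RANGE_INV_VA_MSB_SHIFT;
        regs[2] = end_va >> MMU_RANGE_INV_VA_LSB_SHIFT;
        regs[3] = end_va >> MMU_RANGE_INV_VA_MSB_SHIFT;
}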
5600
5601 static inline void gaudi2_hmmu_invalidate_cache_trigger(struct hl_device *hdev,
5602                                                 int dcore_id, int hmmu_id,
5603                                                 struct gaudi2_cache_invld_params *inv_params)
5604 {
5605         u32 stlb_base = get_hmmu_stlb_base(dcore_id, hmmu_id);
5606
5607         gaudi2_mmu_invalidate_cache_trigger(hdev, stlb_base, inv_params);
5608 }
5609
5610 static inline int gaudi2_hmmu_invalidate_cache_status_poll(struct hl_device *hdev,
5611                                                 int dcore_id, int hmmu_id,
5612                                                 struct gaudi2_cache_invld_params *inv_params)
5613 {
5614         u32 stlb_base = get_hmmu_stlb_base(dcore_id, hmmu_id);
5615
5616         return gaudi2_mmu_invalidate_cache_status_poll(hdev, stlb_base, inv_params);
5617 }
5618
5619 static int gaudi2_hmmus_invalidate_cache(struct hl_device *hdev,
5620                                                 struct gaudi2_cache_invld_params *inv_params)
5621 {
5622         int dcore_id, hmmu_id;
5623
5624         /* first send all invalidation commands */
5625         for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
5626                 for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) {
5627                         if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id))
5628                                 continue;
5629
5630                         gaudi2_hmmu_invalidate_cache_trigger(hdev, dcore_id, hmmu_id, inv_params);
5631                 }
5632         }
5633
5634         /* next, poll the status of all invalidations */
5635         for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
5636                 for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) {
5637                         int rc;
5638
5639                         if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id))
5640                                 continue;
5641
5642                         rc = gaudi2_hmmu_invalidate_cache_status_poll(hdev, dcore_id, hmmu_id,
5643                                                                                 inv_params);
5644                         if (rc)
5645                                 return rc;
5646                 }
5647         }
5648
5649         return 0;
5650 }
5651
5652 static int gaudi2_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
5653 {
5654         struct gaudi2_device *gaudi2 = hdev->asic_specific;
5655         struct gaudi2_cache_invld_params invld_params;
5656         int rc = 0;
5657
5658         if (hdev->reset_info.hard_reset_pending)
5659                 return rc;
5660
5661         invld_params.range_invalidation = false;
5662         invld_params.inv_start_val = 1;
5663
5664         if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
5665                 invld_params.flags = flags;
5666                 gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params);
5667                 rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE,
5668                                                                                 &invld_params);
5669         } else if (flags & MMU_OP_PHYS_PACK) {
5670                 invld_params.flags = 0;
5671                 rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params);
5672         }
5673
5674         return rc;
5675 }
5676
5677 static int gaudi2_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard,
5678                                 u32 flags, u32 asid, u64 va, u64 size)
5679 {
5680         struct gaudi2_cache_invld_params invld_params = {0};
5681         struct gaudi2_device *gaudi2 = hdev->asic_specific;
5682         u64 start_va, end_va;
5683         u32 inv_start_val;
5684         int rc = 0;
5685
5686         if (hdev->reset_info.hard_reset_pending)
5687                 return 0;
5688
5689         inv_start_val = (1 << MMU_RANGE_INV_EN_SHIFT |
5690                         1 << MMU_RANGE_INV_ASID_EN_SHIFT |
5691                         asid << MMU_RANGE_INV_ASID_SHIFT);
5692         start_va = va;
5693         end_va = start_va + size;
5694
5695         if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
5696                 /* As range invalidation does not support a zero address, we
5697                  * do a full invalidation in this case.
5698                  */
5699                 if (start_va) {
5700                         invld_params.range_invalidation = true;
5701                         invld_params.start_va = start_va;
5702                         invld_params.end_va = end_va;
5703                         invld_params.inv_start_val = inv_start_val;
5704                         invld_params.flags = flags | MMU_OP_CLEAR_MEMCACHE;
5705                 } else {
5706                         invld_params.range_invalidation = false;
5707                         invld_params.inv_start_val = 1;
5708                         invld_params.flags = flags;
5709                 }
5710
5712                 gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params);
5713                 rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE,
5714                                                                                 &invld_params);
5715                 if (rc)
5716                         return rc;
5717
5718         } else if (flags & MMU_OP_PHYS_PACK) {
5719                 invld_params.start_va = gaudi2_mmu_scramble_addr(hdev, start_va);
5720                 invld_params.end_va = gaudi2_mmu_scramble_addr(hdev, end_va);
5721                 invld_params.inv_start_val = inv_start_val;
5722                 invld_params.flags = flags;
5723                 rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params);
5724         }
5725
5726         return rc;
5727 }
5728
5729 static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base)
5730 {
5731         struct asic_fixed_properties *prop = &hdev->asic_prop;
5732         u64 hop0_addr;
5733         u32 asid, max_asid = prop->max_asid;
5734         int rc;
5735
5736         /* it takes too much time to init all of the ASIDs on Palladium */
5737         if (hdev->pldm)
5738                 max_asid = min((u32) 8, max_asid);
5739
5740         for (asid = 0 ; asid < max_asid ; asid++) {
5741                 hop0_addr = hdev->mmu_priv.hr.mmu_asid_hop0[asid].phys_addr;
5742                 rc = gaudi2_mmu_update_asid_hop0_addr(hdev, stlb_base, asid, hop0_addr);
5743                 if (rc) {
5744                         dev_err(hdev->dev, "failed to set hop0 addr for asid %d\n", asid);
5745                         return rc;
5746                 }
5747         }
5748
5749         return 0;
5750 }
5751
5752 static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb_base)
5753 {
5754         u32 status, timeout_usec;
5755         int rc;
5756
5757         if (hdev->pldm || !hdev->pdev)
5758                 timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC;
5759         else
5760                 timeout_usec = GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC;
5761
5762         WREG32(stlb_base + STLB_INV_ALL_START_OFFSET, 1);
5763
5764         rc = hl_poll_timeout(
5765                 hdev,
5766                 stlb_base + STLB_SRAM_INIT_OFFSET,
5767                 status,
5768                 !status,
5769                 1000,
5770                 timeout_usec);
5771
5772         if (rc)
5773                 dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU SRAM init\n");
5774
5775         rc = gaudi2_mmu_update_hop0_addr(hdev, stlb_base);
5776         if (rc)
5777                 return rc;
5778
5779         WREG32(mmu_base + MMU_BYPASS_OFFSET, 0);
5780
5781         rc = hl_poll_timeout(
5782                 hdev,
5783                 stlb_base + STLB_INV_ALL_START_OFFSET,
5784                 status,
5785                 !status,
5786                 1000,
5787                 timeout_usec);
5788
5789         if (rc)
5790                 dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU invalidate all\n");
5791
5792         WREG32(mmu_base + MMU_ENABLE_OFFSET, 1);
5793
5794         return rc;
5795 }
5796
5797 static int gaudi2_pci_mmu_init(struct hl_device *hdev)
5798 {
5799         struct gaudi2_device *gaudi2 = hdev->asic_specific;
5800         u32 mmu_base, stlb_base;
5801         int rc;
5802
5803         if (gaudi2->hw_cap_initialized & HW_CAP_PMMU)
5804                 return 0;
5805
5806         mmu_base = mmPMMU_HBW_MMU_BASE;
5807         stlb_base = mmPMMU_HBW_STLB_BASE;
5808
5809         RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET,
5810                 (0 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_SHIFT) |
5811                 (5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_SHIFT) |
5812                 (4 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_SHIFT) |
5813                 (5 << PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_SHIFT) |
5814                 (5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_SHIFT),
5815                 PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK |
5816                 PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK |
5817                 PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK |
5818                 PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_MASK |
5819                 PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK);
5820
5821         WREG32(stlb_base + STLB_LL_LOOKUP_MASK_63_32_OFFSET, 0);
5822
5823         if (PAGE_SIZE == SZ_64K) {
5824                 /* Set page sizes to 64K on hop5 and 16M on hop4 + enable 8 bit hops */
5825                 RMWREG32_SHIFTED(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET,
5826                         FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK, 4) |
5827                         FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK, 3) |
5828                         FIELD_PREP(
5829                                 DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK,
5830                                 1),
5831                         DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK |
5832                         DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK |
5833                         DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK);
5834         }
5835
5836         WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_PMMU_SPI_SEI_ENABLE_MASK);
5837
5838         rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base);
5839         if (rc)
5840                 return rc;
5841
5842         gaudi2->hw_cap_initialized |= HW_CAP_PMMU;
5843
5844         return 0;
5845 }
5846
5847 static int gaudi2_dcore_hmmu_init(struct hl_device *hdev, int dcore_id,
5848                                 int hmmu_id)
5849 {
5850         struct asic_fixed_properties *prop = &hdev->asic_prop;
5851         struct gaudi2_device *gaudi2 = hdev->asic_specific;
5852         u32 offset, mmu_base, stlb_base, hw_cap;
5853         u8 dmmu_seq;
5854         int rc;
5855
5856         dmmu_seq = NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id;
5857         hw_cap = HW_CAP_DCORE0_DMMU0 << dmmu_seq;
5858
5859         /*
5860          * Return if the DMMU is already initialized or if it is not out of
5861          * isolation (due to cluster binning).
5862          */
5863         if ((gaudi2->hw_cap_initialized & hw_cap) || !(prop->hmmu_hif_enabled_mask & BIT(dmmu_seq)))
5864                 return 0;
5865
5866         offset = (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET);
5867         mmu_base = mmDCORE0_HMMU0_MMU_BASE + offset;
5868         stlb_base = mmDCORE0_HMMU0_STLB_BASE + offset;
5869
5870         RMWREG32(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET, 5 /* 64MB */,
5871                         MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK);
5872
5873         RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET,
5874                 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK, 0) |
5875                 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK, 3) |
5876                 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK, 3) |
5877                 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK, 3) |
5878                 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK, 3),
5879                         DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK |
5880                         DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK |
5881                         DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK |
5882                         DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK |
5883                         DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK);
5884
5885         RMWREG32(stlb_base + STLB_HOP_CONFIGURATION_OFFSET, 1,
5886                         STLB_HOP_CONFIGURATION_ONLY_LARGE_PAGE_MASK);
5887
5888         WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_HMMU_SPI_SEI_ENABLE_MASK);
5889
5890         rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base);
5891         if (rc)
5892                 return rc;
5893
5894         gaudi2->hw_cap_initialized |= hw_cap;
5895
5896         return 0;
5897 }
5898
5899 static int gaudi2_hbm_mmu_init(struct hl_device *hdev)
5900 {
5901         int rc, dcore_id, hmmu_id;
5902
5903         for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
5904                 for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE; hmmu_id++) {
5905                         rc = gaudi2_dcore_hmmu_init(hdev, dcore_id, hmmu_id);
5906                         if (rc)
5907                                 return rc;
5908                 }
5909
5910         return 0;
5911 }
5912
5913 static int gaudi2_mmu_init(struct hl_device *hdev)
5914 {
5915         int rc;
5916
5917         rc = gaudi2_pci_mmu_init(hdev);
5918         if (rc)
5919                 return rc;
5920
5921         rc = gaudi2_hbm_mmu_init(hdev);
5922         if (rc)
5923                 return rc;
5924
5925         return 0;
5926 }
5927
5928 static int gaudi2_hw_init(struct hl_device *hdev)
5929 {
5930         struct gaudi2_device *gaudi2 = hdev->asic_specific;
5931         int rc;
5932
5933         /* Let's mark in the H/W that we have reached this point. We check
5934          * this value in the reset_before_init function to understand whether
5935          * we need to reset the chip before doing H/W init. This register is
5936          * cleared by the H/W upon H/W reset
5937          */
5938         WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
5939
5940         /* Perform read from the device to make sure device is up */
5941         RREG32(mmHW_STATE);
5942
5943         /* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
5944          * So we set it here and if anyone tries to move it later to
5945          * a different address, there will be an error
5946          */
5947         if (hdev->asic_prop.iatu_done_by_fw)
5948                 gaudi2->dram_bar_cur_addr = DRAM_PHYS_BASE;
5949
5950         /*
5951          * Before pushing u-boot/linux to the device, we need to set the HBM
5952          * BAR to the DRAM base address
5953          */
5954         if (gaudi2_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
5955                 dev_err(hdev->dev, "failed to map HBM bar to DRAM base address\n");
5956                 return -EIO;
5957         }
5958
5959         rc = gaudi2_init_cpu(hdev);
5960         if (rc) {
5961                 dev_err(hdev->dev, "failed to initialize CPU\n");
5962                 return rc;
5963         }
5964
5965         gaudi2_init_scrambler_hbm(hdev);
5966         gaudi2_init_kdma(hdev);
5967
5968         rc = gaudi2_init_cpu_queues(hdev, GAUDI2_CPU_TIMEOUT_USEC);
5969         if (rc) {
5970                 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n", rc);
5971                 return rc;
5972         }
5973
5974         rc = gaudi2->cpucp_info_get(hdev);
5975         if (rc) {
5976                 dev_err(hdev->dev, "Failed to get cpucp info\n");
5977                 return rc;
5978         }
5979
5980         rc = gaudi2_mmu_init(hdev);
5981         if (rc)
5982                 return rc;
5983
5984         gaudi2_init_pdma(hdev);
5985         gaudi2_init_edma(hdev);
5986         gaudi2_init_sm(hdev);
5987         gaudi2_init_tpc(hdev);
5988         gaudi2_init_mme(hdev);
5989         gaudi2_init_rotator(hdev);
5990         gaudi2_init_dec(hdev);
5991         gaudi2_enable_timestamp(hdev);
5992
5993         rc = gaudi2_coresight_init(hdev);
5994         if (rc)
5995                 goto disable_queues;
5996
5997         rc = gaudi2_enable_msix(hdev);
5998         if (rc)
5999                 goto disable_queues;
6000
6001         /* Perform read from the device to flush all configuration */
6002         RREG32(mmHW_STATE);
6003
6004         return 0;
6005
6006 disable_queues:
6007         gaudi2_disable_dma_qmans(hdev);
6008         gaudi2_disable_mme_qmans(hdev);
6009         gaudi2_disable_tpc_qmans(hdev);
6010         gaudi2_disable_rot_qmans(hdev);
6011         gaudi2_disable_nic_qmans(hdev);
6012
6013         gaudi2_disable_timestamp(hdev);
6014
6015         return rc;
6016 }
6017
6018 /**
6019  * gaudi2_send_hard_reset_cmd - common function to handle reset
6020  *
6021  * @hdev: pointer to the habanalabs device structure
6022  *
6023  * This function handles the various possible scenarios for reset.
6024  * It considers whether the reset is handled by the driver or the FW and which FW components are loaded
6025  */
6026 static void gaudi2_send_hard_reset_cmd(struct hl_device *hdev)
6027 {
6028         struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
6029         bool heartbeat_reset, preboot_only, cpu_initialized = false;
6030         struct gaudi2_device *gaudi2 = hdev->asic_specific;
6031         u32 cpu_boot_status;
6032
6033         preboot_only = (hdev->fw_loader.fw_comp_loaded == FW_TYPE_PREBOOT_CPU);
6034         heartbeat_reset = (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT);
6035
6036         /*
6037          * Handle the corner case where the failure occurred while loading the
6038          * CPU management app and the driver didn't detect any failure while
6039          * loading the FW. In such a scenario the driver will send only
6040          * HALT_MACHINE and no one will respond, since the FW is already back
6041          * in preboot and cannot handle such a command.
6042          * The next time the management app loads, it will check the events
6043          * register, which will still hold the halt indication, and will
6044          * reboot the device. The solution is to let preboot clear all relevant
6045          * registers before the next boot, once the driver sends COMMS_RST_DEV.
6046          */
6047         cpu_boot_status = RREG32(mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS);
6048
6049         if (gaudi2 && (gaudi2->hw_cap_initialized & HW_CAP_CPU) &&
6050                         (cpu_boot_status == CPU_BOOT_STATUS_SRAM_AVAIL))
6051                 cpu_initialized = true;
6052
6053         /*
6054          * When Linux/Bootfit exists, this write to the SP can be interpreted in 2 ways:
6055          * 1. FW reset: FW initiates the reset sequence
6056          * 2. Driver reset: FW starts the HALT sequence (the preparations for the
6057          *                  reset but not the reset itself, as it is not implemented
6058          *                  on its side) and LKD waits to let FW complete the
6059          *                  sequence before issuing the reset
6060          */
6061         if (!preboot_only && cpu_initialized) {
6062                 WREG32(le32_to_cpu(dyn_regs->gic_host_halt_irq),
6063                         gaudi2_irq_map_table[GAUDI2_EVENT_CPU_HALT_MACHINE].cpu_id);
6064
6065                 msleep(GAUDI2_CPU_RESET_WAIT_MSEC);
6066         }
6067
6068         /*
6069          * When working with preboot (without Linux/Boot fit) we can
6070          * communicate only using the COMMS commands to issue halt/reset.
6071          *
6072          * For the case in which we are working with Linux/Bootfit, this is a
6073          * hail-mary attempt to revive the card on the small chance that the
6074          * f/w has experienced a watchdog event, which caused it to return to
6075          * preboot. In that case, triggering the reset through the GIC won't
6076          * help. We need to trigger the reset as if Linux wasn't loaded.
6077          *
6078          * We do it only if the reset cause was HB, because that would be the
6079          * indication of such an event.
6080          *
6081          * In case the watchdog hasn't expired but we still got HB, this won't
6082          * do any damage.
6083          */
6084
6085         if (heartbeat_reset || preboot_only || !cpu_initialized) {
6086                 if (hdev->asic_prop.hard_reset_done_by_fw)
6087                         hl_fw_ask_hard_reset_without_linux(hdev);
6088                 else
6089                         hl_fw_ask_halt_machine_without_linux(hdev);
6090         }
6091 }
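/*
 * Summary of the flow above (derived directly from this function):
 * - Linux/Bootfit loaded and CPU initialized: send the HALT_MACHINE GIC irq
 *   and wait GAUDI2_CPU_RESET_WAIT_MSEC for the FW to run its halt sequence.
 * - Heartbeat reset, preboot-only, or CPU not initialized: fall back to the
 *   COMMS path - a hard-reset request when hard_reset_done_by_fw is set,
 *   otherwise a halt-machine request.
 */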
6092
6093 /**
6094  * gaudi2_execute_hard_reset - execute hard reset by driver/FW
6095  *
6096  * @hdev: pointer to the habanalabs device structure
6097  *
6098  * This function executes a hard reset, performed either by the driver or by the FW
6099  */
6100 static void gaudi2_execute_hard_reset(struct hl_device *hdev)
6101 {
6102         if (hdev->asic_prop.hard_reset_done_by_fw) {
6103                 gaudi2_send_hard_reset_cmd(hdev);
6104                 return;
6105         }
6106
6107         /* Set the device to handle FLR by H/W, as we will put the device
6108          * CPU into halt mode
6109          */
6110         WREG32(mmPCIE_AUX_FLR_CTRL,
6111                         (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK | PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
6112
6113         gaudi2_send_hard_reset_cmd(hdev);
6114
6115         WREG32(mmPSOC_RESET_CONF_SW_ALL_RST, 1);
6116 }
6117
6118 static int gaudi2_get_soft_rst_done_indication(struct hl_device *hdev, u32 poll_timeout_us)
6119 {
6120         int i, rc = 0;
6121         u32 reg_val;
6122
6123         for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++)
6124                 rc = hl_poll_timeout(
6125                         hdev,
6126                         mmCPU_RST_STATUS_TO_HOST,
6127                         reg_val,
6128                         reg_val == CPU_RST_STATUS_SOFT_RST_DONE,
6129                         1000,
6130                         poll_timeout_us);
6131
6132         if (rc)
6133                 dev_err(hdev->dev, "Timeout while waiting for FW to complete soft reset (0x%x)\n",
6134                                 reg_val);
6135         return rc;
6136 }
6137
6138 /**
6139  * gaudi2_execute_soft_reset - execute soft reset by driver/FW
6140  *
6141  * @hdev: pointer to the habanalabs device structure
6142  * @driver_performs_reset: true if driver should perform reset instead of f/w.
6143  * @poll_timeout_us: time to wait for response from f/w.
6144  *
6145  * This function executes a soft reset, performed either by the driver or by the FW
6146  */
6147 static int gaudi2_execute_soft_reset(struct hl_device *hdev, bool driver_performs_reset,
6148                                                 u32 poll_timeout_us)
6149 {
6150         struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
6151
6152         if (!driver_performs_reset) {
6153                 /* set SP to indicate reset request sent to FW */
6154                 if (dyn_regs->cpu_rst_status)
6155                         WREG32(le32_to_cpu(dyn_regs->cpu_rst_status), CPU_RST_STATUS_NA);
6156                 else
6157                         WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA);
6158
6159                 WREG32(le32_to_cpu(dyn_regs->gic_host_soft_rst_irq),
6160                         gaudi2_irq_map_table[GAUDI2_EVENT_CPU_SOFT_RESET].cpu_id);
6161
6162                 return gaudi2_get_soft_rst_done_indication(hdev, poll_timeout_us);
6163         }
6164
6165         /* Block access to engines, QMANs and SM during reset; these
6166          * RRs will be reconfigured after the soft reset.
6167          * PCIE_MSIX is left unsecured to allow NIC packet processing during the reset.
6168          */
6169         gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 1,
6170                                         mmDCORE0_TPC0_QM_DCCM_BASE, mmPCIE_MSIX_BASE);
6171
6172         gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 2,
6173                                 mmPCIE_MSIX_BASE + HL_BLOCK_SIZE,
6174                                 mmPCIE_VDEC1_MSTR_IF_RR_SHRD_HBW_BASE + HL_BLOCK_SIZE);
6175
6176         WREG32(mmPSOC_RESET_CONF_SOFT_RST, 1);
6177         return 0;
6178 }
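/*
 * The two soft-reset paths above, in short (derived from this function):
 * - FW performs the reset: clear the CPU_RST_STATUS scratchpad, send the
 *   SOFT_RESET GIC irq and poll mmCPU_RST_STATUS_TO_HOST until it reads
 *   CPU_RST_STATUS_SOFT_RST_DONE.
 * - Driver performs the reset: close the LBW range registers around the
 *   engines/QMANs/SM (leaving PCIE_MSIX accessible) and then write
 *   mmPSOC_RESET_CONF_SOFT_RST.
 */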
6179
6180 static void gaudi2_poll_btm_indication(struct hl_device *hdev, u32 poll_timeout_us)
6181 {
6182         int i, rc = 0;
6183         u32 reg_val;
6184
6185         /* We poll the BTM done indication multiple times after reset due to
6186          * HW erratum 'GAUDI2_0300'
6187          */
6188         for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++)
6189                 rc = hl_poll_timeout(
6190                         hdev,
6191                         mmPSOC_GLOBAL_CONF_BTM_FSM,
6192                         reg_val,
6193                         reg_val == 0,
6194                         1000,
6195                         poll_timeout_us);
6196
6197         if (rc)
6198                 dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", reg_val);
6199 }
6200
6201 static int gaudi2_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
6202 {
6203         struct gaudi2_device *gaudi2 = hdev->asic_specific;
6204         u32 poll_timeout_us, reset_sleep_ms;
6205         bool driver_performs_reset = false;
6206         int rc;
6207
6208         if (hdev->pldm) {
6209                 reset_sleep_ms = hard_reset ? GAUDI2_PLDM_HRESET_TIMEOUT_MSEC :
6210                                                 GAUDI2_PLDM_SRESET_TIMEOUT_MSEC;
6211                 poll_timeout_us = GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC;
6212         } else {
6213                 reset_sleep_ms = GAUDI2_RESET_TIMEOUT_MSEC;
6214                 poll_timeout_us = GAUDI2_RESET_POLL_TIMEOUT_USEC;
6215         }
6216
6217         if (fw_reset)
6218                 goto skip_reset;
6219
6220         gaudi2_reset_arcs(hdev);
6221
6222         if (hard_reset) {
6223                 driver_performs_reset = !hdev->asic_prop.hard_reset_done_by_fw;
6224                 gaudi2_execute_hard_reset(hdev);
6225         } else {
6226                 /*
6227                  * As we also have to support working with preboot only (which does not
6228                  * support soft reset), we have to make sure that security is disabled
6229                  * before letting the driver do the reset. The user shall control the BFE
6230                  * flags to avoid asking for a soft reset on a secured device with preboot only.
6231                  */
6232                 driver_performs_reset = (hdev->fw_components == FW_TYPE_PREBOOT_CPU &&
6233                                                         !hdev->asic_prop.fw_security_enabled);
6234                 rc = gaudi2_execute_soft_reset(hdev, driver_performs_reset, poll_timeout_us);
6235                 if (rc)
6236                         return rc;
6237         }
6238
6239 skip_reset:
6240         if (driver_performs_reset || hard_reset) {
6241                 /*
6242                  * Instead of waiting for the BTM indication we should wait for preboot ready:
6243                  * Consider the below scenario:
6244                  * 1. FW update is being triggered
6245                  *        - setting the dirty bit
6246                  * 2. hard reset will be triggered due to the dirty bit
6247                  * 3. FW initiates the reset:
6248                  *        - dirty bit cleared
6249                  *        - BTM indication cleared
6250                  *        - preboot ready indication cleared
6251                  * 4. during hard reset:
6252                  *        - BTM indication will be set
6253                  *        - BIST test performed and another reset triggered
6254                  * 5. only after this reset will preboot set the preboot ready indication
6255                  *
6256                  * When polling on the BTM indication alone, we can lose sync with the FW
6257                  * while trying to communicate with a FW that is in the middle of reset.
6258                  * To overcome this we always wait for the preboot ready indication.
6259                  */
6260
6261                 /* Without this sleep, the reset will not work */
6262                 msleep(reset_sleep_ms);
6263
6264                 if (hdev->fw_components & FW_TYPE_PREBOOT_CPU)
6265                         hl_fw_wait_preboot_ready(hdev);
6266                 else
6267                         gaudi2_poll_btm_indication(hdev, poll_timeout_us);
6268         }
6269
6270         if (!gaudi2)
6271                 return 0;
6272
6273         gaudi2->dec_hw_cap_initialized &= ~(HW_CAP_DEC_MASK);
6274         gaudi2->tpc_hw_cap_initialized &= ~(HW_CAP_TPC_MASK);
6275
6276         /*
6277          * Clear the NIC capability mask in order for the driver to re-configure
6278          * NIC QMANs. NIC ports will not be re-configured during soft
6279          * reset, as we call gaudi2_nic_init only during hard reset
6280          */
6281         gaudi2->nic_hw_cap_initialized &= ~(HW_CAP_NIC_MASK);
6282
6283         if (hard_reset) {
6284                 gaudi2->hw_cap_initialized &=
6285                         ~(HW_CAP_DRAM | HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_MASK |
6286                         HW_CAP_PMMU | HW_CAP_CPU | HW_CAP_CPU_Q |
6287                         HW_CAP_SRAM_SCRAMBLER | HW_CAP_DMMU_MASK |
6288                         HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_KDMA |
6289                         HW_CAP_MME_MASK | HW_CAP_ROT_MASK);
6290
6291                 memset(gaudi2->events_stat, 0, sizeof(gaudi2->events_stat));
6292         } else {
6293                 gaudi2->hw_cap_initialized &=
6294                         ~(HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_SW_RESET |
6295                         HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_MME_MASK |
6296                         HW_CAP_ROT_MASK);
6297         }
6298         return 0;
6299 }
6300
6301 static int gaudi2_suspend(struct hl_device *hdev)
6302 {
6303         int rc;
6304
6305         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
6306         if (rc)
6307                 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
6308
6309         return rc;
6310 }
6311
6312 static int gaudi2_resume(struct hl_device *hdev)
6313 {
6314         return gaudi2_init_iatu(hdev);
6315 }
6316
6317 static int gaudi2_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
6318                 void *cpu_addr, dma_addr_t dma_addr, size_t size)
6319 {
6320         int rc;
6321
6322         vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
6323                         VM_DONTCOPY | VM_NORESERVE);
6324
6325 #ifdef _HAS_DMA_MMAP_COHERENT
6326
6327         rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size);
6328         if (rc)
6329                 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
6330
6331 #else
6332
6333         rc = remap_pfn_range(vma, vma->vm_start,
6334                                 virt_to_phys(cpu_addr) >> PAGE_SHIFT,
6335                                 size, vma->vm_page_prot);
6336         if (rc)
6337                 dev_err(hdev->dev, "remap_pfn_range error %d", rc);
6338
6339 #endif
6340
6341         return rc;
6342 }
6343
6344 static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id)
6345 {
6346         struct gaudi2_device *gaudi2 = hdev->asic_specific;
6347         u64 hw_cap_mask = 0;
6348         u64 hw_tpc_cap_bit = 0;
6349         u64 hw_nic_cap_bit = 0;
6350         u64 hw_test_cap_bit = 0;
6351
6352         switch (hw_queue_id) {
6353         case GAUDI2_QUEUE_ID_PDMA_0_0:
6354         case GAUDI2_QUEUE_ID_PDMA_0_1:
6355         case GAUDI2_QUEUE_ID_PDMA_1_0:
6356                 hw_cap_mask = HW_CAP_PDMA_MASK;
6357                 break;
6358         case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
6359                 hw_test_cap_bit = HW_CAP_EDMA_SHIFT +
6360                         ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0) >> 2);
6361                 break;
6362         case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
6363                 hw_test_cap_bit = HW_CAP_EDMA_SHIFT + NUM_OF_EDMA_PER_DCORE +
6364                         ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0) >> 2);
6365                 break;
6366         case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
6367                 hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 2 * NUM_OF_EDMA_PER_DCORE +
6368                         ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0) >> 2);
6369                 break;
6370         case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
6371                 hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 3 * NUM_OF_EDMA_PER_DCORE +
6372                         ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0) >> 2);
6373                 break;
6374
6375         case GAUDI2_QUEUE_ID_DCORE0_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE0_MME_0_3:
6376                 hw_test_cap_bit = HW_CAP_MME_SHIFT;
6377                 break;
6378
6379         case GAUDI2_QUEUE_ID_DCORE1_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE1_MME_0_3:
6380                 hw_test_cap_bit = HW_CAP_MME_SHIFT + 1;
6381                 break;
6382
6383         case GAUDI2_QUEUE_ID_DCORE2_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE2_MME_0_3:
6384                 hw_test_cap_bit = HW_CAP_MME_SHIFT + 2;
6385                 break;
6386
6387         case GAUDI2_QUEUE_ID_DCORE3_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE3_MME_0_3:
6388                 hw_test_cap_bit = HW_CAP_MME_SHIFT + 3;
6389                 break;
6390
6391         case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_5_3:
6392                 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT +
6393                         ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_TPC_0_0) >> 2);
6394
6395                 /* special case where cap bit refers to the first queue id */
6396                 if (!hw_tpc_cap_bit)
6397                         return !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(0));
6398                 break;
6399
6400         case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3:
6401                 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + NUM_OF_TPC_PER_DCORE +
6402                         ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_TPC_0_0) >> 2);
6403                 break;
6404
6405         case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3:
6406                 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (2 * NUM_OF_TPC_PER_DCORE) +
6407                         ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_TPC_0_0) >> 2);
6408                 break;
6409
6410         case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3:
6411                 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (3 * NUM_OF_TPC_PER_DCORE) +
6412                         ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_TPC_0_0) >> 2);
6413                 break;
6414
6415         case GAUDI2_QUEUE_ID_DCORE0_TPC_6_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3:
6416                 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (4 * NUM_OF_TPC_PER_DCORE);
6417                 break;
6418
6419         case GAUDI2_QUEUE_ID_ROT_0_0 ... GAUDI2_QUEUE_ID_ROT_1_3:
6420                 hw_test_cap_bit = HW_CAP_ROT_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_ROT_0_0) >> 2);
6421                 break;
6422
6423         case GAUDI2_QUEUE_ID_NIC_0_0 ... GAUDI2_QUEUE_ID_NIC_23_3:
6424                 hw_nic_cap_bit = HW_CAP_NIC_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_NIC_0_0) >> 2);
6425
6426                 /* special case where cap bit refers to the first queue id */
6427                 if (!hw_nic_cap_bit)
6428                         return !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(0));
6429                 break;
6430
6431         case GAUDI2_QUEUE_ID_CPU_PQ:
6432                 return !!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q);
6433
6434         default:
6435                 return false;
6436         }
6437
6438         if (hw_tpc_cap_bit)
6439                 return  !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(hw_tpc_cap_bit));
6440
6441         if (hw_nic_cap_bit)
6442                 return  !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(hw_nic_cap_bit));
6443
6444         if (hw_test_cap_bit)
6445                 hw_cap_mask = BIT_ULL(hw_test_cap_bit);
6446
6447         return !!(gaudi2->hw_cap_initialized & hw_cap_mask);
6448 }
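/*
 * Worked example for the cap-bit math above (illustrative): each engine QMAN
 * exposes 4 queues, hence the ">> 2". For GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3,
 * the 4th queue of DCORE1's first EDMA:
 *
 *      hw_test_cap_bit = HW_CAP_EDMA_SHIFT + NUM_OF_EDMA_PER_DCORE + (3 >> 2)
 *                      = HW_CAP_EDMA_SHIFT + NUM_OF_EDMA_PER_DCORE;
 *
 * i.e. the same capability bit as the other three queues of that engine.
 */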
6449
6450 static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id)
6451 {
6452         struct gaudi2_device *gaudi2 = hdev->asic_specific;
6453
6454         switch (arc_id) {
6455         case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
6456         case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
6457                 return !!(gaudi2->active_hw_arc & BIT_ULL(arc_id));
6458
6459         case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
6460                 return !!(gaudi2->active_tpc_arc & BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0));
6461
6462         case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
6463                 return !!(gaudi2->active_nic_arc & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0));
6464
6465         default:
6466                 return false;
6467         }
6468 }
6469
6470 static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id)
6471 {
6472         struct gaudi2_device *gaudi2 = hdev->asic_specific;
6473
6474         switch (arc_id) {
6475         case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
6476         case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
6477                 gaudi2->active_hw_arc &= ~(BIT_ULL(arc_id));
6478                 break;
6479
6480         case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
6481                 gaudi2->active_tpc_arc &= ~(BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0));
6482                 break;
6483
6484         case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
6485                 gaudi2->active_nic_arc &= ~(BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0));
6486                 break;
6487
6488         default:
6489                 return;
6490         }
6491 }
6492
6493 static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id)
6494 {
6495         struct gaudi2_device *gaudi2 = hdev->asic_specific;
6496
6497         switch (arc_id) {
6498         case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
6499         case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
6500                 gaudi2->active_hw_arc |= BIT_ULL(arc_id);
6501                 break;
6502
6503         case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
6504                 gaudi2->active_tpc_arc |= BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0);
6505                 break;
6506
6507         case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
6508                 gaudi2->active_nic_arc |= BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0);
6509                 break;
6510
6511         default:
6512                 return;
6513         }
6514 }
6515
6516 static void gaudi2_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
6517 {
6518         struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
6519         u32 pq_offset, reg_base, db_reg_offset, db_value;
6520
6521         if (hw_queue_id != GAUDI2_QUEUE_ID_CPU_PQ) {
6522                 /*
6523                  * QMAN has 4 successive PQ_PI registers, 1 for each of the QMAN PQs.
6524                  * Masking the H/W queue ID with 0x3 extracts the QMAN internal PQ
6525                  * number.
6526                  */
6527                 pq_offset = (hw_queue_id & 0x3) * 4;
6528                 reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
6529                 db_reg_offset = reg_base + QM_PQ_PI_0_OFFSET + pq_offset;
6530         } else {
6531                 db_reg_offset = mmCPU_IF_PF_PQ_PI;
6532         }
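        /*
         * Worked example (illustrative): for a hw_queue_id whose two LSBs are
         * 2, pq_offset = (2 & 0x3) * 4 = 8, so the write below lands on that
         * QMAN's third PQ_PI register (QM_PQ_PI_0_OFFSET + 8).
         */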
6533
6534         db_value = pi;
6535
6536         /* ring the doorbell */
6537         WREG32(db_reg_offset, db_value);
6538
6539         if (hw_queue_id == GAUDI2_QUEUE_ID_CPU_PQ) {
6540                 /* make sure device CPU will read latest data from host */
6541                 mb();
6542                 WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq),
6543                         gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id);
6544         }
6545 }
6546
6547 static void gaudi2_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd)
6548 {
6549         __le64 *pbd = (__le64 *) bd;
6550
6551         /* The QMANs are on host memory so a simple copy suffices */
6552         pqe[0] = pbd[0];
6553         pqe[1] = pbd[1];
6554 }
6555
6556 static void *gaudi2_dma_alloc_coherent(struct hl_device *hdev, size_t size,
6557                                 dma_addr_t *dma_handle, gfp_t flags)
6558 {
6559         return dma_alloc_coherent(&hdev->pdev->dev, size, dma_handle, flags);
6560 }
6561
6562 static void gaudi2_dma_free_coherent(struct hl_device *hdev, size_t size,
6563                                 void *cpu_addr, dma_addr_t dma_handle)
6564 {
6565         dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, dma_handle);
6566 }
6567
6568 static int gaudi2_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
6569                                 u32 timeout, u64 *result)
6570 {
6571         struct gaudi2_device *gaudi2 = hdev->asic_specific;
6572
6573         if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) {
6574                 if (result)
6575                         *result = 0;
6576                 return 0;
6577         }
6578
6579         if (!timeout)
6580                 timeout = GAUDI2_MSG_TO_CPU_TIMEOUT_USEC;
6581
6582         return hl_fw_send_cpu_message(hdev, GAUDI2_QUEUE_ID_CPU_PQ, msg, len, timeout, result);
6583 }
6584
6585 static void *gaudi2_dma_pool_zalloc(struct hl_device *hdev, size_t size,
6586                                 gfp_t mem_flags, dma_addr_t *dma_handle)
6587 {
6588         if (size > GAUDI2_DMA_POOL_BLK_SIZE)
6589                 return NULL;
6590
6591         return dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
6592 }
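/*
 * Minimal usage sketch (illustrative only): requests must fit in a single
 * pool block, otherwise NULL is returned:
 *
 *      dma_addr_t dma_addr;
 *      void *p = gaudi2_dma_pool_zalloc(hdev, SZ_128, GFP_KERNEL, &dma_addr);
 *
 * Here SZ_128 <= GAUDI2_DMA_POOL_BLK_SIZE, so p is a zeroed block from
 * hdev->dma_pool; asking for more than the block size returns NULL.
 */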
6593
6594 static void gaudi2_dma_pool_free(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr)
6595 {
6596         dma_pool_free(hdev->dma_pool, vaddr, dma_addr);
6597 }
6598
6599 static void *gaudi2_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
6600                                                 dma_addr_t *dma_handle)
6601 {
6602         return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
6603 }
6604
6605 static void gaudi2_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr)
6606 {
6607         hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
6608 }
6609
6610 static dma_addr_t gaudi2_dma_map_single(struct hl_device *hdev, void *addr, int len,
6611                                         enum dma_data_direction dir)
6612 {
6613         dma_addr_t dma_addr;
6614
6615         dma_addr = dma_map_single(&hdev->pdev->dev, addr, len, dir);
6616         if (unlikely(dma_mapping_error(&hdev->pdev->dev, dma_addr)))
6617                 return 0;
6618
6619         return dma_addr;
6620 }
6621
6622 static void gaudi2_dma_unmap_single(struct hl_device *hdev, dma_addr_t addr, int len,
6623                                         enum dma_data_direction dir)
6624 {
6625         dma_unmap_single(&hdev->pdev->dev, addr, len, dir);
6626 }
6627
6628 static int gaudi2_validate_cb_address(struct hl_device *hdev, struct hl_cs_parser *parser)
6629 {
6630         struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
6631         struct gaudi2_device *gaudi2 = hdev->asic_specific;
6632
6633         if (!gaudi2_is_queue_enabled(hdev, parser->hw_queue_id)) {
6634                 dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
6635                 return -EINVAL;
6636         }
6637
6638         /* Just check if CB address is valid */
6639
6640         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6641                                         parser->user_cb_size,
6642                                         asic_prop->sram_user_base_address,
6643                                         asic_prop->sram_end_address))
6644                 return 0;
6645
6646         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6647                                         parser->user_cb_size,
6648                                         asic_prop->dram_user_base_address,
6649                                         asic_prop->dram_end_address))
6650                 return 0;
6651
6652         if ((gaudi2->hw_cap_initialized & HW_CAP_DMMU_MASK) &&
6653                 hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6654                                                 parser->user_cb_size,
6655                                                 asic_prop->dmmu.start_addr,
6656                                                 asic_prop->dmmu.end_addr))
6657                 return 0;
6658
6659         if (gaudi2->hw_cap_initialized & HW_CAP_PMMU) {
6660                 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6661                                         parser->user_cb_size,
6662                                         asic_prop->pmmu.start_addr,
6663                                         asic_prop->pmmu.end_addr) ||
6664                         hl_mem_area_inside_range(
6665                                         (u64) (uintptr_t) parser->user_cb,
6666                                         parser->user_cb_size,
6667                                         asic_prop->pmmu_huge.start_addr,
6668                                         asic_prop->pmmu_huge.end_addr))
6669                         return 0;
6670
6671         } else if (gaudi2_host_phys_addr_valid((u64) (uintptr_t) parser->user_cb)) {
6672                 if (!hdev->pdev)
6673                         return 0;
6674
6675                 if (!device_iommu_mapped(&hdev->pdev->dev))
6676                         return 0;
6677         }
6678
6679         dev_err(hdev->dev, "CB address %p + 0x%x for internal QMAN is not valid\n",
6680                 parser->user_cb, parser->user_cb_size);
6681
6682         return -EFAULT;
6683 }
6684
6685 static int gaudi2_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
6686 {
6687         struct gaudi2_device *gaudi2 = hdev->asic_specific;
6688
6689         if (!parser->is_kernel_allocated_cb)
6690                 return gaudi2_validate_cb_address(hdev, parser);
6691
6692         if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
6693                 dev_err(hdev->dev, "PMMU not initialized - Unsupported mode in Gaudi2\n");
6694                 return -EINVAL;
6695         }
6696
6697         return 0;
6698 }
6699
6700 static int gaudi2_send_heartbeat(struct hl_device *hdev)
6701 {
6702         struct gaudi2_device *gaudi2 = hdev->asic_specific;
6703
6704         if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
6705                 return 0;
6706
6707         return hl_fw_send_heartbeat(hdev);
6708 }
6709
6710 /* This is an internal helper function, used to update the KDMA MMU props.
6711  * Should be called while holding the appropriate KDMA lock.
6712  */
6713 static void gaudi2_kdma_set_mmbp_asid(struct hl_device *hdev,
6714                                            bool mmu_bypass, u32 asid)
6715 {
6716         u32 rw_asid, rw_mmu_bp;
6717
6718         rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
6719                       (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
6720
6721         rw_mmu_bp = (!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_SHIFT) |
6722                         (!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_SHIFT);
6723
6724         WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_ASID, rw_asid);
6725         WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP, rw_mmu_bp);
6726 }
6727
6728 static void gaudi2_arm_cq_monitor(struct hl_device *hdev, u32 sob_id, u32 mon_id, u32 cq_id,
6729                                                 u32 mon_payload, u32 sync_value)
6730 {
6731         u32 sob_offset, mon_offset, sync_group_id, mode, mon_arm;
6732         u8 mask;
6733
6734         sob_offset = sob_id * 4;
6735         mon_offset = mon_id * 4;
6736
6737         /* Reset the SOB value */
6738         WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0);
6739
6740         /* Configure this address with the CQ_ID because CQ_EN is set */
6741         WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, cq_id);
6742
6743         /* Configure this address with CS index because CQ_EN is set */
6744         WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, mon_payload);
6745
6746         sync_group_id = sob_id / 8;
6747         mask = ~(1 << (sob_id & 0x7));
6748         mode = 1; /* comparison mode is "equal to" */
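        /*
         * Illustrative example: for sob_id = 10, sync_group_id = 10 / 8 = 1
         * and mask = (u8)~(1 << (10 & 0x7)) = 0xfb, i.e. every mask bit is set
         * except bit 2, SOB 10's index within its group of 8.
         */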
6749
6750         mon_arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, sync_value);
6751         mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode);
6752         mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask);
6753         mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sync_group_id);
6754         WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, mon_arm);
6755 }
6756
6757 /* Send a copy/memset job to the KDMA engine and wait for its completion */
6758 static int gaudi2_send_job_to_kdma(struct hl_device *hdev,
6759                                         u64 src_addr, u64 dst_addr,
6760                                         u32 size, bool is_memset)
6761 {
6762         u32 comp_val, commit_mask, *polling_addr, timeout, status = 0;
6763         struct hl_cq_entry *cq_base;
6764         struct hl_cq *cq;
6765         u64 comp_addr;
6766         int rc;
6767
6768         gaudi2_arm_cq_monitor(hdev, GAUDI2_RESERVED_SOB_KDMA_COMPLETION,
6769                                 GAUDI2_RESERVED_MON_KDMA_COMPLETION,
6770                                 GAUDI2_RESERVED_CQ_KDMA_COMPLETION, 1, 1);
6771
6772         comp_addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6773                         (GAUDI2_RESERVED_SOB_KDMA_COMPLETION * sizeof(u32));
6774
6775         comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) |
6776                         FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1);
6777
6778         WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_LO, lower_32_bits(src_addr));
6779         WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_HI, upper_32_bits(src_addr));
6780         WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_LO, lower_32_bits(dst_addr));
6781         WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_HI, upper_32_bits(dst_addr));
6782         WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_LO, lower_32_bits(comp_addr));
6783         WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_HI, upper_32_bits(comp_addr));
6784         WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_WDATA, comp_val);
6785         WREG32(mmARC_FARM_KDMA_CTX_DST_TSIZE_0, size);
6786
6787         commit_mask = FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_LIN_MASK, 1) |
6788                                 FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_WR_COMP_EN_MASK, 1);
6789
6790         if (is_memset)
6791                 commit_mask |= FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_MEM_SET_MASK, 1);
6792
6793         WREG32(mmARC_FARM_KDMA_CTX_COMMIT, commit_mask);
6794
6795         /* Wait for completion */
6796         cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_KDMA_COMPLETION];
6797         cq_base = cq->kernel_address;
6798         polling_addr = (u32 *)&cq_base[cq->ci];
6799
6800         if (hdev->pldm)
6801                 /* 20 seconds of timeout for each 1MB */
6802                 timeout = ((size / SZ_1M) + 1) * USEC_PER_SEC * 20;
6803         else
6804                 timeout = KDMA_TIMEOUT_USEC;
6805
6806         /* Polling */
6807         rc = hl_poll_timeout_memory(
6808                         hdev,
6809                         polling_addr,
6810                         status,
6811                         (status == 1),
6812                         1000,
6813                         timeout,
6814                         true);
6815
6816         *polling_addr = 0;
6817
6818         if (rc) {
6819                 dev_err(hdev->dev, "Timeout while waiting for KDMA to be idle\n");
6820                 WREG32(mmARC_FARM_KDMA_CFG_1, 1 << ARC_FARM_KDMA_CFG_1_HALT_SHIFT);
6821                 return rc;
6822         }
6823
6824         cq->ci = hl_cq_inc_ptr(cq->ci);
6825
6826         return 0;
6827 }
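/*
 * The KDMA submission above, in short (derived from this function): arm the
 * reserved SOB/monitor/CQ trio, program src/dst/size plus the write-completion
 * address (the reserved SOB), commit the transfer (optionally as a memset) and
 * poll the reserved CQ entry until the completion value arrives.
 */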
6828
6829 static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val)
6830 {
6831         u32 i;
6832
6833         for (i = 0 ; i < size ; i += sizeof(u32))
6834                 WREG32(addr + i, val);
6835 }
6836
6837 static void gaudi2_qman_set_test_mode(struct hl_device *hdev, u32 hw_queue_id, bool enable)
6838 {
6839         u32 reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
6840
6841         if (enable) {
6842                 WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED_TEST_MODE);
6843                 WREG32(reg_base + QM_PQC_CFG_OFFSET, 0);
6844         } else {
6845                 WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED);
6846                 WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT);
6847         }
6848 }
6849
6850 static inline u32 gaudi2_test_queue_hw_queue_id_to_sob_id(struct hl_device *hdev, u32 hw_queue_id)
6851 {
6852         return hdev->asic_prop.first_available_user_sob[0] +
6853                                 hw_queue_id - GAUDI2_QUEUE_ID_PDMA_0_0;
6854 }
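/*
 * Example (illustrative): the queue immediately following
 * GAUDI2_QUEUE_ID_PDMA_0_0 maps to first_available_user_sob[0] + 1, so every
 * tested queue gets a distinct user SOB for its test message.
 */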
6855
6856 static void gaudi2_test_queue_clear(struct hl_device *hdev, u32 hw_queue_id)
6857 {
6858         u32 sob_offset = gaudi2_test_queue_hw_queue_id_to_sob_id(hdev, hw_queue_id) * 4;
6859         u32 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
6860
6861         /* Reset the SOB value */
6862         WREG32(sob_addr, 0);
6863 }
6864
6865 static int gaudi2_test_queue_send_msg_short(struct hl_device *hdev, u32 hw_queue_id, u32 sob_val,
6866                                             struct gaudi2_queues_test_info *msg_info)
6867 {
6868         u32 sob_offset = gaudi2_test_queue_hw_queue_id_to_sob_id(hdev, hw_queue_id) * 4;
6869         u32 tmp, sob_base = 1;
6870         struct packet_msg_short *msg_short_pkt = msg_info->kern_addr;
6871         size_t pkt_size = sizeof(struct packet_msg_short);
6872         int rc;
6873
6874         tmp = (PACKET_MSG_SHORT << GAUDI2_PKT_CTL_OPCODE_SHIFT) |
6875                 (1 << GAUDI2_PKT_CTL_EB_SHIFT) |
6876                 (1 << GAUDI2_PKT_CTL_MB_SHIFT) |
6877                 (sob_base << GAUDI2_PKT_SHORT_CTL_BASE_SHIFT) |
6878                 (sob_offset << GAUDI2_PKT_SHORT_CTL_ADDR_SHIFT);
6879
6880         msg_short_pkt->value = cpu_to_le32(sob_val);
6881         msg_short_pkt->ctl = cpu_to_le32(tmp);
6882
6883         rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, msg_info->dma_addr);
6884         if (rc)
6885                 dev_err(hdev->dev,
6886                         "Failed to send msg_short packet to H/W queue %d\n", hw_queue_id);
6887
6888         return rc;
6889 }
6890
6891 static int gaudi2_test_queue_wait_completion(struct hl_device *hdev, u32 hw_queue_id, u32 sob_val)
6892 {
6893         u32 sob_offset = gaudi2_test_queue_hw_queue_id_to_sob_id(hdev, hw_queue_id) * 4;
6894         u32 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
6895         u32 timeout_usec, tmp;
6896         int rc;
6897
6898         if (hdev->pldm)
6899                 timeout_usec = GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC;
6900         else
6901                 timeout_usec = GAUDI2_TEST_QUEUE_WAIT_USEC;
6902
6903         rc = hl_poll_timeout(
6904                         hdev,
6905                         sob_addr,
6906                         tmp,
6907                         (tmp == sob_val),
6908                         1000,
6909                         timeout_usec);
6910
6911         if (rc == -ETIMEDOUT) {
6912                 dev_err(hdev->dev, "H/W queue %d test failed (SOB_OBJ_0 == 0x%x)\n",
6913                         hw_queue_id, tmp);
6914                 rc = -EIO;
6915         }
6916
6917         return rc;
6918 }
6919
6920 static int gaudi2_test_cpu_queue(struct hl_device *hdev)
6921 {
6922         struct gaudi2_device *gaudi2 = hdev->asic_specific;
6923
6924         /*
6925          * check the capability here, as send_cpu_message() won't update the
6926          * result value if the capability is missing
6927          */
6928         if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
6929                 return 0;
6930
6931         return hl_fw_test_cpu_queue(hdev);
6932 }
6933
6934 static int gaudi2_test_queues(struct hl_device *hdev)
6935 {
6936         struct gaudi2_device *gaudi2 = hdev->asic_specific;
6937         struct gaudi2_queues_test_info *msg_info;
6938         u32 sob_val = 0x5a5a;
6939         int i, rc;
6940
6941         /* send test message on all enabled Qs */
6942         for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) {
6943                 if (!gaudi2_is_queue_enabled(hdev, i))
6944                         continue;
6945
6946                 msg_info = &gaudi2->queues_test_info[i - GAUDI2_QUEUE_ID_PDMA_0_0];
6947                 gaudi2_qman_set_test_mode(hdev, i, true);
6948                 gaudi2_test_queue_clear(hdev, i);
6949                 rc = gaudi2_test_queue_send_msg_short(hdev, i, sob_val, msg_info);
6950                 if (rc)
6951                         goto done;
6952         }
6953
6954         rc = gaudi2_test_cpu_queue(hdev);
6955         if (rc)
6956                 goto done;
6957
6958         /* verify that all messages were processed */
6959         for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) {
6960                 if (!gaudi2_is_queue_enabled(hdev, i))
6961                         continue;
6962
6963                 rc = gaudi2_test_queue_wait_completion(hdev, i, sob_val);
6964                 if (rc)
6965                         /* chip is not usable, no need for cleanups, just bail-out with error */
6966                         goto done;
6967
6968                 gaudi2_test_queue_clear(hdev, i);
6969                 gaudi2_qman_set_test_mode(hdev, i, false);
6970         }
6971
6972 done:
6973         return rc;
6974 }
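/*
 * The per-queue test above, in short (derived from this function): enter test
 * mode, clear the queue's SOB, send a MSG_SHORT packet that writes 0x5a5a to
 * that SOB, then poll the SOB until it holds the sent value and restore
 * normal mode.
 */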
6975
6976 static int gaudi2_compute_reset_late_init(struct hl_device *hdev)
6977 {
6978         struct gaudi2_device *gaudi2 = hdev->asic_specific;
6979         size_t irq_arr_size;
6980         int rc;
6981
6982         gaudi2_init_arcs(hdev);
6983
6984         rc = gaudi2_scrub_arcs_dccm(hdev);
6985         if (rc) {
6986                 dev_err(hdev->dev, "Failed to scrub arcs DCCM\n");
6987                 return rc;
6988         }
6989
6990         gaudi2_init_security(hdev);
6991
6992         /* Unmask all IRQs since some could have been received during the soft reset */
6993         irq_arr_size = gaudi2->num_of_valid_hw_events * sizeof(gaudi2->hw_events[0]);
6994         return hl_fw_unmask_irq_arr(hdev, gaudi2->hw_events, irq_arr_size);
6995 }
6996
6997 static bool gaudi2_get_edma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
6998                 struct engines_data *e)
6999 {
7000         u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_sts0, dma_core_sts1;
7001         struct asic_fixed_properties *prop = &hdev->asic_prop;
7002         unsigned long *mask = (unsigned long *) mask_arr;
7003         const char *edma_fmt = "%-6d%-6d%-9s%#-14x%#-15x%#x\n";
7004         bool is_idle = true, is_eng_idle;
7005         int engine_idx, i, j;
7006         u64 offset;
7007
7008         if (e)
7009                 hl_engine_data_sprintf(e,
7010                         "\nCORE  EDMA  is_idle  QM_GLBL_STS0  DMA_CORE_STS0  DMA_CORE_STS1\n"
7011                         "----  ----  -------  ------------  -------------  -------------\n");
7012
7013         for (i = 0; i < NUM_OF_DCORES; i++) {
7014                 for (j = 0 ; j < NUM_OF_EDMA_PER_DCORE ; j++) {
7015                         int seq = i * NUM_OF_EDMA_PER_DCORE + j;
7016
7017                         if (!(prop->edma_enabled_mask & BIT(seq)))
7018                                 continue;
7019
7020                         engine_idx = GAUDI2_DCORE0_ENGINE_ID_EDMA_0 +
7021                                         i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j;
7022                         offset = i * DCORE_OFFSET + j * DCORE_EDMA_OFFSET;
7023
7024                         dma_core_sts0 = RREG32(mmDCORE0_EDMA0_CORE_STS0 + offset);
7025                         dma_core_sts1 = RREG32(mmDCORE0_EDMA0_CORE_STS1 + offset);
7026
7027                         qm_glbl_sts0 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS0 + offset);
7028                         qm_glbl_sts1 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS1 + offset);
7029                         qm_cgm_sts = RREG32(mmDCORE0_EDMA0_QM_CGM_STS + offset);
7030
7031                         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
7032                                         IS_DMA_IDLE(dma_core_sts0) && !IS_DMA_HALTED(dma_core_sts1);
7033                         is_idle &= is_eng_idle;
7034
7035                         if (mask && !is_eng_idle)
7036                                 set_bit(engine_idx, mask);
7037
7038                         if (e)
7039                                 hl_engine_data_sprintf(e, edma_fmt, i, j, is_eng_idle ? "Y" : "N",
7040                                                         qm_glbl_sts0, dma_core_sts0, dma_core_sts1);
7041                 }
7042         }
7043
7044         return is_idle;
7045 }
7046
7047 static bool gaudi2_get_pdma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7048                 struct engines_data *e)
7049 {
7050         u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_sts0, dma_core_sts1;
7051         unsigned long *mask = (unsigned long *) mask_arr;
7052         const char *pdma_fmt = "%-6d%-9s%#-14x%#-15x%#x\n";
7053         bool is_idle = true, is_eng_idle;
7054         int engine_idx, i;
7055         u64 offset;
7056
7057         if (e)
7058                 hl_engine_data_sprintf(e,
7059                                         "\nPDMA  is_idle  QM_GLBL_STS0  DMA_CORE_STS0  DMA_CORE_STS1\n"
7060                                         "----  -------  ------------  -------------  -------------\n");
7061
7062         for (i = 0 ; i < NUM_OF_PDMA ; i++) {
7063                 engine_idx = GAUDI2_ENGINE_ID_PDMA_0 + i;
7064                 offset = i * PDMA_OFFSET;
7065                 dma_core_sts0 = RREG32(mmPDMA0_CORE_STS0 + offset);
7066                 dma_core_sts1 = RREG32(mmPDMA0_CORE_STS1 + offset);
7067
7068                 qm_glbl_sts0 = RREG32(mmPDMA0_QM_GLBL_STS0 + offset);
7069                 qm_glbl_sts1 = RREG32(mmPDMA0_QM_GLBL_STS1 + offset);
7070                 qm_cgm_sts = RREG32(mmPDMA0_QM_CGM_STS + offset);
7071
7072                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
7073                                 IS_DMA_IDLE(dma_core_sts0) && !IS_DMA_HALTED(dma_core_sts1);
7074                 is_idle &= is_eng_idle;
7075
7076                 if (mask && !is_eng_idle)
7077                         set_bit(engine_idx, mask);
7078
7079                 if (e)
7080                         hl_engine_data_sprintf(e, pdma_fmt, i, is_eng_idle ? "Y" : "N",
7081                                                 qm_glbl_sts0, dma_core_sts0, dma_core_sts1);
7082         }
7083
7084         return is_idle;
7085 }
7086
7087 static bool gaudi2_get_nic_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7088                 struct engines_data *e)
7089 {
7090         unsigned long *mask = (unsigned long *) mask_arr;
7091         const char *nic_fmt = "%-5d%-9s%#-14x%#-12x\n";
7092         u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
7093         bool is_idle = true, is_eng_idle;
7094         int engine_idx, i;
7095         u64 offset = 0;
7096
7097         /* NIC, twelve macros in a full chip, two QMANs per macro */
7098         if (e && hdev->nic_ports_mask)
7099                 hl_engine_data_sprintf(e,
7100                                         "\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
7101                                         "---  -------  ------------  ----------\n");
7102
7103         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
7104                 if (!(i & 1))
7105                         offset = i / 2 * NIC_OFFSET;
7106                 else
7107                         offset += NIC_QM_OFFSET;
7108
7109                 if (!(hdev->nic_ports_mask & BIT(i)))
7110                         continue;
7111
7112                 engine_idx = GAUDI2_ENGINE_ID_NIC0_0 + i;
7113
7114
7115                 qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
7116                 qm_glbl_sts1 = RREG32(mmNIC0_QM0_GLBL_STS1 + offset);
7117                 qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
7118
7119                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
7120                 is_idle &= is_eng_idle;
7121
7122                 if (mask && !is_eng_idle)
7123                         set_bit(engine_idx, mask);
7124
7125                 if (e)
7126                         hl_engine_data_sprintf(e, nic_fmt, i, is_eng_idle ? "Y" : "N",
7127                                                 qm_glbl_sts0, qm_cgm_sts);
7128         }
7129
7130         return is_idle;
7131 }
7132
7133 static bool gaudi2_get_mme_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7134                 struct engines_data *e)
7135 {
7136         u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, mme_arch_sts;
7137         unsigned long *mask = (unsigned long *) mask_arr;
7138         const char *mme_fmt = "%-5d%-6s%-9s%#-14x%#x\n";
7139         bool is_idle = true, is_eng_idle;
7140         int engine_idx, i;
7141         u64 offset;
7142
7143         if (e)
7144                 hl_engine_data_sprintf(e,
7145                                         "\nMME  Stub  is_idle  QM_GLBL_STS0  MME_ARCH_STATUS\n"
7146                                         "---  ----  -------  ------------  ---------------\n");
7147         /* MME, one per Dcore */
7148         for (i = 0 ; i < NUM_OF_DCORES ; i++) {
7149                 engine_idx = GAUDI2_DCORE0_ENGINE_ID_MME + i * GAUDI2_ENGINE_ID_DCORE_OFFSET;
7150                 offset = i * DCORE_OFFSET;
7151
7152                 qm_glbl_sts0 = RREG32(mmDCORE0_MME_QM_GLBL_STS0 + offset);
7153                 qm_glbl_sts1 = RREG32(mmDCORE0_MME_QM_GLBL_STS1 + offset);
7154                 qm_cgm_sts = RREG32(mmDCORE0_MME_QM_CGM_STS + offset);
7155
7156                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
7157                 is_idle &= is_eng_idle;
7158
7159                 mme_arch_sts = RREG32(mmDCORE0_MME_CTRL_LO_ARCH_STATUS + offset);
7160                 is_eng_idle &= IS_MME_IDLE(mme_arch_sts);
7161                 is_idle &= is_eng_idle;
7162
7163                 if (e)
7164                         hl_engine_data_sprintf(e, mme_fmt, i, "N",
7165                                 is_eng_idle ? "Y" : "N",
7166                                 qm_glbl_sts0,
7167                                 mme_arch_sts);
7168
7169                 if (mask && !is_eng_idle)
7170                         set_bit(engine_idx, mask);
7171         }
7172
7173         return is_idle;
7174 }
7175
7176 static void gaudi2_is_tpc_engine_idle(struct hl_device *hdev, int dcore, int inst, u32 offset,
7177                                         struct iterate_module_ctx *ctx)
7178 {
7179         struct gaudi2_tpc_idle_data *idle_data = ctx->data;
7180         u32 tpc_cfg_sts, qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
7181         bool is_eng_idle;
7182         int engine_idx;
7183
7184         if ((dcore == 0) && (inst == (NUM_DCORE0_TPC - 1)))
7185                 engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_6;
7186         else
7187                 engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_0 +
7188                                 dcore * GAUDI2_ENGINE_ID_DCORE_OFFSET + inst;
7189
7190         tpc_cfg_sts = RREG32(mmDCORE0_TPC0_CFG_STATUS + offset);
7191         qm_glbl_sts0 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS0 + offset);
7192         qm_glbl_sts1 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS1 + offset);
7193         qm_cgm_sts = RREG32(mmDCORE0_TPC0_QM_CGM_STS + offset);
7194
7195         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
7196                                                 IS_TPC_IDLE(tpc_cfg_sts);
7197         *(idle_data->is_idle) &= is_eng_idle;
7198
7199         if (idle_data->mask && !is_eng_idle)
7200                 set_bit(engine_idx, idle_data->mask);
7201
7202         if (idle_data->e)
7203                 hl_engine_data_sprintf(idle_data->e,
7204                                         idle_data->tpc_fmt, dcore, inst,
7205                                         is_eng_idle ? "Y" : "N",
7206                                         qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
7207 }
7208
7209 static bool gaudi2_get_tpc_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7210                 struct engines_data *e)
7211 {
7212         struct asic_fixed_properties *prop = &hdev->asic_prop;
7213         unsigned long *mask = (unsigned long *) mask_arr;
7214         bool is_idle = true;
7215
7216         struct gaudi2_tpc_idle_data tpc_idle_data = {
7217                 .tpc_fmt = "%-6d%-5d%-9s%#-14x%#-12x%#x\n",
7218                 .e = e,
7219                 .mask = mask,
7220                 .is_idle = &is_idle,
7221         };
7222         struct iterate_module_ctx tpc_iter = {
7223                 .fn = &gaudi2_is_tpc_engine_idle,
7224                 .data = &tpc_idle_data,
7225         };
7226
7227         if (e && prop->tpc_enabled_mask)
7228                 hl_engine_data_sprintf(e,
7229                         "\nCORE  TPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  STATUS\n"
7230                         "----  ---  -------  ------------  ----------  ------\n");
7231
7232         gaudi2_iterate_tpcs(hdev, &tpc_iter);
7233
7234         return *tpc_idle_data.is_idle;
7235 }
7236
7237 static bool gaudi2_get_decoder_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7238                 struct engines_data *e)
7239 {
7240         struct asic_fixed_properties *prop = &hdev->asic_prop;
7241         unsigned long *mask = (unsigned long *) mask_arr;
7242         const char *pcie_dec_fmt = "%-10d%-9s%#x\n";
7243         const char *dec_fmt = "%-6d%-5d%-9s%#x\n";
7244         bool is_idle = true, is_eng_idle;
7245         u32 dec_swreg15, dec_enabled_bit;
7246         int engine_idx, i, j;
7247         u64 offset;
7248
7249         /* Decoders, two per Dcore plus two shared PCIe decoders */
7250         if (e && (prop->decoder_enabled_mask & (~PCIE_DEC_EN_MASK)))
7251                 hl_engine_data_sprintf(e,
7252                         "\nCORE  DEC  is_idle  VSI_CMD_SWREG15\n"
7253                         "----  ---  -------  ---------------\n");
7254
7255         for (i = 0 ; i < NUM_OF_DCORES ; i++) {
7256                 for (j = 0 ; j < NUM_OF_DEC_PER_DCORE ; j++) {
7257                         dec_enabled_bit = 1 << (i * NUM_OF_DEC_PER_DCORE + j);
7258                         if (!(prop->decoder_enabled_mask & dec_enabled_bit))
7259                                 continue;
7260
7261                         engine_idx = GAUDI2_DCORE0_ENGINE_ID_DEC_0 +
7262                                         i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j;
7263                         offset = i * DCORE_OFFSET + j * DCORE_DEC_OFFSET;
7264
7265                         dec_swreg15 = RREG32(mmDCORE0_DEC0_CMD_SWREG15 + offset);
7266                         is_eng_idle = IS_DEC_IDLE(dec_swreg15);
7267                         is_idle &= is_eng_idle;
7268
7269                         if (mask && !is_eng_idle)
7270                                 set_bit(engine_idx, mask);
7271
7272                         if (e)
7273                                 hl_engine_data_sprintf(e, dec_fmt, i, j,
7274                                                         is_eng_idle ? "Y" : "N", dec_swreg15);
7275                 }
7276         }
7277
7278         if (e && (prop->decoder_enabled_mask & PCIE_DEC_EN_MASK))
7279                 hl_engine_data_sprintf(e,
7280                         "\nPCIe DEC  is_idle  VSI_CMD_SWREG15\n"
7281                         "--------  -------  ---------------\n");
7282
7283         /* Check shared (PCIe) decoders */
7284         for (i = 0 ; i < NUM_OF_DEC_PER_DCORE ; i++) {
7285                 dec_enabled_bit = PCIE_DEC_SHIFT + i;
7286                 if (!(prop->decoder_enabled_mask & BIT(dec_enabled_bit)))
7287                         continue;
7288
7289                 engine_idx = GAUDI2_PCIE_ENGINE_ID_DEC_0 + i;
7290                 offset = i * DCORE_DEC_OFFSET;
7291                 dec_swreg15 = RREG32(mmPCIE_DEC0_CMD_SWREG15 + offset);
7292                 is_eng_idle = IS_DEC_IDLE(dec_swreg15);
7293                 is_idle &= is_eng_idle;
7294
7295                 if (mask && !is_eng_idle)
7296                         set_bit(engine_idx, mask);
7297
7298                 if (e)
7299                         hl_engine_data_sprintf(e, pcie_dec_fmt, i,
7300                                                 is_eng_idle ? "Y" : "N", dec_swreg15);
7301         }
7302
7303         return is_idle;
7304 }
7305
7306 static bool gaudi2_get_rotator_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7307                 struct engines_data *e)
7308 {
7309         const char *rot_fmt = "%-6d%-5d%-9s%#-14x%#-14x%#x\n";
7310         unsigned long *mask = (unsigned long *) mask_arr;
7311         u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
7312         bool is_idle = true, is_eng_idle;
7313         int engine_idx, i;
7314         u64 offset;
7315
7316         if (e)
7317                 hl_engine_data_sprintf(e,
7318                         "\nCORE  ROT  is_idle  QM_GLBL_STS0  QM_GLBL_STS1  QM_CGM_STS\n"
7319                         "----  ---  -------  ------------  ------------  ----------\n");
7320
7321         for (i = 0 ; i < NUM_OF_ROT ; i++) {
7322                 engine_idx = GAUDI2_ENGINE_ID_ROT_0 + i;
7323
7324                 offset = i * ROT_OFFSET;
7325
7326                 qm_glbl_sts0 = RREG32(mmROT0_QM_GLBL_STS0 + offset);
7327                 qm_glbl_sts1 = RREG32(mmROT0_QM_GLBL_STS1 + offset);
7328                 qm_cgm_sts = RREG32(mmROT0_QM_CGM_STS + offset);
7329
7330                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
7331                 is_idle &= is_eng_idle;
7332
7333                 if (mask && !is_eng_idle)
7334                         set_bit(engine_idx, mask);
7335
7336                 if (e)
7337                         hl_engine_data_sprintf(e, rot_fmt, i, 0, is_eng_idle ? "Y" : "N",
7338                                                 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
7339         }
7340
7341         return is_idle;
7342 }
7343
7344 static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7345                                         struct engines_data *e)
7346 {
7347         bool is_idle = true;
7348
7349         is_idle &= gaudi2_get_edma_idle_status(hdev, mask_arr, mask_len, e);
7350         is_idle &= gaudi2_get_pdma_idle_status(hdev, mask_arr, mask_len, e);
7351         is_idle &= gaudi2_get_nic_idle_status(hdev, mask_arr, mask_len, e);
7352         is_idle &= gaudi2_get_mme_idle_status(hdev, mask_arr, mask_len, e);
7353         is_idle &= gaudi2_get_tpc_idle_status(hdev, mask_arr, mask_len, e);
7354         is_idle &= gaudi2_get_decoder_idle_status(hdev, mask_arr, mask_len, e);
7355         is_idle &= gaudi2_get_rotator_idle_status(hdev, mask_arr, mask_len, e);
7356
7357         return is_idle;
7358 }
7359
7360 static void gaudi2_hw_queues_lock(struct hl_device *hdev)
7361         __acquires(&gaudi2->hw_queues_lock)
7362 {
7363         struct gaudi2_device *gaudi2 = hdev->asic_specific;
7364
7365         spin_lock(&gaudi2->hw_queues_lock);
7366 }
7367
7368 static void gaudi2_hw_queues_unlock(struct hl_device *hdev)
7369         __releases(&gaudi2->hw_queues_lock)
7370 {
7371         struct gaudi2_device *gaudi2 = hdev->asic_specific;
7372
7373         spin_unlock(&gaudi2->hw_queues_lock);
7374 }
7375
7376 static u32 gaudi2_get_pci_id(struct hl_device *hdev)
7377 {
7378         return hdev->pdev->device;
7379 }
7380
7381 static int gaudi2_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
7382 {
7383         struct gaudi2_device *gaudi2 = hdev->asic_specific;
7384
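             /* EEPROM data is fetched through the f/w, so the CPU queue must be up */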
7385         if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
7386                 return 0;
7387
7388         return hl_fw_get_eeprom_data(hdev, data, max_size);
7389 }
7390
7391 static void gaudi2_update_eq_ci(struct hl_device *hdev, u32 val)
7392 {
7393         WREG32(mmCPU_IF_EQ_RD_OFFS, val);
7394 }
7395
7396 static void *gaudi2_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
7397 {
7398         struct gaudi2_device *gaudi2 = hdev->asic_specific;
7399
7400         if (aggregate) {
7401                 *size = (u32) sizeof(gaudi2->events_stat_aggregate);
7402                 return gaudi2->events_stat_aggregate;
7403         }
7404
7405         *size = (u32) sizeof(gaudi2->events_stat);
7406         return gaudi2->events_stat;
7407 }
7408
7409 static void gaudi2_mmu_vdec_dcore_prepare(struct hl_device *hdev, int dcore_id,
7410                                 int dcore_vdec_id, u32 rw_asid, u32 rw_mmu_bp)
7411 {
7412         u32 offset = (mmDCORE0_VDEC1_BRDG_CTRL_BASE - mmDCORE0_VDEC0_BRDG_CTRL_BASE) *
7413                         dcore_vdec_id + DCORE_OFFSET * dcore_id;
7414
7415         WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp);
7416         WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid);
7417
7418         WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp);
7419         WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid);
7420
7421         WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp);
7422         WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid);
7423
7424         WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp);
7425         WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid);
7426
7427         WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp);
7428         WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid);
7429 }
7430
7431 static void gaudi2_mmu_dcore_prepare(struct hl_device *hdev, int dcore_id, u32 asid)
7432 {
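             /* the same ASID is used for both read and write transactions */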
7433         u32 rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
7434                         (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
7435         struct asic_fixed_properties *prop = &hdev->asic_prop;
7436         u32 dcore_offset = dcore_id * DCORE_OFFSET;
7437         u32 vdec_id, i, ports_offset, reg_val;
7438         u8 edma_seq_base;
7439
7440         /* EDMA */
7441         edma_seq_base = dcore_id * NUM_OF_EDMA_PER_DCORE;
7442         if (prop->edma_enabled_mask & BIT(edma_seq_base)) {
7443                 WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
7444                 WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
7445                 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0);
7446                 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid);
7447         }
7448
7449         if (prop->edma_enabled_mask & BIT(edma_seq_base + 1)) {
7450                 WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
7451                 WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
7452                 WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid);
7453                 WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0);
7454         }
7455
7456         /* Sync Mngr */
7457         WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV + dcore_offset, asid);
7458         /*
7459          * Sync Mngrs on dcores 1-3 are exposed to the user, so the user ASID
7460          * must be used for any access type
7461          */
7462         if (dcore_id > 0) {
7463                 reg_val = (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_RD_SHIFT) |
7464                           (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_WR_SHIFT);
7465                 WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID + dcore_offset, reg_val);
7466                 WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_MMU_BP + dcore_offset, 0);
7467         }
7468
7469         WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_MMU_BP + dcore_offset, 0);
7470         WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_ASID + dcore_offset, rw_asid);
7471
7472         for (i = 0 ; i < NUM_OF_MME_SBTE_PORTS ; i++) {
7473                 ports_offset = i * DCORE_MME_SBTE_OFFSET;
7474                 WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_MMU_BP +
7475                                 dcore_offset + ports_offset, 0);
7476                 WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_ASID +
7477                                 dcore_offset + ports_offset, rw_asid);
7478         }
7479
7480         for (i = 0 ; i < NUM_OF_MME_WB_PORTS ; i++) {
7481                 ports_offset = i * DCORE_MME_WB_OFFSET;
7482                 WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_MMU_BP +
7483                                 dcore_offset + ports_offset, 0);
7484                 WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_ASID +
7485                                 dcore_offset + ports_offset, rw_asid);
7486         }
7487
7488         WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
7489         WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
7490
7491         /*
7492          * Decoders
7493          */
7494         for (vdec_id = 0 ; vdec_id < NUM_OF_DEC_PER_DCORE ; vdec_id++) {
7495                 if (prop->decoder_enabled_mask & BIT(dcore_id * NUM_OF_DEC_PER_DCORE + vdec_id))
7496                         gaudi2_mmu_vdec_dcore_prepare(hdev, dcore_id, vdec_id, rw_asid, 0);
7497         }
7498 }
7499
7500 static void gaudi2_mmu_vdec_shared_prepare(struct hl_device *hdev,
7501                                 int shared_vdec_id, u32 rw_asid, u32 rw_mmu_bp)
7502 {
7503         u32 offset = (mmPCIE_VDEC1_BRDG_CTRL_BASE - mmPCIE_VDEC0_BRDG_CTRL_BASE) * shared_vdec_id;
7504
7505         WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp);
7506         WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid);
7507
7508         WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp);
7509         WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid);
7510
7511         WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp);
7512         WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid);
7513
7514         WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp);
7515         WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid);
7516
7517         WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp);
7518         WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid);
7519 }
7520
7521 static void gaudi2_mmu_arc_farm_arc_dup_eng_prepare(struct hl_device *hdev, int arc_farm_id,
7522                                                         u32 rw_asid, u32 rw_mmu_bp)
7523 {
7524         u32 offset = (mmARC_FARM_ARC1_DUP_ENG_BASE - mmARC_FARM_ARC0_DUP_ENG_BASE) * arc_farm_id;
7525
7526         WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_MMU_BP + offset, rw_mmu_bp);
7527         WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_ASID + offset, rw_asid);
7528 }
7529
7530 static void gaudi2_arc_mmu_prepare(struct hl_device *hdev, u32 cpu_id, u32 asid)
7531 {
7532         u32 reg_base, reg_offset, reg_val = 0;
7533
7534         reg_base = gaudi2_arc_blocks_bases[cpu_id];
7535
7536         /* Enable MMU and configure asid for all relevant ARC regions */
7537         reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_MMU_BP_MASK, 0);
7538         reg_val |= FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_0_ASID_MASK, asid);
7539
7540         reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION3_GENERAL);
7541         WREG32(reg_base + reg_offset, reg_val);
7542
7543         reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION4_HBM0_FW);
7544         WREG32(reg_base + reg_offset, reg_val);
7545
7546         reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION5_HBM1_GC_DATA);
7547         WREG32(reg_base + reg_offset, reg_val);
7548
7549         reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION6_HBM2_GC_DATA);
7550         WREG32(reg_base + reg_offset, reg_val);
7551
7552         reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION7_HBM3_GC_DATA);
7553         WREG32(reg_base + reg_offset, reg_val);
7554
7555         reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION9_PCIE);
7556         WREG32(reg_base + reg_offset, reg_val);
7557
7558         reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION10_GENERAL);
7559         WREG32(reg_base + reg_offset, reg_val);
7560
7561         reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION11_GENERAL);
7562         WREG32(reg_base + reg_offset, reg_val);
7563
7564         reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION12_GENERAL);
7565         WREG32(reg_base + reg_offset, reg_val);
7566
7567         reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION13_GENERAL);
7568         WREG32(reg_base + reg_offset, reg_val);
7569
7570         reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION14_GENERAL);
7571         WREG32(reg_base + reg_offset, reg_val);
7572 }
7573
7574 static int gaudi2_arc_mmu_prepare_all(struct hl_device *hdev, u32 asid)
7575 {
7576         int i;
7577
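             /* when the boot CPU f/w is loaded, delegate the engine-cores ASID configuration to it */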
7578         if (hdev->fw_components & FW_TYPE_BOOT_CPU)
7579                 return hl_fw_cpucp_engine_core_asid_set(hdev, asid);
7580
7581         for (i = CPU_ID_SCHED_ARC0 ; i < NUM_OF_ARC_FARMS_ARC ; i++)
7582                 gaudi2_arc_mmu_prepare(hdev, i, asid);
7583
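             /* advance in steps of 4 - one ARC per QMAN, i.e. per 4 streams */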
7584         for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) {
7585                 if (!gaudi2_is_queue_enabled(hdev, i))
7586                         continue;
7587
7588                 gaudi2_arc_mmu_prepare(hdev, gaudi2_queue_id_to_arc_id[i], asid);
7589         }
7590
7591         return 0;
7592 }
7593
7594 static int gaudi2_mmu_shared_prepare(struct hl_device *hdev, u32 asid)
7595 {
7596         struct asic_fixed_properties *prop = &hdev->asic_prop;
7597         u32 rw_asid, offset;
7598         int rc, i;
7599
7600         rw_asid = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_MASK, asid) |
7601                         FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_MASK, asid);
7602
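             /* PDMA */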
7603         WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_ASID, rw_asid);
7604         WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP, 0);
7605         WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_ASID, rw_asid);
7606         WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_MMU_BP, 0);
7607
7608         WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_ASID, rw_asid);
7609         WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP, 0);
7610         WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_ASID, rw_asid);
7611         WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_MMU_BP, 0);
7612
7613         /* ROT */
7614         for (i = 0 ; i < NUM_OF_ROT ; i++) {
7615                 offset = i * ROT_OFFSET;
7616                 WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_ASID + offset, rw_asid);
7617                 WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0);
7618                 RMWREG32(mmROT0_CPL_QUEUE_AWUSER + offset, asid, MMUBP_ASID_MASK);
7619                 RMWREG32(mmROT0_DESC_HBW_ARUSER_LO + offset, asid, MMUBP_ASID_MASK);
7620                 RMWREG32(mmROT0_DESC_HBW_AWUSER_LO + offset, asid, MMUBP_ASID_MASK);
7621         }
7622
7623         /* Shared Decoders are the last bits in the decoders mask */
7624         if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 0))
7625                 gaudi2_mmu_vdec_shared_prepare(hdev, 0, rw_asid, 0);
7626
7627         if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 1))
7628                 gaudi2_mmu_vdec_shared_prepare(hdev, 1, rw_asid, 0);
7629
7630         /* ARC farm ARC dup engines */
7631         for (i = 0 ; i < NUM_OF_ARC_FARMS_ARC ; i++)
7632                 gaudi2_mmu_arc_farm_arc_dup_eng_prepare(hdev, i, rw_asid, 0);
7633
7634         rc = gaudi2_arc_mmu_prepare_all(hdev, asid);
7635         if (rc)
7636                 return rc;
7637
7638         return 0;
7639 }
7640
7641 static void gaudi2_tpc_mmu_prepare(struct hl_device *hdev, int dcore, int inst, u32 offset,
7642                                         struct iterate_module_ctx *ctx)
7643 {
7644         struct gaudi2_tpc_mmu_data *mmu_data = ctx->data;
7645
7646         WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_MMU_BP + offset, 0);
7647         WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_ASID + offset, mmu_data->rw_asid);
7648         WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0);
7649         WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_ASID + offset, mmu_data->rw_asid);
7650 }
7651
7652 /* zero the MMUBP and set the ASID */
7653 static int gaudi2_mmu_prepare(struct hl_device *hdev, u32 asid)
7654 {
7655         struct gaudi2_device *gaudi2 = hdev->asic_specific;
7656         struct gaudi2_tpc_mmu_data tpc_mmu_data;
7657         struct iterate_module_ctx tpc_iter = {
7658                 .fn = &gaudi2_tpc_mmu_prepare,
7659                 .data = &tpc_mmu_data,
7660         };
7661         int rc, i;
7662
7663         if (asid & ~DCORE0_HMMU0_STLB_ASID_ASID_MASK) {
7664                 dev_crit(hdev->dev, "asid %u is too big\n", asid);
7665                 return -EINVAL;
7666         }
7667
7668         if (!(gaudi2->hw_cap_initialized & HW_CAP_MMU_MASK))
7669                 return 0;
7670
7671         rc = gaudi2_mmu_shared_prepare(hdev, asid);
7672         if (rc)
7673                 return rc;
7674
7675         /* configure DCORE MMUs */
7676         tpc_mmu_data.rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
7677                                 (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
7678         gaudi2_iterate_tpcs(hdev, &tpc_iter);
7679         for (i = 0 ; i < NUM_OF_DCORES ; i++)
7680                 gaudi2_mmu_dcore_prepare(hdev, i, asid);
7681
7682         return 0;
7683 }
7684
7685 static inline bool is_info_event(u32 event)
7686 {
7687         switch (event) {
7688         case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
7689         case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S ... GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
7690
7691         /* return true for NIC status events - these events are received periodically
7692          * and are not an indication of an error.
7693          */
7694         case GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG0 ... GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1:
7695                 return true;
7696         default:
7697                 return false;
7698         }
7699 }
7700
7701 static void gaudi2_print_event(struct hl_device *hdev, u16 event_type,
7702                         bool ratelimited, const char *fmt, ...)
7703 {
7704         struct va_format vaf;
7705         va_list args;
7706
7707         va_start(args, fmt);
7708         vaf.fmt = fmt;
7709         vaf.va = &args;
7710
7711         if (ratelimited)
7712                 dev_err_ratelimited(hdev->dev, "%s: %pV\n",
7713                         gaudi2_irq_map_table[event_type].valid ?
7714                         gaudi2_irq_map_table[event_type].name : "N/A Event", &vaf);
7715         else
7716                 dev_err(hdev->dev, "%s: %pV\n",
7717                         gaudi2_irq_map_table[event_type].valid ?
7718                         gaudi2_irq_map_table[event_type].name : "N/A Event", &vaf);
7719
7720         va_end(args);
7721 }
7722
7723 static bool gaudi2_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7724                 struct hl_eq_ecc_data *ecc_data)
7725 {
7726         u64 ecc_address = 0, ecc_syndrom = 0;
7727         u8 memory_wrapper_idx = 0;
7728
7729         ecc_address = le64_to_cpu(ecc_data->ecc_address);
7730         ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7731         memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7732
7733         gaudi2_print_event(hdev, event_type, !ecc_data->is_critical,
7734                 "ECC error detected. address: %#llx. Syndrome: %#llx. block id %u. critical %u.",
7735                 ecc_address, ecc_syndrom, memory_wrapper_idx, ecc_data->is_critical);
7736
7737         return !!ecc_data->is_critical;
7738 }
7739
7740 /**
7741  * gaudi2_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
7742  *
7743  * @idx: the current pi/ci value
7744  * @q_len: the queue length (power of 2)
7745  *
7746  * Return: the cyclically decremented index
7747  */
7748 static inline u32 gaudi2_queue_idx_dec(u32 idx, u32 q_len)
7749 {
7750         u32 mask = q_len - 1;
7751
7752         /*
7753          * modular decrement is equivalent to adding (q_len - 1);
7754          * masking with (q_len - 1) then keeps the value in the
7755          * range [0, q_len - 1]
7756          */
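             /* e.g. q_len = 8: idx 3 -> 2, idx 0 -> 7 */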
7757         return (idx + q_len - 1) & mask;
7758 }
7759
7760 /**
7761  * gaudi2_print_sw_config_stream_data - print SW config stream data
7762  *
7763  * @hdev: pointer to the habanalabs device structure
7764  * @stream: the QMAN's stream
7765  * @qman_base: base address of QMAN registers block
7766  */
7767 static void gaudi2_print_sw_config_stream_data(struct hl_device *hdev,
7768                                                 u32 stream, u64 qman_base)
7769 {
7770         u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
7771         u32 cq_ptr_lo_off, size;
7772
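             /* stride between the CQ_PTR_LO registers of two consecutive streams */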
7773         cq_ptr_lo_off = mmDCORE0_TPC0_QM_CQ_PTR_LO_1 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0;
7774
7775         cq_ptr_lo = qman_base + (mmDCORE0_TPC0_QM_CQ_PTR_LO_0 - mmDCORE0_TPC0_QM_BASE) +
7776                                                                         stream * cq_ptr_lo_off;
7777
7778         cq_ptr_hi = cq_ptr_lo + (mmDCORE0_TPC0_QM_CQ_PTR_HI_0 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0);
7779
7780         cq_tsize = cq_ptr_lo + (mmDCORE0_TPC0_QM_CQ_TSIZE_0 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0);
7781
7782         cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
7783         size = RREG32(cq_tsize);
7784         dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %x\n",
7785                 stream, cq_ptr, size);
7786 }
7787
7788 /**
7789  * gaudi2_print_last_pqes_on_err - print last PQEs on error
7790  *
7791  * @hdev: pointer to the habanalabs device structure
7792  * @qid_base: first QID of the QMAN (out of 4 streams)
7793  * @stream: the QMAN's stream
7794  * @qman_base: base address of QMAN registers block
7795  * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
7796  */
7797 static void gaudi2_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base, u32 stream,
7798                                                 u64 qman_base, bool pr_sw_conf)
7799 {
7800         u32 ci, qm_ci_stream_off;
7801         struct hl_hw_queue *q;
7802         u64 pq_ci;
7803         int i;
7804
7805         q = &hdev->kernel_queues[qid_base + stream];
7806
7807         qm_ci_stream_off = mmDCORE0_TPC0_QM_PQ_CI_1 - mmDCORE0_TPC0_QM_PQ_CI_0;
7808         pq_ci = qman_base + (mmDCORE0_TPC0_QM_PQ_CI_0 - mmDCORE0_TPC0_QM_BASE) +
7809                                                 stream * qm_ci_stream_off;
7810
7811         hdev->asic_funcs->hw_queues_lock(hdev);
7812
7813         if (pr_sw_conf)
7814                 gaudi2_print_sw_config_stream_data(hdev, stream, qman_base);
7815
7816         ci = RREG32(pq_ci);
7817
7818         /* we should start printing from ci - 1 */
7819         ci = gaudi2_queue_idx_dec(ci, HL_QUEUE_LENGTH);
7820
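             /* walk backwards over the last PQEs that may still be in the PQ fetcher cache */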
7821         for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
7822                 struct hl_bd *bd;
7823                 u64 addr;
7824                 u32 len;
7825
7826                 bd = q->kernel_address;
7827                 bd += ci;
7828
7829                 len = le32_to_cpu(bd->len);
7830                 /* len 0 means an uninitialized entry - break */
7831                 if (!len)
7832                         break;
7833
7834                 addr = le64_to_cpu(bd->ptr);
7835
7836                 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %x\n",
7837                         stream, ci, addr, len);
7838
7839                 /* get previous ci, wrap if needed */
7840                 ci = gaudi2_queue_idx_dec(ci, HL_QUEUE_LENGTH);
7841         }
7842
7843         hdev->asic_funcs->hw_queues_unlock(hdev);
7844 }
7845
7846 /**
7847  * print_qman_data_on_err - extract QMAN data on error
7848  *
7849  * @hdev: pointer to the habanalabs device structure
7850  * @qid_base: first QID of the QMAN (out of 4 streams)
7851  * @stream: the QMAN's stream
7852  * @qman_base: base address of QMAN registers block
7853  *
7854  * This function attempts to extract as much data as possible on a QMAN error:
7855  * for an upper CP, print the SW config stream data and the last 8 PQEs;
7856  * for the lower CP, print the SW config data and the last PQEs of all 4 upper CPs
7857  */
7858 static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base, u32 stream, u64 qman_base)
7859 {
7860         u32 i;
7861
7862         if (stream != QMAN_STREAMS) {
7863                 gaudi2_print_last_pqes_on_err(hdev, qid_base, stream, qman_base, true);
7864                 return;
7865         }
7866
7867         gaudi2_print_sw_config_stream_data(hdev, stream, qman_base);
7868
7869         for (i = 0 ; i < QMAN_STREAMS ; i++)
7870                 gaudi2_print_last_pqes_on_err(hdev, qid_base, i, qman_base, false);
7871 }
7872
7873 static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type,
7874                                                         u64 qman_base, u32 qid_base)
7875 {
7876         u32 i, j, glbl_sts_val, arb_err_val, num_error_causes, error_count = 0;
7877         u64 glbl_sts_addr, arb_err_addr;
7878         char reg_desc[32];
7879
7880         glbl_sts_addr = qman_base + (mmDCORE0_TPC0_QM_GLBL_ERR_STS_0 - mmDCORE0_TPC0_QM_BASE);
7881         arb_err_addr = qman_base + (mmDCORE0_TPC0_QM_ARB_ERR_CAUSE - mmDCORE0_TPC0_QM_BASE);
7882
7883         /* Iterate through all stream GLBL_ERR_STS registers + Lower CP */
7884         for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7885                 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7886
7887                 if (!glbl_sts_val)
7888                         continue;
7889
7890                 if (i == QMAN_STREAMS) {
7891                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7892                         num_error_causes = GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE;
7893                 } else {
7894                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7895                         num_error_causes = GAUDI2_NUM_OF_QM_ERR_CAUSE;
7896                 }
7897
7898                 for (j = 0 ; j < num_error_causes ; j++)
7899                         if (glbl_sts_val & BIT(j)) {
7900                                 gaudi2_print_event(hdev, event_type, true,
7901                                         "%s. err cause: %s", reg_desc,
7902                                         i == QMAN_STREAMS ?
7903                                         gaudi2_qman_lower_cp_error_cause[j] :
7904                                         gaudi2_qman_error_cause[j]);
7905                                 error_count++;
7906                         }
7907
7908                 print_qman_data_on_err(hdev, qid_base, i, qman_base);
7909         }
7910
7911         arb_err_val = RREG32(arb_err_addr);
7912
7913         if (!arb_err_val)
7914                 goto out;
7915
7916         for (j = 0 ; j < GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7917                 if (arb_err_val & BIT(j)) {
7918                         gaudi2_print_event(hdev, event_type, true,
7919                                 "ARB_ERR. err cause: %s",
7920                                 gaudi2_qman_arb_error_cause[j]);
7921                         error_count++;
7922                 }
7923         }
7924
7925 out:
7926         return error_count;
7927 }
7928
7929 static void gaudi2_razwi_rr_hbw_shared_printf_info(struct hl_device *hdev,
7930                         u64 rtr_mstr_if_base_addr, bool is_write, char *name,
7931                         enum gaudi2_engine_id id, u64 *event_mask)
7932 {
7933         u32 razwi_hi, razwi_lo, razwi_xy;
7934         u16 eng_id = id;
7935         u8 rd_wr_flag;
7936
7937         if (is_write) {
7938                 razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HI);
7939                 razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_LO);
7940                 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_XY);
7941                 rd_wr_flag = HL_RAZWI_WRITE;
7942         } else {
7943                 razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HI);
7944                 razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_LO);
7945                 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_XY);
7946                 rd_wr_flag = HL_RAZWI_READ;
7947         }
7948
7949         hl_handle_razwi(hdev, (u64)razwi_hi << 32 | razwi_lo, &eng_id, 1,
7950                                 rd_wr_flag | HL_RAZWI_HBW, event_mask);
7951
7952         dev_err_ratelimited(hdev->dev,
7953                 "%s-RAZWI SHARED RR HBW %s error, address %#llx, Initiator coordinates 0x%x\n",
7954                 name, is_write ? "WR" : "RD", (u64)razwi_hi << 32 | razwi_lo, razwi_xy);
7955 }
7956
7957 static void gaudi2_razwi_rr_lbw_shared_printf_info(struct hl_device *hdev,
7958                         u64 rtr_mstr_if_base_addr, bool is_write, char *name,
7959                         enum gaudi2_engine_id id, u64 *event_mask)
7960 {
7961         u64 razwi_addr = CFG_BASE;
7962         u32 razwi_xy;
7963         u16 eng_id = id;
7964         u8 rd_wr_flag;
7965
7966         if (is_write) {
7967                 razwi_addr += RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI);
7968                 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_XY);
7969                 rd_wr_flag = HL_RAZWI_WRITE;
7970         } else {
7971                 razwi_addr += RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI);
7972                 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_XY);
7973                 rd_wr_flag = HL_RAZWI_READ;
7974         }
7975
7976         hl_handle_razwi(hdev, razwi_addr, &eng_id, 1, rd_wr_flag | HL_RAZWI_LBW, event_mask);
7977         dev_err_ratelimited(hdev->dev,
7978                                 "%s-RAZWI SHARED RR LBW %s error, mstr_if 0x%llx, captured address 0x%llX, Initiator coordinates 0x%x\n",
7979                                 name, is_write ? "WR" : "RD", rtr_mstr_if_base_addr, razwi_addr,
7980                                                 razwi_xy);
7981 }
7982
7983 static enum gaudi2_engine_id gaudi2_razwi_calc_engine_id(struct hl_device *hdev,
7984                                                 enum razwi_event_sources module, u8 module_idx)
7985 {
7986         switch (module) {
7987         case RAZWI_TPC:
7988                 if (module_idx == (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES))
7989                         return GAUDI2_DCORE0_ENGINE_ID_TPC_6;
7990                 return (((module_idx / NUM_OF_TPC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
7991                                 (module_idx % NUM_OF_TPC_PER_DCORE) +
7992                                 (GAUDI2_DCORE0_ENGINE_ID_TPC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0));
7993
7994         case RAZWI_MME:
7995                 return ((GAUDI2_DCORE0_ENGINE_ID_MME - GAUDI2_DCORE0_ENGINE_ID_EDMA_0) +
7996                         (module_idx * ENGINE_ID_DCORE_OFFSET));
7997
7998         case RAZWI_EDMA:
7999                 return (((module_idx / NUM_OF_EDMA_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
8000                         (module_idx % NUM_OF_EDMA_PER_DCORE));
8001
8002         case RAZWI_PDMA:
8003                 return (GAUDI2_ENGINE_ID_PDMA_0 + module_idx);
8004
8005         case RAZWI_NIC:
8006                 return (GAUDI2_ENGINE_ID_NIC0_0 + (NIC_NUMBER_OF_QM_PER_MACRO * module_idx));
8007
8008         case RAZWI_DEC:
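                     /* module indices 8-9 are the two shared (PCIe) decoders */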
8009                 if (module_idx == 8)
8010                         return GAUDI2_PCIE_ENGINE_ID_DEC_0;
8011
8012                 if (module_idx == 9)
8013                         return GAUDI2_PCIE_ENGINE_ID_DEC_1;
8014
8015                 return (((module_idx / NUM_OF_DEC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
8016                                 (module_idx % NUM_OF_DEC_PER_DCORE) +
8017                                 (GAUDI2_DCORE0_ENGINE_ID_DEC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0));
8018
8019         case RAZWI_ROT:
8020                 return GAUDI2_ENGINE_ID_ROT_0 + module_idx;
8021
8022         default:
8023                 return GAUDI2_ENGINE_ID_SIZE;
8024         }
8025 }
8026
8027 /*
8028  * This function handles RR (Range Register) hit events
8029  * raised by initiators, not by PSOC RAZWI.
8030  */
8031 static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev,
8032                                 enum razwi_event_sources module, u8 module_idx,
8033                                 u8 module_sub_idx, u64 *event_mask)
8034 {
8035         bool via_sft = false;
8036         u32 hbw_rtr_id, lbw_rtr_id, dcore_id, dcore_rtr_id, eng_id;
8037         u64 hbw_rtr_mstr_if_base_addr, lbw_rtr_mstr_if_base_addr;
8038         u32 hbw_shrd_aw = 0, hbw_shrd_ar = 0;
8039         u32 lbw_shrd_aw = 0, lbw_shrd_ar = 0;
8040         char initiator_name[64];
8041
8042         switch (module) {
8043         case RAZWI_TPC:
8044                 hbw_rtr_id = gaudi2_tpc_initiator_hbw_rtr_id[module_idx];
8045
8046                 if (hl_is_fw_sw_ver_below(hdev, 1, 9) &&
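                     /* with old (pre-1.9) non-secured f/w, LBW RAZWI info for
                      * TPC0/TPC1 is reported via DCORE0_RTR0
                      */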
8047                                 !hdev->asic_prop.fw_security_enabled &&
8048                                 ((module_idx == 0) || (module_idx == 1)))
8049                         lbw_rtr_id = DCORE0_RTR0;
8050                 else
8051                         lbw_rtr_id = gaudi2_tpc_initiator_lbw_rtr_id[module_idx];
8052                 sprintf(initiator_name, "TPC_%u", module_idx);
8053                 break;
8054         case RAZWI_MME:
8055                 sprintf(initiator_name, "MME_%u", module_idx);
8056                 switch (module_sub_idx) {
8057                 case MME_WAP0:
8058                         hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap0;
8059                         break;
8060                 case MME_WAP1:
8061                         hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap1;
8062                         break;
8063                 case MME_WRITE:
8064                         hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].write;
8065                         break;
8066                 case MME_READ:
8067                         hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].read;
8068                         break;
8069                 case MME_SBTE0:
8070                         hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte0;
8071                         break;
8072                 case MME_SBTE1:
8073                         hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte1;
8074                         break;
8075                 case MME_SBTE2:
8076                         hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte2;
8077                         break;
8078                 case MME_SBTE3:
8079                         hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte3;
8080                         break;
8081                 case MME_SBTE4:
8082                         hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte4;
8083                         break;
8084                 default:
8085                         return;
8086                 }
8087                 lbw_rtr_id = hbw_rtr_id;
8088                 break;
8089         case RAZWI_EDMA:
8090                 hbw_rtr_mstr_if_base_addr = gaudi2_edma_initiator_hbw_sft[module_idx];
8091                 dcore_id = module_idx / NUM_OF_EDMA_PER_DCORE;
8092                 /* SFT has a separate MSTR_IF for LBW; only there can we
8093                  * read the LBW RAZWI related registers
8094                  */
8095                 lbw_rtr_mstr_if_base_addr = mmSFT0_LBW_RTR_IF_MSTR_IF_RR_SHRD_HBW_BASE +
8096                                                                 dcore_id * SFT_DCORE_OFFSET;
8097                 via_sft = true;
8098                 sprintf(initiator_name, "EDMA_%u", module_idx);
8099                 break;
8100         case RAZWI_PDMA:
8101                 hbw_rtr_id = gaudi2_pdma_initiator_hbw_rtr_id[module_idx];
8102                 lbw_rtr_id = gaudi2_pdma_initiator_lbw_rtr_id[module_idx];
8103                 sprintf(initiator_name, "PDMA_%u", module_idx);
8104                 break;
8105         case RAZWI_NIC:
8106                 hbw_rtr_id = gaudi2_nic_initiator_hbw_rtr_id[module_idx];
8107                 lbw_rtr_id = gaudi2_nic_initiator_lbw_rtr_id[module_idx];
8108                 sprintf(initiator_name, "NIC_%u", module_idx);
8109                 break;
8110         case RAZWI_DEC:
8111                 hbw_rtr_id = gaudi2_dec_initiator_hbw_rtr_id[module_idx];
8112                 lbw_rtr_id = gaudi2_dec_initiator_lbw_rtr_id[module_idx];
8113                 sprintf(initiator_name, "DEC_%u", module_idx);
8114                 break;
8115         case RAZWI_ROT:
8116                 hbw_rtr_id = gaudi2_rot_initiator_hbw_rtr_id[module_idx];
8117                 lbw_rtr_id = gaudi2_rot_initiator_lbw_rtr_id[module_idx];
8118                 sprintf(initiator_name, "ROT_%u", module_idx);
8119                 break;
8120         default:
8121                 return;
8122         }
8123
8124         /* Find router mstr_if register base */
8125         if (!via_sft) {
8126                 dcore_id = hbw_rtr_id / NUM_OF_RTR_PER_DCORE;
8127                 dcore_rtr_id = hbw_rtr_id % NUM_OF_RTR_PER_DCORE;
8128                 hbw_rtr_mstr_if_base_addr = mmDCORE0_RTR0_CTRL_BASE +
8129                                 dcore_id * DCORE_OFFSET +
8130                                 dcore_rtr_id * DCORE_RTR_OFFSET +
8131                                 RTR_MSTR_IF_OFFSET;
8132                 lbw_rtr_mstr_if_base_addr = hbw_rtr_mstr_if_base_addr +
8133                                 (((s32)lbw_rtr_id - hbw_rtr_id) * DCORE_RTR_OFFSET);
8134         }
8135
8136         /* Find out event cause by reading "RAZWI_HAPPENED" registers */
8137         hbw_shrd_aw = RREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED);
8138         hbw_shrd_ar = RREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED);
8139         lbw_shrd_aw = RREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED);
8140         lbw_shrd_ar = RREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED);
8141
8142         eng_id = gaudi2_razwi_calc_engine_id(hdev, module, module_idx);
8143         if (hbw_shrd_aw) {
8144                 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, hbw_rtr_mstr_if_base_addr, true,
8145                                                 initiator_name, eng_id, event_mask);
8146
8147                 /* Clear event indication */
8148                 WREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED, hbw_shrd_aw);
8149         }
8150
8151         if (hbw_shrd_ar) {
8152                 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, hbw_rtr_mstr_if_base_addr, false,
8153                                                 initiator_name, eng_id, event_mask);
8154
8155                 /* Clear event indication */
8156                 WREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED, hbw_shrd_ar);
8157         }
8158
8159         if (lbw_shrd_aw) {
8160                 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, lbw_rtr_mstr_if_base_addr, true,
8161                                                 initiator_name, eng_id, event_mask);
8162
8163                 /* Clear event indication */
8164                 WREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED, lbw_shrd_aw);
8165         }
8166
8167         if (lbw_shrd_ar) {
8168                 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, lbw_rtr_mstr_if_base_addr, false,
8169                                                 initiator_name, eng_id, event_mask);
8170
8171                 /* Clear event indication */
8172                 WREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED, lbw_shrd_ar);
8173         }
8174 }
8175
8176 static void gaudi2_check_if_razwi_happened(struct hl_device *hdev)
8177 {
8178         struct asic_fixed_properties *prop = &hdev->asic_prop;
8179         u8 mod_idx, sub_mod;
8180
8181         /* check all TPCs */
8182         for (mod_idx = 0 ; mod_idx < (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1) ; mod_idx++) {
8183                 if (prop->tpc_enabled_mask & BIT(mod_idx))
8184                         gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, mod_idx, 0, NULL);
8185         }
8186
8187         /* check all MMEs */
8188         for (mod_idx = 0 ; mod_idx < (NUM_OF_MME_PER_DCORE * NUM_OF_DCORES) ; mod_idx++)
8189                 for (sub_mod = MME_WAP0 ; sub_mod < MME_INITIATORS_MAX ; sub_mod++)
8190                         gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mod_idx,
8191                                                                         sub_mod, NULL);
8192
8193         /* check all EDMAs */
8194         for (mod_idx = 0 ; mod_idx < (NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES) ; mod_idx++)
8195                 if (prop->edma_enabled_mask & BIT(mod_idx))
8196                         gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_EDMA, mod_idx, 0, NULL);
8197
8198         /* check all PDMAs */
8199         for (mod_idx = 0 ; mod_idx < NUM_OF_PDMA ; mod_idx++)
8200                 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_PDMA, mod_idx, 0, NULL);
8201
8202         /* check all NICs - one RAZWI module per NIC macro, i.e. per two ports */
8203         for (mod_idx = 0 ; mod_idx < NIC_NUMBER_OF_PORTS ; mod_idx++)
8204                 if (hdev->nic_ports_mask & BIT(mod_idx))
8205                         gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_NIC, mod_idx >> 1, 0,
8206                                                                 NULL);
8207
8208         /* check all DECs */
8209         for (mod_idx = 0 ; mod_idx < NUMBER_OF_DEC ; mod_idx++)
8210                 if (prop->decoder_enabled_mask & BIT(mod_idx))
8211                         gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, mod_idx, 0, NULL);
8212
8213         /* check all ROTs */
8214         for (mod_idx = 0 ; mod_idx < NUM_OF_ROT ; mod_idx++)
8215                 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, mod_idx, 0, NULL);
8216 }
8217
8218 static int gaudi2_psoc_razwi_get_engines(struct gaudi2_razwi_info *razwi_info, u32 array_size,
8219                                                 u32 axuser_xy, u32 *base, u16 *eng_id,
8220                                                 char *eng_name)
8221 {
8223         int i, num_of_eng = 0;
8224         u16 str_size = 0;
8225
8226         for (i = 0 ; i < array_size ; i++) {
8227                 if (axuser_xy != razwi_info[i].axuser_xy)
8228                         continue;
8229
8230                 eng_id[num_of_eng] = razwi_info[i].eng_id;
8231                 base[num_of_eng] = razwi_info[i].rtr_ctrl;
8232                 if (!num_of_eng)
8233                         str_size += snprintf(eng_name + str_size,
8234                                                 PSOC_RAZWI_ENG_STR_SIZE - str_size, "%s",
8235                                                 razwi_info[i].eng_name);
8236                 else
8237                         str_size += snprintf(eng_name + str_size,
8238                                                 PSOC_RAZWI_ENG_STR_SIZE - str_size, " or %s",
8239                                                 razwi_info[i].eng_name);
8240                 num_of_eng++;
8241         }
8242
8243         return num_of_eng;
8244 }
8245
8246 static bool gaudi2_handle_psoc_razwi_happened(struct hl_device *hdev, u32 razwi_reg,
8247                                                 u64 *event_mask)
8248 {
8249         u32 axuser_xy = RAZWI_GET_AXUSER_XY(razwi_reg), addr_hi = 0, addr_lo = 0;
8250         u32 base[PSOC_RAZWI_MAX_ENG_PER_RTR];
8251         u16 num_of_eng, eng_id[PSOC_RAZWI_MAX_ENG_PER_RTR];
8252         char eng_name_str[PSOC_RAZWI_ENG_STR_SIZE];
8253         bool razwi_happened = false;
8254         u64 addr;
8255         int i;
8256
8257         num_of_eng = gaudi2_psoc_razwi_get_engines(common_razwi_info, ARRAY_SIZE(common_razwi_info),
8258                                                         axuser_xy, base, eng_id, eng_name_str);
8259
8260         /* If no match for XY coordinates, try to find it in MME razwi table */
8261         if (!num_of_eng) {
8262                 axuser_xy = RAZWI_GET_AXUSER_LOW_XY(razwi_reg);
8263                 num_of_eng = gaudi2_psoc_razwi_get_engines(mme_razwi_info,
8264                                                                 ARRAY_SIZE(mme_razwi_info),
8265                                                                 axuser_xy, base, eng_id,
8266                                                                 eng_name_str);
8267         }
8268
8269         for (i = 0 ; i < num_of_eng ; i++) {
8270                 if (RREG32(base[i] + DEC_RAZWI_HBW_AW_SET)) {
8271                         addr_hi = RREG32(base[i] + DEC_RAZWI_HBW_AW_ADDR_HI);
8272                         addr_lo = RREG32(base[i] + DEC_RAZWI_HBW_AW_ADDR_LO);
8273                         addr = ((u64)addr_hi << 32) + addr_lo;
8274                         if (addr) {
8275                                 dev_err(hdev->dev,
8276                                         "PSOC HBW AW RAZWI: %s, address (aligned to 128 byte): 0x%llX\n",
8277                                         eng_name_str, addr);
8278                                 hl_handle_razwi(hdev, addr, &eng_id[0],
8279                                         num_of_eng, HL_RAZWI_HBW | HL_RAZWI_WRITE, event_mask);
8280                                 razwi_happened = true;
8281                         }
8282                 }
8283
8284                 if (RREG32(base[i] + DEC_RAZWI_HBW_AR_SET)) {
8285                         addr_hi = RREG32(base[i] + DEC_RAZWI_HBW_AR_ADDR_HI);
8286                         addr_lo = RREG32(base[i] + DEC_RAZWI_HBW_AR_ADDR_LO);
8287                         addr = ((u64)addr_hi << 32) + addr_lo;
8288                         if (addr) {
8289                                 dev_err(hdev->dev,
8290                                         "PSOC HBW AR RAZWI: %s, address (aligned to 128 byte): 0x%llX\n",
8291                                         eng_name_str, addr);
8292                                 hl_handle_razwi(hdev, addr, &eng_id[0],
8293                                         num_of_eng, HL_RAZWI_HBW | HL_RAZWI_READ, event_mask);
8294                                 razwi_happened = true;
8295                         }
8296                 }
8297
8298                 if (RREG32(base[i] + DEC_RAZWI_LBW_AW_SET)) {
8299                         addr_lo = RREG32(base[i] + DEC_RAZWI_LBW_AW_ADDR);
8300                         if (addr_lo) {
8301                                 dev_err(hdev->dev,
8302                                         "PSOC LBW AW RAZWI: %s, address (aligned to 128 byte): 0x%X\n",
8303                                         eng_name_str, addr_lo);
8304                                 hl_handle_razwi(hdev, addr_lo, &eng_id[0],
8305                                         num_of_eng, HL_RAZWI_LBW | HL_RAZWI_WRITE, event_mask);
8306                                 razwi_happened = true;
8307                         }
8308                 }
8309
8310                 if (RREG32(base[i] + DEC_RAZWI_LBW_AR_SET)) {
8311                         addr_lo = RREG32(base[i] + DEC_RAZWI_LBW_AR_ADDR);
8312                         if (addr_lo) {
8313                                 dev_err(hdev->dev,
8314                                                 "PSOC LBW AR RAZWI: %s, address (aligned to 128 byte): 0x%X\n",
8315                                                 eng_name_str, addr_lo);
8316                                 hl_handle_razwi(hdev, addr_lo, &eng_id[0],
8317                                         num_of_eng, HL_RAZWI_LBW | HL_RAZWI_READ, event_mask);
8318                                 razwi_happened = true;
8319                         }
8320                 }
8321                 /* In the common case the loop breaks here, since there is only one engine ID,
8322                  * or several engines sharing the same router. The exceptional case is a PSOC
8323                  * RAZWI from EDMA, where the AXUSER ID can fit 2 routers (the 2 interfaces of
8324                  * the SFT router). In that case the first router might not hold the info and
8325                  * we will need to iterate over the other router.
8326                  */
8327                 if (razwi_happened)
8328                         break;
8329         }
8330
8331         return razwi_happened;
8332 }
8333
8334 /* PSOC RAZWI interrupt occurs only when trying to access a bad address */
8335 static int gaudi2_ack_psoc_razwi_event_handler(struct hl_device *hdev, u64 *event_mask)
8336 {
8337         u32 razwi_mask_info, razwi_intr = 0, error_count = 0;
8338
8339         if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX)) {
8340                 razwi_intr = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT);
8341                 if (!razwi_intr)
8342                         return 0;
8343         }
8344
8345         razwi_mask_info = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_MASK_INFO);
8346
8347         dev_err_ratelimited(hdev->dev,
8348                 "PSOC RAZWI interrupt: Mask %d, AR %d, AW %d, AXUSER_L 0x%x AXUSER_H 0x%x\n",
8349                 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_MASK_MASK, razwi_mask_info),
8350                 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AR_MASK, razwi_mask_info),
8351                 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AW_MASK, razwi_mask_info),
8352                 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_L_MASK, razwi_mask_info),
8353                 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_H_MASK, razwi_mask_info));
8354
8355         if (gaudi2_handle_psoc_razwi_happened(hdev, razwi_mask_info, event_mask))
8356                 error_count++;
8357         else
8358                 dev_err_ratelimited(hdev->dev,
8359                                 "PSOC RAZWI interrupt: invalid razwi info (0x%x)\n",
8360                                 razwi_mask_info);
8361
8362         /* Clear Interrupts only on pldm or if f/w doesn't handle interrupts */
8363         if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX))
8364                 WREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT, razwi_intr);
8365
8366         return error_count;
8367 }
8368
8369 static int _gaudi2_handle_qm_sei_err(struct hl_device *hdev, u64 qman_base, u16 event_type)
8370 {
8371         u32 i, sts_val, sts_clr_val = 0, error_count = 0;
8372
8373         sts_val = RREG32(qman_base + QM_SEI_STATUS_OFFSET);
8374
8375         for (i = 0 ; i < GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE ; i++) {
8376                 if (sts_val & BIT(i)) {
8377                         gaudi2_print_event(hdev, event_type, true,
8378                                 "err cause: %s", gaudi2_qm_sei_error_cause[i]);
8379                         sts_clr_val |= BIT(i);
8380                         error_count++;
8381                 }
8382         }
8383
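             /* write back the collected bits to clear the reported errors */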
8384         WREG32(qman_base + QM_SEI_STATUS_OFFSET, sts_clr_val);
8385
8386         return error_count;
8387 }
8388
static int gaudi2_handle_qm_sei_err(struct hl_device *hdev, u16 event_type,
                                        bool extended_err_check, u64 *event_mask)
{
        enum razwi_event_sources module;
        u32 error_count = 0;
        u64 qman_base;
        u8 index;

        switch (event_type) {
        case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC23_AXI_ERR_RSP:
                index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
                qman_base = mmDCORE0_TPC0_QM_BASE +
                                (index / NUM_OF_TPC_PER_DCORE) * DCORE_OFFSET +
                                (index % NUM_OF_TPC_PER_DCORE) * DCORE_TPC_OFFSET;
                module = RAZWI_TPC;
                break;
        case GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
                qman_base = mmDCORE0_TPC6_QM_BASE;
                module = RAZWI_TPC;
                break;
        case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
        case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
        case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
        case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
                index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) /
                                (GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE -
                                                GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE);
                qman_base = mmDCORE0_MME_QM_BASE + index * DCORE_OFFSET;
                module = RAZWI_MME;
                break;
        case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
        case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
                index = event_type - GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP;
                qman_base = mmPDMA0_QM_BASE + index * PDMA_OFFSET;
                module = RAZWI_PDMA;
                break;
        case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
        case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
                index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE;
                qman_base = mmROT0_QM_BASE + index * ROT_OFFSET;
                module = RAZWI_ROT;
                break;
        default:
                return 0;
        }

        error_count = _gaudi2_handle_qm_sei_err(hdev, qman_base, event_type);

        /* There is a single event per NIC macro, so we should check both of its QMAN blocks */
        if (event_type >= GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE &&
                        event_type <= GAUDI2_EVENT_NIC11_AXI_ERROR_RESPONSE)
                error_count += _gaudi2_handle_qm_sei_err(hdev,
                                        qman_base + NIC_QM_OFFSET, event_type);

        if (extended_err_check) {
                /* check if RAZWI happened */
                gaudi2_ack_module_razwi_event_handler(hdev, module, 0, 0, event_mask);
                hl_check_for_glbl_errors(hdev);
        }

        return error_count;
}

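/*
 * Per-QMAN error event handler: resolve both the base queue ID and the
 * QMAN MMIO base of the engine that raised the event, then run the
 * generic QMAN error flow on that block.
 */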
static int gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
{
        u32 qid_base, error_count = 0;
        u64 qman_base;
        u8 index = 0;

        switch (event_type) {
        case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_TPC5_QM:
                index = event_type - GAUDI2_EVENT_TPC0_QM;
                qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 + index * QMAN_STREAMS;
                qman_base = mmDCORE0_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
                break;
        case GAUDI2_EVENT_TPC6_QM ... GAUDI2_EVENT_TPC11_QM:
                index = event_type - GAUDI2_EVENT_TPC6_QM;
                qid_base = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 + index * QMAN_STREAMS;
                qman_base = mmDCORE1_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
                break;
        case GAUDI2_EVENT_TPC12_QM ... GAUDI2_EVENT_TPC17_QM:
                index = event_type - GAUDI2_EVENT_TPC12_QM;
                qid_base = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 + index * QMAN_STREAMS;
                qman_base = mmDCORE2_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
                break;
        case GAUDI2_EVENT_TPC18_QM ... GAUDI2_EVENT_TPC23_QM:
                index = event_type - GAUDI2_EVENT_TPC18_QM;
                qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 + index * QMAN_STREAMS;
                qman_base = mmDCORE3_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
                break;
        case GAUDI2_EVENT_TPC24_QM:
                qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0;
                qman_base = mmDCORE0_TPC6_QM_BASE;
                break;
        case GAUDI2_EVENT_MME0_QM:
                qid_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0;
                qman_base = mmDCORE0_MME_QM_BASE;
                break;
        case GAUDI2_EVENT_MME1_QM:
                qid_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0;
                qman_base = mmDCORE1_MME_QM_BASE;
                break;
        case GAUDI2_EVENT_MME2_QM:
                qid_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0;
                qman_base = mmDCORE2_MME_QM_BASE;
                break;
        case GAUDI2_EVENT_MME3_QM:
                qid_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0;
                qman_base = mmDCORE3_MME_QM_BASE;
                break;
        case GAUDI2_EVENT_HDMA0_QM:
                index = 0;
                qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0;
                qman_base = mmDCORE0_EDMA0_QM_BASE;
                break;
        case GAUDI2_EVENT_HDMA1_QM:
                index = 1;
                qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0;
                qman_base = mmDCORE0_EDMA1_QM_BASE;
                break;
        case GAUDI2_EVENT_HDMA2_QM:
                index = 2;
                qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0;
                qman_base = mmDCORE1_EDMA0_QM_BASE;
                break;
        case GAUDI2_EVENT_HDMA3_QM:
                index = 3;
                qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0;
                qman_base = mmDCORE1_EDMA1_QM_BASE;
                break;
        case GAUDI2_EVENT_HDMA4_QM:
                index = 4;
                qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0;
                qman_base = mmDCORE2_EDMA0_QM_BASE;
                break;
        case GAUDI2_EVENT_HDMA5_QM:
                index = 5;
                qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0;
                qman_base = mmDCORE2_EDMA1_QM_BASE;
                break;
        case GAUDI2_EVENT_HDMA6_QM:
                index = 6;
                qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0;
                qman_base = mmDCORE3_EDMA0_QM_BASE;
                break;
        case GAUDI2_EVENT_HDMA7_QM:
                index = 7;
                qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0;
                qman_base = mmDCORE3_EDMA1_QM_BASE;
                break;
        case GAUDI2_EVENT_PDMA0_QM:
                qid_base = GAUDI2_QUEUE_ID_PDMA_0_0;
                qman_base = mmPDMA0_QM_BASE;
                break;
        case GAUDI2_EVENT_PDMA1_QM:
                qid_base = GAUDI2_QUEUE_ID_PDMA_1_0;
                qman_base = mmPDMA1_QM_BASE;
                break;
        case GAUDI2_EVENT_ROTATOR0_ROT0_QM:
                qid_base = GAUDI2_QUEUE_ID_ROT_0_0;
                qman_base = mmROT0_QM_BASE;
                break;
        case GAUDI2_EVENT_ROTATOR1_ROT1_QM:
                qid_base = GAUDI2_QUEUE_ID_ROT_1_0;
                qman_base = mmROT1_QM_BASE;
                break;
        default:
                return 0;
        }

        error_count = gaudi2_handle_qman_err_generic(hdev, event_type, qman_base, qid_base);

        /* Handle EDMA QM SEI here because there is no AXI error response event for EDMA */
        if (event_type >= GAUDI2_EVENT_HDMA2_QM && event_type <= GAUDI2_EVENT_HDMA5_QM) {
                error_count += _gaudi2_handle_qm_sei_err(hdev, qman_base, event_type);
                gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_EDMA, index, 0, event_mask);
        }

        hl_check_for_glbl_errors(hdev);

        return error_count;
}

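/*
 * Scan the SEI interrupt status of each ARC farm ARC, print and count the
 * set cause bits, and acknowledge them through the per-ARC INTR_CLR
 * register.
 */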
static int gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev, u16 event_type)
{
        u32 i, sts_val, sts_clr_val, error_count = 0, arc_farm;

        for (arc_farm = 0 ; arc_farm < NUM_OF_ARC_FARMS_ARC ; arc_farm++) {
                sts_clr_val = 0;
                sts_val = RREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_STS +
                                (arc_farm * ARC_FARM_OFFSET));

                for (i = 0 ; i < GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE ; i++) {
                        if (sts_val & BIT(i)) {
                                gaudi2_print_event(hdev, event_type, true,
                                                "ARC FARM ARC %u err cause: %s",
                                                arc_farm, gaudi2_arc_sei_error_cause[i]);
                                sts_clr_val |= BIT(i);
                                error_count++;
                        }
                }
                WREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_CLR + (arc_farm * ARC_FARM_OFFSET),
                                sts_clr_val);
        }

        hl_check_for_glbl_errors(hdev);

        return error_count;
}

static int gaudi2_handle_cpu_sei_err(struct hl_device *hdev, u16 event_type)
{
        u32 i, sts_val, sts_clr_val = 0, error_count = 0;

        sts_val = RREG32(mmCPU_IF_CPU_SEI_INTR_STS);

        for (i = 0 ; i < GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE ; i++) {
                if (sts_val & BIT(i)) {
                        gaudi2_print_event(hdev, event_type, true,
                                "err cause: %s", gaudi2_cpu_sei_error_cause[i]);
                        sts_clr_val |= BIT(i);
                        error_count++;
                }
        }

        hl_check_for_glbl_errors(hdev);

        WREG32(mmCPU_IF_CPU_SEI_INTR_CLR, sts_clr_val);

        return error_count;
}

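/*
 * Rotator errors deliver their cause bits inside the EQ entry rather than
 * in a status register, so only the reported cause data is decoded here
 * before the RAZWI follow-up check.
 */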
static int gaudi2_handle_rot_err(struct hl_device *hdev, u8 rot_index, u16 event_type,
                                        struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause,
                                        u64 *event_mask)
{
        u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data);
        u32 error_count = 0;
        int i;

        for (i = 0 ; i < GAUDI2_NUM_OF_ROT_ERR_CAUSE ; i++)
                if (intr_cause_data & BIT(i)) {
                        gaudi2_print_event(hdev, event_type, true,
                                "err cause: %s", guadi2_rot_error_cause[i]);
                        error_count++;
                }

        /* check if RAZWI happened */
        gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, rot_index, 0, event_mask);
        hl_check_for_glbl_errors(hdev);

        return error_count;
}

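/*
 * Acknowledge TPC interrupts: decode the cause bits delivered in the EQ
 * entry for the given TPC, then check whether a RAZWI was captured for
 * that TPC.
 */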
static int gaudi2_tpc_ack_interrupts(struct hl_device *hdev, u8 tpc_index, u16 event_type,
                                        struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause,
                                        u64 *event_mask)
{
        u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data);
        u32 error_count = 0;
        int i;

        for (i = 0 ; i < GAUDI2_NUM_OF_TPC_INTR_CAUSE ; i++)
                if (intr_cause_data & BIT(i)) {
                        gaudi2_print_event(hdev, event_type, true,
                                "interrupt cause: %s", gaudi2_tpc_interrupts_cause[i]);
                        error_count++;
                }

        /* check if RAZWI happened */
        gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, tpc_index, 0, event_mask);
        hl_check_for_glbl_errors(hdev);

        return error_count;
}

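/*
 * Decoder error handler. The first NUM_OF_DCORES * NUM_OF_VDEC_PER_DCORE
 * indices address the DCORE decoders; higher indices address the shared
 * PCIE decoders, hence the two address formulas below.
 */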
static int gaudi2_handle_dec_err(struct hl_device *hdev, u8 dec_index, u16 event_type,
                                        u64 *event_mask)
{
        u32 sts_addr, sts_val, sts_clr_val = 0, error_count = 0;
        int i;

        if (dec_index < NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES)
                /* DCORE DEC */
                sts_addr = mmDCORE0_VDEC0_BRDG_CTRL_CAUSE_INTR +
                                DCORE_OFFSET * (dec_index / NUM_OF_DEC_PER_DCORE) +
                                DCORE_VDEC_OFFSET * (dec_index % NUM_OF_DEC_PER_DCORE);
        else
                /* PCIE DEC */
                sts_addr = mmPCIE_VDEC0_BRDG_CTRL_CAUSE_INTR + PCIE_VDEC_OFFSET *
                                (dec_index - NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES);

        sts_val = RREG32(sts_addr);

        for (i = 0 ; i < GAUDI2_NUM_OF_DEC_ERR_CAUSE ; i++) {
                if (sts_val & BIT(i)) {
                        gaudi2_print_event(hdev, event_type, true,
                                "err cause: %s", gaudi2_dec_error_cause[i]);
                        sts_clr_val |= BIT(i);
                        error_count++;
                }
        }

        /* check if RAZWI happened */
        gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, dec_index, 0, event_mask);
        hl_check_for_glbl_errors(hdev);

        /* Write 1 to clear errors */
        WREG32(sts_addr, sts_clr_val);

        return error_count;
}

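/*
 * MME control error handler: decode and clear the interrupt cause
 * register of the given MME, then probe every RAZWI initiator of that
 * MME (from MME_WRITE up to MME_INITIATORS_MAX) for captured
 * transactions.
 */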
static int gaudi2_handle_mme_err(struct hl_device *hdev, u8 mme_index, u16 event_type,
                                        u64 *event_mask)
{
        u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0, error_count = 0;
        int i;

        sts_addr = mmDCORE0_MME_CTRL_LO_INTR_CAUSE + DCORE_OFFSET * mme_index;
        sts_clr_addr = mmDCORE0_MME_CTRL_LO_INTR_CLEAR + DCORE_OFFSET * mme_index;

        sts_val = RREG32(sts_addr);

        for (i = 0 ; i < GAUDI2_NUM_OF_MME_ERR_CAUSE ; i++) {
                if (sts_val & BIT(i)) {
                        gaudi2_print_event(hdev, event_type, true,
                                "err cause: %s", guadi2_mme_error_cause[i]);
                        sts_clr_val |= BIT(i);
                        error_count++;
                }
        }

        /* check if RAZWI happened */
        for (i = MME_WRITE ; i < MME_INITIATORS_MAX ; i++)
                gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, i, event_mask);

        hl_check_for_glbl_errors(hdev);

        WREG32(sts_clr_addr, sts_clr_val);

        return error_count;
}

static int gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u16 event_type,
                                        u64 intr_cause_data)
{
        int i, error_count = 0;

        for (i = 0 ; i < GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE ; i++)
                if (intr_cause_data & BIT(i)) {
                        gaudi2_print_event(hdev, event_type, true,
                                "err cause: %s", guadi2_mme_sbte_error_cause[i]);
                        error_count++;
                }

        hl_check_for_glbl_errors(hdev);

        return error_count;
}

static int gaudi2_handle_mme_wap_err(struct hl_device *hdev, u8 mme_index, u16 event_type,
                                        u64 *event_mask)
{
        u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0, error_count = 0;
        int i;

        sts_addr = mmDCORE0_MME_ACC_INTR_CAUSE + DCORE_OFFSET * mme_index;
        sts_clr_addr = mmDCORE0_MME_ACC_INTR_CLEAR + DCORE_OFFSET * mme_index;

        sts_val = RREG32(sts_addr);

        for (i = 0 ; i < GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE ; i++) {
                if (sts_val & BIT(i)) {
                        gaudi2_print_event(hdev, event_type, true,
                                "err cause: %s", guadi2_mme_wap_error_cause[i]);
                        sts_clr_val |= BIT(i);
                        error_count++;
                }
        }

        /* check if RAZWI happened on WAP0/1 */
        gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP0, event_mask);
        gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP1, event_mask);
        hl_check_for_glbl_errors(hdev);

        WREG32(sts_clr_addr, sts_clr_val);

        return error_count;
}

static int gaudi2_handle_kdma_core_event(struct hl_device *hdev, u16 event_type,
                                        u64 intr_cause_data)
{
        u32 error_count = 0;
        int i;

        /* If an AXI read or write error is received, an error is reported and
         * an interrupt message is sent. Due to a HW erratum, when reading the
         * cause register of the KDMA engine, the reported error is always HBW,
         * even if the actual error was caused by an LBW KDMA transaction.
         */
        for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++)
                if (intr_cause_data & BIT(i)) {
                        gaudi2_print_event(hdev, event_type, true,
                                "err cause: %s", gaudi2_kdma_core_interrupts_cause[i]);
                        error_count++;
                }

        hl_check_for_glbl_errors(hdev);

        return error_count;
}

static int gaudi2_handle_dma_core_event(struct hl_device *hdev, u16 event_type, int sts_addr)
{
        u32 error_count = 0, sts_val = RREG32(sts_addr);
        int i;

        for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++)
                if (sts_val & BIT(i)) {
                        gaudi2_print_event(hdev, event_type, true,
                                "err cause: %s", gaudi2_dma_core_interrupts_cause[i]);
                        error_count++;
                }

        hl_check_for_glbl_errors(hdev);

        return error_count;
}

static int gaudi2_handle_pdma_core_event(struct hl_device *hdev, u16 event_type, int pdma_idx)
{
        u32 sts_addr;

        sts_addr = mmPDMA0_CORE_ERR_CAUSE + pdma_idx * PDMA_OFFSET;
        return gaudi2_handle_dma_core_event(hdev, event_type, sts_addr);
}

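/*
 * EDMA core events are not numbered in DCORE order, so the event index is
 * first remapped through a fixed table before the ERR_CAUSE address of
 * the corresponding EDMA instance is computed.
 */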
static int gaudi2_handle_edma_core_event(struct hl_device *hdev, u16 event_type, int edma_idx)
{
        static const int edma_event_index_map[] = {2, 3, 0, 1, 6, 7, 4, 5};
        u32 sts_addr, index;

        index = edma_event_index_map[edma_idx];

        sts_addr = mmDCORE0_EDMA0_CORE_ERR_CAUSE +
                                DCORE_OFFSET * (index / NUM_OF_EDMA_PER_DCORE) +
                                DCORE_EDMA_OFFSET * (index % NUM_OF_EDMA_PER_DCORE);
        return gaudi2_handle_dma_core_event(hdev, event_type, sts_addr);
}

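/*
 * Check all four PCIE master range-router RAZWI indications (HBW/LBW
 * crossed with AW/AR), print the captured info for each one that fired
 * and acknowledge it by writing 1 to its RAZWI_HAPPENED register.
 */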
static void gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(struct hl_device *hdev, u64 *event_mask)
{
        u32 mstr_if_base_addr = mmPCIE_MSTR_RR_MSTR_IF_RR_SHRD_HBW_BASE, razwi_happened_addr;

        razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED;
        if (RREG32(razwi_happened_addr)) {
                gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE",
                                                        GAUDI2_ENGINE_ID_PCIE, event_mask);
                WREG32(razwi_happened_addr, 0x1);
        }

        razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED;
        if (RREG32(razwi_happened_addr)) {
                gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE",
                                                        GAUDI2_ENGINE_ID_PCIE, event_mask);
                WREG32(razwi_happened_addr, 0x1);
        }

        razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED;
        if (RREG32(razwi_happened_addr)) {
                gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE",
                                                        GAUDI2_ENGINE_ID_PCIE, event_mask);
                WREG32(razwi_happened_addr, 0x1);
        }

        razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED;
        if (RREG32(razwi_happened_addr)) {
                gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE",
                                                        GAUDI2_ENGINE_ID_PCIE, event_mask);
                WREG32(razwi_happened_addr, 0x1);
        }
}

static int gaudi2_print_pcie_addr_dec_info(struct hl_device *hdev, u16 event_type,
                                        u64 intr_cause_data, u64 *event_mask)
{
        u32 error_count = 0;
        int i;

        for (i = 0 ; i < GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE ; i++) {
                if (!(intr_cause_data & BIT_ULL(i)))
                        continue;

                gaudi2_print_event(hdev, event_type, true,
                        "err cause: %s", gaudi2_pcie_addr_dec_error_cause[i]);
                error_count++;

                switch (intr_cause_data & BIT_ULL(i)) {
                case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK:
                        hl_check_for_glbl_errors(hdev);
                        break;
                case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK:
                        gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(hdev, event_mask);
                        break;
                }
        }

        return error_count;
}

static int gaudi2_handle_pif_fatal(struct hl_device *hdev, u16 event_type,
                                u64 intr_cause_data)
{
        u32 error_count = 0;
        int i;

        for (i = 0 ; i < GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE ; i++) {
                if (intr_cause_data & BIT_ULL(i)) {
                        gaudi2_print_event(hdev, event_type, true,
                                "err cause: %s", gaudi2_pmmu_fatal_interrupts_cause[i]);
                        error_count++;
                }
        }

        return error_count;
}

static int gaudi2_handle_hif_fatal(struct hl_device *hdev, u16 event_type, u64 intr_cause_data)
{
        u32 error_count = 0;
        int i;

        for (i = 0 ; i < GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE ; i++) {
                if (intr_cause_data & BIT_ULL(i)) {
                        gaudi2_print_event(hdev, event_type, true,
                                "err cause: %s", gaudi2_hif_fatal_interrupts_cause[i]);
                        error_count++;
                }
        }

        return error_count;
}

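/*
 * Report an MMU page fault: the faulting VA is captured in two registers
 * (bits 63:32 and 31:0). For HMMUs the address is descrambled before it
 * is reported, and the captured-valid entry is cleared at the end.
 */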
static void gaudi2_handle_page_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu,
                                        u64 *event_mask)
{
        u32 valid, val;
        u64 addr;

        valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID));

        if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_PAGE_ERR_VALID_ENTRY_MASK))
                return;

        val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE));
        addr = val & DCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA_63_32_MASK;
        addr <<= 32;
        addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA));

        if (!is_pmmu)
                addr = gaudi2_mmu_descramble_addr(hdev, addr);

        dev_err_ratelimited(hdev->dev, "%s page fault on va 0x%llx\n",
                                is_pmmu ? "PMMU" : "HMMU", addr);
        hl_handle_page_fault(hdev, addr, 0, is_pmmu, event_mask);

        WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID), 0);
}

static void gaudi2_handle_access_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu)
{
        u32 valid, val;
        u64 addr;

        valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID));

        if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_ACCESS_ERR_VALID_ENTRY_MASK))
                return;

        val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE));
        addr = val & DCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA_63_32_MASK;
        addr <<= 32;
        addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA));

        if (!is_pmmu)
                addr = gaudi2_mmu_descramble_addr(hdev, addr);

        dev_err_ratelimited(hdev->dev, "%s access error on va 0x%llx\n",
                                is_pmmu ? "PMMU" : "HMMU", addr);
        WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID), 0);
}

static int gaudi2_handle_mmu_spi_sei_generic(struct hl_device *hdev, u16 event_type,
                                                u64 mmu_base, bool is_pmmu, u64 *event_mask)
{
        u32 spi_sei_cause, interrupt_clr = 0x0, error_count = 0;
        int i;

        spi_sei_cause = RREG32(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET);

        for (i = 0 ; i < GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE ; i++) {
                if (spi_sei_cause & BIT(i)) {
                        gaudi2_print_event(hdev, event_type, true,
                                "err cause: %s", gaudi2_mmu_spi_sei[i].cause);

                        if (i == 0)
                                gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, event_mask);
                        else if (i == 1)
                                gaudi2_handle_access_error(hdev, mmu_base, is_pmmu);

                        if (gaudi2_mmu_spi_sei[i].clear_bit >= 0)
                                interrupt_clr |= BIT(gaudi2_mmu_spi_sei[i].clear_bit);

                        error_count++;
                }
        }

        /* Clear cause */
        WREG32_AND(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET, ~spi_sei_cause);

        /* Clear interrupt */
        WREG32(mmu_base + MMU_INTERRUPT_CLR_OFFSET, interrupt_clr);

        return error_count;
}

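/*
 * Sync manager error handler: a single event covers both the SM SEI cause
 * register (with its matching log value) and the CQ interrupt indication,
 * so both are decoded, reported and cleared here.
 */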
static int gaudi2_handle_sm_err(struct hl_device *hdev, u16 event_type, u8 sm_index)
{
        u32 sei_cause_addr, sei_cause_val, sei_cause_cause, sei_cause_log,
                cq_intr_addr, cq_intr_val, cq_intr_queue_index, error_count = 0;
        int i;

        sei_cause_addr = mmDCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE + DCORE_OFFSET * sm_index;
        cq_intr_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_INTR + DCORE_OFFSET * sm_index;

        sei_cause_val = RREG32(sei_cause_addr);
        sei_cause_cause = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_CAUSE_MASK, sei_cause_val);
        cq_intr_val = RREG32(cq_intr_addr);

        /* SEI interrupt */
        if (sei_cause_cause) {
                /* There are corresponding SEI_CAUSE_log bits for every SEI_CAUSE_cause bit */
                sei_cause_log = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_LOG_MASK,
                                        sei_cause_val);

                for (i = 0 ; i < GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE ; i++) {
                        if (!(sei_cause_cause & BIT(i)))
                                continue;

                        gaudi2_print_event(hdev, event_type, true,
                                "err cause: %s. %s: 0x%X",
                                gaudi2_sm_sei_cause[i].cause_name,
                                gaudi2_sm_sei_cause[i].log_name,
                                sei_cause_log);
                        error_count++;
                        break;
                }

                /* Clear SM_SEI_CAUSE */
                WREG32(sei_cause_addr, 0);
        }

        /* CQ interrupt */
        if (cq_intr_val & DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_SEC_INTR_MASK) {
                cq_intr_queue_index =
                                FIELD_GET(DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_INTR_QUEUE_INDEX_MASK,
                                        cq_intr_val);

                dev_err_ratelimited(hdev->dev, "SM%u err. err cause: CQ_INTR. queue index: %u\n",
                                sm_index, cq_intr_queue_index);
                error_count++;

                /* Clear CQ_INTR */
                WREG32(cq_intr_addr, 0);
        }

        hl_check_for_glbl_errors(hdev);

        return error_count;
}

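/*
 * Translate an HMMU event to the MMU block base address of the HMMU that
 * raised it. The (dcore, index) mapping below is not linear in the event
 * number; unrecognized events return ULONG_MAX.
 */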
static u64 get_hmmu_base(u16 event_type)
{
        u8 dcore, index_in_dcore;

        switch (event_type) {
        case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP:
        case GAUDI2_EVENT_HMMU0_SPI_BASE ... GAUDI2_EVENT_HMMU0_SECURITY_ERROR:
                dcore = 0;
                index_in_dcore = 0;
                break;
        case GAUDI2_EVENT_HMMU_1_AXI_ERR_RSP:
        case GAUDI2_EVENT_HMMU1_SPI_BASE ... GAUDI2_EVENT_HMMU1_SECURITY_ERROR:
                dcore = 1;
                index_in_dcore = 0;
                break;
        case GAUDI2_EVENT_HMMU_2_AXI_ERR_RSP:
        case GAUDI2_EVENT_HMMU2_SPI_BASE ... GAUDI2_EVENT_HMMU2_SECURITY_ERROR:
                dcore = 0;
                index_in_dcore = 1;
                break;
        case GAUDI2_EVENT_HMMU_3_AXI_ERR_RSP:
        case GAUDI2_EVENT_HMMU3_SPI_BASE ... GAUDI2_EVENT_HMMU3_SECURITY_ERROR:
                dcore = 1;
                index_in_dcore = 1;
                break;
        case GAUDI2_EVENT_HMMU_4_AXI_ERR_RSP:
        case GAUDI2_EVENT_HMMU4_SPI_BASE ... GAUDI2_EVENT_HMMU4_SECURITY_ERROR:
                dcore = 3;
                index_in_dcore = 2;
                break;
        case GAUDI2_EVENT_HMMU_5_AXI_ERR_RSP:
        case GAUDI2_EVENT_HMMU5_SPI_BASE ... GAUDI2_EVENT_HMMU5_SECURITY_ERROR:
                dcore = 2;
                index_in_dcore = 2;
                break;
        case GAUDI2_EVENT_HMMU_6_AXI_ERR_RSP:
        case GAUDI2_EVENT_HMMU6_SPI_BASE ... GAUDI2_EVENT_HMMU6_SECURITY_ERROR:
                dcore = 3;
                index_in_dcore = 3;
                break;
        case GAUDI2_EVENT_HMMU_7_AXI_ERR_RSP:
        case GAUDI2_EVENT_HMMU7_SPI_BASE ... GAUDI2_EVENT_HMMU7_SECURITY_ERROR:
                dcore = 2;
                index_in_dcore = 3;
                break;
        case GAUDI2_EVENT_HMMU_8_AXI_ERR_RSP:
        case GAUDI2_EVENT_HMMU8_SPI_BASE ... GAUDI2_EVENT_HMMU8_SECURITY_ERROR:
                dcore = 0;
                index_in_dcore = 2;
                break;
        case GAUDI2_EVENT_HMMU_9_AXI_ERR_RSP:
        case GAUDI2_EVENT_HMMU9_SPI_BASE ... GAUDI2_EVENT_HMMU9_SECURITY_ERROR:
                dcore = 1;
                index_in_dcore = 2;
                break;
        case GAUDI2_EVENT_HMMU_10_AXI_ERR_RSP:
        case GAUDI2_EVENT_HMMU10_SPI_BASE ... GAUDI2_EVENT_HMMU10_SECURITY_ERROR:
                dcore = 0;
                index_in_dcore = 3;
                break;
        case GAUDI2_EVENT_HMMU_11_AXI_ERR_RSP:
        case GAUDI2_EVENT_HMMU11_SPI_BASE ... GAUDI2_EVENT_HMMU11_SECURITY_ERROR:
                dcore = 1;
                index_in_dcore = 3;
                break;
        case GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
        case GAUDI2_EVENT_HMMU12_SPI_BASE ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
                dcore = 3;
                index_in_dcore = 0;
                break;
        case GAUDI2_EVENT_HMMU_13_AXI_ERR_RSP:
        case GAUDI2_EVENT_HMMU13_SPI_BASE ... GAUDI2_EVENT_HMMU13_SECURITY_ERROR:
                dcore = 2;
                index_in_dcore = 0;
                break;
        case GAUDI2_EVENT_HMMU_14_AXI_ERR_RSP:
        case GAUDI2_EVENT_HMMU14_SPI_BASE ... GAUDI2_EVENT_HMMU14_SECURITY_ERROR:
                dcore = 3;
                index_in_dcore = 1;
                break;
        case GAUDI2_EVENT_HMMU_15_AXI_ERR_RSP:
        case GAUDI2_EVENT_HMMU15_SPI_BASE ... GAUDI2_EVENT_HMMU15_SECURITY_ERROR:
                dcore = 2;
                index_in_dcore = 1;
                break;
        default:
                return ULONG_MAX;
        }

        return mmDCORE0_HMMU0_MMU_BASE + dcore * DCORE_OFFSET + index_in_dcore * DCORE_HMMU_OFFSET;
}

static int gaudi2_handle_mmu_spi_sei_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
{
        bool is_pmmu = false;
        u32 error_count = 0;
        u64 mmu_base;

        switch (event_type) {
        case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
        case GAUDI2_EVENT_HMMU0_SPI_BASE ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
                mmu_base = get_hmmu_base(event_type);
                break;

        case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR:
        case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0:
                is_pmmu = true;
                mmu_base = mmPMMU_HBW_MMU_BASE;
                break;
        default:
                return 0;
        }

        if (mmu_base == ULONG_MAX)
                return 0;

        error_count = gaudi2_handle_mmu_spi_sei_generic(hdev, event_type, mmu_base,
                                                        is_pmmu, event_mask);
        hl_check_for_glbl_errors(hdev);

        return error_count;
}

/* Returns true if hard reset is required (ECC DERR or read parity), false otherwise (ECC SERR) */
static bool gaudi2_hbm_sei_handle_read_err(struct hl_device *hdev,
                        struct hl_eq_hbm_sei_read_err_intr_info *rd_err_data, u32 err_cnt)
{
        u32 addr, beat, beat_shift;
        bool rc = false;

        dev_err_ratelimited(hdev->dev,
                        "READ ERROR count: ECC SERR: %d, ECC DERR: %d, RD_PARITY: %d\n",
                        FIELD_GET(HBM_ECC_SERR_CNTR_MASK, err_cnt),
                        FIELD_GET(HBM_ECC_DERR_CNTR_MASK, err_cnt),
                        FIELD_GET(HBM_RD_PARITY_CNTR_MASK, err_cnt));

        addr = le32_to_cpu(rd_err_data->dbg_rd_err_addr.rd_addr_val);
        dev_err_ratelimited(hdev->dev,
                        "READ ERROR address: sid(%u), bg(%u), ba(%u), col(%u), row(%u)\n",
                        FIELD_GET(HBM_RD_ADDR_SID_MASK, addr),
                        FIELD_GET(HBM_RD_ADDR_BG_MASK, addr),
                        FIELD_GET(HBM_RD_ADDR_BA_MASK, addr),
                        FIELD_GET(HBM_RD_ADDR_COL_MASK, addr),
                        FIELD_GET(HBM_RD_ADDR_ROW_MASK, addr));

        /* For each beat (RDQS edge), look for possible errors and print relevant info */
        for (beat = 0 ; beat < 4 ; beat++) {
                if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
                        (HBM_RD_ERR_SERR_BEAT0_MASK << beat))
                        dev_err_ratelimited(hdev->dev, "Beat%d ECC SERR: DM: %#x, Syndrome: %#x\n",
                                                beat,
                                                le32_to_cpu(rd_err_data->dbg_rd_err_dm),
                                                le32_to_cpu(rd_err_data->dbg_rd_err_syndrome));

                if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
                        (HBM_RD_ERR_DERR_BEAT0_MASK << beat)) {
                        dev_err_ratelimited(hdev->dev, "Beat%d ECC DERR: DM: %#x, Syndrome: %#x\n",
                                                beat,
                                                le32_to_cpu(rd_err_data->dbg_rd_err_dm),
                                                le32_to_cpu(rd_err_data->dbg_rd_err_syndrome));
                        rc |= true;
                }

                beat_shift = beat * HBM_RD_ERR_BEAT_SHIFT;
                if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
                        (HBM_RD_ERR_PAR_ERR_BEAT0_MASK << beat_shift)) {
                        dev_err_ratelimited(hdev->dev,
                                        "Beat%d read PARITY: DM: %#x, PAR data: %#x\n",
                                        beat,
                                        le32_to_cpu(rd_err_data->dbg_rd_err_dm),
                                        (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
                                                (HBM_RD_ERR_PAR_DATA_BEAT0_MASK << beat_shift)) >>
                                                (HBM_RD_ERR_PAR_DATA_BEAT0_SHIFT + beat_shift));
                        rc |= true;
                }

                dev_err_ratelimited(hdev->dev, "Beat%d DQ data:\n", beat);
                dev_err_ratelimited(hdev->dev, "\t0x%08x\n",
                                        le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2]));
                dev_err_ratelimited(hdev->dev, "\t0x%08x\n",
                                        le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2 + 1]));
        }

        return rc;
}

static void gaudi2_hbm_sei_print_wr_par_info(struct hl_device *hdev,
                        struct hl_eq_hbm_sei_wr_par_intr_info *wr_par_err_data, u32 err_cnt)
{
        struct hbm_sei_wr_cmd_address *wr_cmd_addr = wr_par_err_data->dbg_last_wr_cmds;
        u32 i, curr_addr, derr = wr_par_err_data->dbg_derr;

        dev_err_ratelimited(hdev->dev, "WRITE PARITY ERROR count: %d\n", err_cnt);

        dev_err_ratelimited(hdev->dev, "CK-0 DERR: 0x%02x, CK-1 DERR: 0x%02x\n",
                                derr & 0x3, derr & 0xc);

        /* JIRA H6-3286 - the following prints may not be valid */
        dev_err_ratelimited(hdev->dev, "Last latched write command addresses:\n");
        for (i = 0 ; i < HBM_WR_PAR_CMD_LIFO_LEN ; i++) {
                curr_addr = le32_to_cpu(wr_cmd_addr[i].dbg_wr_cmd_addr);
                dev_err_ratelimited(hdev->dev,
                                "\twrite cmd[%u]: Address: SID(%u) BG(%u) BA(%u) COL(%u).\n",
                                i,
                                FIELD_GET(WR_PAR_LAST_CMD_SID_MASK, curr_addr),
                                FIELD_GET(WR_PAR_LAST_CMD_BG_MASK, curr_addr),
                                FIELD_GET(WR_PAR_LAST_CMD_BA_MASK, curr_addr),
                                FIELD_GET(WR_PAR_LAST_CMD_COL_MASK, curr_addr));
        }
}

static void gaudi2_hbm_sei_print_ca_par_info(struct hl_device *hdev,
                struct hl_eq_hbm_sei_ca_par_intr_info *ca_par_err_data, u32 err_cnt)
{
        __le32 *col_cmd = ca_par_err_data->dbg_col;
        __le16 *row_cmd = ca_par_err_data->dbg_row;
        u32 i;

        dev_err_ratelimited(hdev->dev, "CA ERROR count: %d\n", err_cnt);

        dev_err_ratelimited(hdev->dev, "Last latched C&R bus commands:\n");
        for (i = 0 ; i < HBM_CA_ERR_CMD_LIFO_LEN ; i++)
                dev_err_ratelimited(hdev->dev, "cmd%u: ROW(0x%04x) COL(0x%05x)\n", i,
                        le16_to_cpu(row_cmd[i]) & (u16)GENMASK(13, 0),
                        le32_to_cpu(col_cmd[i]) & (u32)GENMASK(17, 0));
}

/* Returns true if hard reset is needed or false otherwise */
static bool gaudi2_handle_hbm_mc_sei_err(struct hl_device *hdev, u16 event_type,
                                        struct hl_eq_hbm_sei_data *sei_data)
{
        bool require_hard_reset = false;
        u32 hbm_id, mc_id, cause_idx;

        hbm_id = (event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 4;
        mc_id = ((event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 2) % 2;

        cause_idx = sei_data->hdr.sei_cause;
        if (cause_idx >= GAUDI2_NUM_OF_HBM_SEI_CAUSE) {
                gaudi2_print_event(hdev, event_type, true,
                        "err cause: Invalid HBM SEI event cause (%d) provided by FW",
                        cause_idx);
                return true;
        }

        gaudi2_print_event(hdev, event_type, !sei_data->hdr.is_critical,
                "System %s Error Interrupt - HBM(%u) MC(%u) MC_CH(%u) MC_PC(%u). Error cause: %s",
                sei_data->hdr.is_critical ? "Critical" : "Non-critical",
                hbm_id, mc_id, sei_data->hdr.mc_channel, sei_data->hdr.mc_pseudo_channel,
                hbm_mc_sei_cause[cause_idx]);

        /* Print error-specific info */
        switch (cause_idx) {
        case HBM_SEI_CATTRIP:
                require_hard_reset = true;
                break;

        case HBM_SEI_CMD_PARITY_EVEN:
                gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_even_info,
                                                le32_to_cpu(sei_data->hdr.cnt));
                require_hard_reset = true;
                break;

        case HBM_SEI_CMD_PARITY_ODD:
                gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_odd_info,
                                                le32_to_cpu(sei_data->hdr.cnt));
                require_hard_reset = true;
                break;

        case HBM_SEI_WRITE_DATA_PARITY_ERR:
                gaudi2_hbm_sei_print_wr_par_info(hdev, &sei_data->wr_parity_info,
                                                le32_to_cpu(sei_data->hdr.cnt));
                require_hard_reset = true;
                break;

        case HBM_SEI_READ_ERR:
                /* Unlike other SEI events, read error requires further processing of the
                 * raw data in order to determine the root cause.
                 */
                require_hard_reset = gaudi2_hbm_sei_handle_read_err(hdev,
                                                                &sei_data->read_err_info,
                                                                le32_to_cpu(sei_data->hdr.cnt));
                break;

        default:
                break;
        }

        require_hard_reset |= !!sei_data->hdr.is_critical;

        return require_hard_reset;
}

static int gaudi2_handle_hbm_cattrip(struct hl_device *hdev, u16 event_type,
                                u64 intr_cause_data)
{
        if (intr_cause_data) {
                gaudi2_print_event(hdev, event_type, true,
                        "temperature error cause: %#llx", intr_cause_data);
                return 1;
        }

        return 0;
}

static int gaudi2_handle_hbm_mc_spi(struct hl_device *hdev, u64 intr_cause_data)
{
        u32 i, error_count = 0;

        for (i = 0 ; i < GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE ; i++)
                if (intr_cause_data & hbm_mc_spi[i].mask) {
                        dev_dbg(hdev->dev, "HBM spi event: notification cause(%s)\n",
                                hbm_mc_spi[i].cause);
                        error_count++;
                }

        return error_count;
}

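/*
 * Track the clock throttling state: record the reason (power or thermal)
 * and the start/end timestamps of each throttling episode, and set the
 * user notification mask on thermal events.
 */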
static void gaudi2_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
{
        ktime_t zero_time = ktime_set(0, 0);

        mutex_lock(&hdev->clk_throttling.lock);

        switch (event_type) {
        case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S:
                hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
                hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
                hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
                hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
                dev_dbg_ratelimited(hdev->dev, "Clock throttling due to power consumption\n");
                break;

        case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E:
                hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
                hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
                dev_dbg_ratelimited(hdev->dev, "Power envelope is safe, back to optimal clock\n");
                break;

        case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S:
                hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
                hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
                hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
                hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
                *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
                dev_info_ratelimited(hdev->dev, "Clock throttling due to overheating\n");
                break;

        case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
                hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
                hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
                *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
                dev_info_ratelimited(hdev->dev, "Thermal envelope is safe, back to optimal clock\n");
                break;

        default:
                dev_err(hdev->dev, "Received invalid clock change event %d\n", event_type);
                break;
        }

        mutex_unlock(&hdev->clk_throttling.lock);
}

static void gaudi2_print_out_of_sync_info(struct hl_device *hdev, u16 event_type,
                                        struct cpucp_pkt_sync_err *sync_err)
{
        struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];

        gaudi2_print_event(hdev, event_type, false,
                "FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d",
                le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci),
                q->pi, atomic_read(&q->ci));
}

static int gaudi2_handle_pcie_p2p_msix(struct hl_device *hdev, u16 event_type)
{
        u32 p2p_intr, msix_gw_intr, error_count = 0;

        p2p_intr = RREG32(mmPCIE_WRAP_P2P_INTR);
        msix_gw_intr = RREG32(mmPCIE_WRAP_MSIX_GW_INTR);

        if (p2p_intr) {
                gaudi2_print_event(hdev, event_type, true,
                        "pcie p2p transaction terminated due to security, req_id(0x%x)",
                        RREG32(mmPCIE_WRAP_P2P_REQ_ID));

                WREG32(mmPCIE_WRAP_P2P_INTR, 0x1);
                error_count++;
        }

        if (msix_gw_intr) {
                gaudi2_print_event(hdev, event_type, true,
                        "pcie msi-x gen denied due to vector num check failure, vec(0x%X)",
                        RREG32(mmPCIE_WRAP_MSIX_GW_VEC));

                WREG32(mmPCIE_WRAP_MSIX_GW_INTR, 0x1);
                error_count++;
        }

        return error_count;
}

static int gaudi2_handle_pcie_drain(struct hl_device *hdev,
                        struct hl_eq_pcie_drain_ind_data *drain_data)
{
        u64 lbw_rd, lbw_wr, hbw_rd, hbw_wr, cause, error_count = 0;

        cause = le64_to_cpu(drain_data->intr_cause.intr_cause_data);
        lbw_rd = le64_to_cpu(drain_data->drain_rd_addr_lbw);
        lbw_wr = le64_to_cpu(drain_data->drain_wr_addr_lbw);
        hbw_rd = le64_to_cpu(drain_data->drain_rd_addr_hbw);
        hbw_wr = le64_to_cpu(drain_data->drain_wr_addr_hbw);

        if (cause & BIT_ULL(0)) {
                dev_err_ratelimited(hdev->dev,
                        "PCIE AXI drain LBW completed, read_err %u, write_err %u\n",
                        !!lbw_rd, !!lbw_wr);
                error_count++;
        }

        if (cause & BIT_ULL(1)) {
                dev_err_ratelimited(hdev->dev,
                        "PCIE AXI drain HBW completed, raddr %#llx, waddr %#llx\n",
                        hbw_rd, hbw_wr);
                error_count++;
        }

        return error_count;
}

static int gaudi2_handle_psoc_drain(struct hl_device *hdev, u64 intr_cause_data)
{
        u32 error_count = 0;
        int i;

        for (i = 0 ; i < GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE ; i++) {
                if (intr_cause_data & BIT_ULL(i)) {
                        dev_err_ratelimited(hdev->dev, "PSOC %s completed\n",
                                gaudi2_psoc_axi_drain_interrupts_cause[i]);
                        error_count++;
                }
        }

        hl_check_for_glbl_errors(hdev);

        return error_count;
}

static void gaudi2_print_cpu_pkt_failure_info(struct hl_device *hdev, u16 event_type,
                                        struct cpucp_pkt_sync_err *sync_err)
{
        struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];

        gaudi2_print_event(hdev, event_type, false,
                "FW reported sanity check failure, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d",
                le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
}

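/*
 * ARC engine interrupt handler. Currently only the DCCM-queue-full
 * interrupt carries a decodable payload; any other interrupt type is
 * reported as an unknown ARC event.
 */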
static int hl_arc_event_handle(struct hl_device *hdev, u16 event_type,
                                        struct hl_eq_engine_arc_intr_data *data)
{
        struct hl_engine_arc_dccm_queue_full_irq *q;
        u32 intr_type, engine_id;
        u64 payload;

        intr_type = le32_to_cpu(data->intr_type);
        engine_id = le32_to_cpu(data->engine_id);
        payload = le64_to_cpu(data->payload);

        switch (intr_type) {
        case ENGINE_ARC_DCCM_QUEUE_FULL_IRQ:
                q = (struct hl_engine_arc_dccm_queue_full_irq *) &payload;

                gaudi2_print_event(hdev, event_type, true,
                                "ARC DCCM Full event: EngId: %u, Intr_type: %u, Qidx: %u",
                                engine_id, intr_type, q->queue_index);
                return 1;
        default:
                gaudi2_print_event(hdev, event_type, true, "Unknown ARC event type");
                return 0;
        }
}

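/*
 * Top-level event queue dispatcher: classify the event type, update the
 * event statistics and route the entry to the matching handler, while
 * accumulating the error count, the reset flags and the user notification
 * mask.
 */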
static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
{
        struct gaudi2_device *gaudi2 = hdev->asic_specific;
        bool reset_required = false, is_critical = false;
        u32 index, ctl, reset_flags = 0, error_count = 0;
        u64 event_mask = 0;
        u16 event_type;

        ctl = le32_to_cpu(eq_entry->hdr.ctl);
        event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) >> EQ_CTL_EVENT_TYPE_SHIFT);

        if (event_type >= GAUDI2_EVENT_SIZE) {
                dev_err(hdev->dev, "Event type %u exceeds maximum of %u\n",
                                event_type, GAUDI2_EVENT_SIZE - 1);
                return;
        }

        gaudi2->events_stat[event_type]++;
        gaudi2->events_stat_aggregate[event_type]++;

        switch (event_type) {
        case GAUDI2_EVENT_PCIE_CORE_SERR ... GAUDI2_EVENT_ARC0_ECC_DERR:
                fallthrough;
        case GAUDI2_EVENT_ROTATOR0_SERR ... GAUDI2_EVENT_ROTATOR1_DERR:
                reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
                event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
                reset_required = gaudi2_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
                is_critical = eq_entry->ecc_data.is_critical;
                error_count++;
                break;

        case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_PDMA1_QM:
                fallthrough;
        case GAUDI2_EVENT_ROTATOR0_ROT0_QM ... GAUDI2_EVENT_ROTATOR1_ROT1_QM:
                fallthrough;
        case GAUDI2_EVENT_NIC0_QM0 ... GAUDI2_EVENT_NIC11_QM1:
                error_count = gaudi2_handle_qman_err(hdev, event_type, &event_mask);
                event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
                break;

        case GAUDI2_EVENT_ARC_AXI_ERROR_RESPONSE_0:
                error_count = gaudi2_handle_arc_farm_sei_err(hdev, event_type);
                event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
                break;

        case GAUDI2_EVENT_CPU_AXI_ERR_RSP:
                error_count = gaudi2_handle_cpu_sei_err(hdev, event_type);
                reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
                event_mask |= HL_NOTIFIER_EVENT_CRITICL_FW_ERR;
                break;

        case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
        case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
                error_count = gaudi2_handle_qm_sei_err(hdev, event_type, true, &event_mask);
                event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
                break;

        case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
        case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
                index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE;
                error_count = gaudi2_handle_rot_err(hdev, index, event_type,
                                        &eq_entry->razwi_with_intr_cause, &event_mask);
                error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask);
                event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
                break;

        case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
                index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
                error_count = gaudi2_tpc_ack_interrupts(hdev, index, event_type,
                                                &eq_entry->razwi_with_intr_cause, &event_mask);
                error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask);
                event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
                break;

        case GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE ... GAUDI2_EVENT_DEC9_AXI_ERR_RSPONSE:
                index = event_type - GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE;
                error_count = gaudi2_handle_dec_err(hdev, index, event_type, &event_mask);
                event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
                break;

        case GAUDI2_EVENT_TPC0_KERNEL_ERR:
        case GAUDI2_EVENT_TPC1_KERNEL_ERR:
        case GAUDI2_EVENT_TPC2_KERNEL_ERR:
        case GAUDI2_EVENT_TPC3_KERNEL_ERR:
        case GAUDI2_EVENT_TPC4_KERNEL_ERR:
        case GAUDI2_EVENT_TPC5_KERNEL_ERR:
        case GAUDI2_EVENT_TPC6_KERNEL_ERR:
        case GAUDI2_EVENT_TPC7_KERNEL_ERR:
        case GAUDI2_EVENT_TPC8_KERNEL_ERR:
        case GAUDI2_EVENT_TPC9_KERNEL_ERR:
        case GAUDI2_EVENT_TPC10_KERNEL_ERR:
        case GAUDI2_EVENT_TPC11_KERNEL_ERR:
        case GAUDI2_EVENT_TPC12_KERNEL_ERR:
        case GAUDI2_EVENT_TPC13_KERNEL_ERR:
        case GAUDI2_EVENT_TPC14_KERNEL_ERR:
        case GAUDI2_EVENT_TPC15_KERNEL_ERR:
        case GAUDI2_EVENT_TPC16_KERNEL_ERR:
        case GAUDI2_EVENT_TPC17_KERNEL_ERR:
        case GAUDI2_EVENT_TPC18_KERNEL_ERR:
        case GAUDI2_EVENT_TPC19_KERNEL_ERR:
        case GAUDI2_EVENT_TPC20_KERNEL_ERR:
        case GAUDI2_EVENT_TPC21_KERNEL_ERR:
        case GAUDI2_EVENT_TPC22_KERNEL_ERR:
        case GAUDI2_EVENT_TPC23_KERNEL_ERR:
        case GAUDI2_EVENT_TPC24_KERNEL_ERR:
                index = (event_type - GAUDI2_EVENT_TPC0_KERNEL_ERR) /
                        (GAUDI2_EVENT_TPC1_KERNEL_ERR - GAUDI2_EVENT_TPC0_KERNEL_ERR);
                error_count = gaudi2_tpc_ack_interrupts(hdev, index, event_type,
                                        &eq_entry->razwi_with_intr_cause, &event_mask);
                event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
                break;

        case GAUDI2_EVENT_DEC0_SPI:
        case GAUDI2_EVENT_DEC1_SPI:
        case GAUDI2_EVENT_DEC2_SPI:
        case GAUDI2_EVENT_DEC3_SPI:
        case GAUDI2_EVENT_DEC4_SPI:
        case GAUDI2_EVENT_DEC5_SPI:
        case GAUDI2_EVENT_DEC6_SPI:
        case GAUDI2_EVENT_DEC7_SPI:
        case GAUDI2_EVENT_DEC8_SPI:
        case GAUDI2_EVENT_DEC9_SPI:
                index = (event_type - GAUDI2_EVENT_DEC0_SPI) /
                                (GAUDI2_EVENT_DEC1_SPI - GAUDI2_EVENT_DEC0_SPI);
                error_count = gaudi2_handle_dec_err(hdev, index, event_type, &event_mask);
                event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
                break;

        case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
        case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
        case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
        case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
                index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) /
                                (GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE -
                                                GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE);
                error_count = gaudi2_handle_mme_err(hdev, index, event_type, &event_mask);
                error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask);
                event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
                break;

        case GAUDI2_EVENT_MME0_QMAN_SW_ERROR:
        case GAUDI2_EVENT_MME1_QMAN_SW_ERROR:
        case GAUDI2_EVENT_MME2_QMAN_SW_ERROR:
        case GAUDI2_EVENT_MME3_QMAN_SW_ERROR:
                index = (event_type - GAUDI2_EVENT_MME0_QMAN_SW_ERROR) /
                                (GAUDI2_EVENT_MME1_QMAN_SW_ERROR -
                                        GAUDI2_EVENT_MME0_QMAN_SW_ERROR);
                error_count = gaudi2_handle_mme_err(hdev, index, event_type, &event_mask);
                event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9707                 break;
9708
9709         case GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID:
9710         case GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID:
9711         case GAUDI2_EVENT_MME2_WAP_SOURCE_RESULT_INVALID:
9712         case GAUDI2_EVENT_MME3_WAP_SOURCE_RESULT_INVALID:
9713                 index = (event_type - GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID) /
9714                                 (GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID -
9715                                         GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID);
9716                 error_count = gaudi2_handle_mme_wap_err(hdev, index, event_type, &event_mask);
9717                 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9718                 break;
9719
9720         case GAUDI2_EVENT_KDMA_CH0_AXI_ERR_RSP:
9721         case GAUDI2_EVENT_KDMA0_CORE:
9722                 error_count = gaudi2_handle_kdma_core_event(hdev, event_type,
9723                                         le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9724                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9725                 break;
9726
9727         case GAUDI2_EVENT_HDMA2_CORE ... GAUDI2_EVENT_HDMA5_CORE:
9728                 index = event_type - GAUDI2_EVENT_HDMA2_CORE;
9729                 error_count = gaudi2_handle_edma_core_event(hdev, event_type, index);
9730                 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9731                 break;
9732
9733         case GAUDI2_EVENT_PDMA0_CORE ... GAUDI2_EVENT_PDMA1_CORE:
9734                 index = event_type - GAUDI2_EVENT_PDMA0_CORE;
9735                 error_count = gaudi2_handle_pdma_core_event(hdev, event_type, index);
9736                 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9737                 break;
9738
9739         case GAUDI2_EVENT_PCIE_ADDR_DEC_ERR:
9740                 error_count = gaudi2_print_pcie_addr_dec_info(hdev, event_type,
9741                                 le64_to_cpu(eq_entry->intr_cause.intr_cause_data), &event_mask);
9742                 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9743                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9744                 break;
9745
9746         case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
9747         case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
9748         case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR:
9749         case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0:
9750                 error_count = gaudi2_handle_mmu_spi_sei_err(hdev, event_type, &event_mask);
9751                 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9752                 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9753                 break;
9754
9755         case GAUDI2_EVENT_HIF0_FATAL ... GAUDI2_EVENT_HIF12_FATAL:
9756                 error_count = gaudi2_handle_hif_fatal(hdev, event_type,
9757                                 le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9758                 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9759                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9760                 break;
9761
9762         case GAUDI2_EVENT_PMMU_FATAL_0:
9763                 error_count = gaudi2_handle_pif_fatal(hdev, event_type,
9764                                 le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9765                 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9766                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9767                 break;
9768
9769         case GAUDI2_EVENT_PSOC63_RAZWI_OR_PID_MIN_MAX_INTERRUPT:
9770                 error_count = gaudi2_ack_psoc_razwi_event_handler(hdev, &event_mask);
9771                 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9772                 break;
9773
9774         case GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE ... GAUDI2_EVENT_HBM5_MC1_SEI_NON_SEVERE:
9775                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9776                 if (gaudi2_handle_hbm_mc_sei_err(hdev, event_type, &eq_entry->sei_data)) {
9777                         reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9778                         reset_required = true;
9779                 }
9780                 error_count++;
9781                 break;
9782
9783         case GAUDI2_EVENT_HBM_CATTRIP_0 ... GAUDI2_EVENT_HBM_CATTRIP_5:
9784                 error_count = gaudi2_handle_hbm_cattrip(hdev, event_type,
9785                                 le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9786                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9787                 break;
9788
9789         case GAUDI2_EVENT_HBM0_MC0_SPI ... GAUDI2_EVENT_HBM5_MC1_SPI:
9790                 error_count = gaudi2_handle_hbm_mc_spi(hdev,
9791                                 le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9792                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9793                 break;
9794
9795         case GAUDI2_EVENT_PCIE_DRAIN_COMPLETE:
9796                 error_count = gaudi2_handle_pcie_drain(hdev, &eq_entry->pcie_drain_ind_data);
9797                 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9798                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9799                 break;
9800
9801         case GAUDI2_EVENT_PSOC59_RPM_ERROR_OR_DRAIN:
9802                 error_count = gaudi2_handle_psoc_drain(hdev,
9803                                 le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9804                 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9805                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9806                 break;
9807
9808         case GAUDI2_EVENT_CPU_AXI_ECC:
9809                 error_count = GAUDI2_NA_EVENT_CAUSE;
9810                 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9811                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9812                 break;
9813         case GAUDI2_EVENT_CPU_L2_RAM_ECC:
9814                 error_count = GAUDI2_NA_EVENT_CAUSE;
9815                 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9816                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9817                 break;
9818         case GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME0_SBTE4_AXI_ERR_RSP:
9819         case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_SBTE4_AXI_ERR_RSP:
9820         case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_SBTE4_AXI_ERR_RSP:
9821         case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_SBTE4_AXI_ERR_RSP:
9822                 error_count = gaudi2_handle_mme_sbte_err(hdev, event_type,
9823                                                 le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9824                 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9825                 break;
9826         case GAUDI2_EVENT_VM0_ALARM_A ... GAUDI2_EVENT_VM3_ALARM_B:
9827                 error_count = GAUDI2_NA_EVENT_CAUSE;
9828                 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9829                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9830                 break;
9831         case GAUDI2_EVENT_PSOC_AXI_ERR_RSP:
9832                 error_count = GAUDI2_NA_EVENT_CAUSE;
9833                 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9834                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9835                 break;
9836         case GAUDI2_EVENT_PSOC_PRSTN_FALL:
9837                 error_count = GAUDI2_NA_EVENT_CAUSE;
9838                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9839                 break;
9840         case GAUDI2_EVENT_PCIE_APB_TIMEOUT:
9841                 error_count = GAUDI2_NA_EVENT_CAUSE;
9842                 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9843                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9844                 break;
9845         case GAUDI2_EVENT_PCIE_FATAL_ERR:
9846                 error_count = GAUDI2_NA_EVENT_CAUSE;
9847                 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9848                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9849                 break;
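        /*
         * BMON/SPMU (bus-monitor / performance-monitoring unit) events carry
         * no interrupt-cause data to decode, hence GAUDI2_NA_EVENT_CAUSE;
         * user processes are still notified through the engine-error mask.
         */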
9850         case GAUDI2_EVENT_TPC0_BMON_SPMU:
9851         case GAUDI2_EVENT_TPC1_BMON_SPMU:
9852         case GAUDI2_EVENT_TPC2_BMON_SPMU:
9853         case GAUDI2_EVENT_TPC3_BMON_SPMU:
9854         case GAUDI2_EVENT_TPC4_BMON_SPMU:
9855         case GAUDI2_EVENT_TPC5_BMON_SPMU:
9856         case GAUDI2_EVENT_TPC6_BMON_SPMU:
9857         case GAUDI2_EVENT_TPC7_BMON_SPMU:
9858         case GAUDI2_EVENT_TPC8_BMON_SPMU:
9859         case GAUDI2_EVENT_TPC9_BMON_SPMU:
9860         case GAUDI2_EVENT_TPC10_BMON_SPMU:
9861         case GAUDI2_EVENT_TPC11_BMON_SPMU:
9862         case GAUDI2_EVENT_TPC12_BMON_SPMU:
9863         case GAUDI2_EVENT_TPC13_BMON_SPMU:
9864         case GAUDI2_EVENT_TPC14_BMON_SPMU:
9865         case GAUDI2_EVENT_TPC15_BMON_SPMU:
9866         case GAUDI2_EVENT_TPC16_BMON_SPMU:
9867         case GAUDI2_EVENT_TPC17_BMON_SPMU:
9868         case GAUDI2_EVENT_TPC18_BMON_SPMU:
9869         case GAUDI2_EVENT_TPC19_BMON_SPMU:
9870         case GAUDI2_EVENT_TPC20_BMON_SPMU:
9871         case GAUDI2_EVENT_TPC21_BMON_SPMU:
9872         case GAUDI2_EVENT_TPC22_BMON_SPMU:
9873         case GAUDI2_EVENT_TPC23_BMON_SPMU:
9874         case GAUDI2_EVENT_TPC24_BMON_SPMU:
9875         case GAUDI2_EVENT_MME0_CTRL_BMON_SPMU:
9876         case GAUDI2_EVENT_MME0_SBTE_BMON_SPMU:
9877         case GAUDI2_EVENT_MME0_WAP_BMON_SPMU:
9878         case GAUDI2_EVENT_MME1_CTRL_BMON_SPMU:
9879         case GAUDI2_EVENT_MME1_SBTE_BMON_SPMU:
9880         case GAUDI2_EVENT_MME1_WAP_BMON_SPMU:
9881         case GAUDI2_EVENT_MME2_CTRL_BMON_SPMU:
9882         case GAUDI2_EVENT_MME2_SBTE_BMON_SPMU:
9883         case GAUDI2_EVENT_MME2_WAP_BMON_SPMU:
9884         case GAUDI2_EVENT_MME3_CTRL_BMON_SPMU:
9885         case GAUDI2_EVENT_MME3_SBTE_BMON_SPMU:
9886         case GAUDI2_EVENT_MME3_WAP_BMON_SPMU:
9887         case GAUDI2_EVENT_HDMA2_BM_SPMU ... GAUDI2_EVENT_PDMA1_BM_SPMU:
9888                 fallthrough;
9889         case GAUDI2_EVENT_DEC0_BMON_SPMU:
9890         case GAUDI2_EVENT_DEC1_BMON_SPMU:
9891         case GAUDI2_EVENT_DEC2_BMON_SPMU:
9892         case GAUDI2_EVENT_DEC3_BMON_SPMU:
9893         case GAUDI2_EVENT_DEC4_BMON_SPMU:
9894         case GAUDI2_EVENT_DEC5_BMON_SPMU:
9895         case GAUDI2_EVENT_DEC6_BMON_SPMU:
9896         case GAUDI2_EVENT_DEC7_BMON_SPMU:
9897         case GAUDI2_EVENT_DEC8_BMON_SPMU:
9898         case GAUDI2_EVENT_DEC9_BMON_SPMU:
9899         case GAUDI2_EVENT_ROTATOR0_BMON_SPMU ... GAUDI2_EVENT_SM3_BMON_SPMU:
9900                 error_count = GAUDI2_NA_EVENT_CAUSE;
9901                 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9902                 break;
9903
9904         case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S:
9905         case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E:
9906         case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S:
9907         case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
9908                 gaudi2_print_clk_change_info(hdev, event_type, &event_mask);
9909                 error_count = GAUDI2_NA_EVENT_CAUSE;
9910                 break;
9911
9912         case GAUDI2_EVENT_CPU_PKT_QUEUE_OUT_SYNC:
9913                 gaudi2_print_out_of_sync_info(hdev, event_type, &eq_entry->pkt_sync_err);
9914                 error_count = GAUDI2_NA_EVENT_CAUSE;
9915                 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9916                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9917                 break;
9918
9919         case GAUDI2_EVENT_PCIE_FLR_REQUESTED:
9920                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9921                 error_count = GAUDI2_NA_EVENT_CAUSE;
9922                 /* Do nothing - FW will handle it */
9923                 break;
9924
9925         case GAUDI2_EVENT_PCIE_P2P_MSIX:
9926                 error_count = gaudi2_handle_pcie_p2p_msix(hdev, event_type);
9927                 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9928                 break;
9929
9930         case GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE ... GAUDI2_EVENT_SM3_AXI_ERROR_RESPONSE:
9931                 index = event_type - GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE;
9932                 error_count = gaudi2_handle_sm_err(hdev, event_type, index);
9933                 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9934                 break;
9935
9936         case GAUDI2_EVENT_PSOC_MME_PLL_LOCK_ERR ... GAUDI2_EVENT_DCORE2_HBM_PLL_LOCK_ERR:
9937                 error_count = GAUDI2_NA_EVENT_CAUSE;
9938                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9939                 break;
9940
9941         case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
9942                 dev_info(hdev->dev, "CPLD shutdown cause, reset reason: 0x%llx\n",
9943                                                 le64_to_cpu(eq_entry->data[0]));
9944                 error_count = GAUDI2_NA_EVENT_CAUSE;
9945                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9946                 break;
9947         case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_EVENT:
9948                 dev_err(hdev->dev, "CPLD shutdown event, reset reason: 0x%llx\n",
9949                                                 le64_to_cpu(eq_entry->data[0]));
9950                 error_count = GAUDI2_NA_EVENT_CAUSE;
9951                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9952                 break;
9953
9954         case GAUDI2_EVENT_CPU_PKT_SANITY_FAILED:
9955                 gaudi2_print_cpu_pkt_failure_info(hdev, event_type, &eq_entry->pkt_sync_err);
9956                 error_count = GAUDI2_NA_EVENT_CAUSE;
9957                 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9958                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9959                 break;
9960
9961         case GAUDI2_EVENT_ARC_DCCM_FULL:
9962                 error_count = hl_arc_event_handle(hdev, event_type, &eq_entry->arc_data);
9963                 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9964                 break;
9965
9966         case GAUDI2_EVENT_CPU_FP32_NOT_SUPPORTED:
9967         case GAUDI2_EVENT_CPU_DEV_RESET_REQ:
9968                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9969                 error_count = GAUDI2_NA_EVENT_CAUSE;
9970                 is_critical = true;
9971                 break;
9972
9973         default:
9974                 if (gaudi2_irq_map_table[event_type].valid) {
9975                         dev_err_ratelimited(hdev->dev, "Cannot find handler for event %d\n",
9976                                                 event_type);
9977                         error_count = GAUDI2_NA_EVENT_CAUSE;
9978                 }
9979         }
9980
9981         /* Make sure to dump an error in case no error cause was printed so far.
9982          * Note that although we have counted the errors, we use this number as
9983          * a boolean.
9984          */
9985         if (error_count == GAUDI2_NA_EVENT_CAUSE && !is_info_event(event_type))
9986                 gaudi2_print_event(hdev, event_type, true, "%d", event_type);
9987         else if (error_count == 0)
9988                 gaudi2_print_event(hdev, event_type, true,
9989                                 "No error cause for H/W event %u", event_type);
9990
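        /*
         * Reset if the event's static map entry requests one or if the handler
         * above demanded it; escalate to a hard reset when either side says
         * so. The reset itself is only scheduled when FW events are configured
         * to hard-reset or when the error is critical on a secured device.
         */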
9991         if ((gaudi2_irq_map_table[event_type].reset != EVENT_RESET_TYPE_NONE) ||
9992                                 reset_required) {
9993                 if (reset_required ||
9994                                 (gaudi2_irq_map_table[event_type].reset == EVENT_RESET_TYPE_HARD))
9995                         reset_flags |= HL_DRV_RESET_HARD;
9996
9997                 if (hdev->hard_reset_on_fw_events ||
9998                                 (hdev->asic_prop.fw_security_enabled && is_critical))
9999                         goto reset_device;
10000         }
10001
10002         /* Send unmask irq only for interrupts not classified as MSG */
10003         if (!gaudi2_irq_map_table[event_type].msg)
10004                 hl_fw_unmask_irq(hdev, event_type);
10005
10006         if (event_mask)
10007                 hl_notifier_event_send_all(hdev, event_mask);
10008
10009         return;
10010
10011 reset_device:
10012         if (hdev->asic_prop.fw_security_enabled && is_critical) {
10013                 reset_flags |= HL_DRV_RESET_BYPASS_REQ_TO_FW;
10014                 event_mask |= HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
10015         } else {
10016                 reset_flags |= HL_DRV_RESET_DELAY;
10017         }
10018         /* escalate general hw errors to critical/fatal error */
10019         if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR)
10020                 hl_handle_critical_hw_err(hdev, event_type, &event_mask);
10021
10022         event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
10023         hl_device_cond_reset(hdev, reset_flags, event_mask);
10024 }
10025
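/*
 * Build and send a single LIN_DMA packet in memset mode: with the MEMSET bit
 * set, the packet's src_addr field carries the fill value rather than a source
 * address, and WRCOMP requests a write-completion message once 'size' bytes
 * have been written to 'addr'.
 */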
10026 static int gaudi2_memset_memory_chunk_using_edma_qm(struct hl_device *hdev,
10027                         struct packet_lin_dma *lin_dma_pkt, dma_addr_t pkt_dma_addr,
10028                         u32 hw_queue_id, u32 size, u64 addr, u32 val)
10029 {
10030         u32 ctl, pkt_size;
10031         int rc = 0;
10032
10033         ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
10034         ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
10035         ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_WRCOMP_MASK, 1);
10036         ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 1);
10037
10038         lin_dma_pkt->ctl = cpu_to_le32(ctl);
10039         lin_dma_pkt->src_addr = cpu_to_le64(val);
10040         lin_dma_pkt->dst_addr = cpu_to_le64(addr);
10041         lin_dma_pkt->tsize = cpu_to_le32(size);
10042
10043         pkt_size = sizeof(struct packet_lin_dma);
10044
10045         rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, pkt_dma_addr);
10046         if (rc)
10047                 dev_err(hdev->dev, "Failed to send lin dma packet to H/W queue %d\n",
10048                                 hw_queue_id);
10049
10050         return rc;
10051 }
10052
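/*
 * Memset a device memory range by round-robining up-to-2GB LIN_DMA chunks
 * across all enabled EDMA queues. The engines are temporarily switched to MMU
 * bypass and their QMANs are put in test mode for the duration; completions
 * are counted on a sync object.
 */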
10053 static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val)
10054 {
10055         u32 edma_queues_id[] = {GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0,
10056                                         GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0,
10057                                         GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0,
10058                                         GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0};
10059         u32 chunk_size, dcore, edma_idx, sob_offset, sob_addr, comp_val,
10060                 old_mmubp, mmubp, num_of_pkts, busy, pkt_size;
10061         u64 comp_addr, cur_addr = addr, end_addr = addr + size;
10062         struct asic_fixed_properties *prop = &hdev->asic_prop;
10063         void *lin_dma_pkts_arr;
10064         dma_addr_t pkt_dma_addr;
10065         int rc = 0, dma_num = 0;
10066
10067         if (prop->edma_enabled_mask == 0) {
10068                 dev_info(hdev->dev, "none of the EDMA engines is enabled - skipping DRAM scrubbing\n");
10069                 return -EIO;
10070         }
10071
10072         sob_offset = hdev->asic_prop.first_available_user_sob[0] * 4;
10073         sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
10074         comp_addr = CFG_BASE + sob_addr;
10075         comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) |
10076                 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1);
10077         mmubp = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_MASK, 1) |
10078                 FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_MASK, 1);
10079
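        /*
         * comp_val is what each EDMA write-completion writes to the SOB; with
         * the INC bit set, the write atomically increments the SOB by one, so
         * the SOB ends up counting the completed LIN_DMA packets.
         */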
10080         /* Calculate how many lin dma pkts we'll need */
10081         num_of_pkts = div64_u64(round_up(size, SZ_2G), SZ_2G);
10082         pkt_size = sizeof(struct packet_lin_dma);
10083
10084         lin_dma_pkts_arr = hl_asic_dma_alloc_coherent(hdev, pkt_size * num_of_pkts,
10085                                         &pkt_dma_addr, GFP_KERNEL);
10086         if (!lin_dma_pkts_arr)
10087                 return -ENOMEM;
10088
10089         /*
10090          * Set MMU bypass for the scrubbing. All EDMAs are configured the same,
10091          * so save only the first one in order to restore it later.
10092          * Also set the SOB address on all EDMA cores for completion, and set
10093          * the QM as trusted so it can access physical addresses with MMU bypass.
10094          */
10095         old_mmubp = RREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP);
10096         for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
10097                 for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
10098                         u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET;
10099                         u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
10100
10101                         if (!(prop->edma_enabled_mask & BIT(edma_bit)))
10102                                 continue;
10103
10104                         WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP +
10105                                         edma_offset, mmubp);
10106                         WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset,
10107                                         lower_32_bits(comp_addr));
10108                         WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset,
10109                                         upper_32_bits(comp_addr));
10110                         WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset,
10111                                         comp_val);
10112                         gaudi2_qman_set_test_mode(hdev,
10113                                         edma_queues_id[dcore] + 4 * edma_idx, true);
10114                 }
10115         }
10116
10117         WREG32(sob_addr, 0);
10118
10119         while (cur_addr < end_addr) {
10120                 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
10121                         for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
10122                                 u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
10123
10124                                 if (!(prop->edma_enabled_mask & BIT(edma_bit)))
10125                                         continue;
10126
10127                                 chunk_size = min_t(u64, SZ_2G, end_addr - cur_addr);
10128
10129                                 rc = gaudi2_memset_memory_chunk_using_edma_qm(hdev,
10130                                         (struct packet_lin_dma *)lin_dma_pkts_arr + dma_num,
10131                                         pkt_dma_addr + dma_num * pkt_size,
10132                                         edma_queues_id[dcore] + edma_idx * 4,
10133                                         chunk_size, cur_addr, val);
10134                                 if (rc)
10135                                         goto end;
10136
10137                                 dma_num++;
10138                                 cur_addr += chunk_size;
10139                                 if (cur_addr == end_addr)
10140                                         break;
10141                         }
10142                 }
10143         }
10144
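        /*
         * Wait for the SOB to reach dma_num, i.e. for every submitted LIN_DMA
         * packet to report write completion (polling every 1ms, up to 1s).
         */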
10145         rc = hl_poll_timeout(hdev, sob_addr, busy, (busy == dma_num), 1000, 1000000);
10146         if (rc) {
10147                 dev_err(hdev->dev, "DMA Timeout during HBM scrubbing\n");
10148                 goto end;
10149         }
10150 end:
10151         for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
10152                 for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
10153                         u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET;
10154                         u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
10155
10156                         if (!(prop->edma_enabled_mask & BIT(edma_bit)))
10157                                 continue;
10158
10159                         WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + edma_offset, old_mmubp);
10160                         WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset, 0);
10161                         WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset, 0);
10162                         WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset, 0);
10163                         gaudi2_qman_set_test_mode(hdev,
10164                                         edma_queues_id[dcore] + 4 * edma_idx, false);
10165                 }
10166         }
10167
10168         WREG32(sob_addr, 0);
10169         hl_asic_dma_free_coherent(hdev, pkt_size * num_of_pkts, lin_dma_pkts_arr, pkt_dma_addr);
10170
10171         return rc;
10172 }
10173
10174 static int gaudi2_scrub_device_dram(struct hl_device *hdev, u64 val)
10175 {
10176         int rc;
10177         struct asic_fixed_properties *prop = &hdev->asic_prop;
10178         u64 size = prop->dram_end_address - prop->dram_user_base_address;
10179
10180         rc = gaudi2_memset_device_memory(hdev, prop->dram_user_base_address, size, val);
10181
10182         if (rc)
10183                 dev_err(hdev->dev, "Failed to scrub DRAM, address: 0x%llx size: %llu\n",
10184                                 prop->dram_user_base_address, size);
10185         return rc;
10186 }
10187
10188 static int gaudi2_scrub_device_mem(struct hl_device *hdev)
10189 {
10190         int rc;
10191         struct asic_fixed_properties *prop = &hdev->asic_prop;
10192         u64 val = hdev->memory_scrub_val;
10193         u64 addr, size;
10194
10195         if (!hdev->memory_scrub)
10196                 return 0;
10197
10198         /* scrub SRAM */
10199         addr = prop->sram_user_base_address;
10200         size = hdev->pldm ? 0x10000 : (prop->sram_size - SRAM_USER_BASE_OFFSET);
10201         dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx, val: 0x%llx\n",
10202                         addr, addr + size, val);
10203         rc = gaudi2_memset_device_memory(hdev, addr, size, val);
10204         if (rc) {
10205                 dev_err(hdev->dev, "scrubbing SRAM failed (%d)\n", rc);
10206                 return rc;
10207         }
10208
10209         /* scrub DRAM */
10210         rc = gaudi2_scrub_device_dram(hdev, val);
10211         if (rc) {
10212                 dev_err(hdev->dev, "scrubbing DRAM failed (%d)\n", rc);
10213                 return rc;
10214         }
10215         return 0;
10216 }
10217
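/*
 * Clear only the user-available sync-manager resources. On dcore0 the offsets
 * start at the first_available_* indices so that driver-reserved CQs,
 * monitors and SOBs are preserved; the remaining dcores are cleared in full.
 */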
10218 static void gaudi2_restore_user_sm_registers(struct hl_device *hdev)
10219 {
10220         u64 addr, mon_sts_addr, mon_cfg_addr, cq_lbw_l_addr, cq_lbw_h_addr,
10221                 cq_lbw_data_addr, cq_base_l_addr, cq_base_h_addr, cq_size_addr;
10222         u32 val, size, offset;
10223         int dcore_id;
10224
10225         offset = hdev->asic_prop.first_available_cq[0] * 4;
10226         cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset;
10227         cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + offset;
10228         cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + offset;
10229         cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + offset;
10230         cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + offset;
10231         cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + offset;
10232         size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 -
10233                         (mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset);
10234
10235         /* memset dcore0 CQ registers */
10236         gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0);
10237         gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0);
10238         gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0);
10239         gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0);
10240         gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0);
10241         gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0);
10242
10243         cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + DCORE_OFFSET;
10244         cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + DCORE_OFFSET;
10245         cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + DCORE_OFFSET;
10246         cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + DCORE_OFFSET;
10247         cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + DCORE_OFFSET;
10248         cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + DCORE_OFFSET;
10249         size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 - mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0;
10250
10251         for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
10252                 gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0);
10253                 gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0);
10254                 gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0);
10255                 gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0);
10256                 gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0);
10257                 gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0);
10258
10259                 cq_lbw_l_addr += DCORE_OFFSET;
10260                 cq_lbw_h_addr += DCORE_OFFSET;
10261                 cq_lbw_data_addr += DCORE_OFFSET;
10262                 cq_base_l_addr += DCORE_OFFSET;
10263                 cq_base_h_addr += DCORE_OFFSET;
10264                 cq_size_addr += DCORE_OFFSET;
10265         }
10266
10267         offset = hdev->asic_prop.first_available_user_mon[0] * 4;
10268         addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset;
10269         val = 1 << DCORE0_SYNC_MNGR_OBJS_MON_STATUS_PROT_SHIFT;
10270         size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - (mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset);
10271
10272         /* memset dcore0 monitors */
10273         gaudi2_memset_device_lbw(hdev, addr, size, val);
10274
10275         addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + offset;
10276         gaudi2_memset_device_lbw(hdev, addr, size, 0);
10277
10278         mon_sts_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + DCORE_OFFSET;
10279         mon_cfg_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + DCORE_OFFSET;
10280         size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0;
10281
10282         for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
10283                 gaudi2_memset_device_lbw(hdev, mon_sts_addr, size, val);
10284                 gaudi2_memset_device_lbw(hdev, mon_cfg_addr, size, 0);
10285                 mon_sts_addr += DCORE_OFFSET;
10286                 mon_cfg_addr += DCORE_OFFSET;
10287         }
10288
10289         offset = hdev->asic_prop.first_available_user_sob[0] * 4;
10290         addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset;
10291         val = 0;
10292         size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 -
10293                         (mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset);
10294
10295         /* memset dcore0 sobs */
10296         gaudi2_memset_device_lbw(hdev, addr, size, val);
10297
10298         addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + DCORE_OFFSET;
10299         size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 - mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0;
10300
10301         for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
10302                 gaudi2_memset_device_lbw(hdev, addr, size, val);
10303                 addr += DCORE_OFFSET;
10304         }
10305
10306         /* Flush all WREG to prevent race */
10307         val = RREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset);
10308 }
10309
10310 static void gaudi2_restore_user_qm_registers(struct hl_device *hdev)
10311 {
10312         u32 reg_base, hw_queue_id;
10313
10314         for (hw_queue_id = GAUDI2_QUEUE_ID_PDMA_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_ROT_1_0;
10315                                                         hw_queue_id += NUM_OF_PQ_PER_QMAN) {
10316                 if (!gaudi2_is_queue_enabled(hdev, hw_queue_id))
10317                         continue;
10318
10319                 gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false);
10320
10321                 reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
10322                 WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0);
10323         }
10324
10325         /* Flush all WREG to prevent race */
10326         RREG32(mmPDMA0_QM_ARB_CFG_0);
10327 }
10328
10329 static void gaudi2_restore_nic_qm_registers(struct hl_device *hdev)
10330 {
10331         u32 reg_base, hw_queue_id;
10332
10333         for (hw_queue_id = GAUDI2_QUEUE_ID_NIC_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_NIC_23_3;
10334                                                         hw_queue_id += NUM_OF_PQ_PER_QMAN) {
10335                 if (!gaudi2_is_queue_enabled(hdev, hw_queue_id))
10336                         continue;
10337
10338                 gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false);
10339
10340                 reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
10341                 WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0);
10342         }
10343
10344         /* Flush all WREG to prevent race */
10345         RREG32(mmPDMA0_QM_ARB_CFG_0);
10346 }
10347
10348 static int gaudi2_context_switch(struct hl_device *hdev, u32 asid)
10349 {
10350         return 0;
10351 }
10352
10353 static void gaudi2_restore_phase_topology(struct hl_device *hdev)
10354 {
10355 }
10356
10357 static void gaudi2_init_block_instances(struct hl_device *hdev, u32 block_idx,
10358                                                 struct dup_block_ctx *cfg_ctx)
10359 {
10360         u64 block_base = cfg_ctx->base + block_idx * cfg_ctx->block_off;
10361         u8 seq;
10362         int i;
10363
10364         for (i = 0 ; i < cfg_ctx->instances ; i++) {
10365                 seq = block_idx * cfg_ctx->instances + i;
10366
10367                 /* skip disabled instance */
10368                 if (!(cfg_ctx->enabled_mask & BIT_ULL(seq)))
10369                         continue;
10370
10371                 cfg_ctx->instance_cfg_fn(hdev, block_base + i * cfg_ctx->instance_off,
10372                                         cfg_ctx->data);
10373         }
10374 }
10375
10376 static void gaudi2_init_blocks_with_mask(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx,
10377                                                 u64 mask)
10378 {
10379         int i;
10380
10381         cfg_ctx->enabled_mask = mask;
10382
10383         for (i = 0 ; i < cfg_ctx->blocks ; i++)
10384                 gaudi2_init_block_instances(hdev, i, cfg_ctx);
10385 }
10386
10387 void gaudi2_init_blocks(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx)
10388 {
10389         gaudi2_init_blocks_with_mask(hdev, cfg_ctx, U64_MAX);
10390 }
10391
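/*
 * Read device memory into a host blob via KDMA: allocate a 2MB host bounce
 * buffer, reserve and map a device VA for it, then copy the requested range
 * in up-to-2MB chunks with the KDMA engine, memcpy'ing each chunk out of the
 * bounce buffer.
 */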
10392 static int gaudi2_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, void *blob_addr)
10393 {
10394         void *host_mem_virtual_addr;
10395         dma_addr_t host_mem_dma_addr;
10396         u64 reserved_va_base;
10397         u32 pos, size_left, size_to_dma;
10398         struct hl_ctx *ctx;
10399         int rc = 0;
10400
10401         /* Fetch the ctx */
10402         ctx = hl_get_compute_ctx(hdev);
10403         if (!ctx) {
10404                 dev_err(hdev->dev, "No ctx available\n");
10405                 return -EINVAL;
10406         }
10407
10408         /* Allocate buffers for read and for poll */
10409         host_mem_virtual_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &host_mem_dma_addr,
10410                                                                 GFP_KERNEL | __GFP_ZERO);
10411         if (host_mem_virtual_addr == NULL) {
10412                 dev_err(hdev->dev, "Failed to allocate memory for KDMA read\n");
10413                 rc = -ENOMEM;
10414                 goto put_ctx;
10415         }
10416
10417         /* Reserve VM region on asic side */
10418         reserved_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST, SZ_2M,
10419                                                 HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
10420         if (!reserved_va_base) {
10421                 dev_err(hdev->dev, "Failed to reserve vmem on asic\n");
10422                 rc = -ENOMEM;
10423                 goto free_data_buffer;
10424         }
10425
10426         /* Create mapping on asic side */
10427         mutex_lock(&hdev->mmu_lock);
10428
10429         rc = hl_mmu_map_contiguous(ctx, reserved_va_base, host_mem_dma_addr, SZ_2M);
10430         if (rc) {
10431                 dev_err(hdev->dev, "Failed to create mapping on asic mmu\n");
10432                 goto unreserve_va;
10433         }
10434
10435         rc = hl_mmu_invalidate_cache_range(hdev, false,
10436                                       MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV,
10437                                       ctx->asid, reserved_va_base, SZ_2M);
10438         if (rc) {
10439                 hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M);
10440                 goto unreserve_va;
10441         }
10442
10443         mutex_unlock(&hdev->mmu_lock);
10444
10445         /* Enable MMU on KDMA */
10446         gaudi2_kdma_set_mmbp_asid(hdev, false, ctx->asid);
10447
10448         pos = 0;
10449         size_left = size;
10450         size_to_dma = SZ_2M;
10451
10452         while (size_left > 0) {
10453                 if (size_left < SZ_2M)
10454                         size_to_dma = size_left;
10455
10456                 rc = gaudi2_send_job_to_kdma(hdev, addr, reserved_va_base, size_to_dma, false);
10457                 if (rc)
10458                         break;
10459
10460                 memcpy(blob_addr + pos, host_mem_virtual_addr, size_to_dma);
10461
10462                 if (size_left <= SZ_2M)
10463                         break;
10464
10465                 pos += SZ_2M;
10466                 addr += SZ_2M;
10467                 size_left -= SZ_2M;
10468         }
10469
10470         gaudi2_kdma_set_mmbp_asid(hdev, true, HL_KERNEL_ASID_ID);
10471
10472         mutex_lock(&hdev->mmu_lock);
10473
10474         rc = hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M);
10475         if (rc)
10476                 goto unreserve_va;
10477
10478         rc = hl_mmu_invalidate_cache_range(hdev, false, MMU_OP_USERPTR,
10479                                       ctx->asid, reserved_va_base, SZ_2M);
10480
10481 unreserve_va:
10482         mutex_unlock(&hdev->mmu_lock);
10483         hl_unreserve_va_block(hdev, ctx, reserved_va_base, SZ_2M);
10484 free_data_buffer:
10485         hl_asic_dma_free_coherent(hdev, SZ_2M, host_mem_virtual_addr, host_mem_dma_addr);
10486 put_ctx:
10487         hl_ctx_put(ctx);
10488
10489         return rc;
10490 }
10491
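/*
 * The internal CB pool backs driver-generated command buffers (such as the
 * signal/wait CBs below) with coherent host memory, carved up by a gen_pool
 * and mapped into the device's host VA range.
 */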
10492 static int gaudi2_internal_cb_pool_init(struct hl_device *hdev, struct hl_ctx *ctx)
10493 {
10494         struct gaudi2_device *gaudi2 = hdev->asic_specific;
10495         int min_alloc_order, rc;
10496
10497         if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU))
10498                 return 0;
10499
10500         hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
10501                                                                 HOST_SPACE_INTERNAL_CB_SZ,
10502                                                                 &hdev->internal_cb_pool_dma_addr,
10503                                                                 GFP_KERNEL | __GFP_ZERO);
10504
10505         if (!hdev->internal_cb_pool_virt_addr)
10506                 return -ENOMEM;
10507
10508         min_alloc_order = ilog2(min(gaudi2_get_signal_cb_size(hdev),
10509                                         gaudi2_get_wait_cb_size(hdev)));
10510
10511         hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
10512         if (!hdev->internal_cb_pool) {
10513                 dev_err(hdev->dev, "Failed to create internal CB pool\n");
10514                 rc = -ENOMEM;
10515                 goto free_internal_cb_pool;
10516         }
10517
10518         rc = gen_pool_add(hdev->internal_cb_pool, (uintptr_t) hdev->internal_cb_pool_virt_addr,
10519                                 HOST_SPACE_INTERNAL_CB_SZ, -1);
10520         if (rc) {
10521                 dev_err(hdev->dev, "Failed to add memory to internal CB pool\n");
10522                 rc = -EFAULT;
10523                 goto destroy_internal_cb_pool;
10524         }
10525
10526         hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST,
10527                                         HOST_SPACE_INTERNAL_CB_SZ, HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
10528
10529         if (!hdev->internal_cb_va_base) {
10530                 rc = -ENOMEM;
10531                 goto destroy_internal_cb_pool;
10532         }
10533
10534         mutex_lock(&hdev->mmu_lock);
10535
10536         rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, hdev->internal_cb_pool_dma_addr,
10537                                         HOST_SPACE_INTERNAL_CB_SZ);
10538         if (rc)
10539                 goto unreserve_internal_cb_pool;
10540
10541         rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
10542         if (rc)
10543                 goto unmap_internal_cb_pool;
10544
10545         mutex_unlock(&hdev->mmu_lock);
10546
10547         return 0;
10548
10549 unmap_internal_cb_pool:
10550         hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
10551 unreserve_internal_cb_pool:
10552         mutex_unlock(&hdev->mmu_lock);
10553         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
10554 destroy_internal_cb_pool:
10555         gen_pool_destroy(hdev->internal_cb_pool);
10556 free_internal_cb_pool:
10557         hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
10558                                         hdev->internal_cb_pool_dma_addr);
10559
10560         return rc;
10561 }
10562
10563 static void gaudi2_internal_cb_pool_fini(struct hl_device *hdev, struct hl_ctx *ctx)
10564 {
10565         struct gaudi2_device *gaudi2 = hdev->asic_specific;
10566
10567         if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU))
10568                 return;
10569
10570         mutex_lock(&hdev->mmu_lock);
10571         hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
10572         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
10573         hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
10574         mutex_unlock(&hdev->mmu_lock);
10575
10576         gen_pool_destroy(hdev->internal_cb_pool);
10577
10578         hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
10579                                         hdev->internal_cb_pool_dma_addr);
10580 }
10581
10582 static void gaudi2_restore_user_registers(struct hl_device *hdev)
10583 {
10584         gaudi2_restore_user_sm_registers(hdev);
10585         gaudi2_restore_user_qm_registers(hdev);
10586 }
10587
10588 static int gaudi2_map_virtual_msix_doorbell_memory(struct hl_ctx *ctx)
10589 {
10590         struct hl_device *hdev = ctx->hdev;
10591         struct asic_fixed_properties *prop = &hdev->asic_prop;
10592         struct gaudi2_device *gaudi2 = hdev->asic_specific;
10593         int rc;
10594
10595         rc = hl_mmu_map_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START,
10596                                 gaudi2->virt_msix_db_dma_addr, prop->pmmu.page_size, true);
10597         if (rc)
10598                 dev_err(hdev->dev, "Failed to map VA %#llx for virtual MSI-X doorbell memory\n",
10599                         RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);
10600
10601         return rc;
10602 }
10603
10604 static void gaudi2_unmap_virtual_msix_doorbell_memory(struct hl_ctx *ctx)
10605 {
10606         struct hl_device *hdev = ctx->hdev;
10607         struct asic_fixed_properties *prop = &hdev->asic_prop;
10608         int rc;
10609
10610         rc = hl_mmu_unmap_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START,
10611                                 prop->pmmu.page_size, true);
10612         if (rc)
10613                 dev_err(hdev->dev, "Failed to unmap VA %#llx of virtual MSI-X doorbell memory\n",
10614                         RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);
10615 }
10616
10617 static int gaudi2_ctx_init(struct hl_ctx *ctx)
10618 {
10619         int rc;
10620
10621         rc = gaudi2_mmu_prepare(ctx->hdev, ctx->asid);
10622         if (rc)
10623                 return rc;
10624
10625         /* No need to clear user registers if the device has just been
10626          * reset; we restore only the NIC QM registers.
10627          */
10628         if (ctx->hdev->reset_upon_device_release)
10629                 gaudi2_restore_nic_qm_registers(ctx->hdev);
10630         else
10631                 gaudi2_restore_user_registers(ctx->hdev);
10632
10633         rc = gaudi2_internal_cb_pool_init(ctx->hdev, ctx);
10634         if (rc)
10635                 return rc;
10636
10637         rc = gaudi2_map_virtual_msix_doorbell_memory(ctx);
10638         if (rc)
10639                 gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);
10640
10641         return rc;
10642 }
10643
10644 static void gaudi2_ctx_fini(struct hl_ctx *ctx)
10645 {
10646         if (ctx->asid == HL_KERNEL_ASID_ID)
10647                 return;
10648
10649         gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);
10650
10651         gaudi2_unmap_virtual_msix_doorbell_memory(ctx);
10652 }
10653
10654 static int gaudi2_pre_schedule_cs(struct hl_cs *cs)
10655 {
10656         struct hl_device *hdev = cs->ctx->hdev;
10657         int index = cs->sequence & (hdev->asic_prop.max_pending_cs - 1);
10658         u32 mon_payload, sob_id, mon_id;
10659
10660         if (!cs_needs_completion(cs))
10661                 return 0;
10662
10663         /*
10664          * The first 64 SOB/MON are reserved for the driver's QMAN auto-completion
10665          * mechanism. Each SOB/MON pair is used for a pending CS with the same
10666          * cyclic index. The SOB value is increased as each of the CS jobs is
10667          * completed. When the SOB reaches the number of CS jobs, the monitor
10668          * generates an MSI-X interrupt.
10669          */
10670
10671         sob_id = mon_id = index;
10672         mon_payload = (1 << CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT) |
10673                                 (1 << CQ_ENTRY_READY_SHIFT) | index;
10674
10675         gaudi2_arm_cq_monitor(hdev, sob_id, mon_id, GAUDI2_RESERVED_CQ_CS_COMPLETION, mon_payload,
10676                                 cs->jobs_cnt);
10677
10678         return 0;
10679 }
10680
10681 static u32 gaudi2_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
10682 {
10683         return HL_INVALID_QUEUE;
10684 }
10685
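/*
 * A signal CB is a single MSG_SHORT packet that adds 1 to the target SOB
 * (SOB-base addressing, ADD mode), optionally with an engine barrier when
 * 'eb' is set.
 */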
10686 static u32 gaudi2_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, u32 size, bool eb)
10687 {
10688         struct hl_cb *cb = data;
10689         struct packet_msg_short *pkt;
10690         u32 value, ctl, pkt_size = sizeof(*pkt);
10691
10692         pkt = (struct packet_msg_short *) (uintptr_t) (cb->kernel_address + size);
10693         memset(pkt, 0, pkt_size);
10694
10695         /* Inc by 1, Mode ADD */
10696         value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
10697         value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
10698
10699         ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
10700         ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 1); /* SOB base */
10701         ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
10702         ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, eb);
10703         ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
10704
10705         pkt->value = cpu_to_le32(value);
10706         pkt->ctl = cpu_to_le32(ctl);
10707
10708         return size + pkt_size;
10709 }
10710
10711 static u32 gaudi2_add_mon_msg_short(struct packet_msg_short *pkt, u32 value, u16 addr)
10712 {
10713         u32 ctl, pkt_size = sizeof(*pkt);
10714
10715         memset(pkt, 0, pkt_size);
10716
10717         ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr);
10718         ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0);  /* MON base */
10719         ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
10720         ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
10721         ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 0);
10722
10723         pkt->value = cpu_to_le32(value);
10724         pkt->ctl = cpu_to_le32(ctl);
10725
10726         return pkt_size;
10727 }
10728
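/*
 * Arm a monitor on a group of sync objects: SOBs are addressed 8 per sync
 * group (hence sob_base / 8), with an 8-bit mask selecting the relevant SOBs
 * in the group and a GREATER-OR-EQUAL comparison against sob_val.
 */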
10729 static u32 gaudi2_add_arm_monitor_pkt(struct hl_device *hdev, struct packet_msg_short *pkt,
10730                                         u16 sob_base, u8 sob_mask, u16 sob_val, u16 addr)
10731 {
10732         u32 ctl, value, pkt_size = sizeof(*pkt);
10733         u8 mask;
10734
10735         if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
10736                 dev_err(hdev->dev, "sob_base %u (mask %#x) is not valid\n", sob_base, sob_mask);
10737                 return 0;
10738         }
10739
10740         memset(pkt, 0, pkt_size);
10741
10742         value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
10743         value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
10744         value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MODE_MASK, 0); /* GREATER OR EQUAL */
10745         value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MASK_MASK, mask);
10746
10747         ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr);
10748         ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0); /* MON base */
10749         ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
10750         ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
10751         ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
10752
10753         pkt->value = cpu_to_le32(value);
10754         pkt->ctl = cpu_to_le32(ctl);
10755
10756         return pkt_size;
10757 }
10758
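/*
 * The fence packet stalls the stream until fence counter 2 reaches a target
 * value of 1, then decrements it; the monitor armed above is what eventually
 * writes to the queue's FENCE2 register to release it.
 */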
10759 static u32 gaudi2_add_fence_pkt(struct packet_fence *pkt)
10760 {
10761         u32 ctl, cfg, pkt_size = sizeof(*pkt);
10762
10763         memset(pkt, 0, pkt_size);
10764
10765         cfg = FIELD_PREP(GAUDI2_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
10766         cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
10767         cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_ID_MASK, 2);
10768
10769         ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
10770         ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
10771         ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
10772
10773         pkt->cfg = cpu_to_le32(cfg);
10774         pkt->ctl = cpu_to_le32(ctl);
10775
10776         return pkt_size;
10777 }
10778
static u32 gaudi2_gen_wait_cb(struct hl_device *hdev, struct hl_gen_wait_properties *prop)
{
        struct hl_cb *cb = prop->data;
        void *buf = (void *) (uintptr_t) (cb->kernel_address);
        u64 monitor_base, fence_addr = 0;
        u32 stream_index, size = prop->size;
        u16 msg_addr_offset;

        stream_index = prop->q_idx % 4; /* 4 streams per QMAN */
        fence_addr = CFG_BASE + gaudi2_qm_blocks_bases[prop->q_idx] +
                        QM_FENCE2_OFFSET + stream_index * 4;

        /*
         * monitor_base should be the content of the base0 address registers,
         * so it will be added to the msg short offsets
         */
        monitor_base = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

        /* First monitor config packet: low address of the sync */
        msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + prop->mon_id * 4) -
                                monitor_base;

        size += gaudi2_add_mon_msg_short(buf + size, (u32) fence_addr, msg_addr_offset);

        /* Second monitor config packet: high address of the sync */
        msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + prop->mon_id * 4) -
                                monitor_base;

        size += gaudi2_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32), msg_addr_offset);

        /*
         * Third monitor config packet: the payload, i.e. what to write when the
         * sync triggers
         */
        msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + prop->mon_id * 4) -
                                monitor_base;

        size += gaudi2_add_mon_msg_short(buf + size, 1, msg_addr_offset);

        /* Fourth monitor config packet: bind the monitor to a sync object */
        msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + prop->mon_id * 4) - monitor_base;

        size += gaudi2_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base, prop->sob_mask,
                                                prop->sob_val, msg_addr_offset);

        /* Fence packet */
        size += gaudi2_add_fence_pkt(buf + size);

        return size;
}

static void gaudi2_reset_sob(struct hl_device *hdev, void *data)
{
        struct hl_hw_sob *hw_sob = data;

        dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx, hw_sob->sob_id);

        WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4, 0);

        kref_init(&hw_sob->kref);
}

static void gaudi2_reset_sob_group(struct hl_device *hdev, u16 sob_group)
{
        /* Not implemented */
}

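/*
 * Compose the 64-bit device time from the two 32-bit counter registers.
 * Note the high word is read before the low word, so a reading taken while
 * the low word wraps around may appear earlier by up to one low-word period.
 */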
static u64 gaudi2_get_device_time(struct hl_device *hdev)
{
        u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;

        return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
}

static int gaudi2_collective_wait_init_cs(struct hl_cs *cs)
{
        /* Not implemented */
        return 0;
}

static int gaudi2_collective_wait_create_jobs(struct hl_device *hdev, struct hl_ctx *ctx,
                                        struct hl_cs *cs, u32 wait_queue_id,
                                        u32 collective_engine_id, u32 encaps_signal_offset)
{
        /* Not implemented */
        return -EINVAL;
}

/*
 * gaudi2_mmu_scramble_addr - converts a DRAM (non power of 2) page-size
 *                            aligned address to a DMMU page-size (64MB)
 *                            address before mapping it in the MMU.
 * The operation is performed on both the virtual and physical addresses.
 * For a device with 6 HBMs the scramble is:
 * (addr[47:0] / 48M) * 64M + addr % 48M + addr[63:48]
 *
 * Example:
 * =============================================================================
 * Allocated DRAM  Reserved VA      scrambled VA for MMU mapping    Scrambled PA
 * Phys address                                                     in MMU last
 *                                                                    HOP
 * =============================================================================
 * PA1 0x3000000  VA1 0x9C000000  SVA1= (VA1/48M)*64M 0xD0000000  <- PA1/48M 0x1
 * PA2 0x9000000  VA2 0x9F000000  SVA2= (VA2/48M)*64M 0xD4000000  <- PA2/48M 0x3
 * =============================================================================
 */
static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr)
{
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        u32 divisor, mod_va;
        u64 div_va;

        /* accept any address in the DRAM address space */
        if (hl_mem_area_inside_range(raw_addr, sizeof(raw_addr), DRAM_PHYS_BASE,
                                                                        VA_HBM_SPACE_END)) {

                divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE;
                div_va = div_u64_rem(raw_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK, divisor, &mod_va);
                return (raw_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) |
                        (div_va << GAUDI2_HBM_MMU_SCRM_DIV_SHIFT) |
                        (mod_va << GAUDI2_HBM_MMU_SCRM_MOD_SHIFT);
        }

        return raw_addr;
}

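/*
 * gaudi2_mmu_descramble_addr - the inverse of gaudi2_mmu_scramble_addr:
 * converts a DMMU page-size (64MB) address back to the original DRAM
 * page-size address. For a device with 6 HBMs the descramble is:
 * (addr[47:0] / 64M) * 48M + addr % 64M + addr[63:48]
 *
 * E.g. SVA1 0xD0000000 from the table above descrambles back to
 * (0xD0000000 / 64M) * 48M = 0x34 * 48M = 0x9C000000 = VA1.
 */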
static u64 gaudi2_mmu_descramble_addr(struct hl_device *hdev, u64 scrambled_addr)
{
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        u32 divisor, mod_va;
        u64 div_va;

        /* accept any address in the DRAM address space */
        if (hl_mem_area_inside_range(scrambled_addr, sizeof(scrambled_addr), DRAM_PHYS_BASE,
                                                                        VA_HBM_SPACE_END)) {

                divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE;
                div_va = div_u64_rem(scrambled_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK,
                                        PAGE_SIZE_64MB, &mod_va);

                return ((scrambled_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) +
                                        (div_va * divisor + mod_va));
        }

        return scrambled_addr;
}

static u32 gaudi2_get_dec_base_addr(struct hl_device *hdev, u32 core_id)
{
        u32 base = 0, dcore_id, dec_id;

        if (core_id >= NUMBER_OF_DEC) {
                dev_err(hdev->dev, "Unexpected core number %d for DEC\n", core_id);
                goto out;
        }

        if (core_id < 8) {
                /* DCORE decoders */
                dcore_id = core_id / NUM_OF_DEC_PER_DCORE;
                dec_id = core_id % NUM_OF_DEC_PER_DCORE;

                base = mmDCORE0_DEC0_CMD_BASE + dcore_id * DCORE_OFFSET +
                                dec_id * DCORE_VDEC_OFFSET;
        } else {
                /* PCIe Shared Decoder */
                base = mmPCIE_DEC0_CMD_BASE + ((core_id % 8) * PCIE_VDEC_OFFSET);
        }
out:
        return base;
}

static int gaudi2_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
                                u32 *block_size, u32 *block_id)
{
        struct gaudi2_device *gaudi2 = hdev->asic_specific;
        int i;

        for (i = 0 ; i < NUM_USER_MAPPED_BLOCKS ; i++) {
                if (block_addr == CFG_BASE + gaudi2->mapped_blocks[i].address) {
                        *block_id = i;
                        if (block_size)
                                *block_size = gaudi2->mapped_blocks[i].size;
                        return 0;
                }
        }

        dev_err(hdev->dev, "Invalid block address %#llx\n", block_addr);

        return -EINVAL;
}

static int gaudi2_block_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
                        u32 block_id, u32 block_size)
{
        struct gaudi2_device *gaudi2 = hdev->asic_specific;
        u64 offset_in_bar;
        u64 address;
        int rc;

        if (block_id >= NUM_USER_MAPPED_BLOCKS) {
                dev_err(hdev->dev, "Invalid block id %u\n", block_id);
                return -EINVAL;
        }

        /* we allow mapping only an entire block */
        if (block_size != gaudi2->mapped_blocks[block_id].size) {
                dev_err(hdev->dev, "Invalid block size %u\n", block_size);
                return -EINVAL;
        }

        offset_in_bar = CFG_BASE + gaudi2->mapped_blocks[block_id].address - STM_FLASH_BASE_ADDR;

        address = pci_resource_start(hdev->pdev, SRAM_CFG_BAR_ID) + offset_in_bar;

        vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
                        VM_DONTCOPY | VM_NORESERVE);

        rc = remap_pfn_range(vma, vma->vm_start, address >> PAGE_SHIFT,
                        block_size, vma->vm_page_prot);
        if (rc)
                dev_err(hdev->dev, "remap_pfn_range error %d\n", rc);

        return rc;
}

static void gaudi2_enable_events_from_fw(struct hl_device *hdev)
{
        struct gaudi2_device *gaudi2 = hdev->asic_specific;
        struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
        u32 irq_handler_offset = le32_to_cpu(dyn_regs->gic_host_ints_irq);

        if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)
                WREG32(irq_handler_offset,
                        gaudi2_irq_map_table[GAUDI2_EVENT_CPU_INTS_REGISTER].cpu_id);
}

static int gaudi2_get_mmu_base(struct hl_device *hdev, u64 mmu_id, u32 *mmu_base)
{
        switch (mmu_id) {
        case HW_CAP_DCORE0_DMMU0:
                *mmu_base = mmDCORE0_HMMU0_MMU_BASE;
                break;
        case HW_CAP_DCORE0_DMMU1:
                *mmu_base = mmDCORE0_HMMU1_MMU_BASE;
                break;
        case HW_CAP_DCORE0_DMMU2:
                *mmu_base = mmDCORE0_HMMU2_MMU_BASE;
                break;
        case HW_CAP_DCORE0_DMMU3:
                *mmu_base = mmDCORE0_HMMU3_MMU_BASE;
                break;
        case HW_CAP_DCORE1_DMMU0:
                *mmu_base = mmDCORE1_HMMU0_MMU_BASE;
                break;
        case HW_CAP_DCORE1_DMMU1:
                *mmu_base = mmDCORE1_HMMU1_MMU_BASE;
                break;
        case HW_CAP_DCORE1_DMMU2:
                *mmu_base = mmDCORE1_HMMU2_MMU_BASE;
                break;
        case HW_CAP_DCORE1_DMMU3:
                *mmu_base = mmDCORE1_HMMU3_MMU_BASE;
                break;
        case HW_CAP_DCORE2_DMMU0:
                *mmu_base = mmDCORE2_HMMU0_MMU_BASE;
                break;
        case HW_CAP_DCORE2_DMMU1:
                *mmu_base = mmDCORE2_HMMU1_MMU_BASE;
                break;
        case HW_CAP_DCORE2_DMMU2:
                *mmu_base = mmDCORE2_HMMU2_MMU_BASE;
                break;
        case HW_CAP_DCORE2_DMMU3:
                *mmu_base = mmDCORE2_HMMU3_MMU_BASE;
                break;
        case HW_CAP_DCORE3_DMMU0:
                *mmu_base = mmDCORE3_HMMU0_MMU_BASE;
                break;
        case HW_CAP_DCORE3_DMMU1:
                *mmu_base = mmDCORE3_HMMU1_MMU_BASE;
                break;
        case HW_CAP_DCORE3_DMMU2:
                *mmu_base = mmDCORE3_HMMU2_MMU_BASE;
                break;
        case HW_CAP_DCORE3_DMMU3:
                *mmu_base = mmDCORE3_HMMU3_MMU_BASE;
                break;
        case HW_CAP_PMMU:
                *mmu_base = mmPMMU_HBW_MMU_BASE;
                break;
        default:
                return -EINVAL;
        }

        return 0;
}

static void gaudi2_ack_mmu_error(struct hl_device *hdev, u64 mmu_id)
{
        bool is_pmmu = (mmu_id == HW_CAP_PMMU);
        struct gaudi2_device *gaudi2 = hdev->asic_specific;
        u32 mmu_base;

        if (!(gaudi2->hw_cap_initialized & mmu_id))
                return;

        if (gaudi2_get_mmu_base(hdev, mmu_id, &mmu_base))
                return;

        gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, NULL);
        gaudi2_handle_access_error(hdev, mmu_base, is_pmmu);
}

static int gaudi2_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
{
        u32 i, mmu_id, num_of_hmmus = NUM_OF_HMMU_PER_DCORE * NUM_OF_DCORES;

        /* check all HMMUs */
        for (i = 0 ; i < num_of_hmmus ; i++) {
                mmu_id = HW_CAP_DCORE0_DMMU0 << i;

                if (mmu_cap_mask & mmu_id)
                        gaudi2_ack_mmu_error(hdev, mmu_id);
        }

        /* check PMMU */
        if (mmu_cap_mask & HW_CAP_PMMU)
                gaudi2_ack_mmu_error(hdev, HW_CAP_PMMU);

        return 0;
}

static void gaudi2_get_msi_info(__le32 *table)
{
        table[CPUCP_EVENT_QUEUE_MSI_TYPE] = cpu_to_le32(GAUDI2_EVENT_QUEUE_MSIX_IDX);
}

static int gaudi2_map_pll_idx_to_fw_idx(u32 pll_idx)
{
        switch (pll_idx) {
        case HL_GAUDI2_CPU_PLL: return CPU_PLL;
        case HL_GAUDI2_PCI_PLL: return PCI_PLL;
        case HL_GAUDI2_NIC_PLL: return NIC_PLL;
        case HL_GAUDI2_DMA_PLL: return DMA_PLL;
        case HL_GAUDI2_MESH_PLL: return MESH_PLL;
        case HL_GAUDI2_MME_PLL: return MME_PLL;
        case HL_GAUDI2_TPC_PLL: return TPC_PLL;
        case HL_GAUDI2_IF_PLL: return IF_PLL;
        case HL_GAUDI2_SRAM_PLL: return SRAM_PLL;
        case HL_GAUDI2_HBM_PLL: return HBM_PLL;
        case HL_GAUDI2_VID_PLL: return VID_PLL;
        case HL_GAUDI2_MSS_PLL: return MSS_PLL;
        default: return -EINVAL;
        }
}

static int gaudi2_gen_sync_to_engine_map(struct hl_device *hdev, struct hl_sync_to_engine_map *map)
{
        /* Not implemented */
        return 0;
}

static int gaudi2_monitor_valid(struct hl_mon_state_dump *mon)
{
        /* Not implemented */
        return 0;
}

static int gaudi2_print_single_monitor(char **buf, size_t *size, size_t *offset,
                                struct hl_device *hdev, struct hl_mon_state_dump *mon)
{
        /* Not implemented */
        return 0;
}

static int gaudi2_print_fences_single_engine(struct hl_device *hdev, u64 base_offset,
                                u64 status_base_offset, enum hl_sync_engine_type engine_type,
                                u32 engine_id, char **buf, size_t *size, size_t *offset)
{
        /* Not implemented */
        return 0;
}

static struct hl_state_dump_specs_funcs gaudi2_state_dump_funcs = {
        .monitor_valid = gaudi2_monitor_valid,
        .print_single_monitor = gaudi2_print_single_monitor,
        .gen_sync_to_engine_map = gaudi2_gen_sync_to_engine_map,
        .print_fences_single_engine = gaudi2_print_fences_single_engine,
};

static void gaudi2_state_dump_init(struct hl_device *hdev)
{
        /* Not implemented */
        hdev->state_dump_specs.props = gaudi2_state_dump_specs_props;
        hdev->state_dump_specs.funcs = gaudi2_state_dump_funcs;
}

static u32 gaudi2_get_sob_addr(struct hl_device *hdev, u32 sob_id)
{
        /* Not implemented */
        return 0;
}

static u32 *gaudi2_get_stream_master_qid_arr(void)
{
        /* Not implemented */
        return NULL;
}

static void gaudi2_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
                                struct attribute_group *dev_vrm_attr_grp)
{
        hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
        hl_sysfs_add_dev_vrm_attr(hdev, dev_vrm_attr_grp);
}

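/*
 * Resolve the page size to actually use in the page table. For host pages
 * this is the PMMU page size; for DRAM pages it is the DRAM page size, which
 * the address scrambling routine later widens to the DMMU page size.
 *
 * A worked example, assuming the 48MB DRAM page size implied by the scramble
 * formula above (6 functional HBMs): a 96MB DRAM mapping is split into two
 * 48MB pages, and each page address is scrambled to a 64MB-aligned DMMU
 * entry by gaudi2_mmu_scramble_addr().
 */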
static int gaudi2_mmu_get_real_page_size(struct hl_device *hdev, struct hl_mmu_properties *mmu_prop,
                                        u32 page_size, u32 *real_page_size, bool is_dram_addr)
{
        struct asic_fixed_properties *prop = &hdev->asic_prop;

        /* for host pages the page size must be a multiple of the MMU page size */
        if (!is_dram_addr) {
                if (page_size % mmu_prop->page_size)
                        goto page_size_err;

                *real_page_size = mmu_prop->page_size;
                return 0;
        }

        if ((page_size % prop->dram_page_size) || (prop->dram_page_size > mmu_prop->page_size))
                goto page_size_err;

        /*
         * The MMU page size differs from the DRAM page size (more precisely,
         * the DMMU page is greater than the DRAM page). For this reason, work
         * with the DRAM page size here and let the MMU scrambling routine
         * handle the mismatch when calculating the address to place in the
         * MMU page table. (The check above also makes sure that the
         * dram_page_size is not greater than the MMU page size.)
         */
        *real_page_size = prop->dram_page_size;

        return 0;

page_size_err:
        dev_err(hdev->dev, "page size of %u is not %uKB aligned, can't map\n",
                                                        page_size, mmu_prop->page_size >> 10);
        return -EFAULT;
}

static int gaudi2_get_monitor_dump(struct hl_device *hdev, void *data)
{
        return -EOPNOTSUPP;
}

int gaudi2_send_device_activity(struct hl_device *hdev, bool open)
{
        struct gaudi2_device *gaudi2 = hdev->asic_specific;

        if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
                return 0;

        return hl_fw_send_device_activity(hdev, open);
}

static const struct hl_asic_funcs gaudi2_funcs = {
        .early_init = gaudi2_early_init,
        .early_fini = gaudi2_early_fini,
        .late_init = gaudi2_late_init,
        .late_fini = gaudi2_late_fini,
        .sw_init = gaudi2_sw_init,
        .sw_fini = gaudi2_sw_fini,
        .hw_init = gaudi2_hw_init,
        .hw_fini = gaudi2_hw_fini,
        .halt_engines = gaudi2_halt_engines,
        .suspend = gaudi2_suspend,
        .resume = gaudi2_resume,
        .mmap = gaudi2_mmap,
        .ring_doorbell = gaudi2_ring_doorbell,
        .pqe_write = gaudi2_pqe_write,
        .asic_dma_alloc_coherent = gaudi2_dma_alloc_coherent,
        .asic_dma_free_coherent = gaudi2_dma_free_coherent,
        .scrub_device_mem = gaudi2_scrub_device_mem,
        .scrub_device_dram = gaudi2_scrub_device_dram,
        .get_int_queue_base = NULL,
        .test_queues = gaudi2_test_queues,
        .asic_dma_pool_zalloc = gaudi2_dma_pool_zalloc,
        .asic_dma_pool_free = gaudi2_dma_pool_free,
        .cpu_accessible_dma_pool_alloc = gaudi2_cpu_accessible_dma_pool_alloc,
        .cpu_accessible_dma_pool_free = gaudi2_cpu_accessible_dma_pool_free,
        .asic_dma_unmap_single = gaudi2_dma_unmap_single,
        .asic_dma_map_single = gaudi2_dma_map_single,
        .hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
        .cs_parser = gaudi2_cs_parser,
        .asic_dma_map_sgtable = hl_dma_map_sgtable,
        .add_end_of_cb_packets = NULL,
        .update_eq_ci = gaudi2_update_eq_ci,
        .context_switch = gaudi2_context_switch,
        .restore_phase_topology = gaudi2_restore_phase_topology,
        .debugfs_read_dma = gaudi2_debugfs_read_dma,
        .add_device_attr = gaudi2_add_device_attr,
        .handle_eqe = gaudi2_handle_eqe,
        .get_events_stat = gaudi2_get_events_stat,
        .read_pte = NULL,
        .write_pte = NULL,
        .mmu_invalidate_cache = gaudi2_mmu_invalidate_cache,
        .mmu_invalidate_cache_range = gaudi2_mmu_invalidate_cache_range,
        .mmu_prefetch_cache_range = NULL,
        .send_heartbeat = gaudi2_send_heartbeat,
        .debug_coresight = gaudi2_debug_coresight,
        .is_device_idle = gaudi2_is_device_idle,
        .compute_reset_late_init = gaudi2_compute_reset_late_init,
        .hw_queues_lock = gaudi2_hw_queues_lock,
        .hw_queues_unlock = gaudi2_hw_queues_unlock,
        .get_pci_id = gaudi2_get_pci_id,
        .get_eeprom_data = gaudi2_get_eeprom_data,
        .get_monitor_dump = gaudi2_get_monitor_dump,
        .send_cpu_message = gaudi2_send_cpu_message,
        .pci_bars_map = gaudi2_pci_bars_map,
        .init_iatu = gaudi2_init_iatu,
        .rreg = hl_rreg,
        .wreg = hl_wreg,
        .halt_coresight = gaudi2_halt_coresight,
        .ctx_init = gaudi2_ctx_init,
        .ctx_fini = gaudi2_ctx_fini,
        .pre_schedule_cs = gaudi2_pre_schedule_cs,
        .get_queue_id_for_cq = gaudi2_get_queue_id_for_cq,
        .load_firmware_to_device = NULL,
        .load_boot_fit_to_device = NULL,
        .get_signal_cb_size = gaudi2_get_signal_cb_size,
        .get_wait_cb_size = gaudi2_get_wait_cb_size,
        .gen_signal_cb = gaudi2_gen_signal_cb,
        .gen_wait_cb = gaudi2_gen_wait_cb,
        .reset_sob = gaudi2_reset_sob,
        .reset_sob_group = gaudi2_reset_sob_group,
        .get_device_time = gaudi2_get_device_time,
        .pb_print_security_errors = gaudi2_pb_print_security_errors,
        .collective_wait_init_cs = gaudi2_collective_wait_init_cs,
        .collective_wait_create_jobs = gaudi2_collective_wait_create_jobs,
        .get_dec_base_addr = gaudi2_get_dec_base_addr,
        .scramble_addr = gaudi2_mmu_scramble_addr,
        .descramble_addr = gaudi2_mmu_descramble_addr,
        .ack_protection_bits_errors = gaudi2_ack_protection_bits_errors,
        .get_hw_block_id = gaudi2_get_hw_block_id,
        .hw_block_mmap = gaudi2_block_mmap,
        .enable_events_from_fw = gaudi2_enable_events_from_fw,
        .ack_mmu_errors = gaudi2_ack_mmu_page_fault_or_access_error,
        .get_msi_info = gaudi2_get_msi_info,
        .map_pll_idx_to_fw_idx = gaudi2_map_pll_idx_to_fw_idx,
        .init_firmware_preload_params = gaudi2_init_firmware_preload_params,
        .init_firmware_loader = gaudi2_init_firmware_loader,
        .init_cpu_scrambler_dram = gaudi2_init_scrambler_hbm,
        .state_dump_init = gaudi2_state_dump_init,
        .get_sob_addr = gaudi2_get_sob_addr,
        .set_pci_memory_regions = gaudi2_set_pci_memory_regions,
        .get_stream_master_qid_arr = gaudi2_get_stream_master_qid_arr,
        .check_if_razwi_happened = gaudi2_check_if_razwi_happened,
        .mmu_get_real_page_size = gaudi2_mmu_get_real_page_size,
        .access_dev_mem = hl_access_dev_mem,
        .set_dram_bar_base = gaudi2_set_hbm_bar_base,
        .set_engine_cores = gaudi2_set_engine_cores,
        .set_engines = gaudi2_set_engines,
        .send_device_activity = gaudi2_send_device_activity,
        .set_dram_properties = gaudi2_set_dram_properties,
        .set_binning_masks = gaudi2_set_binning_masks,
};

void gaudi2_set_asic_funcs(struct hl_device *hdev)
{
        hdev->asic_funcs = &gaudi2_funcs;
}